// This file is a part of Julia. License is MIT: http://julialang.org/license //===----------------------------------------------------------------------===// // // The ABI implementation used for AArch64 targets. // //===----------------------------------------------------------------------===// // // The Procedure Call Standard can be found here: // http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055b/IHI0055B_aapcs64.pdf // //===----------------------------------------------------------------------===// struct ABI_AArch64Layout : AbiLayout { Type *get_llvm_vectype(jl_datatype_t *dt) const { // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt) // `!dt->mutabl && dt->pointerfree && !dt->haspadding && dt->nfields > 0` if (dt->layout == NULL) return nullptr; size_t nfields = dt->layout->nfields; assert(nfields > 0); if (nfields < 2) return nullptr; static Type *T_vec64 = VectorType::get(T_int32, 2); static Type *T_vec128 = VectorType::get(T_int32, 4); Type *lltype; // Short vector should be either 8 bytes or 16 bytes. // Note that there are only two distinct fundamental types for // short vectors so we normalize them to <2 x i32> and <4 x i32> switch (jl_datatype_size(dt)) { case 8: lltype = T_vec64; break; case 16: lltype = T_vec128; break; default: return nullptr; } // Since `dt` is pointer free and has no padding and is 8 or 16 in size, // `ft0` must be concrete, immutable with no padding and we don't need // to check if its size is legal since it is included in // the homogeneity check. jl_datatype_t *ft0 = (jl_datatype_t*)jl_field_type(dt, 0); // `ft0` should be a `VecElement` type and the true element type // should be a primitive type if (ft0->name != jl_vecelement_typename || ((jl_datatype_t*)jl_field_type(ft0, 0))->layout->nfields) return nullptr; for (size_t i = 1; i < nfields; i++) { if (jl_field_type(dt, i) != (jl_value_t*)ft0) { // Not homogeneous return nullptr; } } return lltype; } Type *get_llvm_fptype(jl_datatype_t *dt) const { // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt) // `!dt->mutabl && dt->pointerfree && !dt->haspadding && dt->nfields == 0` Type *lltype; // Check size first since it's cheaper. switch (jl_datatype_size(dt)) { case 2: lltype = T_float16; break; case 4: lltype = T_float32; break; case 8: lltype = T_float64; break; case 16: lltype = T_float128; break; default: return nullptr; } return ((jl_floatingpoint_type && jl_is_floattype((jl_value_t*)dt)) ? lltype : nullptr); } Type *get_llvm_fp_or_vectype(jl_datatype_t *dt) const { // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt) if (dt->mutabl || dt->layout->npointers || dt->layout->haspadding) return nullptr; return dt->layout->nfields ? get_llvm_vectype(dt) : get_llvm_fptype(dt); } struct ElementType { Type *type; size_t sz; ElementType() : type(nullptr), sz(0) {}; }; // Whether a type is a homogeneous floating-point aggregates (HFA) or a // homogeneous short-vector aggregates (HVA). Returns the element type. // An Homogeneous Aggregate is a Composite Type where all of the Fundamental // Data Types of the members that compose the type are the same. // Note that it is the fundamental types that are important and not the member // types. bool isHFAorHVA(jl_datatype_t *dt, size_t dsz, size_t &nele, ElementType &ele) const { // Assume: // dt is a pointerfree type, (all members are isbits) // dsz == jl_datatype_size(dt) > 0 // 0 <= nele <= 3 // dt has no padding // We ignore zero sized member here. This isn't really consistent with // GCC for zero-sized array members. GCC seems to treat structs with // zero sized array members as non-HFA and non-HVA. Clang (3.7 and 3.8) // handles this slightly differently. // Ref https://llvm.org/bugs/show_bug.cgi?id=26162 while (size_t nfields = jl_datatype_nfields(dt)) { // For composite types, find the first non zero sized member size_t i; size_t fieldsz; for (i = 0;i < nfields;i++) { if ((fieldsz = jl_field_size(dt, i))) { break; } } assert(i < nfields); // If there's only one non zero sized member, try again on this member if (fieldsz == dsz) { dt = (jl_datatype_t*)jl_field_type(dt, i); continue; } if (Type *vectype = get_llvm_vectype(dt)) { if ((ele.sz && dsz != ele.sz) || (ele.type && ele.type != vectype)) return false; ele.type = vectype; ele.sz = dsz; nele++; return true; } // Otherwise, process each members for (;i < nfields;i++) { size_t fieldsz = jl_field_size(dt, i); if (fieldsz == 0) continue; jl_datatype_t *fieldtype = (jl_datatype_t*)jl_field_type(dt, i); // Check element count. // This needs to be done after the zero size member check if (nele > 3 || !isHFAorHVA(fieldtype, fieldsz, nele, ele)) { return false; } } return true; } // For bitstypes if (ele.sz && dsz != ele.sz) return false; Type *new_type = get_llvm_fptype(dt); if (new_type && (!ele.type || ele.type == new_type)) { ele.type = new_type; ele.sz = dsz; nele++; return true; } return false; } Type *isHFAorHVA(jl_datatype_t *dt, size_t &nele) const { // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt) // An Homogeneous Floating-point Aggregate (HFA) is an Homogeneous Aggregate // with a Fundamental Data Type that is a Floating-Point type and at most // four uniquely addressable members. // An Homogeneous Short-Vector Aggregate (HVA) is an Homogeneous Aggregate // with a Fundamental Data Type that is a Short-Vector type and at most four // uniquely addressable members. // Maximum HFA and HVA size is 64 bytes (4 x fp128 or 16bytes vector) size_t dsz = jl_datatype_size(dt); if (dsz > 64 || !dt->layout || dt->layout->npointers || dt->layout->haspadding) return NULL; nele = 0; ElementType eltype; if (isHFAorHVA(dt, dsz, nele, eltype)) return eltype.type; return NULL; } bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab) override { // B.2 // If the argument type is an HFA or an HVA, then the argument is used // unmodified. size_t size; if (isHFAorHVA(dt, size)) return false; // B.3 // If the argument type is a Composite Type that is larger than 16 bytes, // then the argument is copied to memory allocated by the caller and the // argument is replaced by a pointer to the copy. // We only check for the total size and not whether it is a composite type // since there's no corresponding C type and we just treat such large // bitstype as a composite type of the right size. return jl_datatype_size(dt) > 16; // B.4 // If the argument type is a Composite Type then the size of the argument // is rounded up to the nearest multiple of 8 bytes. } // Determine which kind of register the argument will be passed in and // if the argument has to be passed on stack (including by reference). // // If the argument should be passed in SIMD and floating-point registers, // we may need to rewrite the argument types to [n x ftype]. // If the argument should be passed in general purpose registers, we may need // to rewrite the argument types to [n x i64]. // // If the argument has to be passed on stack, we need to use sret. // // All the out parameters should be default to `false`. Type *classify_arg(jl_datatype_t *dt, bool *fpreg, bool *onstack, size_t *rewrite_len) const { // Based on section 5.4 C of the Procedure Call Standard // C.1 // If the argument is a Half-, Single-, Double- or Quad- precision // Floating-point or Short Vector Type and the NSRN is less than 8, then // the argument is allocated to the least significant bits of register // v[NSRN]. The NSRN is incremented by one. The argument has now been // allocated. if (get_llvm_fp_or_vectype(dt)) { *fpreg = true; return NULL; } // C.2 // If the argument is an HFA or an HVA and there are sufficient // unallocated SIMD and Floating-point registers (NSRN + number of // members <= 8), then the argument is allocated to SIMD and // Floating-point Registers (with one register per member of the HFA // or HVA). The NSRN is incremented by the number of registers used. // The argument has now been allocated. if (Type *eltype = isHFAorHVA(dt, *rewrite_len)) { assert(*rewrite_len > 0 && *rewrite_len <= 4); // HFA and HVA have <= 4 members *fpreg = true; // Rewrite to [n x eltype] where n is the number of fundamental types. return eltype; } // Check if the argument needs to be passed by reference. This should be // done before starting step C but we do this here to avoid checking for // HFA and HVA twice. We don't check whether it is a composite type. // See `needPassByRef` above. if (jl_datatype_size(dt) > 16) { *onstack = true; return NULL; } // C.3 // If the argument is an HFA or an HVA then the NSRN is set to 8 and the // size of the argument is rounded up to the nearest multiple of 8 bytes. // C.4 // If the argument is an HFA, an HVA, a Quad-precision Floating-point or // Short Vector Type then the NSAA is rounded up to the larger of 8 or // the Natural Alignment of the argument’s type. // C.5 // If the argument is a Half- or Single- precision Floating Point type, // then the size of the argument is set to 8 bytes. The effect is as if // the argument had been copied to the least significant bits of a 64-bit // register and the remaining bits filled with unspecified values. // C.6 // If the argument is an HFA, an HVA, a Half-, Single-, Double- or // Quad- precision Floating-point or Short Vector Type, then the argument // is copied to memory at the adjusted NSAA. The NSAA is incremented // by the size of the argument. The argument has now been allocated. // // C.7 // If the argument is an Integral or Pointer Type, the size of the // argument is less than or equal to 8 bytes and the NGRN is less than 8, // the argument is copied to the least significant bits in x[NGRN]. // The NGRN is incremented by one. The argument has now been allocated. // Here we treat any bitstype of the right size as integers or pointers // This is needed for types like Cstring which should be treated as // pointers. We don't need to worry about floating points here since they // are handled above. if (jl_is_immutable(dt) && jl_datatype_nfields(dt) == 0 && (jl_datatype_size(dt) == 1 || jl_datatype_size(dt) == 2 || jl_datatype_size(dt) == 4 || jl_datatype_size(dt) == 8 || jl_datatype_size(dt) == 16)) return NULL; // C.8 // If the argument has an alignment of 16 then the NGRN is rounded up to // the next even number. // C.9 // If the argument is an Integral Type, the size of the argument is equal // to 16 and the NGRN is less than 7, the argument is copied to x[NGRN] // and x[NGRN+1]. x[NGRN] shall contain the lower addressed double-word // of the memory representation of the argument. The NGRN is incremented // by two. The argument has now been allocated. // // C.10 // If the argument is a Composite Type and the size in double-words of // the argument is not more than 8 minus NGRN, then the argument is // copied into consecutive general-purpose registers, starting at x[NGRN]. // The argument is passed as though it had been loaded into the registers // from a double-word-aligned address with an appropriate sequence of LDR // instructions loading consecutive registers from memory (the contents of // any unused parts of the registers are unspecified by this standard). // The NGRN is incremented by the number of registers used. The argument // has now been allocated. // We don't check for composite types here since the ones that have // corresponding C types are already handled and we just treat the ones // with weird size as a black box composite type. // The type can fit in 8 x 8 bytes since it is handled by // need_pass_by_ref otherwise. // 0-size types (Void) won't be rewritten and that is what we want assert(jl_datatype_size(dt) <= 16); // Should be pass by reference otherwise *rewrite_len = (jl_datatype_size(dt) + 7) >> 3; // Rewrite to [n x Int64] where n is the **size in dword** return jl_datatype_size(dt) ? T_int64 : NULL; // C.11 // The NGRN is set to 8. // C.12 // The NSAA is rounded up to the larger of 8 or the Natural Alignment // of the argument’s type. // C.13 // If the argument is a composite type then the argument is copied to // memory at the adjusted NSAA. The NSAA is incremented by the size of // the argument. The argument has now been allocated. // // C.14 // If the size of the argument is less than 8 bytes then the size of the // argument is set to 8 bytes. The effect is as if the argument was // copied to the least significant bits of a 64-bit register and the // remaining bits filled with unspecified values. // C.15 // The argument is copied to memory at the adjusted NSAA. The NSAA is // incremented by the size of the argument. The argument has now been // allocated. // } bool use_sret(jl_datatype_t *dt) override { // Section 5.5 // If the type, T, of the result of a function is such that // // void func(T arg) // // would require that arg be passed as a value in a register (or set of // registers) according to the rules in section 5.4 Parameter Passing, // then the result is returned in the same registers as would be used for // such an argument. bool fpreg = false; bool onstack = false; size_t rewrite_len = 0; classify_arg(dt, &fpreg, &onstack, &rewrite_len); return onstack; } Type *preferred_llvm_type(jl_datatype_t *dt, bool isret) const override { if (Type *fptype = get_llvm_fp_or_vectype(dt)) return fptype; bool fpreg = false; bool onstack = false; size_t rewrite_len = 0; if (Type *rewrite_ty = classify_arg(dt, &fpreg, &onstack, &rewrite_len)) return ArrayType::get(rewrite_ty, rewrite_len); return NULL; } };