https://github.com/JuliaLang/julia
Tip revision: 35c58d62470577e75cd33c62c59e1a73d0c73a5f authored by Valentin Churavy on 09 November 2023, 19:59:02 UTC
Support multiple invariant
Support multiple invariant
Tip revision: 35c58d6
abi_aarch64.cpp
// This file is a part of Julia. License is MIT: https://julialang.org/license
//===----------------------------------------------------------------------===//
//
// The ABI implementation used for AArch64 targets.
//
//===----------------------------------------------------------------------===//
//
// The Procedure Call Standard can be found here:
// http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055b/IHI0055B_aapcs64.pdf
//
//===----------------------------------------------------------------------===//
struct ABI_AArch64Layout : AbiLayout {
// Map a Julia datatype onto the LLVM type of an AArch64 short vector.
//
// Returns `<2 x i32>` for an 8 byte candidate, `<4 x i32>` for a 16 byte
// candidate and `nullptr` when `dt` is not a homogeneous tuple-like type of
// `VecElement`s wrapping a field-less (primitive) element type.
//
// Preconditions (not re-checked here):
//   jl_is_datatype(dt) && !jl_is_abstracttype(dt)
//   `!dt->name->mutabl && dt->pointerfree && !dt->haspadding && dt->nfields > 0`
Type *get_llvm_vectype(jl_datatype_t *dt, LLVMContext &ctx) const
{
    // Types without a layout, or with an opaque one, cannot be inspected.
    if (dt->layout == NULL || jl_is_layout_opaque(dt->layout))
        return nullptr;

    const size_t nmembers = dt->layout->nfields;
    assert(nmembers > 0);
    // A single-member type is never treated as a short vector.
    if (nmembers < 2)
        return nullptr;

    // A short vector is either 8 or 16 bytes. There are only two distinct
    // fundamental short-vector types, so everything is normalized to
    // <2 x i32> or <4 x i32>.
    const size_t nbytes = jl_datatype_size(dt);
    if (nbytes != 8 && nbytes != 16)
        return nullptr;
    const unsigned lanes = (nbytes == 8) ? 2 : 4;
    Type *vecty = FixedVectorType::get(Type::getInt32Ty(ctx), lanes);

    // `dt` is pointer free, unpadded and 8 or 16 bytes large, so its first
    // member type must be concrete, immutable and unpadded. Its size does
    // not need a separate legality check: the homogeneity test below covers it.
    jl_datatype_t *member0 = (jl_datatype_t*)jl_field_type(dt, 0);
    // The member has to be a `VecElement` ...
    if (!jl_is_datatype(member0) || member0->name != jl_vecelement_typename)
        return nullptr;
    // ... whose payload is a primitive type (nfields == 0).
    jl_datatype_t *payload = (jl_datatype_t*)jl_field_type(member0, 0);
    if (!jl_is_datatype(payload) || payload->layout->nfields)
        return nullptr;

    // Every remaining member must have exactly the same type as the first.
    for (size_t idx = 1; idx < nmembers; ++idx) {
        if (jl_field_type(dt, idx) != (jl_value_t*)member0)
            return nullptr; // Not homogeneous
    }
    return vecty;
}
// True when `v` is a subtype of `jl_floatingpoint_type`.
#define jl_is_floattype(v) jl_subtype(v,(jl_value_t*)jl_floatingpoint_type)

// Map a field-less Julia datatype onto the LLVM floating-point type of the
// same width (half, float, double or fp128).
//
// Returns `nullptr` when the size is not 2, 4, 8 or 16 bytes, or when `dt`
// is not a subtype of `jl_floatingpoint_type`.
//
// Preconditions (not re-checked here):
//   jl_is_datatype(dt) && !jl_is_abstracttype(dt)
//   `!dt->name->mutabl && dt->pointerfree && !dt->haspadding && dt->nfields == 0`
Type *get_llvm_fptype(jl_datatype_t *dt, LLVMContext &ctx) const
{
    // The size test comes first because it is cheaper than the subtype query.
    const size_t nbytes = jl_datatype_size(dt);
    Type *fpty = nullptr;
    if (nbytes == 2)
        fpty = Type::getHalfTy(ctx);
    else if (nbytes == 4)
        fpty = Type::getFloatTy(ctx);
    else if (nbytes == 8)
        fpty = Type::getDoubleTy(ctx);
    else if (nbytes == 16)
        fpty = Type::getFP128Ty(ctx);
    else
        return nullptr;

    // Guard against `jl_floatingpoint_type` being null before using it in
    // the subtype test.
    if (!jl_floatingpoint_type)
        return nullptr;
    if (!jl_is_floattype((jl_value_t*)dt))
        return nullptr;
    return fpty;
}
// Return the LLVM floating-point or short-vector type corresponding to `dt`,
// or `nullptr` if `dt` is neither.
//
// Mutable types, types containing pointers and padded types are rejected up
// front. Types with fields are then tried as short vectors, field-less types
// as floating-point scalars.
//
// Precondition: jl_is_datatype(dt) && !jl_is_abstracttype(dt)
Type *get_llvm_fp_or_vectype(jl_datatype_t *dt, LLVMContext &ctx) const
{
    const bool candidate = !dt->name->mutabl &&
                           !dt->layout->npointers &&
                           !dt->layout->flags.haspadding;
    if (!candidate)
        return nullptr;
    if (dt->layout->nfields)
        return get_llvm_vectype(dt, ctx);
    return get_llvm_fptype(dt, ctx);
}
// Accumulator used while scanning an aggregate for homogeneity: the LLVM type
// of the fundamental element found so far and its size in bytes. Both start
// out empty (`nullptr` / 0), meaning "no element seen yet".
struct ElementType {
    Type *type = nullptr;
    size_t sz = 0;
    ElementType() = default;
};
// Whether a type is a homogeneous floating-point aggregate (HFA) or a
// homogeneous short-vector aggregate (HVA).
// A Homogeneous Aggregate is a Composite Type where all of the Fundamental
// Data Types of the members that compose the type are the same.
// Note that it is the fundamental types that are important and not the member
// types.
//
// Parameters:
//   dt   - the datatype to inspect
//   dsz  - size of `dt` in bytes
//   nele - running count of fundamental elements found; incremented once per
//          element accepted
//   ele  - fundamental element type/size seen so far; filled in by the first
//          element and compared against by every later one
//   ctx  - LLVM context used to create the element types
//
// Returns `true` if `dt` is consistent with the elements recorded in `ele`
// (updating `nele` and `ele`), `false` otherwise. On a `false` return `nele`
// and `ele` may have been partially updated.
bool isHFAorHVA(jl_datatype_t *dt, size_t dsz, size_t &nele, ElementType &ele, LLVMContext &ctx) const
{
    // Assume:
    //     dt is a pointerfree type, (all members are isbits)
    //     dsz == jl_datatype_size(dt) > 0
    //     0 <= nele <= 3
    //     dt has no padding

    // We ignore zero sized members here. This isn't really consistent with
    // GCC for zero-sized array members. GCC seems to treat structs with
    // zero sized array members as non-HFA and non-HVA. Clang (3.7 and 3.8)
    // handles this slightly differently.
    // Ref https://llvm.org/bugs/show_bug.cgi?id=26162
    while (size_t nfields = jl_datatype_nfields(dt)) {
        // For composite types, find the first non zero sized member
        size_t i;
        size_t fieldsz = 0;
        for (i = 0; i < nfields; i++) {
            if ((fieldsz = jl_field_size(dt, i))) {
                break;
            }
        }
        assert(i < nfields);
        // With assertions compiled out, an aggregate made only of empty
        // members has no fundamental data type at all: reject it instead of
        // indexing past the last field.
        if (i >= nfields)
            return false;
        // If there's only one non-zero sized member, try again on this member
        if (fieldsz == dsz) {
            dt = (jl_datatype_t*)jl_field_type(dt, i);
            if (!jl_is_datatype(dt))
                return false;
            continue;
        }
        // The aggregate as a whole may be a short vector, in which case it
        // counts as a single element.
        if (Type *vectype = get_llvm_vectype(dt, ctx)) {
            if ((ele.sz && dsz != ele.sz) || (ele.type && ele.type != vectype))
                return false;
            ele.type = vectype;
            ele.sz = dsz;
            nele++;
            return true;
        }
        // Otherwise, process each member
        for (; i < nfields; i++) {
            size_t membersz = jl_field_size(dt, i);
            if (membersz == 0)
                continue;
            jl_datatype_t *fieldtype = (jl_datatype_t*)jl_field_type(dt, i);
            // The member's own type (not the enclosing `dt`) has to be a
            // datatype before it can be inspected recursively.
            if (!jl_is_datatype(fieldtype))
                return false;
            // Check element count.
            // This needs to be done after the zero size member check
            if (nele > 3 || !isHFAorHVA(fieldtype, membersz, nele, ele, ctx)) {
                return false;
            }
        }
        return true;
    }
    // For bitstypes: the size has to match the elements seen so far and the
    // type has to be the same floating-point type.
    if (ele.sz && dsz != ele.sz)
        return false;
    Type *new_type = get_llvm_fptype(dt, ctx);
    if (new_type && (!ele.type || ele.type == new_type)) {
        ele.type = new_type;
        ele.sz = dsz;
        nele++;
        return true;
    }
    return false;
}
// Entry point of the HFA/HVA test.
//
// Returns the LLVM type of the fundamental element if `dt` is an HFA or an
// HVA and stores the number of elements in `nele`; returns `nullptr`
// otherwise. `nele` is left untouched when `dt` has no layout, contains
// pointers, has padding or is larger than 64 bytes; in every other case it
// is reset to 0 first and may hold a partial count on failure.
//
// Precondition: jl_is_datatype(dt) && !jl_is_abstracttype(dt)
Type *isHFAorHVA(jl_datatype_t *dt, size_t &nele, LLVMContext &ctx) const
{
    // A Homogeneous Floating-point Aggregate (HFA) is a Homogeneous Aggregate
    // with a Fundamental Data Type that is a Floating-Point type and at most
    // four uniquely addressable members.
    // A Homogeneous Short-Vector Aggregate (HVA) is a Homogeneous Aggregate
    // with a Fundamental Data Type that is a Short-Vector type and at most four
    // uniquely addressable members.

    // Look at the layout before anything else so that a missing layout is
    // rejected ahead of the size query.
    if (!dt->layout || dt->layout->npointers || dt->layout->flags.haspadding)
        return nullptr;
    // Maximum HFA and HVA size is 64 bytes (4 x fp128 or 16bytes vector)
    size_t dsz = jl_datatype_size(dt);
    if (dsz > 64)
        return nullptr;
    nele = 0;
    // The recursive worker requires a non-zero size. An empty type has no
    // fundamental data type and therefore is neither an HFA nor an HVA.
    if (dsz == 0)
        return nullptr;
    ElementType eltype;
    if (isHFAorHVA(dt, dsz, nele, eltype, ctx))
        return eltype.type;
    return nullptr;
}
// Decide whether an argument of type `dt` is replaced by a pointer to a
// caller-allocated copy. `ab` and `Ty` are not used by this implementation.
bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab, LLVMContext &ctx, Type *Ty) override
{
    // B.2
    // If the argument type is an HFA or an HVA, then the argument is used
    // unmodified.
    size_t nmembers; // element count reported by the HFA/HVA test; not needed here
    const bool homogeneous = isHFAorHVA(dt, nmembers, ctx) != nullptr;

    // B.3
    // If the argument type is a Composite Type that is larger than 16 bytes,
    // then the argument is copied to memory allocated by the caller and the
    // argument is replaced by a pointer to the copy.
    // Only the total size is checked, not whether it is a composite type:
    // there is no corresponding C type for such a large bitstype, so it is
    // simply treated as a composite type of the right size.

    // B.4
    // If the argument type is a Composite Type then the size of the argument
    // is rounded up to the nearest multiple of 8 bytes.
    // (Nothing to do for this rule here.)
    return !homogeneous && jl_datatype_size(dt) > 16;
}
// Work out which kind of register an argument of type `dt` is passed in, and
// whether it has to go through memory instead (including by reference).
//
// Arguments that travel in SIMD and floating-point registers may need their
// type rewritten to [n x ftype]; arguments that travel in general purpose
// registers may need it rewritten to [n x i64].
//
// If the argument has to be passed on stack, we need to use sret.
//
// Out parameters (callers must pre-initialise them to `false` / 0; only the
// ones that apply are written):
//   fpreg       - set to true for SIMD/floating-point register arguments
//   onstack     - set to true when the argument is larger than 16 bytes and
//                 is not an HFA/HVA
//   rewrite_len - the `n` of the rewritten array type
//
// Returns the element type of the rewritten array type, or NULL when the
// argument type is used as-is.
Type *classify_arg(jl_datatype_t *dt, bool *fpreg, bool *onstack,
                   size_t *rewrite_len, LLVMContext &ctx) const
{
    // Based on section 5.4 C of the Procedure Call Standard

    // C.1
    // If the argument is a Half-, Single-, Double- or Quad- precision
    // Floating-point or Short Vector Type and the NSRN is less than 8, then
    // the argument is allocated to the least significant bits of register
    // v[NSRN]. The NSRN is incremented by one. The argument has now been
    // allocated.
    if (get_llvm_fp_or_vectype(dt, ctx) != nullptr) {
        *fpreg = true;
        return NULL;
    }

    // C.2
    // If the argument is an HFA or an HVA and there are sufficient
    // unallocated SIMD and Floating-point registers (NSRN + number of
    // members <= 8), then the argument is allocated to SIMD and
    // Floating-point Registers (with one register per member of the HFA
    // or HVA). The NSRN is incremented by the number of registers used.
    // The argument has now been allocated.
    Type *const member_ty = isHFAorHVA(dt, *rewrite_len, ctx);
    if (member_ty != nullptr) {
        // HFA and HVA have <= 4 members
        assert(*rewrite_len > 0 && *rewrite_len <= 4);
        *fpreg = true;
        // Rewrite to [n x eltype] where n is the number of fundamental types.
        return member_ty;
    }

    // Pass-by-reference test. Strictly speaking this belongs before step C,
    // but doing it here avoids running the HFA/HVA test twice. As in
    // `needPassByRef`, only the size is looked at, not whether the type is
    // a composite type.
    const size_t nbytes = jl_datatype_size(dt);
    if (nbytes > 16) {
        *onstack = true;
        return NULL;
    }

    // C.3
    // If the argument is an HFA or an HVA then the NSRN is set to 8 and the
    // size of the argument is rounded up to the nearest multiple of 8 bytes.
    // C.4
    // If the argument is an HFA, an HVA, a Quad-precision Floating-point or
    // Short Vector Type then the NSAA is rounded up to the larger of 8 or
    // the Natural Alignment of the argument's type.
    // C.5
    // If the argument is a Half- or Single- precision Floating Point type,
    // then the size of the argument is set to 8 bytes. The effect is as if
    // the argument had been copied to the least significant bits of a 64-bit
    // register and the remaining bits filled with unspecified values.
    // C.6
    // If the argument is an HFA, an HVA, a Half-, Single-, Double- or
    // Quad- precision Floating-point or Short Vector Type, then the argument
    // is copied to memory at the adjusted NSAA. The NSAA is incremented
    // by the size of the argument. The argument has now been allocated.
    // <already included in the C.2 case above>

    // C.7
    // If the argument is an Integral or Pointer Type, the size of the
    // argument is less than or equal to 8 bytes and the NGRN is less than 8,
    // the argument is copied to the least significant bits in x[NGRN].
    // The NGRN is incremented by one. The argument has now been allocated.
    //
    // Any bitstype of the right size is treated as an integer or pointer
    // here. This is needed for types like Cstring which should be treated
    // as pointers. Floating points are not a concern at this point since
    // they were handled above. The 16 byte case is C.9 (see below).
    //
    // NOTE(review): `jl_is_immutable` is applied to the datatype itself,
    // whereas get_llvm_fp_or_vectype reads `dt->name->mutabl` directly.
    // Confirm the macro inspects `dt` rather than the type of `dt`.
    const bool fieldless_immutable =
        jl_is_immutable(dt) && jl_datatype_nfields(dt) == 0;
    if (fieldless_immutable) {
        switch (nbytes) {
        case 1:
        case 2:
        case 4:
        case 8:
        case 16:
            return NULL;
        default:
            break;
        }
    }

    // C.8
    // If the argument has an alignment of 16 then the NGRN is rounded up to
    // the next even number.
    // C.9
    // If the argument is an Integral Type, the size of the argument is equal
    // to 16 and the NGRN is less than 7, the argument is copied to x[NGRN]
    // and x[NGRN+1]. x[NGRN] shall contain the lower addressed double-word
    // of the memory representation of the argument. The NGRN is incremented
    // by two. The argument has now been allocated.
    // <merged into C.7 above>

    // C.10
    // If the argument is a Composite Type and the size in double-words of
    // the argument is not more than 8 minus NGRN, then the argument is
    // copied into consecutive general-purpose registers, starting at x[NGRN].
    // The argument is passed as though it had been loaded into the registers
    // from a double-word-aligned address with an appropriate sequence of LDR
    // instructions loading consecutive registers from memory (the contents of
    // any unused parts of the registers are unspecified by this standard).
    // The NGRN is incremented by the number of registers used. The argument
    // has now been allocated.
    //
    // Composite-ness is not checked: the types that have a corresponding C
    // type were already handled, and anything with an unusual size is
    // treated as a black box composite type. Larger types never get here
    // because they were classified as pass by reference above.
    assert(nbytes <= 16); // Should be pass by reference otherwise

    // C.11
    // The NGRN is set to 8.
    // C.12
    // The NSAA is rounded up to the larger of 8 or the Natural Alignment
    // of the argument's type.
    // C.13
    // If the argument is a composite type then the argument is copied to
    // memory at the adjusted NSAA. The NSAA is incremented by the size of
    // the argument. The argument has now been allocated.
    // <handled by C.10 above>
    // C.14
    // If the size of the argument is less than 8 bytes then the size of the
    // argument is set to 8 bytes. The effect is as if the argument was
    // copied to the least significant bits of a 64-bit register and the
    // remaining bits filled with unspecified values.
    // C.15
    // The argument is copied to memory at the adjusted NSAA. The NSAA is
    // incremented by the size of the argument. The argument has now been
    // allocated.
    // <handled by C.10 above>

    // Rewrite to [n x Int64] where n is the **size in dword**, rounded up.
    *rewrite_len = (nbytes + 7) / 8;
    // 0-size types (Nothing) won't be rewritten and that is what we want
    if (nbytes == 0)
        return NULL;
    return Type::getInt64Ty(ctx);
}
// Whether a result of type `dt` is returned through memory (sret) rather
// than in registers.
bool use_sret(jl_datatype_t *dt, LLVMContext &ctx) override
{
    // Section 5.5
    // If the type, T, of the result of a function is such that
    //
    //     void func(T arg)
    //
    // would require that arg be passed as a value in a register (or set of
    // registers) according to the rules in section 5.4 Parameter Passing,
    // then the result is returned in the same registers as would be used for
    // such an argument.
    //
    // So classify the type as if it were an argument and report whether it
    // ended up in memory. The other outputs are not needed.
    bool in_fpreg = false;
    bool via_memory = false;
    size_t array_len = 0;
    (void)classify_arg(dt, &in_fpreg, &via_memory, &array_len, ctx);
    return via_memory;
}
// LLVM type that should be used to pass or return a value of type `dt`, or
// NULL when no rewrite is needed. `isret` is not used by this implementation.
Type *preferred_llvm_type(jl_datatype_t *dt, bool isret, LLVMContext &ctx) const override
{
    // Floating-point scalars and short vectors map directly onto an LLVM type.
    Type *direct_ty = get_llvm_fp_or_vectype(dt, ctx);
    if (direct_ty)
        return direct_ty;

    // Everything else is either left alone or rewritten to an array of the
    // element type chosen by the classifier.
    bool in_fpreg = false;
    bool via_memory = false;
    size_t array_len = 0;
    Type *elem_ty = classify_arg(dt, &in_fpreg, &via_memory, &array_len, ctx);
    if (!elem_ty)
        return NULL;
    return ArrayType::get(elem_ty, array_len);
}
};