Revision 65d5762485dfdc3d57f8463ba1852d6526019c70 authored by Jameson Nash on 20 April 2017, 18:42:56 UTC, committed by Jameson Nash on 22 April 2017, 18:58:09 UTC
previously we couldn't reliably tell the difference between a failure to context switch
and an explicitly scheduled error

in the former case, we often need to undo some prior action
by moving the code for the later into Julia,
we can reliably run the necessary undo action when required

fix #21442
1 parent 45092ff
Raw File
abi_aarch64.cpp
// This file is a part of Julia. License is MIT: http://julialang.org/license

//===----------------------------------------------------------------------===//
//
// The ABI implementation used for AArch64 targets.
//
//===----------------------------------------------------------------------===//
//
// The Procedure Call Standard can be found here:
// http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055b/IHI0055B_aapcs64.pdf
//
//===----------------------------------------------------------------------===//

struct ABI_AArch64Layout : AbiLayout {

Type *get_llvm_vectype(jl_datatype_t *dt) const
{
    // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt)
    // `!dt->mutabl && dt->pointerfree && !dt->haspadding && dt->nfields > 0`
    if (dt->layout == NULL)
        return nullptr;
    size_t nfields = dt->layout->nfields;
    assert(nfields > 0);
    if (nfields < 2)
        return nullptr;
    static Type *T_vec64 = VectorType::get(T_int32, 2);
    static Type *T_vec128 = VectorType::get(T_int32, 4);
    Type *lltype;
    // Short vector should be either 8 bytes or 16 bytes.
    // Note that there are only two distinct fundamental types for
    // short vectors so we normalize them to <2 x i32> and <4 x i32>
    switch (jl_datatype_size(dt)) {
    case 8:
        lltype = T_vec64;
        break;
    case 16:
        lltype = T_vec128;
        break;
    default:
        return nullptr;
    }
    // Since `dt` is pointer free and has no padding and is 8 or 16 in size,
    // `ft0` must be concrete, immutable with no padding and we don't need
    // to check if its size is legal since it is included in
    // the homogeneity check.
    jl_datatype_t *ft0 = (jl_datatype_t*)jl_field_type(dt, 0);
    // `ft0` should be a `VecElement` type and the true element type
    // should be a primitive type
    if (ft0->name != jl_vecelement_typename ||
        ((jl_datatype_t*)jl_field_type(ft0, 0))->layout->nfields)
        return nullptr;
    for (size_t i = 1; i < nfields; i++) {
        if (jl_field_type(dt, i) != (jl_value_t*)ft0) {
            // Not homogeneous
            return nullptr;
        }
    }
    return lltype;
}

#define jl_is_floattype(v)   jl_subtype(v,(jl_value_t*)jl_floatingpoint_type)
Type *get_llvm_fptype(jl_datatype_t *dt) const
{
    // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt)
    // `!dt->mutabl && dt->pointerfree && !dt->haspadding && dt->nfields == 0`
    Type *lltype;
    // Check size first since it's cheaper.
    switch (jl_datatype_size(dt)) {
    case 2:
        lltype = T_float16;
        break;
    case 4:
        lltype = T_float32;
        break;
    case 8:
        lltype = T_float64;
        break;
    case 16:
        lltype = T_float128;
        break;
    default:
        return nullptr;
    }
    return ((jl_floatingpoint_type && jl_is_floattype((jl_value_t*)dt)) ?
            lltype : nullptr);
}

Type *get_llvm_fp_or_vectype(jl_datatype_t *dt) const
{
    // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt)
    if (dt->mutabl || dt->layout->npointers || dt->layout->haspadding)
        return nullptr;
    return dt->layout->nfields ? get_llvm_vectype(dt) : get_llvm_fptype(dt);
}

struct ElementType {
    Type *type;
    size_t sz;
    ElementType() : type(nullptr), sz(0) {};
};

// Whether a type is a homogeneous floating-point aggregates (HFA) or a
// homogeneous short-vector aggregates (HVA). Returns the element type.
// An Homogeneous Aggregate is a Composite Type where all of the Fundamental
// Data Types of the members that compose the type are the same.
// Note that it is the fundamental types that are important and not the member
// types.
bool isHFAorHVA(jl_datatype_t *dt, size_t dsz, size_t &nele, ElementType &ele) const
{
    // Assume:
    //     dt is a pointerfree type, (all members are isbits)
    //     dsz == jl_datatype_size(dt) > 0
    //     0 <= nele <= 3
    //     dt has no padding

    // We ignore zero sized member here. This isn't really consistent with
    // GCC for zero-sized array members. GCC seems to treat structs with
    // zero sized array members as non-HFA and non-HVA. Clang (3.7 and 3.8)
    // handles this slightly differently.
    // Ref https://llvm.org/bugs/show_bug.cgi?id=26162
    while (size_t nfields = jl_datatype_nfields(dt)) {
        // For composite types, find the first non zero sized member
        size_t i;
        size_t fieldsz;
        for (i = 0;i < nfields;i++) {
            if ((fieldsz = jl_field_size(dt, i))) {
                break;
            }
        }
        assert(i < nfields);
        // If there's only one non zero sized member, try again on this member
        if (fieldsz == dsz) {
            dt = (jl_datatype_t*)jl_field_type(dt, i);
            continue;
        }
        if (Type *vectype = get_llvm_vectype(dt)) {
            if ((ele.sz && dsz != ele.sz) || (ele.type && ele.type != vectype))
                return false;
            ele.type = vectype;
            ele.sz = dsz;
            nele++;
            return true;
        }
        // Otherwise, process each members
        for (;i < nfields;i++) {
            size_t fieldsz = jl_field_size(dt, i);
            if (fieldsz == 0)
                continue;
            jl_datatype_t *fieldtype = (jl_datatype_t*)jl_field_type(dt, i);
            // Check element count.
            // This needs to be done after the zero size member check
            if (nele > 3 || !isHFAorHVA(fieldtype, fieldsz, nele, ele)) {
                return false;
            }
        }
        return true;
    }
    // For bitstypes
    if (ele.sz && dsz != ele.sz)
        return false;
    Type *new_type = get_llvm_fptype(dt);
    if (new_type && (!ele.type || ele.type == new_type)) {
        ele.type = new_type;
        ele.sz = dsz;
        nele++;
        return true;
    }
    return false;
}

Type *isHFAorHVA(jl_datatype_t *dt, size_t &nele) const
{
    // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt)

    // An Homogeneous Floating-point Aggregate (HFA) is an Homogeneous Aggregate
    // with a Fundamental Data Type that is a Floating-Point type and at most
    // four uniquely addressable members.
    // An Homogeneous Short-Vector Aggregate (HVA) is an Homogeneous Aggregate
    // with a Fundamental Data Type that is a Short-Vector type and at most four
    // uniquely addressable members.
    // Maximum HFA and HVA size is 64 bytes (4 x fp128 or 16bytes vector)
    size_t dsz = jl_datatype_size(dt);
    if (dsz > 64 || !dt->layout || dt->layout->npointers || dt->layout->haspadding)
        return NULL;
    nele = 0;
    ElementType eltype;
    if (isHFAorHVA(dt, dsz, nele, eltype))
        return eltype.type;
    return NULL;
}

bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab) override
{
    // B.2
    //   If the argument type is an HFA or an HVA, then the argument is used
    //   unmodified.
    size_t size;
    if (isHFAorHVA(dt, size))
        return false;
    // B.3
    //   If the argument type is a Composite Type that is larger than 16 bytes,
    //   then the argument is copied to memory allocated by the caller and the
    //   argument is replaced by a pointer to the copy.
    // We only check for the total size and not whether it is a composite type
    // since there's no corresponding C type and we just treat such large
    // bitstype as a composite type of the right size.
    return jl_datatype_size(dt) > 16;
    // B.4
    //   If the argument type is a Composite Type then the size of the argument
    //   is rounded up to the nearest multiple of 8 bytes.
}

// Determine which kind of register the argument will be passed in and
// if the argument has to be passed on stack (including by reference).
//
// If the argument should be passed in SIMD and floating-point registers,
// we may need to rewrite the argument types to [n x ftype].
// If the argument should be passed in general purpose registers, we may need
// to rewrite the argument types to [n x i64].
//
// If the argument has to be passed on stack, we need to use sret.
//
// All the out parameters should be default to `false`.
Type *classify_arg(jl_datatype_t *dt, bool *fpreg, bool *onstack,
                   size_t *rewrite_len) const
{
    // Based on section 5.4 C of the Procedure Call Standard
    // C.1
    //   If the argument is a Half-, Single-, Double- or Quad- precision
    //   Floating-point or Short Vector Type and the NSRN is less than 8, then
    //   the argument is allocated to the least significant bits of register
    //   v[NSRN]. The NSRN is incremented by one. The argument has now been
    //   allocated.
    if (get_llvm_fp_or_vectype(dt)) {
        *fpreg = true;
        return NULL;
    }

    // C.2
    //   If the argument is an HFA or an HVA and there are sufficient
    //   unallocated SIMD and Floating-point registers (NSRN + number of
    //   members <= 8), then the argument is allocated to SIMD and
    //   Floating-point Registers (with one register per member of the HFA
    //   or HVA). The NSRN is incremented by the number of registers used.
    //   The argument has now been allocated.
    if (Type *eltype = isHFAorHVA(dt, *rewrite_len)) {
        assert(*rewrite_len > 0 && *rewrite_len <= 4);
        // HFA and HVA have <= 4 members
        *fpreg = true;
        // Rewrite to [n x eltype] where n is the number of fundamental types.
        return eltype;
    }

    // Check if the argument needs to be passed by reference. This should be
    // done before starting step C but we do this here to avoid checking for
    // HFA and HVA twice. We don't check whether it is a composite type.
    // See `needPassByRef` above.
    if (jl_datatype_size(dt) > 16) {
        *onstack = true;
        return NULL;
    }

    // C.3
    //   If the argument is an HFA or an HVA then the NSRN is set to 8 and the
    //   size of the argument is rounded up to the nearest multiple of 8 bytes.
    // C.4
    //   If the argument is an HFA, an HVA, a Quad-precision Floating-point or
    //   Short Vector Type then the NSAA is rounded up to the larger of 8 or
    //   the Natural Alignment of the argument’s type.
    // C.5
    //   If the argument is a Half- or Single- precision Floating Point type,
    //   then the size of the argument is set to 8 bytes. The effect is as if
    //   the argument had been copied to the least significant bits of a 64-bit
    //   register and the remaining bits filled with unspecified values.
    // C.6
    //   If the argument is an HFA, an HVA, a Half-, Single-, Double- or
    //   Quad- precision Floating-point or Short Vector Type, then the argument
    //   is copied to memory at the adjusted NSAA. The NSAA is incremented
    //   by the size of the argument. The argument has now been allocated.
    // <already included in the C.2 case above>
    // C.7
    //   If the argument is an Integral or Pointer Type, the size of the
    //   argument is less than or equal to 8 bytes and the NGRN is less than 8,
    //   the argument is copied to the least significant bits in x[NGRN].
    //   The NGRN is incremented by one. The argument has now been allocated.
    // Here we treat any bitstype of the right size as integers or pointers
    // This is needed for types like Cstring which should be treated as
    // pointers. We don't need to worry about floating points here since they
    // are handled above.
    if (jl_is_immutable(dt) && jl_datatype_nfields(dt) == 0 &&
        (jl_datatype_size(dt) == 1 || jl_datatype_size(dt) == 2 ||
         jl_datatype_size(dt) == 4 || jl_datatype_size(dt) == 8 ||
         jl_datatype_size(dt) == 16))
        return NULL;

    // C.8
    //   If the argument has an alignment of 16 then the NGRN is rounded up to
    //   the next even number.
    // C.9
    //   If the argument is an Integral Type, the size of the argument is equal
    //   to 16 and the NGRN is less than 7, the argument is copied to x[NGRN]
    //   and x[NGRN+1]. x[NGRN] shall contain the lower addressed double-word
    //   of the memory representation of the argument. The NGRN is incremented
    //   by two. The argument has now been allocated.
    // <merged into C.7 above>
    // C.10
    //   If the argument is a Composite Type and the size in double-words of
    //   the argument is not more than 8 minus NGRN, then the argument is
    //   copied into consecutive general-purpose registers, starting at x[NGRN].
    //   The argument is passed as though it had been loaded into the registers
    //   from a double-word-aligned address with an appropriate sequence of LDR
    //   instructions loading consecutive registers from memory (the contents of
    //   any unused parts of the registers are unspecified by this standard).
    //   The NGRN is incremented by the number of registers used. The argument
    //   has now been allocated.
    // We don't check for composite types here since the ones that have
    // corresponding C types are already handled and we just treat the ones
    // with weird size as a black box composite type.
    // The type can fit in 8 x 8 bytes since it is handled by
    // need_pass_by_ref otherwise.
    // 0-size types (Void) won't be rewritten and that is what we want
    assert(jl_datatype_size(dt) <= 16); // Should be pass by reference otherwise
    *rewrite_len = (jl_datatype_size(dt) + 7) >> 3;
    // Rewrite to [n x Int64] where n is the **size in dword**
    return jl_datatype_size(dt) ? T_int64 : NULL;

    // C.11
    //   The NGRN is set to 8.
    // C.12
    //   The NSAA is rounded up to the larger of 8 or the Natural Alignment
    //   of the argument’s type.
    // C.13
    //   If the argument is a composite type then the argument is copied to
    //   memory at the adjusted NSAA. The NSAA is incremented by the size of
    //   the argument. The argument has now been allocated.
    // <handled by C.10 above>
    // C.14
    //   If the size of the argument is less than 8 bytes then the size of the
    //   argument is set to 8 bytes. The effect is as if the argument was
    //   copied to the least significant bits of a 64-bit register and the
    //   remaining bits filled with unspecified values.
    // C.15
    //   The argument is copied to memory at the adjusted NSAA. The NSAA is
    //   incremented by the size of the argument. The argument has now been
    //   allocated.
    // <handled by C.10 above>
}

bool use_sret(jl_datatype_t *dt) override
{
    // Section 5.5
    // If the type, T, of the result of a function is such that
    //
    //     void func(T arg)
    //
    // would require that arg be passed as a value in a register (or set of
    // registers) according to the rules in section 5.4 Parameter Passing,
    // then the result is returned in the same registers as would be used for
    // such an argument.
    bool fpreg = false;
    bool onstack = false;
    size_t rewrite_len = 0;
    classify_arg(dt, &fpreg, &onstack, &rewrite_len);
    return onstack;
}

Type *preferred_llvm_type(jl_datatype_t *dt, bool isret) const override
{
    if (Type *fptype = get_llvm_fp_or_vectype(dt))
        return fptype;
    bool fpreg = false;
    bool onstack = false;
    size_t rewrite_len = 0;
    if (Type *rewrite_ty = classify_arg(dt, &fpreg, &onstack, &rewrite_len))
        return ArrayType::get(rewrite_ty, rewrite_len);
    return NULL;
}

};
back to top