https://github.com/halide/Halide
Revision cd95b27d6b969a1da8337ba820ad8d0c50884bff authored by Steven Johnson on 27 July 2018, 16:56:13 UTC, committed by GitHub on 27 July 2018, 16:56:13 UTC
Rework runtime to allow more than 64 Target::Features (Issue #2911)
2 parents d91647c + d734dc0
Raw File
Tip revision: cd95b27d6b969a1da8337ba820ad8d0c50884bff authored by Steven Johnson on 27 July 2018, 16:56:13 UTC
Merge pull request #3116 from halide/srj-features
Tip revision: cd95b27
CodeGen_Hexagon.h
#ifndef HALIDE_CODEGEN_HEXAGON_H
#define HALIDE_CODEGEN_HEXAGON_H

/** \file
 * Defines the code-generator for producing Hexagon machine code
 */

#include "CodeGen_Posix.h"

namespace Halide {
namespace Internal {

/** A code generator that emits Hexagon code from a given Halide stmt. */
class CodeGen_Hexagon : public CodeGen_Posix {
public:
    /** Create a Hexagon code generator for the given Hexagon target. */
    CodeGen_Hexagon(Target);

    /** Compile a Halide Module and return the resulting LLVM module. */
    std::unique_ptr<llvm::Module> compile(const Module &module);

protected:
    /** Compile a single lowered function. The function is identified
     * by both a simple name and an extern name.
     * NOTE(review): presumably these correspond to the base class
     * hooks of the same name; confirm against CodeGen_LLVM. */
    void compile_func(const LoweredFunc &f,
                      const std::string &simple_name, const std::string &extern_name);

    /** Per-module initialization.
     * NOTE(review): the work done here lives in the implementation
     * file and is not visible from this header. */
    void init_module();

    /** Hexagon-specific handling of two arithmetic helpers.
     * NOTE(review): judging by the names, a high-half multiply followed
     * by a right shift, and an average of ordered operands; confirm
     * against the base class documentation. */
    ///@{
    Expr mulhi_shr(Expr a, Expr b, int shr);
    Expr sorted_avg(Expr a, Expr b);
    ///@}

    /** Target description queries. */
    ///@{
    std::string mcpu() const;
    std::string mattrs() const;
    bool use_soft_float_abi() const;
    int native_vector_bits() const;
    ///@}

    /** Numeric ISA version; compared against 62 and 65 by the
     * is_hvx_v*_or_later() predicates below. It is assigned outside of
     * this header. */
    int isa_version;

    /** Define an HVX intrinsic under the given name with the given
     * Halide return and argument types. The two overloads differ in
     * how the underlying intrinsic is identified: by integer id, or
     * by an existing llvm::Function.
     * NOTE(review): the meaning of 'flags' is defined in the
     * implementation file. */
    ///@{
    llvm::Function *define_hvx_intrinsic(int intrin, Type ret_ty,
                                         const std::string &name,
                                         const std::vector<Type> &arg_types,
                                         int flags);
    llvm::Function *define_hvx_intrinsic(llvm::Function *intrin, Type ret_ty,
                                         const std::string &name,
                                         std::vector<Type> arg_types,
                                         int flags);
    ///@}

    /** Version predicates on isa_version. These only read state, so
     * they are const and return bool; this lets them be used from
     * const members such as mcpu() and mattrs(). */
    ///@{
    bool is_hvx_v62_or_later() const { return isa_version >= 62; }
    bool is_hvx_v65_or_later() const { return isa_version >= 65; }
    ///@}

    using CodeGen_Posix::visit;

    /** Nodes for which we want to emit specific hexagon intrinsics */
    ///@{
    void visit(const Add *);
    void visit(const Sub *);
    void visit(const Broadcast *);
    void visit(const Div *);
    void visit(const Max *);
    void visit(const Min *);
    void visit(const Cast *);
    void visit(const Call *);
    void visit(const Mul *);
    void visit(const GE *);
    void visit(const LE *);
    void visit(const LT *);
    void visit(const NE *);
    void visit(const GT *);
    void visit(const EQ *);
    void visit(const Select *);
    ///@}

    /** We ask for an extra vector on each allocation to enable fast
     * clamped ramp loads. The extra amount is native_vector_bits()/8,
     * i.e. one native vector expressed in bytes, added on top of the
     * padding requested by CodeGen_Posix. */
    int allocation_padding(Type type) const {
        return CodeGen_Posix::allocation_padding(type) + native_vector_bits()/8;
    }

    /** Call an LLVM intrinsic, potentially casting the operands to
     * match the type of the function. */
    ///@{
    llvm::Value *call_intrin_cast(llvm::Type *ret_ty, llvm::Function *F,
                                  std::vector<llvm::Value *> Ops);
    llvm::Value *call_intrin_cast(llvm::Type *ret_ty, int id,
                                  std::vector<llvm::Value *> Ops);
    ///@}

    /** Define overloads of CodeGen_LLVM::call_intrin that determine
     * the intrin_lanes from the type, and allows the function to
     * return null if the maybe option is true and the intrinsic is
     * not found. */
    ///@{
    using CodeGen_LLVM::call_intrin;
    llvm::Value *call_intrin(Type t, const std::string &name,
                             std::vector<Expr>, bool maybe = false);
    llvm::Value *call_intrin(llvm::Type *t, const std::string &name,
                             std::vector<llvm::Value *>, bool maybe = false);
    ///@}

    /** Override CodeGen_LLVM to use hexagon intrinsics when possible. */
    ///@{
    llvm::Value *interleave_vectors(const std::vector<llvm::Value *> &v);
    llvm::Value *shuffle_vectors(llvm::Value *a, llvm::Value *b,
                                 const std::vector<int> &indices);
    using CodeGen_Posix::shuffle_vectors;
    ///@}

    /** Generate a LUT lookup using vlut instructions. */
    ///@{
    llvm::Value *vlut(llvm::Value *lut, llvm::Value *indices, int min_index = 0, int max_index = 1 << 30);
    llvm::Value *vlut(llvm::Value *lut, const std::vector<int> &indices);
    ///@}

    /** NOTE(review): presumably generates a lookup of 'lut' at the
     * given constant indices using vdelta instructions, by analogy
     * with vlut above; confirm against the implementation. */
    llvm::Value *vdelta(llvm::Value *lut, const std::vector<int> &indices);

    /** Because HVX intrinsics operate on vectors of i32, using them
     * requires a lot of extraneous bitcasts, which make it difficult
     * to manipulate the IR. This function avoids generating redundant
     * bitcasts. */
    llvm::Value *create_bitcast(llvm::Value *v, llvm::Type *ty);
};

}  // namespace Internal
}  // namespace Halide

#endif
back to top