https://github.com/halide/Halide
Raw File
Tip revision: b5e8217dfbe905ef1f30f7bd584a83dfb9e2260e authored by Steven Johnson on 19 January 2023, 19:40:52 UTC
Remove the watchdog timer from generator_main(). It was intended to kill pathologically slow builds, but in the environment it was added for (Google build servers), it ended up being redundant to existing mechanisms, and removing it allows us to remove a dependency on threading libraries in libHalide.
Tip revision: b5e8217
CodeGen_GPU_Dev.h
#ifndef HALIDE_CODEGEN_GPU_DEV_H
#define HALIDE_CODEGEN_GPU_DEV_H

/** \file
 * Defines the code-generator interface for producing GPU device code
 */
#include <string>
#include <vector>

#include "CodeGen_C.h"
#include "DeviceArgument.h"
#include "Expr.h"

namespace Halide {
namespace Internal {

/** A code generator that emits GPU code from a given Halide stmt. */
struct CodeGen_GPU_Dev {
    virtual ~CodeGen_GPU_Dev();

    /** Compile a GPU kernel into the module. This may be called many times
     * with different kernels, which will all be accumulated into a single
     * source module shared by a given Halide pipeline. */
    virtual void add_kernel(Stmt stmt,
                            const std::string &name,
                            const std::vector<DeviceArgument> &args) = 0;

    /** (Re)initialize the GPU kernel module. This is separate from compile,
     * since a GPU device module will often have many kernels compiled into it
     * for a single pipeline. */
    virtual void init_module() = 0;

    virtual std::vector<char> compile_to_src() = 0;

    virtual std::string get_current_kernel_name() = 0;

    virtual void dump() = 0;

    /** This routine returns the GPU API name that is combined into
     *  runtime routine names to ensure each GPU API has a unique
     *  name.
     */
    virtual std::string api_unique_name() = 0;

    /** Returns the specified name transformed by the variable naming rules
     * for the GPU language backend. Used to determine the name of a parameter
     * during host codegen. */
    virtual std::string print_gpu_name(const std::string &name) = 0;

    /** Allows the GPU device specific code to request halide_type_t
     * values to be passed to the kernel_run routine rather than just
     * argument type sizes.
     */
    virtual bool kernel_run_takes_types() const {
        return false;
    }

    static bool is_gpu_var(const std::string &name);
    static bool is_gpu_block_var(const std::string &name);
    static bool is_gpu_thread_var(const std::string &name);

    /** Checks if expr is block uniform, i.e. does not depend on a thread
     * var. */
    static bool is_block_uniform(const Expr &expr);
    /** Checks if the buffer is a candidate for constant storage. Most
     * GPUs (APIs) support a constant memory storage class that cannot be
     * written to and performs well for block uniform accesses. A buffer is a
     * candidate for constant storage if it is never written to, and loads are
     * uniform within the workgroup. */
    static bool is_buffer_constant(const Stmt &kernel, const std::string &buffer);

    /** Modifies predicated loads and stores to be non-predicated, since most
     * GPU backends do not support predication. */
    static Stmt scalarize_predicated_loads_stores(Stmt &s);

    /** An mask describing which type of memory fence to use for the gpu_thread_barrier()
     * intrinsic.  Not all GPUs APIs support all types.
     */
    enum MemoryFenceType {
        None = 0,    // No fence required (just a sync)
        Device = 1,  // Device/global memory fence
        Shared = 2   // Threadgroup/shared memory fence
    };
};

/** A base class for GPU backends that require C-like shader output.
 * GPU backends derive from and specialize this class. */
class CodeGen_GPU_C : public CodeGen_C {
public:
    /** OpenCL uses a different syntax than C for immediate vectors.  This
    enum defines which style should be used by the backend. */
    enum class VectorDeclarationStyle {
        CLikeSyntax = 0,
        OpenCLSyntax = 1
    };

    CodeGen_GPU_C(std::ostream &s, Target t)
        : CodeGen_C(s, t) {
    }

protected:
    using CodeGen_C::visit;
    void visit(const Shuffle *op) override;
    void visit(const Call *op) override;

    VectorDeclarationStyle vector_declaration_style = VectorDeclarationStyle::CLikeSyntax;
};

}  // namespace Internal
}  // namespace Halide

#endif
back to top