Revision 3fbb12aa928a87a4c4e69a95eaf6f5ee02958600 authored by John Lawson on 09 February 2021, 18:00:02 UTC, committed by GitHub on 09 February 2021, 18:00:02 UTC
* Add support for AVX512 BF16 dot product

* Match on f32*f32

* Remove f32 check
1 parent 3e034d6
Raw File
CodeGen_PTX_Dev.h
#ifndef HALIDE_CODEGEN_PTX_DEV_H
#define HALIDE_CODEGEN_PTX_DEV_H

/** \file
 * Declares the factory function for the code generator that targets
 * PTX (CUDA) GPU devices.
 */

#include <memory>

namespace Halide {

// Forward declaration only; the full definition of Target is not needed
// here because it is used solely by const reference below.
struct Target;

namespace Internal {

// Forward declaration only; this header exposes the type just as the
// pointee of the std::unique_ptr returned by the factory below.
struct CodeGen_GPU_Dev;

/** Factory for a GPU device code generator, returned through the
 * CodeGen_GPU_Dev interface type. The std::unique_ptr return value
 * transfers sole ownership of the new object to the caller.
 *
 * \param target The Target passed (by const reference) to the factory.
 * \return A uniquely-owned CodeGen_GPU_Dev.
 *
 * NOTE(review): judging by the function name, the returned object is
 * presumably the PTX backend; the definition is not visible in this
 * header, so confirm against the implementation file. */
std::unique_ptr<CodeGen_GPU_Dev> new_CodeGen_PTX_Dev(const Target &target);

}  // namespace Internal
}  // namespace Halide

#endif
back to top