Revision aa696b54a17d5939ebb61ff14965bf0ce16dcd7a authored by Volodymyr Kysenko on 27 February 2024, 22:28:14 UTC, committed by GitHub on 27 February 2024, 22:28:14 UTC
* [Xtensa codegen] Specializations for load/store and fma support

* format
1 parent 85b6d7e
Raw File
LowerWarpShuffles.h
#ifndef HALIDE_LOWER_WARP_SHUFFLES_H
#define HALIDE_LOWER_WARP_SHUFFLES_H

/** \file
 * Defines the lowering pass that injects CUDA warp shuffle
 * instructions to access storage outside of a GPULane loop.
 */

#include "Expr.h"

namespace Halide {

// Only a reference to Target appears in this header, so a forward
// declaration is sufficient. Declaring it here keeps the header
// self-contained instead of relying on "Expr.h" (or the including
// translation unit) to have made the name visible already.
struct Target;

namespace Internal {

/** Rewrite access to things stored outside the loop over GPU lanes to
 * use NVIDIA's warp shuffle instructions.
 *
 * \param s The statement to rewrite.
 * \param t The compilation target. NOTE(review): presumably consulted
 *          to decide whether/how shuffles can be emitted; confirm
 *          against the definition in the corresponding .cpp file.
 * \return The rewritten statement.
 */
Stmt lower_warp_shuffles(Stmt s, const Target &t);

}  // namespace Internal
}  // namespace Halide

#endif
back to top