Revision 9a94756d01d9071ff1610bfc4cb170bd47f701a8 authored by Alexander Root on 21 July 2022, 15:01:16 UTC, committed by GitHub on 21 July 2022, 15:01:16 UTC
* use pmaddubsw 8-bit horizontal widening adds * add SSE3 versions too * add pmaddubsw tests
1 parent 967c3bf
LoopCarry.h
#ifndef HALIDE_LOOP_CARRY_H
#define HALIDE_LOOP_CARRY_H
#include "Expr.h"
namespace Halide {
namespace Internal {
/** Reuse loads done on previous loop iterations by stashing them in
 * induction variables instead of redoing the load. If the loads are
 * predicated, the predicates need to match.
 *
 * This can be either an optimization or a pessimization, depending on
 * how good the L1 cache is on the architecture and how many memory
 * issue slots there are. Currently only intended for Hexagon.
 *
 * Takes a statement and returns a statement; presumably the unnamed
 * first argument is the Stmt to rewrite and the result is the rewritten
 * Stmt -- confirm against the implementation.
 *
 * NOTE(review): max_carried_values defaults to 8. Judging by its name
 * it caps how many loaded values are carried across iterations, but
 * that is not established by this header -- confirm against the
 * implementation. */
Stmt loop_carry(Stmt, int max_carried_values = 8);
} // namespace Internal
} // namespace Halide
#endif
Computing file changes ...