Revision 9a94756d01d9071ff1610bfc4cb170bd47f701a8 authored by Alexander Root on 21 July 2022, 15:01:16 UTC, committed by GitHub on 21 July 2022, 15:01:16 UTC
* use pmaddubsw 8-bit horizontal widening adds

* add SSE3 versions too

* add pmaddubsw tests
1 parent 967c3bf
Raw File
LoopCarry.h
#ifndef HALIDE_LOOP_CARRY_H
#define HALIDE_LOOP_CARRY_H

#include "Expr.h"

namespace Halide {
namespace Internal {

/** Reuse loads done on previous loop iterations by stashing them in
 * induction variables instead of redoing the load. If the loads are
 * predicated, the predicates need to match. Can be an optimization or
 * pessimization depending on how good the L1 cache is on the architecture
 * and how many memory issue slots there are. Currently only intended
 * for Hexagon. */
Stmt loop_carry(Stmt, int max_carried_values = 8);

}  // namespace Internal
}  // namespace Halide

#endif
back to top