Revision 799236949867b6a2be0d492137b36a0b67011622 authored by Andrew Adams on 07 December 2021, 16:16:50 UTC, committed by GitHub on 07 December 2021, 16:16:50 UTC
* Add a version of fast_integer_divide that rounds towards zero

* clang-format

* Fix test condition

* Clean up debugging code

* Add explanatory comment to performance test

* Pacify clang tidy
1 parent fb305fd
Raw File
gpu_arg_types.cpp
#include "Halide.h"

using namespace Halide;
// Checks that a pipeline taking a 16-bit integer Param produces the same
// output when scheduled with gpu_tile as an otherwise identical pipeline
// that is left with its default schedule.
//
// Returns 0 on success, or when the test is skipped because the JIT target
// has no GPU feature enabled; returns -1 at the first mismatching element.
int main(int argc, char *argv[]) {

    // Nothing to test unless the JIT target selected via the environment
    // has a GPU feature.
    if (!get_jit_target_from_environment().has_gpu_feature()) {
        printf("[SKIP] No GPU target enabled.\n");
        return 0;
    }

    Func f, g;
    Var x, tx;
    Param<int16_t> foo;

    // f and g share one definition so that the only difference between them
    // is the schedule applied below.
    Expr e = select(foo > x, cast<int16_t>(255), foo + cast<int16_t>(x));
    f(x) = e;
    g(x) = e;

    // Use a negative value for the 16-bit parameter.
    foo.set(-1);

    // Only f gets the GPU schedule; g serves as the reference.
    f.gpu_tile(x, tx, 8);

    Buffer<int16_t> out = f.realize({256});
    Buffer<int16_t> out2 = g.realize({256});

    // Bring f's result back to the host before it is read element-wise.
    out.copy_to_host();

    for (int i = 0; i < 256; i++) {
        if (out(i) != out2(i)) {
            printf("Incorrect result at %d: %d != %d\n", i, out(i), out2(i));
            printf("Failed\n");
            return -1;
        }
    }

    printf("Success!\n");
    return 0;
}
back to top