Revision 664dc4993b8f7ef320378f6318d7db1e09b3d44d authored by Andrew Adams on 05 September 2020, 21:51:19 UTC, committed by GitHub on 05 September 2020, 21:51:19 UTC
Update comments on depthwise convolution schedule.
2 parents 6dc230f + 1ebd926
Raw File
fast_sine_cosine.cpp
#include "Halide.h"
#include "halide_benchmark.h"

// M_PI is a POSIX extension rather than part of ISO C++, so some toolchains
// do not define it by default. Provide a fallback so the test builds there too.
#ifndef M_PI
#define M_PI 3.14159265358979310000
#endif

using namespace Halide;
using namespace Halide::Tools;

int main(int argc, char **argv) {
    Target target = get_jit_target_from_environment();
    if (target.arch == Target::WebAssembly) {
        printf("[SKIP] Performance tests are meaningless and/or misleading under WebAssembly interpreter.\n");
        return 0;
    }

    Func sin_f, cos_f, sin_ref, cos_ref;
    Var x;
    Expr t = x / 1000.f;
    const float two_pi = 2.0f * static_cast<float>(M_PI);
    sin_f(x) = fast_sin(-two_pi * t + (1 - t) * two_pi);
    cos_f(x) = fast_cos(-two_pi * t + (1 - t) * two_pi);
    sin_ref(x) = sin(-two_pi * t + (1 - t) * two_pi);
    cos_ref(x) = cos(-two_pi * t + (1 - t) * two_pi);
    sin_f.vectorize(x, 8);
    cos_f.vectorize(x, 8);
    sin_ref.vectorize(x, 8);
    cos_ref.vectorize(x, 8);

    double t1 = 1e6 * benchmark([&]() { sin_f.realize(1000); });
    double t2 = 1e6 * benchmark([&]() { cos_f.realize(1000); });
    double t3 = 1e6 * benchmark([&]() { sin_ref.realize(1000); });
    double t4 = 1e6 * benchmark([&]() { cos_ref.realize(1000); });

    printf("sin: %f ns per pixel\n"
           "fast_sine: %f ns per pixel\n"
           "cosine: %f ns per pixel\n"
           "fast_cosine: %f ns per pixel\n",
           t1, t3, t2, t4);

    if (t3 < 1.5f * t1) {
        printf("fast_sin is not 1.5x faster than sin\n");
        return -1;
    }

    if (t4 < 1.5f * t2) {
        printf("fast_cos is not 1.5x faster than cos\n");
        return -1;
    }

    printf("Success!\n");
    return 0;
}
back to top