https://github.com/halide/Halide
Revision 62d47f1ee4dd8d1e6b270185764f5c2c450715d1 authored by Steven Johnson on 05 February 2020, 00:41:39 UTC, committed by Steven Johnson on 05 February 2020, 00:41:39 UTC
Rework clock.h to use halide_benchmark for timing. Rework lesson_16 to use halide_benchmark directly.
1 parent 0849632
Tip revision: 62d47f1ee4dd8d1e6b270185764f5c2c450715d1 authored by Steven Johnson on 05 February 2020, 00:41:39 UTC
Tutorials should use halide_benachmark instead of clock.h
Tutorials should use halide_benachmark instead of clock.h
Tip revision: 62d47f1
lesson_16_rgb_run.cpp
// Halide tutorial lesson 16: RGB images and memory layouts part 2
// Before reading this file, see lesson_16_rgb_generate.cpp
// This is the code that actually uses the Halide pipeline we've
// compiled. It does not depend on libHalide, so we won't be including
// Halide.h.
//
// Instead, it depends on the header files produced by the generator in
// lesson_16_rgb_generate.cpp.
#include "brighten_either.h"
#include "brighten_interleaved.h"
#include "brighten_planar.h"
#include "brighten_specialized.h"
// We'll use the Halide::Runtime::Buffer class for passing data into and out of
// the pipeline.
#include "HalideBuffer.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "halide_benchmark.h"
int main(int argc, char **argv) {
// Let's make some images stored with interleaved and planar
// memory. Halide::Runtime::Buffer is planar by default.
Halide::Runtime::Buffer<uint8_t> planar_input(1024, 768, 3);
Halide::Runtime::Buffer<uint8_t> planar_output(1024, 768, 3);
Halide::Runtime::Buffer<uint8_t> interleaved_input =
Halide::Runtime::Buffer<uint8_t>::make_interleaved(1024, 768, 3);
Halide::Runtime::Buffer<uint8_t> interleaved_output =
Halide::Runtime::Buffer<uint8_t>::make_interleaved(1024, 768, 3);
// Let's check the strides are what we expect, given the
// constraints we set up in the generator.
assert(planar_input.dim(0).stride() == 1);
assert(planar_output.dim(0).stride() == 1);
assert(interleaved_input.dim(0).stride() == 3);
assert(interleaved_output.dim(0).stride() == 3);
assert(interleaved_input.dim(2).stride() == 1);
assert(interleaved_output.dim(2).stride() == 1);
// We'll now call the various functions we compiled and check the
// performance of each.
constexpr int samples = 1;
constexpr int iterations = 1000;
// Run the planar version of the code on the planar images and the
// interleaved version of the code on the interleaved
// images. We'll use Halide's benchmarking utility, which takes a function
// to run, the number of batches to run (1 in this case), and the number
// of iterations per batch (1000 in this case). It returns the best
// average-iteration time, in seconds. (See halide_benchmark.h for more
// information.)
double planar_time = Halide::Tools::benchmark(samples, iterations, [&]() {
brighten_planar(planar_input, 1, planar_output);
});
printf("brighten_planar: %f msec\n", planar_time * 1000.f);
double interleaved_time = Halide::Tools::benchmark(samples, iterations, [&]() {
brighten_interleaved(interleaved_input, 1, interleaved_output);
});
printf("brighten_interleaved: %f msec\n", interleaved_time * 1000.f);
// Planar is generally faster than interleaved for most imaging
// operations.
assert(planar_time < interleaved_time);
// Either of these next two commented-out calls would throw an
// error, because the stride is not what we promised it would be
// in the generator.
// brighten_planar(interleaved_input, 1, interleaved_output);
// Error: Constraint violated: brighter.stride.0 (3) == 1 (1)
// brighten_interleaved(planar_input, 1, planar_output);
// Error: Constraint violated: brighter.stride.0 (1) == 3 (3)
// Run the flexible version of the code and check performance. It
// should work, but it'll be slower than the versions above.
double either_planar_time = Halide::Tools::benchmark(samples, iterations, [&]() {
brighten_either(planar_input, 1, planar_output);
});
printf("brighten_either on planar images: %f msec\n", either_planar_time * 1000.f);
assert(planar_time < either_planar_time);
double either_interleaved_time = Halide::Tools::benchmark(samples, iterations, [&]() {
brighten_either(interleaved_input, 1, interleaved_output);
});
printf("brighten_either on interleaved images: %f msec\n", either_interleaved_time * 1000.f);
assert(interleaved_time < either_interleaved_time);
// Run the specialized version of the code on each layout. It
// should match the performance of the code compiled specifically
// for each case above by branching internally to equivalent
// code.
double specialized_planar_time = Halide::Tools::benchmark(samples, iterations, [&]() {
brighten_specialized(planar_input, 1, planar_output);
});
printf("brighten_specialized on planar images: %f msec\n", specialized_planar_time * 1000.f);
// The cost of the if statement should be negligible, but we'll
// allow a tolerance of 50% for this test to account for
// measurement noise.
assert(specialized_planar_time < 1.5 * planar_time);
double specialized_interleaved_time = Halide::Tools::benchmark(samples, iterations, [&]() {
brighten_specialized(interleaved_input, 1, interleaved_output);
});
printf("brighten_specialized on interleaved images: %f msec\n", specialized_interleaved_time * 1000.f);
assert(specialized_interleaved_time < 2.0 * interleaved_time);
return 0;
}
Computing file changes ...