Revision a0aa20afbc521b6215e4c34deb8070b71a8aa386 authored by Andrew Adams on 14 March 2024, 01:00:04 UTC, committed by Andrew Adams on 14 March 2024, 01:00:04 UTC
1 parent 83616f2
lesson_20_cloning_funcs.cpp
// Halide tutorial lesson 20: Cloning Funcs
// This lesson demonstrates how to use Func::clone_in to create a clone of
// a Func.
// On linux, you can compile and run it like so:
// g++ lesson_20*.cpp -g -I <path/to/Halide.h> -L <path/to/libHalide.so> -lHalide -lpthread -ldl -o lesson_20 -std=c++17
// LD_LIBRARY_PATH=<path/to/libHalide.so> ./lesson_20
// On os x:
// g++ lesson_20*.cpp -g -I <path/to/Halide.h> -L <path/to/libHalide.so> -lHalide -o lesson_20 -std=c++17
// DYLD_LIBRARY_PATH=<path/to/libHalide.dylib> ./lesson_20
// If you have the entire Halide source tree, you can also build it by
// running:
// make tutorial_lesson_20_cloning_funcs
// in a shell at the top of the halide source tree.
// The only Halide header file you need is Halide.h. It includes all of Halide.
#include "Halide.h"
// We'll also include stdio for printf.
#include <stdio.h>
using namespace Halide;
int main(int argc, char **argv) {
// First we'll declare some Vars to use below.
Var x("x"), y("y"), xo("xo"), yo("yo"), xi("xi"), yi("yi");
// This lesson will be about cloning a Func using the Func::clone_in
// directive.
{
// Consider a simple two-stage pipeline:
Func f("f_single"), g("g_single"), h("h_single");
f(x, y) = x + y;
g(x, y) = 2 * f(x, y) + 3;
h(x, y) = f(x, y) + g(x, y) + 10;
f.compute_root();
g.compute_root();
h.compute_root();
// This produces the following loop nests:
// for y:
// for x:
// f(x, y) = x + y
// for y:
// for x:
// g(x, y) = 2 * f(x, y) + 3
// for y:
// for x:
// h(x, y) = f(x, y) + g(x, y) + 10
// Using Func::clone_in, we can replace calls to 'f' inside 'g' with
// a clone of 'f' using the schedule alone:
Func f_clone_in_g = f.clone_in(g);
f_clone_in_g.compute_root();
// Equivalently, we could also chain the schedules like so:
// f.clone_in(g).compute_root();
// This produces the following loop nests:
// for y:
// for x:
// f(x, y) = x + y
// for y:
// for x:
// f_clone_in_g(x, y) = x + y
// for y:
// for x:
// g(x, y) = 2 * f_clone_in_g(x, y) + 3
// for y:
// for x:
// h(x, y) = f(x, y) + g(x, y) + 10
h.realize({5, 5});
// The schedule directive f.clone_in(g) replaces all calls to 'f'
// inside 'g' with a clone of 'f' and then returns that clone.
// Essentially, it rewrites the original pipeline above into the
// following:
{
Func f_clone_in_g("f_clone_in_g"), f("f"), g("g"), h("h");
f(x, y) = x + y;
f_clone_in_g(x, y) = x + y;
g(x, y) = 2 * f_clone_in_g(x, y) + 3;
h(x, y) = f(x, y) + g(x, y) + 10;
f.compute_root();
f_clone_in_g.compute_root();
g.compute_root();
h.compute_root();
}
}
{
// In the schedule above, only the calls to 'f' made by 'g' are
// replaced. Other calls made to 'f' would still call 'f' directly
// (i.e. 'h' still calls 'f' and not the clone). If we wish to
// replace all calls to 'f' made by both 'g' and 'h' with a single
// clone, we simply say f.clone_in({g, h}).
// Consider a three stage pipeline, with two consumers of f:
Func f("f_group"), g("g_group"), h("h_group"), out("out_group");
f(x, y) = x + y;
g(x, y) = 2 * f(x, y);
h(x, y) = f(x, y) + 10;
out(x, y) = f(x, y) + g(x, y) + h(x, y);
f.compute_root();
g.compute_root();
h.compute_root();
out.compute_root();
// We will replace all calls to 'f' inside both 'g' and 'h'
// with calls to a single clone:
f.clone_in({g, h}).compute_root();
// The equivalent loop nests are:
// for y:
// for x:
// f(x, y) = x + y
// for y:
// for x:
// f_clone(x, y) = x + y
// for y:
// for x:
// g(x, y) = 2 * f_clone(x, y)
// for y:
// for x:
// h(x, y) = f_clone(x, y) + 10
// for y:
// for x:
// out(x, y) = f(x, y) + g(x, y) + h(x, y)
out.realize({5, 5});
}
{
// One use case of Func::clone_in() is when two consumers of a producer
// consume regions of the producer that are very disjoint. Consider
// the following case for example:
Func f("f"), g("g"), h("h");
f(x) = x;
g(x) = 2 * f(0);
h(x) = f(99) + 10;
// Let's schedule 'f' to be computed at root.
f.compute_root();
// Since both 'g' and 'h' consume 'f', the region required of 'f'
// in the x-dimension is [0, 99]. The equivalent loop nests are:
// for x = 0 to 99
// f(x) = x
// for x:
// g(x) = 2 * f(0)
// for x:
// h(x) = f(99) + 10
// If 'f' is very expensive to compute, we might be better off with
// having distinct copies of 'f' for each consumer, 'g' and 'h', to
// avoid unnecessary computations. To create separate copies of 'f'
// for each consumer, we can do the following:
f.clone_in(g).compute_root();
// The equivalent loop nests are:
// f(0) = x
// f_clone(99) = x
// for x:
// g(x) = 2 * f_clone(0)
// for x:
// h(x) = f(99) + 10
}
printf("Success!\n");
return 0;
}
Computing file changes ...