https://github.com/halide/Halide
Raw File
Tip revision: e4222b5e49a53a300c3f5db1299f2cdf44c040d9 authored by Shoaib Kamil on 19 October 2018, 22:17:07 UTC
Also handle neg inf
Tip revision: e4222b5
process_p010_linear_ro_basic.cpp
#include <stdio.h>
#include <memory.h>
#include <assert.h>
#include <stdlib.h>
#include "halide_benchmark.h"
#include "pipeline_p010_linear_ro_basic.h"
#include "pipeline_p010_linear_ro_async.h"
#include "pipeline_p010_linear_ro_fold.h"
#include "pipeline_p010_linear_ro_split.h"
#include "pipeline_p010_linear_ro_split_fold.h"
#include "HalideRuntimeHexagonDma.h"
#include "HalideBuffer.h"

int main(int argc, char **argv) {
    int ret = 0;

    if (argc < 4) {
        printf("Usage: %s width height func {basic, fold, async, split, split_fold} \n", argv[0]);
        return ret;
    }

    const int width = atoi(argv[1]);
    const int height = atoi(argv[2]);
    const char *schedule = argv[3];

    // Fill the input buffer with random test data. This is just a plain old memory buffer
    const int buf_size = (width * height * 3) / 2;
    uint16_t *data_in = (uint16_t *)malloc(buf_size * sizeof(uint16_t));
    // Creating the Input Data so that we can catch if there are any Errors in DMA
    for (int i = 0; i < buf_size;  i++) {
        data_in[i] =  ((uint16_t)rand()) >> 1;
    }

    // Setup Halide input buffer with the test buffer
    Halide::Runtime::Buffer<uint16_t> input_validation(data_in, width, height, 2);
    Halide::Runtime::Buffer<uint16_t> input(nullptr, width, (3 * height) / 2);
    Halide::Runtime::Buffer<uint16_t> input_y = input.cropped(1, 0, height);            // Luma plane only
    Halide::Runtime::Buffer<uint16_t> input_uv = input.cropped(1, height, height / 2);  // Chroma plane only, with reduced height

    // describe the UV interleaving for 4:2:0 format
    input_uv.embed(2, 0);
    input_uv.raw_buffer()->dim[2].extent = 2;
    input_uv.raw_buffer()->dim[2].stride = 1;
    input_uv.raw_buffer()->dim[0].stride = 2;
    input_uv.raw_buffer()->dim[0].extent = width / 2;

    // DMA_step 1: Assign buffer to DMA interface
    input_y.device_wrap_native(halide_hexagon_dma_device_interface(),
                               reinterpret_cast<uint64_t>(data_in));
    input_uv.device_wrap_native(halide_hexagon_dma_device_interface(),
                                reinterpret_cast<uint64_t>(data_in));
    input_y.set_device_dirty();
    input_uv.set_device_dirty();

    // DMA_step 2: Allocate a DMA engine
    void *dma_engine = nullptr;
    halide_hexagon_dma_allocate_engine(nullptr, &dma_engine);

    // DMA_step 3: Associate buffer to DMA engine, and prepare for copying to host (DMA read)
    halide_hexagon_dma_prepare_for_copy_to_host(nullptr, input_y, dma_engine, false, halide_hexagon_fmt_P010_Y);
    halide_hexagon_dma_prepare_for_copy_to_host(nullptr, input_uv, dma_engine, false, halide_hexagon_fmt_P010_UV);
    
    // Setup Halide output buffer
    Halide::Runtime::Buffer<uint16_t> output(width, (3 * height) / 2);
    Halide::Runtime::Buffer<uint16_t> output_y = output.cropped(1, 0, height);              // Luma plane only
    Halide::Runtime::Buffer<uint16_t> output_uv = output.cropped(1, height, (height / 2));  // Chroma plane only, with reduced height

    // describe the UV interleaving for 4:2:0 format
    output_uv.embed(2, 0);
    output_uv.raw_buffer()->dim[2].extent = 2;
    output_uv.raw_buffer()->dim[2].stride = 1;
    output_uv.raw_buffer()->dim[0].stride = 2;
    output_uv.raw_buffer()->dim[0].extent = width / 2;

    if (!strcmp(schedule,"basic")) {
        printf("Basic pipeline\n");
        ret = pipeline_p010_linear_ro_basic(input_y, input_uv, output_y, output_uv);
    } else if (!strcmp(schedule,"fold")) {
        printf("Fold pipeline\n");
        ret = pipeline_p010_linear_ro_fold(input_y, input_uv, output_y, output_uv);
    } else if (!strcmp(schedule,"async")) {
        printf("Async pipeline\n");
        ret = pipeline_p010_linear_ro_async(input_y, input_uv, output_y, output_uv);
    } else if (!strcmp(schedule,"split")) {
        printf("Split pipeline\n");
        ret = pipeline_p010_linear_ro_split(input_y, input_uv, output_y, output_uv);
    } else if (!strcmp(schedule,"split_fold")) {
        printf("Split Fold pipeline\n");
        ret = pipeline_p010_linear_ro_split_fold(input_y, input_uv, output_y, output_uv);
    } else {
        printf("Incorrect input Correct options: basic, fold, async, split, split_fold\n");
        ret = -1;
    }

    if (ret != 0) {
        printf("pipeline failed! %d\n", ret);
    } else {
        // verify result by comparing to expected values
        int error_count = 0;
        for (int y = 0; y < (3 * height) / 2; y++) {
            for (int x = 0; x < width; x++) {
                uint16_t correct = data_in[x + y * width] * 2;
                if (correct != output(x, y)) {
                    printf("Mismatch at x=%d y=%d : %d != %d\n", x, y, correct, output(x, y));
                    if (++error_count > 20) abort();
                }
            }
        }
        printf("Success!\n");
    }

    // DMA_step 4: Buffer is processed, disassociate buffer from DMA engine
    //             Optional goto DMA_step 0 for processing more buffers
    halide_hexagon_dma_unprepare(nullptr, input_y);
    halide_hexagon_dma_unprepare(nullptr, input_uv);

    // DMA_step 5: Processing is completed and ready to exit, deallocate the DMA engine
    halide_hexagon_dma_deallocate_engine(nullptr, dma_engine);

    free(data_in);

    return ret;
}
back to top