https://github.com/halide/Halide
Raw File
Tip revision: a21837ff1e085f22ef380c65847202d7917950e7 authored by Steven Johnson on 06 February 2021, 00:10:51 UTC
Merge branch 'master' into srj/msan-dtf
Tip revision: a21837f
hexagon_host.cpp
#include "HalideRuntimeHexagonHost.h"
#include "device_buffer_utils.h"
#include "device_interface.h"
#include "printer.h"
#include "runtime_internal.h"
#include "scoped_mutex_lock.h"

namespace Halide {
namespace Runtime {
namespace Internal {
namespace Hexagon {

// Record stored (as an integer) in halide_buffer_t::device for buffers
// managed by this runtime; see halide_hexagon_wrap_device_handle, which
// allocates and fills one of these.
struct ion_device_handle {
    void *buffer;  // Start of the allocation backing the device buffer.
    size_t size;   // Size recorded for that allocation when it was wrapped.
};

// Held by halide_hexagon_initialize_kernels and
// halide_hexagon_device_release while they touch state_list and
// shared_runtime.
WEAK halide_mutex thread_lock = {{0}};

// Device interface assigned to buffers by
// halide_hexagon_wrap_device_handle. Only declared here.
extern WEAK halide_device_interface_t hexagon_device_interface;

// Define dynamic version of hexagon_remote/halide_hexagon_remote.h
// A remote_buffer is a (pointer, length) pair; arrays of these are what
// remote_run receives for input buffers, output buffers and scalars.
typedef struct _remote_buffer__seq_octet _remote_buffer__seq_octet;
typedef _remote_buffer__seq_octet remote_buffer;
struct _remote_buffer__seq_octet {
    unsigned char *data;  // First byte of the argument.
    int dataLen;          // Length of the argument, as filled in by map_arguments.
};

// Signatures of the entry points that init_hexagon_runtime resolves by
// name from the host support library.
typedef int (*remote_load_library_fn)(const char *, int, const unsigned char *, int, halide_hexagon_handle_t *);
typedef int (*remote_get_symbol_fn)(halide_hexagon_handle_t, const char *, int, halide_hexagon_handle_t *);
typedef int (*remote_run_fn)(halide_hexagon_handle_t, int,
                             const remote_buffer *, int, const remote_buffer *, int,
                             remote_buffer *, int);
typedef int (*remote_release_library_fn)(halide_hexagon_handle_t);
typedef int (*remote_poll_log_fn)(char *, int, int *);
typedef void (*remote_poll_profiler_state_fn)(int *, int *);
typedef int (*remote_profiler_set_current_func_fn)(int);
typedef int (*remote_power_fn)();
typedef int (*remote_power_mode_fn)(int);
typedef int (*remote_power_perf_fn)(int, unsigned int, unsigned int, int, unsigned int, unsigned int, int, int);
typedef int (*remote_thread_priority_fn)(int);

// Signatures of the host-side allocator entry points (used for the
// larger, zero-copy allocations in halide_hexagon_device_malloc).
typedef void (*host_malloc_init_fn)();
typedef void *(*host_malloc_fn)(size_t);
typedef void (*host_free_fn)(void *);

// Entry points resolved by init_hexagon_runtime; each stays nullptr until
// (and unless) its symbol is found. remote_load_library,
// remote_get_symbol, remote_run and remote_release_library are looked up
// as required symbols; the remaining remote_* pointers are optional and
// must be null-checked before use.
WEAK remote_load_library_fn remote_load_library = nullptr;
WEAK remote_get_symbol_fn remote_get_symbol = nullptr;
WEAK remote_run_fn remote_run = nullptr;
WEAK remote_release_library_fn remote_release_library = nullptr;
WEAK remote_poll_log_fn remote_poll_log = nullptr;
WEAK remote_poll_profiler_state_fn remote_poll_profiler_state = nullptr;
WEAK remote_profiler_set_current_func_fn remote_profiler_set_current_func = nullptr;
WEAK remote_power_fn remote_power_hvx_on = nullptr;
WEAK remote_power_fn remote_power_hvx_off = nullptr;
WEAK remote_power_perf_fn remote_set_performance = nullptr;
WEAK remote_power_mode_fn remote_set_performance_mode = nullptr;
WEAK remote_thread_priority_fn remote_set_thread_priority = nullptr;

// Host allocator entry points; all four are looked up as required symbols.
WEAK host_malloc_init_fn host_malloc_init = nullptr;
WEAK host_malloc_init_fn host_malloc_deinit = nullptr;
WEAK host_malloc_fn host_malloc = nullptr;
WEAK host_free_fn host_free = nullptr;

// This checks if there are any log messages available on the remote
// side and forwards each one to halide_print. It should be called after
// every remote call. It is a no-op when the optional remote_poll_log
// entry point was not found.
WEAK void poll_log(void *user_context) {
    if (!remote_poll_log) {
        return;
    }

    while (true) {
        char message[1024];
        int read = 0;
        int result = remote_poll_log(&message[0], sizeof(message), &read);
        if (result != 0) {
            // Don't make this an error, otherwise we might obscure
            // more information about errors that would come later.
            print(user_context) << "Hexagon: remote_poll_log failed " << result << "\n";
            return;
        }

        if (read <= 0) {
            // Nothing (more) to report.
            break;
        }

        // halide_print consumes a C string. Do not rely on the remote
        // side having terminated the buffer: forcing the final byte to
        // NUL never truncates a properly terminated message, and bounds
        // the read if the message filled the whole buffer.
        message[sizeof(message) - 1] = '\0';
        halide_print(user_context, message);
    }
}

// Callback installed into the profiler state by halide_hexagon_run. It
// asks the remote side to write its profiler state into *func and
// *threads.
WEAK void get_remote_profiler_state(int *func, int *threads) {
    if (!remote_poll_profiler_state) {
        // This should only have been called if there's a remote profiler func installed.
        error(nullptr) << "Hexagon: remote_poll_profiler_func not found\n";
        // The error handler may return, so bail out rather than calling
        // through a null function pointer.
        return;
    }

    remote_poll_profiler_state(func, threads);
}

// Resolve `name` in `host_lib` and store it, cast to T, in `sym`. A
// missing symbol is reported through error() only when `required` is
// true; in either case `sym` is left null and the caller is expected to
// check it.
template<typename T>
ALWAYS_INLINE void get_symbol(void *user_context, void *host_lib, const char *name, T &sym, bool required = true) {
    debug(user_context) << "    halide_get_library_symbol('" << name << "') -> \n";
    sym = (T)halide_get_library_symbol(host_lib, name);
    debug(user_context) << "        " << (void *)sym << "\n";

    if (sym || !required) {
        // Found, or optional and absent: nothing to report.
        return;
    }
    error(user_context) << "Required Hexagon runtime symbol '" << name << "' not found.\n";
}

// Load the hexagon remote runtime.
//
// Resolves every entry point this file uses from the host support
// library and, on the first fully successful load, calls
// host_malloc_init(). Returns 0 on success and -1 if any required symbol
// is missing. Safe to call repeatedly: once every required symbol has
// been resolved, later calls return immediately.
WEAK int init_hexagon_runtime(void *user_context) {
    // Only treat the runtime as loaded when *all* required symbols are
    // present. Checking a subset would make a call that follows a
    // partially failed load report success while host_malloc & co. are
    // still null (and host_malloc_init() was never run).
    const bool already_loaded =
        remote_load_library && remote_get_symbol && remote_run && remote_release_library &&
        host_malloc_init && host_malloc_deinit && host_malloc && host_free;
    if (already_loaded) {
        return 0;
    }

    // The "support library" for Hexagon is essentially a way to delegate Hexagon
    // code execution based on the runtime; devices with Hexagon hardware will
    // simply provide conduits for execution on that hardware, while test/desktop/etc
    // environments can instead connect a simulator via the API.
    // Load the .so for Linux or Android, and if that fails try the .dll
    // as we may be running the windows hosted simulator.
    void *host_lib = halide_load_library("libhalide_hexagon_host.so");
    if (!host_lib) {
        host_lib = halide_load_library("libhalide_hexagon_host.dll");
    }

    debug(user_context) << "Hexagon: init_hexagon_runtime (user_context: " << user_context << ")\n";

    // Get the symbols we need from the library. get_symbol reports a
    // missing required symbol via error(); since the error handler may
    // return, each lookup is followed by an explicit check.
    get_symbol(user_context, host_lib, "halide_hexagon_remote_load_library", remote_load_library);
    if (!remote_load_library) {
        return -1;
    }
    get_symbol(user_context, host_lib, "halide_hexagon_remote_get_symbol_v4", remote_get_symbol);
    if (!remote_get_symbol) {
        return -1;
    }
    get_symbol(user_context, host_lib, "halide_hexagon_remote_run", remote_run);
    if (!remote_run) {
        return -1;
    }
    get_symbol(user_context, host_lib, "halide_hexagon_remote_release_library", remote_release_library);
    if (!remote_release_library) {
        return -1;
    }

    get_symbol(user_context, host_lib, "halide_hexagon_host_malloc_init", host_malloc_init);
    if (!host_malloc_init) {
        return -1;
    }
    get_symbol(user_context, host_lib, "halide_hexagon_host_malloc_deinit", host_malloc_deinit);
    if (!host_malloc_deinit) {
        return -1;
    }
    get_symbol(user_context, host_lib, "halide_hexagon_host_malloc", host_malloc);
    if (!host_malloc) {
        return -1;
    }
    get_symbol(user_context, host_lib, "halide_hexagon_host_free", host_free);
    if (!host_free) {
        return -1;
    }

    // These symbols are optional.
    get_symbol(user_context, host_lib, "halide_hexagon_remote_poll_log", remote_poll_log, /* required */ false);
    get_symbol(user_context, host_lib, "halide_hexagon_remote_poll_profiler_state", remote_poll_profiler_state, /* required */ false);
    get_symbol(user_context, host_lib, "halide_hexagon_remote_profiler_set_current_func", remote_profiler_set_current_func, /* required */ false);

    // If these are unavailable, then the runtime always powers HVX on and so these are not necessary.
    get_symbol(user_context, host_lib, "halide_hexagon_remote_power_hvx_on", remote_power_hvx_on, /* required */ false);
    get_symbol(user_context, host_lib, "halide_hexagon_remote_power_hvx_off", remote_power_hvx_off, /* required */ false);
    get_symbol(user_context, host_lib, "halide_hexagon_remote_set_performance", remote_set_performance, /* required */ false);
    get_symbol(user_context, host_lib, "halide_hexagon_remote_set_performance_mode", remote_set_performance_mode, /* required */ false);
    get_symbol(user_context, host_lib, "halide_hexagon_remote_set_thread_priority", remote_set_thread_priority, /* required */ false);

    host_malloc_init();

    return 0;
}

// Structure to hold the state of a module attached to the context.
// Also used as a linked-list to keep track of all the different
// modules that are attached to a context in order to release them all
// when the context is released.
struct module_state {
    halide_hexagon_handle_t module;  // Remote library handle; 0 when not loaded.
    module_state *next;              // Next node in state_list.
};
// Head of the list of all module_state nodes created by
// halide_hexagon_initialize_kernels.
WEAK module_state *state_list = nullptr;
// Handle of the remote shared runtime library; 0 until it has been loaded.
WEAK halide_hexagon_handle_t shared_runtime = 0;

#ifdef DEBUG_RUNTIME

// Debug-build helper: dump `code_size` bytes of `code` to `path` in the
// current directory. Failures are only logged, never propagated.
WEAK void write_shared_object(void *user_context, const char *path,
                              const uint8_t *code, uint64_t code_size) {
    void *out = fopen(path, "wb");
    if (out == nullptr) {
        debug(user_context) << "    failed to write shared object to '" << path << "'\n";
        return;
    }

    const size_t bytes_written = fwrite(code, 1, code_size, out);
    if (bytes_written != code_size) {
        // Short write: report it, but still close the file below.
        debug(user_context) << "    bad write of shared object to '" << path << "'\n";
    }

    fclose(out);
}

#endif

}  // namespace Hexagon
}  // namespace Internal
}  // namespace Runtime
}  // namespace Halide

using namespace Halide::Runtime::Internal;
using namespace Halide::Runtime::Internal::Hexagon;

extern "C" {

// Reports whether the Hexagon host runtime could be initialized.
WEAK bool halide_is_hexagon_available(void *user_context) {
    return init_hexagon_runtime(user_context) == 0;
}

// Ensure the remote shared runtime and the kernel module for one pipeline
// are loaded.
//
//   state_ptr     - address of the pipeline's module_state pointer; must
//                   not be null. *state_ptr is allocated on first use.
//   code/code_size       - the kernel shared object to load.
//   runtime/runtime_size - the shared runtime object, loaded only once.
//
// Returns 0 on success, the init_hexagon_runtime error if the host
// runtime cannot be initialized, and -1 on any other failure.
WEAK int halide_hexagon_initialize_kernels(void *user_context, void **state_ptr,
                                           const uint8_t *code, uint64_t code_size,
                                           const uint8_t *runtime, uint64_t runtime_size) {
    int result = init_hexagon_runtime(user_context);
    if (result != 0) {
        return result;
    }
    // Validate state_ptr before the debug output below dereferences it.
    halide_assert(user_context, state_ptr != nullptr);
    debug(user_context) << "Hexagon: halide_hexagon_initialize_kernels (user_context: " << user_context
                        << ", state_ptr: " << state_ptr
                        << ", *state_ptr: " << *state_ptr
                        << ", code: " << code
                        << ", code_size: " << (int)code_size
                        << ", runtime: " << runtime
                        << ", runtime_size: " << (int)runtime_size << ")\n";

#ifdef DEBUG_RUNTIME
    uint64_t t_before = halide_current_time_ns(user_context);
#endif

    // Create the state object if necessary. This only happens once,
    // regardless of how many times halide_hexagon_initialize_kernels
    // or halide_hexagon_device_release is called.
    // halide_hexagon_device_release traverses this list and releases
    // the module objects, but it does not modify the list nodes
    // created/inserted here.
    ScopedMutexLock lock(&thread_lock);

    // Initialize the runtime, if necessary.
    if (!shared_runtime) {
        debug(user_context) << "    Initializing shared runtime\n";
        const char soname[] = "libhalide_shared_runtime.so";
#ifdef DEBUG_RUNTIME
        debug(user_context) << "    Writing shared object '" << soname << "'\n";
        write_shared_object(user_context, soname, runtime, runtime_size);
#endif
        debug(user_context) << "    halide_remote_load_library(" << soname << ") -> ";
        result = remote_load_library(soname, sizeof(soname), runtime, runtime_size, &shared_runtime);
        poll_log(user_context);
        if (result == 0) {
            debug(user_context) << "        " << (void *)(size_t)shared_runtime << "\n";
            halide_assert(user_context, shared_runtime != 0);
        } else {
            debug(user_context) << "        " << result << "\n";
            error(user_context) << "Initialization of Hexagon kernels failed\n";
            shared_runtime = 0;
        }
    } else {
        debug(user_context) << "    re-using existing shared runtime " << (void *)(size_t)shared_runtime << "\n";
    }

    if (result != 0) {
        return -1;
    }

    module_state **state = (module_state **)state_ptr;
    if (!(*state)) {
        debug(user_context) << "    allocating module state -> \n";
        module_state *new_state = (module_state *)malloc(sizeof(module_state));
        debug(user_context) << "        " << new_state << "\n";
        if (!new_state) {
            // Do not publish or dereference a failed allocation.
            error(user_context) << "Hexagon: out of memory allocating module state\n";
            return -1;
        }
        new_state->module = 0;
        new_state->next = state_list;
        state_list = new_state;
        *state = new_state;
    }

    // Create the module itself if necessary.
    if (!(*state)->module) {
        // Each module gets a distinct name; the counter is protected by
        // thread_lock, which is held here.
        static int unique_id = 0;
        stringstream soname(user_context);
        soname << "libhalide_kernels" << unique_id++ << ".so";
#ifdef DEBUG_RUNTIME
        debug(user_context) << "    Writing shared object '" << soname.str() << "'\n";
        write_shared_object(user_context, soname.str(), code, code_size);
#endif
        debug(user_context) << "    halide_remote_load_library(" << soname.str() << ") -> ";
        halide_hexagon_handle_t module = 0;
        result = remote_load_library(soname.str(), soname.size() + 1, code, code_size, &module);
        poll_log(user_context);
        if (result == 0) {
            debug(user_context) << "        " << (void *)(size_t)module << "\n";
            (*state)->module = module;
        } else {
            debug(user_context) << "        " << result << "\n";
            error(user_context) << "Initialization of Hexagon kernels failed\n";
        }
    } else {
        debug(user_context) << "    re-using existing module " << (void *)(size_t)(*state)->module << "\n";
    }

#ifdef DEBUG_RUNTIME
    uint64_t t_after = halide_current_time_ns(user_context);
    debug(user_context) << "    Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
#endif

    return result != 0 ? -1 : 0;
}

// Intentionally a no-op: nothing is torn down per call. The remote
// modules recorded by halide_hexagon_initialize_kernels are released in
// halide_hexagon_device_release.
WEAK void halide_hexagon_finalize_kernels(void *user_context, void *state_ptr) {
}

namespace {

// Fill mapped_args with one remote_buffer for every argument whose
// flags satisfy (flags & flag_mask) == flag_value, in argument order.
// Buffer arguments (non-zero flags) are mapped to their device
// allocation when they have one, otherwise to their host pointer;
// scalars (zero flags) are passed by pointer/size. Returns the number
// of arguments mapped, or a negative number on error.
WEAK int map_arguments(void *user_context, int arg_count,
                       uint64_t arg_sizes[], void *args[], int arg_flags[], int flag_mask, int flag_value,
                       remote_buffer *mapped_args) {
    // This is the way that HexagonOffload packages buffer arguments for us.
    struct hexagon_device_pointer {
        uint64_t dev;
        uint8_t *host;
    };

    int mapped_count = 0;
    for (int i = 0; i < arg_count; i++) {
        const int flags = arg_flags[i];
        if ((flags & flag_mask) != flag_value) {
            continue;
        }

        remote_buffer &out = mapped_args[mapped_count];
        mapped_count++;

        if (flags == 0) {
            // A scalar: just put the pointer/size in the result.
            out.data = (uint8_t *)args[i];
            out.dataLen = arg_sizes[i];
            continue;
        }

        const hexagon_device_pointer *packed = (hexagon_device_pointer *)args[i];
        uint64_t device = packed->dev;
        if (!device) {
            // Just a host buffer; its size is passed in as the arg size.
            out.data = packed->host;
            out.dataLen = arg_sizes[i];
            continue;
        }

        // This argument has a device handle; use the allocation it records.
        ion_device_handle *ion_handle = reinterpret<ion_device_handle *>(device);
        debug(user_context) << i << ", " << device << "\n";
        out.data = reinterpret_cast<uint8_t *>(ion_handle->buffer);
        out.dataLen = ion_handle->size;
    }
    return mapped_count;
}

}  // namespace

// Run the remote function `name` from the module held in state_ptr.
//
//   state_ptr - the module_state created by
//               halide_hexagon_initialize_kernels; must not be null.
//   function  - in/out cache for the remote symbol handle; looked up on
//               first use (when *function == 0). Must not be null.
//   arg_sizes - per-argument sizes, terminated by a 0 entry.
//   args      - per-argument pointers.
//   arg_flags - per-argument flags: bit 0 = input buffer, bit 1 = output
//               buffer, 0 = scalar.
//
// Returns 0 on success, the init_hexagon_runtime error if the host
// runtime is unavailable, -1 if the symbol cannot be found, or the
// non-zero result of the remote call.
WEAK int halide_hexagon_run(void *user_context,
                            void *state_ptr,
                            const char *name,
                            halide_hexagon_handle_t *function,
                            uint64_t arg_sizes[],
                            void *args[],
                            int arg_flags[]) {
    halide_assert(user_context, state_ptr != nullptr);
    halide_assert(user_context, function != nullptr);
    int result = init_hexagon_runtime(user_context);
    if (result != 0) {
        return result;
    }

    halide_hexagon_handle_t module = state_ptr ? ((module_state *)state_ptr)->module : 0;
    debug(user_context) << "Hexagon: halide_hexagon_run ("
                        << "user_context: " << user_context << ", "
                        << "state_ptr: " << state_ptr << " (" << module << "), "
                        << "name: " << name << ", "
                        << "function: " << function << " (" << *function << "))\n";

    // If we haven't gotten the symbol for this function, do so now.
    if (*function == 0) {
        debug(user_context) << "    halide_hexagon_remote_get_symbol " << name << " -> ";
        halide_hexagon_handle_t sym = 0;
        const int sym_result = remote_get_symbol(module, name, strlen(name) + 1, &sym);
        *function = sym_result == 0 ? sym : 0;
        poll_log(user_context);
        debug(user_context) << "        " << *function << "\n";
        if (*function == 0) {
            error(user_context) << "Failed to find function " << name << " in module.\n";
            return -1;
        }
    }

    // Allocate some remote_buffer objects on the stack.
    int arg_count = 0;
    while (arg_sizes[arg_count] > 0) {
        arg_count++;
    }
    remote_buffer *mapped_buffers =
        (remote_buffer *)__builtin_alloca(arg_count * sizeof(remote_buffer));

    // Map the arguments. The three groups are packed back to back in
    // mapped_buffers.
    // First grab the input buffers (bit 0 of flags is set).
    remote_buffer *input_buffers = mapped_buffers;
    int input_buffer_count = map_arguments(user_context, arg_count, arg_sizes, args, arg_flags, 0x3, 0x1,
                                           input_buffers);
    if (input_buffer_count < 0) {
        return input_buffer_count;
    }

    // Then the output buffers (bit 1 of flags is set).
    remote_buffer *output_buffers = input_buffers + input_buffer_count;
    int output_buffer_count = map_arguments(user_context, arg_count, arg_sizes, args, arg_flags, 0x2, 0x2,
                                            output_buffers);
    if (output_buffer_count < 0) {
        return output_buffer_count;
    }

    // And the input scalars (neither bits 0 or 1 of flags is set).
    remote_buffer *input_scalars = output_buffers + output_buffer_count;
    int input_scalar_count = map_arguments(user_context, arg_count, arg_sizes, args, arg_flags, 0x3, 0x0,
                                           input_scalars);
    if (input_scalar_count < 0) {
        return input_scalar_count;
    }

#ifdef DEBUG_RUNTIME
    uint64_t t_before = halide_current_time_ns(user_context);
#endif

    // If remote profiling is supported, tell the profiler to call
    // get_remote_profiler_func to retrieve the current
    // func. Otherwise leave it alone - the cost of remote running
    // will be billed to the calling Func.
    if (remote_poll_profiler_state) {
        halide_profiler_get_state()->get_remote_profiler_state = get_remote_profiler_state;
        if (remote_profiler_set_current_func) {
            remote_profiler_set_current_func(halide_profiler_get_state()->current_func);
        }
    }

    // Call the pipeline on the device side.
    debug(user_context) << "    halide_hexagon_remote_run -> ";
    result = remote_run(module, *function,
                        input_buffers, input_buffer_count,
                        output_buffers, output_buffer_count,
                        input_scalars, input_scalar_count);
    poll_log(user_context);
    debug(user_context) << "        " << result << "\n";

    // The remote call is over, so stop routing profiler queries to the
    // remote side. This must happen on the failure path too; otherwise
    // the hook stays installed after a failed run.
    halide_profiler_get_state()->get_remote_profiler_state = nullptr;

    if (result != 0) {
        error(user_context) << "Hexagon pipeline failed.\n";
        return result;
    }

#ifdef DEBUG_RUNTIME
    uint64_t t_after = halide_current_time_ns(user_context);
    debug(user_context) << "    Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
#endif

    return 0;
}

// Release every remote module recorded in state_list, then the shared
// runtime. The list nodes themselves are kept (with module == 0) so that
// a later halide_hexagon_initialize_kernels can reload into the same
// node. Always returns 0.
WEAK int halide_hexagon_device_release(void *user_context) {
    debug(user_context)
        << "Hexagon: halide_hexagon_device_release (user_context: " << user_context << ")\n";

    ScopedMutexLock lock(&thread_lock);

    // Release all of the remote side modules.
    for (module_state *state = state_list; state; state = state->next) {
        if (!state->module) {
            continue;
        }
        debug(user_context) << "    halide_remote_release_library " << state
                            << " (" << state->module << ") -> ";
        int result = remote_release_library(state->module);
        poll_log(user_context);
        debug(user_context) << "        " << result << "\n";
        state->module = 0;
    }
    // Deliberately do NOT reset state_list here. Pipelines keep pointing
    // at their nodes through state_ptr and re-use them on the next
    // initialize; dropping the list head would leave such re-loaded
    // modules unreachable from here and they would never be released.

    if (shared_runtime) {
        debug(user_context) << "    releasing shared runtime\n";
        debug(user_context) << "    halide_remote_release_library " << shared_runtime << " -> ";
        int result = remote_release_library(shared_runtime);
        poll_log(user_context);
        debug(user_context) << "        " << result << "\n";
        shared_runtime = 0;
    }

    return 0;
}

// When allocations for Hexagon are at least as large as this
// threshold, use an ION allocation (to get zero copy). If the
// allocation is smaller, use a standard allocation instead.  This is
// done because allocating an entire page for a small allocation is
// wasteful, and the copy is not significant.  Additionally, the
// FastRPC interface can probably do a better job with many small
// arguments than simply mapping the pages.
// The threshold is compared against sizes in bytes.
// halide_hexagon_device_malloc and halide_hexagon_device_free must apply
// the same comparison so that each allocation is freed by the matching
// allocator.
static const int min_ion_allocation_size = 4096;

// Give `buf` a device allocation if it does not already have one.
// Allocations at or above min_ion_allocation_size come from host_malloc,
// smaller ones from halide_malloc. If the buffer has no host pointer,
// the new allocation also becomes its host memory. Returns 0 on success
// or a non-zero error code.
WEAK int halide_hexagon_device_malloc(void *user_context, halide_buffer_t *buf) {
    const int init_status = init_hexagon_runtime(user_context);
    if (init_status != 0) {
        return init_status;
    }

    debug(user_context)
        << "Hexagon: halide_hexagon_device_malloc (user_context: " << user_context
        << ", buf: " << buf << ")\n";

    // Nothing to do if this buffer already has a device allocation.
    if (buf->device) {
        return 0;
    }

    size_t size = buf->size_in_bytes();
    halide_assert(user_context, size != 0);

    // Hexagon code generation generates clamped ramp loads in a way
    // that requires up to an extra vector beyond the end of the
    // buffer to be legal to access, so pad the allocation.
    size += 128;

    for (int i = 0; i < buf->dimensions; i++) {
        halide_assert(user_context, buf->dim[i].stride >= 0);
    }

    debug(user_context) << "    allocating buffer of " << (uint64_t)size << " bytes\n";

#ifdef DEBUG_RUNTIME
    uint64_t t_before = halide_current_time_ns(user_context);
#endif

    // Pick the allocator once; the same choice drives cleanup below.
    const bool use_host_malloc = size >= min_ion_allocation_size;

    void *ion = nullptr;
    if (use_host_malloc) {
        debug(user_context) << "    host_malloc len=" << (uint64_t)size << " -> ";
        ion = host_malloc(size);
        debug(user_context) << "        " << ion << "\n";
        if (!ion) {
            error(user_context) << "host_malloc failed\n";
            return -1;
        }
    } else {
        debug(user_context) << "    halide_malloc size=" << (uint64_t)size << " -> ";
        ion = halide_malloc(user_context, size);
        debug(user_context) << "        " << ion << "\n";
        if (!ion) {
            error(user_context) << "halide_malloc failed\n";
            return -1;
        }
    }

    const int wrap_status = halide_hexagon_wrap_device_handle(user_context, buf, ion, size);
    if (wrap_status != 0) {
        // Wrapping failed: hand the memory back to whichever allocator
        // produced it.
        if (use_host_malloc) {
            host_free(ion);
        } else {
            halide_free(user_context, ion);
        }
        return wrap_status;
    }

    if (!buf->host) {
        // If the host pointer has also not been allocated yet, set it to
        // the ion buffer. This buffer will be zero copy.
        buf->host = (uint8_t *)ion;
        debug(user_context) << "    host <- " << buf->host << "\n";
    }

#ifdef DEBUG_RUNTIME
    uint64_t t_after = halide_current_time_ns(user_context);
    debug(user_context) << "    Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
#endif

    return 0;
}

// Detach and free the device allocation of `buf`, using the allocator
// implied by the recorded size. If the host pointer aliased the device
// allocation it is cleared as well. Always returns 0.
WEAK int halide_hexagon_device_free(void *user_context, halide_buffer_t *buf) {
    debug(user_context)
        << "Hexagon: halide_hexagon_device_free (user_context: " << user_context
        << ", buf: " << buf << ")\n";

#ifdef DEBUG_RUNTIME
    uint64_t t_before = halide_current_time_ns(user_context);
#endif

    // Read the size and pointer out of the handle before detaching,
    // since detaching frees the handle.
    const uint64_t size = halide_hexagon_get_device_size(user_context, buf);
    void *ion = halide_hexagon_get_device_handle(user_context, buf);
    halide_hexagon_detach_device_handle(user_context, buf);

    const bool from_host_malloc = size >= min_ion_allocation_size;
    if (from_host_malloc) {
        debug(user_context) << "    host_free ion=" << ion << "\n";
        host_free(ion);
    } else {
        debug(user_context) << "    halide_free ion=" << ion << "\n";
        halide_free(user_context, ion);
    }

    // If we also set the host pointer, reset it.
    if (buf->host == ion) {
        buf->host = nullptr;
        debug(user_context) << "    host <- 0x0\n";
    }

#ifdef DEBUG_RUNTIME
    uint64_t t_after = halide_current_time_ns(user_context);
    debug(user_context) << "    Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
#endif

    // This is to match what the default implementation of halide_device_free does.
    buf->set_device_dirty(false);
    return 0;
}

// Copy the host contents of `buf` into its device allocation,
// allocating the latter first if needed. Returns 0 on success or the
// error from halide_hexagon_device_malloc.
WEAK int halide_hexagon_copy_to_device(void *user_context, halide_buffer_t *buf) {
    const int malloc_status = halide_hexagon_device_malloc(user_context, buf);
    if (malloc_status) {
        return malloc_status;
    }

    debug(user_context)
        << "Hexagon: halide_hexagon_copy_to_device (user_context: " << user_context
        << ", buf: " << buf << ")\n";

#ifdef DEBUG_RUNTIME
    uint64_t t_before = halide_current_time_ns(user_context);
#endif

    halide_assert(user_context, buf->host && buf->device);

    // Describe the copy, then point its destination at the memory
    // recorded in the device handle.
    device_copy copy = make_host_to_device_copy(buf);
    void *device_memory = halide_hexagon_get_device_handle(user_context, buf);
    copy.dst = reinterpret<uintptr_t>(device_memory);
    copy_memory(copy, user_context);

#ifdef DEBUG_RUNTIME
    uint64_t t_after = halide_current_time_ns(user_context);
    debug(user_context) << "    Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
#endif

    return 0;
}

// Copy the device allocation of `buf` back into its host memory. Both
// must already exist. Always returns 0.
WEAK int halide_hexagon_copy_to_host(void *user_context, struct halide_buffer_t *buf) {
    debug(user_context)
        << "Hexagon: halide_hexagon_copy_to_host (user_context: " << user_context
        << ", buf: " << buf << ")\n";

#ifdef DEBUG_RUNTIME
    uint64_t t_before = halide_current_time_ns(user_context);
#endif

    halide_assert(user_context, buf->host && buf->device);

    // Describe the copy, then point its source at the memory recorded
    // in the device handle.
    device_copy copy = make_device_to_host_copy(buf);
    void *device_memory = halide_hexagon_get_device_handle(user_context, buf);
    copy.src = reinterpret<uintptr_t>(device_memory);
    copy_memory(copy, user_context);

#ifdef DEBUG_RUNTIME
    uint64_t t_after = halide_current_time_ns(user_context);
    debug(user_context) << "    Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
#endif

    return 0;
}

// Synchronization hook for the device interface. There is nothing to
// wait for here, so this only logs the call and reports success.
WEAK int halide_hexagon_device_sync(void *user_context, struct halide_buffer_t *) {
    debug(user_context) << "Hexagon: halide_hexagon_device_sync (user_context: " << user_context << ")\n";
    return 0;
}

// Attach the existing allocation `ion_buf` of `size` bytes to `buf` as
// its Hexagon device memory. `buf` must not already have a device
// handle. Returns 0 on success, -2 if a device handle is already
// present, and -1 if the handle record cannot be allocated.
WEAK int halide_hexagon_wrap_device_handle(void *user_context, struct halide_buffer_t *buf,
                                           void *ion_buf, uint64_t size) {
    halide_assert(user_context, buf->device == 0);
    if (buf->device != 0) {
        return -2;
    }

    ion_device_handle *record = (ion_device_handle *)malloc(sizeof(ion_device_handle));
    if (record == nullptr) {
        return -1;
    }
    record->buffer = ion_buf;
    record->size = size;

    // Publish the interface first (taking a module reference), then the
    // handle itself.
    buf->device_interface = &hexagon_device_interface;
    buf->device_interface->impl->use_module();
    buf->device = reinterpret<uint64_t>(record);
    return 0;
}

// Remove the Hexagon device handle from `buf` and free the handle
// record. The allocation the handle referred to is NOT freed here. A
// buffer without a device handle is left untouched. Always returns 0.
WEAK int halide_hexagon_detach_device_handle(void *user_context, struct halide_buffer_t *buf) {
    if (buf->device != 0) {
        halide_assert(user_context, buf->device_interface == &hexagon_device_interface);
        ion_device_handle *record = reinterpret<ion_device_handle *>(buf->device);
        free(record);

        // Drop the module reference taken when the handle was wrapped,
        // then clear the buffer's device fields.
        buf->device_interface->impl->release_module();
        buf->device = 0;
        buf->device_interface = nullptr;
    }
    return 0;
}

// Return the allocation pointer recorded in the device handle of `buf`,
// or nullptr when the buffer has no device handle.
WEAK void *halide_hexagon_get_device_handle(void *user_context, struct halide_buffer_t *buf) {
    if (buf->device != 0) {
        halide_assert(user_context, buf->device_interface == &hexagon_device_interface);
        const ion_device_handle *record = reinterpret<ion_device_handle *>(buf->device);
        return record->buffer;
    }
    return nullptr;
}

// Return the size recorded in the device handle of `buf`, or 0 when the
// buffer has no device handle.
WEAK uint64_t halide_hexagon_get_device_size(void *user_context, struct halide_buffer_t *buf) {
    if (buf->device != 0) {
        halide_assert(user_context, buf->device_interface == &hexagon_device_interface);
        const ion_device_handle *record = reinterpret<ion_device_handle *>(buf->device);
        return record->size;
    }
    return 0;
}

// Allocate device memory for `buf` and, on success, make the host
// pointer refer to that same allocation. Returns the result of
// halide_hexagon_device_malloc.
WEAK int halide_hexagon_device_and_host_malloc(void *user_context, struct halide_buffer_t *buf) {
    debug(user_context) << "halide_hexagon_device_and_host_malloc called.\n";
    const int status = halide_hexagon_device_malloc(user_context, buf);
    if (status != 0) {
        return status;
    }
    buf->host = (uint8_t *)halide_hexagon_get_device_handle(user_context, buf);
    return status;
}

// Free the device allocation of `buf` and unconditionally clear its
// host pointer. Always returns 0; the result of
// halide_hexagon_device_free is not propagated.
WEAK int halide_hexagon_device_and_host_free(void *user_context, struct halide_buffer_t *buf) {
    debug(user_context) << "halide_hexagon_device_and_host_free called.\n";

    halide_hexagon_device_free(user_context, buf);
    buf->host = nullptr;

    return 0;
}

// Copy `src` into `dst`, where the destination is either host memory
// (dst_device_interface == nullptr) or a Hexagon device buffer. Returns
// 0 on success, or halide_error_code_incompatible_device_interface when
// the only valid copy of the source lives on a different device.
WEAK int halide_hexagon_buffer_copy(void *user_context, struct halide_buffer_t *src,
                                    const struct halide_device_interface_t *dst_device_interface,
                                    struct halide_buffer_t *dst) {
    // We only handle copies to hexagon buffers or to host
    halide_assert(user_context, dst_device_interface == nullptr ||
                                    dst_device_interface == &hexagon_device_interface);

    const bool src_is_hexagon = src->device_interface == &hexagon_device_interface;
    const bool src_needs_device = src->device_dirty() || src->host == nullptr;
    if (src_needs_device && !src_is_hexagon) {
        halide_assert(user_context, dst_device_interface == &hexagon_device_interface);
        // This is handled at the higher level.
        return halide_error_code_incompatible_device_interface;
    }

    // Read from host memory unless the source has a Hexagon device
    // allocation that is not superseded by a dirty host copy.
    const bool host_is_current = src->host_dirty() && src->host != nullptr;
    bool from_host = !src_is_hexagon || (src->device == 0) || host_is_current;
    bool to_host = !dst_device_interface;

    halide_assert(user_context, from_host || src->device);
    halide_assert(user_context, to_host || dst->device);

#ifdef DEBUG_RUNTIME
    uint64_t t_before = halide_current_time_ns(user_context);
#endif

    device_copy copy = make_buffer_copy(src, from_host, dst, to_host);

    // For device-side endpoints, substitute the memory recorded in the
    // device handle.
    if (!from_host) {
        copy.src = reinterpret<uintptr_t>(halide_hexagon_get_device_handle(user_context, src));
    }
    if (!to_host) {
        copy.dst = reinterpret<uintptr_t>(halide_hexagon_get_device_handle(user_context, dst));
    }
    copy_memory(copy, user_context);

#ifdef DEBUG_RUNTIME
    uint64_t t_after = halide_current_time_ns(user_context);
    debug(user_context) << "    Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
#endif

    return 0;
}

namespace {

// Give dst a freshly malloc'ed ion_device_handle that aliases src's device
// allocation starting `offset` bytes in. dst inherits src's device
// interface and device-dirty flag.
//
// Returns halide_error_code_out_of_memory if the handle cannot be
// allocated, 0 otherwise.
WEAK int hexagon_device_crop_from_offset(const struct halide_buffer_t *src, int64_t offset, struct halide_buffer_t *dst) {
    ion_device_handle *cropped = (ion_device_handle *)malloc(sizeof(ion_device_handle));
    if (cropped == nullptr) {
        return halide_error_code_out_of_memory;
    }

    const ion_device_handle *parent = (const ion_device_handle *)src->device;

    // The crop shares the parent's memory: advance the base pointer and
    // shrink the remaining size by the same number of bytes.
    cropped->buffer = (uint8_t *)parent->buffer + offset;
    cropped->size = parent->size - offset;

    dst->device = reinterpret<uint64_t>(cropped);
    dst->device_interface = src->device_interface;
    dst->set_device_dirty(src->device_dirty());
    return 0;
}

}  // namespace

// Make dst a device-side crop of src: compute the byte offset of the crop
// within src and build an aliasing device handle for dst from it.
// Returns whatever hexagon_device_crop_from_offset returns.
WEAK int halide_hexagon_device_crop(void *user_context, const struct halide_buffer_t *src,
                                    struct halide_buffer_t *dst) {
    debug(user_context) << "halide_hexagon_device_crop called.\n";

    return hexagon_device_crop_from_offset(src,
                                           calc_device_crop_byte_offset(src, dst),
                                           dst);
}

// Make dst a device-side slice of src at position slice_pos along
// dimension slice_dim: compute the byte offset of the slice within src and
// build an aliasing device handle for dst from it.
// Returns whatever hexagon_device_crop_from_offset returns.
WEAK int halide_hexagon_device_slice(void *user_context, const struct halide_buffer_t *src,
                                     int slice_dim, int slice_pos, struct halide_buffer_t *dst) {
    debug(user_context) << "halide_hexagon_device_slice called.\n";

    return hexagon_device_crop_from_offset(src,
                                           calc_device_slice_byte_offset(src, slice_dim, slice_pos),
                                           dst);
}

// Release the device handle of a cropped/sliced buffer. Only the handle
// struct stored in dst->device is freed here; this function does not
// touch the memory the handle points at, nor does it reset dst->device.
// Always returns 0.
WEAK int halide_hexagon_device_release_crop(void *user_context, struct halide_buffer_t *dst) {
    // Trace message names this function, matching the device_crop /
    // device_slice traces.
    debug(user_context) << "halide_hexagon_device_release_crop called.\n";
    free((ion_device_handle *)dst->device);
    return 0;
}

// Initialize the Hexagon runtime if needed, then call remote_power_hvx_on.
// Returns 0 on success, or when the loaded runtime provides no
// remote_power_hvx_on entry point; otherwise returns the non-zero code
// from init_hexagon_runtime or remote_power_hvx_on.
WEAK int halide_hexagon_power_hvx_on(void *user_context) {
    if (int init_status = init_hexagon_runtime(user_context)) {
        return init_status;
    }

    debug(user_context) << "halide_hexagon_power_hvx_on\n";
    if (remote_power_hvx_on == nullptr) {
        // The function is not available in this version of the
        // runtime, this runtime always powers HVX on.
        return 0;
    }

#ifdef DEBUG_RUNTIME
    uint64_t start_ns = halide_current_time_ns(user_context);
#endif

    debug(user_context) << "    remote_power_hvx_on -> ";
    const int status = remote_power_hvx_on();
    debug(user_context) << "        " << status << "\n";
    if (status != 0) {
        error(user_context) << "remote_power_hvx_on failed.\n";
        return status;
    }

#ifdef DEBUG_RUNTIME
    uint64_t end_ns = halide_current_time_ns(user_context);
    debug(user_context) << "    Time: " << (end_ns - start_ns) / 1.0e6 << " ms\n";
#endif

    return 0;
}

// Initialize the Hexagon runtime if needed, then call remote_power_hvx_off.
// Returns 0 on success, or when the loaded runtime provides no
// remote_power_hvx_off entry point; otherwise returns the non-zero code
// from init_hexagon_runtime or remote_power_hvx_off.
WEAK int halide_hexagon_power_hvx_off(void *user_context) {
    if (int init_status = init_hexagon_runtime(user_context)) {
        return init_status;
    }

    debug(user_context) << "halide_hexagon_power_hvx_off\n";
    if (remote_power_hvx_off == nullptr) {
        // The function is not available in this version of the
        // runtime, this runtime always powers HVX on.
        return 0;
    }

#ifdef DEBUG_RUNTIME
    uint64_t start_ns = halide_current_time_ns(user_context);
#endif

    debug(user_context) << "    remote_power_hvx_off -> ";
    const int status = remote_power_hvx_off();
    debug(user_context) << "        " << status << "\n";
    if (status != 0) {
        error(user_context) << "remote_power_hvx_off failed.\n";
        return status;
    }

#ifdef DEBUG_RUNTIME
    uint64_t end_ns = halide_current_time_ns(user_context);
    debug(user_context) << "    Time: " << (end_ns - start_ns) / 1.0e6 << " ms\n";
#endif

    return 0;
}

// Adapter that calls halide_hexagon_power_hvx_off through a
// (user_context, object) signature. The object argument is unused, and
// because this function returns void the status code is discarded.
WEAK void halide_hexagon_power_hvx_off_as_destructor(void *user_context, void * /* obj */) {
    (void)halide_hexagon_power_hvx_off(user_context);
}

// Initialize the Hexagon runtime if needed, then forward `mode` to
// remote_set_performance_mode.
// Returns 0 on success, or when the loaded runtime provides no
// remote_set_performance_mode entry point; otherwise returns the non-zero
// code from init_hexagon_runtime or remote_set_performance_mode.
WEAK int halide_hexagon_set_performance_mode(void *user_context, halide_hexagon_power_mode_t mode) {
    if (int init_status = init_hexagon_runtime(user_context)) {
        return init_status;
    }

    debug(user_context) << "halide_hexagon_set_performance_mode\n";
    if (remote_set_performance_mode == nullptr) {
        // This runtime doesn't support changing the performance target.
        return 0;
    }

    debug(user_context) << "    remote_set_performance_mode -> ";
    const int status = remote_set_performance_mode(mode);
    debug(user_context) << "        " << status << "\n";
    if (status == 0) {
        return 0;
    }

    error(user_context) << "remote_set_performance_mode failed.\n";
    return status;
}

// Initialize the Hexagon runtime if needed, then forward the individual
// fields of *perf to remote_set_performance.
// Returns 0 on success, or when the loaded runtime provides no
// remote_set_performance entry point; otherwise returns the non-zero code
// from init_hexagon_runtime or remote_set_performance.
// perf is dereferenced without a null check once the remote entry point
// is known to exist.
WEAK int halide_hexagon_set_performance(void *user_context, halide_hexagon_power_t *perf) {
    if (int init_status = init_hexagon_runtime(user_context)) {
        return init_status;
    }

    debug(user_context) << "halide_hexagon_set_performance\n";
    if (remote_set_performance == nullptr) {
        // This runtime doesn't support changing the performance target.
        return 0;
    }

    debug(user_context) << "    remote_set_performance -> ";
    const int status = remote_set_performance(
        perf->set_mips, perf->mipsPerThread, perf->mipsTotal,
        perf->set_bus_bw, perf->bwMegabytesPerSec, perf->busbwUsagePercentage,
        perf->set_latency, perf->latency);

    debug(user_context) << "        " << status << "\n";
    if (status == 0) {
        return 0;
    }

    error(user_context) << "remote_set_performance failed.\n";
    return status;
}

// Initialize the Hexagon runtime if needed, then forward `priority` to
// remote_set_thread_priority.
// Returns 0 on success, or when the loaded runtime provides no
// remote_set_thread_priority entry point; otherwise returns the non-zero
// code from init_hexagon_runtime or remote_set_thread_priority.
WEAK int halide_hexagon_set_thread_priority(void *user_context, int priority) {
    if (int init_status = init_hexagon_runtime(user_context)) {
        return init_status;
    }

    debug(user_context) << "halide_hexagon_set_thread_priority\n";
    if (remote_set_thread_priority == nullptr) {
        // This runtime doesn't support changing the thread priority.
        return 0;
    }

    debug(user_context) << "    remote_set_thread_priority -> ";
    const int status = remote_set_thread_priority(priority);
    debug(user_context) << "        " << status << "\n";
    if (status == 0) {
        return 0;
    }

    error(user_context) << "remote_set_thread_priority failed.\n";
    return status;
}

// Accessor for the Hexagon device interface table defined at the bottom
// of this file.
WEAK const halide_device_interface_t *halide_hexagon_device_interface() {
    const halide_device_interface_t *const hexagon_interface = &hexagon_device_interface;
    return hexagon_interface;
}

namespace {
// Marked with the destructor attribute so that the Hexagon device state is
// released without an explicit call from the application. There is no
// user context at that point, so nullptr is passed, and the status code is
// discarded because there is no caller to report it to.
WEAK __attribute__((destructor)) void halide_hexagon_cleanup() {
    (void)halide_hexagon_device_release(nullptr);
}
}  // namespace

}  // extern "C" linkage

namespace Halide {
namespace Runtime {
namespace Internal {
namespace Hexagon {

// Hexagon-specific implementation table referenced by
// hexagon_device_interface below.
//
// This is a positional aggregate initializer: the entries must stay in the
// same order as the members of halide_device_interface_impl_t, which is
// declared outside this file.
// NOTE(review): the first two entries are presumably the module
// use/release hooks — confirm against the struct declaration.
// The last two entries use the halide_default_* wrap/detach functions
// rather than Hexagon-specific ones.
WEAK halide_device_interface_impl_t hexagon_device_interface_impl = {
    halide_use_jit_module,
    halide_release_jit_module,
    halide_hexagon_device_malloc,
    halide_hexagon_device_free,
    halide_hexagon_device_sync,
    halide_hexagon_device_release,
    halide_hexagon_copy_to_host,
    halide_hexagon_copy_to_device,
    halide_hexagon_device_and_host_malloc,
    halide_hexagon_device_and_host_free,
    halide_hexagon_buffer_copy,
    halide_hexagon_device_crop,
    halide_hexagon_device_slice,
    halide_hexagon_device_release_crop,
    halide_default_device_wrap_native,
    halide_default_device_detach_native,
};

// Device interface object for Hexagon; its address is what
// halide_hexagon_device_interface() returns and what buffers are compared
// against to decide whether they live on a Hexagon device.
//
// This is a positional aggregate initializer: the entries must stay in the
// same order as the members of halide_device_interface_t, which is
// declared outside this file. The function slots hold the generic halide_*
// entry points, and the final entry points at the Hexagon implementation
// table defined above.
// NOTE(review): the nullptr slot presumably corresponds to an optional
// compute-capability query that Hexagon does not provide — confirm against
// the struct declaration.
WEAK halide_device_interface_t hexagon_device_interface = {
    halide_device_malloc,
    halide_device_free,
    halide_device_sync,
    halide_device_release,
    halide_copy_to_host,
    halide_copy_to_device,
    halide_device_and_host_malloc,
    halide_device_and_host_free,
    halide_buffer_copy,
    halide_device_crop,
    halide_device_slice,
    halide_device_release_crop,
    halide_device_wrap_native,
    halide_device_detach_native,
    nullptr,
    &hexagon_device_interface_impl};

}  // namespace Hexagon
}  // namespace Internal
}  // namespace Runtime
}  // namespace Halide
back to top