https://github.com/halide/Halide
Raw File
Tip revision: 825d863e7bd82bc00ca59be3934e1a0199a45dd2 authored by Andrew Adams on 22 May 2018, 21:11:35 UTC
Add stripped-down version of simplifier
Tip revision: 825d863
HalideView.mm
#import "HalideView.h"

#include "HalideBuffer.h"
#include "HalideRuntime.h"
#include "HalideRuntimeMetal.h"
#include "reaction_diffusion_2_init.h"
#include "reaction_diffusion_2_render.h"
#include "reaction_diffusion_2_update.h"
#if HAS_METAL_SDK
#include "reaction_diffusion_2_metal_init.h"
#include "reaction_diffusion_2_metal_render.h"
#include "reaction_diffusion_2_metal_update.h"
#endif

using Halide::Runtime::Buffer;

struct HalideFuncs {
    int (*init)(const void*, halide_buffer_t*);
    int (*update)(const void*, halide_buffer_t*, int, int, int, halide_buffer_t*);
    int (*render)(const void*, halide_buffer_t*, int, halide_buffer_t*);
};

static const HalideFuncs kHalideCPU = {
    reaction_diffusion_2_init,
    reaction_diffusion_2_update,
    reaction_diffusion_2_render
};

#if HAS_METAL_SDK
static const HalideFuncs kHalideMetal = {
    reaction_diffusion_2_metal_init,
    reaction_diffusion_2_metal_update,
    reaction_diffusion_2_metal_render
};
#endif

@implementation HalideView
{
@private
#if HAS_METAL_SDK
    __weak CAMetalLayer *_metalLayer;
#endif  // HAS_METAL_SDK
    
    Buffer<float> buf1;
    Buffer<float> buf2;
    Buffer<uint8_t> pixel_buf;
    
    int32_t iteration;
    double frameElapsedEstimate;
}


#if HAS_METAL_SDK
+ (Class)layerClass
{
    return [CAMetalLayer class];
}
#endif  // HAS_METAL_SDK

- (void)initCommon
{
    self.opaque          = YES;
    self.backgroundColor = nil;

#if HAS_METAL_SDK
    _metalLayer = (CAMetalLayer *)self.layer;
    _metalLayer.delegate = self;

    _device = MTLCreateSystemDefaultDevice();
    _commandQueue = [_device newCommandQueue];
    
    _metalLayer.device      = _device;
    _metalLayer.pixelFormat = MTLPixelFormatBGRA8Unorm;
    
    _metalLayer.framebufferOnly = NO;
#endif  // HAS_METAL_SDK
}

- (void) resetFrameTime
{
    iteration = 0;
    frameElapsedEstimate = -1;
}

- (void) updateFrameTime: (double) elapsed using_metal: (bool)using_metal
{
    // Smooth elapsed using an IIR
    if (frameElapsedEstimate == -1) {
        frameElapsedEstimate = elapsed;
    } else {
        frameElapsedEstimate = (frameElapsedEstimate * 31 + elapsed) / 32.0;
    }
    if ((iteration % 30) == 0) {
        dispatch_async(dispatch_get_main_queue(), ^(void) {
            [self updateLogWith: frameElapsedEstimate using_metal: using_metal];
        });
    }
    iteration += 1;
}

- (void)initBufsWithWidth: (int)w height: (int)h using_metal: (bool) using_metal
{
    // Make a pair of buffers to represent the current state
    if (using_metal) {
        buf1 = Buffer<float>::make_interleaved(w, h, 3);
        buf2 = Buffer<float>::make_interleaved(w, h, 3);
    } else {
        buf1 = Buffer<float>(w, h, 3);
        buf2 = Buffer<float>(w, h, 3);
    }

    // We really only need to pad this for the use_metal case,
    // but it doesn't really hurt to always do it.
    const int c = 4;
    const int pad_pixels = (64 / sizeof(int32_t));
    const int row_stride = (w + pad_pixels - 1) & ~(pad_pixels - 1);
    const halide_dimension_t pixelBufShape[] = {
        {0, w, c},
        {0, h, c * row_stride},
        {0, c, 1}
    };

    // This allows us to make a Buffer with an arbitrary shape
    // and memory managed by Buffer itself
    pixel_buf = Buffer<uint8_t>(nullptr, 3, pixelBufShape);
    pixel_buf.allocate();
}

- (void)didMoveToWindow
{
    self.contentScaleFactor = self.window.screen.nativeScale;
}

- (id)initWithFrame:(CGRect)frame
{
    self = [super initWithFrame:frame];
    if(self)
    {
        [self initCommon];
    }
    return self;
}

- (instancetype)initWithCoder:(NSCoder *)coder
{
    self = [super initWithCoder:coder];
    
    if(self)
    {
        [self initCommon];
    }
    return self;
}

- (void)updateLogWith: (double) elapsedTime using_metal: (bool) using_metal
{
#if HAS_METAL_SDK
    NSString *mode = using_metal ? @"(Metal; Double-tap for CPU)" : @"(CPU; Double-tap for Metal)";
#else
    NSString *mode = @"(CPU; Metal not available)";
#endif
    [self.outputLog setText: [NSString stringWithFormat:@"Halide routine takes %0.3f ms %@", 
                              elapsedTime * 1000, mode]];
}

- (void)touchesBegan:(NSSet *)touches withEvent:(UIEvent *)event {
    UITouch* touch = [touches anyObject];
    self.touch_position = [touch locationInView:self];
    self.touch_active = [self pointInside:self.touch_position withEvent:event];
#if HAS_METAL_SDK
    NSUInteger numTaps = [touch tapCount];
    if (numTaps > 1) {
        self.use_metal = !self.use_metal;
        NSLog(@"TBTaps: %d, self.use_metal %d", (int)numTaps, (int)self.use_metal);
    }
#endif
}

- (void)touchesMoved:(NSSet *)touches withEvent:(UIEvent *)event {
    self.touch_position = [touches.anyObject locationInView:self];
    self.touch_active = [self pointInside:self.touch_position withEvent:event];
}

- (void)touchesEnded:(NSSet *)touches withEvent:(UIEvent *)event {
    self.touch_active = false;
}

- (void)touchesCancelled:(NSSet *)touches withEvent:(UIEvent *)event {
    self.touch_active = false;
}

- (void)renderOneFrame: (const HalideFuncs &) halide_funcs using_metal: (bool) using_metal
{
    int tx = -100, ty = -100;
    if (self.touch_active) {
        // Note that buf/bounds is not necessarily the same as self.contentScaleFactor
        tx = (int) (self.touch_position.x * buf1.dim(0).extent() / self.bounds.size.width);
        ty = (int) (self.touch_position.y * buf1.dim(1).extent() / self.bounds.size.height);
        NSLog(@"touch %d %d", tx, ty);
    }

#if HAS_METAL_SDK
    const bool output_bgra = true;
#else
    const bool output_bgra = false;
#endif

    // A note on timing: based on our experimentation, this is indeed effective for 
    // timing Metal launches, not just CPU kernels. Other GPU API implementations 
    // may return way before actually completing kernel execution, but Metal 
    // (at least in this context) doesn't seem to, making this basic timing approach 
    // fairly effective.
    // 
    // However, there seems to be a large minimum latency to return from the Metal launches, 
    // which can make this an underestimate of the potential GPU throughput; for example, 
    // running the update and render steps 10 times per frame (instead of once) 
    // converges to a steady state per-frame cost which is often much less than 
    // the single iteration cost.

    double t_before = CACurrentMediaTime();
    halide_funcs.update((__bridge void *)self, buf1, tx, ty, iteration, buf2);
    halide_funcs.render((__bridge void *)self, buf2, output_bgra, pixel_buf);
    double t_after = CACurrentMediaTime();

    std::swap(buf1, buf2);

    [self updateFrameTime:(t_after - t_before) using_metal: using_metal];
}

- (void)initiateRender
{
#if HAS_METAL_SDK
    bool using_metal = self.use_metal;
    const HalideFuncs &halide_funcs = using_metal ? kHalideMetal : kHalideCPU;
    const int required_stride = using_metal ? 3 : 1;

    // Create autorelease pool per frame to avoid possible deadlock situations
    // because there are 3 CAMetalDrawables sitting in an autorelease pool.
    @autoreleasepool
    {
        id <CAMetalDrawable> drawable = [_metalLayer nextDrawable];
        id <MTLTexture> texture = drawable.texture;

        // handle display changes here
        if (texture.width != buf1.dim(0).extent() ||
            texture.height != buf1.dim(1).extent() ||
            buf1.dim(0).stride() != required_stride) {
 
            // set the metal layer to the drawable size in case orientation or size changes
            CGSize drawableSize = self.bounds.size;

            // The Metal schedule for our Halide code requires that 
            // the image be exact multiples of 8 in x & y
            drawableSize.width  = ((long)drawableSize.width + 7) & ~7;
            drawableSize.height  = ((long)drawableSize.height + 7) & ~7;
            _metalLayer.drawableSize = drawableSize;
            
            [self initBufsWithWidth: drawableSize.width height: drawableSize.height using_metal: using_metal];
            halide_funcs.init((__bridge void *)self, buf1);

            [self resetFrameTime];
        }
        
        [self renderOneFrame: halide_funcs using_metal: using_metal];

        id <MTLBuffer> buffer = using_metal ?
            (__bridge id <MTLBuffer>)(void *)halide_metal_get_buffer((void *)&self, pixel_buf) :
            [self.device newBufferWithBytes: pixel_buf.data()
                                     length: pixel_buf.size_in_bytes()
                                    options:MTLResourceStorageModeShared];
        id <MTLCommandBuffer> commandBuffer = [_commandQueue commandBuffer];
        id <MTLBlitCommandEncoder> blitEncoder = [commandBuffer blitCommandEncoder];

        MTLSize image_size;
        image_size.width = pixel_buf.dim(0).extent();
        image_size.height = pixel_buf.dim(1).extent();
        image_size.depth = 1;
        MTLOrigin origin = { 0, 0, 0 };

        const int bytesPerRow = pixel_buf.dim(1).stride() * pixel_buf.type().bits / 8;
        [blitEncoder 
            copyFromBuffer:buffer 
            sourceOffset: 0
            sourceBytesPerRow: bytesPerRow
            sourceBytesPerImage: pixel_buf.size_in_bytes()
            sourceSize: image_size 
            toTexture: drawable.texture
            destinationSlice: 0 
            destinationLevel: 0 
            destinationOrigin: origin];
        [blitEncoder endEncoding];
        [commandBuffer addCompletedHandler: ^(id MTLCommandBuffer) {
            dispatch_async(dispatch_get_main_queue(), ^(void) {
                [drawable present];
                [self initiateRender];
            });
        }];
        [commandBuffer commit];
        [_commandQueue insertDebugCaptureBoundary];
    }
#else
    float f = self.contentScaleFactor;
    int image_width = (int) (self.bounds.size.width * f);
    int image_height = (int) (self.bounds.size.height * f);
    const HalideFuncs &halide_funcs = kHalideCPU;

    dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
        [self initBufsWithWidth:image_width height:image_height using_metal: false];

        CGDataProviderRef provider =
            CGDataProviderCreateWithData(NULL, pixel_buf.data(), pixel_buf.size_in_bytes(), NULL);
        CGColorSpaceRef color_space = CGColorSpaceCreateDeviceRGB();

        halide_funcs.init((__bridge void *)self, buf1);
        [self resetFrameTime];
   
        for (;;) {
            [self renderOneFrame: halide_funcs using_metal: false];

            const int bytesPerRow = pixel_buf.dim(1).stride() * pixel_buf.type().bits / 8;
            CGImageRef image_ref =
                CGImageCreate(image_width, image_height, 
                              8,   // bitsPerComponent
                              32,  // bitsPerPixel
                              bytesPerRow,
                              color_space,
                              kCGBitmapByteOrderDefault,
                              provider, NULL, NO,
                              kCGRenderingIntentDefault);
            UIImage *im = [UIImage imageWithCGImage:image_ref];
            CGImageRelease(image_ref);
            
            dispatch_async(dispatch_get_main_queue(), ^(void) {
                [self setImage: im];
            });
        }
    });
#endif  // HAS_METAL_SDK
}

@end

#if HAS_METAL_SDK

#ifdef __cplusplus
extern "C" {
#endif

int halide_metal_acquire_context(void *user_context, struct halide_metal_device **device_ret,
                                 struct halide_metal_command_queue **queue_ret, bool create) {
    HalideView *view = (__bridge HalideView *)user_context;
    *device_ret = (__bridge struct halide_metal_device *)view.device;
    *queue_ret = (__bridge struct halide_metal_command_queue *)view.commandQueue;
    return 0;
}

int halide_metal_release_context(void *user_context) {
    return 0;
}

#ifdef __cplusplus
}
#endif

#endif  // HAS_METAL_SDK
back to top