https://bitbucket.org/daniel_fort/magic-lantern
Raw File
Tip revision: b6e5c0ed8831eecb5f875c0617ce9263b73e5b1d authored by alex@thinkpad on 04 April 2018, 21:43:45 UTC
sd_uhs: experimental driver strength tuning
Tip revision: b6e5c0e
raw_twk.c
/**
 
 */

/*
 * Copyright (C) 2013 Magic Lantern Team
 * 
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the
 * Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor,
 * Boston, MA  02110-1301, USA.
 */

#define __raw_twk_c__

#include <module.h>
#include <dryos.h>
#include <property.h>
#include <bmp.h>
#include <beep.h>
#include <menu.h>
#include <config.h>
#include <edmac.h>
#include <raw.h>
#include <zebra.h>
#include <util.h>
#include <imgconv.h>
#include <vram.h>
#include <math.h>
#include <string.h>

#include "raw_twk.h"

#define CONFIG_RAW_LIVEVIEW
#define dbg_printf(fmt,...) {}

typedef struct 
{
    void *frameBuffer;
    uint16_t xRes;
    uint16_t yRes;
    uint16_t quality;
    uint16_t bpp;
    uint16_t blackLevel;
    uint8_t  freeFrameBuffer;
} frame_buf_t;

struct s_DefcData
{
    void *pSrcAddress;
    void *pDestAddr;
    uint32_t xRes;
    uint32_t yRes;
    uint32_t rawDepth14;
    uint32_t dstModeRaw;
};

void *get_lcd_422_buf();


static struct msg_queue *mlv_play_queue_raw_to_bpp16;
static struct msg_queue *mlv_play_queue_bpp16_to_yuv;


static int raw_twk_task_abort = 0;
static int raw_twk_zoom = 1;
static int raw_twk_zoom_x_pct = 0;
static int raw_twk_zoom_y_pct = 0;

static struct semaphore * edmac_write_done_sem = 0;
static struct LockEntry * resLock = 0;

static uint32_t edmac_read_chan = 10;
static uint32_t edmac_write_chan = 1;
static uint32_t edmac_read_connection = 1;
static uint32_t edmac_write_connection = 16;

#define ZOOMED_LV2RAW_X(x) ((((x) * (lv2raw.sx / (raw_twk_zoom?raw_twk_zoom:1)) + lv2raw.sx * raw_twk_zoom_x) >> 10))
#define ZOOMED_LV2RAW_Y(y) ((((y) * (lv2raw.sy / (raw_twk_zoom?raw_twk_zoom:1)) + lv2raw.sy * raw_twk_zoom_y) >> 10))

#define CBITS_709   (8)
#define CRANGE_709  (1 << CBITS_709)
#define CYR_709 ((int32_t) ( 0.2126f * CRANGE_709))
#define CYG_709 ((int32_t) ( 0.7152f * CRANGE_709))
#define CYB_709 ((int32_t) ( 0.0722f * CRANGE_709))
#define CUR_709 ((int32_t) (-0.1146f * CRANGE_709))
#define CUG_709 ((int32_t) (-0.3854f * CRANGE_709))
#define CUB_709 ((int32_t) ( 0.5000f * CRANGE_709))
#define CVR_709 ((int32_t) ( 0.5000f * CRANGE_709))
#define CVG_709 ((int32_t) (-0.4541f * CRANGE_709))
#define CVB_709 ((int32_t) (-0.0458f * CRANGE_709))

#define PACK_HALVES(u,l) (((uint32_t)(u & 0xFFFF) << 16) | (uint32_t)(l & 0xFFFF))

const uint32_t yuv_709_table[] = { PACK_HALVES(CYR_709,CUR_709), PACK_HALVES(CYG_709,CUG_709), PACK_HALVES(CYB_709,CUB_709), CVR_709, CVG_709, CVB_709 };

static inline uint32_t rgb2yuv422_rec709_opt(int R, int G, int B)
{
/* basically it is this code, just optimzied as much as i can:

    int Y = (CYR_709 * R + CYG_709 * G + CYB_709 * B) / CRANGE_709;
    int U = (CUR_709 * R + CUG_709 * G + CUB_709 * B) / CRANGE_709;
    int V = (CVR_709 * R + CVG_709 * G + CVB_709 * B) / CRANGE_709;
    return UYVY_PACK(U,Y,V,Y);
*/
    int ret = 0;
    
    /* warning: reordered instructions to (hopefully) prevent pipelining effects */
    asm volatile (
        /* load parameters */
        "LDR R3, =yuv_709_table\r\n"
        "LDMIA R3!, {R4-R9}\r\n"
        
        /* multiply RGB into 16 bit fields in 32 bit registers */
        "MOV R12, #0xFF\r\n"
        /* YU */ "MUL R10, R4, %[R]\r\n"
        /* YU */ "MLA R10, R5, %[G], R10\r\n"
        /* YU */ "MLA R10, R6, %[B], R10\r\n"
        "AND R11, R10, R12, LSL#24\r\n"
        
        /* channel V is multiplied separately */
        /* V */ "MUL R4, R7, %[R]\r\n"
        "ORR R11, R11, LSR#16\r\n" /* R11: YY00YY00 */
        /* V */ "MLA R4, R8, %[G], R4\r\n"
        "AND R10, R10, R12, LSL#8\r\n" /* YYYYUUUU -> 0000UU00 */
        /* V */ "MLA R4, R9, %[B], R4\r\n"
        "ORR R11, R10, LSR#8\r\n" /* R11: YY00YYUU */        
        "AND R4, R4, R12, LSL#8\r\n" /* 0000VVVV -> 0000VV00 */
        "ORR %[ret], R11, R4, LSL#8\r\n" /* R11: YYVVYYUU */
        : 
       [ret]"=r"(ret) : 
       [R]"r"(R),  [G]"r"(G),  [B]"r"(B)  : 
       "r3", /* table */
       "r4", "r5", "r6", "r7", "r8", "r9", /* multipliers from table */
       "r10", "r11", "r12" /* temporary results */
    );
    
    return ret;
}

uint32_t raw_twk_set_zoom(uint32_t zoom, uint32_t x_pct, uint32_t y_pct)
{
    raw_twk_zoom_x_pct = MIN(100, MAX(0, x_pct));
    raw_twk_zoom_y_pct = MIN(100, MAX(0, y_pct));
    
    uint32_t zooom_table[] = { 1, 2, 4, 8 };
    
    raw_twk_zoom = zooom_table[MAX(0, MIN(COUNT(zooom_table), zoom))];
    
    return 0;
}

static void bpp16_to_yuv_task()
{
    TASK_LOOP
    {
        frame_buf_t *in_buf;
        
        /* signal to stop rendering */
        if(raw_twk_task_abort)
        {
            break;
        }
        
        /* is there something to render? */
        if(msg_queue_receive(mlv_play_queue_bpp16_to_yuv, &in_buf, 50))
        {
            continue;
        }

        if(!in_buf->frameBuffer)
        {
            bmp_printf(FONT_MED, 30, 400, "buffer empty");
            beep();
            msleep(1000);
            free(in_buf);
            break;
        }
        
        int xRes = in_buf->xRes;
        int yRes = in_buf->yRes;
        int y1 = BM2LV_X(os.y0);
        int y2 = BM2LV_X(os.y_max);
        int x1 = MAX(BM2LV_X(os.x0), RAW2LV_X(0));
        int x2 = MIN(BM2LV_X(os.x_max), RAW2LV_X(xRes));
        
        uint16_t* lv16 = CACHEABLE(get_lcd_422_buf());
        uint32_t* lv32 = (uint32_t*) lv16;
        uint64_t* lv64 = (uint64_t*) lv16;
    
        if (!lv16) return;
        if (x2 < x1) return;
        
    
        /* cache the LV to RAW transformation for the inner loop to make it faster */
        /* white balance 2,1,2 => use two gamma curves to simplify code */
        static uint8_t gamma_rb[1024];
        static uint8_t gamma_g[1024];
        static uint8_t gamma[1024];
        static int* lv2rx = NULL;
        static int* lv2ry = NULL;
        static int last_x1 = 0;
        static int last_x2 = 0;
        static int last_black_level = 0;
        static int last_zoom = 0;
        static int last_zoom_x = 0;
        static int last_zoom_y = 0;

        if(last_zoom != raw_twk_zoom || last_zoom_x != raw_twk_zoom_x_pct || last_zoom_y != raw_twk_zoom_y_pct || last_black_level != in_buf->blackLevel || last_x1 != x1 || last_x2 != x2)
        {
            if(lv2rx)
            {
                free(lv2rx);
            }
            if(lv2ry)
            {
                free(lv2ry);
            }
            lv2rx = NULL;
            lv2ry = NULL;
        }
        
        if(lv2rx == NULL)
        {
            last_x1 = x1;
            last_x2 = x2;
            last_black_level = in_buf->blackLevel;
            last_zoom = raw_twk_zoom;
            last_zoom_x = raw_twk_zoom_x_pct;
            last_zoom_y = raw_twk_zoom_y_pct;
            
            /* as we only work with 10 upper bits and input data is 16bpp, throw away 6 bits */
            int black = (in_buf->blackLevel >> 6);
            
            for (int i = 0; i < 1024; i++)
            {
                int g = (i > black) ? log2f(i - black) * 255 / 10 : 0;
                gamma[i] = g * g / 255; /* idk, looks better this way */
            }
            for (int i = 0; i < 1024; i++)
            {
                /* only show 10 bits */
                int g_rb = (i > black) ? (log2f(i - black) + 1) * 255 / 10 : 0;
                int g_g  = (i > black) ? (log2f(i - black)) * 255 / 10 : 0;
                gamma_rb[i] = COERCE(g_rb * g_rb / 255, 0, 255); /* idk, looks better this way */
                gamma_g[i]  = COERCE(g_g  * g_g  / 255, 0, 255); /* (it's like a nonlinear curve applied on top of log) */
            }
            
            lv2rx = malloc(x2 * sizeof(int));
            if (!lv2rx) return;
            for (int x = x1; x < x2; x++)
            {
                /* from the percent position of the zoom window, determine the number of pixels */
                int zoom_offset = (raw_twk_zoom_x_pct * (vram_lv.width - vram_lv.width / raw_twk_zoom)) / 100;
                /* determine the LV2RAW steps depending on zoom level */
                int step = lv2raw.sx / (raw_twk_zoom?raw_twk_zoom:1);

                /* this is based on LV2RAW */
                lv2rx[x] = (((x * step + lv2raw.sx * zoom_offset) >> 10) + lv2raw.tx) & ~1;
            }
            
            lv2ry = malloc(y2 * sizeof(int));
            if (!lv2ry) return;
            for (int y = y1; y < y2; y++)
            {
                /* from the percent position of the zoom window, determine the number of pixels */
                int zoom_offset = (raw_twk_zoom_y_pct * (vram_lv.height - vram_lv.height / raw_twk_zoom)) / 100;
                /* determine the LV2RAW steps depending on zoom level */
                int step = lv2raw.sy / (raw_twk_zoom?raw_twk_zoom:1);

                /* this is based on LV2RAW */
                lv2ry[y] = (((y * step + lv2raw.sy * zoom_offset) >> 10) + lv2raw.ty) & ~1;
                
                /* on HDMI screens, BM2LV_DX() may get negative */
                if((lv2ry[y] <= 0 || lv2ry[y] >= yRes) && BM2LV_DX(x2-x1) > 0)
                {
                    /* out of range, just fill with black */
                    lv2ry[y] = -1;
                }
            }
        }
        
        
        if(in_buf->quality == RAW_PREVIEW_GRAY_ULTRA_FAST)
        {
            /* full-res vertically */
            for (int y = y1; y < y2; y+=1)
            {
                /* precalculated above */
                if(lv2ry[y] < 0)
                {
                    /* out of range, just fill with black */
                    memset(&lv32[LV(0,y)/4], 0, BM2LV_DX(x2-x1)*2);
                    continue;
                }
                
                uint16_t * row = ((uint16_t *)in_buf->frameBuffer) + (lv2ry[y] * xRes);

                /* half-res horizontally, to simplify YUV422 math */
                for (int x = x1; x < x2; x += 4)
                {
                    int xr = lv2rx[x];
                    
                    uint32_t c = row[xr];
                    uint64_t Y = gamma[c>>6];
                    
                    Y = (Y << 8) | (Y << 24) | (Y << 40) | (Y << 56);
                    int idx = LV(x,y)/8;
                    lv64[idx] = Y;
                    lv64[idx + vram_lv.pitch/8] = Y;
                }
            }
        }
        else
        {
            /* full-res vertically */
            for (int y = y1; y < y2; y+=1)
            {
                /* precalculated above */
                if(lv2ry[y] < 0)
                {
                    /* out of range, just fill with black */
                    memset(&lv32[LV(0,y)/4], 0, BM2LV_DX(x2-x1)*2);
                    continue;
                }
                
                uint16_t * row = ((uint16_t *)in_buf->frameBuffer) + (lv2ry[y] * xRes);
                
                /* half-res horizontally, to simplify YUV422 math */
                for (int x = x1; x < x2; x += 2)
                {
                    int xr = lv2rx[x];
                    
                    uint32_t rg = *((uint32_t*) &row[xr]) >> 6;
                    uint16_t r = rg & 0x3FF;
                    uint16_t g = rg >> 16;
                    uint16_t b = row[xRes + xr + 1] >> 6;

                    r = gamma_rb[r];
                    g = gamma_g [g];
                    b = gamma_rb[b];
                    
                    lv32[LV(x,y)/4] = rgb2yuv422_rec709_opt(r, g, b);
                }
            }
        }
        
        if(in_buf->freeFrameBuffer)
        {
            free(in_buf->frameBuffer);
        }
        free(in_buf);
    }
}

static void edmac_read_complete_cbr(void *ctx)
{
}

static void edmac_write_complete_cbr(void * ctx)
{
    give_semaphore(edmac_write_done_sem);
}

static uint32_t upconvert_bpp(frame_buf_t *frame, void* buf_out)
{
    uint32_t ret = 0;
    
    /* configure image processing modules */
    const uint32_t DS_SEL            = 0xC0F08104;
    const uint32_t PACK16_ISEL       = 0xC0F082D0;
    const uint32_t PACK16_ISEL2      = 0xC0F0839C;
    const uint32_t WDMAC16_ISEL      = 0xC0F082D8;
    const uint32_t DSUNPACK_ENB      = 0xC0F08060;
    const uint32_t DSUNPACK_MODE     = 0xC0F08064;
    const uint32_t DSUNPACK_DM_EN    = 0xC0F08274;
    const uint32_t DEF_ENB           = 0xC0F080A0;
    const uint32_t DEF_80A4          = 0xC0F080A4;
    const uint32_t DEF_MODE          = 0xC0F080A8;
    const uint32_t DEF_CTRL          = 0xC0F080AC;
    const uint32_t DEF_YB_XB         = 0xC0F080B0;
    const uint32_t DEF_YN_XN         = 0xC0F080B4;
    const uint32_t DEF_YA_XA         = 0xC0F080BC;
    const uint32_t DEF_INTR_EN       = 0xC0F080D0;
    const uint32_t DEF_HOSEI         = 0xC0F080D4;
    const uint32_t DEFC_X2MODE       = 0xC0F08270;
    const uint32_t DEFC_DET_MODE     = 0xC0F082B4;
    const uint32_t PACK16_ENB        = 0xC0F08120;
    const uint32_t PACK16_MODE       = 0xC0F08124;
    const uint32_t PACK16_DEFM_ON    = 0xC0F082B8;
    const uint32_t PACK16_ILIM       = 0xC0F085B4;
    const uint32_t PACK16_CCD2_DM_EN = 0xC0F0827C;

    /* for PACK16_MODE, DSUNPACK_MODE, ADUNPACK_MODE (mask 0x131) */
    const uint32_t MODE_10BIT        = 0x000;
    const uint32_t MODE_12BIT        = 0x010;
    const uint32_t MODE_14BIT        = 0x020;
    const uint32_t MODE_16BIT        = 0x120;

    uint32_t pixel_mode = 0;
    
    switch(frame->bpp)
    {
        case 10:
            pixel_mode = MODE_10BIT;
            break;
        case 12:
            pixel_mode = MODE_12BIT;
            break;
        case 14:
            pixel_mode = MODE_14BIT;
            break;
        case 16:
            pixel_mode = MODE_16BIT;
            break;
    }
    
    if (!resLock)
    {
        int edmac_read_ch_index = edmac_channel_to_index(edmac_read_chan);
        int edmac_write_ch_index = edmac_channel_to_index(edmac_write_chan);
        uint32_t resIds[] = {
            0x00000000 | edmac_write_ch_index,      /* write edmac channel */
            0x00010000 | edmac_read_ch_index,       /* read edmac channel */
            0x00020000 | edmac_write_connection,    /* write connection */
            0x00030000 | edmac_read_connection,     /* read connection */
            0x00050002,                             /* DSUNPACK? */
            0x00050005,                             /* DEFC? */
            0x0005001d,                             /* PACK16/WDMAC16 */
            0x0005001f,                             /* PACK16/WDMAC16 */
        };
        resLock = CreateResLockEntry(resIds, COUNT(resIds));
    }
   
    LockEngineResources(resLock);
    
    engio_write((uint32_t[]) {
        /* input selection for the processing modules? */
        DS_SEL,         0,
        PACK16_ISEL,    4,
        PACK16_ISEL2,   0,
        WDMAC16_ISEL,   0,
        
        /* DSUNPACK module (input image data) */
        DSUNPACK_ENB,   0x80000000,
        DSUNPACK_MODE,  pixel_mode,
        DSUNPACK_DM_EN, 0,
        
        /* DEF(C) module (is it needed?) */
        DEF_ENB,        0x80000000,
        DEF_80A4,       0,
        DEF_CTRL,       0,
        DEF_MODE,       0x104,
        DEF_YN_XN,      0,
        DEF_YB_XB,      ((frame->yRes-1) << 16) | (frame->xRes-1),
        DEF_YA_XA,      0,
        DEF_INTR_EN,    0,
        DEF_HOSEI,      0x11,
        DEFC_X2MODE,    0,
        DEFC_DET_MODE,  1,

        /* PACK16 module (output image data) */
        PACK16_ENB,     0x80000000,
        PACK16_MODE,    MODE_16BIT,
        PACK16_DEFM_ON, 1,
        PACK16_ILIM,    0x3FFF, /* white level? */
        PACK16_CCD2_DM_EN, 0,
        
        /* whew! */
        0xFFFFFFFF,     0xFFFFFFFF
    });
 
    /* EDMAC setup */
    RegisterEDmacCompleteCBR(edmac_read_chan, edmac_read_complete_cbr, 0);
    RegisterEDmacCompleteCBR(edmac_write_chan, edmac_write_complete_cbr, 0);

    ConnectWriteEDmac(edmac_write_chan, edmac_write_connection);
    ConnectReadEDmac(edmac_read_chan, edmac_read_connection);

    struct edmac_info src_edmac_info = {
        .xb = frame->xRes * frame->bpp / 8,
        .yb = frame->yRes - 1,
    };
    
    struct edmac_info dst_edmac_info = {
        .xb = frame->xRes * 2,
        .yb = frame->yRes - 1,
    };
   
    SetEDmac(edmac_read_chan, frame->frameBuffer, &src_edmac_info, 0x20000);
    SetEDmac(edmac_write_chan, buf_out, &dst_edmac_info, 1);

    /* start processing */
    StartEDmac(edmac_write_chan, 0);

    /* start operation */
    engio_write((uint32_t[]) {
        PACK16_ENB,     1,
        DEF_ENB,        1,
        DSUNPACK_ENB,   1,
        0xFFFFFFFF,     0xFFFFFFFF
    });
   
    StartEDmac(edmac_read_chan, 2);
   
    /* wait for everything to finish */
    if(take_semaphore(edmac_write_done_sem, 100))
    {
        ret = 1;
    }

    /* reset processing modules */
    engio_write((uint32_t[]) {
        PACK16_ENB,     0x80000000,
        DEF_ENB,        0x80000000,
        DSUNPACK_ENB,   0x80000000,
        0xFFFFFFFF,     0xFFFFFFFF
    });

    /* aborting EDMACs is required so they don't stay waiting, which would block some other routines using them */
    AbortEDmac(edmac_read_chan);
    AbortEDmac(edmac_write_chan);
    UnregisterEDmacCompleteCBR(edmac_read_chan);
    UnregisterEDmacCompleteCBR(edmac_write_chan);
    UnLockEngineResources(resLock);
    
    return ret;
}

static void raw_to_bpp16_task()
{
    TASK_LOOP
    {
        frame_buf_t *in_buf;
        
        /* signal to stop rendering */
        if(raw_twk_task_abort)
        {
            break;
        }
        
        /* is there something to render? */
        if(msg_queue_receive(mlv_play_queue_raw_to_bpp16, &in_buf, 50))
        {
            continue;
        }

        if(!in_buf->frameBuffer)
        {
            bmp_printf(FONT_MED, 30, 400, "in_buf empty");
            beep();
            msleep(1000);
            break;
        }
        
        void *out_frameBuffer = malloc(in_buf->xRes * in_buf->yRes * 16 / 8);
        
        /* if conversion failed, silently fail for now */
        if(upconvert_bpp(in_buf, out_frameBuffer))
        {
            bmp_printf(FONT_MED, 30, 400, "upconvert_bpp failed");
            beep();
            msleep(1000);
            
            free(out_frameBuffer);
            free(in_buf);
            break;
        }
        
        /* reuse the message buffer */
        in_buf->blackLevel = in_buf->blackLevel << (16 - in_buf->bpp);
        in_buf->frameBuffer = out_frameBuffer;
        in_buf->freeFrameBuffer = 1;
        
        /* and post it to display task, meanwhile this engine can process another frame */
        msg_queue_post(mlv_play_queue_bpp16_to_yuv, (uint32_t)in_buf);
    }
}

uint32_t raw_twk_render_ex(void *raw_buffer, uint32_t xRes, uint32_t yRes, uint32_t bpp, uint32_t quality, uint32_t blackLevel)
{
    frame_buf_t *msg = malloc(sizeof(frame_buf_t));
    
    msg->frameBuffer = raw_buffer;
    msg->xRes = xRes;
    msg->yRes = yRes;
    msg->bpp = bpp;
    msg->quality = quality;
    msg->freeFrameBuffer = 0;
    msg->blackLevel = blackLevel;

    /* caller might be so kind and give us an already 16 bit aligned buffer. we wouldnt have to do much */
    if(msg->bpp != 16)
    {
        return msg_queue_post(mlv_play_queue_raw_to_bpp16, (uint32_t)msg);
    }
    else
    {
        return msg_queue_post(mlv_play_queue_bpp16_to_yuv, (uint32_t)msg);
    }
}

uint32_t raw_twk_render(void *raw_buffer, uint32_t xRes, uint32_t yRes, uint32_t bpp, uint32_t quality)
{
    return raw_twk_render_ex(raw_buffer, xRes, yRes, bpp, quality, raw_info.black_level);
}

static unsigned int raw_twk_init()
{
    edmac_write_done_sem = create_named_semaphore("edmac_write_done_sem", 0);
    mlv_play_queue_raw_to_bpp16 = (struct msg_queue *) msg_queue_create("mlv_play_queue_raw_to_bpp16", 3);
    mlv_play_queue_bpp16_to_yuv = (struct msg_queue *) msg_queue_create("mlv_play_queue_bpp16_to_yuv", 3);
    task_create("bpp16_to_yuv_task", 0x15, 0x4000, bpp16_to_yuv_task, 0);
    task_create("raw_to_bpp16_task", 0x15, 0x4000, raw_to_bpp16_task, 0);
    return 0;
}

static unsigned int raw_twk_deinit()
{
    raw_twk_task_abort = 1;
    return 0;
}

uint32_t raw_twk_available()
{
    return 1;
}

MODULE_INFO_START()
    MODULE_INIT(raw_twk_init)
    MODULE_DEINIT(raw_twk_deinit)
MODULE_INFO_END()

MODULE_CBRS_START()
MODULE_CBRS_END()

MODULE_PROPHANDLERS_START()
MODULE_PROPHANDLERS_END()

MODULE_CONFIGS_START()
MODULE_CONFIGS_END()
back to top