/**
* @file cwinterp.c
* @brief Contour stencil windowed interpolation
* @author Pascal Getreuer <getreuer@gmail.com>
*
* This file implements contour stencil windowed interpolation for integer
* scale factors. The interpolation model supposes that the input image
* \f$v\f$ was created by convolution followed by downsampling
* \f[ v = {\downarrow_r} (h * u) \f]
* where \f$u\f$ is the underlying high resolution image, \f$h\f$ is the point-
* spread function (PSF), and \f$\downarrow_r\f$ denotes downsampling by factor
* \f$r\f$. For simplicity, this code requires that \f$r\f$ is integer and
* \f$h\f$ is a Gaussian. The standard deviation \f$h\f$ is controlled by
* \c PsfSigma.
*
* The image is interpolated by the following steps. First, contour stencils
* are applied to estimate the local contour orientations (in routine
* \c FitStencils). The image is then interpolated by the formula
* \f[ u(x) = \sum_{k\in\mathbb{Z}^2} w(x - k) \Bigl[ v_k +
* \sum_{n\in\mathcal{N}\backslash\{0\}} (v_{k+n} - v_k)
* \psi^n_{\mathcal{S}^\star(k)}(x - k) \Bigr] \f]
* (routine \c CWFirstPass). This initial interpolation approximately
* satisfies the degradation model, \f$v \approx {\downarrow_r} (h * u)\f$. To
* improve the accuracy, several correction passes are applied. In each pass,
* the residual is computed (routine \c CWResidual) and then applied to refine
* the interpolation (routine \c CWRefinementPass). Under typical settings,
* the degradation model is accurately satisfied after two or three correction
* passes.
*
* The main computations \c CWFirstPass and \c CWRefinementPass are done in
* fixed-point integer arithmetic. The downsides of using fixed-point
* arithmetic compared to floating-point arithmetic are more involved code for
* multiplication and division (it is harder to read) and the range and
* precision of fixed-point integers must be explicitly managed for accurate
* results (need to be careful). The upside is that when it does work, fixed-
* point arithmetic is significantly faster. Fortunately, in this application,
* the only needed operations are fixed-point additions and fixed-point
* multiplies where both factors are in a predictable range of values. These
* conditions are good for fixed-point arithmetic.
*
* The number of fractional bits used to represent \f$v\f$ and the residual is
* controlled by \c INPUT_FRACBITS. The number of fractional bits in
* representing \f$\psi\f$ is \c PSI_FRACBITS. Since \f$v\f$ and \f$\psi\f$
* are multiplied, the output has
* \c OUTPUT_FRACBITS = (\c INPUT_FRACBITS + \c PSI_FRACBITS)
* fractional bits. These constants must be balanced between precision and
* avoiding overflow. Fortunately this balance is easy to find for this
* application (neither extreme range nor extreme precision are needed).
*
*
* Copyright (c) 2010-2011, Pascal Getreuer
* All rights reserved.
*
* This program is free software: you can use, modify and/or
* redistribute it under the terms of the simplified BSD License. You
* should have received a copy of this license along this program. If
* not, see <http://www.opensource.org/licenses/bsd-license.html>.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "basic.h"
#include "cwinterp.h"
#include "fitsten.h"
#include "invmat.h"
#include "nninterp.h"
#include "drawline.h"
/** @brief The number of contour stencils, cardinality of \f$\Sigma\f$ */
#define NUMSTENCILS 8
/** @brief Cardinality of the neighborhood \f$\mathcal{N}\f$ */
#define NEIGHRADIUS 1
#define NEIGHDIAMETER (2*NEIGHRADIUS+1)
#define NUMNEIGH (NEIGHDIAMETER*NEIGHDIAMETER)
/**
* @brief \c CWRefinementPass residual tolerance.
*
* In the correction passes, pixels will be skipped if they have magnitude
* less than 2^(-INPUT_FRACBITS + CORRECTION_IGNOREBITS), which improves the
* speed. A larger value of CORRECTION_IGNOREBITS makes the correction passes
* faster, but less accurate.
*/
#define CORRECTION_IGNOREBITS 3
/** @brief Number of fractional bits in the input array */
#define INPUT_FRACBITS 8
/** @brief Number of fractional bits in the psi sample arrays */
#define PSI_FRACBITS 12
/** @brief Number of fractional bits in the output array */
#define OUTPUT_FRACBITS (INPUT_FRACBITS + PSI_FRACBITS)
/** @brief The number 1.0 in fixed-point with N fractional bits */
#define FIXED_ONE(N) (1 << (N))
/** @brief The number 0.5 in fixed-point with N fractional bits */
#define FIXED_HALF(N) (1 << ((N) - 1))
/**
* @brief Number of elements between successive RGB fixed-point pixels
*
* \c PIXEL_STRIDE is the number of elements between successive pixels in the
* RGB fixed-point representation used internally by the interpolation.
*
* @note Unlike the other defines here, this one cannot be changed without
* also rewriting much of the code. Its purpose is more for clarity rather
* than working as a parameter.
*/
#define PIXEL_STRIDE 3
/* Generic macros */
/** @brief Clamp X to [A, B] */
#define CLAMP(X,A,B) (((X) < (A)) ? (A) : (((X) > (B)) ? (B) : (X)))
/** @brief Round and clamp double X to integer */
#define ROUNDCLAMP(X,A,B) (((X) < (A)) ? (A) : (((X) > (B)) ? (B) : ROUND(X)))
#ifndef M_2PI
/** @brief The constant 2*pi */
#define M_2PI 6.283185307179586476925286766559
#endif
/* Orientation in radians of each stencil */
static const double StencilOrientation[NUMSTENCILS] = {-1.178097245096172,
-0.785398163397448, -0.392699081698724, 0.0, 0.392699081698724,
0.785398163397448, 1.178097245096172, 1.570796326794897};
/** @brief The point spread function (PSF) */
static double Psf(double x, double y, cwparams Param)
{
double SigmaSqr = Param.PsfSigma*Param.PsfSigma;
return exp(-(x*x + y*y)/(2.0*SigmaSqr))/(M_2PI*SigmaSqr);
}
/** @brief The oriented functions phi used in local reconstructions */
static double Phi(double x, double y,
double theta, double PhiSigmaTangent, double PhiSigmaNormal)
{
double t, n;
/* Oriented Gaussian */
t = (cos(theta)*x + sin(theta)*y) / PhiSigmaTangent;
n = (-sin(theta)*x + cos(theta)*y) / PhiSigmaNormal;
return exp(-0.5*(t*t + n*n));
}
/** @brief Cubic B-spline */
static float CubicBSpline(float x)
{
x = (float)fabs(x);
if(x < 1)
return (x/2 - 1)*x*x + 0.66666666666666667f;
else if(x < 2)
{
x = 2 - x;
return x*x*x/6;
}
else
return 0;
}
/** @brief The window used to sum the global solution */
static double Window(double x, double y)
{
double Temp;
x *= 2.0/(NEIGHRADIUS + 1.0);
y *= 2.0/(NEIGHRADIUS + 1.0);
/* Cubic B-spline */
if(-2.0 < x && x < 2.0 && -2.0 < y && y < 2.0)
{
x = fabs(x);
Temp = fabs(1.0 - x);
x = 1.0 - x + (x*x*x - 2.0*Temp*Temp*Temp)/6.0;
y = fabs(y);
Temp = fabs(1.0 - y);
y = 1.0 - y + (y*y*y - 2.0*Temp*Temp*Temp)/6.0;
return (x * y) * 4.0/((NEIGHRADIUS + 1.0)*(NEIGHRADIUS + 1.0));
}
else
return 0.0;
}
/** @brief Quadrature weights and abscissas for composite Gauss-Lobatto */
static void QuadraturePoint(double *Weight, double *Abscissa, int Index, int NumPanels)
{
switch(Index % 3)
{
case 0:
*Weight = (Index == 0 || Index == NumPanels)? 0.25 : 0.5;
*Abscissa = Index;
break;
case 1:
*Weight = 1.25;
/* Abscissa location is Index - (3.0/sqrt(5.0) - 1.0)/2.0 */
*Abscissa = Index - 1.70820393249936919e-1;
break;
case 2:
*Weight = 1.25;
/* Abscissa location is Index + (3.0/sqrt(5.0) - 1.0)/2.0 */
*Abscissa = Index + 1.70820393249936919e-1;
break;
}
}
/** @brief Compute the convolution of the PSF and phi_theta at the point (x,y) */
static double PsfPhiConvolution(int x, int y, double Theta, cwparams Param)
{
/* Integrate over the square [-R,R]x[-R,R] */
const double R = 4.0*Param.PsfSigma;
/* Number of panels along each dimension, must be divisible by 3 */
const int NumPanels = 3*16;
const double PanelSize = 2.0*R/NumPanels;
double Integral = 0.0, Slice = 0.0;
double u, v, wu, wv;
int IndexX, IndexY;
/* Specially handle the case where PSF is Dirac delta */
if(Param.PsfSigma == 0.0)
return Phi(x, y, Theta, Param.PhiSigmaTangent, Param.PhiSigmaNormal);
/* Approximate 2D integral */
for(IndexY = 0; IndexY <= NumPanels; IndexY++)
{
QuadraturePoint(&wv, &v, IndexY, NumPanels);
v = PanelSize*v - R;
for(Slice = 0.0, IndexX = 0; IndexX <= NumPanels; IndexX++)
{
QuadraturePoint(&wu, &u, IndexX, NumPanels);
u = PanelSize*u - R;
Slice += wu*( Psf(u, v, Param) *
Phi(x - u, y - v, Theta, Param.PhiSigmaTangent,
Param.PhiSigmaNormal) );
}
Integral += wv*Slice;
}
Integral *= PanelSize*PanelSize;
return Integral;
}
/** @brief Compute the deconvolution matrices (A_S)^-1 */
static int ComputeMatrices(double *InverseA, cwparams Param)
{
double *A = NULL;
double CosTheta, SinTheta;
int m, mx, my, n, nx, ny, S;
int Status = 0;
if(!(A = (double *)Malloc(sizeof(double)*NUMNEIGH*NUMNEIGH)))
goto Catch;
for(S = 0; S < NUMSTENCILS; S++)
{
CosTheta = cos(StencilOrientation[S]);
SinTheta = sin(StencilOrientation[S]);
for(ny = -NEIGHRADIUS, n = 0; ny <= NEIGHRADIUS; ny++)
for(nx = -NEIGHRADIUS; nx <= NEIGHRADIUS; nx++, n++)
for(my = -NEIGHRADIUS, m = 0; my <= NEIGHRADIUS; my++)
for(mx = -NEIGHRADIUS; mx <= NEIGHRADIUS; mx++, m++)
{
A[m + NUMNEIGH*n] = PsfPhiConvolution(mx - nx, my - ny,
StencilOrientation[S], Param);
}
/* Compute the inverse of A */
if(!InvertMatrix(InverseA + S*(NUMNEIGH*NUMNEIGH), A, NUMNEIGH))
goto Catch;
}
Status = 1;
Catch: /* This label is used for error handling. If something went wrong
above (which may be out of memory or a computation error), then
execution jumps to this point to clean up and exit. */
Free(A);
return Status;
}
#include "imageio.h"
/**
* @brief Precomputations before windowed interpolation \c CWInterp
*
* @param Param cwparams struct of interpolation parameters
*
* @return Pointer to \f$\psi\f$ samples array, or null on failure
*
* \c PreCWInterp precomputes samples of the \f$\psi\f$ functions,
* \f[ \psi^n_\mathcal{S}(x) = \sum_{m\in\mathcal{N}}
* (A_\mathcal{S})^{-1}_{m,n} \varphi^m_\mathcal{S}(x - m). \f]
* The routine allocates memory to store the samples and returns a pointer to
* this memory. It is the responsibility of the caller to call \c free
* on this pointer when done to release the memory.
*
* A non-null pointer indicates success. On failure, the returned pointer
* is null.
*/
int32_t *PreCWInterp(cwparams Param)
{
int32_t *Psi = NULL;
const int ScaleFactor = (int)ceil(Param.ScaleFactor);
const int SupportRadius = (NEIGHRADIUS+1)*ScaleFactor - 1;
const int SupportWidth = 2*SupportRadius + 1;
const int SupportSize = SupportWidth*SupportWidth;
double *InverseA = NULL;
double x, y, Wxy, Psi0, Psim, XStart, YStart, WSum;
int S, sx, sy, i, m0, m, mx, my, n, nx, ny, Success = 0;
if(!(Psi = (int32_t *)Malloc(sizeof(int32_t)*SupportSize*NUMNEIGH*NUMSTENCILS))
|| !(InverseA = (double *)Malloc(sizeof(double)*NUMNEIGH*NUMNEIGH*NUMSTENCILS)))
goto Catch;
/* Compute the matrices, the results are stored in InverseA. */
if(!ComputeMatrices(InverseA, Param))
goto Catch;
if(Param.CenteredGrid)
{
XStart = (1/Param.ScaleFactor - 1)/2;
YStart = (1/Param.ScaleFactor - 1)/2;
}
else
XStart = YStart = 0;
m0 = NEIGHRADIUS + NEIGHRADIUS*NEIGHDIAMETER;
/* Precompute the samples of the Psi functions */
for(S = 0; S < NUMSTENCILS; S++)
for(i = 0, sy = -SupportRadius; sy <= SupportRadius; sy++)
for(sx = -SupportRadius; sx <= SupportRadius; sx++, i++)
{
/* Compute the sum of window translates. This sum should be
exactly constant, but there can be small variations. We
divide the Psi samples computed below by WSum to compensate.
*/
for(ny = -(int)floor((sy + SupportRadius)/ScaleFactor), WSum = 0;
ny <= (2*NEIGHRADIUS + 1) && sy + ny*ScaleFactor <= SupportRadius; ny++)
for(nx = -(int)floor((sx + SupportRadius)/ScaleFactor);
nx <= (2*NEIGHRADIUS + 1) && sx + nx*ScaleFactor <= SupportRadius; nx++)
{
x = XStart + nx + ((double)sx)/((double)ScaleFactor);
y = YStart + ny + ((double)sy)/((double)ScaleFactor);
WSum += ROUND(Window(x,y)*FIXED_ONE(PSI_FRACBITS))
/ (double)FIXED_ONE(PSI_FRACBITS);
}
x = XStart + ((double)sx)/((double)ScaleFactor);
y = YStart + ((double)sy)/((double)ScaleFactor);
Psi0 = Wxy = Window(x, y);
for(my = -NEIGHRADIUS, m = 0; my <= NEIGHRADIUS; my++)
for(mx = -NEIGHRADIUS; mx <= NEIGHRADIUS; mx++, m++)
{
if(m != m0)
{
Psim = 0.0;
for(ny = -NEIGHRADIUS, n = 0; ny <= NEIGHRADIUS; ny++)
for(nx = -NEIGHRADIUS; nx <= NEIGHRADIUS; nx++, n++)
Psim += InverseA[m + NUMNEIGH*(n + NUMNEIGH*S)]
* Phi(x - nx, y - ny,
StencilOrientation[S], Param.PhiSigmaTangent,
Param.PhiSigmaNormal);
Psim *= Wxy;
Psi0 -= Psim;
Psi[i + SupportSize*(m + NUMNEIGH*S)] =
(int32_t)ROUND((Psim/WSum)*FIXED_ONE(PSI_FRACBITS));
}
}
Psi[i + SupportSize*(m0 + NUMNEIGH*S)] =
(int32_t)ROUND((Psi0/WSum)*FIXED_ONE(PSI_FRACBITS));
}
Success = 1;
Catch: /* This label is used for error handling. If something went wrong
above (which may be out of memory or a computation error), then
execution jumps to this point to clean up and exit. */
Free(InverseA);
if(!Success && Psi)
{
Free(Psi);
Psi = NULL;
}
return Psi;
}
/**
* @brief Main interpolation computation for the first pass
* @param Interpolation pointer where to store the result
* @param ScaleFactor the interpolation scale factor
* @param Input pointer to the input image
* @param InputWidth, InputHeight dimensions of the input image
* @param Stencil pointer to the selected stencils
* @param Sample array of precomputed w*psi samples
*
* This is the main computation for refinement passes: it adds to the
* interpolation
* \f[ u(x) = \sum_{k\in\mathbb{Z}^2} w(x - k) \Bigl[ v_k +
* \sum_{n\in\mathcal{N}\backslash\{0\}} (v_{k+n} - v_k)
* \psi^n_{\mathcal{S}^\star(k)}(x - k) \Bigr]. \f]
*/
static void CWFirstPass(int32_t *Interpolation, int ScaleFactor, const int32_t *Input,
int InputWidth, int InputHeight, const int *Stencil, const int32_t *Psi)
{
const int32_t *PsiPtr, *SrcWindow;
int32_t *DestWindow;
const int Pad = 2;
const int SampleRange = (NEIGHRADIUS+1)*ScaleFactor - 1;
const int SampleWidth = 2*SampleRange + 1;
const int OutputWidth = ScaleFactor*InputWidth;
const int DestWindowJump = PIXEL_STRIDE*(OutputWidth - SampleWidth);
const int DestStep = PIXEL_STRIDE*ScaleFactor;
const int DestJump = PIXEL_STRIDE*(ScaleFactor-1)*OutputWidth + 2*Pad*DestStep;
const int SrcWindowJump = PIXEL_STRIDE*(InputWidth - NEIGHDIAMETER);
const int SrcJump = 2*PIXEL_STRIDE*Pad;
const int StencilJump = 2*Pad;
int x, y, NeighX, NeighY, SampleX, SampleY;
int32_t cr, cg, cb;
Interpolation += PIXEL_STRIDE*(Pad*ScaleFactor - SampleRange)*(1 + OutputWidth);
Input += PIXEL_STRIDE*(Pad - NEIGHRADIUS)*(1 + InputWidth);
Stencil += Pad*(1 + InputWidth);
for(y = InputHeight - 2*Pad; y; y--,
Stencil += StencilJump, Input += SrcJump, Interpolation += DestJump)
for(x = InputWidth - 2*Pad; x; x--,
Stencil++, Input += PIXEL_STRIDE, Interpolation += DestStep)
{
PsiPtr = Psi + *Stencil;
SrcWindow = Input;
for(NeighY = NEIGHDIAMETER; NeighY; NeighY--, SrcWindow += SrcWindowJump)
for(NeighX = NEIGHDIAMETER; NeighX; NeighX--, SrcWindow += PIXEL_STRIDE)
{
cr = SrcWindow[0];
cg = SrcWindow[1];
cb = SrcWindow[2];
DestWindow = Interpolation;
SampleY = SampleWidth;
for(SampleY = SampleWidth; SampleY;
SampleY--, DestWindow += DestWindowJump, PsiPtr += SampleWidth)
for(SampleX = 0; SampleX < SampleWidth;
SampleX++, DestWindow += PIXEL_STRIDE)
{
int32_t Temp = PsiPtr[SampleX];
DestWindow[0] += cr * Temp;
DestWindow[1] += cg * Temp;
DestWindow[2] += cb * Temp;
}
}
}
}
/**
* @brief Main interpolation computation for refinement passes
* @param Interpolation pointer to where interpolation is stored
* @param ScaleFactor the interpolation scale factor
* @param Residual pointer to the residual
* @param InputWidth, InputHeight dimensions of the input image
* @param Stencil pointer to the selected stencils
* @param Sample array of precomputed w*psi samples
*
* This is the main computation for refinement passes: it adds to the
* interpolation
* \f[ u(x) = u(x) + \sum_{k\in\mathbb{Z}^2} w(x - k) \Bigl[ r_k +
* \sum_{n\in\mathcal{N}\backslash\{0\}} (r_{k+n} - r_k)
* \psi^n_{\mathcal{S}^\star(k)}(x - k) \Bigr]. \f]
*/
static void CWRefinementPass(int32_t *Interpolation, int ScaleFactor,
const int32_t *Residual, int InputWidth, int InputHeight,
const int *Stencil, const int32_t *Sample)
{
const int Pad = 4;
const int32_t *SamplePtr, *SrcWindow;
int32_t *DestWindow;
const int SampleRange = (NEIGHRADIUS+1)*ScaleFactor - 1;
const int SampleWidth = 2*SampleRange + 1;
const int SampleSize = SampleWidth*SampleWidth;
const int OutputWidth = ScaleFactor*InputWidth;
const int DestWindowJump = PIXEL_STRIDE*(OutputWidth - SampleWidth);
const int DestStep = PIXEL_STRIDE*ScaleFactor;
const int DestJump = PIXEL_STRIDE*(ScaleFactor-1)*OutputWidth + 2*Pad*DestStep;
const int SrcWindowJump = PIXEL_STRIDE*(InputWidth - NEIGHDIAMETER);
const int SrcJump = 2*PIXEL_STRIDE*Pad;
const int StencilJump = 2*Pad;
int x, y, NeighX, NeighY, SampleX, SampleY;
int32_t cr, cg, cb;
Interpolation += PIXEL_STRIDE*(Pad*ScaleFactor - SampleRange)*(1 + OutputWidth);
Residual += PIXEL_STRIDE*(Pad - NEIGHRADIUS)*(1 + InputWidth);
Stencil += Pad*(1 + InputWidth);
for(y = InputHeight - 2*Pad; y; y--,
Stencil += StencilJump, Residual += SrcJump, Interpolation += DestJump)
for(x = InputWidth - 2*Pad; x; x--,
Stencil++, Residual += PIXEL_STRIDE, Interpolation += DestStep)
{
SamplePtr = Sample + *Stencil;
SrcWindow = Residual;
for(NeighY = NEIGHDIAMETER; NeighY; NeighY--, SrcWindow += SrcWindowJump)
for(NeighX = NEIGHDIAMETER; NeighX; NeighX--, SrcWindow += PIXEL_STRIDE)
{
cr = SrcWindow[0];
cg = SrcWindow[1];
cb = SrcWindow[2];
if( ((cr >> CORRECTION_IGNOREBITS) && (-cr >> CORRECTION_IGNOREBITS))
|| ((cg >> CORRECTION_IGNOREBITS) && (-cg >> CORRECTION_IGNOREBITS))
|| ((cb >> CORRECTION_IGNOREBITS) && (-cb >> CORRECTION_IGNOREBITS)) )
{
DestWindow = Interpolation;
SampleY = SampleWidth;
for(SampleY = SampleWidth; SampleY;
SampleY--, DestWindow += DestWindowJump, SamplePtr += SampleWidth)
for(SampleX = 0; SampleX < SampleWidth;
SampleX++, DestWindow += PIXEL_STRIDE)
{
int32_t Temp = SamplePtr[SampleX];
DestWindow[0] += cr * Temp;
DestWindow[1] += cg * Temp;
DestWindow[2] += cb * Temp;
}
}
else
SamplePtr += SampleSize;
}
}
}
/**
* @brief Boundary handling function for constant extension
* @param N is the data length
* @param i is an index into the data
* @return an index that is always between 0 and N - 1
*/
static int ConstExtension(int N, int i)
{
if(i < 0)
return 0;
else if(i >= N)
return N - 1;
else
return i;
}
static float Sqr(float x)
{
return x*x;
}
/** @brief Computes the residual, Residual = Input - sample(PSF * Interpolation) */
static int32_t CWResidual(int32_t *Residual, const int32_t *Interpolation,
const int32_t *Input, int CoarseWidth, int CoarseHeight, cwparams Param)
{
int Pad = 4;
const int ScaleFactor = (int)ceil(Param.ScaleFactor);
const int InterpWidth = ScaleFactor*CoarseWidth;
const int InterpHeight = ScaleFactor*CoarseHeight;
const int CoarseStride = 3*CoarseWidth;
const float PsfRadius = (float)(4*Param.PsfSigma*ScaleFactor);
const int PsfWidth = (int)ceil(2*PsfRadius);
float *Temp = NULL, *PsfBuf = NULL;
float ExpDenom, Weight, Sum[3], DenomSum;
float XStart, YStart, X, Y;
int IndexX0, IndexY0, SrcOffset, DestOffset;
int x, y, i, n, c, Success = 0;
int x1, x2;
int32_t ResNorm = 0;
if(!(Temp = (float *)Malloc(sizeof(float)*3*CoarseWidth*InterpHeight))
|| !(PsfBuf = (float *)Malloc(sizeof(float)*PsfWidth)))
goto Catch;
if(Param.CenteredGrid)
{
XStart = (1.0f/ScaleFactor - 1)/2;
YStart = (1.0f/ScaleFactor - 1)/2;
}
else
XStart = YStart = 0;
if(Param.PsfSigma)
ExpDenom = 2 * Sqr((float)(Param.PsfSigma*ScaleFactor));
else
ExpDenom = 2 * Sqr(1e-2f*ScaleFactor);
if(Pad < ScaleFactor)
Pad = ScaleFactor;
for(x = 0; x < CoarseWidth; x++)
{
X = (-XStart + x)*ScaleFactor;
IndexX0 = (int)ceil(X - PsfRadius);
/* Evaluate the PSF */
for(n = 0; n < PsfWidth; n++)
PsfBuf[n] = (float)exp(-Sqr(X - (IndexX0 + n)) / ExpDenom);
for(y = 0, SrcOffset = 0, DestOffset = 3*x; y < InterpHeight;
y++, SrcOffset += InterpWidth, DestOffset += CoarseStride)
{
Sum[0] = Sum[1] = Sum[2] = DenomSum = 0;
for(n = 0; n < PsfWidth; n++)
{
Weight = PsfBuf[n];
DenomSum += Weight;
i = 3*(ConstExtension(InterpWidth, IndexX0 + n) + SrcOffset);
for(c = 0; c < 3; c++)
Sum[c] += Weight * Interpolation[i + c];
}
for(c = 0; c < 3; c++)
Temp[DestOffset + c] = Sum[c] / DenomSum;
}
}
x1 = 3*Pad;
x2 = CoarseStride - 3*Pad;
for(y = 0; y < CoarseHeight; y++,
Residual += CoarseStride, Input += CoarseStride)
{
if(!(y >= Pad && y < CoarseHeight-Pad))
continue;
Y = (-YStart + y)*ScaleFactor;
IndexY0 = (int)ceil(Y - PsfRadius);
/* Evaluate the PSF */
for(n = 0; n < PsfWidth; n++)
PsfBuf[n] = (float)exp(-Sqr(Y - (IndexY0 + n)) / ExpDenom);
for(x = x1; x < x2; x += 3)
{
Sum[0] = Sum[1] = Sum[2] = DenomSum = 0;
for(n = 0; n < PsfWidth; n++)
{
SrcOffset = x + CoarseStride*ConstExtension(InterpHeight, IndexY0 + n);
Weight = PsfBuf[n];
DenomSum += Weight;
for(c = 0; c < 3; c++)
Sum[c] += Weight * Temp[SrcOffset + c];
}
DenomSum *= FIXED_ONE(PSI_FRACBITS);
for(c = 0; c < 3; c++)
{
Sum[c] = Input[x + c] - Sum[c] / DenomSum;
Residual[x + c] = (int32_t)ROUND(Sum[c]);
if(abs(Residual[x + c]) > ResNorm)
ResNorm = abs(Residual[x + c]);
}
}
}
Success = 1;
Catch:
Free(PsfBuf);
Free(Temp);
return (Success) ? ResNorm : -1;
}
/**
* @brief Simultaneously pad and convert to RGB fixed-point image
*
* @param FixedRgb pointer to hold the padded and converted image data
* @param Input the input 32-bit RGBA image
* @param InputWidth, InputHeight input image dimensions
* @param Padding number of padding pixels
*
* \c ConvertInput is used by \c CWInterp to prepare the input image in a
* format convenient for computations.
*
* \c ConvertInput converts the input RGBA image with 8-bits per component to
* an RGB image with 32-bit fixed-point components, where the number of
* fractional bits is INPUT_FRACBITS. At the same time, the function pads the
* image so that the result has size
* (InputWidth + 2*Padding) by (InputHeight + 2*Padding).
* The padding is constant extension (pixel replication).
*/
static void ConvertInput(int32_t *FixedRgb, const uint32_t *Input, int InputWidth,
int InputHeight, int Padding)
{
const uint8_t *InputPtr = (uint8_t *)Input;
const int InputStride = 4*InputWidth;
const int Stride = PIXEL_STRIDE*(InputWidth + 2*Padding);
int32_t r, g, b;
int i, Row;
FixedRgb += Padding*Stride;
for(Row = InputHeight; Row; Row--, InputPtr += InputStride)
{
r = ((int32_t)InputPtr[0]) << INPUT_FRACBITS;
g = ((int32_t)InputPtr[1]) << INPUT_FRACBITS;
b = ((int32_t)InputPtr[2]) << INPUT_FRACBITS;
/* Pad left side by copying pixel */
for(i = Padding; i; i--)
{
*(FixedRgb++) = r;
*(FixedRgb++) = g;
*(FixedRgb++) = b;
}
/* Convert the interior of the image */
for(i = 0; i < InputStride; i += 4)
{
*(FixedRgb++) = ((int32_t)InputPtr[i+0]) << INPUT_FRACBITS;
*(FixedRgb++) = ((int32_t)InputPtr[i+1]) << INPUT_FRACBITS;
*(FixedRgb++) = ((int32_t)InputPtr[i+2]) << INPUT_FRACBITS;
}
r = ((int32_t)InputPtr[i-4]) << INPUT_FRACBITS;
g = ((int32_t)InputPtr[i-3]) << INPUT_FRACBITS;
b = ((int32_t)InputPtr[i-2]) << INPUT_FRACBITS;
/* Pad right side by copying pixel */
for(i = Padding; i; i--)
{
*(FixedRgb++) = r;
*(FixedRgb++) = g;
*(FixedRgb++) = b;
}
}
/* Pad bottom by copying rows */
for(Row = Padding; Row; Row--, FixedRgb += Stride)
memcpy(FixedRgb, FixedRgb - Stride, sizeof(int32_t)*Stride);
FixedRgb -= Stride*(InputHeight + Padding);
/* Pad top by coping rows */
for(Row = Padding; Row; Row--, FixedRgb -= Stride)
memcpy(FixedRgb - Stride, FixedRgb, sizeof(int32_t)*Stride);
}
/**
* @brief Crop and convert RGB fixed-point image to 32-bit RGBA
*
* @param Output pointer to hold the output converted image data
* @param OutputWidth, OutputHeight cropped image dimensions
* @param FixedRgb the input RGB fixed-point image
* @param Width width of the fixed-point image
*
* \c ConvertOutput is used by \c CWInterp to convert the final interpolation
* from the computation format back to RGBA.
*
* \c ConvertOutput converts from an RGB image with 32-bit fixed-point
* components, where the number of fractional bits is OUPUT_FRACBITS, to RGBA
* with 8-bits per component. The function also crops the image to have
* dimensions OutputWidth by OutputHeight. (Width is also needed to know the
* stride length in memory between successive rows of the input image.) The
* upper-left corner of the cropped image can be specified by adjusted
* \c FixedRgb:
@code
ConvertOutput(Output, OutputWidth, OutputHeight,
FixedRgb + x0 + y0*Width, Width);
@endcode
*/
static void ConvertOutput(uint32_t *Output, int OutputWidth, int OutputHeight,
const int32_t *FixedRgb, int Width)
{
uint8_t *OutputPtr = (uint8_t *)Output;
const int CroppedStride = PIXEL_STRIDE*OutputWidth, Stride = PIXEL_STRIDE*Width;
int32_t r, g, b;
int i, Row;
for(Row = OutputHeight; Row; Row--, FixedRgb += Stride)
for(i = 0; i < CroppedStride; i += PIXEL_STRIDE)
{
/* Convert fixed-point values to integer */
r = (FixedRgb[i+0] + FIXED_HALF(OUTPUT_FRACBITS)) >> OUTPUT_FRACBITS;
g = (FixedRgb[i+1] + FIXED_HALF(OUTPUT_FRACBITS)) >> OUTPUT_FRACBITS;
b = (FixedRgb[i+2] + FIXED_HALF(OUTPUT_FRACBITS)) >> OUTPUT_FRACBITS;
/* Clamp range to [0, 255] and store in Output */
*(OutputPtr++) = CLAMP(r, 0, 255);
*(OutputPtr++) = CLAMP(g, 0, 255);
*(OutputPtr++) = CLAMP(b, 0, 255);
*(OutputPtr++) = 0xFF;
}
}
/**
* @brief Contour stencil windowed interpolation
*
* @param Output pointer to memory for holding the interpolated image
* @param Input the input image
* @param InputWidth, InputHeight input image dimensions
* @param Psi \f$\psi\f$ samples computed by \c PreCWInterp
* @param Param cwparams struct of interpolation parameters
*/
int CWInterp(uint32_t *Output, const uint32_t *Input,
int InputWidth, int InputHeight, const int32_t *Psi, cwparams Param)
{
const int ScaleFactor = (int)ceil(Param.ScaleFactor);
const int SupportRadius = (NEIGHRADIUS+1)*ScaleFactor - 1;
const int SupportWidth = 2*SupportRadius + 1;
const int SupportSize = SupportWidth*SupportWidth;
const int StencilMul = NUMNEIGH*SupportSize;
int *Stencil = NULL;
int32_t *InputFixed = NULL, *OutputFixed = NULL, *Residual = NULL;
unsigned long StartTime, StopTime;
int i, PadInput, pw, ph, Success = 0;
int32_t ResNorm;
/* Iterative refinement is unnecessary if PSF is the Dirac delta */
if(Param.PsfSigma == 0.0)
Param.RefinementSteps = 0;
PadInput = 4 + (ScaleFactor + 1)/2;
pw = InputWidth + 2*PadInput;
ph = InputHeight + 2*PadInput;
if( !(OutputFixed = (int32_t *)Malloc(sizeof(int32_t)*
PIXEL_STRIDE*pw*ScaleFactor*ph*ScaleFactor))
|| !(InputFixed = (int32_t *)Malloc(sizeof(int32_t)*PIXEL_STRIDE*pw*ph))
|| !(Residual = (int32_t *)Malloc(sizeof(int32_t)*PIXEL_STRIDE*pw*ph))
|| !(Stencil = (int *)Malloc(sizeof(int)*pw*ph)) )
goto Catch;
/* Start timing */
StartTime = Clock();
/* Convert 32-bit RGBA pixels to integer array */
ConvertInput(InputFixed, Input, InputWidth, InputHeight, PadInput);
/* Select the best-fitting contour stencils */
if(!FitStencils(Stencil, InputFixed, pw, ph, StencilMul))
goto Catch;
memset(OutputFixed, 0, sizeof(int32_t)*
3*pw*ScaleFactor*ph*ScaleFactor);
memset(Residual, 0, sizeof(int32_t)*3*pw*ph);
printf("\n Iteration Residual norm\n -------------------------\n");
/* First interpolation pass */
CWFirstPass(OutputFixed, ScaleFactor, InputFixed, pw, ph, Stencil, Psi);
/* Iterative refinement */
for(i = 1; i <= Param.RefinementSteps; i++)
{
/* Compute the residual */
if((ResNorm = CWResidual(Residual, OutputFixed, InputFixed,
pw, ph, Param)) < 0.0)
goto Catch;
printf(" %8d %15.8f\n", i, ResNorm/(255.0*256.0));
/* Interpolation refinement pass */
CWRefinementPass(OutputFixed, ScaleFactor, Residual, pw, ph, Stencil, Psi);
}
/* Convert output integer array to 32-bit RGBA */
ConvertOutput(Output, InputWidth*ScaleFactor, InputHeight*ScaleFactor,
OutputFixed + PIXEL_STRIDE*PadInput*ScaleFactor*(1 + pw*ScaleFactor),
pw*ScaleFactor);
/* The final interpolation is now complete, stop timing. */
StopTime = Clock();
/* Compute the residual norm of the final interpolation. This
computation is not included in the CPU timing since it is for
information purposes only. */
ResNorm = CWResidual(Residual, OutputFixed, InputFixed, pw, ph, Param);
printf(" %8d %15.8f\n\n", Param.RefinementSteps + 1, ResNorm/(255.0*256.0));
/* Display the CPU time spent performing the interpolation. */
printf(" CPU time: %.3f s\n\n", 0.001*(StopTime - StartTime));
Success = 1;
Catch: /* This label is used for error handling. If something went wrong
above (which may be out of memory or a computation error), then
execution jumps to this point to clean up and exit. */
Free(Stencil);
Free(Residual);
Free(InputFixed);
Free(OutputFixed);
return Success;
}
/** @brief Round and clamp double X to integer */
#define ROUNDCLAMP(X,A,B) (((X) < (A)) ? (A) : (((X) > (B)) ? (B) : ROUND(X)))
/* The following parameters define the number of fractional bits used for
signed 32-bit fixedpoint arithmetic in CWSynth2Fixed. These parameters
should be large enough for reasonable precision but small enough to
avoid overflow. Additionally, the implementation constraints on
choosing these parameters are
WINDOW_FRACBITS >= UK_FRACBITS,
TRIG_FRACBITS + XY_FRACBITS - PHITN_FRACBITS >= 1,
COEFF_FRACBITS + PHI_FRACBITS - UK_FRACBITS >= 1.
*/
#define XY_FRACBITS 15
#define TRIG_FRACBITS 10
#define PHITN_FRACBITS 9
#define PHI_FRACBITS 10
#define COEFF_FRACBITS 10
#define WINDOW_FRACBITS 10
#define UK_FRACBITS 10
#define ROUND_FIXED(X,N) (((X) + FIXED_HALF(N)) >> (N))
#define FLOAT_TO_FIXED(X,N) ((int32_t)ROUND((X) * FIXED_ONE(N)))
/** @brief Arbitrary scale factor interpolation */
int CWArbitraryInterp(uint32_t *Output, int OutputWidth, int OutputHeight,
const int32_t *Input, int InputWidth, int InputHeight,
const int *Stencil, const double *InverseA, cwparams Param)
{
/*int (*Extension)(int, int) = ExtensionMethod[Param.Boundary];*/
int (*Extension)(int, int) = ConstExtension;
const int InputNumEl = 3*InputWidth*InputHeight;
const int ExpTableSize = 1024;
const double ExpArgScale = 37.0236;
const double PhiTScale = sqrt(ExpArgScale/2)/Param.PhiSigmaTangent;
const double PhiNScale = sqrt(ExpArgScale/2)/Param.PhiSigmaNormal;
int32_t *Coeff = NULL, *CoeffPtr, *ExpTable = NULL;
float X, Y, XStart, YStart;
float Temp, cr[NUMNEIGH], cg[NUMNEIGH], cb[NUMNEIGH];
int32_t v0[3], v[3], WindowWeight, WindowWeightX[4], WindowWeightY[4];
int32_t Xpf, Ypf, Weight, uk[3], u[3], DenomSum;
int32_t CosTableTf[NUMSTENCILS], SinTableTf[NUMSTENCILS];
int32_t CosTableNf[NUMSTENCILS], SinTableNf[NUMSTENCILS];
int32_t Pixel;
int i, k, x, y, m, n, mx, my, nx, ny, S, Success = 0;
int ix, iy, Cur, Offset;
if(!(Coeff = (int32_t *)Malloc(sizeof(int32_t)*(NUMNEIGH + 1)*InputNumEl))
|| !(ExpTable = (int32_t *)Malloc(sizeof(int32_t)*ExpTableSize)))
goto Catch;
if(Param.CenteredGrid)
{
XStart = (float)(1/Param.ScaleFactor - 1)/2;
YStart = (float)(1/Param.ScaleFactor - 1)/2;
}
else
XStart = YStart = 0;
for(S = 0; S < NUMSTENCILS; S++)
{
CosTableTf[S] = FLOAT_TO_FIXED(
PhiTScale * cos(StencilOrientation[S]), TRIG_FRACBITS);
SinTableTf[S] = FLOAT_TO_FIXED(
PhiTScale * sin(StencilOrientation[S]), TRIG_FRACBITS);
CosTableNf[S] = FLOAT_TO_FIXED(
PhiNScale * cos(StencilOrientation[S]), TRIG_FRACBITS);
SinTableNf[S] = FLOAT_TO_FIXED(
PhiNScale * sin(StencilOrientation[S]), TRIG_FRACBITS);
}
for(i = 0; i < ExpTableSize; i++)
ExpTable[i] = FLOAT_TO_FIXED(exp( -(double)(i + 0.5f)/ExpArgScale), PHI_FRACBITS);
for(y = 0, k = 0, CoeffPtr = Coeff; y < InputHeight; y++)
for(x = 0; x < InputWidth; x++, k++, CoeffPtr += 3*(NUMNEIGH + 1))
{
S = NUMNEIGH*Stencil[k];
v0[0] = Input[3*k + 0];
v0[1] = Input[3*k + 1];
v0[2] = Input[3*k + 2];
for(m = 0; m < NUMNEIGH; m++)
cr[m] = 0;
for(m = 0; m < NUMNEIGH; m++)
cg[m] = 0;
for(m = 0; m < NUMNEIGH; m++)
cb[m] = 0;
for(ny = -NEIGHRADIUS, n = 0; ny <= NEIGHRADIUS; ny++)
{
Offset = InputWidth*Extension(InputHeight, y + ny);
for(nx = -NEIGHRADIUS; nx <= NEIGHRADIUS; nx++, n++)
{
if(n != 1 + (2*NEIGHRADIUS + 1))
{
i = 3*(Extension(InputWidth, x + nx) + Offset);
v[0] = Input[i + 0];
v[1] = Input[i + 1];
v[2] = Input[i + 2];
for(m = 0; m < NUMNEIGH; m++)
{
Temp = (float)InverseA[m + NUMNEIGH*(n + S)];
cr[m] += Temp * (v[0] - v0[0]);
cg[m] += Temp * (v[1] - v0[1]);
cb[m] += Temp * (v[2] - v0[2]);
}
}
}
}
/* The first three coeff values have UK_FRACBITS fractional bits */
CoeffPtr[0] = v0[0] << (UK_FRACBITS - INPUT_FRACBITS);
CoeffPtr[1] = v0[1] << (UK_FRACBITS - INPUT_FRACBITS);
CoeffPtr[2] = v0[2] << (UK_FRACBITS - INPUT_FRACBITS);
/* The other values have COEFF_FRACBITS fractional bits */
for(m = 0; m < NUMNEIGH; m++)
{
CoeffPtr[3*(m + 1) + 0] = FLOAT_TO_FIXED(cr[m]
/ FIXED_ONE(INPUT_FRACBITS), COEFF_FRACBITS);
CoeffPtr[3*(m + 1) + 1] = FLOAT_TO_FIXED(cg[m]
/ FIXED_ONE(INPUT_FRACBITS), COEFF_FRACBITS);
CoeffPtr[3*(m + 1) + 2] = FLOAT_TO_FIXED(cb[m]
/ FIXED_ONE(INPUT_FRACBITS), COEFF_FRACBITS);
}
}
for(y = 0, k = 0; y < OutputHeight; y++)
{
Y = YStart + (float)(y/Param.ScaleFactor);
iy = (int)ceil(Y - 2*NEIGHRADIUS);
/* Precompute y-factor of the window weights */
for(my = 0; my < 4*NEIGHRADIUS; my++)
WindowWeightY[my] = FLOAT_TO_FIXED(
CubicBSpline(Y - (iy + my)), WINDOW_FRACBITS);
for(x = 0; x < OutputWidth; x++, k++)
{
X = XStart + (float)(x/Param.ScaleFactor);
ix = (int)ceil(X - 2*NEIGHRADIUS);
/* Precompute x-factor of the window weights */
for(mx = 0; mx < 4*NEIGHRADIUS; mx++)
WindowWeightX[mx] = FLOAT_TO_FIXED(
CubicBSpline(X - (ix + mx)), WINDOW_FRACBITS);
DenomSum = 0;
u[0] = u[1] = u[2] = 0;
for(my = 0, Ypf = (int32_t)ROUND((Y - iy) * FIXED_ONE(XY_FRACBITS)); my < 4*NEIGHRADIUS;
my++, Ypf -= FIXED_ONE(XY_FRACBITS))
if((iy + my) >= 0 && (iy + my) < InputHeight)
{
for(mx = 0, Xpf = (int32_t)ROUND((X - ix) * FIXED_ONE(XY_FRACBITS)),
i = ix + InputWidth*(iy + my), CoeffPtr = Coeff + i*3*(NUMNEIGH + 1);
mx < 4*NEIGHRADIUS; mx++, i++, CoeffPtr += 3*(NUMNEIGH + 1), Xpf -= FIXED_ONE(XY_FRACBITS))
{
if((ix + mx) < 0 || (ix + mx) >= InputWidth)
continue;
/* WindowWeight has 2*WINDOW_FRACBITS fractional bits. */
WindowWeight = (WindowWeightX[mx] * WindowWeightY[my]);
/* DenomSum is computed using 2*WINDOW_FRACBITS. */
DenomSum += WindowWeight;
/* Now reduce to WindowWeight to WINDOW_FRACBITS. */
WindowWeight = (WindowWeight + FIXED_HALF(WINDOW_FRACBITS))
>> WINDOW_FRACBITS;
if(!WindowWeight)
continue;
S = Stencil[i];
uk[0] = CoeffPtr[0];
uk[1] = CoeffPtr[1];
uk[2] = CoeffPtr[2];
for(ny = -NEIGHRADIUS, n = 3; ny <= NEIGHRADIUS; ny++)
for(nx = -NEIGHRADIUS; nx <= NEIGHRADIUS; nx++, n += 3)
{
int32_t phit, phin;
/* The tables use TRIG_FRACBITS fractional bits,
and X and Y use XY_FRACBITS, so the products
have (TRIG_FRACBITS + XY_FRACBITS) fractional
bits. The shift reduces the result to
PHITN_FRACBITS. */
phit = ( CosTableTf[S]*(Xpf - nx*FIXED_ONE(XY_FRACBITS))
+ SinTableTf[S]*(Ypf - ny*FIXED_ONE(XY_FRACBITS))
+ FIXED_HALF(TRIG_FRACBITS + XY_FRACBITS - PHITN_FRACBITS))
>> (TRIG_FRACBITS + XY_FRACBITS - PHITN_FRACBITS);
phin = ( -SinTableNf[S]*(Xpf - nx*FIXED_ONE(XY_FRACBITS))
+ CosTableNf[S]*(Ypf - ny*FIXED_ONE(XY_FRACBITS))
+ FIXED_HALF(TRIG_FRACBITS + XY_FRACBITS - PHITN_FRACBITS))
>> (TRIG_FRACBITS + XY_FRACBITS - PHITN_FRACBITS);
/* phit and phin have PHITN_FRACBITS, so the
products have 2*PHITN_FRACBITS. The result is
shifted by 2*PHITN_FRACBITS to convert to
quantity (by floor rounding) to an integer. */
Cur = (phit*phit + phin*phin) >> (2*PHITN_FRACBITS);
if(Cur >= ExpTableSize)
continue;
/* Compute exp(-Cur) via table look up. The result
has PHI_FRACBITS fractional bits. */
Weight = ExpTable[Cur];
/* The Coeff values have COEFF_FRACBITS fractional
bits and Weight has PHI_FRACBITS. The products
are shifted so that the result has
WINDOW_FRACBITS. */
uk[0] += (CoeffPtr[n + 0] * Weight
+ FIXED_HALF(COEFF_FRACBITS + PHI_FRACBITS - WINDOW_FRACBITS))
>> (COEFF_FRACBITS + PHI_FRACBITS - UK_FRACBITS);
uk[1] += (CoeffPtr[n + 1] * Weight
+ FIXED_HALF(COEFF_FRACBITS + PHI_FRACBITS - WINDOW_FRACBITS))
>> (COEFF_FRACBITS + PHI_FRACBITS - UK_FRACBITS);
uk[2] += (CoeffPtr[n + 2] * Weight
+ FIXED_HALF(COEFF_FRACBITS + PHI_FRACBITS - WINDOW_FRACBITS))
>> (COEFF_FRACBITS + PHI_FRACBITS - UK_FRACBITS);
}
/* u is computed using WINDOW_FRACBITS + UK_FRACBITS. */
u[0] += WindowWeight * uk[0];
u[1] += WindowWeight * uk[1];
u[2] += WindowWeight * uk[2];
}
}
if(DenomSum >= FIXED_ONE(2*WINDOW_FRACBITS) - 1)
{
u[0] = ROUND_FIXED(u[0], WINDOW_FRACBITS + UK_FRACBITS);
u[1] = ROUND_FIXED(u[1], WINDOW_FRACBITS + UK_FRACBITS);
u[2] = ROUND_FIXED(u[2], WINDOW_FRACBITS + UK_FRACBITS);
}
else
{
/* Reduce DenomSum from 2*WINDOW_FRACBITS fractional bits to
(WINDOW_FRACBITS + UK_FRACBITS) fractional bits. */
#if WINDOW_FRACBITS > UK_FRACBITS
DenomSum = (DenomSum + FIXED_HALF(WINDOW_FRACBITS - UK_FRACBITS))
>> (WINDOW_FRACBITS - UK_FRACBITS);
#endif
/* u and DenomSum both have (WINDOW_FRACBITS + UK_FRACBITS)
fractional bits, so the quotient is integer. */
u[0] = (u[0] + DenomSum/2) / DenomSum;
u[1] = (u[1] + DenomSum/2) / DenomSum;
u[2] = (u[2] + DenomSum/2) / DenomSum;
}
Pixel = 0xFFFFFFFF;
((uint8_t *)&Pixel)[0] = CLAMP(u[0],0,255);
((uint8_t *)&Pixel)[1] = CLAMP(u[1],0,255);
((uint8_t *)&Pixel)[2] = CLAMP(u[2],0,255);
Output[k] = Pixel;
}
}
Success = 1;
Catch:
Free(ExpTable);
Free(Coeff);
return Success;
}
/** @brief Adds residual back to input for refinement passes */
static void AddResidual(int32_t *InputAdjusted, const int32_t *Residual,
int InputWidth, int InputHeight, int PadInput)
{
const int PadWidth = InputWidth + 2*PadInput;
const int RowEl = 3*InputWidth;
const int PadRowEl = 3*PadWidth;
int i, Row;
Residual += 3*(PadInput + PadInput*PadWidth);
for(Row = InputHeight; Row; Row--)
{
for(i = 0; i < RowEl; i++)
InputAdjusted[i] += Residual[i];
InputAdjusted += RowEl;
Residual += PadRowEl;
}
}
/** @brief Removes padding from Stencil array */
static void StencilStripPad(int *Stencil,
int InputWidth, int InputHeight, int PadInput, int StencilMul)
{
const int PadWidth = InputWidth + 2*PadInput;
int *Src;
int i, Row;
Src = Stencil + (PadInput + PadInput*PadWidth);
for(Row = InputHeight; Row; Row--)
{
for(i = 0; i < InputWidth; i++)
Stencil[i] = Src[i] / StencilMul;
Stencil += InputWidth;
Src += PadWidth;
}
}
/**
* @brief Contour stencil windowed interpolation for arbitrary scale factors
*
* @param Output pointer to memory for holding the interpolated image
* @param OutputWidth, OutputHeight output image dimensions
* @param Input the input image
* @param InputWidth, InputHeight input image dimensions
* @param Psi \f$\psi\f$ samples computed by \c PreCWInterp
* @param Param cwparams struct of interpolation parameters
*/
int CWInterpEx(uint32_t *Output, int OutputWidth, int OutputHeight,
const uint32_t *Input, int InputWidth, int InputHeight,
const int32_t *Psi, cwparams Param)
{
const int ScaleFactor = (int)ceil(Param.ScaleFactor);
const int SupportRadius = (NEIGHRADIUS+1)*ScaleFactor - 1;
const int SupportWidth = 2*SupportRadius + 1;
const int SupportSize = SupportWidth*SupportWidth;
const int StencilMul = NUMNEIGH*SupportSize;
double *InverseA = NULL;
int *Stencil = NULL;
int32_t *InputFixed = NULL, *InputAdjusted = NULL, *OutputFixed = NULL, *Residual = NULL;
unsigned long StartTime, StopTime;
int i, PadInput, pw, ph, Success = 0;
int32_t ResNorm;
/* Iterative refinement is unnecessary if PSF is the Dirac delta */
if(Param.PsfSigma == 0.0)
Param.RefinementSteps = 0;
PadInput = 4 + (ScaleFactor + 1)/2;
pw = InputWidth + 2*PadInput;
ph = InputHeight + 2*PadInput;
if( !(InverseA = (double *)Malloc(sizeof(double)*NUMNEIGH*NUMNEIGH*NUMSTENCILS))
|| !(OutputFixed = (int32_t *)Malloc(sizeof(int32_t)*
PIXEL_STRIDE*pw*ScaleFactor*ph*ScaleFactor))
|| !(InputFixed = (int32_t *)Malloc(sizeof(int32_t)*PIXEL_STRIDE*pw*ph))
|| !(InputAdjusted = (int32_t *)Malloc(sizeof(int32_t)*PIXEL_STRIDE*InputWidth*InputHeight))
|| !(Residual = (int32_t *)Malloc(sizeof(int32_t)*PIXEL_STRIDE*pw*ph))
|| !(Stencil = (int *)Malloc(sizeof(int)*pw*ph)) )
goto Catch;
if(!ComputeMatrices(InverseA, Param))
goto Catch;
/* Start timing */
StartTime = Clock();
if(Param.RefinementSteps > 0)
{
/* Convert 32-bit RGBA pixels to integer array */
ConvertInput(InputFixed, Input, InputWidth, InputHeight, PadInput);
memset(InputAdjusted, 0, sizeof(int32_t)*3*InputWidth*InputHeight);
AddResidual(InputAdjusted, InputFixed, InputWidth, InputHeight, PadInput);
/* Select the best-fitting contour stencils */
if(!FitStencils(Stencil, InputFixed, pw, ph, StencilMul))
goto Catch;
memset(OutputFixed, 0, sizeof(int32_t)*
3*pw*ScaleFactor*ph*ScaleFactor);
memset(Residual, 0, sizeof(int32_t)*3*pw*ph);
printf("\n Iteration Residual norm\n -------------------------\n");
/* First interpolation pass */
CWFirstPass(OutputFixed, ScaleFactor, InputFixed, pw, ph, Stencil, Psi);
/* Iterative refinement */
for(i = 1; i <= Param.RefinementSteps; i++)
{
/* Compute the residual */
if((ResNorm = CWResidual(Residual, OutputFixed, InputFixed,
pw, ph, Param)) < 0.0)
goto Catch;
printf(" %8d %15.8f\n", i, ResNorm/(255.0*256.0));
AddResidual(InputAdjusted, Residual, InputWidth, InputHeight, PadInput);
if(i < Param.RefinementSteps)
{
/* Interpolation refinement pass */
CWRefinementPass(OutputFixed, ScaleFactor, Residual, pw, ph, Stencil, Psi);
}
}
StencilStripPad(Stencil, InputWidth, InputHeight, PadInput, StencilMul);
}
else
{
/* Convert 32-bit RGBA pixels to integer array */
ConvertInput(InputAdjusted, Input, InputWidth, InputHeight, 0);
/* Select the best-fitting contour stencils */
if(!FitStencils(Stencil, InputAdjusted, InputWidth, InputHeight, 1))
goto Catch;
}
if(!CWArbitraryInterp(Output, OutputWidth, OutputHeight,
InputAdjusted, InputWidth, InputHeight, Stencil, InverseA, Param))
goto Catch;
/* The final interpolation is now complete, stop timing. */
StopTime = Clock();
if(Param.RefinementSteps > 1)
printf(" %8d (not computed)\n\n", Param.RefinementSteps + 1);
/* Display the CPU time spent performing the interpolation. */
printf(" CPU time: %.3f s\n\n", 0.001*(StopTime - StartTime));
Success = 1;
Catch: /* This label is used for error handling. If something went wrong
above (which may be out of memory or a computation error), then
execution jumps to this point to clean up and exit. */
Free(Stencil);
Free(Residual);
Free(InputAdjusted);
Free(InputFixed);
Free(OutputFixed);
Free(InverseA);
return Success;
}
/** @brief Display the estimated contour orientations */
int DisplayContours(uint32_t *Output, int OutputWidth, int OutputHeight,
uint32_t *Input, int InputWidth, int InputHeight, cwparams Param)
{
const int Pad = 2;
const float LineColor[3] = {0, 0, 0};
int *Stencil = 0;
float dx, dy;
int32_t *InputInt = NULL;
uint32_t Pixel;
int x, y, S, pw, ph, Success = 0;
pw = InputWidth + 2*Pad;
ph = InputHeight + 2*Pad;
if( !(InputInt = (int32_t *)Malloc(sizeof(int32_t)*3*pw*ph))
|| !(Stencil = (int *)Malloc(sizeof(int)*pw*ph)) )
goto Catch;
/* Convert 32-bit RGBA pixels to integer array */
ConvertInput(InputInt, Input, InputWidth, InputHeight, Pad);
/* Select the best-fitting contour stencils */
if(!FitStencils(Stencil, InputInt, pw, ph, 1))
goto Catch;
/* Lighten the image */
for(y = 0; y < InputHeight; y++)
for(x = 0; x < InputWidth; x++)
{
Pixel = Input[x + InputWidth*y];
((uint8_t*)&Pixel)[0] = (uint8_t)(((uint8_t*)&Pixel)[0]/2 + 128);
((uint8_t*)&Pixel)[1] = (uint8_t)(((uint8_t*)&Pixel)[1]/2 + 128);
((uint8_t*)&Pixel)[2] = (uint8_t)(((uint8_t*)&Pixel)[2]/2 + 128);
Input[x + InputWidth*y] = Pixel;
}
/* Nearest neighbor interpolation */
NearestInterp(Output, OutputWidth, OutputHeight,
Input, InputWidth, InputHeight,
(float)Param.ScaleFactor, Param.CenteredGrid);
/* Draw contour orientation lines */
for(y = 0; y < InputHeight; y++)
for(x = 0; x < InputWidth; x++)
{
S = Stencil[(x + Pad) + pw*(y + Pad)];
dx = (float)cos(StencilOrientation[S])*0.6f;
dy = (float)sin(StencilOrientation[S])*0.6f;
DrawLine(Output, OutputWidth, OutputHeight,
(float)Param.ScaleFactor*(x - dx + 0.5f) - 0.5f,
(float)Param.ScaleFactor*(y - dy + 0.5f) - 0.5f,
(float)Param.ScaleFactor*(x + dx + 0.5f) - 0.5f,
(float)Param.ScaleFactor*(y + dy + 0.5f) - 0.5f, LineColor);
}
Success = 1;
Catch: /* This label is used for error handling. If something went wrong
above (which may be out of memory or a computation error), then
execution jumps to this point to clean up and exit. */
Free(Stencil);
Free(InputInt);
return Success;
}