// Inverse analytical texture mapping (ATM)

#version 430

// SIGN(x): integer sign of x that maps zero to +1 (the built-in GLSL sign()
// returns 0 for a zero argument instead of 1).
// The argument is parenthesized so that arguments containing operators with a
// lower precedence than the comparisons (?:, ==, &&, ...) expand as intended.
// Note: x is evaluated twice; do not pass expressions with side effects.
#define SIGN(x)           ( int((x) >= 0) - int((x) < 0) )

// #defines and globals
// POW2(x): square of x. x is evaluated twice.
#define POW2(x)    ((x)*(x))
// Powers x^2, x^3, x^4, t^2 and y0^2; Hygx_tent assigns them at the start of
// each evaluation.
float X2, X3, X4, T2, Y2;

#pragma region TENT Base integral Cell expressions

// Base integral for cell00, i.e. the cell that Hygx_tent's readable dispatch
// selects for x < 0 and g(x) < 0, where g(x) = t*x + y0.
//   x   x-coordinate
//   t   tangent (slope) of the line g
//   y0  value of g at x == 0
float i2cell00(float x, float t, float y0) {
#if 1
    // Factorized form: (1 + x)^2 / 24 times a quadratic in t.
    float xp1 = 1 + x;
    float yp1 = 1 + y0;
    return 1.0 / 24 *
          xp1*xp1 * (t*t * (1 - 2 * x + 3 * x*x)
        + 4 * t*(-1 + 2 * x)*yp1
        + 6 * yp1*yp1); // last term equals 6 * (1 + 2*y0 + y0*y0)
#else
    // Expanded in MM
    // OK, but shows more artifacts where the tangent is near 0 -- try a factorized form as well.
    // This variant reads the globals X2, X3, X4, T2 and Y2.
    return 1.0 / 24 * (
        6 - 4*t + T2 + 12*x + 6*X2 + 12*t*X2 + 8*t*X3 + 4*T2*X3 + 3*T2*X4 
        + 12*y0 - 4*t*y0 + 24*x*y0 + 12*X2*y0 + 12*t*X2*y0 + 8*t*X3*y0 + 6*Y2 + 12*x*Y2 + 6*X2*Y2
    );
#endif
}

// Base integral for cell01, i.e. the cell that Hygx_tent's readable dispatch
// selects for x < 0 and g(x) >= 0, where g(x) = t*x + y0.
//   x   x-coordinate
//   t   tangent (slope) of the line g
//   y0  value of g at x == 0
float i2cell01(float x, float t, float y0) {
    float xp1 = 1 + x;
    float quadInT  = t*t * (1 - 2 * x + 3 * x*x);
    float linInT   = 4 * t*(-1 + 2 * x)*(-1 + y0);
    // Note: this is NOT equal to 6 * (-1 - y0)*(1 + y0); the quadratic in y0
    // cannot be factorized that way.
    float constInT = 6 * (-1 - 2 * y0 + y0*y0);
    return -1.0 / 24 *
          xp1*xp1 * (quadInT + linInT + constInT);
}

// Base integral for cell10, i.e. the cell that Hygx_tent's readable dispatch
// selects for x >= 0 and g(x) < 0, where g(x) = t*x + y0.
//   x   x-coordinate
//   t   tangent (slope) of the line g
//   y0  value of g at x == 0
float i2cell10(float x, float t, float y0) {
    float xm1 = -1 + x;
    float yp1 = 1 + y0;
    float quadInT  = t*t * (1 + 4 * x*x*x - 3 * x*x*x*x);
    float linInT   = 4 * t*xm1*xm1 * (1 + 2 * x)*yp1;
    float constInT = 6 * (-1 - 2 * x + x*x) * yp1*yp1;
    return 1.0 / 24 *
        ( quadInT - linInT - constInT );
}

// Base integral for cell11, i.e. the cell that Hygx_tent's readable dispatch
// selects for x >= 0 and g(x) >= 0, where g(x) = t*x + y0.
//   x   x-coordinate
//   t   tangent (slope) of the line g
//   y0  value of g at x == 0
float i2cell11(float x, float t, float y0) {
    float xm1 = -1 + x;
    float quadInT  = t*t * (-1 - 4 * x*x*x + 3 * x*x*x*x);
    float linInT   = 4 * t*xm1*xm1 * (1 + 2 * x)*(-1 + y0);
    float constInT = 6 * (-1 - 2 * x + x*x)*(-1 - 2 * y0 + y0*y0);
    return 1.0 / 24 *
        ( quadInT + linInT + constInT );
}

// i2cellGeneric: branch-free replacement for i2cell00/01/10/11.
// The four cell polynomials share one shape and differ only in a handful of
// coefficients, which are looked up by cell index i:
//   i = 0: cell00,  i = 1: cell01,  i = 2: cell10,  i = 3: cell11
// (Hygx_tent builds i as  int(x >= 0) << 1 | int(g(x) >= 0)).
//
//   x   x-coordinate
//   t   tangent (slope) of the line g(x) = t*x + y0
//   y0  value of g at x == 0
//   i   cell index; must be in 0..3 (no range check is performed)
//
// The powers of x, t and y0 are derived from the arguments inside the
// function, so the result depends on the arguments only and not on the
// file-level globals X2, X3, X4, T2, Y2 having been set beforehand.
#if 1
const float ac[4] = { 1, -1, -1,  1}; // perhaps move from global scope to function scope
const float bc[4] = { 1,  1, -1, -1};
const float cc[4] = { 4,  4, -4, -4};
const float dc[4] = { 1,  1, -1, -1};

const float ec[4] = {-1, -1,  1,  1};
const float fc[4] = { 1, -1,  1, -1};
const float gc[4] = { 1,  1, -1, -1};
const float hc[4] = { 2,  2, -2, -2};

const float jc[4] = { 1, -1,  1, -1};
const float kc[4] = { 2, -2,  2, -2};

float i2cellGeneric(float x, float t, float y0, int i) {
    float x2 = x*x;
    float x3 = x2*x;
    float x4 = x3*x;
    float t2 = t*t;
    float y2 = y0*y0;
    return ac[i] / 24 *
        ( t2*(bc[i] + cc[i]*x3 + 3*x4) 
        + 4*t * (dc[i] + x)*(dc[i] + x) * (ec[i] + 2*x) * (fc[i] + y0) 
        + 6*(gc[i] + hc[i]*x + x2)*(jc[i] + kc[i]*y0 + y2) );
}
#else // partial-transposed version
// Try partial transposed array for higher spatial memory coherency.
// Layout: aVec[i] = (ac, bc, cc, dc), bVec[i] = (ec, fc, gc, hc), cVec[i] = (jc, kc, 0, 0).
const vec4 aVec[4] = vec4[4]( vec4(1, 1, 4, 1), vec4(-1, 1, 4, 1), vec4(-1, -1, -4, -1), vec4(1, -1, -4, -1) );
const vec4 bVec[4] = vec4[4]( vec4(-1, 1, 1, 2), vec4(-1, -1, 1, 2), vec4(1, 1, -1, -2), vec4(1, -1, -1, -2) );
const vec4 cVec[4] = vec4[4]( vec4(1, 2, 0, 0), vec4(-1, -2, 0, 0), vec4(1, 2, 0, 0), vec4(-1, -2, 0, 0) );

float i2cellGeneric(float x, float t, float y0, int i) {
    vec4 a = aVec[i];
    vec4 b = bVec[i];
    vec4 c = cVec[i];
    float x2 = x*x;
    float x3 = x2*x;
    float x4 = x3*x;
    float t2 = t*t;
    float y2 = y0*y0;
    return a.x / 24 *
        ( t2*(a.y + a.z*x3 + 3*x4) 
        + 4*t * (a.w + x)*(a.w + x) * (b.x + 2*x) * (b.y + y0) 
        + 6*(b.z + b.w*x + x2)*(c.x + c.y*y0 + y2) );
}
#endif

// cell*2 are not needed:
// Base integral for cell02: 1/2 + x + x^2/2.
// Depends on x only; it is independent of the line y = g(x), and thus of t and y0.
float i2cell02(float x) {
    float halfSquare = x*x/2;
    return 1.0 / 2 + x + halfSquare;
}
// Base integral for cell12: 1/2 + x - x^2/2.
// Depends on x only; it is independent of the line y = g(x), and thus of t and y0.
float i2cell12(float x) {
    float halfSquare = x*x/2;
    return 1.0 / 2 + x - halfSquare;
}
#pragma endregion

#pragma region TENT horizontal cell borders constant expressions
// Integration constant for border area C: (1 - t + y0)^4 / (24 * t^2).
//   t   tangent of the line g(x) = t*x + y0; must be non-zero (divides by t^2)
//   y0  value of g at x == 0
float HygxBorderC(float t, float y0) { 
    float base = 1 - t + y0;
    float numer = base*base*base*base;
    float denom = 24 * t*t;
    return numer / denom;
}
// Integration constant for border area D, with a = 1 + y0:
//   (t^4 - 4 t^3 a + 6 t^2 a^2 - 4 t a^3 - a^4) / (24 * t^2)
//   t   tangent of the line g(x) = t*x + y0; must be non-zero (divides by t^2)
//   y0  value of g at x == 0
float HygxBorderD(float t, float y0) {
    float a = 1 + y0;
    float p0 = t*t*t*t;
    float p1 = 4 * t*t*t * a;
    float p2 = 6 * t*t * a*a;
    float p3 = 4 * t * a*a*a;
    float p4 = a*a*a*a;
    return (p0 - p1 + p2 - p3 - p4) / (24 * t*t);
}
// Integration constant for border area E: -(t - y0)^4 / (12 * t^2).
//   t   tangent of the line g(x) = t*x + y0; must be non-zero (divides by t^2)
//   y0  value of g at x == 0
float HygxBorderE(float t, float y0) {
    float diff = t - y0;
    float denom = 12 * t*t;
    return -diff*diff*diff*diff / denom;
}
// Integration constant for border area F:
//   (-t^4 + 4 t^3 y0 - 6 t^2 y0^2 + 4 t y0^3 + y0^4) / (12 * t^2)
//   t   tangent of the line g(x) = t*x + y0; must be non-zero (divides by t^2)
//   y0  value of g at x == 0
float HygxBorderF(float t, float y0) {
    float p0 = t*t*t*t;
    float p1 = 4 * t*t*t * y0;
    float p2 = 6 * t*t * y0*y0;
    float p3 = 4 * t*y0*y0*y0;
    float p4 = y0*y0*y0*y0;
    return (-p0 + p1 - p2 + p3 + p4) / (12 * t*t);
}
// Integration constant for border area G: (1 + t - y0)^4 / (24 * t^2).
//   t   tangent of the line g(x) = t*x + y0; must be non-zero (divides by t^2)
//   y0  value of g at x == 0
float HygxBorderG(float t, float y0) {
    float base = 1 + t - y0;
    float numer = base*base*base*base;
    float denom = 24 * t*t;
    return numer / denom;
}
// Integration constant for border area H, with a = -1 + y0:
//   (t^4 - 4 t^3 a + 6 t^2 a^2 - 4 t a^3 - a^4) / (24 * t^2)
// Same polynomial as HygxBorderD, evaluated with a different a.
//   t   tangent of the line g(x) = t*x + y0; must be non-zero (divides by t^2)
//   y0  value of g at x == 0
float HygxBorderH(float t, float y0) {
    float a = -1 + y0;
    float p0 = t*t*t*t;
    float p1 = 4 * t*t*t * a;
    float p2 = 6 * t*t * a*a;
    float p3 = 4 * t * a*a*a;
    float p4 = a*a*a*a;
    return (p0 - p1 + p2 - p3 - p4) / (24 * t*t);
}

#if 1
// HygxBorderGeneric: table-driven replacement for HygxBorder[C|D|E|F|G|H].
// Every border constant is written as
//   ab + bb*t^2 + cb*y0 + db*y0^2 + eb * (fb + gb*t + y0)^4 / t^2
// with the coefficients looked up by border index i.
//
//        Hygx-Border:  C       D       E       F       G       H
//        index i:      0       1       2       3       4       5
const float  ab[6] = {  0,      1./2,   0,      0,      0,      1./2    };
const float  bb[6] = {  0,      1./12,  0,      -1./6,  0,      1./12   };
const float  cb[6] = {  0,      1,      0,      0,      0,     -1       };
const float  db[6] = {  0,      1./2,   0,     -1,      0,      1./2    };
const float  eb[6] = {  1./24, -1./24, -1./12,  1./12,  1./24, -1./24   };
const float  fb[6] = {  1,      1,      0,      0,     -1,     -1       };
const float  gb[6] = { -1,      1,     -1,      1,     -1,      1       };

//   t   tangent of the line g(x) = t*x + y0; must be non-zero (divides by t^2)
//   y0  value of g at x == 0
//   i   border index, 0..5 (no range check is performed)
// t^2 and y0^2 are derived from the arguments inside the function, so the
// result does not depend on the file-level globals T2 and Y2 having been set.
float HygxBorderGeneric(float t, float y0, int i) { 
    float t2 = t*t;
    float y2 = y0*y0;
    float pow1 = fb[i] + gb[i]*t + y0;
    float pow2 = pow1 * pow1;
    float pow4 = pow2 * pow2;
    return ab[i] + bb[i]*t2 + cb[i]*y0 + db[i]*y2  +  (eb[i] * pow4) / t2;
}
#endif

#pragma endregion

#pragma region TENT Hygx_tent

#if 0
// Stub for performance measurement: measures the max fps of 60 fps
float Hygx_tent(float xIn, float t, float y0)
{
    return 0.01;
}
#else
// Hygx_tent: evaluates the tent-filter integral weight along the line
// g(x) = t*x + y0 at horizontal position xIn.
//
// The result is assembled in two steps:
//   1. wBase:  the base cell integral for the cell that contains (x, g(x)),
//   2. wConst: the sum of the border constants (areas C..H) that apply for
//              the given t, y0 and g(x),
// and returned as  wBase - wConst * SIGN(t).
//
// Side effect: assigns the globals X2, X3, X4, T2 and Y2.
//
// Hygx_tent inputs:
// xIn      x-coordinate; values above 0.9999 are clamped to 0.9999
// t        tangent of line g(x): y = t*x + y0
// y0       y-value at x==0 of line g(x)
float Hygx_tent(float xIn, float t, float y0)
{
    float x = min(xIn, 0.9999);   // clip x against right border of filter footprint
                                // such that value at x==1 is being repeated for all x>1
                                // TODO: change 0.9999 into 1.0
    float gx = t * x + y0;  // y=g(x) defines the (diagonal) line along which is integrated
    
    if (x <= -1.0 + 0.0001) return 0; // to the west of all cells, obtaining weight = 0
    // Next "early exit" can safely be omitted.
    if (t >= 0 && gx < -1) return 0; // to the south of all cells, only with positive tangent, obtains weight = 0

    // if (t==0) return 0; // stub perf test: gets 38fps

    // first, evaluate base integral HygxABPcells for the 6 cell areas (with constants A, B and P)
    // --------------------------------------------------------------------------------------------
    float wBase = 0; // weight of cell integral (with only A, B and P constants for vertical line x == -1)
	
	#if 1 // optimize experiment 3)
	// globals; made powers of x and t globals to reduce code redundancy for now.
    // Compute these powers here i.s.o. in each of the cell functions
    X2 = x*x; X3 = X2*x; X4 = X3*x;
    T2 = t*t;
    Y2 = y0*y0;
	#endif
	
    // NOTE: ONLY THE FUNCTIONS WITHIN THE AA-FILTER FOOTPRINT NEED TO BE IMPLEMENTED
    // required VERTICAL CLIPPING IN WtEdgeAB ensures functions outside are not required.

#if 0 // Readable (non-optimized) code 
    // 19.7 fps (debug and release) 
    if (x >= 0) // 0 <= x < 1: we are in cell1*
    {
        if (gx >=  0) {         wBase = i2cell11(x, t, y0); }
        else /*  gx >= -1 */ {  wBase = i2cell10(x, t, y0); }
    } 
    else // -1 <= x < 0: we are in cell0*
    {
        if (gx >=  0) {         wBase = i2cell01(x, t, y0); }
        else /*  gx >= -1 */ {  wBase = i2cell00(x, t, y0); }
    }
#else // Optimized code
    // speedup --> 25.9 fps, with if/else optimized away (debug and release) 
    // 26.2 fps measured with partial-transposed i2cellGeneric

    // if/else optimized away (without 24.2 fps)
    // cellNr: bit 1 = (x >= 0), bit 0 = (gx >= 0)  -->  0:cell00  1:cell01  2:cell10  3:cell11
    int cellNr = int(x >= 0) << 1 | int   (gx >= 0);
    
    wBase = i2cellGeneric(x, t, y0, cellNr);
#endif
    
    // Stub, rest of this function: 
    // return wBase; // gives ~32.4 fps
    
    
    // second, add diagonal "row"-integral-constants ALONG TANGENT LINE for (overlapping) areas CDEFGH
    // ----------------------------------------------------------------------------------------

    // 2D areas where integral-constants must be added, are defined by:
    // - horizontal range   where x is restricted to -1 < x < 1
    // - vertical range     where gx is restricted
    // - diagonal range     where y0 is restricted

    // Compute integral constants at horizontal cell piece-wise discontinuities at HygxBorder[C|D|E|F|G|H|] 
    // defined by y=g(x):  y==+1 (G,H), through y==0 (E,F), through y==-1 (C,D)
    // Note: 'vertical middle' areas E and F will be overlapped
    //        for positive tangent, by 'bottom' areas C and D
    //        for negative tangent, by 'top' areas    G and H

    float wConst = 0;
    //float st = sign(t);  // though sign(0)-->0 iso 1, this most likely not a problem
    float st = SIGN(t); // sign(0)-->1 (integer output)

#if 1 // Readable (non-optimized) code 
    // 25.9 fps with above i2cell optimized code (debug and release) 
    // 26.2 fps measured later
    // 27.8 fps is measured when excluding all the HygxBorder* computations (replaced by small number)
    
    // HygxBorder[G|H], only for negative tangent, below horizontal line: g(x) <= +1
    if (t < 0 && gx <= 1.0001) // Only for negative tangent
    {
        // for positive t: y0 >= 1 && y0 < 1 + t, for negative t: y0 <= 1 && y0 > 1 + t
        if ((0 >= (-y0 + 1)*st) && (0 < (-y0 + 1 + t)*st)) wConst += HygxBorderG(t, y0);
        // for positive t: y0 < 1, for negative t: y0 > 1
        else if ( 0 < (-y0+1)*st ) wConst += HygxBorderH(t, y0);
    } 
    // HygxBorder[E|F], for vertical range; above horizontal line: g(x) >= 0 (till infinity)
    if ( (t>=0 && gx>=0) || (t<0 && gx<0)) // for positive t: gx >= 0, for negative t: gx < 0 (not: gx <=0)
    //if ((gx + 0)*st >= 0) // is wrong for gx:exact-zero and neg. t.
    {
        // for positive t: y0 >= 0 && y0 < t, for negative t: y0 <= 0 && y0 > t
        // (for t == 0 neither test below can hold, so no division by t*t takes place)
        if ((0 >= (-y0 + 0)*st) && (0 < (-y0 + 0 + t)*st)) wConst += HygxBorderE(t, y0);
        // for positive t: y0 < 0, for negative t: y0 > 0
        else if (0 < (-y0 + 0)*st) wConst += HygxBorderF(t, y0);
    } 
    // HygxBorder[C|D], only for positive tangent, above horizontal line: g(x) >= -1
    if ( t > 0 && gx >= -1)
    {
        // for positive t: y0 >= -1 && y0 < -1 + t, for negative t: y0 <= -1 && y0 > -1 + t
        if ((0 >= (-y0 - 1)*st) && (0 < (-y0 - 1 + t)*st)) wConst += HygxBorderC(t, y0);
        // for positive t: y0 < -1, for negative t: y0 > -1
        else if (0 < (-y0 - 1)*st) wConst += HygxBorderD(t, y0);
    }
#else // Optimized code (still conditional addition is needed).
      // the function HygxBorderGeneric replaces all HygxBorder[C|D|E|F|G|H|], and is computed twice (unconditionally).
    // 16.4 fps measured - disappointing! Why is this much slower than the above? 
    // 31.7 fps is measured when computation of both HygxBorderGeneric is excluded (replaced by small nr), so 
    // apparently HygxBorderGeneric has become quite a bit more expensive wrt. the original HygxBorder*. !!!
    //
    // NOTE(review): unlike the readable path, this path does not test gx <= 1.0001 for the top
    // borders, and it calls HygxBorderGeneric (which divides by t*t) even when t == 0.
    // Confirm both before enabling this path.

    // top:    HygxBorder[G|H], only for negative tangent t<0,  below horizontal line: g(x) <= +1
    // bottom: HygxBorder[C|D], only for positive tangent t>0,  above horizontal line: g(x) >= -1
    // Since top and bottom HygxBorder*-constants are not needed simultaneously, only compute one of them. 
    //
    // top (t negative):    y0 > +1, then compute H otherwise G:
    int borderNrTop    = (0 < (-y0 + 1)*st) ? 5 : 4;  /* 5:H  4:G */
    // bottom (t positive): y0 < -1, then compute D otherwise C:
    int borderNrBottom = (0 < (-y0 - 1)*st) ? 1 : 0;  /* 1:D  0:C */
    int borderNrBottomTop = (t<0) ? borderNrTop : borderNrBottom;
    
    float HygxBorderCDGorH = HygxBorderGeneric(t, y0, borderNrBottomTop);
    // If this is top-border,    (t negative): if y0 > +1 + t, then  add G-or-H  otherwise don't:
    // If this is bottom-border, (t positive): if y0 < -1 + t, then  add C-or-D  otherwise don't:
    if ( 0 < (-y0 - 1*st + t)*st ) wConst += HygxBorderCDGorH;

    // HygxBorder[E|F], for vertical range; above horizontal line: g(x) >= 0 (till infinity)
    // if ( (t>=0 && gx>=0) || (t<0 && gx<0)) // for positive t: gx >= 0, for negative t: gx < 0 (not: gx <=0)
    // condition could be replaced with ((gx + 0)*st >= 0); however is wrong for gx:exact-zero and neg. t.
    //
    // for positive t: y0 < 0, for negative t: y0 > 0 then compute F otherwise E:
    int borderNrMiddle = (0 < (-y0 + 0)*st) ? 3 : 2;  /* 3:F  2:E */
    float HygxBorderEorF = HygxBorderGeneric(t, y0, borderNrMiddle);
    // for positive t: y0 < t, for negative t: y0 > t then add E-or-F otherwise don't:
    if ( 0 < (-y0 + 0 + t)*st   &&   (gx + 0)*st >= 0 ) wConst += HygxBorderEorF;

#endif

    // Combine: the accumulated border constants are subtracted with the sign of the tangent.
    float weight = wBase - wConst * st;
    return weight;
}
#endif

#pragma endregion

// ============================================================================

#pragma region TENT Hyx integral polynomials
    // When tangent=0, i.e. exact horizontal edge. Robust against xa==xb

    // Tent integral polynomial for cell11 (Hyx_tent selects it for x >= 0, y >= 0):
    //   (x^2 - 2x - 1) * (y^2 - 2y - 1) / 4
    float HyxCell11_tent(float x, float y) {
        float fx = -1 - 2*x + POW2(x);
        float fy = -1 - 2*y + POW2(y);
        return (fx*fy)/4;
    }
    // Tent integral polynomial for cell10 (Hyx_tent selects it for x >= 0, y < 0):
    //   -(x^2 - 2x - 1) * (1 + y)^2 / 4
    float HyxCell10_tent(float x, float y) {
        float fx = -1 - 2*x + POW2(x);
        float fy = POW2(1 + y);
        return -(fx*fy)/4;
    }
    // Tent integral polynomial for cell01 (Hyx_tent selects it for x < 0, y >= 0):
    //   -(1 + x)^2 * (y^2 - 2y - 1) / 4
    float HyxCell01_tent(float x, float y) {
        float fx = POW2(1 + x);
        float fy = -1 - 2*y + POW2(y);
        return -(fx*fy)/4;
    }
    // Tent integral polynomial for cell00 (Hyx_tent selects it for x < 0, y < 0):
    //   (1 + x)^2 * (1 + y)^2 / 4
    float HyxCell00_tent(float x, float y) {
        float fx = POW2(1 + x);
        float fy = POW2(1 + y);
        return (fx*fy)/4;
    }

// Hyx_tent: tent integral for the tangent == 0 case (exact horizontal edge).
// Dispatches to one of the four cell polynomials based on the signs of x and y:
//   x >= 0, y >= 0 -> cell11      x >= 0, y < 0 -> cell10
//   x <  0, y >= 0 -> cell01      x <  0, y < 0 -> cell00
//
// NOTE: ONLY THE FUNCTIONS WITHIN THE AA-FILTER FOOTPRINT NEED TO BE IMPLEMENTED;
// the required VERTICAL CLIPPING IN WtEdgeAB ensures functions outside are not required
// (cell02 / cell12 may therefore be skipped).
float Hyx_tent(float x, float y)
{
    bool rightHalf = (x >= 0);  // true: 0 <= x < 1 (cell1*), false: -1 <= x < 0 (cell0*)
    bool upperHalf = (y >= 0);

    if (rightHalf)
    {
        return upperHalf ? HyxCell11_tent(x, y) : HyxCell10_tent(x, y);
    }
    return upperHalf ? HyxCell01_tent(x, y) : HyxCell00_tent(x, y);
}
#pragma endregion










