diff options
Diffstat (limited to 'source/blender/compositor/operations/COM_GlareFogGlowOperation.cpp')
-rw-r--r-- | source/blender/compositor/operations/COM_GlareFogGlowOperation.cpp | 694 |
1 files changed, 357 insertions, 337 deletions
diff --git a/source/blender/compositor/operations/COM_GlareFogGlowOperation.cpp b/source/blender/compositor/operations/COM_GlareFogGlowOperation.cpp index c74c1899daa..e0a2790b318 100644 --- a/source/blender/compositor/operations/COM_GlareFogGlowOperation.cpp +++ b/source/blender/compositor/operations/COM_GlareFogGlowOperation.cpp @@ -28,12 +28,16 @@ typedef float fREAL; // returns next highest power of 2 of x, as well it's log2 in L2 static unsigned int nextPow2(unsigned int x, unsigned int *L2) { - unsigned int pw, x_notpow2 = x & (x - 1); - *L2 = 0; - while (x >>= 1) ++(*L2); - pw = 1 << (*L2); - if (x_notpow2) { (*L2)++; pw <<= 1; } - return pw; + unsigned int pw, x_notpow2 = x & (x - 1); + *L2 = 0; + while (x >>= 1) + ++(*L2); + pw = 1 << (*L2); + if (x_notpow2) { + (*L2)++; + pw <<= 1; + } + return pw; } //------------------------------------------------------------------------------ @@ -42,143 +46,145 @@ static unsigned int nextPow2(unsigned int x, unsigned int *L2) // use: r = revbin_upd(r, h) where h = N>>1 static unsigned int revbin_upd(unsigned int r, unsigned int h) { - while (!((r ^= h) & h)) h >>= 1; - return r; + while (!((r ^= h) & h)) + h >>= 1; + return r; } //------------------------------------------------------------------------------ static void FHT(fREAL *data, unsigned int M, unsigned int inverse) { - double tt, fc, dc, fs, ds, a = M_PI; - fREAL t1, t2; - int n2, bd, bl, istep, k, len = 1 << M, n = 1; - - int i, j = 0; - unsigned int Nh = len >> 1; - for (i = 1; i < (len - 1); ++i) { - j = revbin_upd(j, Nh); - if (j > i) { - t1 = data[i]; - data[i] = data[j]; - data[j] = t1; - } - } - - do { - fREAL *data_n = &data[n]; - - istep = n << 1; - for (k = 0; k < len; k += istep) { - t1 = data_n[k]; - data_n[k] = data[k] - t1; - data[k] += t1; - } - - n2 = n >> 1; - if (n > 2) { - fc = dc = cos(a); - fs = ds = sqrt(1.0 - fc * fc); //sin(a); - bd = n - 2; - for (bl = 1; bl < n2; bl++) { - fREAL *data_nbd = &data_n[bd]; - fREAL *data_bd = &data[bd]; - for (k = bl; k < len; k += istep) { - t1 = fc * (double)data_n[k] + fs * (double)data_nbd[k]; - t2 = fs * (double)data_n[k] - fc * (double)data_nbd[k]; - data_n[k] = data[k] - t1; - data_nbd[k] = data_bd[k] - t2; - data[k] += t1; - data_bd[k] += t2; - } - tt = fc * dc - fs * ds; - fs = fs * dc + fc * ds; - fc = tt; - bd -= 2; - } - } - - if (n > 1) { - for (k = n2; k < len; k += istep) { - t1 = data_n[k]; - data_n[k] = data[k] - t1; - data[k] += t1; - } - } - - n = istep; - a *= 0.5; - } while (n < len); - - if (inverse) { - fREAL sc = (fREAL)1 / (fREAL)len; - for (k = 0; k < len; ++k) - data[k] *= sc; - } + double tt, fc, dc, fs, ds, a = M_PI; + fREAL t1, t2; + int n2, bd, bl, istep, k, len = 1 << M, n = 1; + + int i, j = 0; + unsigned int Nh = len >> 1; + for (i = 1; i < (len - 1); ++i) { + j = revbin_upd(j, Nh); + if (j > i) { + t1 = data[i]; + data[i] = data[j]; + data[j] = t1; + } + } + + do { + fREAL *data_n = &data[n]; + + istep = n << 1; + for (k = 0; k < len; k += istep) { + t1 = data_n[k]; + data_n[k] = data[k] - t1; + data[k] += t1; + } + + n2 = n >> 1; + if (n > 2) { + fc = dc = cos(a); + fs = ds = sqrt(1.0 - fc * fc); //sin(a); + bd = n - 2; + for (bl = 1; bl < n2; bl++) { + fREAL *data_nbd = &data_n[bd]; + fREAL *data_bd = &data[bd]; + for (k = bl; k < len; k += istep) { + t1 = fc * (double)data_n[k] + fs * (double)data_nbd[k]; + t2 = fs * (double)data_n[k] - fc * (double)data_nbd[k]; + data_n[k] = data[k] - t1; + data_nbd[k] = data_bd[k] - t2; + data[k] += t1; + data_bd[k] += t2; + } + tt = fc * dc - fs * ds; + fs = fs * dc + fc * ds; + fc = tt; + bd -= 2; + } + } + + if (n > 1) { + for (k = n2; k < len; k += istep) { + t1 = data_n[k]; + data_n[k] = data[k] - t1; + data[k] += t1; + } + } + + n = istep; + a *= 0.5; + } while (n < len); + + if (inverse) { + fREAL sc = (fREAL)1 / (fREAL)len; + for (k = 0; k < len; ++k) + data[k] *= sc; + } } //------------------------------------------------------------------------------ /* 2D Fast Hartley Transform, Mx/My -> log2 of width/height, * nzp -> the row where zero pad data starts, * inverse -> see above */ -static void FHT2D(fREAL *data, unsigned int Mx, unsigned int My, - unsigned int nzp, unsigned int inverse) +static void FHT2D( + fREAL *data, unsigned int Mx, unsigned int My, unsigned int nzp, unsigned int inverse) { - unsigned int i, j, Nx, Ny, maxy; - - Nx = 1 << Mx; - Ny = 1 << My; - - // rows (forward transform skips 0 pad data) - maxy = inverse ? Ny : nzp; - for (j = 0; j < maxy; ++j) - FHT(&data[Nx * j], Mx, inverse); - - // transpose data - if (Nx == Ny) { // square - for (j = 0; j < Ny; ++j) - for (i = j + 1; i < Nx; ++i) { - unsigned int op = i + (j << Mx), np = j + (i << My); - SWAP(fREAL, data[op], data[np]); - } - } - else { // rectangular - unsigned int k, Nym = Ny - 1, stm = 1 << (Mx + My); - for (i = 0; stm > 0; i++) { + unsigned int i, j, Nx, Ny, maxy; + + Nx = 1 << Mx; + Ny = 1 << My; + + // rows (forward transform skips 0 pad data) + maxy = inverse ? Ny : nzp; + for (j = 0; j < maxy; ++j) + FHT(&data[Nx * j], Mx, inverse); + + // transpose data + if (Nx == Ny) { // square + for (j = 0; j < Ny; ++j) + for (i = j + 1; i < Nx; ++i) { + unsigned int op = i + (j << Mx), np = j + (i << My); + SWAP(fREAL, data[op], data[np]); + } + } + else { // rectangular + unsigned int k, Nym = Ny - 1, stm = 1 << (Mx + My); + for (i = 0; stm > 0; i++) { #define PRED(k) (((k & Nym) << Mx) + (k >> My)) - for (j = PRED(i); j > i; j = PRED(j)) ; - if (j < i) continue; - for (k = i, j = PRED(i); j != i; k = j, j = PRED(j), stm--) { - SWAP(fREAL, data[j], data[k]); - } + for (j = PRED(i); j > i; j = PRED(j)) + ; + if (j < i) + continue; + for (k = i, j = PRED(i); j != i; k = j, j = PRED(j), stm--) { + SWAP(fREAL, data[j], data[k]); + } #undef PRED - stm--; - } - } - - SWAP(unsigned int, Nx, Ny); - SWAP(unsigned int, Mx, My); - - // now columns == transposed rows - for (j = 0; j < Ny; ++j) - FHT(&data[Nx * j], Mx, inverse); - - // finalize - for (j = 0; j <= (Ny >> 1); j++) { - unsigned int jm = (Ny - j) & (Ny - 1); - unsigned int ji = j << Mx; - unsigned int jmi = jm << Mx; - for (i = 0; i <= (Nx >> 1); i++) { - unsigned int im = (Nx - i) & (Nx - 1); - fREAL A = data[ji + i]; - fREAL B = data[jmi + i]; - fREAL C = data[ji + im]; - fREAL D = data[jmi + im]; - fREAL E = (fREAL)0.5 * ((A + D) - (B + C)); - data[ji + i] = A - E; - data[jmi + i] = B + E; - data[ji + im] = C + E; - data[jmi + im] = D - E; - } - } - + stm--; + } + } + + SWAP(unsigned int, Nx, Ny); + SWAP(unsigned int, Mx, My); + + // now columns == transposed rows + for (j = 0; j < Ny; ++j) + FHT(&data[Nx * j], Mx, inverse); + + // finalize + for (j = 0; j <= (Ny >> 1); j++) { + unsigned int jm = (Ny - j) & (Ny - 1); + unsigned int ji = j << Mx; + unsigned int jmi = jm << Mx; + for (i = 0; i <= (Nx >> 1); i++) { + unsigned int im = (Nx - i) & (Nx - 1); + fREAL A = data[ji + i]; + fREAL B = data[jmi + i]; + fREAL C = data[ji + im]; + fREAL D = data[jmi + im]; + fREAL E = (fREAL)0.5 * ((A + D) - (B + C)); + data[ji + i] = A - E; + data[jmi + i] = B + E; + data[ji + im] = C + E; + data[jmi + im] = D - E; + } + } } //------------------------------------------------------------------------------ @@ -186,218 +192,232 @@ static void FHT2D(fREAL *data, unsigned int Mx, unsigned int My, /* 2D convolution calc, d1 *= d2, M/N - > log2 of width/height */ static void fht_convolve(fREAL *d1, fREAL *d2, unsigned int M, unsigned int N) { - fREAL a, b; - unsigned int i, j, k, L, mj, mL; - unsigned int m = 1 << M, n = 1 << N; - unsigned int m2 = 1 << (M - 1), n2 = 1 << (N - 1); - unsigned int mn2 = m << (N - 1); - - d1[0] *= d2[0]; - d1[mn2] *= d2[mn2]; - d1[m2] *= d2[m2]; - d1[m2 + mn2] *= d2[m2 + mn2]; - for (i = 1; i < m2; i++) { - k = m - i; - a = d1[i] * d2[i] - d1[k] * d2[k]; - b = d1[k] * d2[i] + d1[i] * d2[k]; - d1[i] = (b + a) * (fREAL)0.5; - d1[k] = (b - a) * (fREAL)0.5; - a = d1[i + mn2] * d2[i + mn2] - d1[k + mn2] * d2[k + mn2]; - b = d1[k + mn2] * d2[i + mn2] + d1[i + mn2] * d2[k + mn2]; - d1[i + mn2] = (b + a) * (fREAL)0.5; - d1[k + mn2] = (b - a) * (fREAL)0.5; - } - for (j = 1; j < n2; j++) { - L = n - j; - mj = j << M; - mL = L << M; - a = d1[mj] * d2[mj] - d1[mL] * d2[mL]; - b = d1[mL] * d2[mj] + d1[mj] * d2[mL]; - d1[mj] = (b + a) * (fREAL)0.5; - d1[mL] = (b - a) * (fREAL)0.5; - a = d1[m2 + mj] * d2[m2 + mj] - d1[m2 + mL] * d2[m2 + mL]; - b = d1[m2 + mL] * d2[m2 + mj] + d1[m2 + mj] * d2[m2 + mL]; - d1[m2 + mj] = (b + a) * (fREAL)0.5; - d1[m2 + mL] = (b - a) * (fREAL)0.5; - } - for (i = 1; i < m2; i++) { - k = m - i; - for (j = 1; j < n2; j++) { - L = n - j; - mj = j << M; - mL = L << M; - a = d1[i + mj] * d2[i + mj] - d1[k + mL] * d2[k + mL]; - b = d1[k + mL] * d2[i + mj] + d1[i + mj] * d2[k + mL]; - d1[i + mj] = (b + a) * (fREAL)0.5; - d1[k + mL] = (b - a) * (fREAL)0.5; - a = d1[i + mL] * d2[i + mL] - d1[k + mj] * d2[k + mj]; - b = d1[k + mj] * d2[i + mL] + d1[i + mL] * d2[k + mj]; - d1[i + mL] = (b + a) * (fREAL)0.5; - d1[k + mj] = (b - a) * (fREAL)0.5; - } - } + fREAL a, b; + unsigned int i, j, k, L, mj, mL; + unsigned int m = 1 << M, n = 1 << N; + unsigned int m2 = 1 << (M - 1), n2 = 1 << (N - 1); + unsigned int mn2 = m << (N - 1); + + d1[0] *= d2[0]; + d1[mn2] *= d2[mn2]; + d1[m2] *= d2[m2]; + d1[m2 + mn2] *= d2[m2 + mn2]; + for (i = 1; i < m2; i++) { + k = m - i; + a = d1[i] * d2[i] - d1[k] * d2[k]; + b = d1[k] * d2[i] + d1[i] * d2[k]; + d1[i] = (b + a) * (fREAL)0.5; + d1[k] = (b - a) * (fREAL)0.5; + a = d1[i + mn2] * d2[i + mn2] - d1[k + mn2] * d2[k + mn2]; + b = d1[k + mn2] * d2[i + mn2] + d1[i + mn2] * d2[k + mn2]; + d1[i + mn2] = (b + a) * (fREAL)0.5; + d1[k + mn2] = (b - a) * (fREAL)0.5; + } + for (j = 1; j < n2; j++) { + L = n - j; + mj = j << M; + mL = L << M; + a = d1[mj] * d2[mj] - d1[mL] * d2[mL]; + b = d1[mL] * d2[mj] + d1[mj] * d2[mL]; + d1[mj] = (b + a) * (fREAL)0.5; + d1[mL] = (b - a) * (fREAL)0.5; + a = d1[m2 + mj] * d2[m2 + mj] - d1[m2 + mL] * d2[m2 + mL]; + b = d1[m2 + mL] * d2[m2 + mj] + d1[m2 + mj] * d2[m2 + mL]; + d1[m2 + mj] = (b + a) * (fREAL)0.5; + d1[m2 + mL] = (b - a) * (fREAL)0.5; + } + for (i = 1; i < m2; i++) { + k = m - i; + for (j = 1; j < n2; j++) { + L = n - j; + mj = j << M; + mL = L << M; + a = d1[i + mj] * d2[i + mj] - d1[k + mL] * d2[k + mL]; + b = d1[k + mL] * d2[i + mj] + d1[i + mj] * d2[k + mL]; + d1[i + mj] = (b + a) * (fREAL)0.5; + d1[k + mL] = (b - a) * (fREAL)0.5; + a = d1[i + mL] * d2[i + mL] - d1[k + mj] * d2[k + mj]; + b = d1[k + mj] * d2[i + mL] + d1[i + mL] * d2[k + mj]; + d1[i + mL] = (b + a) * (fREAL)0.5; + d1[k + mj] = (b - a) * (fREAL)0.5; + } + } } //------------------------------------------------------------------------------ static void convolve(float *dst, MemoryBuffer *in1, MemoryBuffer *in2) { - fREAL *data1, *data2, *fp; - unsigned int w2, h2, hw, hh, log2_w, log2_h; - fRGB wt, *colp; - int x, y, ch; - int xbl, ybl, nxb, nyb, xbsz, ybsz; - bool in2done = false; - const unsigned int kernelWidth = in2->getWidth(); - const unsigned int kernelHeight = in2->getHeight(); - const unsigned int imageWidth = in1->getWidth(); - const unsigned int imageHeight = in1->getHeight(); - float *kernelBuffer = in2->getBuffer(); - float *imageBuffer = in1->getBuffer(); - - MemoryBuffer *rdst = new MemoryBuffer(COM_DT_COLOR, in1->getRect()); - memset(rdst->getBuffer(), 0, rdst->getWidth() * rdst->getHeight() * COM_NUM_CHANNELS_COLOR * sizeof(float)); - - // convolution result width & height - w2 = 2 * kernelWidth - 1; - h2 = 2 * kernelHeight - 1; - // FFT pow2 required size & log2 - w2 = nextPow2(w2, &log2_w); - h2 = nextPow2(h2, &log2_h); - - // alloc space - data1 = (fREAL *)MEM_callocN(3 * w2 * h2 * sizeof(fREAL), "convolve_fast FHT data1"); - data2 = (fREAL *)MEM_callocN(w2 * h2 * sizeof(fREAL), "convolve_fast FHT data2"); - - // normalize convolutor - wt[0] = wt[1] = wt[2] = 0.0f; - for (y = 0; y < kernelHeight; y++) { - colp = (fRGB *)&kernelBuffer[y * kernelWidth * COM_NUM_CHANNELS_COLOR]; - for (x = 0; x < kernelWidth; x++) - add_v3_v3(wt, colp[x]); - } - if (wt[0] != 0.0f) wt[0] = 1.0f / wt[0]; - if (wt[1] != 0.0f) wt[1] = 1.0f / wt[1]; - if (wt[2] != 0.0f) wt[2] = 1.0f / wt[2]; - for (y = 0; y < kernelHeight; y++) { - colp = (fRGB *)&kernelBuffer[y * kernelWidth * COM_NUM_CHANNELS_COLOR]; - for (x = 0; x < kernelWidth; x++) - mul_v3_v3(colp[x], wt); - } - - // copy image data, unpacking interleaved RGBA into separate channels - // only need to calc data1 once - - // block add-overlap - hw = kernelWidth >> 1; - hh = kernelHeight >> 1; - xbsz = (w2 + 1) - kernelWidth; - ybsz = (h2 + 1) - kernelHeight; - nxb = imageWidth / xbsz; - if (imageWidth % xbsz) nxb++; - nyb = imageHeight / ybsz; - if (imageHeight % ybsz) nyb++; - for (ybl = 0; ybl < nyb; ybl++) { - for (xbl = 0; xbl < nxb; xbl++) { - - // each channel one by one - for (ch = 0; ch < 3; ch++) { - fREAL *data1ch = &data1[ch * w2 * h2]; - - // only need to calc fht data from in2 once, can re-use for every block - if (!in2done) { - // in2, channel ch -> data1 - for (y = 0; y < kernelHeight; y++) { - fp = &data1ch[y * w2]; - colp = (fRGB *)&kernelBuffer[y * kernelWidth * COM_NUM_CHANNELS_COLOR]; - for (x = 0; x < kernelWidth; x++) - fp[x] = colp[x][ch]; - } - } - - // in1, channel ch -> data2 - memset(data2, 0, w2 * h2 * sizeof(fREAL)); - for (y = 0; y < ybsz; y++) { - int yy = ybl * ybsz + y; - if (yy >= imageHeight) continue; - fp = &data2[y * w2]; - colp = (fRGB *)&imageBuffer[yy * imageWidth * COM_NUM_CHANNELS_COLOR]; - for (x = 0; x < xbsz; x++) { - int xx = xbl * xbsz + x; - if (xx >= imageWidth) continue; - fp[x] = colp[xx][ch]; - } - } - - // forward FHT - // zero pad data start is different for each == height+1 - if (!in2done) FHT2D(data1ch, log2_w, log2_h, kernelHeight + 1, 0); - FHT2D(data2, log2_w, log2_h, kernelHeight + 1, 0); - - // FHT2D transposed data, row/col now swapped - // convolve & inverse FHT - fht_convolve(data2, data1ch, log2_h, log2_w); - FHT2D(data2, log2_h, log2_w, 0, 1); - // data again transposed, so in order again - - // overlap-add result - for (y = 0; y < (int)h2; y++) { - const int yy = ybl * ybsz + y - hh; - if ((yy < 0) || (yy >= imageHeight)) continue; - fp = &data2[y * w2]; - colp = (fRGB *)&rdst->getBuffer()[yy * imageWidth * COM_NUM_CHANNELS_COLOR]; - for (x = 0; x < (int)w2; x++) { - const int xx = xbl * xbsz + x - hw; - if ((xx < 0) || (xx >= imageWidth)) continue; - colp[xx][ch] += fp[x]; - } - } - - } - in2done = true; - } - } - - MEM_freeN(data2); - MEM_freeN(data1); - memcpy(dst, rdst->getBuffer(), sizeof(float) * imageWidth * imageHeight * COM_NUM_CHANNELS_COLOR); - delete(rdst); + fREAL *data1, *data2, *fp; + unsigned int w2, h2, hw, hh, log2_w, log2_h; + fRGB wt, *colp; + int x, y, ch; + int xbl, ybl, nxb, nyb, xbsz, ybsz; + bool in2done = false; + const unsigned int kernelWidth = in2->getWidth(); + const unsigned int kernelHeight = in2->getHeight(); + const unsigned int imageWidth = in1->getWidth(); + const unsigned int imageHeight = in1->getHeight(); + float *kernelBuffer = in2->getBuffer(); + float *imageBuffer = in1->getBuffer(); + + MemoryBuffer *rdst = new MemoryBuffer(COM_DT_COLOR, in1->getRect()); + memset(rdst->getBuffer(), + 0, + rdst->getWidth() * rdst->getHeight() * COM_NUM_CHANNELS_COLOR * sizeof(float)); + + // convolution result width & height + w2 = 2 * kernelWidth - 1; + h2 = 2 * kernelHeight - 1; + // FFT pow2 required size & log2 + w2 = nextPow2(w2, &log2_w); + h2 = nextPow2(h2, &log2_h); + + // alloc space + data1 = (fREAL *)MEM_callocN(3 * w2 * h2 * sizeof(fREAL), "convolve_fast FHT data1"); + data2 = (fREAL *)MEM_callocN(w2 * h2 * sizeof(fREAL), "convolve_fast FHT data2"); + + // normalize convolutor + wt[0] = wt[1] = wt[2] = 0.0f; + for (y = 0; y < kernelHeight; y++) { + colp = (fRGB *)&kernelBuffer[y * kernelWidth * COM_NUM_CHANNELS_COLOR]; + for (x = 0; x < kernelWidth; x++) + add_v3_v3(wt, colp[x]); + } + if (wt[0] != 0.0f) + wt[0] = 1.0f / wt[0]; + if (wt[1] != 0.0f) + wt[1] = 1.0f / wt[1]; + if (wt[2] != 0.0f) + wt[2] = 1.0f / wt[2]; + for (y = 0; y < kernelHeight; y++) { + colp = (fRGB *)&kernelBuffer[y * kernelWidth * COM_NUM_CHANNELS_COLOR]; + for (x = 0; x < kernelWidth; x++) + mul_v3_v3(colp[x], wt); + } + + // copy image data, unpacking interleaved RGBA into separate channels + // only need to calc data1 once + + // block add-overlap + hw = kernelWidth >> 1; + hh = kernelHeight >> 1; + xbsz = (w2 + 1) - kernelWidth; + ybsz = (h2 + 1) - kernelHeight; + nxb = imageWidth / xbsz; + if (imageWidth % xbsz) + nxb++; + nyb = imageHeight / ybsz; + if (imageHeight % ybsz) + nyb++; + for (ybl = 0; ybl < nyb; ybl++) { + for (xbl = 0; xbl < nxb; xbl++) { + + // each channel one by one + for (ch = 0; ch < 3; ch++) { + fREAL *data1ch = &data1[ch * w2 * h2]; + + // only need to calc fht data from in2 once, can re-use for every block + if (!in2done) { + // in2, channel ch -> data1 + for (y = 0; y < kernelHeight; y++) { + fp = &data1ch[y * w2]; + colp = (fRGB *)&kernelBuffer[y * kernelWidth * COM_NUM_CHANNELS_COLOR]; + for (x = 0; x < kernelWidth; x++) + fp[x] = colp[x][ch]; + } + } + + // in1, channel ch -> data2 + memset(data2, 0, w2 * h2 * sizeof(fREAL)); + for (y = 0; y < ybsz; y++) { + int yy = ybl * ybsz + y; + if (yy >= imageHeight) + continue; + fp = &data2[y * w2]; + colp = (fRGB *)&imageBuffer[yy * imageWidth * COM_NUM_CHANNELS_COLOR]; + for (x = 0; x < xbsz; x++) { + int xx = xbl * xbsz + x; + if (xx >= imageWidth) + continue; + fp[x] = colp[xx][ch]; + } + } + + // forward FHT + // zero pad data start is different for each == height+1 + if (!in2done) + FHT2D(data1ch, log2_w, log2_h, kernelHeight + 1, 0); + FHT2D(data2, log2_w, log2_h, kernelHeight + 1, 0); + + // FHT2D transposed data, row/col now swapped + // convolve & inverse FHT + fht_convolve(data2, data1ch, log2_h, log2_w); + FHT2D(data2, log2_h, log2_w, 0, 1); + // data again transposed, so in order again + + // overlap-add result + for (y = 0; y < (int)h2; y++) { + const int yy = ybl * ybsz + y - hh; + if ((yy < 0) || (yy >= imageHeight)) + continue; + fp = &data2[y * w2]; + colp = (fRGB *)&rdst->getBuffer()[yy * imageWidth * COM_NUM_CHANNELS_COLOR]; + for (x = 0; x < (int)w2; x++) { + const int xx = xbl * xbsz + x - hw; + if ((xx < 0) || (xx >= imageWidth)) + continue; + colp[xx][ch] += fp[x]; + } + } + } + in2done = true; + } + } + + MEM_freeN(data2); + MEM_freeN(data1); + memcpy( + dst, rdst->getBuffer(), sizeof(float) * imageWidth * imageHeight * COM_NUM_CHANNELS_COLOR); + delete (rdst); } -void GlareFogGlowOperation::generateGlare(float *data, MemoryBuffer *inputTile, NodeGlare *settings) +void GlareFogGlowOperation::generateGlare(float *data, + MemoryBuffer *inputTile, + NodeGlare *settings) { - int x, y; - float scale, u, v, r, w, d; - fRGB fcol; - MemoryBuffer *ckrn; - unsigned int sz = 1 << settings->size; - const float cs_r = 1.0f, cs_g = 1.0f, cs_b = 1.0f; - - // temp. src image - // make the convolution kernel - rcti kernelRect; - BLI_rcti_init(&kernelRect, 0, sz, 0, sz); - ckrn = new MemoryBuffer(COM_DT_COLOR, &kernelRect); - - scale = 0.25f * sqrtf((float)(sz * sz)); - - for (y = 0; y < sz; ++y) { - v = 2.0f * (y / (float)sz) - 1.0f; - for (x = 0; x < sz; ++x) { - u = 2.0f * (x / (float)sz) - 1.0f; - r = (u * u + v * v) * scale; - d = -sqrtf(sqrtf(sqrtf(r))) * 9.0f; - fcol[0] = expf(d * cs_r); - fcol[1] = expf(d * cs_g); - fcol[2] = expf(d * cs_b); - // linear window good enough here, visual result counts, not scientific analysis - //w = (1.0f-fabs(u))*(1.0f-fabs(v)); - // actually, Hanning window is ok, cos^2 for some reason is slower - w = (0.5f + 0.5f * cosf(u * (float)M_PI)) * (0.5f + 0.5f * cosf(v * (float)M_PI)); - mul_v3_fl(fcol, w); - ckrn->writePixel(x, y, fcol); - } - } - - convolve(data, inputTile, ckrn); - delete ckrn; + int x, y; + float scale, u, v, r, w, d; + fRGB fcol; + MemoryBuffer *ckrn; + unsigned int sz = 1 << settings->size; + const float cs_r = 1.0f, cs_g = 1.0f, cs_b = 1.0f; + + // temp. src image + // make the convolution kernel + rcti kernelRect; + BLI_rcti_init(&kernelRect, 0, sz, 0, sz); + ckrn = new MemoryBuffer(COM_DT_COLOR, &kernelRect); + + scale = 0.25f * sqrtf((float)(sz * sz)); + + for (y = 0; y < sz; ++y) { + v = 2.0f * (y / (float)sz) - 1.0f; + for (x = 0; x < sz; ++x) { + u = 2.0f * (x / (float)sz) - 1.0f; + r = (u * u + v * v) * scale; + d = -sqrtf(sqrtf(sqrtf(r))) * 9.0f; + fcol[0] = expf(d * cs_r); + fcol[1] = expf(d * cs_g); + fcol[2] = expf(d * cs_b); + // linear window good enough here, visual result counts, not scientific analysis + //w = (1.0f-fabs(u))*(1.0f-fabs(v)); + // actually, Hanning window is ok, cos^2 for some reason is slower + w = (0.5f + 0.5f * cosf(u * (float)M_PI)) * (0.5f + 0.5f * cosf(v * (float)M_PI)); + mul_v3_fl(fcol, w); + ckrn->writePixel(x, y, fcol); + } + } + + convolve(data, inputTile, ckrn); + delete ckrn; } |