From 83dfade37a746043dfc8d38f57514706d8505352 Mon Sep 17 00:00:00 2001 From: Daniel Genrich Date: Mon, 25 Jan 2010 15:10:14 +0000 Subject: =?UTF-8?q?Smoke:=20The=20well=20known=20Miika=20H=C3=A4m=C3=A4l?= =?UTF-8?q?=C3=A4inen=20(aka=20MiikaH)=20patch=20(http://blenderartists.or?= =?UTF-8?q?g/forum/showthread.php=3Ft=3D158317&page=3D42)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Better (and windows enabled) OpenMP handling (> 2x-5x speed) * More Volumetric Texture mapping options (heat, etc) <-- Matt if that's not to your liking, just revert that part, it's separate anyway * Initial velocity taken from particle settings (no more slow starting) * Option to select compression method (there seem to be a bug in my high compression usage, at least it's been reported to result in exploding smoke - better use low compression for the time being) It's been tested since a while but as usual please report any (new!) bugs. ;-) --- intern/smoke/intern/EIGENVALUE_HELPER.cpp | 885 ++++++++++++++++++++++++++++++ intern/smoke/intern/EIGENVALUE_HELPER.h | 69 ++- intern/smoke/intern/FLUID_3D.cpp | 761 +++++++++++++++++++------ intern/smoke/intern/FLUID_3D.h | 85 ++- intern/smoke/intern/FLUID_3D_SOLVERS.cpp | 536 ++++++------------ intern/smoke/intern/FLUID_3D_STATIC.cpp | 204 ++++--- intern/smoke/intern/LU_HELPER.cpp | 136 +++++ intern/smoke/intern/LU_HELPER.h | 45 +- intern/smoke/intern/WTURBULENCE.cpp | 166 ++++-- intern/smoke/intern/smoke_API.cpp | 12 +- 10 files changed, 2177 insertions(+), 722 deletions(-) create mode 100644 intern/smoke/intern/EIGENVALUE_HELPER.cpp create mode 100644 intern/smoke/intern/LU_HELPER.cpp (limited to 'intern/smoke') diff --git a/intern/smoke/intern/EIGENVALUE_HELPER.cpp b/intern/smoke/intern/EIGENVALUE_HELPER.cpp new file mode 100644 index 00000000000..f11b5795e33 --- /dev/null +++ b/intern/smoke/intern/EIGENVALUE_HELPER.cpp @@ -0,0 +1,885 @@ + +#include "EIGENVALUE_HELPER.h" + + +void Eigentred2(sEigenvalue& eval) { + + // This is derived from the Algol procedures tred2 by + // Bowdler, Martin, Reinsch, and Wilkinson, Handbook for + // Auto. Comp., Vol.ii-Linear Algebra, and the corresponding + // Fortran subroutine in EISPACK. + + int n=eval.n; + + for (int j = 0; j < n; j++) { + eval.d[j] = eval.V[n-1][j]; + } + + // Householder reduction to tridiagonal form. + + for (int i = n-1; i > 0; i--) { + + // Scale to avoid under/overflow. + + float scale = 0.0; + float h = 0.0; + for (int k = 0; k < i; k++) { + scale = scale + fabs(eval.d[k]); + } + if (scale == 0.0) { + eval.e[i] = eval.d[i-1]; + for (int j = 0; j < i; j++) { + eval.d[j] = eval.V[i-1][j]; + eval.V[i][j] = 0.0; + eval.V[j][i] = 0.0; + } + } else { + + // Generate Householder vector. + + for (int k = 0; k < i; k++) { + eval.d[k] /= scale; + h += eval.d[k] * eval.d[k]; + } + float f = eval.d[i-1]; + float g = sqrt(h); + if (f > 0) { + g = -g; + } + eval.e[i] = scale * g; + h = h - f * g; + eval.d[i-1] = f - g; + for (int j = 0; j < i; j++) { + eval.e[j] = 0.0; + } + + // Apply similarity transformation to remaining columns. + + for (int j = 0; j < i; j++) { + f = eval.d[j]; + eval.V[j][i] = f; + g = eval.e[j] + eval.V[j][j] * f; + for (int k = j+1; k <= i-1; k++) { + g += eval.V[k][j] * eval.d[k]; + eval.e[k] += eval.V[k][j] * f; + } + eval.e[j] = g; + } + f = 0.0; + for (int j = 0; j < i; j++) { + eval.e[j] /= h; + f += eval.e[j] * eval.d[j]; + } + float hh = f / (h + h); + for (int j = 0; j < i; j++) { + eval.e[j] -= hh * eval.d[j]; + } + for (int j = 0; j < i; j++) { + f = eval.d[j]; + g = eval.e[j]; + for (int k = j; k <= i-1; k++) { + eval.V[k][j] -= (f * eval.e[k] + g * eval.d[k]); + } + eval.d[j] = eval.V[i-1][j]; + eval.V[i][j] = 0.0; + } + } + eval.d[i] = h; + } + + // Accumulate transformations. + + for (int i = 0; i < n-1; i++) { + eval.V[n-1][i] = eval.V[i][i]; + eval.V[i][i] = 1.0; + float h = eval.d[i+1]; + if (h != 0.0) { + for (int k = 0; k <= i; k++) { + eval.d[k] = eval.V[k][i+1] / h; + } + for (int j = 0; j <= i; j++) { + float g = 0.0; + for (int k = 0; k <= i; k++) { + g += eval.V[k][i+1] * eval.V[k][j]; + } + for (int k = 0; k <= i; k++) { + eval.V[k][j] -= g * eval.d[k]; + } + } + } + for (int k = 0; k <= i; k++) { + eval.V[k][i+1] = 0.0; + } + } + for (int j = 0; j < n; j++) { + eval.d[j] = eval.V[n-1][j]; + eval.V[n-1][j] = 0.0; + } + eval.V[n-1][n-1] = 1.0; + eval.e[0] = 0.0; +} + +void Eigencdiv(sEigenvalue& eval, float xr, float xi, float yr, float yi) { + float r,d; + if (fabs(yr) > fabs(yi)) { + r = yi/yr; + d = yr + r*yi; + eval.cdivr = (xr + r*xi)/d; + eval.cdivi = (xi - r*xr)/d; + } else { + r = yr/yi; + d = yi + r*yr; + eval.cdivr = (r*xr + xi)/d; + eval.cdivi = (r*xi - xr)/d; + } + } + +void Eigentql2 (sEigenvalue& eval) { + + // This is derived from the Algol procedures tql2, by + // Bowdler, Martin, Reinsch, and Wilkinson, Handbook for + // Auto. Comp., Vol.ii-Linear Algebra, and the corresponding + // Fortran subroutine in EISPACK. + + int n=eval.n; + + for (int i = 1; i < n; i++) { + eval.e[i-1] = eval.e[i]; + } + eval.e[n-1] = 0.0; + + float f = 0.0; + float tst1 = 0.0; + float eps = pow(2.0,-52.0); + for (int l = 0; l < n; l++) { + + // Find small subdiagonal element + + tst1 = max(tst1,fabs(eval.d[l]) + fabs(eval.e[l])); + int m = l; + + // Original while-loop from Java code + while (m < n) { + if (fabs(eval.e[m]) <= eps*tst1) { + break; + } + m++; + } + + + // If m == l, d[l] is an eigenvalue, + // otherwise, iterate. + + if (m > l) { + int iter = 0; + do { + iter = iter + 1; // (Could check iteration count here.) + + // Compute implicit shift + + float g = eval.d[l]; + float p = (eval.d[l+1] - g) / (2.0 * eval.e[l]); + float r = hypot(p,1.0); + if (p < 0) { + r = -r; + } + eval.d[l] = eval.e[l] / (p + r); + eval.d[l+1] = eval.e[l] * (p + r); + float dl1 = eval.d[l+1]; + float h = g - eval.d[l]; + for (int i = l+2; i < n; i++) { + eval.d[i] -= h; + } + f = f + h; + + // Implicit QL transformation. + + p = eval.d[m]; + float c = 1.0; + float c2 = c; + float c3 = c; + float el1 = eval.e[l+1]; + float s = 0.0; + float s2 = 0.0; + for (int i = m-1; i >= l; i--) { + c3 = c2; + c2 = c; + s2 = s; + g = c * eval.e[i]; + h = c * p; + r = hypot(p,eval.e[i]); + eval.e[i+1] = s * r; + s = eval.e[i] / r; + c = p / r; + p = c * eval.d[i] - s * g; + eval.d[i+1] = h + s * (c * g + s * eval.d[i]); + + // Accumulate transformation. + + for (int k = 0; k < n; k++) { + h = eval.V[k][i+1]; + eval.V[k][i+1] = s * eval.V[k][i] + c * h; + eval.V[k][i] = c * eval.V[k][i] - s * h; + } + } + p = -s * s2 * c3 * el1 * eval.e[l] / dl1; + eval.e[l] = s * p; + eval.d[l] = c * p; + + // Check for convergence. + + } while (fabs(eval.e[l]) > eps*tst1); + } + eval.d[l] = eval.d[l] + f; + eval.e[l] = 0.0; + } + + // Sort eigenvalues and corresponding vectors. + + for (int i = 0; i < n-1; i++) { + int k = i; + float p = eval.d[i]; + for (int j = i+1; j < n; j++) { + if (eval.d[j] < p) { + k = j; + p = eval.d[j]; + } + } + if (k != i) { + eval.d[k] = eval.d[i]; + eval.d[i] = p; + for (int j = 0; j < n; j++) { + p = eval.V[j][i]; + eval.V[j][i] = eval.V[j][k]; + eval.V[j][k] = p; + } + } + } +} + +void Eigenorthes (sEigenvalue& eval) { + + // This is derived from the Algol procedures orthes and ortran, + // by Martin and Wilkinson, Handbook for Auto. Comp., + // Vol.ii-Linear Algebra, and the corresponding + // Fortran subroutines in EISPACK. + + int n=eval.n; + + int low = 0; + int high = n-1; + + for (int m = low+1; m <= high-1; m++) { + + // Scale column. + + float scale = 0.0; + for (int i = m; i <= high; i++) { + scale = scale + fabs(eval.H[i][m-1]); + } + if (scale != 0.0) { + + // Compute Householder transformation. + + float h = 0.0; + for (int i = high; i >= m; i--) { + eval.ort[i] = eval.H[i][m-1]/scale; + h += eval.ort[i] * eval.ort[i]; + } + float g = sqrt(h); + if (eval.ort[m] > 0) { + g = -g; + } + h = h - eval.ort[m] * g; + eval.ort[m] = eval.ort[m] - g; + + // Apply Householder similarity transformation + // H = (I-u*u'/h)*H*(I-u*u')/h) + + for (int j = m; j < n; j++) { + float f = 0.0; + for (int i = high; i >= m; i--) { + f += eval.ort[i]*eval.H[i][j]; + } + f = f/h; + for (int i = m; i <= high; i++) { + eval.H[i][j] -= f*eval.ort[i]; + } + } + + for (int i = 0; i <= high; i++) { + float f = 0.0; + for (int j = high; j >= m; j--) { + f += eval.ort[j]*eval.H[i][j]; + } + f = f/h; + for (int j = m; j <= high; j++) { + eval.H[i][j] -= f*eval.ort[j]; + } + } + eval.ort[m] = scale*eval.ort[m]; + eval.H[m][m-1] = scale*g; + } + } + + // Accumulate transformations (Algol's ortran). + + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { + eval.V[i][j] = (i == j ? 1.0 : 0.0); + } + } + + for (int m = high-1; m >= low+1; m--) { + if (eval.H[m][m-1] != 0.0) { + for (int i = m+1; i <= high; i++) { + eval.ort[i] = eval.H[i][m-1]; + } + for (int j = m; j <= high; j++) { + float g = 0.0; + for (int i = m; i <= high; i++) { + g += eval.ort[i] * eval.V[i][j]; + } + // Double division avoids possible underflow + g = (g / eval.ort[m]) / eval.H[m][m-1]; + for (int i = m; i <= high; i++) { + eval.V[i][j] += g * eval.ort[i]; + } + } + } + } + } + +void Eigenhqr2 (sEigenvalue& eval) { + + // This is derived from the Algol procedure hqr2, + // by Martin and Wilkinson, Handbook for Auto. Comp., + // Vol.ii-Linear Algebra, and the corresponding + // Fortran subroutine in EISPACK. + + // Initialize + + int nn = eval.n; + int n = nn-1; + int low = 0; + int high = nn-1; + float eps = pow(2.0,-52.0); + float exshift = 0.0; + float p=0,q=0,r=0,s=0,z=0,t,w,x,y; + + // Store roots isolated by balanc and compute matrix norm + + float norm = 0.0; + for (int i = 0; i < nn; i++) { + if ((i < low) || (i > high)) { + eval.d[i] = eval.H[i][i]; + eval.e[i] = 0.0; + } + for (int j = max(i-1,0); j < nn; j++) { + norm = norm + fabs(eval.H[i][j]); + } + } + + // Outer loop over eigenvalue index + + int iter = 0; + int totIter = 0; + while (n >= low) { + + // NT limit no. of iterations + totIter++; + if(totIter>100) { + //if(totIter>15) std::cout<<"!!!!iter ABORT !!!!!!! "< low) { + s = fabs(eval.H[l-1][l-1]) + fabs(eval.H[l][l]); + if (s == 0.0) { + s = norm; + } + if (fabs(eval.H[l][l-1]) < eps * s) { + break; + } + l--; + } + + // Check for convergence + // One root found + + if (l == n) { + eval.H[n][n] = eval.H[n][n] + exshift; + eval.d[n] = eval.H[n][n]; + eval.e[n] = 0.0; + n--; + iter = 0; + + // Two roots found + + } else if (l == n-1) { + w = eval.H[n][n-1] * eval.H[n-1][n]; + p = (eval.H[n-1][n-1] - eval.H[n][n]) / 2.0; + q = p * p + w; + z = sqrt(fabs(q)); + eval.H[n][n] = eval.H[n][n] + exshift; + eval.H[n-1][n-1] = eval.H[n-1][n-1] + exshift; + x = eval.H[n][n]; + + // float pair + + if (q >= 0) { + if (p >= 0) { + z = p + z; + } else { + z = p - z; + } + eval.d[n-1] = x + z; + eval.d[n] = eval.d[n-1]; + if (z != 0.0) { + eval.d[n] = x - w / z; + } + eval.e[n-1] = 0.0; + eval.e[n] = 0.0; + x = eval.H[n][n-1]; + s = fabs(x) + fabs(z); + p = x / s; + q = z / s; + r = sqrt(p * p+q * q); + p = p / r; + q = q / r; + + // Row modification + + for (int j = n-1; j < nn; j++) { + z = eval.H[n-1][j]; + eval.H[n-1][j] = q * z + p * eval.H[n][j]; + eval.H[n][j] = q * eval.H[n][j] - p * z; + } + + // Column modification + + for (int i = 0; i <= n; i++) { + z = eval.H[i][n-1]; + eval.H[i][n-1] = q * z + p * eval.H[i][n]; + eval.H[i][n] = q * eval.H[i][n] - p * z; + } + + // Accumulate transformations + + for (int i = low; i <= high; i++) { + z = eval.V[i][n-1]; + eval.V[i][n-1] = q * z + p * eval.V[i][n]; + eval.V[i][n] = q * eval.V[i][n] - p * z; + } + + // Complex pair + + } else { + eval.d[n-1] = x + p; + eval.d[n] = x + p; + eval.e[n-1] = z; + eval.e[n] = -z; + } + n = n - 2; + iter = 0; + + // No convergence yet + + } else { + + // Form shift + + x = eval.H[n][n]; + y = 0.0; + w = 0.0; + if (l < n) { + y = eval.H[n-1][n-1]; + w = eval.H[n][n-1] * eval.H[n-1][n]; + } + + // Wilkinson's original ad hoc shift + + if (iter == 10) { + exshift += x; + for (int i = low; i <= n; i++) { + eval.H[i][i] -= x; + } + s = fabs(eval.H[n][n-1]) + fabs(eval.H[n-1][n-2]); + x = y = 0.75 * s; + w = -0.4375 * s * s; + } + + // MATLAB's new ad hoc shift + + if (iter == 30) { + s = (y - x) / 2.0; + s = s * s + w; + if (s > 0) { + s = sqrt(s); + if (y < x) { + s = -s; + } + s = x - w / ((y - x) / 2.0 + s); + for (int i = low; i <= n; i++) { + eval.H[i][i] -= s; + } + exshift += s; + x = y = w = 0.964; + } + } + + iter = iter + 1; // (Could check iteration count here.) + + // Look for two consecutive small sub-diagonal elements + + int m = n-2; + while (m >= l) { + z = eval.H[m][m]; + r = x - z; + s = y - z; + p = (r * s - w) / eval.H[m+1][m] + eval.H[m][m+1]; + q = eval.H[m+1][m+1] - z - r - s; + r = eval.H[m+2][m+1]; + s = fabs(p) + fabs(q) + fabs(r); + p = p / s; + q = q / s; + r = r / s; + if (m == l) { + break; + } + if (fabs(eval.H[m][m-1]) * (fabs(q) + fabs(r)) < + eps * (fabs(p) * (fabs(eval.H[m-1][m-1]) + fabs(z) + + fabs(eval.H[m+1][m+1])))) { + break; + } + m--; + } + + for (int i = m+2; i <= n; i++) { + eval.H[i][i-2] = 0.0; + if (i > m+2) { + eval.H[i][i-3] = 0.0; + } + } + + // Double QR step involving rows l:n and columns m:n + + for (int k = m; k <= n-1; k++) { + int notlast = (k != n-1); + if (k != m) { + p = eval.H[k][k-1]; + q = eval.H[k+1][k-1]; + r = (notlast ? eval.H[k+2][k-1] : 0.0); + x = fabs(p) + fabs(q) + fabs(r); + if (x != 0.0) { + p = p / x; + q = q / x; + r = r / x; + } + } + if (x == 0.0) { + break; + } + s = sqrt(p * p + q * q + r * r); + if (p < 0) { + s = -s; + } + if (s != 0) { + if (k != m) { + eval.H[k][k-1] = -s * x; + } else if (l != m) { + eval.H[k][k-1] = -eval.H[k][k-1]; + } + p = p + s; + x = p / s; + y = q / s; + z = r / s; + q = q / p; + r = r / p; + + // Row modification + + for (int j = k; j < nn; j++) { + p = eval.H[k][j] + q * eval.H[k+1][j]; + if (notlast) { + p = p + r * eval.H[k+2][j]; + eval.H[k+2][j] = eval.H[k+2][j] - p * z; + } + eval.H[k][j] = eval.H[k][j] - p * x; + eval.H[k+1][j] = eval.H[k+1][j] - p * y; + } + + // Column modification + + for (int i = 0; i <= min(n,k+3); i++) { + p = x * eval.H[i][k] + y * eval.H[i][k+1]; + if (notlast) { + p = p + z * eval.H[i][k+2]; + eval.H[i][k+2] = eval.H[i][k+2] - p * r; + } + eval.H[i][k] = eval.H[i][k] - p; + eval.H[i][k+1] = eval.H[i][k+1] - p * q; + } + + // Accumulate transformations + + for (int i = low; i <= high; i++) { + p = x * eval.V[i][k] + y * eval.V[i][k+1]; + if (notlast) { + p = p + z * eval.V[i][k+2]; + eval.V[i][k+2] = eval.V[i][k+2] - p * r; + } + eval.V[i][k] = eval.V[i][k] - p; + eval.V[i][k+1] = eval.V[i][k+1] - p * q; + } + } // (s != 0) + } // k loop + } // check convergence + } // while (n >= low) + //if(totIter>15) std::cout<<"!!!!iter "<= 0; n--) { + p = eval.d[n]; + q = eval.e[n]; + + // float vector + + if (q == 0) { + int l = n; + eval.H[n][n] = 1.0; + for (int i = n-1; i >= 0; i--) { + w = eval.H[i][i] - p; + r = 0.0; + for (int j = l; j <= n; j++) { + r = r + eval.H[i][j] * eval.H[j][n]; + } + if (eval.e[i] < 0.0) { + z = w; + s = r; + } else { + l = i; + if (eval.e[i] == 0.0) { + if (w != 0.0) { + eval.H[i][n] = -r / w; + } else { + eval.H[i][n] = -r / (eps * norm); + } + + // Solve real equations + + } else { + x = eval.H[i][i+1]; + y = eval.H[i+1][i]; + q = (eval.d[i] - p) * (eval.d[i] - p) + eval.e[i] * eval.e[i]; + t = (x * s - z * r) / q; + eval.H[i][n] = t; + if (fabs(x) > fabs(z)) { + eval.H[i+1][n] = (-r - w * t) / x; + } else { + eval.H[i+1][n] = (-s - y * t) / z; + } + } + + // Overflow control + + t = fabs(eval.H[i][n]); + if ((eps * t) * t > 1) { + for (int j = i; j <= n; j++) { + eval.H[j][n] = eval.H[j][n] / t; + } + } + } + } + + // Complex vector + + } else if (q < 0) { + int l = n-1; + + // Last vector component imaginary so matrix is triangular + + if (fabs(eval.H[n][n-1]) > fabs(eval.H[n-1][n])) { + eval.H[n-1][n-1] = q / eval.H[n][n-1]; + eval.H[n-1][n] = -(eval.H[n][n] - p) / eval.H[n][n-1]; + } else { + Eigencdiv(eval, 0.0,-eval.H[n-1][n],eval.H[n-1][n-1]-p,q); + eval.H[n-1][n-1] = eval.cdivr; + eval.H[n-1][n] = eval.cdivi; + } + eval.H[n][n-1] = 0.0; + eval.H[n][n] = 1.0; + for (int i = n-2; i >= 0; i--) { + float ra,sa,vr,vi; + ra = 0.0; + sa = 0.0; + for (int j = l; j <= n; j++) { + ra = ra + eval.H[i][j] * eval.H[j][n-1]; + sa = sa + eval.H[i][j] * eval.H[j][n]; + } + w = eval.H[i][i] - p; + + if (eval.e[i] < 0.0) { + z = w; + r = ra; + s = sa; + } else { + l = i; + if (eval.e[i] == 0) { + Eigencdiv(eval,-ra,-sa,w,q); + eval.H[i][n-1] = eval.cdivr; + eval.H[i][n] = eval.cdivi; + } else { + + // Solve complex equations + + x = eval.H[i][i+1]; + y = eval.H[i+1][i]; + vr = (eval.d[i] - p) * (eval.d[i] - p) + eval.e[i] * eval.e[i] - q * q; + vi = (eval.d[i] - p) * 2.0 * q; + if ((vr == 0.0) && (vi == 0.0)) { + vr = eps * norm * (fabs(w) + fabs(q) + + fabs(x) + fabs(y) + fabs(z)); + } + Eigencdiv(eval, x*r-z*ra+q*sa,x*s-z*sa-q*ra,vr,vi); + eval.H[i][n-1] = eval.cdivr; + eval.H[i][n] = eval.cdivi; + if (fabs(x) > (fabs(z) + fabs(q))) { + eval.H[i+1][n-1] = (-ra - w * eval.H[i][n-1] + q * eval.H[i][n]) / x; + eval.H[i+1][n] = (-sa - w * eval.H[i][n] - q * eval.H[i][n-1]) / x; + } else { + Eigencdiv(eval, -r-y*eval.H[i][n-1],-s-y*eval.H[i][n],z,q); + eval.H[i+1][n-1] = eval.cdivr; + eval.H[i+1][n] = eval.cdivi; + } + } + + // Overflow control + + t = max(fabs(eval.H[i][n-1]),fabs(eval.H[i][n])); + if ((eps * t) * t > 1) { + for (int j = i; j <= n; j++) { + eval.H[j][n-1] = eval.H[j][n-1] / t; + eval.H[j][n] = eval.H[j][n] / t; + } + } + } + } + } + } + + // Vectors of isolated roots + + for (int i = 0; i < nn; i++) { + if (i < low || i > high) { + for (int j = i; j < nn; j++) { + eval.V[i][j] = eval.H[i][j]; + } + } + } + + // Back transformation to get eigenvectors of original matrix + + for (int j = nn-1; j >= low; j--) { + for (int i = low; i <= high; i++) { + z = 0.0; + for (int k = low; k <= min(j,high); k++) { + z = z + eval.V[i][k] * eval.H[k][j]; + } + eval.V[i][j] = z; + } + } +} + + + +int computeEigenvalues3x3( + float dout[3], + float a[3][3]) +{ + /*TNT::Array2D A = TNT::Array2D(3,3, &a[0][0]); + TNT::Array1D eig = TNT::Array1D(3); + TNT::Array1D eigImag = TNT::Array1D(3); + JAMA::Eigenvalue jeig = JAMA::Eigenvalue(A);*/ + + sEigenvalue jeig; + + // Compute the values + { + jeig.n = 3; + int n=3; + //V = Array2D(n,n); + //d = Array1D(n); + //e = Array1D(n); + for (int y=0; y<3; y++) + { + jeig.d[y]=0.0f; + jeig.e[y]=0.0f; + for (int t=0; t<3; t++) jeig.V[y][t]=0.0f; + } + + jeig.issymmetric = 1; + for (int j = 0; (j < 3) && jeig.issymmetric; j++) { + for (int i = 0; (i < 3) && jeig.issymmetric; i++) { + jeig.issymmetric = (a[i][j] == a[j][i]); + } + } + + if (jeig.issymmetric) { + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 3; j++) { + jeig.V[i][j] = a[i][j]; + } + } + + // Tridiagonalize. + Eigentred2(jeig); + + // Diagonalize. + Eigentql2(jeig); + + } else { + //H = TNT::Array2D(n,n); + for (int y=0; y<3; y++) + { + jeig.ort[y]=0.0f; + for (int t=0; t<3; t++) jeig.H[y][t]=0.0f; + } + //ort = TNT::Array1D(n); + + for (int j = 0; j < n; j++) { + for (int i = 0; i < n; i++) { + jeig.H[i][j] = a[i][j]; + } + } + + // Reduce to Hessenberg form. + Eigenorthes(jeig); + + // Reduce Hessenberg to real Schur form. + Eigenhqr2(jeig); + } + } + + //jeig.getfloatEigenvalues(eig); + + // complex ones + //jeig.getImagEigenvalues(eigImag); + dout[0] = sqrt(jeig.d[0]*jeig.d[0] + jeig.e[0]*jeig.e[0]); + dout[1] = sqrt(jeig.d[1]*jeig.d[1] + jeig.e[1]*jeig.e[1]); + dout[2] = sqrt(jeig.d[2]*jeig.d[2] + jeig.e[2]*jeig.e[2]); + return 0; +} diff --git a/intern/smoke/intern/EIGENVALUE_HELPER.h b/intern/smoke/intern/EIGENVALUE_HELPER.h index 6ff61c5ca8e..9c169711c09 100644 --- a/intern/smoke/intern/EIGENVALUE_HELPER.h +++ b/intern/smoke/intern/EIGENVALUE_HELPER.h @@ -15,33 +15,60 @@ // along with Wavelet Turbulence. If not, see . // // Copyright 2008 Theodore Kim and Nils Thuerey -// +// +////////////////////////////////////////////////////////////////////// +// Modified to not require TNT matrix library anymore. It was very slow +// when being run in parallel. Required TNT JAMA::Eigenvalue libraries were +// converted into independent functions. +// - MiikaH +// ////////////////////////////////////////////////////////////////////// // Helper function, compute eigenvalues of 3x3 matrix ////////////////////////////////////////////////////////////////////// -#include "tnt/jama_eig.h" +#ifndef EIGENVAL_HELPER_H +#define EIGENVAL_HELPER_H + +//#include "tnt/jama_eig.h" + +#include +#include + +using namespace std; ////////////////////////////////////////////////////////////////////// // eigenvalues of 3x3 non-symmetric matrix ////////////////////////////////////////////////////////////////////// -int inline computeEigenvalues3x3( - float dout[3], - float a[3][3]) + + +struct sEigenvalue { - TNT::Array2D A = TNT::Array2D(3,3, &a[0][0]); - TNT::Array1D eig = TNT::Array1D(3); - TNT::Array1D eigImag = TNT::Array1D(3); - JAMA::Eigenvalue jeig = JAMA::Eigenvalue(A); - jeig.getRealEigenvalues(eig); - - // complex ones - jeig.getImagEigenvalues(eigImag); - dout[0] = sqrt(eig[0]*eig[0] + eigImag[0]*eigImag[0]); - dout[1] = sqrt(eig[1]*eig[1] + eigImag[1]*eigImag[1]); - dout[2] = sqrt(eig[2]*eig[2] + eigImag[2]*eigImag[2]); - return 0; -} - -#undef rfabs -#undef ROT + int n; + int issymmetric; + float d[3]; /* real part */ + float e[3]; /* img part */ + float V[3][3]; /* Eigenvectors */ + + float H[3][3]; + + + float ort[3]; + + float cdivr; + float cdivi; +}; + +void Eigentred2(sEigenvalue& eval); + +void Eigencdiv(sEigenvalue& eval, float xr, float xi, float yr, float yi); + +void Eigentql2 (sEigenvalue& eval); + +void Eigenorthes (sEigenvalue& eval); + +void Eigenhqr2 (sEigenvalue& eval); + +int computeEigenvalues3x3(float dout[3], float a[3][3]); + + +#endif diff --git a/intern/smoke/intern/FLUID_3D.cpp b/intern/smoke/intern/FLUID_3D.cpp index 729d73bb4f3..25673630fc4 100644 --- a/intern/smoke/intern/FLUID_3D.cpp +++ b/intern/smoke/intern/FLUID_3D.cpp @@ -19,6 +19,11 @@ // FLUID_3D.cpp: implementation of the FLUID_3D class. // ////////////////////////////////////////////////////////////////////// +// Heavy parallel optimization done. Many of the old functions now +// take begin and end parameters and process only specified part of the data. +// Some functions were divided into multiple ones. +// - MiikaH +////////////////////////////////////////////////////////////////////// #include "FLUID_3D.h" #include "IMAGE.h" @@ -26,6 +31,10 @@ #include "SPHERE.h" #include +#if PARALLEL==1 +#include +#endif // PARALLEL + // boundary conditions of the fluid domain #define DOMAIN_BC_FRONT 0 // z #define DOMAIN_BC_TOP 1 // y @@ -90,6 +99,13 @@ FLUID_3D::FLUID_3D(int *res, float *p0, float dt) : _heatOld = new float[_totalCells]; _obstacles = new unsigned char[_totalCells]; // set 0 at end of step + // For threaded version: + _xVelocityTemp = new float[_totalCells]; + _yVelocityTemp = new float[_totalCells]; + _zVelocityTemp = new float[_totalCells]; + _densityTemp = new float[_totalCells]; + _heatTemp = new float[_totalCells]; + // DG TODO: check if alloc went fine for (int x = 0; x < _totalCells; x++) @@ -167,6 +183,12 @@ FLUID_3D::~FLUID_3D() if (_obstacles) delete[] _obstacles; // if (_wTurbulence) delete _wTurbulence; + if (_xVelocityTemp) delete[] _xVelocityTemp; + if (_yVelocityTemp) delete[] _yVelocityTemp; + if (_zVelocityTemp) delete[] _zVelocityTemp; + if (_densityTemp) delete[] _densityTemp; + if (_heatTemp) delete[] _heatTemp; + // printf("deleted fluid\n"); } @@ -182,108 +204,306 @@ void FLUID_3D::initBlenderRNA(float *alpha, float *beta) ////////////////////////////////////////////////////////////////////// void FLUID_3D::step() { - // addSmokeTestCase(_density, _res); - // addSmokeTestCase(_heat, _res); - - wipeBoundaries(); - - // run the solvers - addVorticity(); - addBuoyancy(_heat, _density); - addForce(); - project(); - diffuseHeat(); - - // advect everything - advectMacCormack(); - - // if(_wTurbulence) { - // _wTurbulence->stepTurbulenceFull(_dt/_dx, - // _xVelocity, _yVelocity, _zVelocity, _obstacles); - // _wTurbulence->stepTurbulenceReadable(_dt/_dx, - // _xVelocity, _yVelocity, _zVelocity, _obstacles); - // } -/* - // no file output - float *src = _density; - string prefix = string("./original.preview/density_fullxy_"); - writeImageSliceXY(src,_res, _res[2]/2, prefix, _totalSteps); -*/ - // artificial damping -- this is necessary because we use a - // collated grid, and at very coarse grid resolutions, banding - // artifacts can occur - artificialDamping(_xVelocity); - artificialDamping(_yVelocity); - artificialDamping(_zVelocity); -/* -// no file output - string pbrtPrefix = string("./pbrt/density_small_"); - IMAGE::dumpPBRT(_totalSteps, pbrtPrefix, _density, _res[0],_res[1],_res[2]); - */ - _totalTime += _dt; - _totalSteps++; - // todo xxx dg: only clear obstacles, not boundaries - // memset(_obstacles, 0, sizeof(unsigned char)*_xRes*_yRes*_zRes); + int threadval = 1; +#if PARALLEL==1 + threadval = omp_get_max_threads(); +#endif + + int stepParts = 1; + float partSize = _zRes; + +#if PARALLEL==1 + stepParts = threadval*2; // Dividing parallelized sections into numOfThreads * 2 sections + partSize = (float)_zRes/stepParts; // Size of one part; + + if (partSize < 4) {stepParts = threadval; // If the slice gets too low (might actually slow things down, change it to larger + partSize = (float)_zRes/stepParts;} + if (partSize < 4) {stepParts = (int)(ceil((float)_zRes/4.0f)); // If it's still too low (only possible on future systems with +24 cores), change it to 4 + partSize = (float)_zRes/stepParts;} +#else + int zBegin=0; + int zEnd=_zRes; +#endif + + +#if PARALLEL==1 + #pragma omp parallel + { + #pragma omp for schedule(static,1) + for (int i=0; i so continuing in step 3 +} + +////////////////////////////////////////////////////////////////////// +// Advect using the MacCormack method from the Selle paper +////////////////////////////////////////////////////////////////////// +void FLUID_3D::advectMacCormackEnd2(int zBegin, int zEnd) +{ const float dt0 = _dt / _dx; - // use force arrays as temp arrays - for (int x = 0; x < _totalCells; x++) - _xForce[x] = _yForce[x] = 0.0; + Vec3Int res = Vec3Int(_xRes,_yRes,_zRes); + // use force array as temp arrays float* t1 = _xForce; - float* t2 = _yForce; - advectFieldMacCormack(dt0, _xVelocityOld, _yVelocityOld, _zVelocityOld, _densityOld, _density, t1,t2, res, _obstacles); - advectFieldMacCormack(dt0, _xVelocityOld, _yVelocityOld, _zVelocityOld, _heatOld, _heat, t1,t2, res, _obstacles); - advectFieldMacCormack(dt0, _xVelocityOld, _yVelocityOld, _zVelocityOld, _xVelocityOld, _xVelocity, t1,t2, res, _obstacles); - advectFieldMacCormack(dt0, _xVelocityOld, _yVelocityOld, _zVelocityOld, _yVelocityOld, _yVelocity, t1,t2, res, _obstacles); - advectFieldMacCormack(dt0, _xVelocityOld, _yVelocityOld, _zVelocityOld, _zVelocityOld, _zVelocity, t1,t2, res, _obstacles); + // advectFieldMacCormack2(dt, xVelocity, yVelocity, zVelocity, oldField, newField, tempfield, temp, res, obstacles) + + advectFieldMacCormack2(dt0, _xVelocityOld, _yVelocityOld, _zVelocityOld, _densityOld, _density, _densityTemp, t1, res, _obstacles, zBegin, zEnd); + advectFieldMacCormack2(dt0, _xVelocityOld, _yVelocityOld, _zVelocityOld, _heatOld, _heat, _heatTemp, t1, res, _obstacles, zBegin, zEnd); + advectFieldMacCormack2(dt0, _xVelocityOld, _yVelocityOld, _zVelocityOld, _xVelocityOld, _xVelocityTemp, _xVelocity, t1, res, _obstacles, zBegin, zEnd); + advectFieldMacCormack2(dt0, _xVelocityOld, _yVelocityOld, _zVelocityOld, _yVelocityOld, _yVelocityTemp, _yVelocity, t1, res, _obstacles, zBegin, zEnd); + advectFieldMacCormack2(dt0, _xVelocityOld, _yVelocityOld, _zVelocityOld, _zVelocityOld, _zVelocityTemp, _zVelocity, t1, res, _obstacles, zBegin, zEnd); + + if(DOMAIN_BC_LEFT == 0) copyBorderX(_xVelocityTemp, res, zBegin, zEnd); + else setZeroX(_xVelocityTemp, res, zBegin, zEnd); - if(DOMAIN_BC_LEFT == 0) copyBorderX(_xVelocity, res); - else setZeroX(_xVelocity, res); + if(DOMAIN_BC_TOP == 0) copyBorderY(_yVelocityTemp, res, zBegin, zEnd); + else setZeroY(_yVelocityTemp, res, zBegin, zEnd); - if(DOMAIN_BC_TOP == 0) copyBorderY(_yVelocity, res); - else setZeroY(_yVelocity, res); + if(DOMAIN_BC_FRONT == 0) copyBorderZ(_zVelocityTemp, res, zBegin, zEnd); + else setZeroZ(_zVelocityTemp, res, zBegin, zEnd); - if(DOMAIN_BC_FRONT == 0) copyBorderZ(_zVelocity, res); - else setZeroZ(_zVelocity, res); + setZeroBorder(_density, res, zBegin, zEnd); + setZeroBorder(_heat, res, zBegin, zEnd); - setZeroBorder(_density, res); - setZeroBorder(_heat, res); - for (int x = 0; x < _totalCells; x++) - t1[x] = t2[x] = 0.0; + /*int begin=zBegin * _slabSize; + int end=begin + (zEnd - zBegin) * _slabSize; + for (int x = begin; x < end; x++) + _xForce[x] = _yForce[x] = 0.0f;*/ } diff --git a/intern/smoke/intern/FLUID_3D.h b/intern/smoke/intern/FLUID_3D.h index a7be7f58335..ab121ef506e 100644 --- a/intern/smoke/intern/FLUID_3D.h +++ b/intern/smoke/intern/FLUID_3D.h @@ -19,6 +19,11 @@ // FLUID_3D.h: interface for the FLUID_3D class. // ////////////////////////////////////////////////////////////////////// +// Heavy parallel optimization done. Many of the old functions now +// take begin and end parameters and process only specified part of the data. +// Some functions were divided into multiple ones. +// - MiikaH +////////////////////////////////////////////////////////////////////// #ifndef FLUID_3D_H #define FLUID_3D_H @@ -74,7 +79,8 @@ class FLUID_3D int _totalImgDumps; int _totalVelDumps; - void artificialDamping(float* field); + void artificialDampingSL(int zBegin, int zEnd); + void artificialDampingExactSL(int pos); // fields float* _density; @@ -92,6 +98,13 @@ class FLUID_3D float* _zForce; unsigned char* _obstacles; + // Required for proper threading: + float* _xVelocityTemp; + float* _yVelocityTemp; + float* _zVelocityTemp; + float* _heatTemp; + float* _densityTemp; + // CG fields int _iterations; @@ -107,13 +120,14 @@ class FLUID_3D // WTURBULENCE* _wTurbulence; // boundary setting functions - void copyBorderAll(float* field); + void copyBorderAll(float* field, int zBegin, int zEnd); // timestepping functions - void wipeBoundaries(); - void addForce(); - void addVorticity(); - void addBuoyancy(float *heat, float *density); + void wipeBoundaries(int zBegin, int zEnd); + void wipeBoundariesSL(int zBegin, int zEnd); + void addForce(int zBegin, int zEnd); + void addVorticity(int zBegin, int zEnd); + void addBuoyancy(float *heat, float *density, int zBegin, int zEnd); // solver stuff void project(); @@ -122,41 +136,58 @@ class FLUID_3D void solvePressurePre(float* field, float* b, unsigned char* skip); void solveHeat(float* field, float* b, unsigned char* skip); + // handle obstacle boundaries - void setObstacleBoundaries(float *_pressure); - void setObstaclePressure(float *_pressure); + void setObstacleBoundaries(float *_pressure, int zBegin, int zEnd); + void setObstaclePressure(float *_pressure, int zBegin, int zEnd); public: // advection, accessed e.g. by WTURBULENCE class - void advectMacCormack(); + //void advectMacCormack(); + void advectMacCormackBegin(int zBegin, int zEnd); + void advectMacCormackEnd1(int zBegin, int zEnd); + void advectMacCormackEnd2(int zBegin, int zEnd); // boundary setting functions - static void copyBorderX(float* field, Vec3Int res); - static void copyBorderY(float* field, Vec3Int res); - static void copyBorderZ(float* field, Vec3Int res); - static void setNeumannX(float* field, Vec3Int res); - static void setNeumannY(float* field, Vec3Int res); - static void setNeumannZ(float* field, Vec3Int res); - static void setZeroX(float* field, Vec3Int res); - static void setZeroY(float* field, Vec3Int res); - static void setZeroZ(float* field, Vec3Int res); - static void setZeroBorder(float* field, Vec3Int res) { - setZeroX(field, res); - setZeroY(field, res); - setZeroZ(field, res); + static void copyBorderX(float* field, Vec3Int res, int zBegin, int zEnd); + static void copyBorderY(float* field, Vec3Int res, int zBegin, int zEnd); + static void copyBorderZ(float* field, Vec3Int res, int zBegin, int zEnd); + static void setNeumannX(float* field, Vec3Int res, int zBegin, int zEnd); + static void setNeumannY(float* field, Vec3Int res, int zBegin, int zEnd); + static void setNeumannZ(float* field, Vec3Int res, int zBegin, int zEnd); + static void setZeroX(float* field, Vec3Int res, int zBegin, int zEnd); + static void setZeroY(float* field, Vec3Int res, int zBegin, int zEnd); + static void setZeroZ(float* field, Vec3Int res, int zBegin, int zEnd); + static void setZeroBorder(float* field, Vec3Int res, int zBegin, int zEnd) { + setZeroX(field, res, zBegin, zEnd); + setZeroY(field, res, zBegin, zEnd); + setZeroZ(field, res, zBegin, zEnd); }; + + // static advection functions, also used by WTURBULENCE static void advectFieldSemiLagrange(const float dt, const float* velx, const float* vely, const float* velz, - float* oldField, float* newField, Vec3Int res); - static void advectFieldMacCormack(const float dt, const float* xVelocity, const float* yVelocity, const float* zVelocity, - float* oldField, float* newField, float* temp1, float* temp2, Vec3Int res, const unsigned char* obstacles); + float* oldField, float* newField, Vec3Int res, int zBegin, int zEnd); + static void advectFieldMacCormack1(const float dt, const float* xVelocity, const float* yVelocity, const float* zVelocity, + float* oldField, float* tempResult, Vec3Int res, int zBegin, int zEnd); + static void advectFieldMacCormack2(const float dt, const float* xVelocity, const float* yVelocity, const float* zVelocity, + float* oldField, float* newField, float* tempResult, float* temp1,Vec3Int res, const unsigned char* obstacles, int zBegin, int zEnd); + + + // temp ones for testing + /*static void advectFieldMacCormack(const float dt, const float* xVelocity, const float* yVelocity, const float* zVelocity, + float* oldField, float* newField, float* temp1, float* temp2, Vec3Int res, const unsigned char* obstacles);*/ + /*static void advectFieldSemiLagrange2(const float dt, const float* velx, const float* vely, const float* velz, + float* oldField, float* newField, Vec3Int res);*/ // maccormack helper functions static void clampExtrema(const float dt, const float* xVelocity, const float* yVelocity, const float* zVelocity, - float* oldField, float* newField, Vec3Int res); + float* oldField, float* newField, Vec3Int res, int zBegin, int zEnd); static void clampOutsideRays(const float dt, const float* xVelocity, const float* yVelocity, const float* zVelocity, - float* oldField, float* newField, Vec3Int res, const unsigned char* obstacles, const float *oldAdvection); + float* oldField, float* newField, Vec3Int res, const unsigned char* obstacles, const float *oldAdvection, int zBegin, int zEnd); + + // output helper functions // static void writeImageSliceXY(const float *field, Vec3Int res, int slice, string prefix, int picCnt, float scale=1.); diff --git a/intern/smoke/intern/FLUID_3D_SOLVERS.cpp b/intern/smoke/intern/FLUID_3D_SOLVERS.cpp index 7d078d86d61..8b961494ce5 100644 --- a/intern/smoke/intern/FLUID_3D_SOLVERS.cpp +++ b/intern/smoke/intern/FLUID_3D_SOLVERS.cpp @@ -19,16 +19,25 @@ // FLUID_3D.cpp: implementation of the FLUID_3D class. // ////////////////////////////////////////////////////////////////////// +// Both solvers optimized by merging loops and precalculating +// stuff used in iteration loop. +// - MiikaH +////////////////////////////////////////////////////////////////////// #include "FLUID_3D.h" #include #define SOLVER_ACCURACY 1e-06 -void FLUID_3D::solvePressurePre(float* field, float* b, unsigned char* skip) +////////////////////////////////////////////////////////////////////// +// solve the heat equation with CG +////////////////////////////////////////////////////////////////////// +void FLUID_3D::solveHeat(float* field, float* b, unsigned char* skip) { int x, y, z; + const int twoxr = 2 * _xRes; size_t index; - float *_q, *_Precond, *_h, *_residual, *_direction; + const float heatConst = _dt * _heatDiffusion / (_dx * _dx); + float *_q, *_residual, *_direction, *_Acenter; // i = 0 int i = 0; @@ -36,246 +45,201 @@ void FLUID_3D::solvePressurePre(float* field, float* b, unsigned char* skip) _residual = new float[_totalCells]; // set 0 _direction = new float[_totalCells]; // set 0 _q = new float[_totalCells]; // set 0 - _h = new float[_totalCells]; // set 0 - _Precond = new float[_totalCells]; // set 0 + _Acenter = new float[_totalCells]; // set 0 - memset(_residual, 0, sizeof(float)*_xRes*_yRes*_zRes); - memset(_q, 0, sizeof(float)*_xRes*_yRes*_zRes); - memset(_direction, 0, sizeof(float)*_xRes*_yRes*_zRes); - memset(_h, 0, sizeof(float)*_xRes*_yRes*_zRes); - memset(_Precond, 0, sizeof(float)*_xRes*_yRes*_zRes); - - // r = b - Ax - index = _slabSize + _xRes + 1; - for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes) - for (y = 1; y < _yRes - 1; y++, index += 2) - for (x = 1; x < _xRes - 1; x++, index++) - { - // if the cell is a variable - float Acenter = 0.0f; - if (!skip[index]) - { - // set the matrix to the Poisson stencil in order - if (!skip[index + 1]) Acenter += 1.; - if (!skip[index - 1]) Acenter += 1.; - if (!skip[index + _xRes]) Acenter += 1.; - if (!skip[index - _xRes]) Acenter += 1.; - if (!skip[index + _slabSize]) Acenter += 1.; - if (!skip[index - _slabSize]) Acenter += 1.; - } - - _residual[index] = b[index] - (Acenter * field[index] + - field[index - 1] * (skip[index - 1] ? 0.0 : -1.0f)+ - field[index + 1] * (skip[index + 1] ? 0.0 : -1.0f)+ - field[index - _xRes] * (skip[index - _xRes] ? 0.0 : -1.0f)+ - field[index + _xRes] * (skip[index + _xRes] ? 0.0 : -1.0f)+ - field[index - _slabSize] * (skip[index - _slabSize] ? 0.0 : -1.0f)+ - field[index + _slabSize] * (skip[index + _slabSize] ? 0.0 : -1.0f) ); - _residual[index] = (skip[index]) ? 0.0f : _residual[index]; + memset(_residual, 0, sizeof(float)*_totalCells); + memset(_q, 0, sizeof(float)*_totalCells); + memset(_direction, 0, sizeof(float)*_totalCells); + memset(_Acenter, 0, sizeof(float)*_totalCells); - // P^-1 - if(Acenter < 1.0) - _Precond[index] = 0.0; - else - _Precond[index] = 1.0 / Acenter; + float deltaNew = 0.0f; - // p = P^-1 * r - _direction[index] = _residual[index] * _Precond[index]; - } + // r = b - Ax + index = _slabSize + _xRes + 1; + for (z = 1; z < _zRes - 1; z++, index += twoxr) + for (y = 1; y < _yRes - 1; y++, index += 2) + for (x = 1; x < _xRes - 1; x++, index++) + { + // if the cell is a variable + _Acenter[index] = 1.0f; + if (!skip[index]) + { + // set the matrix to the Poisson stencil in order + if (!skip[index + 1]) _Acenter[index] += heatConst; + if (!skip[index - 1]) _Acenter[index] += heatConst; + if (!skip[index + _xRes]) _Acenter[index] += heatConst; + if (!skip[index - _xRes]) _Acenter[index] += heatConst; + if (!skip[index + _slabSize]) _Acenter[index] += heatConst; + if (!skip[index - _slabSize]) _Acenter[index] += heatConst; + + _residual[index] = b[index] - (_Acenter[index] * field[index] + + field[index - 1] * (skip[index - 1] ? 0.0 : -heatConst) + + field[index + 1] * (skip[index + 1] ? 0.0 : -heatConst) + + field[index - _xRes] * (skip[index - _xRes] ? 0.0 : -heatConst) + + field[index + _xRes] * (skip[index + _xRes] ? 0.0 : -heatConst) + + field[index - _slabSize] * (skip[index - _slabSize] ? 0.0 : -heatConst) + + field[index + _slabSize] * (skip[index + _slabSize] ? 0.0 : -heatConst)); + } + else + { + _residual[index] = 0.0f; + } - // deltaNew = transpose(r) * p - float deltaNew = 0.0f; - index = _slabSize + _xRes + 1; - for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes) - for (y = 1; y < _yRes - 1; y++, index += 2) - for (x = 1; x < _xRes - 1; x++, index++) - deltaNew += _residual[index] * _direction[index]; + _direction[index] = _residual[index]; + deltaNew += _residual[index] * _residual[index]; + } - // delta0 = deltaNew - // float delta0 = deltaNew; // While deltaNew > (eps^2) * delta0 const float eps = SOLVER_ACCURACY; - //while ((i < _iterations) && (deltaNew > eps*delta0)) float maxR = 2.0f * eps; - // while (i < _iterations) - while ((i < _iterations) && (maxR > 0.001*eps)) + while ((i < _iterations) && (maxR > eps)) { - // (s) q = Ad (p) + // q = Ad + float alpha = 0.0f; + index = _slabSize + _xRes + 1; - for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes) + for (z = 1; z < _zRes - 1; z++, index += twoxr) for (y = 1; y < _yRes - 1; y++, index += 2) for (x = 1; x < _xRes - 1; x++, index++) { // if the cell is a variable - float Acenter = 0.0f; if (!skip[index]) { - // set the matrix to the Poisson stencil in order - if (!skip[index + 1]) Acenter += 1.; - if (!skip[index - 1]) Acenter += 1.; - if (!skip[index + _xRes]) Acenter += 1.; - if (!skip[index - _xRes]) Acenter += 1.; - if (!skip[index + _slabSize]) Acenter += 1.; - if (!skip[index - _slabSize]) Acenter += 1.; + + _q[index] = (_Acenter[index] * _direction[index] + + _direction[index - 1] * (skip[index - 1] ? 0.0 : -heatConst) + + _direction[index + 1] * (skip[index + 1] ? 0.0 : -heatConst) + + _direction[index - _xRes] * (skip[index - _xRes] ? 0.0 : -heatConst) + + _direction[index + _xRes] * (skip[index + _xRes] ? 0.0 : -heatConst) + + _direction[index - _slabSize] * (skip[index - _slabSize] ? 0.0 : -heatConst) + + _direction[index + _slabSize] * (skip[index + _slabSize] ? 0.0 : -heatConst)); } - - _q[index] = Acenter * _direction[index] + - _direction[index - 1] * (skip[index - 1] ? 0.0 : -1.0f) + - _direction[index + 1] * (skip[index + 1] ? 0.0 : -1.0f) + - _direction[index - _xRes] * (skip[index - _xRes] ? 0.0 : -1.0f) + - _direction[index + _xRes] * (skip[index + _xRes] ? 0.0 : -1.0f)+ - _direction[index - _slabSize] * (skip[index - _slabSize] ? 0.0 : -1.0f) + - _direction[index + _slabSize] * (skip[index + _slabSize] ? 0.0 : -1.0f); - _q[index] = (skip[index]) ? 0.0f : _q[index]; + else + { + _q[index] = 0.0f; + } + alpha += _direction[index] * _q[index]; } - // alpha = deltaNew / (transpose(d) * q) - float alpha = 0.0f; - index = _slabSize + _xRes + 1; - for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes) - for (y = 1; y < _yRes - 1; y++, index += 2) - for (x = 1; x < _xRes - 1; x++, index++) - alpha += _direction[index] * _q[index]; if (fabs(alpha) > 0.0f) alpha = deltaNew / alpha; + + float deltaOld = deltaNew; + deltaNew = 0.0f; - // x = x + alpha * d - index = _slabSize + _xRes + 1; - for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes) - for (y = 1; y < _yRes - 1; y++, index += 2) - for (x = 1; x < _xRes - 1; x++, index++) - field[index] += alpha * _direction[index]; + maxR = 0.0f; - // r = r - alpha * q - maxR = 0.0; index = _slabSize + _xRes + 1; - for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes) - for (y = 1; y < _yRes - 1; y++, index += 2) - for (x = 1; x < _xRes - 1; x++, index++) - { - _residual[index] -= alpha * _q[index]; - // maxR = (_residual[index] > maxR) ? _residual[index] : maxR; - } - - // if(maxR <= eps) - // break; - - // h = P^-1 * r - index = _slabSize + _xRes + 1; - for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes) + for (z = 1; z < _zRes - 1; z++, index += twoxr) for (y = 1; y < _yRes - 1; y++, index += 2) for (x = 1; x < _xRes - 1; x++, index++) { - _h[index] = _Precond[index] * _residual[index]; - } + field[index] += alpha * _direction[index]; - // deltaOld = deltaNew - float deltaOld = deltaNew; + _residual[index] -= alpha * _q[index]; + maxR = (_residual[index] > maxR) ? _residual[index] : maxR; - // deltaNew = transpose(r) * h - deltaNew = 0.0f; - index = _slabSize + _xRes + 1; - for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes) - for (y = 1; y < _yRes - 1; y++, index += 2) - for (x = 1; x < _xRes - 1; x++, index++) - { - deltaNew += _residual[index] * _h[index]; - maxR = (_residual[index]* _h[index] > maxR) ? _residual[index]* _h[index] : maxR; + deltaNew += _residual[index] * _residual[index]; } - // beta = deltaNew / deltaOld float beta = deltaNew / deltaOld; - // d = h + beta * d index = _slabSize + _xRes + 1; - for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes) + for (z = 1; z < _zRes - 1; z++, index += twoxr) for (y = 1; y < _yRes - 1; y++, index += 2) for (x = 1; x < _xRes - 1; x++, index++) - _direction[index] = _h[index] + beta * _direction[index]; + _direction[index] = _residual[index] + beta * _direction[index]; - // i = i + 1 + i++; } - // cout << i << " iterations converged to " << sqrt(maxR) << endl; + // cout << i << " iterations converged to " << maxR << endl; - if (_h) delete[] _h; - if (_Precond) delete[] _Precond; if (_residual) delete[] _residual; if (_direction) delete[] _direction; if (_q) delete[] _q; + if (_Acenter) delete[] _Acenter; } -////////////////////////////////////////////////////////////////////// -// solve the poisson equation with CG -////////////////////////////////////////////////////////////////////// -#if 0 -void FLUID_3D::solvePressure(float* field, float* b, unsigned char* skip) + +void FLUID_3D::solvePressurePre(float* field, float* b, unsigned char* skip) { int x, y, z; size_t index; + float *_q, *_Precond, *_h, *_residual, *_direction; // i = 0 int i = 0; + _residual = new float[_totalCells]; // set 0 + _direction = new float[_totalCells]; // set 0 + _q = new float[_totalCells]; // set 0 + _h = new float[_totalCells]; // set 0 + _Precond = new float[_totalCells]; // set 0 + memset(_residual, 0, sizeof(float)*_xRes*_yRes*_zRes); memset(_q, 0, sizeof(float)*_xRes*_yRes*_zRes); memset(_direction, 0, sizeof(float)*_xRes*_yRes*_zRes); + memset(_h, 0, sizeof(float)*_xRes*_yRes*_zRes); + memset(_Precond, 0, sizeof(float)*_xRes*_yRes*_zRes); - // r = b - Ax - index = _slabSize + _xRes + 1; - for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes) - for (y = 1; y < _yRes - 1; y++, index += 2) - for (x = 1; x < _xRes - 1; x++, index++) - { - // if the cell is a variable - float Acenter = 0.0f; - if (!skip[index]) - { - // set the matrix to the Poisson stencil in order - if (!skip[index + 1]) Acenter += 1.; - if (!skip[index - 1]) Acenter += 1.; - if (!skip[index + _xRes]) Acenter += 1.; - if (!skip[index - _xRes]) Acenter += 1.; - if (!skip[index + _slabSize]) Acenter += 1.; - if (!skip[index - _slabSize]) Acenter += 1.; - } - - _residual[index] = b[index] - (Acenter * field[index] + - field[index - 1] * (skip[index - 1] ? 0.0 : -1.0f)+ - field[index + 1] * (skip[index + 1] ? 0.0 : -1.0f)+ - field[index - _xRes] * (skip[index - _xRes] ? 0.0 : -1.0f)+ - field[index + _xRes] * (skip[index + _xRes] ? 0.0 : -1.0f)+ - field[index - _slabSize] * (skip[index - _slabSize] ? 0.0 : -1.0f)+ - field[index + _slabSize] * (skip[index + _slabSize] ? 0.0 : -1.0f) ); - _residual[index] = (skip[index]) ? 0.0f : _residual[index]; - } - + float deltaNew = 0.0f; - // d = r - index = _slabSize + _xRes + 1; - for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes) - for (y = 1; y < _yRes - 1; y++, index += 2) - for (x = 1; x < _xRes - 1; x++, index++) - _direction[index] = _residual[index]; + // r = b - Ax + index = _slabSize + _xRes + 1; + for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes) + for (y = 1; y < _yRes - 1; y++, index += 2) + for (x = 1; x < _xRes - 1; x++, index++) + { + // if the cell is a variable + float Acenter = 0.0f; + if (!skip[index]) + { + // set the matrix to the Poisson stencil in order + if (!skip[index + 1]) Acenter += 1.; + if (!skip[index - 1]) Acenter += 1.; + if (!skip[index + _xRes]) Acenter += 1.; + if (!skip[index - _xRes]) Acenter += 1.; + if (!skip[index + _slabSize]) Acenter += 1.; + if (!skip[index - _slabSize]) Acenter += 1.; - // deltaNew = transpose(r) * r - float deltaNew = 0.0f; - index = _slabSize + _xRes + 1; - for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes) - for (y = 1; y < _yRes - 1; y++, index += 2) - for (x = 1; x < _xRes - 1; x++, index++) - deltaNew += _residual[index] * _residual[index]; + _residual[index] = b[index] - (Acenter * field[index] + + field[index - 1] * (skip[index - 1] ? 0.0 : -1.0f)+ + field[index + 1] * (skip[index + 1] ? 0.0 : -1.0f)+ + field[index - _xRes] * (skip[index - _xRes] ? 0.0 : -1.0f)+ + field[index + _xRes] * (skip[index + _xRes] ? 0.0 : -1.0f)+ + field[index - _slabSize] * (skip[index - _slabSize] ? 0.0 : -1.0f)+ + field[index + _slabSize] * (skip[index + _slabSize] ? 0.0 : -1.0f) ); + } + else + { + _residual[index] = 0.0f; + } + + // P^-1 + if(Acenter < 1.0) + _Precond[index] = 0.0; + else + _Precond[index] = 1.0 / Acenter; + + // p = P^-1 * r + _direction[index] = _residual[index] * _Precond[index]; + + deltaNew += _residual[index] * _direction[index]; + } - // delta0 = deltaNew - // float delta0 = deltaNew; // While deltaNew > (eps^2) * delta0 const float eps = SOLVER_ACCURACY; + //while ((i < _iterations) && (deltaNew > eps*delta0)) float maxR = 2.0f * eps; - while ((i < _iterations) && (maxR > eps)) + // while (i < _iterations) + while ((i < _iterations) && (maxR > 0.001*eps)) { - // q = Ad + + float alpha = 0.0f; + index = _slabSize + _xRes + 1; for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes) for (y = 1; y < _yRes - 1; y++, index += 2) @@ -292,233 +256,71 @@ void FLUID_3D::solvePressure(float* field, float* b, unsigned char* skip) if (!skip[index - _xRes]) Acenter += 1.; if (!skip[index + _slabSize]) Acenter += 1.; if (!skip[index - _slabSize]) Acenter += 1.; - } - - _q[index] = Acenter * _direction[index] + + + _q[index] = Acenter * _direction[index] + _direction[index - 1] * (skip[index - 1] ? 0.0 : -1.0f) + _direction[index + 1] * (skip[index + 1] ? 0.0 : -1.0f) + _direction[index - _xRes] * (skip[index - _xRes] ? 0.0 : -1.0f) + _direction[index + _xRes] * (skip[index + _xRes] ? 0.0 : -1.0f)+ _direction[index - _slabSize] * (skip[index - _slabSize] ? 0.0 : -1.0f) + _direction[index + _slabSize] * (skip[index + _slabSize] ? 0.0 : -1.0f); - _q[index] = (skip[index]) ? 0.0f : _q[index]; - } - - // alpha = deltaNew / (transpose(d) * q) - float alpha = 0.0f; - index = _slabSize + _xRes + 1; - for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes) - for (y = 1; y < _yRes - 1; y++, index += 2) - for (x = 1; x < _xRes - 1; x++, index++) - alpha += _direction[index] * _q[index]; - if (fabs(alpha) > 0.0f) - alpha = deltaNew / alpha; - - // x = x + alpha * d - index = _slabSize + _xRes + 1; - for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes) - for (y = 1; y < _yRes - 1; y++, index += 2) - for (x = 1; x < _xRes - 1; x++, index++) - field[index] += alpha * _direction[index]; - - // r = r - alpha * q - maxR = 0.0f; - index = _slabSize + _xRes + 1; - for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes) - for (y = 1; y < _yRes - 1; y++, index += 2) - for (x = 1; x < _xRes - 1; x++, index++) - { - _residual[index] -= alpha * _q[index]; - maxR = (_residual[index] > maxR) ? _residual[index] : maxR; - } - - // deltaOld = deltaNew - float deltaOld = deltaNew; - - // deltaNew = transpose(r) * r - deltaNew = 0.0f; - index = _slabSize + _xRes + 1; - for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes) - for (y = 1; y < _yRes - 1; y++, index += 2) - for (x = 1; x < _xRes - 1; x++, index++) - deltaNew += _residual[index] * _residual[index]; - - // beta = deltaNew / deltaOld - float beta = deltaNew / deltaOld; - - // d = r + beta * d - index = _slabSize + _xRes + 1; - for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes) - for (y = 1; y < _yRes - 1; y++, index += 2) - for (x = 1; x < _xRes - 1; x++, index++) - _direction[index] = _residual[index] + beta * _direction[index]; - - // i = i + 1 - i++; - } - // cout << i << " iterations converged to " << maxR << endl; -} -#endif - -////////////////////////////////////////////////////////////////////// -// solve the heat equation with CG -////////////////////////////////////////////////////////////////////// -void FLUID_3D::solveHeat(float* field, float* b, unsigned char* skip) -{ - int x, y, z; - size_t index; - const float heatConst = _dt * _heatDiffusion / (_dx * _dx); - float *_q, *_residual, *_direction; - - // i = 0 - int i = 0; - - _residual = new float[_totalCells]; // set 0 - _direction = new float[_totalCells]; // set 0 - _q = new float[_totalCells]; // set 0 - - memset(_residual, 0, sizeof(float)*_xRes*_yRes*_zRes); - memset(_q, 0, sizeof(float)*_xRes*_yRes*_zRes); - memset(_direction, 0, sizeof(float)*_xRes*_yRes*_zRes); + } + else + { + _q[index] = 0.0f; + } - // r = b - Ax - index = _slabSize + _xRes + 1; - for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes) - for (y = 1; y < _yRes - 1; y++, index += 2) - for (x = 1; x < _xRes - 1; x++, index++) - { - // if the cell is a variable - float Acenter = 1.0f; - if (!skip[index]) - { - // set the matrix to the Poisson stencil in order - if (!skip[index + 1]) Acenter += heatConst; - if (!skip[index - 1]) Acenter += heatConst; - if (!skip[index + _xRes]) Acenter += heatConst; - if (!skip[index - _xRes]) Acenter += heatConst; - if (!skip[index + _slabSize]) Acenter += heatConst; - if (!skip[index - _slabSize]) Acenter += heatConst; + alpha += _direction[index] * _q[index]; } - - _residual[index] = b[index] - (Acenter * field[index] + - field[index - 1] * (skip[index - 1] ? 0.0 : -heatConst) + - field[index + 1] * (skip[index + 1] ? 0.0 : -heatConst) + - field[index - _xRes] * (skip[index - _xRes] ? 0.0 : -heatConst) + - field[index + _xRes] * (skip[index + _xRes] ? 0.0 : -heatConst) + - field[index - _slabSize] * (skip[index - _slabSize] ? 0.0 : -heatConst) + - field[index + _slabSize] * (skip[index + _slabSize] ? 0.0 : -heatConst)); - _residual[index] = (skip[index]) ? 0.0f : _residual[index]; - } - // d = r - index = _slabSize + _xRes + 1; - for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes) - for (y = 1; y < _yRes - 1; y++, index += 2) - for (x = 1; x < _xRes - 1; x++, index++) - _direction[index] = _residual[index]; - // deltaNew = transpose(r) * r - float deltaNew = 0.0f; - index = _slabSize + _xRes + 1; - for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes) - for (y = 1; y < _yRes - 1; y++, index += 2) - for (x = 1; x < _xRes - 1; x++, index++) - deltaNew += _residual[index] * _residual[index]; + if (fabs(alpha) > 0.0f) + alpha = deltaNew / alpha; - // delta0 = deltaNew - // float delta0 = deltaNew; + float deltaOld = deltaNew; + deltaNew = 0.0f; - // While deltaNew > (eps^2) * delta0 - const float eps = SOLVER_ACCURACY; - float maxR = 2.0f * eps; - while ((i < _iterations) && (maxR > eps)) - { - // q = Ad - index = _slabSize + _xRes + 1; - for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes) - for (y = 1; y < _yRes - 1; y++, index += 2) - for (x = 1; x < _xRes - 1; x++, index++) - { - // if the cell is a variable - float Acenter = 1.0f; - if (!skip[index]) - { - // set the matrix to the Poisson stencil in order - if (!skip[index + 1]) Acenter += heatConst; - if (!skip[index - 1]) Acenter += heatConst; - if (!skip[index + _xRes]) Acenter += heatConst; - if (!skip[index - _xRes]) Acenter += heatConst; - if (!skip[index + _slabSize]) Acenter += heatConst; - if (!skip[index - _slabSize]) Acenter += heatConst; - } - - _q[index] = (Acenter * _direction[index] + - _direction[index - 1] * (skip[index - 1] ? 0.0 : -heatConst) + - _direction[index + 1] * (skip[index + 1] ? 0.0 : -heatConst) + - _direction[index - _xRes] * (skip[index - _xRes] ? 0.0 : -heatConst) + - _direction[index + _xRes] * (skip[index + _xRes] ? 0.0 : -heatConst) + - _direction[index - _slabSize] * (skip[index - _slabSize] ? 0.0 : -heatConst) + - _direction[index + _slabSize] * (skip[index + _slabSize] ? 0.0 : -heatConst)); - - _q[index] = (skip[index]) ? 0.0f : _q[index]; - } + maxR = 0.0; - // alpha = deltaNew / (transpose(d) * q) - float alpha = 0.0f; - index = _slabSize + _xRes + 1; - for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes) - for (y = 1; y < _yRes - 1; y++, index += 2) - for (x = 1; x < _xRes - 1; x++, index++) - alpha += _direction[index] * _q[index]; - if (fabs(alpha) > 0.0f) - alpha = deltaNew / alpha; + float tmp; // x = x + alpha * d index = _slabSize + _xRes + 1; for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes) for (y = 1; y < _yRes - 1; y++, index += 2) for (x = 1; x < _xRes - 1; x++, index++) + { field[index] += alpha * _direction[index]; - // r = r - alpha * q - maxR = 0.0f; - index = _slabSize + _xRes + 1; - for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes) - for (y = 1; y < _yRes - 1; y++, index += 2) - for (x = 1; x < _xRes - 1; x++, index++) - { - _residual[index] -= alpha * _q[index]; - maxR = (_residual[index] > maxR) ? _residual[index] : maxR; - } + _residual[index] -= alpha * _q[index]; - // deltaOld = deltaNew - float deltaOld = deltaNew; + _h[index] = _Precond[index] * _residual[index]; + + tmp = _residual[index] * _h[index]; + deltaNew += tmp; + maxR = (tmp > maxR) ? tmp : maxR; + + } - // deltaNew = transpose(r) * r - deltaNew = 0.0f; - index = _slabSize + _xRes + 1; - for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes) - for (y = 1; y < _yRes - 1; y++, index += 2) - for (x = 1; x < _xRes - 1; x++, index++) - deltaNew += _residual[index] * _residual[index]; // beta = deltaNew / deltaOld float beta = deltaNew / deltaOld; - // d = r + beta * d + // d = h + beta * d index = _slabSize + _xRes + 1; for (z = 1; z < _zRes - 1; z++, index += 2 * _xRes) for (y = 1; y < _yRes - 1; y++, index += 2) for (x = 1; x < _xRes - 1; x++, index++) - _direction[index] = _residual[index] + beta * _direction[index]; + _direction[index] = _h[index] + beta * _direction[index]; // i = i + 1 i++; } - // cout << i << " iterations converged to " << maxR << endl; + // cout << i << " iterations converged to " << sqrt(maxR) << endl; + if (_h) delete[] _h; + if (_Precond) delete[] _Precond; if (_residual) delete[] _residual; if (_direction) delete[] _direction; if (_q) delete[] _q; } - diff --git a/intern/smoke/intern/FLUID_3D_STATIC.cpp b/intern/smoke/intern/FLUID_3D_STATIC.cpp index afeca2b1faa..89ac2c74e15 100644 --- a/intern/smoke/intern/FLUID_3D_STATIC.cpp +++ b/intern/smoke/intern/FLUID_3D_STATIC.cpp @@ -19,6 +19,11 @@ // FLUID_3D.cpp: implementation of the static functions of the FLUID_3D class. // ////////////////////////////////////////////////////////////////////// +// Heavy parallel optimization done. Many of the old functions now +// take begin and end parameters and process only specified part of the data. +// Some functions were divided into multiple ones. +// - MiikaH +////////////////////////////////////////////////////////////////////// #include #include "FLUID_3D.h" @@ -75,11 +80,11 @@ void FLUID_3D::addSmokeTestCase(float* field, Vec3Int res) ////////////////////////////////////////////////////////////////////// // set x direction to Neumann boundary conditions ////////////////////////////////////////////////////////////////////// -void FLUID_3D::setNeumannX(float* field, Vec3Int res) +void FLUID_3D::setNeumannX(float* field, Vec3Int res, int zBegin, int zEnd) { const int slabSize = res[0] * res[1]; int index; - for (int z = 0; z < res[2]; z++) + for (int z = zBegin; z < zEnd; z++) for (int y = 0; y < res[1]; y++) { // left slab @@ -93,7 +98,7 @@ void FLUID_3D::setNeumannX(float* field, Vec3Int res) // fix, force top slab to only allow outwards flux for (int y = 0; y < res[1]; y++) - for (int z = 0; z < res[2]; z++) + for (int z = zBegin; z < zEnd; z++) { // top slab index = y * res[0] + z * slabSize; @@ -107,11 +112,11 @@ void FLUID_3D::setNeumannX(float* field, Vec3Int res) ////////////////////////////////////////////////////////////////////// // set y direction to Neumann boundary conditions ////////////////////////////////////////////////////////////////////// -void FLUID_3D::setNeumannY(float* field, Vec3Int res) +void FLUID_3D::setNeumannY(float* field, Vec3Int res, int zBegin, int zEnd) { const int slabSize = res[0] * res[1]; int index; - for (int z = 0; z < res[2]; z++) + for (int z = zBegin; z < zEnd; z++) for (int x = 0; x < res[0]; x++) { // bottom slab @@ -124,7 +129,7 @@ void FLUID_3D::setNeumannY(float* field, Vec3Int res) } // fix, force top slab to only allow outwards flux - for (int z = 0; z < res[2]; z++) + for (int z = zBegin; z < zEnd; z++) for (int x = 0; x < res[0]; x++) { // top slab @@ -140,22 +145,36 @@ void FLUID_3D::setNeumannY(float* field, Vec3Int res) ////////////////////////////////////////////////////////////////////// // set z direction to Neumann boundary conditions ////////////////////////////////////////////////////////////////////// -void FLUID_3D::setNeumannZ(float* field, Vec3Int res) +void FLUID_3D::setNeumannZ(float* field, Vec3Int res, int zBegin, int zEnd) { const int slabSize = res[0] * res[1]; const int totalCells = res[0] * res[1] * res[2]; + const int cellsslab = totalCells - slabSize; int index; + + index = 0; + if (zBegin == 0) for (int y = 0; y < res[1]; y++) - for (int x = 0; x < res[0]; x++) + for (int x = 0; x < res[0]; x++, index++) { // front slab - index = x + y * res[0]; field[index] = field[index + 2 * slabSize]; + } + + if (zEnd == res[2]) + { + index = 0; + int indexx = 0; + + for (int y = 0; y < res[1]; y++) + for (int x = 0; x < res[0]; x++, index++) + { // back slab - index += totalCells - slabSize; - field[index] = field[index - 2 * slabSize]; + indexx = index + cellsslab; + field[indexx] = field[indexx - 2 * slabSize]; } + // fix, force top slab to only allow outwards flux for (int y = 0; y < res[1]; y++) @@ -163,22 +182,24 @@ void FLUID_3D::setNeumannZ(float* field, Vec3Int res) { // top slab index = x + y * res[0]; - index += totalCells - slabSize; + index += cellsslab; if(field[index]<0.) field[index] = 0.; index -= slabSize; if(field[index]<0.) field[index] = 0.; } + + } // zEnd == res[2] } ////////////////////////////////////////////////////////////////////// // set x direction to zero ////////////////////////////////////////////////////////////////////// -void FLUID_3D::setZeroX(float* field, Vec3Int res) +void FLUID_3D::setZeroX(float* field, Vec3Int res, int zBegin, int zEnd) { const int slabSize = res[0] * res[1]; int index; - for (int z = 0; z < res[2]; z++) + for (int z = zBegin; z < zEnd; z++) for (int y = 0; y < res[1]; y++) { // left slab @@ -194,11 +215,11 @@ void FLUID_3D::setZeroX(float* field, Vec3Int res) ////////////////////////////////////////////////////////////////////// // set y direction to zero ////////////////////////////////////////////////////////////////////// -void FLUID_3D::setZeroY(float* field, Vec3Int res) +void FLUID_3D::setZeroY(float* field, Vec3Int res, int zBegin, int zEnd) { const int slabSize = res[0] * res[1]; int index; - for (int z = 0; z < res[2]; z++) + for (int z = zBegin; z < zEnd; z++) for (int x = 0; x < res[0]; x++) { // bottom slab @@ -214,32 +235,44 @@ void FLUID_3D::setZeroY(float* field, Vec3Int res) ////////////////////////////////////////////////////////////////////// // set z direction to zero ////////////////////////////////////////////////////////////////////// -void FLUID_3D::setZeroZ(float* field, Vec3Int res) +void FLUID_3D::setZeroZ(float* field, Vec3Int res, int zBegin, int zEnd) { const int slabSize = res[0] * res[1]; const int totalCells = res[0] * res[1] * res[2]; - int index; + + int index = 0; + if ((zBegin == 0)) for (int y = 0; y < res[1]; y++) - for (int x = 0; x < res[0]; x++) + for (int x = 0; x < res[0]; x++, index++) { // front slab - index = x + y * res[0]; field[index] = 0.0f; + } - // back slab - index += totalCells - slabSize; - field[index] = 0.0f; - } - } + if (zEnd == res[2]) + { + index=0; + int indexx=0; + const int cellsslab = totalCells - slabSize; + + for (int y = 0; y < res[1]; y++) + for (int x = 0; x < res[0]; x++, index++) + { + // back slab + indexx = index + cellsslab; + field[indexx] = 0.0f; + } + } + } ////////////////////////////////////////////////////////////////////// // copy grid boundary ////////////////////////////////////////////////////////////////////// -void FLUID_3D::copyBorderX(float* field, Vec3Int res) +void FLUID_3D::copyBorderX(float* field, Vec3Int res, int zBegin, int zEnd) { const int slabSize = res[0] * res[1]; int index; - for (int z = 0; z < res[2]; z++) + for (int z = zBegin; z < zEnd; z++) for (int y = 0; y < res[1]; y++) { // left slab @@ -251,12 +284,12 @@ void FLUID_3D::copyBorderX(float* field, Vec3Int res) field[index] = field[index - 1]; } } -void FLUID_3D::copyBorderY(float* field, Vec3Int res) +void FLUID_3D::copyBorderY(float* field, Vec3Int res, int zBegin, int zEnd) { const int slabSize = res[0] * res[1]; - const int totalCells = res[0] * res[1] * res[2]; + //const int totalCells = res[0] * res[1] * res[2]; int index; - for (int z = 0; z < res[2]; z++) + for (int z = zBegin; z < zEnd; z++) for (int x = 0; x < res[0]; x++) { // bottom slab @@ -267,40 +300,49 @@ void FLUID_3D::copyBorderY(float* field, Vec3Int res) field[index] = field[index - res[0]]; } } -void FLUID_3D::copyBorderZ(float* field, Vec3Int res) +void FLUID_3D::copyBorderZ(float* field, Vec3Int res, int zBegin, int zEnd) { const int slabSize = res[0] * res[1]; const int totalCells = res[0] * res[1] * res[2]; - int index; + int index=0; + + if ((zBegin == 0)) for (int y = 0; y < res[1]; y++) - for (int x = 0; x < res[0]; x++) + for (int x = 0; x < res[0]; x++, index++) { - // front slab - index = x + y * res[0]; field[index] = field[index + slabSize]; + } + + if ((zEnd == res[2])) + { + + index=0; + int indexx=0; + const int cellsslab = totalCells - slabSize; + + for (int y = 0; y < res[1]; y++) + for (int x = 0; x < res[0]; x++, index++) + { // back slab - index += totalCells - slabSize; - field[index] = field[index - slabSize]; + indexx = index + cellsslab; + field[indexx] = field[indexx - slabSize]; } + } } ///////////////////////////////////////////////////////////////////// // advect field with the semi lagrangian method ////////////////////////////////////////////////////////////////////// void FLUID_3D::advectFieldSemiLagrange(const float dt, const float* velx, const float* vely, const float* velz, - float* oldField, float* newField, Vec3Int res) + float* oldField, float* newField, Vec3Int res, int zBegin, int zEnd) { const int xres = res[0]; const int yres = res[1]; const int zres = res[2]; const int slabSize = res[0] * res[1]; - // scale dt up to grid resolution -#if PARALLEL==1 && !_WIN32 -#pragma omp parallel -#pragma omp for schedule(static) -#endif - for (int z = 0; z < zres; z++) + + for (int z = zBegin; z < zEnd; z++) for (int y = 0; y < yres; y++) for (int x = 0; x < xres; x++) { @@ -357,50 +399,69 @@ void FLUID_3D::advectFieldSemiLagrange(const float dt, const float* velx, const } } + ///////////////////////////////////////////////////////////////////// // advect field with the maccormack method // // comments are the pseudocode from selle's paper ////////////////////////////////////////////////////////////////////// -void FLUID_3D::advectFieldMacCormack(const float dt, const float* xVelocity, const float* yVelocity, const float* zVelocity, - float* oldField, float* newField, float* temp1, float* temp2, Vec3Int res, const unsigned char* obstacles) +void FLUID_3D::advectFieldMacCormack1(const float dt, const float* xVelocity, const float* yVelocity, const float* zVelocity, + float* oldField, float* tempResult, Vec3Int res, int zBegin, int zEnd) { - float* phiHatN = temp1; - float* phiHatN1 = temp2; const int sx= res[0]; const int sy= res[1]; const int sz= res[2]; - for (int x = 0; x < sx * sy * sz; x++) - phiHatN[x] = phiHatN1[x] = oldField[x]; + /*for (int x = 0; x < sx * sy * sz; x++) + phiHatN[x] = phiHatN1[x] = oldField[x];*/ // not needed as all the values are written first float*& phiN = oldField; - float*& phiN1 = newField; + float*& phiN1 = tempResult; + + // phiHatN1 = A(phiN) - advectFieldSemiLagrange( dt, xVelocity, yVelocity, zVelocity, phiN, phiHatN1, res); + advectFieldSemiLagrange( dt, xVelocity, yVelocity, zVelocity, phiN, phiN1, res, zBegin, zEnd); // uses wide data from old field and velocities (both are whole) +} + + + +void FLUID_3D::advectFieldMacCormack2(const float dt, const float* xVelocity, const float* yVelocity, const float* zVelocity, + float* oldField, float* newField, float* tempResult, float* temp1, Vec3Int res, const unsigned char* obstacles, int zBegin, int zEnd) +{ + float* phiHatN = tempResult; + float* t1 = temp1; + const int sx= res[0]; + const int sy= res[1]; + const int sz= res[2]; + + float*& phiN = oldField; + float*& phiN1 = newField; + + // phiHatN = A^R(phiHatN1) - advectFieldSemiLagrange( -1.0*dt, xVelocity, yVelocity, zVelocity, phiHatN1, phiHatN, res); + advectFieldSemiLagrange( -1.0*dt, xVelocity, yVelocity, zVelocity, phiHatN, t1, res, zBegin, zEnd); // uses wide data from old field and velocities (both are whole) // phiN1 = phiHatN1 + (phiN - phiHatN) / 2 const int border = 0; - for (int z = border; z < sz-border; z++) + for (int z = zBegin+border; z < zEnd-border; z++) for (int y = border; y < sy-border; y++) for (int x = border; x < sx-border; x++) { int index = x + y * sx + z * sx*sy; - phiN1[index] = phiHatN1[index] + (phiN[index] - phiHatN[index]) * 0.50f; + phiN1[index] = phiHatN[index] + (phiN[index] - t1[index]) * 0.50f; //phiN1[index] = phiHatN1[index]; // debug, correction off } - copyBorderX(phiN1, res); - copyBorderY(phiN1, res); - copyBorderZ(phiN1, res); + copyBorderX(phiN1, res, zBegin, zEnd); + copyBorderY(phiN1, res, zBegin, zEnd); + copyBorderZ(phiN1, res, zBegin, zEnd); // clamp any newly created extrema - clampExtrema(dt, xVelocity, yVelocity, zVelocity, oldField, newField, res); + clampExtrema(dt, xVelocity, yVelocity, zVelocity, oldField, newField, res, zBegin, zEnd); // uses wide data from old field and velocities (both are whole) // if the error estimate was bad, revert to first order - clampOutsideRays(dt, xVelocity, yVelocity, zVelocity, oldField, newField, res, obstacles, phiHatN1); + clampOutsideRays(dt, xVelocity, yVelocity, zVelocity, oldField, newField, res, obstacles, phiHatN, zBegin, zEnd); // phiHatN is only used at cells within thread range, so its ok + } @@ -408,13 +469,21 @@ void FLUID_3D::advectFieldMacCormack(const float dt, const float* xVelocity, con // Clamp the extrema generated by the BFECC error correction ////////////////////////////////////////////////////////////////////// void FLUID_3D::clampExtrema(const float dt, const float* velx, const float* vely, const float* velz, - float* oldField, float* newField, Vec3Int res) + float* oldField, float* newField, Vec3Int res, int zBegin, int zEnd) { const int xres= res[0]; const int yres= res[1]; const int zres= res[2]; const int slabSize = res[0] * res[1]; - for (int z = 1; z < zres-1; z++) + + int bb=0; + int bt=0; + + if (zBegin == 0) {bb = 1;} + if (zEnd == res[2]) {bt = 1;} + + + for (int z = zBegin+bb; z < zEnd-bt; z++) for (int y = 1; y < yres-1; y++) for (int x = 1; x < xres-1; x++) { @@ -484,14 +553,20 @@ void FLUID_3D::clampExtrema(const float dt, const float* velx, const float* vely // incorrect ////////////////////////////////////////////////////////////////////// void FLUID_3D::clampOutsideRays(const float dt, const float* velx, const float* vely, const float* velz, - float* oldField, float* newField, Vec3Int res, const unsigned char* obstacles, const float *oldAdvection) + float* oldField, float* newField, Vec3Int res, const unsigned char* obstacles, const float *oldAdvection, int zBegin, int zEnd) { const int sx= res[0]; const int sy= res[1]; const int sz= res[2]; const int slabSize = res[0] * res[1]; - for (int z = 1; z < sz-1; z++) + int bb=0; + int bt=0; + + if (zBegin == 0) {bb = 1;} + if (zEnd == res[2]) {bt = 1;} + + for (int z = zBegin+bb; z < zEnd-bt; z++) for (int y = 1; y < sy-1; y++) for (int x = 1; x < sx-1; x++) { @@ -607,4 +682,3 @@ void FLUID_3D::clampOutsideRays(const float dt, const float* velx, const float* } } // xyz } - diff --git a/intern/smoke/intern/LU_HELPER.cpp b/intern/smoke/intern/LU_HELPER.cpp new file mode 100644 index 00000000000..43357e6be03 --- /dev/null +++ b/intern/smoke/intern/LU_HELPER.cpp @@ -0,0 +1,136 @@ + +#include "LU_HELPER.h" + +int isNonsingular (sLU LU_) { + for (int j = 0; j < 3; j++) { + if (LU_.values[j][j] == 0) + return 0; + } + return 1; +} + +sLU computeLU( float a[3][3]) +{ + sLU result; + int m=3; + int n=3; + + //float LU_[3][3]; + for (int i = 0; i < m; i++) { + result.piv[i] = i; + for (int j = 0; j < n; j++) result.values[i][j]=a[i][j]; + } + + result.pivsign = 1; + //Real *LUrowi = 0;; + //Array1D LUcolj(m); + //float *LUrowi = 0; + float LUcolj[3]; + + // Outer loop. + + for (int j = 0; j < n; j++) { + + // Make a copy of the j-th column to localize references. + + for (int i = 0; i < m; i++) { + LUcolj[i] = result.values[i][j]; + } + + // Apply previous transformations. + + for (int i = 0; i < m; i++) { + //float LUrowi[3]; + //LUrowi = result.values[i]; + + // Most of the time is spent in the following dot product. + + int kmax = min(i,j); + double s = 0.0; + for (int k = 0; k < kmax; k++) { + s += result.values[i][k]*LUcolj[k]; + } + + result.values[i][j] = LUcolj[i] -= s; + } + + // Find pivot and exchange if necessary. + + int p = j; + for (int i = j+1; i < m; i++) { + if (abs(LUcolj[i]) > abs(LUcolj[p])) { + p = i; + } + } + if (p != j) { + int k=0; + for (k = 0; k < n; k++) { + double t = result.values[p][k]; + result.values[p][k] = result.values[j][k]; + result.values[j][k] = t; + } + k = result.piv[p]; + result.piv[p] = result.piv[j]; + result.piv[j] = k; + result.pivsign = -result.pivsign; + } + + // Compute multipliers. + + if ((j < m) && (result.values[j][j] != 0.0)) { + for (int i = j+1; i < m; i++) { + result.values[i][j] /= result.values[j][j]; + } + } + } + + return result; +} + +void solveLU3x3(sLU& A, float x[3], float b[3]) +{ + //TNT::Array1D jamaB = TNT::Array1D(3, &b[0]); + //TNT::Array1D jamaX = A.solve(jamaB); + + + // Solve A, B + + { + if (!isNonsingular(A)) { + x[0]=0.0f; + x[1]=0.0f; + x[2]=0.0f; + return; + } + + + //Array1D Ax = permute_copy(b, piv); + float Ax[3]; + + // permute copy: b , A.piv + { + for (int i = 0; i < 3; i++) + Ax[i] = b[A.piv[i]]; + } + + // Solve L*Y = B(piv) + for (int k = 0; k < 3; k++) { + for (int i = k+1; i < 3; i++) { + Ax[i] -= Ax[k]*A.values[i][k]; + } + } + + // Solve U*X = Y; + for (int k = 2; k >= 0; k--) { + Ax[k] /= A.values[k][k]; + for (int i = 0; i < k; i++) + Ax[i] -= Ax[k]*A.values[i][k]; + } + + + x[0] = Ax[0]; + x[1] = Ax[1]; + x[2] = Ax[2]; + return; + } +} diff --git a/intern/smoke/intern/LU_HELPER.h b/intern/smoke/intern/LU_HELPER.h index b3f3c5a1cb4..e79b4ffa01b 100644 --- a/intern/smoke/intern/LU_HELPER.h +++ b/intern/smoke/intern/LU_HELPER.h @@ -17,40 +17,35 @@ // Copyright 2008 Theodore Kim and Nils Thuerey // ////////////////////////////////////////////////////////////////////// +// Modified to not require TNT matrix library anymore. It was very slow +// when being run in parallel. Required TNT JAMA:LU libraries were +// converted into independent functions. +// - MiikaH +////////////////////////////////////////////////////////////////////// #ifndef LU_HELPER_H #define LU_HELPER_H -////////////////////////////////////////////////////////////////////// -// Helper function, compute eigenvalues of 3x3 matrix -////////////////////////////////////////////////////////////////////// +#include +#include -#include "tnt/jama_lu.h" +using namespace std; ////////////////////////////////////////////////////////////////////// -// LU decomposition of 3x3 non-symmetric matrix +// Helper function, compute eigenvalues of 3x3 matrix ////////////////////////////////////////////////////////////////////// -JAMA::LU inline computeLU3x3( - float a[3][3]) -{ - TNT::Array2D A = TNT::Array2D(3,3, &a[0][0]); - JAMA::LU jLU= JAMA::LU(A); - return jLU; -} -////////////////////////////////////////////////////////////////////// -// LU decomposition of 3x3 non-symmetric matrix -////////////////////////////////////////////////////////////////////// -void inline solveLU3x3( - JAMA::LU& A, - float x[3], - float b[3]) +struct sLU { - TNT::Array1D jamaB = TNT::Array1D(3, &b[0]); - TNT::Array1D jamaX = A.solve(jamaB); + float values[3][3]; + int pivsign; + int piv[3]; +}; + + +int isNonsingular (sLU LU_); +sLU computeLU( float a[3][3]); +void solveLU3x3(sLU& A, float x[3], float b[3]); + - x[0] = jamaX[0]; - x[1] = jamaX[1]; - x[2] = jamaX[2]; -} #endif diff --git a/intern/smoke/intern/WTURBULENCE.cpp b/intern/smoke/intern/WTURBULENCE.cpp index 7ea4bde3884..6d43bc95471 100644 --- a/intern/smoke/intern/WTURBULENCE.cpp +++ b/intern/smoke/intern/WTURBULENCE.cpp @@ -18,6 +18,10 @@ // // WTURBULENCE handling /////////////////////////////////////////////////////////////////////////////////// +// Parallelized turbulence even further. TNT matrix library functions +// rewritten to improve performance. +// - MiikaH +////////////////////////////////////////////////////////////////////// #include "WTURBULENCE.h" #include "INTERPOLATE.h" @@ -29,6 +33,7 @@ #include "LU_HELPER.h" #include "SPHERE.h" #include +#include // needed to access static advection functions #include "FLUID_3D.h" @@ -278,27 +283,34 @@ static float minDz(int x, int y, int z, float* input, Vec3Int res) // Beware -- uses big density maccormack as temporary arrays ////////////////////////////////////////////////////////////////////// void WTURBULENCE::advectTextureCoordinates (float dtOrg, float* xvel, float* yvel, float* zvel, float *tempBig1, float *tempBig2) { + // advection SWAP_POINTERS(_tcTemp, _tcU); - FLUID_3D::copyBorderX(_tcTemp, _resSm); - FLUID_3D::copyBorderY(_tcTemp, _resSm); - FLUID_3D::copyBorderZ(_tcTemp, _resSm); - FLUID_3D::advectFieldMacCormack(dtOrg, xvel, yvel, zvel, - _tcTemp, _tcU, tempBig1, tempBig2, _resSm, NULL); + FLUID_3D::copyBorderX(_tcTemp, _resSm, 0 , _resSm[2]); + FLUID_3D::copyBorderY(_tcTemp, _resSm, 0 , _resSm[2]); + FLUID_3D::copyBorderZ(_tcTemp, _resSm, 0 , _resSm[2]); + FLUID_3D::advectFieldMacCormack1(dtOrg, xvel, yvel, zvel, + _tcTemp, tempBig1, _resSm, 0 , _resSm[2]); + FLUID_3D::advectFieldMacCormack2(dtOrg, xvel, yvel, zvel, + _tcTemp, _tcU, tempBig1, tempBig2, _resSm, NULL, 0 , _resSm[2]); SWAP_POINTERS(_tcTemp, _tcV); - FLUID_3D::copyBorderX(_tcTemp, _resSm); - FLUID_3D::copyBorderY(_tcTemp, _resSm); - FLUID_3D::copyBorderZ(_tcTemp, _resSm); - FLUID_3D::advectFieldMacCormack(dtOrg, xvel, yvel, zvel, - _tcTemp, _tcV, tempBig1, tempBig2, _resSm, NULL); + FLUID_3D::copyBorderX(_tcTemp, _resSm, 0 , _resSm[2]); + FLUID_3D::copyBorderY(_tcTemp, _resSm, 0 , _resSm[2]); + FLUID_3D::copyBorderZ(_tcTemp, _resSm, 0 , _resSm[2]); + FLUID_3D::advectFieldMacCormack1(dtOrg, xvel, yvel, zvel, + _tcTemp, tempBig1, _resSm, 0 , _resSm[2]); + FLUID_3D::advectFieldMacCormack2(dtOrg, xvel, yvel, zvel, + _tcTemp, _tcV, tempBig1, tempBig2, _resSm, NULL, 0 , _resSm[2]); SWAP_POINTERS(_tcTemp, _tcW); - FLUID_3D::copyBorderX(_tcTemp, _resSm); - FLUID_3D::copyBorderY(_tcTemp, _resSm); - FLUID_3D::copyBorderZ(_tcTemp, _resSm); - FLUID_3D::advectFieldMacCormack(dtOrg, xvel, yvel, zvel, - _tcTemp, _tcW, tempBig1, tempBig2, _resSm, NULL); + FLUID_3D::copyBorderX(_tcTemp, _resSm, 0 , _resSm[2]); + FLUID_3D::copyBorderY(_tcTemp, _resSm, 0 , _resSm[2]); + FLUID_3D::copyBorderZ(_tcTemp, _resSm, 0 , _resSm[2]); + FLUID_3D::advectFieldMacCormack1(dtOrg, xvel, yvel, zvel, + _tcTemp, tempBig1, _resSm, 0 , _resSm[2]); + FLUID_3D::advectFieldMacCormack2(dtOrg, xvel, yvel, zvel, + _tcTemp, _tcW, tempBig1, tempBig2, _resSm, NULL, 0 , _resSm[2]); } ////////////////////////////////////////////////////////////////////// @@ -325,9 +337,9 @@ void WTURBULENCE::computeEigenvalues(float *_eigMin, float *_eigMax) { // ONLY compute the eigenvalues after checking that the matrix // is nonsingular - JAMA::LU LU = computeLU3x3(jacobian); + sLU LU = computeLU(jacobian); - if (LU.isNonsingular()) + if (isNonsingular(LU)) { // get the analytic eigenvalues, quite slow right now... Vec3 eigenvalues = Vec3(1.); @@ -422,9 +434,9 @@ void WTURBULENCE::computeEnergy(float *_energy, float* xvel, float* yvel, float* for (int x = 0; x < _totalCellsSm; x++) _energy[x] = 0.5f * (xvel[x] * xvel[x] + yvel[x] * yvel[x] + zvel[x] * zvel[x]); - FLUID_3D::copyBorderX(_energy, _resSm); - FLUID_3D::copyBorderY(_energy, _resSm); - FLUID_3D::copyBorderZ(_energy, _resSm); + FLUID_3D::copyBorderX(_energy, _resSm, 0 , _resSm[2]); + FLUID_3D::copyBorderY(_energy, _resSm, 0 , _resSm[2]); + FLUID_3D::copyBorderZ(_energy, _resSm, 0 , _resSm[2]); // pseudo-march the values into the obstacles // the wavelet upsampler only uses a 3x3 support neighborhood, so @@ -563,7 +575,7 @@ Vec3 WTURBULENCE::WVelocityWithJacobian(Vec3 orgPos, float* xUnwarped, float* yU ////////////////////////////////////////////////////////////////////// // perform an actual noise advection step ////////////////////////////////////////////////////////////////////// -void WTURBULENCE::stepTurbulenceReadable(float dtOrg, float* xvel, float* yvel, float* zvel, unsigned char *obstacles) +/*void WTURBULENCE::stepTurbulenceReadable(float dtOrg, float* xvel, float* yvel, float* zvel, unsigned char *obstacles) { // enlarge timestep to match grid const float dt = dtOrg * _amplify; @@ -689,9 +701,9 @@ void WTURBULENCE::stepTurbulenceReadable(float dtOrg, float* xvel, float* yvel, const float dtSubdiv = dt / (float)totalSubsteps; // set boundaries of big velocity grid - FLUID_3D::setZeroX(bigUx, _resBig); - FLUID_3D::setZeroY(bigUy, _resBig); - FLUID_3D::setZeroZ(bigUz, _resBig); + FLUID_3D::setZeroX(bigUx, _resBig, 0, _resBig[2]); + FLUID_3D::setZeroY(bigUy, _resBig, 0, _resBig[2]); + FLUID_3D::setZeroZ(bigUz, _resBig, 0, _resBig[2]); // do the MacCormack advection, with substepping if necessary for(int substep = 0; substep < totalSubsteps; substep++) @@ -704,7 +716,7 @@ void WTURBULENCE::stepTurbulenceReadable(float dtOrg, float* xvel, float* yvel, } // substep // wipe the density borders - FLUID_3D::setZeroBorder(_densityBig, _resBig); + FLUID_3D::setZeroBorder(_densityBig, _resBig, 0, _resBig[2]); // reset texture coordinates now in preparation for next timestep // Shouldn't do this before generating the noise because then the @@ -724,7 +736,9 @@ void WTURBULENCE::stepTurbulenceReadable(float dtOrg, float* xvel, float* yvel, _totalStepsBig++; -} +}*/ + +//struct ////////////////////////////////////////////////////////////////////// // perform the full turbulence algorithm, including OpenMP @@ -747,6 +761,7 @@ void WTURBULENCE::stepTurbulenceFull(float dtOrg, float* xvel, float* yvel, floa memset(_tcTemp, 0, sizeof(float)*_totalCellsSm); + // prepare textures advectTextureCoordinates(dtOrg, xvel,yvel,zvel, tempBig1, tempBig2); @@ -763,25 +778,38 @@ void WTURBULENCE::stepTurbulenceFull(float dtOrg, float* xvel, float* yvel, floa if (obstacles[x]) highFreqEnergy[x] = 0.f; Vec3Int ressm(_xResSm, _yResSm, _zResSm); - FLUID_3D::setNeumannX(highFreqEnergy, ressm); - FLUID_3D::setNeumannY(highFreqEnergy, ressm); - FLUID_3D::setNeumannZ(highFreqEnergy, ressm); + FLUID_3D::setNeumannX(highFreqEnergy, ressm, 0 , ressm[2]); + FLUID_3D::setNeumannY(highFreqEnergy, ressm, 0 , ressm[2]); + FLUID_3D::setNeumannZ(highFreqEnergy, ressm, 0 , ressm[2]); + + + int threadval = 1; +#if PARALLEL==1 + threadval = omp_get_max_threads(); +#endif + // parallel region setup - float maxVelMagThreads[8] = { -1., -1., -1., -1., -1., -1., -1., -1. }; -#if PARALLEL==1 && !_WIN32 + // Uses omp_get_max_trheads to get number of required cells. + float* maxVelMagThreads = new float[threadval]; + + for (int i=0; i LU = computeLU3x3(jacobian); + sLU LU = computeLU(jacobian); float xUnwarped[] = {1.0f, 0.0f, 0.0f}; float yUnwarped[] = {0.0f, 1.0f, 0.0f}; float zUnwarped[] = {0.0f, 0.0f, 1.0f}; float xWarped[] = {1.0f, 0.0f, 0.0f}; float yWarped[] = {0.0f, 1.0f, 0.0f}; float zWarped[] = {0.0f, 0.0f, 1.0f}; - bool nonSingular = LU.isNonsingular(); + bool nonSingular = isNonsingular(LU); #if 0 // UNUSED float eigMax = 10.0f; @@ -908,24 +936,27 @@ void WTURBULENCE::stepTurbulenceFull(float dtOrg, float* xvel, float* yvel, floa obstacles, posSm[0], posSm[1], posSm[2], _xResSm, _yResSm, _zResSm); if (obsCheck > 0.95) bigUx[index] = bigUy[index] = bigUz[index] = 0.; - } // xyz + } // xyz*/ -#if PARALLEL==1 && !_WIN32 +#if PARALLEL==1 maxVelMagThreads[id] = maxVelMag1; #else maxVelMagThreads[0] = maxVelMag1; #endif + } } } // omp // compute maximum over threads float maxVelMag = maxVelMagThreads[0]; -#if PARALLEL==1 && !_WIN32 - for (int i = 1; i < 8; i++) +#if PARALLEL==1 + for (int i = 1; i < threadval; i++) if (maxVelMag < maxVelMagThreads[i]) maxVelMag = maxVelMagThreads[i]; + delete [] maxVelMagThreads; #endif + // prepare density for an advection SWAP_POINTERS(_densityBig, _densityBigOld); @@ -941,15 +972,56 @@ void WTURBULENCE::stepTurbulenceFull(float dtOrg, float* xvel, float* yvel, floa const float dtSubdiv = dt / (float)totalSubsteps; // set boundaries of big velocity grid - FLUID_3D::setZeroX(bigUx, _resBig); - FLUID_3D::setZeroY(bigUy, _resBig); - FLUID_3D::setZeroZ(bigUz, _resBig); + FLUID_3D::setZeroX(bigUx, _resBig, 0 , _resBig[2]); + FLUID_3D::setZeroY(bigUy, _resBig, 0 , _resBig[2]); + FLUID_3D::setZeroZ(bigUz, _resBig, 0 , _resBig[2]); + +#if PARALLEL==1 + int stepParts = threadval*2; // Dividing parallelized sections into numOfThreads * 2 sections + float partSize = (float)_zResBig/stepParts; // Size of one part; + + if (partSize < 4) {stepParts = threadval; // If the slice gets too low (might actually slow things down, change it to larger + partSize = (float)_zResBig/stepParts;} + if (partSize < 4) {stepParts = (int)(ceil((float)_zResBig/4.0f)); // If it's still too low (only possible on future systems with +24 cores), change it to 4 + partSize = (float)_zResBig/stepParts;} +#else + int zBegin=0; + int zEnd=_resBig[2]; +#endif // do the MacCormack advection, with substepping if necessary for(int substep = 0; substep < totalSubsteps; substep++) { - FLUID_3D::advectFieldMacCormack(dtSubdiv, bigUx, bigUy, bigUz, - _densityBigOld, _densityBig, tempBig1, tempBig2, _resBig, NULL); + +#if PARALLEL==1 + #pragma omp parallel + { + + #pragma omp for schedule(static,1) + for (int i=0; i #include +#include // y in smoke is z in blender extern "C" FLUID_3D *smoke_init(int *res, float *p0, float dt) @@ -110,8 +111,8 @@ extern "C" void smoke_dissolve(FLUID_3D *fluid, int speed, int log) heat[i] *= (1.0 - dydx); - if(heat[i] < 0.0f) - heat[i] = 0.0f; + /*if(heat[i] < 0.0f) + heat[i] = 0.0f;*/ } } else // linear falloff @@ -127,10 +128,9 @@ extern "C" void smoke_dissolve(FLUID_3D *fluid, int speed, int log) if(density[i] < 0.0f) density[i] = 0.0f; - heat[i] -= dydx; - - if(heat[i] < 0.0f) - heat[i] = 0.0f; + if(abs(heat[i]) < dydx) heat[i] = 0.0f; + else if (heat[i]>0.0f) heat[i] -= dydx; + else if (heat[i]<0.0f) heat[i] += dydx; } } -- cgit v1.2.3