6 files changed, 441 insertions, 521 deletions
diff --git a/intern/smoke/extern/smoke_API.h b/intern/smoke/extern/smoke_API.h
index 1a3edce2344..5607df70cf3 100644
--- a/intern/smoke/extern/smoke_API.h
+++ b/intern/smoke/extern/smoke_API.h
@@ -63,11 +63,11 @@ void smoke_turbulence_free(struct WTURBULENCE *wt);
 void smoke_turbulence_step(struct WTURBULENCE *wt, struct FLUID_3D *fluid);
 
 float *smoke_turbulence_get_density(struct WTURBULENCE *wt);
-void smoke_turbulence_get_res(struct WTURBULENCE *wt, unsigned int *res);
+void smoke_turbulence_get_res(struct WTURBULENCE *wt, int *res);
 void smoke_turbulence_set_noise(struct WTURBULENCE *wt, int type);
-void smoke_turbulence_initBlenderRNA(struct WTURBULENCE *wt, float *strength);
+void smoke_initWaveletBlenderRNA(struct WTURBULENCE *wt, float *strength);
 
-void smoke_turbulence_dissolve(struct WTURBULENCE *wt, int speed, int log);
+void smoke_dissolve_wavelet(struct WTURBULENCE *wt, int speed, int log);
 
 // export
 void smoke_turbulence_export(struct WTURBULENCE *wt, float **dens, float **densold, float **tcu, float **tcv, float **tcw);
diff --git a/intern/smoke/intern/FLUID_3D.cpp b/intern/smoke/intern/FLUID_3D.cpp
index 89dd893198b..7574a0ec16e 100644
--- a/intern/smoke/intern/FLUID_3D.cpp
+++ b/intern/smoke/intern/FLUID_3D.cpp
@@ -27,9 +27,9 @@
 #include <zlib.h>
 
 // boundary conditions of the fluid domain
-#define DOMAIN_BC_FRONT  1
-#define DOMAIN_BC_TOP    0
-#define DOMAIN_BC_LEFT   1
+#define DOMAIN_BC_FRONT  0 // z
+#define DOMAIN_BC_TOP    1 // y
+#define DOMAIN_BC_LEFT   1 // x
 #define DOMAIN_BC_BACK   DOMAIN_BC_FRONT
 #define DOMAIN_BC_BOTTOM DOMAIN_BC_TOP
 #define DOMAIN_BC_RIGHT  DOMAIN_BC_LEFT
@@ -111,47 +111,42 @@ FLUID_3D::FLUID_3D(int *res, float *p0, float dt) :
 	}
 
 	// set side obstacles
-  size_t index;
-  for (int y = 0; y < _yRes; y++) // z
-    for (int x = 0; x < _xRes; x++)
-    {
-      // front slab
-      index = x + y * _xRes;
-      if(DOMAIN_BC_BOTTOM==1) _obstacles[index] = 1;
+	int index;
+	for (int y = 0; y < _yRes; y++)
+	for (int x = 0; x < _xRes; x++)
+	{
+		// front slab
+		index = x + y * _xRes;
+		if(DOMAIN_BC_FRONT==1) _obstacles[index] = 1;
 
-      // back slab
-      index += _totalCells - _slabSize;
-      if(DOMAIN_BC_TOP==1) _obstacles[index] = 1;
-    }
-  for (int z = 0; z < _zRes; z++) // y
-    for (int x = 0; x < _xRes; x++)
-    {
-      // bottom slab
-      index = x + z * _slabSize;
-      if(DOMAIN_BC_FRONT==1) _obstacles[index] = 1;
+		// back slab
+		index += _totalCells - _slabSize;
+		if(DOMAIN_BC_BACK==1) _obstacles[index] = 1;
+	}
 
-      // top slab
-      index += _slabSize - _xRes;
-      if(DOMAIN_BC_BACK==1) _obstacles[index] = 1;
-    }
-  for (int z = 0; z < _zRes; z++) // x
-    for (int y = 0; y < _yRes; y++)
-    {
-      // left slab
-      index = y * _xRes + z * _slabSize;
-      if(DOMAIN_BC_LEFT==1) _obstacles[index] = 1;
+	for (int z = 0; z < _zRes; z++)
+	for (int x = 0; x < _xRes; x++)
+	{
+		// bottom slab
+		index = x + z * _slabSize;
+		if(DOMAIN_BC_BOTTOM==1) _obstacles[index] = 1;
 
-      // right slab
-      index += _xRes - 1;
-      if(DOMAIN_BC_RIGHT==1) _obstacles[index] = 1;
-    }
+		// top slab
+		index += _slabSize - _xRes;
+		if(DOMAIN_BC_TOP==1) _obstacles[index] = 1;
+	}
 
-	/*
-	SPHERE *obsSphere = NULL;
-	obsSphere = new SPHERE(0.375,0.5,0.375, 0.1); // for 4 to 3 domain
-	addObstacle(obsSphere);
-	delete obsSphere;
-	*/
+	for (int z = 0; z < _zRes; z++)
+	for (int y = 0; y < _yRes; y++)
+	{
+		// left slab
+		index = y * _xRes + z * _slabSize;
+		if(DOMAIN_BC_LEFT==1) _obstacles[index] = 1;
+
+		// right slab
+		index += _xRes - 1;
+		if(DOMAIN_BC_RIGHT==1) _obstacles[index] = 1;
+	}
 }
 
 FLUID_3D::~FLUID_3D()
@@ -191,7 +186,7 @@ void FLUID_3D::step()
 	for (int i = 0; i < _totalCells; i++)
 	{
 		_xForce[i] = _yForce[i] = _zForce[i] = 0.0f;
-		_obstacles[i] &= ~2;
+		// _obstacles[i] &= ~2;
 	}
 
 	wipeBoundaries();
@@ -232,7 +227,8 @@ void FLUID_3D::step()
 	_totalTime += _dt;
 	_totalSteps++;	
 
-	memset(_obstacles, 0, sizeof(unsigned char)*_xRes*_yRes*_zRes);
+	// todo xxx dg: only clear obstacles, not boundaries
+	// memset(_obstacles, 0, sizeof(unsigned char)*_xRes*_yRes*_zRes);
 }
 
 //////////////////////////////////////////////////////////////////////
@@ -270,7 +266,7 @@ void FLUID_3D::artificialDamping(float* field) {
 //////////////////////////////////////////////////////////////////////
 void FLUID_3D::copyBorderAll(float* field)
 {
-	size_t index;
+	int index;
 	for (int y = 0; y < _yRes; y++)
 		for (int x = 0; x < _xRes; x++)
 		{
@@ -350,13 +346,13 @@ void FLUID_3D::project()
 
 	// copy out the boundaries
 	if(DOMAIN_BC_LEFT == 0)  setNeumannX(_xVelocity, _res);
-	else setZeroX(_xVelocity, _res);
+	else setZeroX(_xVelocity, _res); 
 
-	if(DOMAIN_BC_TOP == 0)   setNeumannZ(_zVelocity, _res);
-	else setZeroZ(_zVelocity, _res);
+	if(DOMAIN_BC_TOP == 0)   setNeumannY(_yVelocity, _res);
+	else setZeroY(_yVelocity, _res); 
 
-	if(DOMAIN_BC_FRONT == 0) setNeumannY(_yVelocity, _res);
-	else setZeroY(_yVelocity, _res);
+	if(DOMAIN_BC_FRONT == 0) setNeumannZ(_zVelocity, _res);
+	else setZeroZ(_zVelocity, _res);
 
 	// calculate divergence
 	index = _slabSize + _xRes + 1;
@@ -400,12 +396,16 @@ void FLUID_3D::project()
 		for (y = 1; y < _yRes - 1; y++, index += 2)
 			for (x = 1; x < _xRes - 1; x++, index++)
 			{
-				if(!_obstacles[index])
+				// if(!_obstacles[index])
 				{
 					_xVelocity[index] -= 0.5f * (_pressure[index + 1]     - _pressure[index - 1]) * invDx;
 					_yVelocity[index] -= 0.5f * (_pressure[index + _xRes]  - _pressure[index - _xRes]) * invDx;
 					_zVelocity[index] -= 0.5f * (_pressure[index + _slabSize] - _pressure[index - _slabSize]) * invDx;
-				}
+				}/*
+				else
+				{
+					_xVelocity[index] = _yVelocity[index] = _zVelocity[index] = 0.0f;
+				}*/
 			}
 
 	if (_pressure) delete[] _pressure;
@@ -669,13 +669,13 @@ void FLUID_3D::advectMacCormack()
 	Vec3Int res = Vec3Int(_xRes,_yRes,_zRes);
 
 	if(DOMAIN_BC_LEFT == 0) copyBorderX(_xVelocity, res);
-	else setZeroX(_xVelocity, res);
+	else setZeroX(_xVelocity, res); 
 
-	if(DOMAIN_BC_TOP == 0) copyBorderZ(_zVelocity, res);
-	else setZeroZ(_zVelocity, res);
+	if(DOMAIN_BC_TOP == 0) copyBorderY(_yVelocity, res);
+	else setZeroY(_yVelocity, res); 
 
-	if(DOMAIN_BC_FRONT == 0) copyBorderY(_yVelocity, res);
-	else setZeroY(_yVelocity, res);
+	if(DOMAIN_BC_FRONT == 0) copyBorderZ(_zVelocity, res);
+	else setZeroZ(_zVelocity, res);
 
 	SWAP_POINTERS(_xVelocity, _xVelocityOld);
 	SWAP_POINTERS(_yVelocity, _yVelocityOld);
@@ -698,13 +698,13 @@ void FLUID_3D::advectMacCormack()
 	advectFieldMacCormack(dt0, _xVelocityOld, _yVelocityOld, _zVelocityOld, _zVelocityOld, _zVelocity, t1,t2, res, _obstacles);
 
 	if(DOMAIN_BC_LEFT == 0) copyBorderX(_xVelocity, res);
-	else setZeroX(_xVelocity, res);
+	else setZeroX(_xVelocity, res); 
 
-	if(DOMAIN_BC_TOP == 0) copyBorderZ(_zVelocity, res);
-	else setZeroZ(_zVelocity, res);
+	if(DOMAIN_BC_TOP == 0) copyBorderY(_yVelocity, res);
+	else setZeroY(_yVelocity, res); 
 
-	if(DOMAIN_BC_FRONT == 0) copyBorderY(_yVelocity, res);
-	else setZeroY(_yVelocity, res);
+	if(DOMAIN_BC_FRONT == 0) copyBorderZ(_zVelocity, res);
+	else setZeroZ(_zVelocity, res);
 
 	setZeroBorder(_density, res);
 	setZeroBorder(_heat, res);
diff --git a/intern/smoke/intern/FLUID_3D_STATIC.cpp b/intern/smoke/intern/FLUID_3D_STATIC.cpp
index 4474129beea..4909c071c3d 100644
--- a/intern/smoke/intern/FLUID_3D_STATIC.cpp
+++ b/intern/smoke/intern/FLUID_3D_STATIC.cpp
@@ -80,7 +80,7 @@ void FLUID_3D::addSmokeTestCase(float* field, Vec3Int res, float value)
 void FLUID_3D::setNeumannX(float* field, Vec3Int res)
 {
 	const int slabSize = res[0] * res[1];
-	size_t index;
+	int index;
 	for (int z = 0; z < res[2]; z++)
 		for (int y = 0; y < res[1]; y++)
 		{
@@ -92,6 +92,18 @@ void FLUID_3D::setNeumannX(float* field, Vec3Int res)
 			index += res[0] - 1;
 			field[index] = field[index - 2];
 		}
+
+	// fix, force top slab to only allow outwards flux
+	for (int y = 0; y < res[1]; y++)
+		for (int z = 0; z < res[2]; z++)
+		{
+			// top slab
+			int index = y * res[0] + z * slabSize;
+			index += res[0] - 1;
+			if(field[index]<0.) field[index] = 0.;
+			index -= 1;
+			if(field[index]<0.) field[index] = 0.;
+		}
  }
 
 //////////////////////////////////////////////////////////////////////
@@ -100,18 +112,31 @@ void FLUID_3D::setNeumannX(float* field, Vec3Int res)
 void FLUID_3D::setNeumannY(float* field, Vec3Int res)
 {
 	const int slabSize = res[0] * res[1];
-	size_t index;
+	int index;
 	for (int z = 0; z < res[2]; z++)
 		for (int x = 0; x < res[0]; x++)
 		{
-			// front slab
+			// bottom slab
 			index = x + z * slabSize;
 			field[index] = field[index + 2 * res[0]];
 
-			// back slab
+			// top slab
 			index += slabSize - res[0];
 			field[index] = field[index - 2 * res[0]];
 		}
+
+	// fix, force top slab to only allow outwards flux
+	for (int z = 0; z < res[2]; z++)
+		for (int x = 0; x < res[0]; x++)
+		{
+			// top slab
+			int index = x + z * slabSize;
+			index += slabSize - res[0];
+			if(field[index]<0.) field[index] = 0.;
+			index -= res[0];
+			if(field[index]<0.) field[index] = 0.;
+		}
+		
 }
 
 //////////////////////////////////////////////////////////////////////
@@ -121,15 +146,15 @@ void FLUID_3D::setNeumannZ(float* field, Vec3Int res)
 {
 	const int slabSize = res[0] * res[1];
 	const int totalCells = res[0] * res[1] * res[2];
-	size_t index;
+	int index;
 	for (int y = 0; y < res[1]; y++)
 		for (int x = 0; x < res[0]; x++)
 		{
-			// bottom slab
+			// front slab
 			index = x + y * res[0];
 			field[index] = field[index + 2 * slabSize];
 
-			// top slab
+			// back slab
 			index += totalCells - slabSize;
 			field[index] = field[index - 2 * slabSize];
 		}
@@ -139,11 +164,11 @@ void FLUID_3D::setNeumannZ(float* field, Vec3Int res)
 		for (int x = 0; x < res[0]; x++)
 		{
 			// top slab
-			index = x + y * res[0];
+			int index = x + y * res[0];
 			index += totalCells - slabSize;
-			if(field[index]<0.) field[index] = 0.0f;
+			if(field[index]<0.) field[index] = 0.;
 			index -= slabSize;
-			if(field[index]<0.) field[index] = 0.0f;
+			if(field[index]<0.) field[index] = 0.;
 		}
 		
 }
@@ -231,13 +256,14 @@ void FLUID_3D::copyBorderX(float* field, Vec3Int res)
 void FLUID_3D::copyBorderY(float* field, Vec3Int res)
 {
 	const int slabSize = res[0] * res[1];
+	const int totalCells = res[0] * res[1] * res[2];
 	int index;
 	for (int z = 0; z < res[2]; z++)
 		for (int x = 0; x < res[0]; x++)
 		{
 			// bottom slab
 			index = x + z * slabSize;
-			field[index] = field[index + res[0]];
+			field[index] = field[index + res[0]]; 
 			// top slab
 			index += slabSize - res[0];
 			field[index] = field[index - res[0]];
@@ -253,7 +279,7 @@ void FLUID_3D::copyBorderZ(float* field, Vec3Int res)
 		{
 			// front slab
 			index = x + y * res[0];
-			field[index] = field[index + slabSize];
+			field[index] = field[index + slabSize]; 
 			// back slab
 			index += totalCells - slabSize;
 			field[index] = field[index - slabSize];
@@ -269,18 +295,21 @@ void FLUID_3D::advectFieldSemiLagrange(const float dt, const float* velx, const
 	const int xres = res[0];
 	const int yres = res[1];
 	const int zres = res[2];
+	static int hits = 0;
+	static int total = 0;
 	const int slabSize = res[0] * res[1];
 
 	// scale dt up to grid resolution
-#if PARALLEL==1
-#pragma omp parallel for schedule(static)
+#if PARALLEL==1 && !_WIN32
+#pragma omp parallel
+#pragma omp for  schedule(static)
 #endif
 	for (int z = 0; z < zres; z++)
 		for (int y = 0; y < yres; y++)
 			for (int x = 0; x < xres; x++)
 			{
 				const int index = x + y * xres + z * xres*yres;
-
+				
         // backtrace
 				float xTrace = x - dt * velx[index];
 				float yTrace = y - dt * vely[index];
@@ -337,8 +366,8 @@ void FLUID_3D::advectFieldSemiLagrange(const float dt, const float* velx, const
 //
 // comments are the pseudocode from selle's paper
 //////////////////////////////////////////////////////////////////////
-void FLUID_3D::advectFieldMacCormack(const float dt, const float* xVelocity, const float* yVelocity, const float* zVelocity,
-		float* oldField, float* newField, float* temp1, float* temp2, Vec3Int res, const unsigned char* obstacles)
+void FLUID_3D::advectFieldMacCormack(const float dt, const float* xVelocity, const float* yVelocity, const float* zVelocity, 
+				float* oldField, float* newField, float* temp1, float* temp2, Vec3Int res, const unsigned char* obstacles)
 {
 	float* phiHatN  = temp1;
 	float* phiHatN1 = temp2;
@@ -359,7 +388,7 @@ void FLUID_3D::advectFieldMacCormack(const float dt, const float* xVelocity, con
 	advectFieldSemiLagrange( -1.0*dt, xVelocity, yVelocity, zVelocity, phiHatN1, phiHatN, res);
 
 	// phiN1 = phiHatN1 + (phiN - phiHatN) / 2
-	const int border = 0;
+	const int border = 0; 
 	for (int z = border; z < sz-border; z++)
 		for (int y = border; y < sy-border; y++)
 			for (int x = border; x < sx-border; x++) {
@@ -376,7 +405,7 @@ void FLUID_3D::advectFieldMacCormack(const float dt, const float* xVelocity, con
 
 	// if the error estimate was bad, revert to first order
 	clampOutsideRays(dt, xVelocity, yVelocity, zVelocity, oldField, newField, res, obstacles, phiHatN1);
-}
+} 
 
 
 //////////////////////////////////////////////////////////////////////
@@ -454,17 +483,18 @@ void FLUID_3D::clampExtrema(const float dt, const float* velx, const float* vely
 }
 
 //////////////////////////////////////////////////////////////////////
-// Reverts any backtraces that go into boundaries back to first
+// Reverts any backtraces that go into boundaries back to first 
 // order -- in this case the error correction term was totally
 // incorrect
 //////////////////////////////////////////////////////////////////////
-void FLUID_3D::clampOutsideRays(const float dt, const float* velx, const float* vely, const float* velz,
-		float* oldField, float* newField, Vec3Int res, const unsigned char* obstacles, const float *oldAdvection)
+void FLUID_3D::clampOutsideRays(const float dt, const float* velx, const float* vely,  const float* velz,
+				float* oldField, float* newField, Vec3Int res, const unsigned char* obstacles, const float *oldAdvection)
 {
 	const int sx= res[0];
 	const int sy= res[1];
 	const int sz= res[2];
 	const int slabSize = res[0] * res[1];
+
 	for (int z = 1; z < sz-1; z++)
 		for (int y = 1; y < sy-1; y++)
 			for (int x = 1; x < sx-1; x++)
@@ -479,7 +509,7 @@ void FLUID_3D::clampOutsideRays(const float dt, const float* velx, const float*
 				float zTrace    = z - dt * velz[index];
 
 				// see if it goes outside the boundaries
-				bool hasObstacle =
+				bool hasObstacle = 
 					(zTrace < 1.0f)    || (zTrace > sz - 2.0f) ||
 					(yTrace < 1.0f)    || (yTrace > sy - 2.0f) ||
 					(xTrace < 1.0f)    || (xTrace > sx - 2.0f) ||
@@ -515,7 +545,7 @@ void FLUID_3D::clampOutsideRays(const float dt, const float* velx, const float*
 				int z0 = (int)zBackward;
 				int z1 = z0 + 1;
 				if(obstacles && !hasObstacle) {
-					hasObstacle = hasObstacle ||
+					hasObstacle = hasObstacle || 
 						obstacles[x0 + y0 * sx + z0*slabSize] ||
 						obstacles[x0 + y1 * sx + z0*slabSize] ||
 						obstacles[x1 + y0 * sx + z0*slabSize] ||
@@ -535,7 +565,7 @@ void FLUID_3D::clampOutsideRays(const float dt, const float* velx, const float*
 				z0 = (int)zTrace;
 				z1 = z0 + 1;
 				if(obstacles && !hasObstacle) {
-					hasObstacle = hasObstacle ||
+					hasObstacle = hasObstacle || 
 						obstacles[x0 + y0 * sx + z0*slabSize] ||
 						obstacles[x0 + y1 * sx + z0*slabSize] ||
 						obstacles[x1 + y0 * sx + z0*slabSize] ||
@@ -577,84 +607,8 @@ void FLUID_3D::clampOutsideRays(const float dt, const float* velx, const float*
 						u1 * (s0 * (t0 * oldField[i001] +
 									t1 * oldField[i011]) +
 								s1 * (t0 * oldField[i101] +
-									t1 * oldField[i111]));
+									t1 * oldField[i111])); 
 				}
 			} // xyz
 }
 
-//////////////////////////////////////////////////////////////////////
-// image output
-//////////////////////////////////////////////////////////////////////
-/*
-void FLUID_3D::writeImageSliceXY(const float *field, Vec3Int res, int slice, string prefix, int picCnt, float scale) {
-  writeProjectedIntern(field, res, 0,1, prefix, picCnt, scale);
-}
-void FLUID_3D::writeImageSliceYZ(const float *field, Vec3Int res, int slice, string prefix, int picCnt, float scale) {
-  writeProjectedIntern(field, res, 1,2, prefix, picCnt, scale);
-}
-void FLUID_3D::writeImageSliceXZ(const float *field, Vec3Int res, int slice, string prefix, int picCnt, float scale) {
-  writeProjectedIntern(field, res, 0,2, prefix, picCnt, scale);
-}
-*/
-
-//////////////////////////////////////////////////////////////////////
-// Helper function for projecting densities along a dimension
-//////////////////////////////////////////////////////////////////////
-/*
-static int getOtherDir(int dir1, int dir2) {
-	switch(dir1) {
-		case 0:
-			switch(dir2) {
-				case 1: return 2;
-				case 2: return 1; }
-			break;
-		case 1:
-			switch(dir2) {
-				case 0: return 2;
-				case 2: return 0; }
-			break;
-		case 2:
-			switch(dir2) {
-				case 0: return 1;
-				case 1: return 0; }
-			break;
-		default:
-			return 0;
-	}
-	return 0;
-}
-*/
-
-//////////////////////////////////////////////////////////////////////
-// average densities along third spatial direction
-//////////////////////////////////////////////////////////////////////
-/*
-void FLUID_3D::writeProjectedIntern(const float *field, Vec3Int res,
-		int dir1, int dir2, string prefix, int picCnt, float scale) {
-	const int nitems = res[dir1]*res[dir2];
-	const int otherDir = getOtherDir(dir1,dir2);
-	float *buf = new float[nitems];
-	Vec3Int min = Vec3Int(0);
-	Vec3Int max = res;
-
-	min[otherDir] = 0;
-	max[otherDir] = res[otherDir];
-	float div = 1./(float)MIN3V(res); // normalize for shorter sides, old: res[otherDir];
-	div *= 4.; //slightly increase contrast
-	for(int i=0; i<nitems; i++) buf[i] = 0.;
-
-	Vec3Int cnt = 0;
-	for (cnt[2] = min[2]; cnt[2] < max[2]; cnt[2]++) {
-		for (cnt[1] = min[1]; cnt[1] < max[1]; cnt[1]++)
-			for (cnt[0] = min[0]; cnt[0] < max[0]; cnt[0]++)
-			{
-				const int index = cnt[0] + cnt[1] * res[0] + cnt[2] * res[0]*res[1];
-				const int bufindex = cnt[dir1] + cnt[dir2] * res[dir1];
-				buf[bufindex] += field[index] * scale *div;
-			}
-	}
-	// IMAGE::dumpNumberedPNG(picCnt, prefix, buf, res[dir1], res[dir2]);
-	delete[] buf;
-}
-*/
-
diff --git a/intern/smoke/intern/WTURBULENCE.cpp b/intern/smoke/intern/WTURBULENCE.cpp
index dd092d4f0cc..a1b2aaf30f2 100644
--- a/intern/smoke/intern/WTURBULENCE.cpp
+++ b/intern/smoke/intern/WTURBULENCE.cpp
@@ -92,10 +92,6 @@ WTURBULENCE::WTURBULENCE(int xResSm, int yResSm, int zResSm, int amplify, int no
 	_tcW = new float[_totalCellsSm];
 	_tcTemp = new float[_totalCellsSm];
 	
-	// allocate & init energy terms
-	_energy = new float[_totalCellsSm];
-	_highFreqEnergy = new float[_totalCellsSm];
-	
 	// map all 
 	const float dx = 1./(float)(_resSm[0]);
 	const float dy = 1./(float)(_resSm[1]);
@@ -109,15 +105,8 @@ WTURBULENCE::WTURBULENCE(int xResSm, int yResSm, int zResSm, int amplify, int no
 		_tcV[index] = y*dy;
 		_tcW[index] = z*dz;
 		_tcTemp[index] = 0.;
-		_energy[index] = 0.;
 		}
 	
-	// allocate eigenvalue arrays
-	_eigMin = new float[_totalCellsSm];
-	_eigMax = new float[_totalCellsSm];
-	for(int i=0; i < _totalCellsSm; i++) 
-		_eigMin[i] = _eigMax[i] =  0.;
-	
 	// noise tiles
 	_noiseTile = new float[noiseTileSize * noiseTileSize * noiseTileSize];
 	/*
@@ -143,12 +132,7 @@ WTURBULENCE::~WTURBULENCE() {
   delete[] _tcW;
   delete[] _tcTemp;
 
-  delete[] _eigMin;
-  delete[] _eigMax;
   delete[] _noiseTile;
-
-  delete[] _energy;
-  delete[] _highFreqEnergy;
 }
 
 //////////////////////////////////////////////////////////////////////
@@ -293,34 +277,34 @@ static float minDz(int x, int y, int z, float* input, Vec3Int res)
 // handle texture coordinates (advection, reset, eigenvalues), 
 // Beware -- uses big density maccormack as temporary arrays
 ////////////////////////////////////////////////////////////////////// 
-void WTURBULENCE::advectTextureCoordinates (float dtOrg, float* xvel, float* yvel, float* zvel, float *_tempBig1, float *_tempBig2) {
+void WTURBULENCE::advectTextureCoordinates (float dtOrg, float* xvel, float* yvel, float* zvel, float *tempBig1, float *tempBig2) {
   // advection
   SWAP_POINTERS(_tcTemp, _tcU);
   FLUID_3D::copyBorderX(_tcTemp, _resSm);
   FLUID_3D::copyBorderY(_tcTemp, _resSm);
   FLUID_3D::copyBorderZ(_tcTemp, _resSm);
   FLUID_3D::advectFieldMacCormack(dtOrg, xvel, yvel, zvel, 
-      _tcTemp, _tcU, _tempBig1, _tempBig2, _resSm, NULL);
+      _tcTemp, _tcU, tempBig1, tempBig2, _resSm, NULL);
 
   SWAP_POINTERS(_tcTemp, _tcV);
   FLUID_3D::copyBorderX(_tcTemp, _resSm);
   FLUID_3D::copyBorderY(_tcTemp, _resSm);
   FLUID_3D::copyBorderZ(_tcTemp, _resSm);
   FLUID_3D::advectFieldMacCormack(dtOrg, xvel, yvel, zvel, 
-      _tcTemp, _tcV, _tempBig1, _tempBig2, _resSm, NULL);
+      _tcTemp, _tcV, tempBig1, tempBig2, _resSm, NULL);
 
   SWAP_POINTERS(_tcTemp, _tcW);
   FLUID_3D::copyBorderX(_tcTemp, _resSm);
   FLUID_3D::copyBorderY(_tcTemp, _resSm);
   FLUID_3D::copyBorderZ(_tcTemp, _resSm);
   FLUID_3D::advectFieldMacCormack(dtOrg, xvel, yvel, zvel, 
-      _tcTemp, _tcW, _tempBig1, _tempBig2, _resSm, NULL);
+      _tcTemp, _tcW, tempBig1, tempBig2, _resSm, NULL);
 }
 
 //////////////////////////////////////////////////////////////////////
 // Compute the eigenvalues of the advected texture
 ////////////////////////////////////////////////////////////////////// 
-void WTURBULENCE::computeEigenvalues() {
+void WTURBULENCE::computeEigenvalues(float *_eigMin, float *_eigMax) {
   // stats
   float maxeig = -1.;
   float mineig = 10.;
@@ -365,7 +349,7 @@ void WTURBULENCE::computeEigenvalues() {
 //////////////////////////////////////////////////////////////////////
 // advect & reset texture coordinates based on eigenvalues
 ////////////////////////////////////////////////////////////////////// 
-void WTURBULENCE::resetTextureCoordinates() 
+void WTURBULENCE::resetTextureCoordinates(float *_eigMin, float *_eigMax) 
 {
   // allowed deformation of the textures
   const float limit = 2.f;
@@ -396,7 +380,7 @@ void WTURBULENCE::resetTextureCoordinates()
 // Compute the highest frequency component of the wavelet
 // decomposition
 //////////////////////////////////////////////////////////////////////
-void WTURBULENCE::decomposeEnergy()
+void WTURBULENCE::decomposeEnergy(float *_energy, float *_highFreqEnergy)
 {
   // do the decomposition -- the goal here is to have
   // the energy with the high frequency component stomped out
@@ -432,7 +416,7 @@ void WTURBULENCE::decomposeEnergy()
 // compute velocity from energies and march into obstacles
 // for wavelet decomposition
 ////////////////////////////////////////////////////////////////////// 
-void WTURBULENCE::computeEnergy(float* xvel, float* yvel, float* zvel, unsigned char *obstacles) 
+void WTURBULENCE::computeEnergy(float *_energy, float* xvel, float* yvel, float* zvel, unsigned char *obstacles) 
 {
   // compute everywhere
   for (int x = 0; x < _totalCellsSm; x++) 
@@ -491,7 +475,7 @@ void WTURBULENCE::computeEnergy(float* xvel, float* yvel, float* zvel, unsigned
             }
             if (valid > 0)
             {
-              _energy[index] = sum / valid;
+              _energy[index] = sum / (float)valid;
               obstacles[index] = MARCHED;
             }
           }
@@ -516,9 +500,9 @@ void WTURBULENCE::computeEnergy(float* xvel, float* yvel, float* zvel, unsigned
 Vec3 WTURBULENCE::WVelocity(Vec3 orgPos)
 {
   // arbitrarily offset evaluation points
-  const Vec3 p1 = orgPos + Vec3(NOISE_TILE_SIZE/2,0,0);
-  const Vec3 p2 = orgPos + Vec3(0,NOISE_TILE_SIZE/2,0);
-  const Vec3 p3 = orgPos + Vec3(0,0,NOISE_TILE_SIZE/2);
+  const Vec3 p1 = orgPos + Vec3(NOISE_TILE_SIZE/2.0,0,0);
+  const Vec3 p2 = orgPos + Vec3(0,NOISE_TILE_SIZE/2.0,0);
+  const Vec3 p3 = orgPos + Vec3(0,0,NOISE_TILE_SIZE/2.0);
 
   const float f1y = WNoiseDy(p1, _noiseTile);
   const float f1z = WNoiseDz(p1, _noiseTile);
@@ -542,9 +526,9 @@ Vec3 WTURBULENCE::WVelocity(Vec3 orgPos)
 Vec3 WTURBULENCE::WVelocityWithJacobian(Vec3 orgPos, float* xUnwarped, float* yUnwarped, float* zUnwarped)
 {
   // arbitrarily offset evaluation points
-  const Vec3 p1 = orgPos + Vec3(NOISE_TILE_SIZE/2,0,0);
-  const Vec3 p2 = orgPos + Vec3(0,NOISE_TILE_SIZE/2,0);
-  const Vec3 p3 = orgPos + Vec3(0,0,NOISE_TILE_SIZE/2);
+  const Vec3 p1 = orgPos + Vec3(NOISE_TILE_SIZE/2.0,0,0);
+  const Vec3 p2 = orgPos + Vec3(0,NOISE_TILE_SIZE/2.0,0);
+  const Vec3 p3 = orgPos + Vec3(0,0,NOISE_TILE_SIZE/2.0);
 
   Vec3 final;
   final[0] = WNoiseDx(p1, _noiseTile);
@@ -575,44 +559,40 @@ Vec3 WTURBULENCE::WVelocityWithJacobian(Vec3 orgPos, float* xUnwarped, float* yU
   return ret;
 }
 
+
 //////////////////////////////////////////////////////////////////////
 // perform an actual noise advection step
 //////////////////////////////////////////////////////////////////////
 void WTURBULENCE::stepTurbulenceReadable(float dtOrg, float* xvel, float* yvel, float* zvel, unsigned char *obstacles) 
 {
-  // big velocity macCormack fields
-	float* _bigUx;
-	float* _bigUy;
-	float* _bigUz;
-
-	// temp arrays for BFECC and MacCormack - they have more convenient
-	// names in the actual implementations
-	float* _tempBig1;
-	float* _tempBig2;
-
-	// allocate high resolution velocity field. Note that this is only
-	// necessary because we use MacCormack advection. For semi-Lagrangian
-	// advection, these arrays are not necessary.
-	_tempBig1 = new float[_totalCellsBig];
-	_tempBig2 = new float[_totalCellsBig];
-
-  // enlarge timestep to match grid
-  const float dt = dtOrg * _amplify;
-  const float invAmp = 1.0f / _amplify;
-
-  _bigUx = new float[_totalCellsBig];
-  _bigUy = new float[_totalCellsBig];
-  _bigUz = new float[_totalCellsBig]; 
-
-  // prepare textures
-  advectTextureCoordinates(dtOrg, xvel,yvel,zvel, _tempBig1, _tempBig2);
+	// enlarge timestep to match grid
+	const float dt = dtOrg * _amplify;
+	const float invAmp = 1.0f / _amplify;
+  	float *tempBig1 = new float[_totalCellsBig];
+	float *tempBig2 = new float[_totalCellsBig];
+	float *bigUx = new float[_totalCellsBig];
+	float *bigUy = new float[_totalCellsBig];
+	float *bigUz = new float[_totalCellsBig]; 
+	float *_energy = new float[_totalCellsSm];
+	float *highFreqEnergy = new float[_totalCellsSm];
+	float *eigMin  = new float[_totalCellsSm];
+	float *eigMax  = new float[_totalCellsSm];
+
+	memset(tempBig1, 0, sizeof(float)*_totalCellsBig);
+	memset(tempBig2, 0, sizeof(float)*_totalCellsBig);
+	memset(highFreqEnergy, 0, sizeof(float)*_totalCellsSm);
+	memset(eigMin, 0, sizeof(float)*_totalCellsSm);
+	memset(eigMax, 0, sizeof(float)*_totalCellsSm);
+
+  	// prepare textures
+	advectTextureCoordinates(dtOrg, xvel,yvel,zvel, tempBig1, tempBig2);
 
   // compute eigenvalues of the texture coordinates
-  computeEigenvalues();
+  computeEigenvalues(eigMin, eigMax);
 
   // do wavelet decomposition of energy
-  computeEnergy(xvel, yvel, zvel, obstacles);
-  decomposeEnergy();
+  computeEnergy(_energy, xvel, yvel, zvel, obstacles);
+  decomposeEnergy(_energy, highFreqEnergy);
 
   // zero out coefficients inside of the obstacle
   for (int x = 0; x < _totalCellsSm; x++)
@@ -647,7 +627,7 @@ void WTURBULENCE::stepTurbulenceReadable(float dtOrg, float* xvel, float* yvel,
 
         // retrieve wavelet energy at highest frequency
         float energy = INTERPOLATE::lerp3d(
-            _highFreqEnergy, posSm[0],posSm[1],posSm[2], _xResSm, _yResSm, _zResSm);
+            highFreqEnergy, posSm[0],posSm[1],posSm[2], _xResSm, _yResSm, _zResSm);
 
         // base amplitude for octave 0
         float coefficient = sqrtf(2.0f * fabs(energy));
@@ -656,8 +636,8 @@ void WTURBULENCE::stepTurbulenceReadable(float dtOrg, float* xvel, float* yvel,
         // add noise to velocity, but only if the turbulence is
         // sufficiently undeformed, and the energy is large enough
         // to make a difference
-        const bool addNoise = _eigMax[indexSmall] < 2. && 
-                              _eigMin[indexSmall] > 0.5;
+        const bool addNoise = eigMax[indexSmall] < 2. && 
+                              eigMin[indexSmall] > 0.5;
         if (addNoise && amplitude > _cullingThreshold) {
           // base amplitude for octave 0
           float amplitudeScaled = amplitude;
@@ -679,21 +659,21 @@ void WTURBULENCE::stepTurbulenceReadable(float dtOrg, float* xvel, float* yvel,
         // If you wanted to save memory, you would instead perform a 
         // semi-Lagrangian backtrace for the current grid cell here. Then
         // you could just throw the velocity away.
-        _bigUx[index] = vel[0];
-        _bigUy[index] = vel[1];
-        _bigUz[index] = vel[2];
+        bigUx[index] = vel[0];
+        bigUy[index] = vel[1];
+        bigUz[index] = vel[2];
 
         // compute the velocity magnitude for substepping later
-        const float velMag = _bigUx[index] * _bigUx[index] + 
-                             _bigUy[index] * _bigUy[index] + 
-                             _bigUz[index] * _bigUz[index];
+        const float velMag = bigUx[index] * bigUx[index] + 
+                             bigUy[index] * bigUy[index] + 
+                             bigUz[index] * bigUz[index];
         if (velMag > maxVelocity) maxVelocity = velMag;
 
         // zero out velocity inside obstacles
         float obsCheck = INTERPOLATE::lerp3dToFloat(
             obstacles, posSm[0], posSm[1], posSm[2], _xResSm, _yResSm, _zResSm); 
         if (obsCheck > 0.95) 
-          _bigUx[index] = _bigUy[index] = _bigUz[index] = 0.;
+          bigUx[index] = bigUy[index] = bigUz[index] = 0.;
       }
 
   // prepare density for an advection
@@ -701,24 +681,23 @@ void WTURBULENCE::stepTurbulenceReadable(float dtOrg, float* xvel, float* yvel,
 
   // based on the maximum velocity present, see if we need to substep,
   // but cap the maximum number of substeps to 5
-  const int maxSubSteps = 25;
-  const int maxVel = 5;
+  const int maxSubSteps = 5; 
   maxVelocity = sqrt(maxVelocity) * dt;
-  int totalSubsteps = (int)(maxVelocity / (float)maxVel);
+  int totalSubsteps = (int)(maxVelocity / (float)maxSubSteps);
   totalSubsteps = (totalSubsteps < 1) ? 1 : totalSubsteps;
   totalSubsteps = (totalSubsteps > maxSubSteps) ? maxSubSteps : totalSubsteps;
   const float dtSubdiv = dt / (float)totalSubsteps;
 
   // set boundaries of big velocity grid
-  FLUID_3D::setZeroX(_bigUx, _resBig); 
-  FLUID_3D::setZeroY(_bigUy, _resBig); 
-  FLUID_3D::setZeroZ(_bigUz, _resBig);
+  FLUID_3D::setZeroX(bigUx, _resBig); 
+  FLUID_3D::setZeroY(bigUy, _resBig); 
+  FLUID_3D::setZeroZ(bigUz, _resBig);
 
   // do the MacCormack advection, with substepping if necessary
   for(int substep = 0; substep < totalSubsteps; substep++)
   {
-    FLUID_3D::advectFieldMacCormack(dtSubdiv, _bigUx, _bigUy, _bigUz, 
-        _densityBigOld, _densityBig, _tempBig1, _tempBig2, _resBig, NULL);
+    FLUID_3D::advectFieldMacCormack(dtSubdiv, bigUx, bigUy, bigUz, 
+        _densityBigOld, _densityBig, tempBig1, tempBig2, _resBig, NULL);
 
     if (substep < totalSubsteps - 1) 
       SWAP_POINTERS(_densityBig, _densityBigOld);
@@ -730,25 +709,21 @@ void WTURBULENCE::stepTurbulenceReadable(float dtOrg, float* xvel, float* yvel,
   // reset texture coordinates now in preparation for next timestep
   // Shouldn't do this before generating the noise because then the 
   // eigenvalues stored do not reflect the underlying texture coordinates
-  resetTextureCoordinates();
+  resetTextureCoordinates(eigMin, eigMax);
   
-  // output files
-  /*
-  string prefix = string("./amplified.preview/density_bigxy_");
-  FLUID_3D::writeImageSliceXY(_densityBig, _resBig, _resBig[2]/2, prefix, _totalStepsBig, 1.0f);
-  //string df3Prefix = string("./df3/density_big_");
-  //IMAGE::dumpDF3(_totalStepsBig, df3Prefix, _densityBig, _resBig[0],_resBig[1],_resBig[2]);
-  string pbrtPrefix = string("./pbrt/density_big_");
-  IMAGE::dumpPBRT(_totalStepsBig, pbrtPrefix, _densityBig, _resBig[0],_resBig[1],_resBig[2]);
-	*/
-  _totalStepsBig++;
+  delete[] tempBig1;
+  delete[] tempBig2;
+  delete[] bigUx;
+  delete[] bigUy;
+  delete[] bigUz;
+  delete[] _energy;
+  delete[] highFreqEnergy;
 
-  	delete[] _bigUx;
-	delete[] _bigUy;
-	delete[] _bigUz;
+  delete[] eigMin;
+  delete[] eigMax;
+  
 
-	delete[] _tempBig1;
-    delete[] _tempBig2;
+  _totalStepsBig++;
 }
 
 //////////////////////////////////////////////////////////////////////
@@ -757,257 +732,258 @@ void WTURBULENCE::stepTurbulenceReadable(float dtOrg, float* xvel, float* yvel,
 //////////////////////////////////////////////////////////////////////
 void WTURBULENCE::stepTurbulenceFull(float dtOrg, float* xvel, float* yvel, float* zvel, unsigned char *obstacles)
 {
-	// big velocity macCormack fields
-	float* _bigUx;
-	float* _bigUy;
-	float* _bigUz;
-
-	// temp arrays for BFECC and MacCormack - they have more convenient
-	// names in the actual implementations
-	float* _tempBig1;
-	float* _tempBig2;
-
-	// allocate high resolution velocity field. Note that this is only
-	// necessary because we use MacCormack advection. For semi-Lagrangian
-	// advection, these arrays are not necessary.
-	_tempBig1 = new float[_totalCellsBig];
-	_tempBig2 = new float[_totalCellsBig];
-
 	// enlarge timestep to match grid
 	const float dt = dtOrg * _amplify;
 	const float invAmp = 1.0f / _amplify;
-
-	_bigUx = new float[_totalCellsBig];
-	_bigUy = new float[_totalCellsBig];
-	_bigUz = new float[_totalCellsBig]; 
+	float *tempBig1 = new float[_totalCellsBig];
+	float *tempBig2 = new float[_totalCellsBig];
+	float *bigUx = new float[_totalCellsBig];
+	float *bigUy = new float[_totalCellsBig];
+	float *bigUz = new float[_totalCellsBig]; 
+	float *_energy = new float[_totalCellsSm];
+	float *highFreqEnergy = new float[_totalCellsSm];
+	float *eigMin  = new float[_totalCellsSm];
+	float *eigMax  = new float[_totalCellsSm];
+
+	memset(highFreqEnergy, 0, sizeof(float)*_totalCellsSm);
+	memset(eigMin, 0, sizeof(float)*_totalCellsSm);
+	memset(eigMax, 0, sizeof(float)*_totalCellsSm);
 
 	// prepare textures
-	advectTextureCoordinates(dtOrg, xvel,yvel,zvel, _tempBig1, _tempBig2);
+	advectTextureCoordinates(dtOrg, xvel,yvel,zvel, tempBig1, tempBig2);
 
 	// do wavelet decomposition of energy
-	computeEnergy(xvel, yvel, zvel, obstacles);
-	decomposeEnergy();
+	computeEnergy(_energy, xvel, yvel, zvel, obstacles);
 
-	// zero out coefficients inside of the obstacle
 	for (int x = 0; x < _totalCellsSm; x++)
 		if (obstacles[x]) _energy[x] = 0.f;
 
-	// parallel region setup
-	float maxVelMagThreads[8] = { -1., -1., -1., -1., -1., -1., -1., -1. };
-
-	#if PARALLEL==1
-		#pragma omp parallel
-	#endif
-	  { float maxVelMag1 = 0.;
-	#if PARALLEL==1
-		const int id  = omp_get_thread_num(); /*, num = omp_get_num_threads(); */
-	#endif
-
-	// vector noise main loop
-	#if PARALLEL==1
-	#pragma omp for  schedule(static)
-	#endif
-	for (int zSmall = 0; zSmall < _zResSm; zSmall++) 
-	for (int ySmall = 0; ySmall < _yResSm; ySmall++) 
-	for (int xSmall = 0; xSmall < _xResSm; xSmall++)
-	{
-		const int indexSmall = xSmall + ySmall * _xResSm + zSmall * _slabSizeSm;
-
-		// compute jacobian
-		float jacobian[3][3] = {
-		  { minDx(xSmall, ySmall, zSmall, _tcU, _resSm), minDx(xSmall, ySmall, zSmall, _tcV, _resSm), minDx(xSmall, ySmall, zSmall, _tcW, _resSm) } ,
-		  { minDy(xSmall, ySmall, zSmall, _tcU, _resSm), minDy(xSmall, ySmall, zSmall, _tcV, _resSm), minDy(xSmall, ySmall, zSmall, _tcW, _resSm) } ,
-		  { minDz(xSmall, ySmall, zSmall, _tcU, _resSm), minDz(xSmall, ySmall, zSmall, _tcV, _resSm), minDz(xSmall, ySmall, zSmall, _tcW, _resSm) }
-		};
-
-		// get LU factorization of texture jacobian and apply 
-		// it to unit vectors
-		JAMA::LU<float> LU = computeLU3x3(jacobian);
-		float xUnwarped[] = {1.0f, 0.0f, 0.0f};
-		float yUnwarped[] = {0.0f, 1.0f, 0.0f};
-		float zUnwarped[] = {0.0f, 0.0f, 1.0f};
-		float xWarped[] = {1.0f, 0.0f, 0.0f};
-		float yWarped[] = {0.0f, 1.0f, 0.0f};
-		float zWarped[] = {0.0f, 0.0f, 1.0f};
-		bool nonSingular = LU.isNonsingular();
-
-		#if 0
-			// UNUSED
-			float eigMax = 10.0f;
-			float eigMin = 0.1f;
-		#endif
+	decomposeEnergy(_energy, highFreqEnergy);
 
-		if (nonSingular)
-		{
-			solveLU3x3(LU, xUnwarped, xWarped);
-			solveLU3x3(LU, yUnwarped, yWarped);
-			solveLU3x3(LU, zUnwarped, zWarped);
-
-			// compute the eigenvalues while we have the Jacobian available
-			Vec3 eigenvalues = Vec3(1.);
-			computeEigenvalues3x3( &eigenvalues[0], jacobian);
-			_eigMax[indexSmall] = MAX3V(eigenvalues);
-			_eigMin[indexSmall] = MIN3V(eigenvalues);
-		}
+	// zero out coefficients inside of the obstacle
+	for (int x = 0; x < _totalCellsSm; x++)
+		if (obstacles[x]) highFreqEnergy[x] = 0.f;
 
-		// make sure to skip one on the beginning and end
-		int xStart = (xSmall == 0) ? 1 : 0;
-		int xEnd   = (xSmall == _xResSm - 1) ? _amplify - 1 : _amplify;
-		int yStart = (ySmall == 0) ? 1 : 0;
-		int yEnd   = (ySmall == _yResSm - 1) ? _amplify - 1 : _amplify;
-		int zStart = (zSmall == 0) ? 1 : 0;
-		int zEnd   = (zSmall == _zResSm - 1) ? _amplify - 1 : _amplify;
-		  
-		for (int zBig = zStart; zBig < zEnd; zBig++) 
-		for (int yBig = yStart; yBig < yEnd; yBig++) 
-		for (int xBig = xStart; xBig < xEnd; xBig++)
-		{
-			const int x = xSmall * _amplify + xBig;
-			const int y = ySmall * _amplify + yBig;
-			const int z = zSmall * _amplify + zBig;
-
-			// get unit position for both fine and coarse grid
-			const Vec3 pos = Vec3(x,y,z);
-			const Vec3 posSm = pos * invAmp;
-
-			// get grid index for both fine and coarse grid
-			const int index = x + y *_xResBig + z *_slabSizeBig;
-
-			// get a linearly interpolated velocity and texcoords
-			// from the coarse grid
-			Vec3 vel = INTERPOLATE::lerp3dVec( xvel,yvel,zvel, 
-			  posSm[0], posSm[1], posSm[2], _xResSm,_yResSm,_zResSm);
-			Vec3 uvw = INTERPOLATE::lerp3dVec( _tcU,_tcV,_tcW, 
-			  posSm[0], posSm[1], posSm[2], _xResSm,_yResSm,_zResSm);
-
-			// multiply the texture coordinate by _resSm so that turbulence
-			// synthesis begins at the first octave that the coarse grid 
-			// cannot capture
-			Vec3 texCoord = Vec3(uvw[0] * _resSm[0], 
-							   uvw[1] * _resSm[1],
-							   uvw[2] * _resSm[2]); 
-
-			// retrieve wavelet energy at highest frequency
-			float energy = INTERPOLATE::lerp3d(
-			  _highFreqEnergy, posSm[0],posSm[1],posSm[2], _xResSm, _yResSm, _zResSm);
-
-			// base amplitude for octave 0
-			float coefficient = sqrtf(2.0f * fabs(energy));
-			const float amplitude = *_strength * fabs(0.5 * coefficient) * persistence;
-
-			// add noise to velocity, but only if the turbulence is
-			// sufficiently undeformed, and the energy is large enough
-			// to make a difference
-			const bool addNoise = _eigMax[indexSmall] < 2. && 
-								_eigMin[indexSmall] > 0.5;
-
-			if (addNoise && amplitude > _cullingThreshold) {
-				// base amplitude for octave 0
-				float amplitudeScaled = amplitude;
-
-				for (int octave = 0; octave < _octaves; octave++)
-				{
-					// multiply the vector noise times the maximum allowed
-					// noise amplitude at this octave, and add it to the total
-					vel += WVelocityWithJacobian(texCoord, &xUnwarped[0], &yUnwarped[0], &zUnwarped[0]) * amplitudeScaled;
-
-					// scale coefficient for next octave
-					amplitudeScaled *= persistence;
-					texCoord *= 2.0f;
-				}
-			}
-
-			// Store velocity + turbulence in big grid for maccormack step
-			//
-			// If you wanted to save memory, you would instead perform a 
-			// semi-Lagrangian backtrace for the current grid cell here. Then
-			// you could just throw the velocity away.
-			_bigUx[index] = vel[0];
-			_bigUy[index] = vel[1];
-			_bigUz[index] = vel[2];
-
-			// compute the velocity magnitude for substepping later
-			const float velMag = _bigUx[index] * _bigUx[index] + 
-							   _bigUy[index] * _bigUy[index] + 
-							   _bigUz[index] * _bigUz[index];
-			if (velMag > maxVelMag1) maxVelMag1 = velMag;
-
-			// zero out velocity inside obstacles
-			float obsCheck = INTERPOLATE::lerp3dToFloat(
-			  obstacles, posSm[0], posSm[1], posSm[2], _xResSm, _yResSm, _zResSm); 
-
-			if (obsCheck > 0.95) 
-				_bigUx[index] = _bigUy[index] = _bigUz[index] = 0.;
-		} // xyz
-
-		#if PARALLEL==1
-		maxVelMagThreads[id] = maxVelMag1;
-		#else
-		maxVelMagThreads[0] = maxVelMag1;
-		#endif
-		}
-	} // omp
-
-	// compute maximum over threads
-	float maxVelMag = maxVelMagThreads[0];
-	#if PARALLEL==1
-	for (int i = 1; i < 8; i++) 
-	if (maxVelMag < maxVelMagThreads[i]) 
-	  maxVelMag = maxVelMagThreads[i];
-	#endif
-
-	// prepare density for an advection
-	SWAP_POINTERS(_densityBig, _densityBigOld);
-
-	// based on the maximum velocity present, see if we need to substep,
-	// but cap the maximum number of substeps to 5
-	const int maxSubSteps = 25;
-	const int maxVel = 5;
-	maxVelMag = sqrt(maxVelMag) * dt;
-	int totalSubsteps = (int)(maxVelMag / (float)maxVel);
-	totalSubsteps = (totalSubsteps < 1) ? 1 : totalSubsteps;
-	// printf("totalSubsteps: %d\n", totalSubsteps);
-	totalSubsteps = (totalSubsteps > maxSubSteps) ? maxSubSteps : totalSubsteps;
-	const float dtSubdiv = dt / (float)totalSubsteps;
-
-	// set boundaries of big velocity grid
-	FLUID_3D::setZeroX(_bigUx, _resBig); 
-	FLUID_3D::setZeroY(_bigUy, _resBig); 
-	FLUID_3D::setZeroZ(_bigUz, _resBig);
-
-	// do the MacCormack advection, with substepping if necessary
-	for(int substep = 0; substep < totalSubsteps; substep++)
-	{
-	FLUID_3D::advectFieldMacCormack(dtSubdiv, _bigUx, _bigUy, _bigUz, 
-		_densityBigOld, _densityBig, _tempBig1, _tempBig2, _resBig, NULL);
+	Vec3Int ressm(_xResSm, _yResSm, _zResSm);
+	FLUID_3D::setNeumannX(highFreqEnergy, ressm);
+	FLUID_3D::setNeumannY(highFreqEnergy, ressm);
+	FLUID_3D::setNeumannZ(highFreqEnergy, ressm);
+
+  // parallel region setup
+  float maxVelMagThreads[8] = { -1., -1., -1., -1., -1., -1., -1., -1. };
+#if PARALLEL==1
+#pragma omp parallel
+#endif
+  { float maxVelMag1 = 0.;
+#if PARALLEL==1
+    const int id  = omp_get_thread_num(); /*, num = omp_get_num_threads(); */
+#endif
+
+  // vector noise main loop
+#if PARALLEL==1
+#pragma omp for  schedule(static)
+#endif
+  for (int zSmall = 0; zSmall < _zResSm; zSmall++) 
+  for (int ySmall = 0; ySmall < _yResSm; ySmall++) 
+  for (int xSmall = 0; xSmall < _xResSm; xSmall++)
+  {
+    const int indexSmall = xSmall + ySmall * _xResSm + zSmall * _slabSizeSm;
+
+    // compute jacobian
+    float jacobian[3][3] = {
+      { minDx(xSmall, ySmall, zSmall, _tcU, _resSm), minDx(xSmall, ySmall, zSmall, _tcV, _resSm), minDx(xSmall, ySmall, zSmall, _tcW, _resSm) } ,
+      { minDy(xSmall, ySmall, zSmall, _tcU, _resSm), minDy(xSmall, ySmall, zSmall, _tcV, _resSm), minDy(xSmall, ySmall, zSmall, _tcW, _resSm) } ,
+      { minDz(xSmall, ySmall, zSmall, _tcU, _resSm), minDz(xSmall, ySmall, zSmall, _tcV, _resSm), minDz(xSmall, ySmall, zSmall, _tcW, _resSm) }
+    };
+
+    // get LU factorization of texture jacobian and apply 
+    // it to unit vectors
+    JAMA::LU<float> LU = computeLU3x3(jacobian);
+    float xUnwarped[] = {1.0f, 0.0f, 0.0f};
+    float yUnwarped[] = {0.0f, 1.0f, 0.0f};
+    float zUnwarped[] = {0.0f, 0.0f, 1.0f};
+    float xWarped[] = {1.0f, 0.0f, 0.0f};
+    float yWarped[] = {0.0f, 1.0f, 0.0f};
+    float zWarped[] = {0.0f, 0.0f, 1.0f};
+    bool nonSingular = LU.isNonsingular();
+#if 0
+	// UNUSED
+    float eigMax = 10.0f;
+    float eigMin = 0.1f;
+#endif
+    if (nonSingular)
+    {
+      solveLU3x3(LU, xUnwarped, xWarped);
+      solveLU3x3(LU, yUnwarped, yWarped);
+      solveLU3x3(LU, zUnwarped, zWarped);
+
+      // compute the eigenvalues while we have the Jacobian available
+      Vec3 eigenvalues = Vec3(1.);
+      computeEigenvalues3x3( &eigenvalues[0], jacobian);
+      eigMax[indexSmall] = MAX3V(eigenvalues);
+      eigMin[indexSmall] = MIN3V(eigenvalues);
+    }
+    
+    // make sure to skip one on the beginning and end
+    int xStart = (xSmall == 0) ? 1 : 0;
+    int xEnd   = (xSmall == _xResSm - 1) ? _amplify - 1 : _amplify;
+    int yStart = (ySmall == 0) ? 1 : 0;
+    int yEnd   = (ySmall == _yResSm - 1) ? _amplify - 1 : _amplify;
+    int zStart = (zSmall == 0) ? 1 : 0;
+    int zEnd   = (zSmall == _zResSm - 1) ? _amplify - 1 : _amplify;
+      
+    for (int zBig = zStart; zBig < zEnd; zBig++) 
+    for (int yBig = yStart; yBig < yEnd; yBig++) 
+    for (int xBig = xStart; xBig < xEnd; xBig++)
+    {
+      const int x = xSmall * _amplify + xBig;
+      const int y = ySmall * _amplify + yBig;
+      const int z = zSmall * _amplify + zBig;
+      
+      // get unit position for both fine and coarse grid
+      const Vec3 pos = Vec3(x,y,z);
+      const Vec3 posSm = pos * invAmp;
+      
+      // get grid index for both fine and coarse grid
+      const int index = x + y *_xResBig + z *_slabSizeBig;
+      
+      // get a linearly interpolated velocity and texcoords
+      // from the coarse grid
+      Vec3 vel = INTERPOLATE::lerp3dVec( xvel,yvel,zvel, 
+          posSm[0], posSm[1], posSm[2], _xResSm,_yResSm,_zResSm);
+      Vec3 uvw = INTERPOLATE::lerp3dVec( _tcU,_tcV,_tcW, 
+          posSm[0], posSm[1], posSm[2], _xResSm,_yResSm,_zResSm);
+
+      // multiply the texture coordinate by _resSm so that turbulence
+      // synthesis begins at the first octave that the coarse grid 
+      // cannot capture
+      Vec3 texCoord = Vec3(uvw[0] * _resSm[0], 
+                           uvw[1] * _resSm[1],
+                           uvw[2] * _resSm[2]); 
+
+      // retrieve wavelet energy at highest frequency
+      float energy = INTERPOLATE::lerp3d(
+          highFreqEnergy, posSm[0],posSm[1],posSm[2], _xResSm, _yResSm, _zResSm);
+
+      // base amplitude for octave 0
+      float coefficient = sqrtf(2.0f * fabs(energy));
+      const float amplitude = *_strength * fabs(0.5 * coefficient) * persistence;
+
+      // add noise to velocity, but only if the turbulence is
+      // sufficiently undeformed, and the energy is large enough
+      // to make a difference
+      const bool addNoise = eigMax[indexSmall] < 2. && 
+                            eigMin[indexSmall] > 0.5;
+      if (addNoise && amplitude > _cullingThreshold) {
+        // base amplitude for octave 0
+        float amplitudeScaled = amplitude;
 
-	if (substep < totalSubsteps - 1) 
-	  SWAP_POINTERS(_densityBig, _densityBigOld);
-	} // substep
+        for (int octave = 0; octave < _octaves; octave++)
+        {
+          // multiply the vector noise times the maximum allowed
+          // noise amplitude at this octave, and add it to the total
+          vel += WVelocityWithJacobian(texCoord, &xUnwarped[0], &yUnwarped[0], &zUnwarped[0]) * amplitudeScaled;
 
-	// wipe the density borders
-	FLUID_3D::setZeroBorder(_densityBig, _resBig);
+          // scale coefficient for next octave
+          amplitudeScaled *= persistence;
+          texCoord *= 2.0f;
+        }
+      }
 
-	// reset texture coordinates now in preparation for next timestep
-	// Shouldn't do this before generating the noise because then the 
-	// eigenvalues stored do not reflect the underlying texture coordinates
-	resetTextureCoordinates();
+      // Store velocity + turbulence in big grid for maccormack step
+      //
+      // If you wanted to save memory, you would instead perform a 
+      // semi-Lagrangian backtrace for the current grid cell here. Then
+      // you could just throw the velocity away.
+      bigUx[index] = vel[0];
+      bigUy[index] = vel[1];
+      bigUz[index] = vel[2];
+
+      // compute the velocity magnitude for substepping later
+      const float velMag = bigUx[index] * bigUx[index] + 
+                           bigUy[index] * bigUy[index] + 
+                           bigUz[index] * bigUz[index];
+      if (velMag > maxVelMag1) maxVelMag1 = velMag;
+
+      // zero out velocity inside obstacles
+      float obsCheck = INTERPOLATE::lerp3dToFloat(
+          obstacles, posSm[0], posSm[1], posSm[2], _xResSm, _yResSm, _zResSm); 
+      if (obsCheck > 0.95) 
+        bigUx[index] = bigUy[index] = bigUz[index] = 0.;
+    } // xyz
 
-	// output files
-	// string prefix = string("./amplified.preview/density_bigxy_");
-	// FLUID_3D::writeImageSliceXY(_densityBig, _resBig, _resBig[2]/2, prefix, _totalStepsBig, 1.0f);
-	//string df3prefix = string("./df3/density_big_");
-	//IMAGE::dumpDF3(_totalStepsBig, df3prefix, _densityBig, _resBig[0],_resBig[1],_resBig[2]);
-	// string pbrtPrefix = string("./pbrt/density_big_");
-	// IMAGE::dumpPBRT(_totalStepsBig, pbrtPrefix, _densityBig, _resBig[0],_resBig[1],_resBig[2]);
+#if PARALLEL==1
+    maxVelMagThreads[id] = maxVelMag1;
+#else
+    maxVelMagThreads[0] = maxVelMag1;
+#endif
+  }
+  } // omp
+  
+  // compute maximum over threads
+  float maxVelMag = maxVelMagThreads[0];
+#if PARALLEL==1
+  for (int i = 1; i < 8; i++) 
+    if (maxVelMag < maxVelMagThreads[i]) 
+      maxVelMag = maxVelMagThreads[i];
+#endif
 
-	_totalStepsBig++;
+  // prepare density for an advection
+  SWAP_POINTERS(_densityBig, _densityBigOld);
 
-	delete[] _bigUx;
-	delete[] _bigUy;
-	delete[] _bigUz;
+  // based on the maximum velocity present, see if we need to substep,
+  // but cap the maximum number of substeps to 5
+  const int maxSubSteps = 25;
+  const int maxVel = 5;
+  maxVelMag = sqrt(maxVelMag) * dt;
+  int totalSubsteps = (int)(maxVelMag / (float)maxVel);
+  totalSubsteps = (totalSubsteps < 1) ? 1 : totalSubsteps;
+  // printf("totalSubsteps: %d\n", totalSubsteps);
+  totalSubsteps = (totalSubsteps > maxSubSteps) ? maxSubSteps : totalSubsteps;
+  const float dtSubdiv = dt / (float)totalSubsteps;
 
-	delete[] _tempBig1;
-	delete[] _tempBig2;
+  // set boundaries of big velocity grid
+  FLUID_3D::setZeroX(bigUx, _resBig); 
+  FLUID_3D::setZeroY(bigUy, _resBig); 
+  FLUID_3D::setZeroZ(bigUz, _resBig);
+
+  // do the MacCormack advection, with substepping if necessary
+  for(int substep = 0; substep < totalSubsteps; substep++)
+  {
+    FLUID_3D::advectFieldMacCormack(dtSubdiv, bigUx, bigUy, bigUz, 
+        _densityBigOld, _densityBig, tempBig1, tempBig2, _resBig, NULL);
+
+    if (substep < totalSubsteps - 1) 
+      SWAP_POINTERS(_densityBig, _densityBigOld);
+  } // substep
+
+  delete[] tempBig1;
+  delete[] tempBig2;
+  delete[] bigUx;
+  delete[] bigUy;
+  delete[] bigUz;
+  delete[] _energy;
+  delete[] highFreqEnergy;
+  
+  // wipe the density borders
+  FLUID_3D::setZeroBorder(_densityBig, _resBig);
+    
+  // reset texture coordinates now in preparation for next timestep
+  // Shouldn't do this before generating the noise because then the 
+  // eigenvalues stored do not reflect the underlying texture coordinates
+  resetTextureCoordinates(eigMin, eigMax);
+
+  delete[] eigMin;
+  delete[] eigMax;
+  
+  // output files
+  // string prefix = string("./amplified.preview/density_bigxy_");
+  // FLUID_3D::writeImageSliceXY(_densityBig, _resBig, _resBig[2]/2, prefix, _totalStepsBig, 1.0f);
+  //string df3prefix = string("./df3/density_big_");
+  //IMAGE::dumpDF3(_totalStepsBig, df3prefix, _densityBig, _resBig[0],_resBig[1],_resBig[2]);
+  // string pbrtPrefix = string("./pbrt/density_big_");
+  // IMAGE::dumpPBRT(_totalStepsBig, pbrtPrefix, _densityBig, _resBig[0],_resBig[1],_resBig[2]);
+  
+  _totalStepsBig++;
 }
 
diff --git a/intern/smoke/intern/WTURBULENCE.h b/intern/smoke/intern/WTURBULENCE.h
index c21e002ad48..0aa978e9e52 100644
--- a/intern/smoke/intern/WTURBULENCE.h
+++ b/intern/smoke/intern/WTURBULENCE.h
@@ -49,10 +49,10 @@ class WTURBULENCE
 		void stepTurbulenceFull(float dt, float* xvel, float* yvel, float* zvel, unsigned char *obstacles);
 	
 		// texcoord functions
-		void advectTextureCoordinates (float dtOrg, float* xvel, float* yvel, float* zvel, float *_tempBig1, float *_tempBig2);
-		void resetTextureCoordinates();
+		void advectTextureCoordinates(float dtOrg, float* xvel, float* yvel, float* zvel, float *tempBig1, float *tempBig2);
+		void resetTextureCoordinates(float *_eigMin, float *_eigMax);
 
-		void computeEnergy(float* xvel, float* yvel, float* zvel, unsigned char *obstacles);
+		void computeEnergy(float *energy, float* xvel, float* yvel, float* zvel, unsigned char *obstacles);
 
 		// evaluate wavelet noise function
 		Vec3 WVelocity(Vec3 p);
@@ -63,8 +63,6 @@ class WTURBULENCE
 		inline float* getArrayTcU() { return _tcU; }
 		inline float* getArrayTcV() { return _tcV; }
 		inline float* getArrayTcW() { return _tcW; }
-		inline float* getArrayEigMin() { return _eigMin; }
-		inline float* getArrayEigMax() { return _eigMax; }
 
 		inline Vec3Int getResSm() { return _resSm; } // small resolution
 		inline Vec3Int getResBig() { return _resBig; }
@@ -81,15 +79,15 @@ class WTURBULENCE
 		
 		// noise settings
 		float _cullingThreshold;
-		float _noiseStrength;
-		float _noiseSizeScale;
-		bool _uvwAdvection;
-		bool _uvwReset;
-		float _noiseTimeanimSpeed;
-		int _dumpInterval;
-		int _noiseControlType;
+		// float _noiseStrength;
+		// float _noiseSizeScale;
+		// bool _uvwAdvection;
+		// bool _uvwReset;
+		// float _noiseTimeanimSpeed;
+		// int _dumpInterval;
+		// nt _noiseControlType;
 		// debug, scale density for projections output images
-		float _outputScale;
+		// float _outputScale;
 
 		// noise resolution
 		int _xResBig;
@@ -117,24 +115,15 @@ class WTURBULENCE
 		float* _tcW;
 		float* _tcTemp;
 
-		float* _eigMin; // no save -dg
-		float* _eigMax; // no save -dg
-
-		// wavelet decomposition of velocity energies
-		float* _energy; // no save -dg
-
 		// noise data
 		float* _noiseTile;
 		//float* _noiseTileExt;
 
 		// step counter
 		int _totalStepsBig;
-
-		// highest frequency component of wavelet decomposition
-		float* _highFreqEnergy; // no save -dg
 		
-		void computeEigenvalues();
-		void decomposeEnergy();
+		void computeEigenvalues(float *_eigMin, float *_eigMax);
+		void decomposeEnergy(float *energy, float *_highFreqEnergy);
 };
 
 #endif // WTURBULENCE_H
diff --git a/intern/smoke/intern/smoke_API.cpp b/intern/smoke/intern/smoke_API.cpp
index 5a016b51bbe..058831dddbb 100644
--- a/intern/smoke/intern/smoke_API.cpp
+++ b/intern/smoke/intern/smoke_API.cpp
@@ -137,7 +137,7 @@ extern "C" void smoke_dissolve(FLUID_3D *fluid, int speed, int log)
 	}
 }
 
-extern "C" void smoke_turbulence_dissolve(WTURBULENCE *wt, int speed, int log)
+extern "C" void smoke_dissolve_wavelet(WTURBULENCE *wt, int speed, int log)
 {
 	float *density = wt->getDensityBig();
 	Vec3Int r = wt->getResBig();
@@ -172,7 +172,7 @@ extern "C" void smoke_turbulence_dissolve(WTURBULENCE *wt, int speed, int log)
 	}
 }
 
-extern "C" void smoke_turbulence_initBlenderRNA(WTURBULENCE *wt, float *strength)
+extern "C" void smoke_initWaveletBlenderRNA(WTURBULENCE *wt, float *strength)
 {
 	wt->initBlenderRNA(strength);
 }
@@ -241,13 +241,14 @@ extern "C" float *smoke_turbulence_get_density(WTURBULENCE *wt)
 	return wt ? wt->getDensityBig() : NULL;
 }
 
-extern "C" void smoke_turbulence_get_res(WTURBULENCE *wt, unsigned int *res)
+extern "C" void smoke_turbulence_get_res(WTURBULENCE *wt, int *res)
 {
 	if(wt)
 	{
-		res[0] = wt->_resBig[0];
-		res[1] = wt->_resBig[1];
-		res[2] = wt->_resBig[2];
+		Vec3Int r = wt->getResBig();
+		res[0] = r[0];
+		res[1] = r[1];
+		res[2] = r[2];
 	}
 }