Cycles: merge of changes from tomato branch.

Regular rendering now works tiled, and supports save buffers to reduce memory usage during rendering and to cache render results.

Brick texture node by Thomas.
http://wiki.blender.org/index.php/Doc:2.6/Manual/Render/Cycles/Nodes/Textures#Brick_Texture

Image texture Blended Box Mapping.
http://wiki.blender.org/index.php/Doc:2.6/Manual/Render/Cycles/Nodes/Textures#Image_Texture
http://mango.blender.org/production/blended_box/

Various bug fixes by Sergey and Campbell.
* Fix for reading freed memory in some node setups.
* Fix incorrect memory read when synchronizing mesh motion.
* Fix crash appearing when direct light usage is different on different layers.
* Fix for the vector pass giving wrong results in some circumstances.
* Fix for wrong resolution used for rendering Render Layer node.
* Option to cancel rendering when doing initial synchronization.
* No more texture limit when using CPU render.
* Many fixes for new tiled rendering.
author: Brecht Van Lommel <brechtvanlommel@pandora.be> 2012-09-04 17:29:07 +0400
committer: Brecht Van Lommel <brechtvanlommel@pandora.be> 2012-09-04 17:29:07 +0400
commit: adea12cb01e4c4f18f345dfbbf49e9e622192e4e (patch)
tree: b43018344c696e4d59437fabc7f17f5b9d6a8e80 /intern/cycles/device/device_cuda.cpp
parent: 68563134d4800be4eb46aa6b598fd719cdaf2980 (diff)
1 files changed, 84 insertions, 34 deletions
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 357f99145b2..c8dcfdc2f3d 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -23,6 +23,8 @@
 #include "device.h"
 #include "device_intern.h"
 
+#include "buffers.h"
+
 #include "util_cuda.h"
 #include "util_debug.h"
 #include "util_map.h"
@@ -37,6 +39,7 @@ CCL_NAMESPACE_BEGIN
 class CUDADevice : public Device
 {
 public:
+	TaskPool task_pool;
 	CUdevice cuDevice;
 	CUcontext cuContext;
 	CUmodule cuModule;
@@ -192,6 +195,8 @@ public:
 
 	~CUDADevice()
 	{
+		task_pool.stop();
+
 		cuda_push_context();
 		cuda_assert(cuCtxDetach(cuContext))
 	}
@@ -466,13 +471,13 @@ public:
 		}
 	}
 
-	void path_trace(DeviceTask& task)
+	void path_trace(RenderTile& rtile, int sample)
 	{
 		cuda_push_context();
 
 		CUfunction cuPathTrace;
-		CUdeviceptr d_buffer = cuda_device_ptr(task.buffer);
-		CUdeviceptr d_rng_state = cuda_device_ptr(task.rng_state);
+		CUdeviceptr d_buffer = cuda_device_ptr(rtile.buffer);
+		CUdeviceptr d_rng_state = cuda_device_ptr(rtile.rng_state);
 
 		/* get kernel function */
 		cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace"))
@@ -486,29 +491,28 @@ public:
 		cuda_assert(cuParamSetv(cuPathTrace, offset, &d_rng_state, sizeof(d_rng_state)))
 		offset += sizeof(d_rng_state);
 
-		int sample = task.sample;
 		offset = align_up(offset, __alignof(sample));
 
-		cuda_assert(cuParamSeti(cuPathTrace, offset, task.sample))
-		offset += sizeof(task.sample);
+		cuda_assert(cuParamSeti(cuPathTrace, offset, sample))
+		offset += sizeof(sample);
 
-		cuda_assert(cuParamSeti(cuPathTrace, offset, task.x))
-		offset += sizeof(task.x);
+		cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.x))
+		offset += sizeof(rtile.x);
 
-		cuda_assert(cuParamSeti(cuPathTrace, offset, task.y))
-		offset += sizeof(task.y);
+		cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.y))
+		offset += sizeof(rtile.y);
 
-		cuda_assert(cuParamSeti(cuPathTrace, offset, task.w))
-		offset += sizeof(task.w);
+		cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.w))
+		offset += sizeof(rtile.w);
 
-		cuda_assert(cuParamSeti(cuPathTrace, offset, task.h))
-		offset += sizeof(task.h);
+		cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.h))
+		offset += sizeof(rtile.h);
 
-		cuda_assert(cuParamSeti(cuPathTrace, offset, task.offset))
-		offset += sizeof(task.offset);
+		cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.offset))
+		offset += sizeof(rtile.offset);
 
-		cuda_assert(cuParamSeti(cuPathTrace, offset, task.stride))
-		offset += sizeof(task.stride);
+		cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.stride))
+		offset += sizeof(rtile.stride);
 
 		cuda_assert(cuParamSetSize(cuPathTrace, offset))
 
@@ -520,23 +524,25 @@ public:
 		int xthreads = 8;
 		int ythreads = 8;
 #endif
-		int xblocks = (task.w + xthreads - 1)/xthreads;
-		int yblocks = (task.h + ythreads - 1)/ythreads;
+		int xblocks = (rtile.w + xthreads - 1)/xthreads;
+		int yblocks = (rtile.h + ythreads - 1)/ythreads;
 
 		cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1))
 		cuda_assert(cuFuncSetBlockShape(cuPathTrace, xthreads, ythreads, 1))
 		cuda_assert(cuLaunchGrid(cuPathTrace, xblocks, yblocks))
 
+		cuda_assert(cuCtxSynchronize())
+
 		cuda_pop_context();
 	}
 
-	void tonemap(DeviceTask& task)
+	void tonemap(DeviceTask& task, device_ptr buffer, device_ptr rgba)
 	{
 		cuda_push_context();
 
 		CUfunction cuFilmConvert;
-		CUdeviceptr d_rgba = map_pixels(task.rgba);
-		CUdeviceptr d_buffer = cuda_device_ptr(task.buffer);
+		CUdeviceptr d_rgba = map_pixels(rgba);
+		CUdeviceptr d_buffer = cuda_device_ptr(buffer);
 
 		/* get kernel function */
 		cuda_assert(cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_tonemap"))
@@ -820,27 +826,71 @@ public:
 		Device::draw_pixels(mem, y, w, h, dy, width, height, transparent);
 	}
 
-	void task_add(DeviceTask& task)
+	void thread_run(DeviceTask *task)
 	{
-		if(task.type == DeviceTask::TONEMAP)
-			tonemap(task);
-		else if(task.type == DeviceTask::PATH_TRACE)
-			path_trace(task);
-		else if(task.type == DeviceTask::SHADER)
-			shader(task);
+		if(task->type == DeviceTask::PATH_TRACE) {
+			RenderTile tile;
+			
+			/* keep rendering tiles until done */
+			while(task->acquire_tile(this, tile)) {
+				int start_sample = tile.start_sample;
+				int end_sample = tile.start_sample + tile.num_samples;
+
+				for(int sample = start_sample; sample < end_sample; sample++) {
+					if (task->get_cancel())
+						break;
+
+					path_trace(tile, sample);
+
+					tile.sample = sample + 1;
+
+					task->update_progress(tile);
+				}
+
+				task->release_tile(tile);
+			}
+		}
+		else if(task->type == DeviceTask::SHADER) {
+			shader(*task);
+
+			cuda_push_context();
+			cuda_assert(cuCtxSynchronize())
+			cuda_pop_context();
+		}
 	}
 
-	void task_wait()
+	class CUDADeviceTask : public DeviceTask {
+	public:
+		CUDADeviceTask(CUDADevice *device, DeviceTask& task)
+		: DeviceTask(task)
+		{
+			run = function_bind(&CUDADevice::thread_run, device, this);
+		}
+	};
+
+	void task_add(DeviceTask& task)
 	{
-		cuda_push_context();
+		if(task.type == DeviceTask::TONEMAP) {
+			/* must be done in main thread due to opengl access */
+			tonemap(task, task.buffer, task.rgba);
 
-		cuda_assert(cuCtxSynchronize())
+			cuda_push_context();
+			cuda_assert(cuCtxSynchronize())
+			cuda_pop_context();
+		}
+		else {
+			task_pool.push(new CUDADeviceTask(this, task));
+		}
+	}
 
-		cuda_pop_context();
+	void task_wait()
+	{
+		task_pool.wait_work();
 	}
 
 	void task_cancel()
 	{
+		task_pool.cancel();
 	}
 };
author	Brecht Van Lommel <brechtvanlommel@pandora.be>	2012-09-04 17:29:07 +0400
committer	Brecht Van Lommel <brechtvanlommel@pandora.be>	2012-09-04 17:29:07 +0400
commit	adea12cb01e4c4f18f345dfbbf49e9e622192e4e (patch)
tree	b43018344c696e4d59437fabc7f17f5b9d6a8e80 /intern/cycles/device/device_cuda.cpp
parent	68563134d4800be4eb46aa6b598fd719cdaf2980 (diff)