diff options
author | Brecht Van Lommel <brechtvanlommel@pandora.be> | 2012-09-04 17:29:07 +0400 |
---|---|---|
committer | Brecht Van Lommel <brechtvanlommel@pandora.be> | 2012-09-04 17:29:07 +0400 |
commit | adea12cb01e4c4f18f345dfbbf49e9e622192e4e (patch) | |
tree | b43018344c696e4d59437fabc7f17f5b9d6a8e80 /intern/cycles/device/device_cuda.cpp | |
parent | 68563134d4800be4eb46aa6b598fd719cdaf2980 (diff) |
Cycles: merge of changes from tomato branch.
Regular rendering is now tiled, and supports save buffers to reduce memory
usage during rendering and to cache render results.
Brick texture node by Thomas.
http://wiki.blender.org/index.php/Doc:2.6/Manual/Render/Cycles/Nodes/Textures#Brick_Texture
Image texture Blended Box Mapping.
http://wiki.blender.org/index.php/Doc:2.6/Manual/Render/Cycles/Nodes/Textures#Image_Texture
http://mango.blender.org/production/blended_box/
Various bug fixes by Sergey and Campbell.
* Fix for reading freed memory in some node setups.
* Fix incorrect memory read when synchronizing mesh motion.
* Fix crash appearing when direct light usage is different on different layers.
* Fix for vector pass giving wrong results in some circumstances.
* Fix for wrong resolution used for rendering Render Layer node.
* Option to cancel rendering when doing initial synchronization.
* No more texture limit when using CPU render.
* Many fixes for new tiled rendering.
Diffstat (limited to 'intern/cycles/device/device_cuda.cpp')
-rw-r--r-- | intern/cycles/device/device_cuda.cpp | 118 |
1 files changed, 84 insertions, 34 deletions
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 357f99145b2..c8dcfdc2f3d 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -23,6 +23,8 @@ #include "device.h" #include "device_intern.h" +#include "buffers.h" + #include "util_cuda.h" #include "util_debug.h" #include "util_map.h" @@ -37,6 +39,7 @@ CCL_NAMESPACE_BEGIN class CUDADevice : public Device { public: + TaskPool task_pool; CUdevice cuDevice; CUcontext cuContext; CUmodule cuModule; @@ -192,6 +195,8 @@ public: ~CUDADevice() { + task_pool.stop(); + cuda_push_context(); cuda_assert(cuCtxDetach(cuContext)) } @@ -466,13 +471,13 @@ public: } } - void path_trace(DeviceTask& task) + void path_trace(RenderTile& rtile, int sample) { cuda_push_context(); CUfunction cuPathTrace; - CUdeviceptr d_buffer = cuda_device_ptr(task.buffer); - CUdeviceptr d_rng_state = cuda_device_ptr(task.rng_state); + CUdeviceptr d_buffer = cuda_device_ptr(rtile.buffer); + CUdeviceptr d_rng_state = cuda_device_ptr(rtile.rng_state); /* get kernel function */ cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace")) @@ -486,29 +491,28 @@ public: cuda_assert(cuParamSetv(cuPathTrace, offset, &d_rng_state, sizeof(d_rng_state))) offset += sizeof(d_rng_state); - int sample = task.sample; offset = align_up(offset, __alignof(sample)); - cuda_assert(cuParamSeti(cuPathTrace, offset, task.sample)) - offset += sizeof(task.sample); + cuda_assert(cuParamSeti(cuPathTrace, offset, sample)) + offset += sizeof(sample); - cuda_assert(cuParamSeti(cuPathTrace, offset, task.x)) - offset += sizeof(task.x); + cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.x)) + offset += sizeof(rtile.x); - cuda_assert(cuParamSeti(cuPathTrace, offset, task.y)) - offset += sizeof(task.y); + cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.y)) + offset += sizeof(rtile.y); - cuda_assert(cuParamSeti(cuPathTrace, offset, task.w)) - offset += sizeof(task.w); + 
cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.w)) + offset += sizeof(rtile.w); - cuda_assert(cuParamSeti(cuPathTrace, offset, task.h)) - offset += sizeof(task.h); + cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.h)) + offset += sizeof(rtile.h); - cuda_assert(cuParamSeti(cuPathTrace, offset, task.offset)) - offset += sizeof(task.offset); + cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.offset)) + offset += sizeof(rtile.offset); - cuda_assert(cuParamSeti(cuPathTrace, offset, task.stride)) - offset += sizeof(task.stride); + cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.stride)) + offset += sizeof(rtile.stride); cuda_assert(cuParamSetSize(cuPathTrace, offset)) @@ -520,23 +524,25 @@ public: int xthreads = 8; int ythreads = 8; #endif - int xblocks = (task.w + xthreads - 1)/xthreads; - int yblocks = (task.h + ythreads - 1)/ythreads; + int xblocks = (rtile.w + xthreads - 1)/xthreads; + int yblocks = (rtile.h + ythreads - 1)/ythreads; cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1)) cuda_assert(cuFuncSetBlockShape(cuPathTrace, xthreads, ythreads, 1)) cuda_assert(cuLaunchGrid(cuPathTrace, xblocks, yblocks)) + cuda_assert(cuCtxSynchronize()) + cuda_pop_context(); } - void tonemap(DeviceTask& task) + void tonemap(DeviceTask& task, device_ptr buffer, device_ptr rgba) { cuda_push_context(); CUfunction cuFilmConvert; - CUdeviceptr d_rgba = map_pixels(task.rgba); - CUdeviceptr d_buffer = cuda_device_ptr(task.buffer); + CUdeviceptr d_rgba = map_pixels(rgba); + CUdeviceptr d_buffer = cuda_device_ptr(buffer); /* get kernel function */ cuda_assert(cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_tonemap")) @@ -820,27 +826,71 @@ public: Device::draw_pixels(mem, y, w, h, dy, width, height, transparent); } - void task_add(DeviceTask& task) + void thread_run(DeviceTask *task) { - if(task.type == DeviceTask::TONEMAP) - tonemap(task); - else if(task.type == DeviceTask::PATH_TRACE) - path_trace(task); - else if(task.type == 
DeviceTask::SHADER) - shader(task); + if(task->type == DeviceTask::PATH_TRACE) { + RenderTile tile; + + /* keep rendering tiles until done */ + while(task->acquire_tile(this, tile)) { + int start_sample = tile.start_sample; + int end_sample = tile.start_sample + tile.num_samples; + + for(int sample = start_sample; sample < end_sample; sample++) { + if (task->get_cancel()) + break; + + path_trace(tile, sample); + + tile.sample = sample + 1; + + task->update_progress(tile); + } + + task->release_tile(tile); + } + } + else if(task->type == DeviceTask::SHADER) { + shader(*task); + + cuda_push_context(); + cuda_assert(cuCtxSynchronize()) + cuda_pop_context(); + } } - void task_wait() + class CUDADeviceTask : public DeviceTask { + public: + CUDADeviceTask(CUDADevice *device, DeviceTask& task) + : DeviceTask(task) + { + run = function_bind(&CUDADevice::thread_run, device, this); + } + }; + + void task_add(DeviceTask& task) { - cuda_push_context(); + if(task.type == DeviceTask::TONEMAP) { + /* must be done in main thread due to opengl access */ + tonemap(task, task.buffer, task.rgba); - cuda_assert(cuCtxSynchronize()) + cuda_push_context(); + cuda_assert(cuCtxSynchronize()) + cuda_pop_context(); + } + else { + task_pool.push(new CUDADeviceTask(this, task)); + } + } - cuda_pop_context(); + void task_wait() + { + task_pool.wait_work(); } void task_cancel() { + task_pool.cancel(); } }; |