Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Brecht Van Lommel <brechtvanlommel@gmail.com> 2017-09-27 00:42:36 +0300
committer: Brecht Van Lommel <brechtvanlommel@gmail.com> 2017-10-04 22:11:14 +0300
commit: 5b7d6ea54b2fc35b8b12c667f5bf9a1c9c46d5c2 (patch)
tree: 99a9ca07d5366b164dfdf267ad1ed3691d2d7d57 /intern/cycles/device/device_cuda.cpp
parent: 660e8e59e7b4265324a8fba7ae716f84a73c6c64 (diff)
Code refactor: add WorkTile struct for passing work to kernel.
This makes sharing some code between mega/split in the following commits a bit easier, and also paves the way for rendering multiple tiles later.
Diffstat (limited to 'intern/cycles/device/device_cuda.cpp')
-rw-r--r-- intern/cycles/device/device_cuda.cpp | 58
1 file changed, 32 insertions, 26 deletions
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 29b5bd70789..7ee74e9a512 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -1293,8 +1293,6 @@ public:
CUDAContextScope scope(this);
CUfunction cuPathTrace;
- CUdeviceptr d_buffer = cuda_device_ptr(rtile.buffer);
- CUdeviceptr d_rng_state = cuda_device_ptr(rtile.rng_state);
/* get kernel function */
if(branched) {
@@ -1308,40 +1306,48 @@ public:
return;
}
- /* pass in parameters */
- void *args[] = {&d_buffer,
- &d_rng_state,
- &sample,
- &rtile.x,
- &rtile.y,
- &rtile.w,
- &rtile.h,
- &rtile.offset,
- &rtile.stride};
+ cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1));
- /* launch kernel */
- int threads_per_block;
- cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuPathTrace));
+ /* allocate work tile */
+ device_vector<WorkTile> work_tiles;
+ work_tiles.resize(1);
- /*int num_registers;
- cuda_assert(cuFuncGetAttribute(&num_registers, CU_FUNC_ATTRIBUTE_NUM_REGS, cuPathTrace));
+ WorkTile *wtile = work_tiles.get_data();
+ wtile->x = rtile.x;
+ wtile->y = rtile.y;
+ wtile->w = rtile.w;
+ wtile->h = rtile.h;
+ wtile->offset = rtile.offset;
+ wtile->stride = rtile.stride;
+ wtile->start_sample = sample;
+ wtile->num_samples = 1;
+ wtile->buffer = (float*)cuda_device_ptr(rtile.buffer);
+ wtile->rng_state = (uint*)cuda_device_ptr(rtile.rng_state);
- printf("threads_per_block %d\n", threads_per_block);
- printf("num_registers %d\n", num_registers);*/
+ mem_alloc("work_tiles", work_tiles, MEM_READ_ONLY);
+ mem_copy_to(work_tiles);
- int xthreads = (int)sqrt(threads_per_block);
- int ythreads = (int)sqrt(threads_per_block);
- int xblocks = (rtile.w + xthreads - 1)/xthreads;
- int yblocks = (rtile.h + ythreads - 1)/ythreads;
+ CUdeviceptr d_work_tiles = cuda_device_ptr(work_tiles.device_pointer);
- cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1));
+ uint total_work_size = wtile->w * wtile->h * wtile->num_samples;
+
+ /* pass in parameters */
+ void *args[] = {&d_work_tiles,
+ &total_work_size};
+
+ /* launch kernel */
+ int num_threads_per_block;
+ cuda_assert(cuFuncGetAttribute(&num_threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuPathTrace));
+ int num_blocks = divide_up(total_work_size, num_threads_per_block);
cuda_assert(cuLaunchKernel(cuPathTrace,
- xblocks , yblocks, 1, /* blocks */
- xthreads, ythreads, 1, /* threads */
+ num_blocks, 1, 1,
+ num_threads_per_block, 1, 1,
0, 0, args, 0));
cuda_assert(cuCtxSynchronize());
+
+ mem_free(work_tiles);
}
void film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half)