
git.blender.org/blender.git
Diffstat (limited to 'intern/cycles/device/cuda/device_cuda_impl.cpp')
-rw-r--r--  intern/cycles/device/cuda/device_cuda_impl.cpp  52
1 file changed, 28 insertions(+), 24 deletions(-)
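
Summary (reconstructed from the diff below): baking is moved out of CUDADevice::shader() and into the tile rendering path. CUDADevice::path_trace() is renamed to CUDADevice::render(), which now selects kernel_cuda_bake when the tile task is RenderTile::BAKE, with the same kernel choice mirrored in reserve_local_memory(); the SHADER_EVAL_BAKE branch in shader() is removed, and thread_run() dispatches BAKE tiles to render(). Two hedged sketches of the resulting patterns follow the diff.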
diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp b/intern/cycles/device/cuda/device_cuda_impl.cpp
index ba5d479e0e7..acf53c3eb1b 100644
--- a/intern/cycles/device/cuda/device_cuda_impl.cpp
+++ b/intern/cycles/device/cuda/device_cuda_impl.cpp
@@ -586,20 +586,23 @@ void CUDADevice::reserve_local_memory(const DeviceRequestedFeatures &requested_f
cuMemGetInfo(&free_before, &total);
/* Get kernel function. */
- CUfunction cuPathTrace;
+ CUfunction cuRender;
- if (requested_features.use_integrator_branched) {
- cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_branched_path_trace"));
+ if (requested_features.use_baking) {
+ cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_bake"));
+ }
+ else if (requested_features.use_integrator_branched) {
+ cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_branched_path_trace"));
}
else {
- cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace"));
+ cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_path_trace"));
}
- cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetCacheConfig(cuRender, CU_FUNC_CACHE_PREFER_L1));
int min_blocks, num_threads_per_block;
- cuda_assert(cuOccupancyMaxPotentialBlockSize(
- &min_blocks, &num_threads_per_block, cuPathTrace, NULL, 0, 0));
+ cuda_assert(
+ cuOccupancyMaxPotentialBlockSize(&min_blocks, &num_threads_per_block, cuRender, NULL, 0, 0));
/* Launch kernel, using just 1 block appears sufficient to reserve
* memory for all multiprocessors. It would be good to do this in
@@ -609,7 +612,7 @@ void CUDADevice::reserve_local_memory(const DeviceRequestedFeatures &requested_f
void *args[] = {&d_work_tiles, &total_work_size};
- cuda_assert(cuLaunchKernel(cuPathTrace, 1, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));
+ cuda_assert(cuLaunchKernel(cuRender, 1, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));
cuda_assert(cuCtxSynchronize());
@@ -1780,9 +1783,7 @@ void CUDADevice::adaptive_sampling_post(RenderTile &rtile,
0));
}
-void CUDADevice::path_trace(DeviceTask &task,
- RenderTile &rtile,
- device_vector<WorkTile> &work_tiles)
+void CUDADevice::render(DeviceTask &task, RenderTile &rtile, device_vector<WorkTile> &work_tiles)
{
scoped_timer timer(&rtile.buffers->render_time);
@@ -1790,21 +1791,24 @@ void CUDADevice::path_trace(DeviceTask &task,
return;
CUDAContextScope scope(this);
- CUfunction cuPathTrace;
+ CUfunction cuRender;
/* Get kernel function. */
- if (task.integrator_branched) {
- cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_branched_path_trace"));
+ if (rtile.task == RenderTile::BAKE) {
+ cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_bake"));
+ }
+ else if (task.integrator_branched) {
+ cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_branched_path_trace"));
}
else {
- cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace"));
+ cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_path_trace"));
}
if (have_error()) {
return;
}
- cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetCacheConfig(cuRender, CU_FUNC_CACHE_PREFER_L1));
/* Allocate work tile. */
work_tiles.alloc(1);
@@ -1822,8 +1826,8 @@ void CUDADevice::path_trace(DeviceTask &task,
* remain conservative for GPUs connected to a display to avoid driver
* timeouts and display freezing. */
int min_blocks, num_threads_per_block;
- cuda_assert(cuOccupancyMaxPotentialBlockSize(
- &min_blocks, &num_threads_per_block, cuPathTrace, NULL, 0, 0));
+ cuda_assert(
+ cuOccupancyMaxPotentialBlockSize(&min_blocks, &num_threads_per_block, cuRender, NULL, 0, 0));
if (!info.display_device) {
min_blocks *= 8;
}
@@ -1851,7 +1855,7 @@ void CUDADevice::path_trace(DeviceTask &task,
void *args[] = {&d_work_tiles, &total_work_size};
cuda_assert(
- cuLaunchKernel(cuPathTrace, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));
+ cuLaunchKernel(cuRender, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));
/* Run the adaptive sampling kernels at selected samples aligned to step samples. */
uint filter_sample = sample + wtile->num_samples - 1;
@@ -1957,10 +1961,7 @@ void CUDADevice::shader(DeviceTask &task)
CUdeviceptr d_output = (CUdeviceptr)task.shader_output;
/* get kernel function */
- if (task.shader_eval_type >= SHADER_EVAL_BAKE) {
- cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_bake"));
- }
- else if (task.shader_eval_type == SHADER_EVAL_DISPLACE) {
+ if (task.shader_eval_type == SHADER_EVAL_DISPLACE) {
cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_displace"));
}
else {
@@ -2297,9 +2298,12 @@ void CUDADevice::thread_run(DeviceTask *task)
split_kernel->path_trace(task, tile, void_buffer, void_buffer);
}
else {
- path_trace(*task, tile, work_tiles);
+ render(*task, tile, work_tiles);
}
}
+ else if (tile.task == RenderTile::BAKE) {
+ render(*task, tile, work_tiles);
+ }
else if (tile.task == RenderTile::DENOISE) {
tile.sample = tile.start_sample + tile.num_samples;
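
A minimal sketch (not part of the patch) of the kernel-selection logic that both reserve_local_memory() and render() converge on after this change: pick one of the three Cycles CUDA entry points by task type, then prefer L1 cache. The helper name and boolean parameters are hypothetical; the kernel names and the cache preference come from the diff.

#include <cuda.h>

static CUresult get_render_kernel(CUmodule module,
                                  bool is_bake,
                                  bool integrator_branched,
                                  CUfunction *out)
{
  /* Same priority order as the patched code: bake first, then the
   * branched integrator, then the default path tracer. */
  const char *name = is_bake              ? "kernel_cuda_bake" :
                     integrator_branched  ? "kernel_cuda_branched_path_trace" :
                                            "kernel_cuda_path_trace";
  CUresult result = cuModuleGetFunction(out, module, name);
  if (result == CUDA_SUCCESS) {
    /* Both call sites in the diff set this cache preference. */
    result = cuFuncSetCacheConfig(*out, CU_FUNC_CACHE_PREFER_L1);
  }
  return result;
}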
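And a sketch of the occupancy-driven launch sizing used at both launch sites above, assuming the kernel and argument array are already set up. This is simplified: the patched code derives the actual grid size from the work-tile size, which is outside this excerpt; the eightfold scaling for non-display devices mirrors the heuristic in the render() hunk.

#include <cuda.h>

static CUresult launch_with_occupancy(CUfunction kernel,
                                      void **args,
                                      bool is_display_device)
{
  int min_blocks = 0;
  int num_threads_per_block = 0;
  /* Ask the driver for a block size that maximizes occupancy. */
  CUresult result = cuOccupancyMaxPotentialBlockSize(
      &min_blocks, &num_threads_per_block, kernel, NULL, 0, 0);
  if (result != CUDA_SUCCESS) {
    return result;
  }
  if (!is_display_device) {
    /* No display attached, so no watchdog timeouts to worry about:
     * enqueue more work per launch, as the diff's `min_blocks *= 8`. */
    min_blocks *= 8;
  }
  return cuLaunchKernel(
      kernel, min_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0);
}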