Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--intern/cycles/device/device_optix.cpp64
1 files changed, 28 insertions, 36 deletions
diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp
index fc32679e794..39110cc0959 100644
--- a/intern/cycles/device/device_optix.cpp
+++ b/intern/cycles/device/device_optix.cpp
@@ -119,17 +119,8 @@ struct KernelParams {
threads = (int)sqrt((float)threads); \
int xblocks = ((w) + threads - 1) / threads; \
int yblocks = ((h) + threads - 1) / threads; \
- check_result_cuda_ret(cuLaunchKernel(func, \
- xblocks, \
- yblocks, \
- 1, \
- threads, \
- threads, \
- 1, \
- 0, \
- cuda_stream[thread_index], \
- args, \
- 0)); \
+ check_result_cuda_ret( \
+ cuLaunchKernel(func, xblocks, yblocks, 1, threads, threads, 1, 0, 0, args, 0)); \
} \
(void)0
@@ -195,7 +186,7 @@ class OptiXDevice : public CUDADevice {
OptixTraversableHandle tlas_handle = 0;
OptixDenoiser denoiser = NULL;
- vector<pair<int2, CUdeviceptr>> denoiser_state;
+ pair<int2, CUdeviceptr> denoiser_state = {};
int denoiser_input_passes = 0;
public:
@@ -250,9 +241,6 @@ class OptiXDevice : public CUDADevice {
launch_params.data_elements = sizeof(KernelParams);
// Allocate launch parameter buffer memory on device
launch_params.alloc_to_device(info.cpu_threads);
-
- // Create denoiser state entries for all threads (but do not allocate yet)
- denoiser_state.resize(info.cpu_threads);
}
~OptiXDevice()
{
@@ -267,9 +255,8 @@ class OptiXDevice : public CUDADevice {
cuMemFree(mem);
}
- // Free denoiser state for all threads
- for (const pair<int2, CUdeviceptr> &state : denoiser_state) {
- cuMemFree(state.second);
+ if (denoiser_state.second) {
+ cuMemFree(denoiser_state.second);
}
sbt_data.free();
@@ -571,7 +558,7 @@ class OptiXDevice : public CUDADevice {
if (tile.task == RenderTile::PATH_TRACE)
launch_render(task, tile, thread_index);
else if (tile.task == RenderTile::DENOISE)
- launch_denoise(task, tile, thread_index);
+ launch_denoise(task, tile);
task.release_tile(tile);
if (task.get_cancel() && !task.need_finish_queue)
break; // User requested cancellation
@@ -596,7 +583,7 @@ class OptiXDevice : public CUDADevice {
tile.stride = task.stride;
tile.buffers = task.buffers;
- launch_denoise(task, tile, thread_index);
+ launch_denoise(task, tile);
}
}
@@ -670,7 +657,7 @@ class OptiXDevice : public CUDADevice {
}
}
- bool launch_denoise(DeviceTask &task, RenderTile &rtile, int thread_index)
+ bool launch_denoise(DeviceTask &task, RenderTile &rtile)
{
// Update current sample (for display and NLM denoising task)
rtile.sample = rtile.start_sample + rtile.num_samples;
@@ -807,8 +794,8 @@ class OptiXDevice : public CUDADevice {
check_result_optix_ret(
optixDenoiserComputeMemoryResources(denoiser, rect_size.x, rect_size.y, &sizes));
- auto &state = denoiser_state[thread_index].second;
- auto &state_size = denoiser_state[thread_index].first;
+ auto &state = denoiser_state.second;
+ auto &state_size = denoiser_state.first;
const size_t scratch_size = sizes.recommendedScratchSizeInBytes;
const size_t scratch_offset = sizes.stateSizeInBytes;
@@ -824,7 +811,7 @@ class OptiXDevice : public CUDADevice {
// Initialize denoiser state for the current tile size
check_result_optix_ret(optixDenoiserSetup(denoiser,
- cuda_stream[thread_index],
+ 0,
rect_size.x,
rect_size.y,
state,
@@ -872,7 +859,7 @@ class OptiXDevice : public CUDADevice {
// Finally run denonising
OptixDenoiserParams params = {}; // All parameters are disabled/zero
check_result_optix_ret(optixDenoiserInvoke(denoiser,
- cuda_stream[thread_index],
+ 0,
&params,
state,
scratch_offset,
@@ -902,12 +889,11 @@ class OptiXDevice : public CUDADevice {
"kernel_cuda_filter_convert_from_rgb", rtiles[9].w, rtiles[9].h, output_args);
# endif
- check_result_cuda_ret(cuStreamSynchronize(cuda_stream[thread_index]));
+ check_result_cuda_ret(cuStreamSynchronize(0));
task.unmap_neighbor_tiles(rtiles, this);
}
else {
- assert(thread_index == 0);
// Run CUDA denoising kernels
DenoisingTask denoising(this, task);
CUDADevice::denoise(rtile, denoising);
@@ -1436,6 +1422,15 @@ class OptiXDevice : public CUDADevice {
void task_add(DeviceTask &task) override
{
+ struct OptiXDeviceTask : public DeviceTask {
+ OptiXDeviceTask(OptiXDevice *device, DeviceTask &task, int task_index) : DeviceTask(task)
+ {
+ // Using task index parameter instead of thread index, since number of CUDA streams may
+ // differ from number of threads
+ run = function_bind(&OptiXDevice::thread_run, device, *this, task_index);
+ }
+ };
+
// Upload texture information to device if it has changed since last launch
load_texture_info();
@@ -1445,20 +1440,17 @@ class OptiXDevice : public CUDADevice {
return;
}
+ if (task.type == DeviceTask::DENOISE || task.type == DeviceTask::DENOISE_BUFFER) {
+ // Execute denoising in a single thread (e.g. to avoid race conditions during creation)
+ task_pool.push(new OptiXDeviceTask(this, task, 0));
+ return;
+ }
+
// Split task into smaller ones
list<DeviceTask> tasks;
task.split(tasks, info.cpu_threads);
// Queue tasks in internal task pool
- struct OptiXDeviceTask : public DeviceTask {
- OptiXDeviceTask(OptiXDevice *device, DeviceTask &task, int task_index) : DeviceTask(task)
- {
- // Using task index parameter instead of thread index, since number of CUDA streams may
- // differ from number of threads
- run = function_bind(&OptiXDevice::thread_run, device, *this, task_index);
- }
- };
-
int task_index = 0;
for (DeviceTask &task : tasks)
task_pool.push(new OptiXDeviceTask(this, task, task_index++));