git.blender.org/blender.git
Diffstat (limited to 'intern/cycles/device')
 -rw-r--r--  intern/cycles/device/cuda/device_cuda.h             |  2
 -rw-r--r--  intern/cycles/device/cuda/device_cuda_impl.cpp      | 52
 -rw-r--r--  intern/cycles/device/device_cpu.cpp                 | 28
 -rw-r--r--  intern/cycles/device/opencl/device_opencl.h         |  1
 -rw-r--r--  intern/cycles/device/opencl/device_opencl_impl.cpp  | 53
 5 files changed, 100 insertions(+), 36 deletions(-)
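
This commit renames the per-device path_trace() entry points to render() and promotes baking from a special case of shader evaluation (SHADER_EVAL_BAKE inside shader()) to a first-class RenderTile::BAKE tile task: CUDA selects kernel_cuda_bake inside render(), the CPU device registers a dedicated bake_kernel, and OpenCL gains a new OpenCLDevice::bake() that drives bake_program over the tile.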
diff --git a/intern/cycles/device/cuda/device_cuda.h b/intern/cycles/device/cuda/device_cuda.h
index 3e397da895b..3f23f0fe4c5 100644
--- a/intern/cycles/device/cuda/device_cuda.h
+++ b/intern/cycles/device/cuda/device_cuda.h
@@ -223,7 +223,7 @@ class CUDADevice : public Device {
CUdeviceptr d_wtile,
CUstream stream = 0);
- void path_trace(DeviceTask &task, RenderTile &rtile, device_vector<WorkTile> &work_tiles);
+ void render(DeviceTask &task, RenderTile &rtile, device_vector<WorkTile> &work_tiles);
void film_convert(DeviceTask &task,
device_ptr buffer,
diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp b/intern/cycles/device/cuda/device_cuda_impl.cpp
index ba5d479e0e7..acf53c3eb1b 100644
--- a/intern/cycles/device/cuda/device_cuda_impl.cpp
+++ b/intern/cycles/device/cuda/device_cuda_impl.cpp
@@ -586,20 +586,23 @@ void CUDADevice::reserve_local_memory(const DeviceRequestedFeatures &requested_f
cuMemGetInfo(&free_before, &total);
/* Get kernel function. */
- CUfunction cuPathTrace;
+ CUfunction cuRender;
- if (requested_features.use_integrator_branched) {
- cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_branched_path_trace"));
+ if (requested_features.use_baking) {
+ cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_bake"));
+ }
+ else if (requested_features.use_integrator_branched) {
+ cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_branched_path_trace"));
}
else {
- cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace"));
+ cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_path_trace"));
}
- cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetCacheConfig(cuRender, CU_FUNC_CACHE_PREFER_L1));
int min_blocks, num_threads_per_block;
- cuda_assert(cuOccupancyMaxPotentialBlockSize(
- &min_blocks, &num_threads_per_block, cuPathTrace, NULL, 0, 0));
+ cuda_assert(
+ cuOccupancyMaxPotentialBlockSize(&min_blocks, &num_threads_per_block, cuRender, NULL, 0, 0));
/* Launch kernel, using just 1 block appears sufficient to reserve
* memory for all multiprocessors. It would be good to do this in
@@ -609,7 +612,7 @@ void CUDADevice::reserve_local_memory(const DeviceRequestedFeatures &requested_f
void *args[] = {&d_work_tiles, &total_work_size};
- cuda_assert(cuLaunchKernel(cuPathTrace, 1, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));
+ cuda_assert(cuLaunchKernel(cuRender, 1, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));
cuda_assert(cuCtxSynchronize());
@@ -1780,9 +1783,7 @@ void CUDADevice::adaptive_sampling_post(RenderTile &rtile,
0));
}
-void CUDADevice::path_trace(DeviceTask &task,
- RenderTile &rtile,
- device_vector<WorkTile> &work_tiles)
+void CUDADevice::render(DeviceTask &task, RenderTile &rtile, device_vector<WorkTile> &work_tiles)
{
scoped_timer timer(&rtile.buffers->render_time);
@@ -1790,21 +1791,24 @@ void CUDADevice::path_trace(DeviceTask &task,
return;
CUDAContextScope scope(this);
- CUfunction cuPathTrace;
+ CUfunction cuRender;
/* Get kernel function. */
- if (task.integrator_branched) {
- cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_branched_path_trace"));
+ if (rtile.task == RenderTile::BAKE) {
+ cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_bake"));
+ }
+ else if (task.integrator_branched) {
+ cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_branched_path_trace"));
}
else {
- cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace"));
+ cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_path_trace"));
}
if (have_error()) {
return;
}
- cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetCacheConfig(cuRender, CU_FUNC_CACHE_PREFER_L1));
/* Allocate work tile. */
work_tiles.alloc(1);
@@ -1822,8 +1826,8 @@ void CUDADevice::path_trace(DeviceTask &task,
* remain conservative for GPUs connected to a display to avoid driver
* timeouts and display freezing. */
int min_blocks, num_threads_per_block;
- cuda_assert(cuOccupancyMaxPotentialBlockSize(
- &min_blocks, &num_threads_per_block, cuPathTrace, NULL, 0, 0));
+ cuda_assert(
+ cuOccupancyMaxPotentialBlockSize(&min_blocks, &num_threads_per_block, cuRender, NULL, 0, 0));
if (!info.display_device) {
min_blocks *= 8;
}
@@ -1851,7 +1855,7 @@ void CUDADevice::path_trace(DeviceTask &task,
void *args[] = {&d_work_tiles, &total_work_size};
cuda_assert(
- cuLaunchKernel(cuPathTrace, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));
+ cuLaunchKernel(cuRender, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));
/* Run the adaptive sampling kernels at selected samples aligned to step samples. */
uint filter_sample = sample + wtile->num_samples - 1;
@@ -1957,10 +1961,7 @@ void CUDADevice::shader(DeviceTask &task)
CUdeviceptr d_output = (CUdeviceptr)task.shader_output;
/* get kernel function */
- if (task.shader_eval_type >= SHADER_EVAL_BAKE) {
- cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_bake"));
- }
- else if (task.shader_eval_type == SHADER_EVAL_DISPLACE) {
+ if (task.shader_eval_type == SHADER_EVAL_DISPLACE) {
cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_displace"));
}
else {
@@ -2297,9 +2298,12 @@ void CUDADevice::thread_run(DeviceTask *task)
split_kernel->path_trace(task, tile, void_buffer, void_buffer);
}
else {
- path_trace(*task, tile, work_tiles);
+ render(*task, tile, work_tiles);
}
}
+ else if (tile.task == RenderTile::BAKE) {
+ render(*task, tile, work_tiles);
+ }
else if (tile.task == RenderTile::DENOISE) {
tile.sample = tile.start_sample + tile.num_samples;
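
For orientation, here is a minimal C++ sketch of the tile dispatch in CUDADevice::thread_run() after the hunk above. It is reconstructed from the diff; the split-kernel condition (use_split_kernel()) is not visible in the hunk and is an assumption, and the denoising body is elided:

/* Sketch only; reconstructed from the hunks above, not the full function. */
if (tile.task == RenderTile::PATH_TRACE) {
  if (use_split_kernel()) { /* assumed condition, not shown in this hunk */
    split_kernel->path_trace(task, tile, void_buffer, void_buffer);
  }
  else {
    render(*task, tile, work_tiles);
  }
}
else if (tile.task == RenderTile::BAKE) {
  /* Baking reuses render(); render() itself selects kernel_cuda_bake
   * when rtile.task == RenderTile::BAKE (hunk at -1790 above). */
  render(*task, tile, work_tiles);
}
else if (tile.task == RenderTile::DENOISE) {
  tile.sample = tile.start_sample + tile.num_samples;
  /* ... denoising path, unchanged by this commit ... */
}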
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index c701c14318f..fc6febd8cee 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -188,6 +188,7 @@ class CPUDevice : public Device {
convert_to_byte_kernel;
KernelFunctions<void (*)(KernelGlobals *, uint4 *, float4 *, int, int, int, int, int)>
shader_kernel;
+ KernelFunctions<void (*)(KernelGlobals *, float *, int, int, int, int, int)> bake_kernel;
KernelFunctions<void (*)(
int, TileInfo *, int, int, float *, float *, float *, float *, float *, int *, int, int)>
@@ -270,6 +271,7 @@ class CPUDevice : public Device {
REGISTER_KERNEL(convert_to_half_float),
REGISTER_KERNEL(convert_to_byte),
REGISTER_KERNEL(shader),
+ REGISTER_KERNEL(bake),
REGISTER_KERNEL(filter_divide_shadow),
REGISTER_KERNEL(filter_get_feature),
REGISTER_KERNEL(filter_write_feature),
@@ -895,7 +897,7 @@ class CPUDevice : public Device {
}
}
- void path_trace(DeviceTask &task, RenderTile &tile, KernelGlobals *kg)
+ void render(DeviceTask &task, RenderTile &tile, KernelGlobals *kg)
{
const bool use_coverage = kernel_data.film.cryptomatte_passes & CRYPT_ACCURATE;
@@ -919,12 +921,21 @@ class CPUDevice : public Device {
break;
}
- for (int y = tile.y; y < tile.y + tile.h; y++) {
- for (int x = tile.x; x < tile.x + tile.w; x++) {
- if (use_coverage) {
- coverage.init_pixel(x, y);
+ if (tile.task == RenderTile::PATH_TRACE) {
+ for (int y = tile.y; y < tile.y + tile.h; y++) {
+ for (int x = tile.x; x < tile.x + tile.w; x++) {
+ if (use_coverage) {
+ coverage.init_pixel(x, y);
+ }
+ path_trace_kernel()(kg, render_buffer, sample, x, y, tile.offset, tile.stride);
+ }
+ }
+ }
+ else {
+ for (int y = tile.y; y < tile.y + tile.h; y++) {
+ for (int x = tile.x; x < tile.x + tile.w; x++) {
+ bake_kernel()(kg, render_buffer, sample, x, y, tile.offset, tile.stride);
}
- path_trace_kernel()(kg, render_buffer, sample, x, y, tile.offset, tile.stride);
}
}
tile.sample = sample + 1;
@@ -1019,9 +1030,12 @@ class CPUDevice : public Device {
split_kernel->path_trace(&task, tile, kgbuffer, void_buffer);
}
else {
- path_trace(task, tile, kg);
+ render(task, tile, kg);
}
}
+ else if (tile.task == RenderTile::BAKE) {
+ render(task, tile, kg);
+ }
else if (tile.task == RenderTile::DENOISE) {
denoise(denoising, tile);
task.update_progress(&tile, tile.w * tile.h);
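
Worth noting from the CPU hunks: bake_kernel adopts the same per-pixel calling convention as path_trace_kernel and writes straight into the tile's render buffer, whereas the old baking route went through shader_kernel's separate input/output arrays. Roughly, with argument names taken from how the kernels are invoked above:

/* New: both render paths accumulate per pixel into the render buffer. */
path_trace_kernel()(kg, render_buffer, sample, x, y, tile.offset, tile.stride);
bake_kernel()(kg, render_buffer, sample, x, y, tile.offset, tile.stride);

/* Old baking route: shader evaluation over separate in/out arrays, per the
 * signature void (*)(KernelGlobals *, uint4 *, float4 *, int, int, int, int, int). */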
diff --git a/intern/cycles/device/opencl/device_opencl.h b/intern/cycles/device/opencl/device_opencl.h
index d6f4fb43061..389268e1c2a 100644
--- a/intern/cycles/device/opencl/device_opencl.h
+++ b/intern/cycles/device/opencl/device_opencl.h
@@ -451,6 +451,7 @@ class OpenCLDevice : public Device {
device_ptr rgba_half);
void shader(DeviceTask &task);
void update_adaptive(DeviceTask &task, RenderTile &tile, int sample);
+ void bake(DeviceTask &task, RenderTile &tile);
void denoise(RenderTile &tile, DenoisingTask &denoising);
diff --git a/intern/cycles/device/opencl/device_opencl_impl.cpp b/intern/cycles/device/opencl/device_opencl_impl.cpp
index 2766f85d17c..beb3174b111 100644
--- a/intern/cycles/device/opencl/device_opencl_impl.cpp
+++ b/intern/cycles/device/opencl/device_opencl_impl.cpp
@@ -1367,6 +1367,9 @@ void OpenCLDevice::thread_run(DeviceTask *task)
*/
clFinish(cqCommandQueue);
}
+ else if (tile.task == RenderTile::BAKE) {
+ bake(*task, tile);
+ }
else if (tile.task == RenderTile::DENOISE) {
tile.sample = tile.start_sample + tile.num_samples;
denoise(tile, denoising);
@@ -1858,10 +1861,7 @@ void OpenCLDevice::shader(DeviceTask &task)
cl_int d_offset = task.offset;
OpenCLDevice::OpenCLProgram *program = &background_program;
- if (task.shader_eval_type >= SHADER_EVAL_BAKE) {
- program = &bake_program;
- }
- else if (task.shader_eval_type == SHADER_EVAL_DISPLACE) {
+ if (task.shader_eval_type == SHADER_EVAL_DISPLACE) {
program = &displace_program;
}
program->wait_for_availability();
@@ -1892,6 +1892,51 @@ void OpenCLDevice::shader(DeviceTask &task)
}
}
+void OpenCLDevice::bake(DeviceTask &task, RenderTile &rtile)
+{
+ scoped_timer timer(&rtile.buffers->render_time);
+
+ /* Cast arguments to cl types. */
+ cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer);
+ cl_mem d_buffer = CL_MEM_PTR(rtile.buffer);
+ cl_int d_x = rtile.x;
+ cl_int d_y = rtile.y;
+ cl_int d_w = rtile.w;
+ cl_int d_h = rtile.h;
+ cl_int d_offset = rtile.offset;
+ cl_int d_stride = rtile.stride;
+
+ bake_program.wait_for_availability();
+ cl_kernel kernel = bake_program();
+
+ cl_uint start_arg_index = kernel_set_args(kernel, 0, d_data, d_buffer);
+
+ set_kernel_arg_buffers(kernel, &start_arg_index);
+
+ start_arg_index += kernel_set_args(
+ kernel, start_arg_index, d_x, d_y, d_w, d_h, d_offset, d_stride);
+
+ int start_sample = rtile.start_sample;
+ int end_sample = rtile.start_sample + rtile.num_samples;
+
+ for (int sample = start_sample; sample < end_sample; sample++) {
+ if (task.get_cancel()) {
+ if (task.need_finish_queue == false)
+ break;
+ }
+
+ kernel_set_args(kernel, start_arg_index, sample);
+
+ enqueue_kernel(kernel, d_w, d_h);
+
+ rtile.sample = sample + 1;
+
+ task.update_progress(&rtile, rtile.w * rtile.h);
+ }
+
+ clFinish(cqCommandQueue);
+}
+
string OpenCLDevice::kernel_build_options(const string *debug_src)
{
string build_options = "-cl-no-signed-zeros -cl-mad-enable ";
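
A note on the new OpenCLDevice::bake() above: the kernel arguments for the tile are set once, then the sample loop runs on the host, re-setting only the trailing sample argument (kernel_set_args(kernel, start_arg_index, sample)) before each 2D enqueue over the tile. Progress is reported per sample via task.update_progress(), and the final clFinish() ensures all enqueued work has completed before the tile is considered finished.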