Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrecht Van Lommel <brechtvanlommel@gmail.com>2014-05-27 15:20:07 +0400
committerBrecht Van Lommel <brechtvanlommel@gmail.com>2014-05-27 17:11:32 +0400
commit69c7522b2463245ef16ebcf2806645c78e83b4df (patch)
tree96721c9d2bc755d68cf9eeded77255f4c58018f3
parentbc9e66f0830627e807f720dca4b9d5d8d39e732a (diff)
Fix T40379: world MIS causing too much CUDA memory usage.
The kernel for baking the world texture was the same as the one used for baking. Now that's separate which allows the kernel to reserve much less memory.
-rw-r--r--intern/cycles/device/device_cuda.cpp7
-rw-r--r--intern/cycles/device/device_opencl.cpp29
-rw-r--r--intern/cycles/kernel/kernel.cl25
-rw-r--r--intern/cycles/kernel/kernel.cpp5
-rw-r--r--intern/cycles/kernel/kernel.cu9
-rw-r--r--intern/cycles/kernel/kernel_avx.cpp5
-rw-r--r--intern/cycles/kernel/kernel_bake.h5
-rw-r--r--intern/cycles/kernel/kernel_sse2.cpp5
-rw-r--r--intern/cycles/kernel/kernel_sse3.cpp5
-rw-r--r--intern/cycles/kernel/kernel_sse41.cpp5
10 files changed, 81 insertions, 19 deletions
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index b19f5e22769..48d1c18555a 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -746,7 +746,12 @@ public:
CUdeviceptr d_output = cuda_device_ptr(task.shader_output);
/* get kernel function */
- cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_shader"));
+ if(task.shader_eval_type >= SHADER_EVAL_BAKE) {
+ cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_bake"));
+ }
+ else {
+ cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_shader"));
+ }
/* do tasks in smaller chunks, so we can cancel it */
const int shader_chunk_size = 65536;
diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index 694ec9db036..f841daba124 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -321,6 +321,7 @@ public:
cl_kernel ckFilmConvertByteKernel;
cl_kernel ckFilmConvertHalfFloatKernel;
cl_kernel ckShaderKernel;
+ cl_kernel ckBakeKernel;
cl_int ciErr;
typedef map<string, device_vector<uchar>*> ConstMemMap;
@@ -443,6 +444,7 @@ public:
ckFilmConvertByteKernel = NULL;
ckFilmConvertHalfFloatKernel = NULL;
ckShaderKernel = NULL;
+ ckBakeKernel = NULL;
null_mem = 0;
device_initialized = false;
@@ -791,6 +793,10 @@ public:
if(opencl_error(ciErr))
return false;
+ ckBakeKernel = clCreateKernel(cpProgram, "kernel_ocl_bake", &ciErr);
+ if(opencl_error(ciErr))
+ return false;
+
return true;
}
@@ -1054,19 +1060,26 @@ public:
/* sample arguments */
cl_uint narg = 0;
- opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_data), (void*)&d_data));
- opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_input), (void*)&d_input));
- opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_output), (void*)&d_output));
+ cl_kernel kernel;
+
+ if(task.shader_eval_type >= SHADER_EVAL_BAKE)
+ kernel = ckBakeKernel;
+ else
+ kernel = ckShaderKernel;
+
+ opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_data), (void*)&d_data));
+ opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_input), (void*)&d_input));
+ opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_output), (void*)&d_output));
#define KERNEL_TEX(type, ttype, name) \
- set_kernel_arg_mem(ckShaderKernel, &narg, #name);
+ set_kernel_arg_mem(kernel, &narg, #name);
#include "kernel_textures.h"
- opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_shader_eval_type), (void*)&d_shader_eval_type));
- opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_shader_x), (void*)&d_shader_x));
- opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_shader_w), (void*)&d_shader_w));
+ opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_eval_type), (void*)&d_shader_eval_type));
+ opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_x), (void*)&d_shader_x));
+ opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_w), (void*)&d_shader_w));
- enqueue_kernel(ckShaderKernel, task.shader_w, 1);
+ enqueue_kernel(kernel, task.shader_w, 1);
}
void thread_run(DeviceTask *task)
diff --git a/intern/cycles/kernel/kernel.cl b/intern/cycles/kernel/kernel.cl
index 1dc0793a7bc..d7d3438036e 100644
--- a/intern/cycles/kernel/kernel.cl
+++ b/intern/cycles/kernel/kernel.cl
@@ -131,3 +131,28 @@ __kernel void kernel_ocl_shader(
kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, x);
}
+__kernel void kernel_ocl_bake(
+ ccl_constant KernelData *data,
+ ccl_global uint4 *input,
+ ccl_global float4 *output,
+
+#define KERNEL_TEX(type, ttype, name) \
+ ccl_global type *name,
+#include "kernel_textures.h"
+
+ int type, int sx, int sw)
+{
+ KernelGlobals kglobals, *kg = &kglobals;
+
+ kg->data = data;
+
+#define KERNEL_TEX(type, ttype, name) \
+ kg->name = name;
+#include "kernel_textures.h"
+
+ int x = sx + get_global_id(0);
+
+ if(x < sx + sw)
+ kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, x);
+}
+
diff --git a/intern/cycles/kernel/kernel.cpp b/intern/cycles/kernel/kernel.cpp
index a0b6b8e13d0..a535659b3b1 100644
--- a/intern/cycles/kernel/kernel.cpp
+++ b/intern/cycles/kernel/kernel.cpp
@@ -122,7 +122,10 @@ void kernel_cpu_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *bu
void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i)
{
- kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
+ if(type >= SHADER_EVAL_BAKE)
+ kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i);
+ else
+ kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel.cu b/intern/cycles/kernel/kernel.cu
index b9b41f755ba..bb20819f6fc 100644
--- a/intern/cycles/kernel/kernel.cu
+++ b/intern/cycles/kernel/kernel.cu
@@ -153,5 +153,14 @@ kernel_cuda_shader(uint4 *input, float4 *output, int type, int sx)
kernel_shader_evaluate(NULL, input, output, (ShaderEvalType)type, x);
}
+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
+kernel_cuda_bake(uint4 *input, float4 *output, int type, int sx)
+{
+ int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
+
+ kernel_bake_evaluate(NULL, input, output, (ShaderEvalType)type, x);
+}
+
#endif
diff --git a/intern/cycles/kernel/kernel_avx.cpp b/intern/cycles/kernel/kernel_avx.cpp
index c572fcd3df6..7d354de16d2 100644
--- a/intern/cycles/kernel/kernel_avx.cpp
+++ b/intern/cycles/kernel/kernel_avx.cpp
@@ -69,7 +69,10 @@ void kernel_cpu_avx_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float
void kernel_cpu_avx_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i)
{
- kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
+ if(type >= SHADER_EVAL_BAKE)
+ kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i);
+ else
+ kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h
index 6e735517763..c3ae2b6a54e 100644
--- a/intern/cycles/kernel/kernel_bake.h
+++ b/intern/cycles/kernel/kernel_bake.h
@@ -356,11 +356,6 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input,
ccl_device void kernel_shader_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output, ShaderEvalType type, int i)
{
- if(type >= SHADER_EVAL_BAKE) {
- kernel_bake_evaluate(kg, input, output, type, i);
- return;
- }
-
ShaderData sd;
uint4 in = input[i];
float3 out;
diff --git a/intern/cycles/kernel/kernel_sse2.cpp b/intern/cycles/kernel/kernel_sse2.cpp
index 455cac046a3..3b5faea2994 100644
--- a/intern/cycles/kernel/kernel_sse2.cpp
+++ b/intern/cycles/kernel/kernel_sse2.cpp
@@ -66,7 +66,10 @@ void kernel_cpu_sse2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, floa
void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i)
{
- kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
+ if(type >= SHADER_EVAL_BAKE)
+ kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i);
+ else
+ kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_sse3.cpp b/intern/cycles/kernel/kernel_sse3.cpp
index 29aca52890e..3b18b164ffd 100644
--- a/intern/cycles/kernel/kernel_sse3.cpp
+++ b/intern/cycles/kernel/kernel_sse3.cpp
@@ -68,7 +68,10 @@ void kernel_cpu_sse3_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, floa
void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i)
{
- kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
+ if(type >= SHADER_EVAL_BAKE)
+ kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i);
+ else
+ kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_sse41.cpp b/intern/cycles/kernel/kernel_sse41.cpp
index 0ece67e6d2b..a3731d790f4 100644
--- a/intern/cycles/kernel/kernel_sse41.cpp
+++ b/intern/cycles/kernel/kernel_sse41.cpp
@@ -69,7 +69,10 @@ void kernel_cpu_sse41_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, flo
void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i)
{
- kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
+ if(type >= SHADER_EVAL_BAKE)
+ kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i);
+ else
+ kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
}
CCL_NAMESPACE_END