Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Sergey Sharybin <sergey.vfx@gmail.com> 2015-12-30 17:04:01 +0300
committer: Sergey Sharybin <sergey.vfx@gmail.com> 2015-12-30 17:04:04 +0300
commit: 3918c8b9a52ae9dcdb0488df92d7d3ca615be8c7 (patch)
tree: 3740e477610ab3ed020d505cd98308d29f663f5b /intern/cycles/device/device_cuda.cpp
parent: c8a551bf13edf711b93ea89cd3fcd244e4206cee (diff)
Cycles: Optionally output luminance from the shader evaluation kernel
This makes it possible to move some parts of the evaluation from the host to the device and hopefully reduce memory usage by avoiding having a full RGBA buffer on the host.

Reviewers: juicyfruit, lukasstockner97, brecht
Reviewed By: lukasstockner97, brecht
Differential Revision: https://developer.blender.org/D1702
Diffstat (limited to 'intern/cycles/device/device_cuda.cpp')
-rw-r--r-- intern/cycles/device/device_cuda.cpp | 20
1 file changed, 13 insertions, 7 deletions
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index d9d6fd77ecb..5c9ca3454c6 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -726,6 +726,7 @@ public:
CUfunction cuShader;
CUdeviceptr d_input = cuda_device_ptr(task.shader_input);
CUdeviceptr d_output = cuda_device_ptr(task.shader_output);
+ CUdeviceptr d_output_luma = cuda_device_ptr(task.shader_output_luma);
/* get kernel function */
if(task.shader_eval_type >= SHADER_EVAL_BAKE) {
@@ -747,13 +748,18 @@ public:
int shader_w = min(shader_chunk_size, end - shader_x);
/* pass in parameters */
- void *args[] = {&d_input,
- &d_output,
- &task.shader_eval_type,
- &shader_x,
- &shader_w,
- &offset,
- &sample};
+ void *args[8];
+ int arg = 0;
+ args[arg++] = &d_input;
+ args[arg++] = &d_output;
+ if(task.shader_eval_type < SHADER_EVAL_BAKE) {
+ args[arg++] = &d_output_luma;
+ }
+ args[arg++] = &task.shader_eval_type;
+ args[arg++] = &shader_x;
+ args[arg++] = &shader_w;
+ args[arg++] = &offset;
+ args[arg++] = &sample;
/* launch kernel */
int threads_per_block;