Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrecht Van Lommel <brechtvanlommel@pandora.be>2013-08-31 03:49:38 +0400
committerBrecht Van Lommel <brechtvanlommel@pandora.be>2013-08-31 03:49:38 +0400
commit29f6616d609fbd92cf313b0fdec555c2fcb4ede0 (patch)
treee0c9500368c5210071cb841ea86f5674b0cf6f25 /intern/cycles/device
parent60ff60dcdc9f43891fb8a19e10f9bb7964a539bf (diff)
Cycles: viewport render now takes scene color management settings into account,
except for curves, which are still missing from the OpenColorIO GLSL shader. The pixels are stored in a half float texture, converted from full float with native GPU instructions and SIMD on the CPU, so it should be pretty quick. Using a GLSL shader is useful for GPU render because it avoids a copy through CPU memory.
Diffstat (limited to 'intern/cycles/device')
-rw-r--r--intern/cycles/device/device.cpp45
-rw-r--r--intern/cycles/device/device_cpu.cpp67
-rw-r--r--intern/cycles/device/device_cuda.cpp45
-rw-r--r--intern/cycles/device/device_memory.h9
-rw-r--r--intern/cycles/device/device_multi.cpp4
-rw-r--r--intern/cycles/device/device_network.cpp5
-rw-r--r--intern/cycles/device/device_network.h8
-rw-r--r--intern/cycles/device/device_opencl.cpp32
-rw-r--r--intern/cycles/device/device_task.cpp2
-rw-r--r--intern/cycles/device/device_task.h5
10 files changed, 154 insertions, 68 deletions
diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp
index e42f83be6ce..10d4112b57d 100644
--- a/intern/cycles/device/device.cpp
+++ b/intern/cycles/device/device.cpp
@@ -41,7 +41,10 @@ void Device::pixels_alloc(device_memory& mem)
void Device::pixels_copy_from(device_memory& mem, int y, int w, int h)
{
- mem_copy_from(mem, y, w, h, sizeof(uint8_t)*4);
+ if(mem.data_type == TYPE_HALF)
+ mem_copy_from(mem, y, w, h, sizeof(half4));
+ else
+ mem_copy_from(mem, y, w, h, sizeof(uchar4));
}
void Device::pixels_free(device_memory& mem)
@@ -53,27 +56,49 @@ void Device::draw_pixels(device_memory& rgba, int y, int w, int h, int dy, int w
{
pixels_copy_from(rgba, y, w, h);
+ GLuint texid;
+ glGenTextures(1, &texid);
+ glBindTexture(GL_TEXTURE_2D, texid);
+ if(rgba.data_type == TYPE_HALF)
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F_ARB, w, h, 0, GL_RGBA, GL_HALF_FLOAT, (void*)rgba.data_pointer);
+ else
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, w, h, 0, GL_RGBA, GL_UNSIGNED_BYTE, (void*)rgba.data_pointer);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+
+ glEnable(GL_TEXTURE_2D);
+
if(transparent) {
glEnable(GL_BLEND);
glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA);
}
- glPixelZoom((float)width/(float)w, (float)height/(float)h);
- glRasterPos2f(0, dy);
+ glColor3f(1.0f, 1.0f, 1.0f);
- uint8_t *pixels = (uint8_t*)rgba.data_pointer;
+ glPushMatrix();
+ glTranslatef(0.0f, (float)dy, 0.0f);
- /* for multi devices, this assumes the ineffecient method that we allocate
- * all pixels on the device even though we only render to a subset */
- pixels += 4*y*w;
+ glBegin(GL_QUADS);
+
+ glTexCoord2f(0.0f, 0.0f);
+ glVertex2f(0.0f, 0.0f);
+ glTexCoord2f(1.0f, 0.0f);
+ glVertex2f((float)width, 0.0f);
+ glTexCoord2f(1.0f, 1.0f);
+ glVertex2f((float)width, (float)height);
+ glTexCoord2f(0.0f, 1.0f);
+ glVertex2f(0.0f, (float)height);
- glDrawPixels(w, h, GL_RGBA, GL_UNSIGNED_BYTE, pixels);
+ glEnd();
- glRasterPos2f(0.0f, 0.0f);
- glPixelZoom(1.0f, 1.0f);
+ glPopMatrix();
if(transparent)
glDisable(GL_BLEND);
+
+ glBindTexture(GL_TEXTURE_2D, 0);
+ glDisable(GL_TEXTURE_2D);
+ glDeleteTextures(1, &texid);
}
Device *Device::create(DeviceInfo& info, Stats &stats, bool background)
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index d9c08dadbb0..b1dbdec9d36 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -127,8 +127,8 @@ public:
{
if(task->type == DeviceTask::PATH_TRACE)
thread_path_trace(*task);
- else if(task->type == DeviceTask::TONEMAP)
- thread_tonemap(*task);
+ else if(task->type == DeviceTask::FILM_CONVERT)
+ thread_film_convert(*task);
else if(task->type == DeviceTask::SHADER)
thread_shader(*task);
}
@@ -237,28 +237,55 @@ public:
#endif
}
- void thread_tonemap(DeviceTask& task)
+ void thread_film_convert(DeviceTask& task)
{
+ float sample_scale = 1.0f/(task.sample + 1);
+
+ if(task.rgba_half) {
#ifdef WITH_OPTIMIZED_KERNEL
- if(system_cpu_support_sse3()) {
- for(int y = task.y; y < task.y + task.h; y++)
- for(int x = task.x; x < task.x + task.w; x++)
- kernel_cpu_sse3_tonemap(&kernel_globals, (uchar4*)task.rgba, (float*)task.buffer,
- task.sample, x, y, task.offset, task.stride);
- }
- else if(system_cpu_support_sse2()) {
- for(int y = task.y; y < task.y + task.h; y++)
- for(int x = task.x; x < task.x + task.w; x++)
- kernel_cpu_sse2_tonemap(&kernel_globals, (uchar4*)task.rgba, (float*)task.buffer,
- task.sample, x, y, task.offset, task.stride);
+ if(system_cpu_support_sse3()) {
+ for(int y = task.y; y < task.y + task.h; y++)
+ for(int x = task.x; x < task.x + task.w; x++)
+ kernel_cpu_sse3_convert_to_half_float(&kernel_globals, (uchar4*)task.rgba_half, (float*)task.buffer,
+ sample_scale, x, y, task.offset, task.stride);
+ }
+ else if(system_cpu_support_sse2()) {
+ for(int y = task.y; y < task.y + task.h; y++)
+ for(int x = task.x; x < task.x + task.w; x++)
+ kernel_cpu_sse2_convert_to_half_float(&kernel_globals, (uchar4*)task.rgba_half, (float*)task.buffer,
+ sample_scale, x, y, task.offset, task.stride);
+ }
+ else
+#endif
+ {
+ for(int y = task.y; y < task.y + task.h; y++)
+ for(int x = task.x; x < task.x + task.w; x++)
+ kernel_cpu_convert_to_half_float(&kernel_globals, (uchar4*)task.rgba_half, (float*)task.buffer,
+ sample_scale, x, y, task.offset, task.stride);
+ }
}
- else
+ else {
+#ifdef WITH_OPTIMIZED_KERNEL
+ if(system_cpu_support_sse3()) {
+ for(int y = task.y; y < task.y + task.h; y++)
+ for(int x = task.x; x < task.x + task.w; x++)
+ kernel_cpu_sse3_convert_to_byte(&kernel_globals, (uchar4*)task.rgba_byte, (float*)task.buffer,
+ sample_scale, x, y, task.offset, task.stride);
+ }
+ else if(system_cpu_support_sse2()) {
+ for(int y = task.y; y < task.y + task.h; y++)
+ for(int x = task.x; x < task.x + task.w; x++)
+ kernel_cpu_sse2_convert_to_byte(&kernel_globals, (uchar4*)task.rgba_byte, (float*)task.buffer,
+ sample_scale, x, y, task.offset, task.stride);
+ }
+ else
#endif
- {
- for(int y = task.y; y < task.y + task.h; y++)
- for(int x = task.x; x < task.x + task.w; x++)
- kernel_cpu_tonemap(&kernel_globals, (uchar4*)task.rgba, (float*)task.buffer,
- task.sample, x, y, task.offset, task.stride);
+ {
+ for(int y = task.y; y < task.y + task.h; y++)
+ for(int x = task.x; x < task.x + task.w; x++)
+ kernel_cpu_convert_to_byte(&kernel_globals, (uchar4*)task.rgba_byte, (float*)task.buffer,
+ sample_scale, x, y, task.offset, task.stride);
+ }
}
}
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index c1b5a8bfcea..b5eaa69bf0e 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -625,7 +625,7 @@ public:
cuda_pop_context();
}
- void tonemap(DeviceTask& task, device_ptr buffer, device_ptr rgba)
+ void film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half)
{
if(have_error())
return;
@@ -633,11 +633,14 @@ public:
cuda_push_context();
CUfunction cuFilmConvert;
- CUdeviceptr d_rgba = map_pixels(rgba);
+ CUdeviceptr d_rgba = map_pixels((rgba_byte)? rgba_byte: rgba_half);
CUdeviceptr d_buffer = cuda_device_ptr(buffer);
/* get kernel function */
- cuda_assert(cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_tonemap"))
+ if(rgba_half)
+ cuda_assert(cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_convert_to_half_float"))
+ else
+ cuda_assert(cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_convert_to_byte"))
/* pass in parameters */
int offset = 0;
@@ -648,11 +651,11 @@ public:
cuda_assert(cuParamSetv(cuFilmConvert, offset, &d_buffer, sizeof(d_buffer)))
offset += sizeof(d_buffer);
- int sample = task.sample;
- offset = align_up(offset, __alignof(sample));
+ float sample_scale = 1.0f/(task.sample + 1);
+ offset = align_up(offset, __alignof(sample_scale));
- cuda_assert(cuParamSeti(cuFilmConvert, offset, task.sample))
- offset += sizeof(task.sample);
+ cuda_assert(cuParamSetf(cuFilmConvert, offset, sample_scale))
+ offset += sizeof(sample_scale);
cuda_assert(cuParamSeti(cuFilmConvert, offset, task.x))
offset += sizeof(task.x);
@@ -684,7 +687,7 @@ public:
cuda_assert(cuFuncSetBlockShape(cuFilmConvert, xthreads, ythreads, 1))
cuda_assert(cuLaunchGrid(cuFilmConvert, xblocks, yblocks))
- unmap_pixels(task.rgba);
+ unmap_pixels((rgba_byte)? rgba_byte: rgba_half);
cuda_pop_context();
}
@@ -771,13 +774,19 @@ public:
glGenBuffers(1, &pmem.cuPBO);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO);
- glBufferData(GL_PIXEL_UNPACK_BUFFER, pmem.w*pmem.h*sizeof(GLfloat)*3, NULL, GL_DYNAMIC_DRAW);
+ if(mem.data_type == TYPE_HALF)
+ glBufferData(GL_PIXEL_UNPACK_BUFFER, pmem.w*pmem.h*sizeof(GLhalf)*4, NULL, GL_DYNAMIC_DRAW);
+ else
+ glBufferData(GL_PIXEL_UNPACK_BUFFER, pmem.w*pmem.h*sizeof(uint8_t)*4, NULL, GL_DYNAMIC_DRAW);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
glGenTextures(1, &pmem.cuTexId);
glBindTexture(GL_TEXTURE_2D, pmem.cuTexId);
- glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, pmem.w, pmem.h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+ if(mem.data_type == TYPE_HALF)
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F_ARB, pmem.w, pmem.h, 0, GL_RGBA, GL_HALF_FLOAT, NULL);
+ else
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, pmem.w, pmem.h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glBindTexture(GL_TEXTURE_2D, 0);
@@ -865,11 +874,19 @@ public:
/* for multi devices, this assumes the ineffecient method that we allocate
* all pixels on the device even though we only render to a subset */
- size_t offset = sizeof(uint8_t)*4*y*w;
+ size_t offset = 4*y*w;
+
+ if(mem.data_type == TYPE_HALF)
+ offset *= sizeof(GLhalf);
+ else
+ offset *= sizeof(uint8_t);
glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, pmem.cuPBO);
glBindTexture(GL_TEXTURE_2D, pmem.cuTexId);
- glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, w, h, GL_RGBA, GL_UNSIGNED_BYTE, (void*)offset);
+ if(mem.data_type == TYPE_HALF)
+ glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, w, h, GL_RGBA, GL_HALF_FLOAT, (void*)offset);
+ else
+ glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, w, h, GL_RGBA, GL_UNSIGNED_BYTE, (void*)offset);
glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
glEnable(GL_TEXTURE_2D);
@@ -961,9 +978,9 @@ public:
void task_add(DeviceTask& task)
{
- if(task.type == DeviceTask::TONEMAP) {
+ if(task.type == DeviceTask::FILM_CONVERT) {
/* must be done in main thread due to opengl access */
- tonemap(task, task.buffer, task.rgba);
+ film_convert(task, task.buffer, task.rgba_byte, task.rgba_half);
cuda_push_context();
cuda_assert(cuCtxSynchronize())
diff --git a/intern/cycles/device/device_memory.h b/intern/cycles/device/device_memory.h
index d27dd19cc96..18e6242d23d 100644
--- a/intern/cycles/device/device_memory.h
+++ b/intern/cycles/device/device_memory.h
@@ -46,7 +46,8 @@ enum DataType {
TYPE_UCHAR,
TYPE_UINT,
TYPE_INT,
- TYPE_FLOAT
+ TYPE_FLOAT,
+ TYPE_HALF
};
static inline size_t datatype_size(DataType datatype)
@@ -56,6 +57,7 @@ static inline size_t datatype_size(DataType datatype)
case TYPE_FLOAT: return sizeof(float);
case TYPE_UINT: return sizeof(uint);
case TYPE_INT: return sizeof(int);
+ case TYPE_HALF: return sizeof(half);
default: return 0;
}
}
@@ -147,6 +149,11 @@ template<> struct device_type_traits<float4> {
static const int num_elements = 4;
};
+template<> struct device_type_traits<half4> {
+ static const DataType data_type = TYPE_HALF;
+ static const int num_elements = 4;
+};
+
/* Device Memory */
class device_memory
diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp
index af6ca9e1fbd..4df0fdbd4c7 100644
--- a/intern/cycles/device/device_multi.cpp
+++ b/intern/cycles/device/device_multi.cpp
@@ -261,7 +261,6 @@ public:
if(sub.device == sub_device) {
if(tile.buffer) tile.buffer = sub.ptr_map[tile.buffer];
if(tile.rng_state) tile.rng_state = sub.ptr_map[tile.rng_state];
- if(tile.rgba) tile.rgba = sub.ptr_map[tile.rgba];
}
}
}
@@ -290,7 +289,8 @@ public:
tasks.pop_front();
if(task.buffer) subtask.buffer = sub.ptr_map[task.buffer];
- if(task.rgba) subtask.rgba = sub.ptr_map[task.rgba];
+ if(task.rgba_byte) subtask.rgba_byte = sub.ptr_map[task.rgba_byte];
+ if(task.rgba_half) subtask.rgba_half = sub.ptr_map[task.rgba_half];
if(task.shader_input) subtask.shader_input = sub.ptr_map[task.shader_input];
if(task.shader_output) subtask.shader_output = sub.ptr_map[task.shader_output];
diff --git a/intern/cycles/device/device_network.cpp b/intern/cycles/device/device_network.cpp
index 521739b8ef1..23c1a10fa0a 100644
--- a/intern/cycles/device/device_network.cpp
+++ b/intern/cycles/device/device_network.cpp
@@ -408,7 +408,8 @@ protected:
rcv.read(task);
if(task.buffer) task.buffer = ptr_map[task.buffer];
- if(task.rgba) task.rgba = ptr_map[task.rgba];
+ if(task.rgba_byte) task.rgba_byte = ptr_map[task.rgba_byte];
+ if(task.rgba_half) task.rgba_half = ptr_map[task.rgba_half];
if(task.shader_input) task.shader_input = ptr_map[task.shader_input];
if(task.shader_output) task.shader_output = ptr_map[task.shader_output];
@@ -448,7 +449,6 @@ protected:
if(tile.buffer) tile.buffer = ptr_map[tile.buffer];
if(tile.rng_state) tile.rng_state = ptr_map[tile.rng_state];
- if(tile.rgba) tile.rgba = ptr_map[tile.rgba];
result = true;
break;
@@ -478,7 +478,6 @@ protected:
if(tile.buffer) tile.buffer = ptr_imap[tile.buffer];
if(tile.rng_state) tile.rng_state = ptr_imap[tile.rng_state];
- if(tile.rgba) tile.rgba = ptr_imap[tile.rgba];
RPCSend snd(socket, "release_tile");
snd.add(tile);
diff --git a/intern/cycles/device/device_network.h b/intern/cycles/device/device_network.h
index 5fe574fd4d4..db399cf4240 100644
--- a/intern/cycles/device/device_network.h
+++ b/intern/cycles/device/device_network.h
@@ -94,7 +94,7 @@ public:
int type = (int)task.type;
archive & type & task.x & task.y & task.w & task.h;
- archive & task.rgba & task.buffer & task.sample & task.num_samples;
+ archive & task.rgba_byte & task.rgba_half & task.buffer & task.sample & task.num_samples;
archive & task.offset & task.stride;
archive & task.shader_input & task.shader_output & task.shader_eval_type;
archive & task.shader_x & task.shader_w;
@@ -105,7 +105,7 @@ public:
archive & tile.x & tile.y & tile.w & tile.h;
archive & tile.start_sample & tile.num_samples & tile.sample;
archive & tile.offset & tile.stride;
- archive & tile.buffer & tile.rng_state & tile.rgba;
+ archive & tile.buffer & tile.rng_state;
}
void write()
@@ -234,7 +234,7 @@ public:
int type;
*archive & type & task.x & task.y & task.w & task.h;
- *archive & task.rgba & task.buffer & task.sample & task.num_samples;
+ *archive & task.rgba_byte & task.rgba_half & task.buffer & task.sample & task.num_samples;
*archive & task.resolution & task.offset & task.stride;
*archive & task.shader_input & task.shader_output & task.shader_eval_type;
*archive & task.shader_x & task.shader_w;
@@ -247,7 +247,7 @@ public:
*archive & tile.x & tile.y & tile.w & tile.h;
*archive & tile.start_sample & tile.num_samples & tile.sample;
*archive & tile.resolution & tile.offset & tile.stride;
- *archive & tile.buffer & tile.rng_state & tile.rgba;
+ *archive & tile.buffer & tile.rng_state & tile.rgba_byte & tile.rgba_half;
tile.buffers = NULL;
}
diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index e800b3f6442..d723df70c89 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -321,7 +321,8 @@ public:
cl_device_id cdDevice;
cl_program cpProgram;
cl_kernel ckPathTraceKernel;
- cl_kernel ckFilmConvertKernel;
+ cl_kernel ckFilmConvertByteKernel;
+ cl_kernel ckFilmConvertHalfFloatKernel;
cl_kernel ckShaderKernel;
cl_int ciErr;
@@ -431,7 +432,8 @@ public:
cqCommandQueue = NULL;
cpProgram = NULL;
ckPathTraceKernel = NULL;
- ckFilmConvertKernel = NULL;
+ ckFilmConvertByteKernel = NULL;
+ ckFilmConvertHalfFloatKernel = NULL;
ckShaderKernel = NULL;
null_mem = 0;
device_initialized = false;
@@ -762,7 +764,11 @@ public:
if(opencl_error(ciErr))
return false;
- ckFilmConvertKernel = clCreateKernel(cpProgram, "kernel_ocl_tonemap", &ciErr);
+ ckFilmConvertByteKernel = clCreateKernel(cpProgram, "kernel_ocl_convert_to_byte", &ciErr);
+ if(opencl_error(ciErr))
+ return false;
+
+ ckFilmConvertHalfFloatKernel = clCreateKernel(cpProgram, "kernel_ocl_convert_to_half_float", &ciErr);
if(opencl_error(ciErr))
return false;
@@ -788,8 +794,10 @@ public:
if(ckPathTraceKernel)
clReleaseKernel(ckPathTraceKernel);
- if(ckFilmConvertKernel)
- clReleaseKernel(ckFilmConvertKernel);
+ if(ckFilmConvertByteKernel)
+ clReleaseKernel(ckFilmConvertByteKernel);
+ if(ckFilmConvertHalfFloatKernel)
+ clReleaseKernel(ckFilmConvertHalfFloatKernel);
if(cpProgram)
clReleaseProgram(cpProgram);
if(cqCommandQueue)
@@ -980,17 +988,17 @@ public:
return err;
}
- void tonemap(DeviceTask& task, device_ptr buffer, device_ptr rgba)
+ void film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half)
{
/* cast arguments to cl types */
cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer);
- cl_mem d_rgba = CL_MEM_PTR(rgba);
+ cl_mem d_rgba = (rgba_byte)? CL_MEM_PTR(rgba_byte): CL_MEM_PTR(rgba_half);
cl_mem d_buffer = CL_MEM_PTR(buffer);
cl_int d_x = task.x;
cl_int d_y = task.y;
cl_int d_w = task.w;
cl_int d_h = task.h;
- cl_int d_sample = task.sample;
+ cl_float d_sample_scale = 1.0f/(task.sample + 1);
cl_int d_offset = task.offset;
cl_int d_stride = task.stride;
@@ -998,6 +1006,8 @@ public:
cl_uint narg = 0;
ciErr = 0;
+ cl_kernel ckFilmConvertKernel = (rgba_byte)? ckFilmConvertByteKernel: ckFilmConvertHalfFloatKernel;
+
ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_data), (void*)&d_data);
ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_rgba), (void*)&d_rgba);
ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_buffer), (void*)&d_buffer);
@@ -1006,7 +1016,7 @@ public:
ciErr |= set_kernel_arg_mem(ckFilmConvertKernel, &narg, #name);
#include "kernel_textures.h"
- ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_sample), (void*)&d_sample);
+ ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_sample_scale), (void*)&d_sample_scale);
ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_x), (void*)&d_x);
ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_y), (void*)&d_y);
ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_w), (void*)&d_w);
@@ -1052,8 +1062,8 @@ public:
void thread_run(DeviceTask *task)
{
- if(task->type == DeviceTask::TONEMAP) {
- tonemap(*task, task->buffer, task->rgba);
+ if(task->type == DeviceTask::FILM_CONVERT) {
+ film_convert(*task, task->buffer, task->rgba_byte, task->rgba_half);
}
else if(task->type == DeviceTask::SHADER) {
shader(*task);
diff --git a/intern/cycles/device/device_task.cpp b/intern/cycles/device/device_task.cpp
index 8c1e2920635..7d0eeab780d 100644
--- a/intern/cycles/device/device_task.cpp
+++ b/intern/cycles/device/device_task.cpp
@@ -27,7 +27,7 @@ CCL_NAMESPACE_BEGIN
/* Device Task */
DeviceTask::DeviceTask(Type type_)
-: type(type_), x(0), y(0), w(0), h(0), rgba(0), buffer(0),
+: type(type_), x(0), y(0), w(0), h(0), rgba_byte(0), rgba_half(0), buffer(0),
sample(0), num_samples(1),
shader_input(0), shader_output(0),
shader_eval_type(0), shader_x(0), shader_w(0)
diff --git a/intern/cycles/device/device_task.h b/intern/cycles/device/device_task.h
index e232e128827..c1bd39b70ca 100644
--- a/intern/cycles/device/device_task.h
+++ b/intern/cycles/device/device_task.h
@@ -34,11 +34,12 @@ class Tile;
class DeviceTask : public Task {
public:
- typedef enum { PATH_TRACE, TONEMAP, SHADER } Type;
+ typedef enum { PATH_TRACE, FILM_CONVERT, SHADER } Type;
Type type;
int x, y, w, h;
- device_ptr rgba;
+ device_ptr rgba_byte;
+ device_ptr rgba_half;
device_ptr buffer;
int sample;
int num_samples;