Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles')
-rw-r--r-- intern/cycles/blender/CMakeLists.txt 2
-rw-r--r-- intern/cycles/blender/addon/ui.py 9
-rw-r--r-- intern/cycles/blender/blender_curves.cpp 4
-rw-r--r-- intern/cycles/blender/blender_mesh.cpp 2
-rw-r--r-- intern/cycles/blender/blender_shader.cpp 5
-rw-r--r-- intern/cycles/blender/blender_sync.cpp 3
-rw-r--r-- intern/cycles/device/device.cpp 75
-rw-r--r-- intern/cycles/device/device_cuda.cpp 219
-rw-r--r-- intern/cycles/device/device_opencl.cpp 12
-rw-r--r-- intern/cycles/kernel/CMakeLists.txt 38
-rw-r--r-- intern/cycles/kernel/SConscript 34
-rw-r--r-- intern/cycles/kernel/kernel_bvh.h 19
-rw-r--r-- intern/cycles/kernel/kernel_compat_cuda.h 11
-rw-r--r-- intern/cycles/kernel/kernel_film.h 4
-rw-r--r-- intern/cycles/kernel/kernel_globals.h 4
-rw-r--r-- intern/cycles/kernel/kernel_path.h 85
-rw-r--r-- intern/cycles/kernel/kernel_primitive.h 8
-rw-r--r-- intern/cycles/kernel/kernel_shader.h 23
-rw-r--r-- intern/cycles/kernel/kernel_sse2.cpp 3
-rw-r--r-- intern/cycles/kernel/kernel_sse3.cpp 3
-rw-r--r-- intern/cycles/kernel/kernel_subsurface.h 159
-rw-r--r-- intern/cycles/kernel/kernel_types.h 12
-rw-r--r-- intern/cycles/kernel/osl/osl_shader.cpp 2
-rw-r--r-- intern/cycles/kernel/shaders/SConscript 2
-rw-r--r-- intern/cycles/kernel/shaders/node_environment_texture.osl 2
-rw-r--r-- intern/cycles/kernel/shaders/node_hair_bsdf.osl 43
-rw-r--r-- intern/cycles/kernel/shaders/node_sky_texture.osl 12
-rw-r--r-- intern/cycles/kernel/shaders/node_subsurface_scattering.osl 2
-rw-r--r-- intern/cycles/kernel/shaders/node_ward_bsdf.osl 2
-rw-r--r-- intern/cycles/kernel/shaders/stdosl.h 19
-rw-r--r-- intern/cycles/kernel/svm/svm_closure.h 3
-rw-r--r-- intern/cycles/kernel/svm/svm_types.h 3
-rw-r--r-- intern/cycles/render/CMakeLists.txt 2
-rw-r--r-- intern/cycles/render/bssrdf.cpp 134
-rw-r--r-- intern/cycles/render/bssrdf.h 29
-rw-r--r-- intern/cycles/render/nodes.cpp 75
-rw-r--r-- intern/cycles/render/nodes.h 3
-rw-r--r-- intern/cycles/render/shader.cpp 24
-rw-r--r-- intern/cycles/render/shader.h 1
-rw-r--r-- intern/cycles/util/util_opengl.h 5
-rw-r--r-- intern/cycles/util/util_types.h 5
41 files changed, 415 insertions, 687 deletions
diff --git a/intern/cycles/blender/CMakeLists.txt b/intern/cycles/blender/CMakeLists.txt
index 9b72efd90cf..25f91a0caea 100644
--- a/intern/cycles/blender/CMakeLists.txt
+++ b/intern/cycles/blender/CMakeLists.txt
@@ -45,6 +45,8 @@ set(ADDON_FILES
addon/ui.py
)
+add_definitions(-DGLEW_STATIC)
+
blender_add_lib(bf_intern_cycles "${SRC}" "${INC}" "${INC_SYS}")
add_dependencies(bf_intern_cycles bf_rna)
diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py
index 5fa497776fe..7a03df4f35a 100644
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -1085,6 +1085,8 @@ class CyclesTexture_PT_mapping(CyclesButtonsPanel, Panel):
mapping = node.texture_mapping
+ layout.prop(mapping, "vector_type", expand=True)
+
row = layout.row()
row.column().prop(mapping, "translation")
@@ -1178,7 +1180,7 @@ class CyclesRender_PT_CurveRendering(CyclesButtonsPanel, Panel):
scene = context.scene
cscene = scene.cycles
psys = context.particle_system
- return CyclesButtonsPanel.poll(context) and psys
+ return CyclesButtonsPanel.poll(context) and psys and psys.settings.type == 'HAIR'
def draw_header(self, context):
ccscene = context.scene.cycles_curves
@@ -1218,8 +1220,9 @@ class CyclesParticle_PT_CurveSettings(CyclesButtonsPanel, Panel):
scene = context.scene
cscene = scene.cycles
ccscene = scene.cycles_curves
- use_curves = ccscene.use_curves and context.particle_system
- return CyclesButtonsPanel.poll(context) and use_curves
+ psys = context.particle_system
+ use_curves = ccscene.use_curves and psys
+ return CyclesButtonsPanel.poll(context) and use_curves and psys.settings.type == 'HAIR'
def draw(self, context):
layout = self.layout
diff --git a/intern/cycles/blender/blender_curves.cpp b/intern/cycles/blender/blender_curves.cpp
index 57ef9a1005c..1cddc25a22b 100644
--- a/intern/cycles/blender/blender_curves.cpp
+++ b/intern/cycles/blender/blender_curves.cpp
@@ -217,8 +217,8 @@ bool ObtainCacheParticleData(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, Par
float3 pcKey;
for(int step_no = 0; step_no <= ren_step; step_no++) {
float nco[3];
- b_psys.co_hair(*b_ob, psmd, pa_no, step_no, nco);
- float3 cKey = make_float3(nco[0],nco[1],nco[2]);
+ b_psys.co_hair(*b_ob, pa_no, step_no, nco);
+ float3 cKey = make_float3(nco[0], nco[1], nco[2]);
cKey = transform_point(&itfm, cKey);
if(step_no > 0)
curve_length += len(cKey - pcKey);
diff --git a/intern/cycles/blender/blender_mesh.cpp b/intern/cycles/blender/blender_mesh.cpp
index caa61e0c5d7..940a923e5af 100644
--- a/intern/cycles/blender/blender_mesh.cpp
+++ b/intern/cycles/blender/blender_mesh.cpp
@@ -480,7 +480,7 @@ Mesh *BlenderSync::sync_mesh(BL::Object b_ob, bool object_updated, bool hide_tri
BL::Mesh b_mesh = object_to_mesh(b_data, b_ob, b_scene, true, !preview, need_undeformed);
if(b_mesh) {
- if(render_layer.use_surfaces && !(hide_tris && experimental)) {
+ if(render_layer.use_surfaces && !hide_tris) {
if(cmesh.data && experimental && RNA_boolean_get(&cmesh, "use_subdivision"))
create_subd_mesh(mesh, b_mesh, &cmesh, used_shaders);
else
diff --git a/intern/cycles/blender/blender_shader.cpp b/intern/cycles/blender/blender_shader.cpp
index 2007171642f..b576181d890 100644
--- a/intern/cycles/blender/blender_shader.cpp
+++ b/intern/cycles/blender/blender_shader.cpp
@@ -147,6 +147,7 @@ static void get_tex_mapping(TextureMapping *mapping, BL::TexMapping b_mapping)
mapping->translation = get_float3(b_mapping.translation());
mapping->rotation = get_float3(b_mapping.rotation());
mapping->scale = get_float3(b_mapping.scale());
+ mapping->type = (TextureMapping::Type)b_mapping.vector_type();
mapping->x_mapping = (TextureMapping::Mapping)b_mapping.mapping_x();
mapping->y_mapping = (TextureMapping::Mapping)b_mapping.mapping_y();
@@ -161,6 +162,7 @@ static void get_tex_mapping(TextureMapping *mapping, BL::ShaderNodeMapping b_map
mapping->translation = get_float3(b_mapping.translation());
mapping->rotation = get_float3(b_mapping.rotation());
mapping->scale = get_float3(b_mapping.scale());
+ mapping->type = (TextureMapping::Type)b_mapping.vector_type();
mapping->use_minmax = b_mapping.use_min() || b_mapping.use_max();
@@ -320,9 +322,6 @@ static ShaderNode *add_node(Scene *scene, BL::BlendData b_data, BL::Scene b_scen
SubsurfaceScatteringNode *subsurface = new SubsurfaceScatteringNode();
switch(b_subsurface_node.falloff()) {
- case BL::ShaderNodeSubsurfaceScattering::falloff_COMPATIBLE:
- subsurface->closure = CLOSURE_BSSRDF_COMPATIBLE_ID;
- break;
case BL::ShaderNodeSubsurfaceScattering::falloff_CUBIC:
subsurface->closure = CLOSURE_BSSRDF_CUBIC_ID;
break;
diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp
index 337d075cce4..a349ddc5c3f 100644
--- a/intern/cycles/blender/blender_sync.cpp
+++ b/intern/cycles/blender/blender_sync.cpp
@@ -34,6 +34,7 @@
#include "util_debug.h"
#include "util_foreach.h"
+#include "util_opengl.h"
CCL_NAMESPACE_BEGIN
@@ -494,7 +495,7 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine b_engine, BL::Use
params.shadingsystem = SessionParams::OSL;
/* color management */
- params.display_buffer_linear = b_engine.support_display_space_shader(b_scene);
+ params.display_buffer_linear = GLEW_ARB_half_float_pixel && b_engine.support_display_space_shader(b_scene);
return params;
}
diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp
index 10d4112b57d..eb262a907a4 100644
--- a/intern/cycles/device/device.cpp
+++ b/intern/cycles/device/device.cpp
@@ -56,18 +56,6 @@ void Device::draw_pixels(device_memory& rgba, int y, int w, int h, int dy, int w
{
pixels_copy_from(rgba, y, w, h);
- GLuint texid;
- glGenTextures(1, &texid);
- glBindTexture(GL_TEXTURE_2D, texid);
- if(rgba.data_type == TYPE_HALF)
- glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F_ARB, w, h, 0, GL_RGBA, GL_HALF_FLOAT, (void*)rgba.data_pointer);
- else
- glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, w, h, 0, GL_RGBA, GL_UNSIGNED_BYTE, (void*)rgba.data_pointer);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
-
- glEnable(GL_TEXTURE_2D);
-
if(transparent) {
glEnable(GL_BLEND);
glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA);
@@ -75,30 +63,57 @@ void Device::draw_pixels(device_memory& rgba, int y, int w, int h, int dy, int w
glColor3f(1.0f, 1.0f, 1.0f);
- glPushMatrix();
- glTranslatef(0.0f, (float)dy, 0.0f);
+ if(rgba.data_type == TYPE_HALF) {
+ /* draw half float texture, GLSL shader for display transform assumed to be bound */
+ GLuint texid;
+ glGenTextures(1, &texid);
+ glBindTexture(GL_TEXTURE_2D, texid);
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F_ARB, w, h, 0, GL_RGBA, GL_HALF_FLOAT, (void*)rgba.data_pointer);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
- glBegin(GL_QUADS);
-
- glTexCoord2f(0.0f, 0.0f);
- glVertex2f(0.0f, 0.0f);
- glTexCoord2f(1.0f, 0.0f);
- glVertex2f((float)width, 0.0f);
- glTexCoord2f(1.0f, 1.0f);
- glVertex2f((float)width, (float)height);
- glTexCoord2f(0.0f, 1.0f);
- glVertex2f(0.0f, (float)height);
+ glEnable(GL_TEXTURE_2D);
+
+ glPushMatrix();
+ glTranslatef(0.0f, (float)dy, 0.0f);
+
+ glBegin(GL_QUADS);
+
+ glTexCoord2f(0.0f, 0.0f);
+ glVertex2f(0.0f, 0.0f);
+ glTexCoord2f(1.0f, 0.0f);
+ glVertex2f((float)width, 0.0f);
+ glTexCoord2f(1.0f, 1.0f);
+ glVertex2f((float)width, (float)height);
+ glTexCoord2f(0.0f, 1.0f);
+ glVertex2f(0.0f, (float)height);
- glEnd();
+ glEnd();
- glPopMatrix();
+ glPopMatrix();
+
+ glBindTexture(GL_TEXTURE_2D, 0);
+ glDisable(GL_TEXTURE_2D);
+ glDeleteTextures(1, &texid);
+ }
+ else {
+ /* fallback for old graphics cards that don't support GLSL, half float,
+ * and non-power-of-two textures */
+ glPixelZoom((float)width/(float)w, (float)height/(float)h);
+ glRasterPos2f(0, dy);
+
+ uint8_t *pixels = (uint8_t*)rgba.data_pointer;
+
+ pixels += 4*y*w;
+
+ glDrawPixels(w, h, GL_RGBA, GL_UNSIGNED_BYTE, pixels);
+
+ glRasterPos2f(0.0f, 0.0f);
+ glPixelZoom(1.0f, 1.0f);
+ }
if(transparent)
glDisable(GL_BLEND);
-
- glBindTexture(GL_TEXTURE_2D, 0);
- glDisable(GL_TEXTURE_2D);
- glDeleteTextures(1, &texid);
}
Device *Device::create(DeviceInfo& info, Stats &stats, bool background)
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index b5eaa69bf0e..4ce7f6fd729 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -43,7 +43,9 @@ public:
CUmodule cuModule;
map<device_ptr, bool> tex_interp_map;
int cuDevId;
+ int cuDevArchitecture;
bool first_error;
+ bool use_texture_storage;
struct PixelMem {
GLuint cuPBO;
@@ -173,6 +175,7 @@ public:
{
first_error = true;
background = background_;
+ use_texture_storage = true;
cuDevId = info.num;
cuDevice = 0;
@@ -203,6 +206,15 @@ public:
if(cuda_error_(result, "cuCtxCreate"))
return;
+ int major, minor;
+ cuDeviceComputeCapability(&major, &minor, cuDevId);
+ cuDevArchitecture = major*100 + minor*10;
+
+ /* In order to use full 6GB of memory on Titan cards, use arrays instead
+ * of textures. On earlier cards this seems slower, but on Titan it is
+ * actually slightly faster in tests. */
+ use_texture_storage = (cuDevArchitecture < 350);
+
cuda_pop_context();
}
@@ -210,20 +222,17 @@ public:
{
task_pool.stop();
- cuda_push_context();
- cuda_assert(cuCtxDetach(cuContext))
+ cuda_assert(cuCtxDestroy(cuContext))
}
bool support_device(bool experimental)
{
- if(!experimental) {
- int major, minor;
- cuDeviceComputeCapability(&major, &minor, cuDevId);
+ int major, minor;
+ cuDeviceComputeCapability(&major, &minor, cuDevId);
- if(major < 2) {
- cuda_error_message(string_printf("CUDA device supported only with compute capability 2.0 or up, found %d.%d.", major, minor));
- return false;
- }
+ if(major < 2) {
+ cuda_error_message(string_printf("CUDA device supported only with compute capability 2.0 or up, found %d.%d.", major, minor));
+ return false;
}
return true;
@@ -275,8 +284,12 @@ public:
cuda_error_message("CUDA nvcc compiler version could not be parsed.");
return "";
}
+ if(cuda_version < 50) {
+ printf("Unsupported CUDA version %d.%d detected, you need CUDA 5.0.\n", cuda_version/10, cuda_version%10);
+ return "";
+ }
- if(cuda_version != 50)
+ else if(cuda_version > 50)
printf("CUDA version %d.%d detected, build may succeed but only CUDA 5.0 is officially supported.\n", cuda_version/10, cuda_version%10);
/* compile */
@@ -285,36 +298,14 @@ public:
const int machine = system_cpu_bits();
string arch_flags;
- /* build flags depending on CUDA version and arch */
- if(cuda_version < 50) {
- /* CUDA 4.x */
- if(major == 1) {
- /* sm_1x */
- arch_flags = "--maxrregcount=24 --opencc-options -OPT:Olimit=0";
- }
- else if(major == 2) {
- /* sm_2x */
- arch_flags = "--maxrregcount=24";
- }
- else {
- /* sm_3x */
- arch_flags = "--maxrregcount=32";
- }
+ /* CUDA 5.x build flags for different archs */
+ if(major == 2) {
+ /* sm_2x */
+ arch_flags = "--maxrregcount=32 --use_fast_math";
}
- else {
- /* CUDA 5.x */
- if(major == 1) {
- /* sm_1x */
- arch_flags = "--maxrregcount=24 --opencc-options -OPT:Olimit=0 --use_fast_math";
- }
- else if(major == 2) {
- /* sm_2x */
- arch_flags = "--maxrregcount=32 --use_fast_math";
- }
- else {
- /* sm_3x */
- arch_flags = "--maxrregcount=32 --use_fast_math";
- }
+ else if(major == 3) {
+ /* sm_3x */
+ arch_flags = "--maxrregcount=32 --use_fast_math";
}
double starttime = time_dt();
@@ -448,90 +439,118 @@ public:
CUarray_format_enum format;
size_t dsize = datatype_size(mem.data_type);
size_t size = mem.memory_size();
+ bool use_texture = interpolation || use_texture_storage;
- switch(mem.data_type) {
- case TYPE_UCHAR: format = CU_AD_FORMAT_UNSIGNED_INT8; break;
- case TYPE_UINT: format = CU_AD_FORMAT_UNSIGNED_INT32; break;
- case TYPE_INT: format = CU_AD_FORMAT_SIGNED_INT32; break;
- case TYPE_FLOAT: format = CU_AD_FORMAT_FLOAT; break;
- default: assert(0); return;
- }
-
- CUtexref texref = NULL;
-
- cuda_push_context();
- cuda_assert(cuModuleGetTexRef(&texref, cuModule, name))
-
- if(!texref) {
- cuda_pop_context();
- return;
- }
+ if(use_texture) {
- if(interpolation) {
- CUarray handle = NULL;
- CUDA_ARRAY_DESCRIPTOR desc;
+ switch(mem.data_type) {
+ case TYPE_UCHAR: format = CU_AD_FORMAT_UNSIGNED_INT8; break;
+ case TYPE_UINT: format = CU_AD_FORMAT_UNSIGNED_INT32; break;
+ case TYPE_INT: format = CU_AD_FORMAT_SIGNED_INT32; break;
+ case TYPE_FLOAT: format = CU_AD_FORMAT_FLOAT; break;
+ default: assert(0); return;
+ }
- desc.Width = mem.data_width;
- desc.Height = mem.data_height;
- desc.Format = format;
- desc.NumChannels = mem.data_elements;
+ CUtexref texref = NULL;
- cuda_assert(cuArrayCreate(&handle, &desc))
+ cuda_push_context();
+ cuda_assert(cuModuleGetTexRef(&texref, cuModule, name))
- if(!handle) {
+ if(!texref) {
cuda_pop_context();
return;
}
- if(mem.data_height > 1) {
- CUDA_MEMCPY2D param;
- memset(&param, 0, sizeof(param));
- param.dstMemoryType = CU_MEMORYTYPE_ARRAY;
- param.dstArray = handle;
- param.srcMemoryType = CU_MEMORYTYPE_HOST;
- param.srcHost = (void*)mem.data_pointer;
- param.srcPitch = mem.data_width*dsize*mem.data_elements;
- param.WidthInBytes = param.srcPitch;
- param.Height = mem.data_height;
-
- cuda_assert(cuMemcpy2D(&param))
+ if(interpolation) {
+ CUarray handle = NULL;
+ CUDA_ARRAY_DESCRIPTOR desc;
+
+ desc.Width = mem.data_width;
+ desc.Height = mem.data_height;
+ desc.Format = format;
+ desc.NumChannels = mem.data_elements;
+
+ cuda_assert(cuArrayCreate(&handle, &desc))
+
+ if(!handle) {
+ cuda_pop_context();
+ return;
+ }
+
+ if(mem.data_height > 1) {
+ CUDA_MEMCPY2D param;
+ memset(&param, 0, sizeof(param));
+ param.dstMemoryType = CU_MEMORYTYPE_ARRAY;
+ param.dstArray = handle;
+ param.srcMemoryType = CU_MEMORYTYPE_HOST;
+ param.srcHost = (void*)mem.data_pointer;
+ param.srcPitch = mem.data_width*dsize*mem.data_elements;
+ param.WidthInBytes = param.srcPitch;
+ param.Height = mem.data_height;
+
+ cuda_assert(cuMemcpy2D(&param))
+ }
+ else
+ cuda_assert(cuMemcpyHtoA(handle, 0, (void*)mem.data_pointer, size))
+
+ cuda_assert(cuTexRefSetArray(texref, handle, CU_TRSA_OVERRIDE_FORMAT))
+
+ cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_LINEAR))
+ cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_NORMALIZED_COORDINATES))
+
+ mem.device_pointer = (device_ptr)handle;
+
+ stats.mem_alloc(size);
}
- else
- cuda_assert(cuMemcpyHtoA(handle, 0, (void*)mem.data_pointer, size))
+ else {
+ cuda_pop_context();
- cuda_assert(cuTexRefSetArray(texref, handle, CU_TRSA_OVERRIDE_FORMAT))
+ mem_alloc(mem, MEM_READ_ONLY);
+ mem_copy_to(mem);
- cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_LINEAR))
- cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_NORMALIZED_COORDINATES))
+ cuda_push_context();
+
+ cuda_assert(cuTexRefSetAddress(NULL, texref, cuda_device_ptr(mem.device_pointer), size))
+ cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_POINT))
+ cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_READ_AS_INTEGER))
+ }
- mem.device_pointer = (device_ptr)handle;
+ if(periodic) {
+ cuda_assert(cuTexRefSetAddressMode(texref, 0, CU_TR_ADDRESS_MODE_WRAP))
+ cuda_assert(cuTexRefSetAddressMode(texref, 1, CU_TR_ADDRESS_MODE_WRAP))
+ }
+ else {
+ cuda_assert(cuTexRefSetAddressMode(texref, 0, CU_TR_ADDRESS_MODE_CLAMP))
+ cuda_assert(cuTexRefSetAddressMode(texref, 1, CU_TR_ADDRESS_MODE_CLAMP))
+ }
+ cuda_assert(cuTexRefSetFormat(texref, format, mem.data_elements))
- stats.mem_alloc(size);
+ cuda_pop_context();
}
else {
- cuda_pop_context();
-
mem_alloc(mem, MEM_READ_ONLY);
mem_copy_to(mem);
cuda_push_context();
- cuda_assert(cuTexRefSetAddress(NULL, texref, cuda_device_ptr(mem.device_pointer), size))
- cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_POINT))
- cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_READ_AS_INTEGER))
- }
+ CUdeviceptr cumem;
+ size_t cubytes;
- if(periodic) {
- cuda_assert(cuTexRefSetAddressMode(texref, 0, CU_TR_ADDRESS_MODE_WRAP))
- cuda_assert(cuTexRefSetAddressMode(texref, 1, CU_TR_ADDRESS_MODE_WRAP))
- }
- else {
- cuda_assert(cuTexRefSetAddressMode(texref, 0, CU_TR_ADDRESS_MODE_CLAMP))
- cuda_assert(cuTexRefSetAddressMode(texref, 1, CU_TR_ADDRESS_MODE_CLAMP))
- }
- cuda_assert(cuTexRefSetFormat(texref, format, mem.data_elements))
+ cuda_assert(cuModuleGetGlobal(&cumem, &cubytes, cuModule, name))
- cuda_pop_context();
+ if(cubytes == 8) {
+ /* 64 bit device pointer */
+ uint64_t ptr = mem.device_pointer;
+ cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes))
+ }
+ else {
+ /* 32 bit device pointer */
+ uint32_t ptr = (uint32_t)mem.device_pointer;
+ cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes))
+ }
+
+ cuda_pop_context();
+ }
tex_interp_map[mem.device_pointer] = interpolation;
}
diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index d723df70c89..11c7bc6f099 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -892,8 +892,16 @@ public:
void tex_free(device_memory& mem)
{
- if(mem.data_pointer)
+ if(mem.device_pointer) {
+ foreach(const MemMap::value_type& value, mem_map) {
+ if(value.second == mem.device_pointer) {
+ mem_map.erase(value.first);
+ break;
+ }
+ }
+
mem_free(mem);
+ }
}
size_t global_size_round_up(int group_size, int global_size)
@@ -1086,7 +1094,7 @@ public:
tile.sample = sample + 1;
- //task->update_progress(tile);
+ task->update_progress(tile);
}
task->release_tile(tile);
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index eaa4e304ebb..56ba0e08743 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -151,36 +151,16 @@ if(WITH_CYCLES_CUDA_BINARIES)
set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${CUDA_VERSION}")
- # build flags depending on CUDA version and arch
- if(CUDA_VERSION LESS 50)
- # CUDA 4.x
- if(${arch} MATCHES "sm_1[0-9]")
- # sm_1x
- set(cuda_arch_flags "--maxrregcount=24 --opencc-options -OPT:Olimit=0")
- elseif(${arch} MATCHES "sm_2[0-9]")
- # sm_2x
- set(cuda_arch_flags "--maxrregcount=24")
- else()
- # sm_3x
- set(cuda_arch_flags "--maxrregcount=32")
- endif()
-
- set(cuda_math_flags "")
- else()
- # CUDA 5.x
- if(${arch} MATCHES "sm_1[0-9]")
- # sm_1x
- set(cuda_arch_flags "--maxrregcount=24 --opencc-options -OPT:Olimit=0")
- elseif(${arch} MATCHES "sm_2[0-9]")
- # sm_2x
- set(cuda_arch_flags "--maxrregcount=32")
- else()
- # sm_3x
- set(cuda_arch_flags "--maxrregcount=32")
- endif()
-
- set(cuda_math_flags "--use_fast_math")
+ # CUDA 5.x build flags for different archs
+ if(${arch} MATCHES "sm_2[0-9]")
+ # sm_2x
+ set(cuda_arch_flags "--maxrregcount=32")
+ elseif(${arch} MATCHES "sm_3[0-9]")
+ # sm_3x
+ set(cuda_arch_flags "--maxrregcount=32")
endif()
+
+ set(cuda_math_flags "--use_fast_math")
if(CUDA_VERSION LESS 50 AND ${arch} MATCHES "sm_35")
message(WARNING "Can't build kernel for CUDA sm_35 architecture, skipping")
diff --git a/intern/cycles/kernel/SConscript b/intern/cycles/kernel/SConscript
index 6459c3ed183..a0522d9ba8e 100644
--- a/intern/cycles/kernel/SConscript
+++ b/intern/cycles/kernel/SConscript
@@ -86,33 +86,13 @@ if env['WITH_BF_CYCLES_CUDA_BINARIES']:
for arch in cuda_archs:
cubin_file = os.path.join(build_dir, "kernel_%s.cubin" % arch)
- # build flags depending on CUDA version and arch
- if cuda_version < 50:
- if arch == "sm_35":
- print("Can't build kernel for CUDA sm_35 architecture, skipping")
- continue
-
- # CUDA 4.x
- if arch.startswith("sm_1"):
- # sm_1x
- cuda_arch_flags = "--maxrregcount=24 --opencc-options -OPT:Olimit=0"
- elif arch.startswith("sm_2"):
- # sm_2x
- cuda_arch_flags = "--maxrregcount=24"
- else:
- # sm_3x
- cuda_arch_flags = "--maxrregcount=32"
- else:
- # CUDA 5.x
- if arch.startswith("sm_1"):
- # sm_1x
- cuda_arch_flags = "--maxrregcount=24 --opencc-options -OPT:Olimit=0 --use_fast_math"
- elif arch.startswith("sm_2"):
- # sm_2x
- cuda_arch_flags = "--maxrregcount=32 --use_fast_math"
- else:
- # sm_3x
- cuda_arch_flags = "--maxrregcount=32 --use_fast_math"
+ # CUDA 5.x build flags for different archs
+ if arch.startswith("sm_2"):
+ # sm_2x
+ cuda_arch_flags = "--maxrregcount=32 --use_fast_math"
+ elif arch.startswith("sm_3"):
+ # sm_3x
+ cuda_arch_flags = "--maxrregcount=32 --use_fast_math"
command = "\"%s\" -arch=%s %s %s \"%s\" -o \"%s\"" % (nvcc, arch, nvcc_flags, cuda_arch_flags, kernel_file, cubin_file)
diff --git a/intern/cycles/kernel/kernel_bvh.h b/intern/cycles/kernel/kernel_bvh.h
index 4b01f2eebcd..44a9822c103 100644
--- a/intern/cycles/kernel/kernel_bvh.h
+++ b/intern/cycles/kernel/kernel_bvh.h
@@ -809,11 +809,16 @@ __device_inline void bvh_triangle_intersect_subsurface(KernelGlobals *kg, Inters
#include "kernel_bvh_subsurface.h"
#endif
-
+/* to work around titan bug when using arrays instead of textures */
+#if !defined(__KERNEL_CUDA__) || defined(__KERNEL_CUDA_TEX_STORAGE__)
+__device_inline
+#else
+__device_noinline
+#endif
#ifdef __HAIR__
-__device_inline bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect, uint *lcg_state, float difl, float extmax)
+bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect, uint *lcg_state, float difl, float extmax)
#else
-__device_inline bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect)
+bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect)
#endif
{
#ifdef __OBJECT_MOTION__
@@ -851,8 +856,14 @@ __device_inline bool scene_intersect(KernelGlobals *kg, const Ray *ray, const ui
#endif /* __KERNEL_CPU__ */
}
+/* to work around titan bug when using arrays instead of textures */
#ifdef __SUBSURFACE__
-__device_inline uint scene_intersect_subsurface(KernelGlobals *kg, const Ray *ray, Intersection *isect, int subsurface_object, uint *lcg_state, int max_hits)
+#if !defined(__KERNEL_CUDA__) || defined(__KERNEL_CUDA_TEX_STORAGE__)
+__device_inline
+#else
+__device_noinline
+#endif
+uint scene_intersect_subsurface(KernelGlobals *kg, const Ray *ray, Intersection *isect, int subsurface_object, uint *lcg_state, int max_hits)
{
#ifdef __OBJECT_MOTION__
if(kernel_data.bvh.have_motion) {
diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h
index cb86ce8c4ae..44c2b9effe9 100644
--- a/intern/cycles/kernel/kernel_compat_cuda.h
+++ b/intern/cycles/kernel/kernel_compat_cuda.h
@@ -57,7 +57,18 @@ typedef texture<uchar4, 2, cudaReadModeNormalizedFloat> texture_image_uchar4;
/* Macros to handle different memory storage on different devices */
+/* In order to use full 6GB of memory on Titan cards, use arrays instead
+ * of textures. On earlier cards this seems slower, but on Titan it is
+ * actually slightly faster in tests. */
+#if __CUDA_ARCH__ < 350
+#define __KERNEL_CUDA_TEX_STORAGE__
+#endif
+
+#ifdef __KERNEL_CUDA_TEX_STORAGE__
#define kernel_tex_fetch(t, index) tex1Dfetch(t, index)
+#else
+#define kernel_tex_fetch(t, index) t[(index)]
+#endif
#define kernel_tex_image_interp(t, x, y) tex2D(t, x, y)
#define kernel_data __data
diff --git a/intern/cycles/kernel/kernel_film.h b/intern/cycles/kernel/kernel_film.h
index 8868a335037..721eceabc37 100644
--- a/intern/cycles/kernel/kernel_film.h
+++ b/intern/cycles/kernel/kernel_film.h
@@ -70,8 +70,8 @@ __device void kernel_film_convert_to_half_float(KernelGlobals *kg,
/* buffer offset */
int index = offset + x + y*stride;
- float4 *in = (float4*)(buffer + index*kernel_data.film.pass_stride);
- half *out = (half*)rgba + index*4;
+ __global float4 *in = (__global float4*)(buffer + index*kernel_data.film.pass_stride);
+ __global half *out = (__global half*)rgba + index*4;
float scale = kernel_data.film.exposure*sample_scale;
float4_store_half(out, in, scale);
diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h
index ab0a717b592..b5e691eb615 100644
--- a/intern/cycles/kernel/kernel_globals.h
+++ b/intern/cycles/kernel/kernel_globals.h
@@ -66,7 +66,11 @@ typedef struct KernelGlobals {
__constant__ KernelData __data;
typedef struct KernelGlobals {} KernelGlobals;
+#ifdef __KERNEL_CUDA_TEX_STORAGE__
#define KERNEL_TEX(type, ttype, name) ttype name;
+#else
+#define KERNEL_TEX(type, ttype, name) const __constant__ __device__ type *name;
+#endif
#define KERNEL_IMAGE_TEX(type, ttype, name) ttype name;
#include "kernel_textures.h"
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index 1afe8480616..5354738d378 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -266,14 +266,9 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray
if(sc) {
uint lcg_state = lcg_init(*rng + rng_offset + sample*0x68bc21eb);
- if(old_subsurface_scatter_use(&sd)) {
- old_subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, false);
- }
- else {
- float bssrdf_u, bssrdf_v;
- path_rng_2D(kg, rng, sample, num_total_samples, rng_offset + PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
- subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, bssrdf_u, bssrdf_v, false);
- }
+ float bssrdf_u, bssrdf_v;
+ path_rng_2D(kg, rng, sample, num_total_samples, rng_offset + PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
+ subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, bssrdf_u, bssrdf_v, false);
state.flag |= PATH_RAY_BSSRDF_ANCESTOR;
}
@@ -468,7 +463,9 @@ __device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, R
float min_ray_pdf = FLT_MAX;
float ray_pdf = 0.0f;
+#ifdef __LAMP_MIS__
float ray_t = 0.0f;
+#endif
PathState state;
int rng_offset = PRNG_BASE_NUM;
#ifdef __CMJ__
@@ -662,41 +659,35 @@ __device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, R
if(sc) {
uint lcg_state = lcg_init(*rng + rng_offset + sample*0x68bc21eb);
- if(old_subsurface_scatter_use(&sd)) {
- old_subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, false);
- }
- else {
- ShaderData bssrdf_sd[BSSRDF_MAX_HITS];
- float bssrdf_u, bssrdf_v;
- path_rng_2D(kg, rng, sample, num_samples, rng_offset + PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
- int num_hits = subsurface_scatter_multi_step(kg, &sd, bssrdf_sd, state.flag, sc, &lcg_state, bssrdf_u, bssrdf_v, false);
-
- /* compute lighting with the BSDF closure */
- for(int hit = 0; hit < num_hits; hit++) {
- float3 tp = throughput;
- PathState hit_state = state;
- Ray hit_ray = ray;
- float hit_ray_t = ray_t;
- float hit_ray_pdf = ray_pdf;
- float hit_min_ray_pdf = min_ray_pdf;
-
- hit_state.flag |= PATH_RAY_BSSRDF_ANCESTOR;
-
- if(kernel_path_integrate_lighting(kg, rng, sample, num_samples, &bssrdf_sd[hit],
- &tp, &hit_min_ray_pdf, &hit_ray_pdf, &hit_state, rng_offset+PRNG_BOUNCE_NUM, &L, &hit_ray, &hit_ray_t)) {
- kernel_path_indirect(kg, rng, sample, hit_ray, buffer,
- tp, num_samples, num_samples,
- hit_min_ray_pdf, hit_ray_pdf, hit_state, rng_offset+PRNG_BOUNCE_NUM*2, &L);
-
- /* for render passes, sum and reset indirect light pass variables
- * for the next samples */
- path_radiance_sum_indirect(&L);
- path_radiance_reset_indirect(&L);
- }
+ ShaderData bssrdf_sd[BSSRDF_MAX_HITS];
+ float bssrdf_u, bssrdf_v;
+ path_rng_2D(kg, rng, sample, num_samples, rng_offset + PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
+ int num_hits = subsurface_scatter_multi_step(kg, &sd, bssrdf_sd, state.flag, sc, &lcg_state, bssrdf_u, bssrdf_v, false);
+
+ /* compute lighting with the BSDF closure */
+ for(int hit = 0; hit < num_hits; hit++) {
+ float3 tp = throughput;
+ PathState hit_state = state;
+ Ray hit_ray = ray;
+ float hit_ray_t = ray_t;
+ float hit_ray_pdf = ray_pdf;
+ float hit_min_ray_pdf = min_ray_pdf;
+
+ hit_state.flag |= PATH_RAY_BSSRDF_ANCESTOR;
+
+ if(kernel_path_integrate_lighting(kg, rng, sample, num_samples, &bssrdf_sd[hit],
+ &tp, &hit_min_ray_pdf, &hit_ray_pdf, &hit_state, rng_offset+PRNG_BOUNCE_NUM, &L, &hit_ray, &hit_ray_t)) {
+ kernel_path_indirect(kg, rng, sample, hit_ray, buffer,
+ tp, num_samples, num_samples,
+ hit_min_ray_pdf, hit_ray_pdf, hit_state, rng_offset+PRNG_BOUNCE_NUM*2, &L);
+
+ /* for render passes, sum and reset indirect light pass variables
+ * for the next samples */
+ path_radiance_sum_indirect(&L);
+ path_radiance_reset_indirect(&L);
}
-
- break;
}
+ break;
}
}
#endif
@@ -1129,17 +1120,6 @@ __device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, int
/* do subsurface scatter step with copy of shader data, this will
* replace the BSSRDF with a diffuse BSDF closure */
for(int j = 0; j < num_samples; j++) {
- if(old_subsurface_scatter_use(&sd)) {
- ShaderData bssrdf_sd = sd;
- old_subsurface_scatter_step(kg, &bssrdf_sd, state.flag, sc, &lcg_state, true);
-
- /* compute lighting with the BSDF closure */
- kernel_branched_path_integrate_lighting(kg, rng, sample*num_samples + j,
- aa_samples*num_samples,
- &bssrdf_sd, throughput, num_samples_inv,
- ray_pdf, ray_pdf, state, rng_offset, &L, buffer);
- }
- else {
ShaderData bssrdf_sd[BSSRDF_MAX_HITS];
float bssrdf_u, bssrdf_v;
path_rng_2D(kg, &bssrdf_rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
@@ -1151,7 +1131,6 @@ __device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, int
aa_samples*num_samples,
&bssrdf_sd[hit], throughput, num_samples_inv,
ray_pdf, ray_pdf, state, rng_offset+PRNG_BOUNCE_NUM, &L, buffer);
- }
}
state.flag &= ~PATH_RAY_BSSRDF_ANCESTOR;
diff --git a/intern/cycles/kernel/kernel_primitive.h b/intern/cycles/kernel/kernel_primitive.h
index 4a06dff84bf..636cfd06532 100644
--- a/intern/cycles/kernel/kernel_primitive.h
+++ b/intern/cycles/kernel/kernel_primitive.h
@@ -93,7 +93,11 @@ __device float3 primitive_tangent(KernelGlobals *kg, ShaderData *sd)
{
#ifdef __HAIR__
if(sd->segment != ~0)
+#ifdef __DPDU__
return normalize(sd->dPdu);
+#else
+ return make_float3(0.0f, 0.0f, 0.0f);
+#endif
#endif
/* try to create spherical tangent from generated coordinates */
@@ -108,7 +112,11 @@ __device float3 primitive_tangent(KernelGlobals *kg, ShaderData *sd)
}
else {
/* otherwise use surface derivatives */
+#ifdef __DPDU__
return normalize(sd->dPdu);
+#else
+ return make_float3(0.0f, 0.0f, 0.0f);
+#endif
}
}
diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h
index ee71a0cfcf4..81630caed9a 100644
--- a/intern/cycles/kernel/kernel_shader.h
+++ b/intern/cycles/kernel/kernel_shader.h
@@ -36,15 +36,8 @@ CCL_NAMESPACE_BEGIN
/* ShaderData setup from incoming ray */
#ifdef __OBJECT_MOTION__
-#if defined(__KERNEL_CUDA_VERSION__) && __KERNEL_CUDA_VERSION__ <= 42
-__device_noinline
-#else
-__device
-#endif
-void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float time)
+__device void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float time)
{
- /* note that this is a separate non-inlined function to work around crash
- * on CUDA sm 2.0, otherwise kernel execution crashes (compiler bug?) */
if(sd->flag & SD_OBJECT_MOTION) {
sd->ob_tfm = object_fetch_transform_motion(kg, sd->object, time);
sd->ob_itfm= transform_quick_inverse(sd->ob_tfm);
@@ -56,12 +49,7 @@ void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float tim
}
#endif
-#if defined(__KERNEL_CUDA_VERSION__) && __KERNEL_CUDA_VERSION__ <= 42
-__device_noinline
-#else
-__device
-#endif
-void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd,
+__device void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd,
const Intersection *isect, const Ray *ray, int bounce)
{
#ifdef __INSTANCING__
@@ -249,12 +237,7 @@ __device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderData
/* ShaderData setup from position sampled on mesh */
-#if defined(__KERNEL_CUDA_VERSION__) && __KERNEL_CUDA_VERSION__ <= 42
-__device_noinline
-#else
-__device
-#endif
-void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd,
+__device void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd,
const float3 P, const float3 Ng, const float3 I,
int shader, int object, int prim, float u, float v, float t, float time, int bounce, int segment)
{
diff --git a/intern/cycles/kernel/kernel_sse2.cpp b/intern/cycles/kernel/kernel_sse2.cpp
index 862626d6899..9c69e519dca 100644
--- a/intern/cycles/kernel/kernel_sse2.cpp
+++ b/intern/cycles/kernel/kernel_sse2.cpp
@@ -20,7 +20,10 @@
#ifdef WITH_OPTIMIZED_KERNEL
+/* SSE optimization disabled for now on 32 bit, see bug #36316 */
+#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
#define __KERNEL_SSE2__
+#endif
#include "kernel.h"
#include "kernel_compat_cpu.h"
diff --git a/intern/cycles/kernel/kernel_sse3.cpp b/intern/cycles/kernel/kernel_sse3.cpp
index c44098606a5..05877a41b4a 100644
--- a/intern/cycles/kernel/kernel_sse3.cpp
+++ b/intern/cycles/kernel/kernel_sse3.cpp
@@ -20,9 +20,12 @@
#ifdef WITH_OPTIMIZED_KERNEL
+/* SSE optimization disabled for now on 32 bit, see bug #36316 */
+#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
#define __KERNEL_SSE2__
#define __KERNEL_SSE3__
#define __KERNEL_SSSE3__
+#endif
#include "kernel.h"
#include "kernel_compat_cpu.h"
diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h
index 4567f2ff0ce..d16b9328bf2 100644
--- a/intern/cycles/kernel/kernel_subsurface.h
+++ b/intern/cycles/kernel/kernel_subsurface.h
@@ -403,164 +403,5 @@ __device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd,
subsurface_scatter_setup_diffuse_bsdf(sd, eval, (num_hits > 0), N);
}
-
-/* OLD BSSRDF */
-
-__device float old_bssrdf_sample_distance(KernelGlobals *kg, float radius, float refl, float u)
-{
- int table_offset = kernel_data.bssrdf.table_offset;
- float r = lookup_table_read_2D(kg, u, refl, table_offset, BSSRDF_RADIUS_TABLE_SIZE, BSSRDF_REFL_TABLE_SIZE);
-
- return r*radius;
-}
-
-#ifdef BSSRDF_MULTI_EVAL
-__device float old_bssrdf_pdf(KernelGlobals *kg, float radius, float refl, float r)
-{
- if(r >= radius)
- return 0.0f;
-
- /* todo: when we use the real BSSRDF this will need to be divided by the maximum
- * radius instead of the average radius */
- float t = r/radius;
-
- int table_offset = kernel_data.bssrdf.table_offset + BSSRDF_PDF_TABLE_OFFSET;
- float pdf = lookup_table_read_2D(kg, t, refl, table_offset, BSSRDF_RADIUS_TABLE_SIZE, BSSRDF_REFL_TABLE_SIZE);
-
- pdf /= radius;
-
- return pdf;
-}
-#endif
-
-#ifdef BSSRDF_MULTI_EVAL
-__device float3 old_subsurface_scatter_multi_eval(KernelGlobals *kg, ShaderData *sd, bool hit, float refl, float *r, int num_r, bool all)
-{
- /* compute pdf */
- float3 eval_sum = make_float3(0.0f, 0.0f, 0.0f);
- float pdf_sum = 0.0f;
- float sample_weight_sum = 0.0f;
- int num_bssrdf = 0;
-
- for(int i = 0; i < sd->num_closure; i++) {
- ShaderClosure *sc = &sd->closure[i];
-
- if(CLOSURE_IS_BSSRDF(sc->type)) {
- float sample_weight = (all)? 1.0f: sc->sample_weight;
-
- /* compute pdf */
- float pdf = 1.0f;
- for(int i = 0; i < num_r; i++)
- pdf *= old_bssrdf_pdf(kg, sc->data0, refl, r[i]);
-
- eval_sum += sc->weight*pdf;
- pdf_sum += sample_weight*pdf;
-
- sample_weight_sum += sample_weight;
- num_bssrdf++;
- }
- }
-
- float inv_pdf_sum;
-
- if(pdf_sum > 0.0f) {
- /* in case of branched path integrate we sample all bssrdf's once,
- * for path trace we pick one, so adjust pdf for that */
- if(all)
- inv_pdf_sum = 1.0f/pdf_sum;
- else
- inv_pdf_sum = sample_weight_sum/pdf_sum;
- }
- else
- inv_pdf_sum = 0.0f;
-
- float3 weight = eval_sum * inv_pdf_sum;
-
- return weight;
-}
-#endif
-
-/* subsurface scattering step, from a point on the surface to another nearby point on the same object */
-__device void old_subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, int state_flag, ShaderClosure *sc, uint *lcg_state, bool all)
-{
- float radius = sc->data0;
- float refl = max(average(sc->weight)*3.0f, 0.0f);
- float r = 0.0f;
- bool hit = false;
- float3 weight = make_float3(1.0f, 1.0f, 1.0f);
-#ifdef BSSRDF_MULTI_EVAL
- float r_attempts[BSSRDF_MAX_ATTEMPTS];
-#endif
- int num_attempts;
-
- /* attempt to find a hit a given number of times before giving up */
- for(num_attempts = 0; num_attempts < kernel_data.bssrdf.num_attempts; num_attempts++) {
- /* random numbers for sampling */
- float u1 = lcg_step_float(lcg_state);
- float u2 = lcg_step_float(lcg_state);
- float u3 = lcg_step_float(lcg_state);
- float u4 = lcg_step_float(lcg_state);
- float u5 = lcg_step_float(lcg_state);
-
- r = old_bssrdf_sample_distance(kg, radius, refl, u5);
-#ifdef BSSRDF_MULTI_EVAL
- r_attempts[num_attempts] = r;
-#endif
-
- float3 p1 = sd->P + sample_uniform_sphere(u1, u2)*r;
- float3 p2 = sd->P + sample_uniform_sphere(u3, u4)*r;
-
- /* create ray */
- Ray ray;
- ray.P = p1;
- ray.D = normalize_len(p2 - p1, &ray.t);
- ray.dP = sd->dP;
- ray.dD = differential3_zero();
- ray.time = sd->time;
-
- /* intersect with the same object. if multiple intersections are
- * found it will randomly pick one of them */
- Intersection isect;
- if(scene_intersect_subsurface(kg, &ray, &isect, sd->object, lcg_state, 1) == 0)
- continue;
-
- /* setup new shading point */
- shader_setup_from_subsurface(kg, sd, &isect, &ray);
-
- hit = true;
- num_attempts++;
- break;
- }
-
- /* evaluate subsurface scattering closures */
-#ifdef BSSRDF_MULTI_EVAL
- weight *= old_subsurface_scatter_multi_eval(kg, sd, hit, refl, r_attempts, num_attempts, all);
-#else
- weight *= sc->weight;
-#endif
-
- if(!hit)
- weight = make_float3(0.0f, 0.0f, 0.0f);
-
- /* optionally blur colors and bump mapping */
- float3 N = sd->N;
- subsurface_color_bump_blur(kg, sd, sd, state_flag, &weight, &N);
-
- /* replace closures with a single diffuse BSDF */
- subsurface_scatter_setup_diffuse_bsdf(sd, weight, hit, N);
-}
-
-__device bool old_subsurface_scatter_use(ShaderData *sd)
-{
- for(int i = 0; i < sd->num_closure; i++) {
- ShaderClosure *sc = &sd->closure[i];
-
- if(sc->type == CLOSURE_BSSRDF_COMPATIBLE_ID)
- return true;
- }
-
- return false;
-}
-
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index d039b708bd4..3371c580c71 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -35,12 +35,7 @@ CCL_NAMESPACE_BEGIN
#define PARTICLE_SIZE 5
#define TIME_INVALID FLT_MAX
-#define BSSRDF_RADIUS_TABLE_SIZE 1024
-#define BSSRDF_REFL_TABLE_SIZE 256
-#define BSSRDF_PDF_TABLE_OFFSET (BSSRDF_RADIUS_TABLE_SIZE*BSSRDF_REFL_TABLE_SIZE)
-#define BSSRDF_LOOKUP_TABLE_SIZE (BSSRDF_RADIUS_TABLE_SIZE*BSSRDF_REFL_TABLE_SIZE*2)
#define BSSRDF_MIN_RADIUS 1e-8f
-#define BSSRDF_MAX_ATTEMPTS 8
#define BSSRDF_MAX_HITS 4
#define BB_DRAPPER 800.0f
@@ -815,12 +810,6 @@ typedef struct KernelCurves {
int pad2;
} KernelCurves;
-typedef struct KernelBSSRDF {
- int table_offset;
- int num_attempts;
- int pad1, pad2;
-} KernelBSSRDF;
-
typedef struct KernelBlackbody {
int table_offset;
int pad1, pad2, pad3;
@@ -834,7 +823,6 @@ typedef struct KernelData {
KernelIntegrator integrator;
KernelBVH bvh;
KernelCurves curve;
- KernelBSSRDF bssrdf;
KernelBlackbody blackbody;
} KernelData;
diff --git a/intern/cycles/kernel/osl/osl_shader.cpp b/intern/cycles/kernel/osl/osl_shader.cpp
index 625ad263f7f..18e8fee4348 100644
--- a/intern/cycles/kernel/osl/osl_shader.cpp
+++ b/intern/cycles/kernel/osl/osl_shader.cpp
@@ -253,7 +253,7 @@ static void flatten_surface_closure_tree(ShaderData *sd, int path_flag,
/* disable in case of diffuse ancestor, can't see it well then and
* adds considerably noise due to probabilities of continuing path
* getting lower and lower */
- if(sc.type != CLOSURE_BSSRDF_COMPATIBLE_ID && (path_flag & PATH_RAY_DIFFUSE_ANCESTOR))
+ if(path_flag & PATH_RAY_DIFFUSE_ANCESTOR)
bssrdf->radius = make_float3(0.0f, 0.0f, 0.0f);
/* create one closure for each color channel */
diff --git a/intern/cycles/kernel/shaders/SConscript b/intern/cycles/kernel/shaders/SConscript
index aad6e23e4d4..8bc1c2206e0 100644
--- a/intern/cycles/kernel/shaders/SConscript
+++ b/intern/cycles/kernel/shaders/SConscript
@@ -57,7 +57,7 @@ if env['WITH_BF_CYCLES_OSL']:
osl_file = os.path.join(source_dir, f)
oso_file = os.path.join(build_dir, f.replace('.osl', '.oso'))
- command = "%s -q -O2 -I%s -o %s %s" % (osl_compiler, source_dir, oso_file, osl_file)
+ command = "\"%s\" -q -O2 -I\"%s\" -o \"%s\" \"%s\"" % (osl_compiler, source_dir, oso_file, osl_file)
shaders.Command(oso_file, f, command)
shaders.Depends(oso_file, [f] + dependencies)
diff --git a/intern/cycles/kernel/shaders/node_environment_texture.osl b/intern/cycles/kernel/shaders/node_environment_texture.osl
index 6b61c689066..136ccdf8b18 100644
--- a/intern/cycles/kernel/shaders/node_environment_texture.osl
+++ b/intern/cycles/kernel/shaders/node_environment_texture.osl
@@ -19,7 +19,7 @@
vector environment_texture_direction_to_equirectangular(vector dir)
{
- float u = -atan2(dir[1], dir[0]) / (2.0 * M_PI) + 0.5;
+ float u = -atan2(dir[1], dir[0]) / (M_2PI) + 0.5;
float v = atan2(dir[2], hypot(dir[0], dir[1])) / M_PI + 0.5;
return vector(u, v, 0.0);
diff --git a/intern/cycles/kernel/shaders/node_hair_bsdf.osl b/intern/cycles/kernel/shaders/node_hair_bsdf.osl
index d1d7d0fb6a6..54d4cb67c3b 100644
--- a/intern/cycles/kernel/shaders/node_hair_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_hair_bsdf.osl
@@ -19,8 +19,8 @@
#include "stdosl.h"
shader node_hair_bsdf(
- color Color = 0.8,
- string component = "Reflection",
+ color Color = 0.8,
+ string component = "Reflection",
float Offset = 0.0,
float RoughnessU = 0.1,
float RoughnessV = 1.0,
@@ -28,29 +28,30 @@ shader node_hair_bsdf(
output closure color BSDF = 0)
{
float IsStrand;
- float roughnessh = clamp(RoughnessU, 0.001,1.0);
- float roughnessv = clamp(RoughnessV, 0.001,1.0);
+ float roughnessh = clamp(RoughnessU, 0.001, 1.0);
+ float roughnessv = clamp(RoughnessV, 0.001, 1.0);
getattribute("geom:is_curve", IsStrand);
- if (!IsStrand) {
- if (backfacing())
- BSDF = transparent();
- else {
- if (component == "Reflection")
- BSDF = Color * hair_reflection(Normal, roughnessh, roughnessv, normalize(dPdv), 0.0);
- else
- BSDF = Color * hair_transmission(Normal, roughnessh, roughnessv, normalize(dPdv), 0.0);
+ if (!IsStrand) {
+ if (backfacing()) {
+ BSDF = transparent();
+ }
+ else {
+ if (component == "Reflection")
+ BSDF = Color * hair_reflection(Normal, roughnessh, roughnessv, normalize(dPdv), 0.0);
+ else
+ BSDF = Color * hair_transmission(Normal, roughnessh, roughnessv, normalize(dPdv), 0.0);
}
}
- else {
- if (backfacing())
- BSDF = transparent();
- else {
- if (component == "Reflection")
- BSDF = Color * hair_reflection(Normal, roughnessh, roughnessv, dPdu, -Offset);
- else
- BSDF = Color * hair_transmission(Normal, roughnessh, roughnessv, dPdu, -Offset);
+ else {
+ if (backfacing()) {
+ BSDF = transparent();
+ }
+ else {
+ if (component == "Reflection")
+ BSDF = Color * hair_reflection(Normal, roughnessh, roughnessv, dPdu, -Offset);
+ else
+ BSDF = Color * hair_transmission(Normal, roughnessh, roughnessv, dPdu, -Offset);
}
}
}
-
diff --git a/intern/cycles/kernel/shaders/node_sky_texture.osl b/intern/cycles/kernel/shaders/node_sky_texture.osl
index 3ed791fdc92..85c2dbdb2c2 100644
--- a/intern/cycles/kernel/shaders/node_sky_texture.osl
+++ b/intern/cycles/kernel/shaders/node_sky_texture.osl
@@ -44,8 +44,8 @@ float sky_perez_function(float lam[9], float theta, float gamma)
}
color sky_radiance_old(normal dir,
- float sunphi, float suntheta, color radiance,
- float config_x[9], float config_y[9], float config_z[9])
+ float sunphi, float suntheta, color radiance,
+ float config_x[9], float config_y[9], float config_z[9])
{
/* convert vector to spherical coordinates */
vector spherical = sky_spherical_coordinates(dir);
@@ -76,7 +76,7 @@ float sky_radiance_internal(float config[9], float theta, float gamma)
float expM = exp(config[4] * gamma);
float rayM = cgamma * cgamma;
- float mieM = (1.0 + rayM) / pow((1.0 + config[8]*config[8] - 2.0*config[8]*cgamma), 1.5);
+ float mieM = (1.0 + rayM) / pow((1.0 + config[8] * config[8] - 2.0 * config[8] * cgamma), 1.5);
float zenith = sqrt(ctheta);
return (1.0 + config[0] * exp(config[1] / (ctheta + 0.01))) *
@@ -84,8 +84,8 @@ float sky_radiance_internal(float config[9], float theta, float gamma)
}
color sky_radiance_new(normal dir,
- float sunphi, float suntheta, color radiance,
- float config_x[9], float config_y[9], float config_z[9])
+ float sunphi, float suntheta, color radiance,
+ float config_x[9], float config_y[9], float config_z[9])
{
/* convert vector to spherical coordinates */
vector spherical = sky_spherical_coordinates(dir);
@@ -104,7 +104,7 @@ color sky_radiance_new(normal dir,
float z = sky_radiance_internal(config_z, theta, gamma) * radiance[2];
/* convert to RGB and adjust strength */
- return xyz_to_rgb(x, y, z) * (M_2PI/683);
+ return xyz_to_rgb(x, y, z) * (M_2PI / 683);
}
shader node_sky_texture(
diff --git a/intern/cycles/kernel/shaders/node_subsurface_scattering.osl b/intern/cycles/kernel/shaders/node_subsurface_scattering.osl
index 5c1d1be0ee7..1c0cd74c0be 100644
--- a/intern/cycles/kernel/shaders/node_subsurface_scattering.osl
+++ b/intern/cycles/kernel/shaders/node_subsurface_scattering.osl
@@ -26,7 +26,7 @@ shader node_subsurface_scattering(
normal Normal = N,
output closure color BSSRDF = 0)
{
- if(Falloff == "Gaussian")
+ if (Falloff == "Gaussian")
BSSRDF = Color * bssrdf_gaussian(N, Scale * Radius, TextureBlur);
else
BSSRDF = Color * bssrdf_cubic(N, Scale * Radius, TextureBlur, Sharpness);
diff --git a/intern/cycles/kernel/shaders/node_ward_bsdf.osl b/intern/cycles/kernel/shaders/node_ward_bsdf.osl
index a21dd24059d..2d360d594f2 100644
--- a/intern/cycles/kernel/shaders/node_ward_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_ward_bsdf.osl
@@ -29,7 +29,7 @@ shader node_ward_bsdf(
vector T = Tangent;
if (Rotation != 0.0)
- T = rotate(T, Rotation * 2.0 * M_PI, point(0.0, 0.0, 0.0), Normal);
+ T = rotate(T, Rotation * M_2PI, point(0.0, 0.0, 0.0), Normal);
/* compute roughness */
float RoughnessU, RoughnessV;
diff --git a/intern/cycles/kernel/shaders/stdosl.h b/intern/cycles/kernel/shaders/stdosl.h
index 424ca335903..47c5dc27f2b 100644
--- a/intern/cycles/kernel/shaders/stdosl.h
+++ b/intern/cycles/kernel/shaders/stdosl.h
@@ -134,12 +134,16 @@ normal mod (normal a, float b) { return a - b*floor(a/b); }
color mod (color a, float b) { return a - b*floor(a/b); }
float mod (float a, float b) { return a - b*floor(a/b); }
PERCOMP2 (min)
+int min (int a, int b) BUILTIN;
PERCOMP2 (max)
+int max (int a, int b) BUILTIN;
normal clamp (normal x, normal minval, normal maxval) { return max(min(x,maxval),minval); }
vector clamp (vector x, vector minval, vector maxval) { return max(min(x,maxval),minval); }
point clamp (point x, point minval, point maxval) { return max(min(x,maxval),minval); }
color clamp (color x, color minval, color maxval) { return max(min(x,maxval),minval); }
float clamp (float x, float minval, float maxval) { return max(min(x,maxval),minval); }
+int clamp (int x, int minval, int maxval) { return max(min(x,maxval),minval); }
+#if 0
normal mix (normal x, normal y, normal a) { return x*(1-a) + y*a; }
normal mix (normal x, normal y, float a) { return x*(1-a) + y*a; }
vector mix (vector x, vector y, vector a) { return x*(1-a) + y*a; }
@@ -149,6 +153,17 @@ point mix (point x, point y, float a) { return x*(1-a) + y*a; }
color mix (color x, color y, color a) { return x*(1-a) + y*a; }
color mix (color x, color y, float a) { return x*(1-a) + y*a; }
float mix (float x, float y, float a) { return x*(1-a) + y*a; }
+#else
+normal mix (normal x, normal y, normal a) BUILTIN;
+normal mix (normal x, normal y, float a) BUILTIN;
+vector mix (vector x, vector y, vector a) BUILTIN;
+vector mix (vector x, vector y, float a) BUILTIN;
+point mix (point x, point y, point a) BUILTIN;
+point mix (point x, point y, float a) BUILTIN;
+color mix (color x, color y, color a) BUILTIN;
+color mix (color x, color y, float a) BUILTIN;
+float mix (float x, float y, float a) BUILTIN;
+#endif
int isnan (float x) BUILTIN;
int isinf (float x) BUILTIN;
int isfinite (float x) BUILTIN;
@@ -418,8 +433,8 @@ int startswith (string s, string prefix) BUILTIN;
int endswith (string s, string suffix) BUILTIN;
string substr (string s, int start, int len) BUILTIN;
string substr (string s, int start) { return substr (s, start, strlen(s)); }
-float strtof (string str) BUILTIN;
-int strtoi (string str) BUILTIN;
+float stof (string str) BUILTIN;
+int stoi (string str) BUILTIN;
// Define concat in terms of shorter concat
string concat (string a, string b, string c) {
diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h
index 6d9c4e215e6..0d4716ab078 100644
--- a/intern/cycles/kernel/svm/svm_closure.h
+++ b/intern/cycles/kernel/svm/svm_closure.h
@@ -378,7 +378,6 @@ __device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *st
#endif
#ifdef __SUBSURFACE__
- case CLOSURE_BSSRDF_COMPATIBLE_ID:
case CLOSURE_BSSRDF_CUBIC_ID:
case CLOSURE_BSSRDF_GAUSSIAN_ID: {
ShaderClosure *sc = &sd->closure[sd->num_closure];
@@ -388,7 +387,7 @@ __device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *st
/* disable in case of diffuse ancestor, can't see it well then and
* adds considerably noise due to probabilities of continuing path
* getting lower and lower */
- if(type != CLOSURE_BSSRDF_COMPATIBLE_ID && (path_flag & PATH_RAY_DIFFUSE_ANCESTOR))
+ if(path_flag & PATH_RAY_DIFFUSE_ANCESTOR)
param1 = 0.0f;
if(sample_weight > 1e-5f && sd->num_closure+2 < MAX_CLOSURE) {
diff --git a/intern/cycles/kernel/svm/svm_types.h b/intern/cycles/kernel/svm/svm_types.h
index 50daf159f26..abd63530d63 100644
--- a/intern/cycles/kernel/svm/svm_types.h
+++ b/intern/cycles/kernel/svm/svm_types.h
@@ -377,7 +377,6 @@ typedef enum ClosureType {
CLOSURE_BSDF_TRANSPARENT_ID,
/* BSSRDF */
- CLOSURE_BSSRDF_COMPATIBLE_ID,
CLOSURE_BSSRDF_CUBIC_ID,
CLOSURE_BSSRDF_GAUSSIAN_ID,
@@ -402,7 +401,7 @@ typedef enum ClosureType {
#define CLOSURE_IS_BSDF_GLOSSY(type) (type >= CLOSURE_BSDF_GLOSSY_ID && type <= CLOSURE_BSDF_HAIR_REFLECTION_ID)
#define CLOSURE_IS_BSDF_TRANSMISSION(type) (type >= CLOSURE_BSDF_TRANSMISSION_ID && type <= CLOSURE_BSDF_HAIR_TRANSMISSION_ID)
#define CLOSURE_IS_BSDF_BSSRDF(type) (type == CLOSURE_BSDF_BSSRDF_ID)
-#define CLOSURE_IS_BSSRDF(type) (type >= CLOSURE_BSSRDF_COMPATIBLE_ID && type <= CLOSURE_BSSRDF_GAUSSIAN_ID)
+#define CLOSURE_IS_BSSRDF(type) (type >= CLOSURE_BSSRDF_CUBIC_ID && type <= CLOSURE_BSSRDF_GAUSSIAN_ID)
#define CLOSURE_IS_VOLUME(type) (type >= CLOSURE_VOLUME_ID && type <= CLOSURE_VOLUME_ISOTROPIC_ID)
#define CLOSURE_IS_EMISSION(type) (type == CLOSURE_EMISSION_ID)
#define CLOSURE_IS_HOLDOUT(type) (type == CLOSURE_HOLDOUT_ID)
diff --git a/intern/cycles/render/CMakeLists.txt b/intern/cycles/render/CMakeLists.txt
index dbbfe2ee7c9..7d00ed92164 100644
--- a/intern/cycles/render/CMakeLists.txt
+++ b/intern/cycles/render/CMakeLists.txt
@@ -18,7 +18,6 @@ set(SRC
background.cpp
blackbody.cpp
buffers.cpp
- bssrdf.cpp
camera.cpp
film.cpp
graph.cpp
@@ -47,7 +46,6 @@ set(SRC_HEADERS
background.h
blackbody.h
buffers.h
- bssrdf.h
camera.h
film.h
graph.h
diff --git a/intern/cycles/render/bssrdf.cpp b/intern/cycles/render/bssrdf.cpp
deleted file mode 100644
index 69cab5e1824..00000000000
--- a/intern/cycles/render/bssrdf.cpp
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License
- */
-
-#include "bssrdf.h"
-
-#include "util_algorithm.h"
-#include "util_math.h"
-#include "util_types.h"
-
-#include "kernel_types.h"
-#include "kernel_montecarlo.h"
-
-CCL_NAMESPACE_BEGIN
-
-static float bssrdf_cubic(float ld, float r)
-{
- if(ld == 0.0f)
- return (r == 0.0f)? 1.0f: 0.0f;
-
- return powf(ld - min(r, ld), 3.0f) * 4.0f/powf(ld, 4.0f);
-}
-
-/* Cumulative density function utilities */
-
-static float cdf_lookup_inverse(const vector<float>& table, float2 range, float x)
-{
- int index = upper_bound(table.begin(), table.end(), x) - table.begin();
-
- if(index == 0)
- return range[0];
- else if(index == table.size())
- return range[1];
- else
- index--;
-
- float t = (x - table[index])/(table[index+1] - table[index]);
- float y = ((index + t)/(table.size() - 1));
-
- return y*(range[1] - range[0]) + range[0];
-}
-
-static void cdf_invert(vector<float>& to, float2 to_range, const vector<float>& from, float2 from_range)
-{
- float step = 1.0f/(float)(to.size() - 1);
-
- for(int i = 0; i < to.size(); i++) {
- float x = (i*step)*(from_range[1] - from_range[0]) + from_range[0];
- to[i] = cdf_lookup_inverse(from, to_range, x);
- }
-}
-
-/* BSSRDF */
-
-static void bssrdf_lookup_table_create(float ld, vector<float>& sample_table, vector<float>& pdf_table)
-{
- const int size = BSSRDF_RADIUS_TABLE_SIZE;
- vector<float> cdf(size);
- vector<float> pdf(size);
- float step = 1.0f/(float)(size - 1);
- float max_radius = ld;
- float pdf_sum = 0.0f;
-
- /* compute the probability density function */
- for(int i = 0; i < pdf.size(); i++) {
- float x = (i*step)*max_radius;
- pdf[i] = bssrdf_cubic(ld, x);
- pdf_sum += pdf[i];
- }
-
- /* adjust for area covered by each distance */
- for(int i = 0; i < pdf.size(); i++) {
- float x = (i*step)*max_radius;
- pdf[i] *= M_2PI_F*x;
- }
-
- /* normalize pdf, we multiply in reflectance later */
- if(pdf_sum > 0.0f)
- for(int i = 0; i < pdf.size(); i++)
- pdf[i] /= pdf_sum;
-
- /* sum to account for sampling which uses overlapping sphere */
- for(int i = pdf.size() - 2; i >= 0; i--)
- pdf[i] = pdf[i] + pdf[i+1];
-
- /* compute the cumulative density function */
- cdf[0] = 0.0f;
-
- for(int i = 1; i < size; i++)
- cdf[i] = cdf[i-1] + 0.5f*(pdf[i-1] + pdf[i])*step*max_radius;
-
- /* invert cumulative density function for importance sampling */
- float2 cdf_range = make_float2(0.0f, cdf[size - 1]);
- float2 table_range = make_float2(0.0f, max_radius);
-
- cdf_invert(sample_table, table_range, cdf, cdf_range);
-
- /* copy pdf table */
- for(int i = 0; i < pdf.size(); i++)
- pdf_table[i] = pdf[i];
-}
-
-void bssrdf_table_build(vector<float>& table)
-{
- vector<float> sample_table(BSSRDF_RADIUS_TABLE_SIZE);
- vector<float> pdf_table(BSSRDF_RADIUS_TABLE_SIZE);
-
- table.resize(BSSRDF_LOOKUP_TABLE_SIZE);
-
- /* create a 2D lookup table, for reflection x sample radius */
- for(int i = 0; i < BSSRDF_REFL_TABLE_SIZE; i++) {
- float radius = 1.0f;
-
- bssrdf_lookup_table_create(radius, sample_table, pdf_table);
-
- memcpy(&table[i*BSSRDF_RADIUS_TABLE_SIZE], &sample_table[0], BSSRDF_RADIUS_TABLE_SIZE*sizeof(float));
- memcpy(&table[BSSRDF_PDF_TABLE_OFFSET + i*BSSRDF_RADIUS_TABLE_SIZE], &pdf_table[0], BSSRDF_RADIUS_TABLE_SIZE*sizeof(float));
- }
-}
-
-CCL_NAMESPACE_END
-
diff --git a/intern/cycles/render/bssrdf.h b/intern/cycles/render/bssrdf.h
deleted file mode 100644
index b8f0d44bc34..00000000000
--- a/intern/cycles/render/bssrdf.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License
- */
-
-#ifndef __BSSRDF_H__
-#define __BSSRDF_H__
-
-#include "util_vector.h"
-
-CCL_NAMESPACE_BEGIN
-
-void bssrdf_table_build(vector<float>& table);
-
-CCL_NAMESPACE_END
-
-#endif /* __BSSRDF_H__ */
-
diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp
index 70cb5613e61..621d52bbbbf 100644
--- a/intern/cycles/render/nodes.cpp
+++ b/intern/cycles/render/nodes.cpp
@@ -20,6 +20,7 @@
#include "osl.h"
#include "sky_model.h"
+#include "util_foreach.h"
#include "util_transform.h"
CCL_NAMESPACE_BEGIN
@@ -41,6 +42,8 @@ TextureMapping::TextureMapping()
y_mapping = Y;
z_mapping = Z;
+ type = TEXTURE;
+
projection = FLAT;
}
@@ -54,12 +57,52 @@ Transform TextureMapping::compute_transform()
mmat[1][y_mapping-1] = 1.0f;
if(z_mapping != NONE)
mmat[2][z_mapping-1] = 1.0f;
+
+ float3 scale_clamped = scale;
- Transform smat = transform_scale(scale);
+ if(type == TEXTURE || type == NORMAL) {
+ /* keep matrix invertible */
+ if(fabsf(scale.x) < 1e-5f)
+ scale_clamped.x = signf(scale.x)*1e-5f;
+ if(fabsf(scale.y) < 1e-5f)
+ scale_clamped.y = signf(scale.y)*1e-5f;
+ if(fabsf(scale.z) < 1e-5f)
+ scale_clamped.z = signf(scale.z)*1e-5f;
+ }
+
+ Transform smat = transform_scale(scale_clamped);
Transform rmat = transform_euler(rotation);
Transform tmat = transform_translate(translation);
- return tmat*rmat*smat*mmat;
+ Transform mat;
+
+ switch(type) {
+ case TEXTURE:
+ /* inverse transform on texture coordinate gives
+ * forward transform on texture */
+ mat = tmat*rmat*smat;
+ mat = transform_inverse(mat);
+ break;
+ case POINT:
+ /* full transform */
+ mat = tmat*rmat*smat;
+ break;
+ case VECTOR:
+ /* no translation for vectors */
+ mat = rmat*smat;
+ break;
+ case NORMAL:
+ /* no translation for normals, and inverse transpose */
+ mat = rmat*smat;
+ mat = transform_inverse(mat);
+ mat = transform_transpose(mat);
+ break;
+ }
+
+ /* projection last */
+ mat = mat*mmat;
+
+ return mat;
}
bool TextureMapping::skip()
@@ -97,6 +140,11 @@ void TextureMapping::compile(SVMCompiler& compiler, int offset_in, int offset_ou
compiler.add_node(float3_to_float4(min));
compiler.add_node(float3_to_float4(max));
}
+
+ if(type == NORMAL) {
+ compiler.add_node(NODE_VECTOR_MATH, NODE_VECTOR_MATH_NORMALIZE, offset_out, offset_out);
+ compiler.add_node(NODE_VECTOR_MATH, SVM_STACK_INVALID, offset_out);
+ }
}
void TextureMapping::compile(OSLCompiler &compiler)
@@ -472,6 +520,9 @@ static void sky_texture_precompute_new(SunSky *sunsky, float3 dir, float turbidi
float theta = spherical.x;
float phi = spherical.y;
+ /* Clamp Turbidity */
+ turbidity = clamp(turbidity, 0.0f, 10.0f);
+
/* Clamp to Horizon */
theta = clamp(theta, 0.0f, M_PI_2_F);
@@ -3677,6 +3728,26 @@ void OSLScriptNode::compile(SVMCompiler& compiler)
void OSLScriptNode::compile(OSLCompiler& compiler)
{
+ /* XXX fix for #36790:
+ * point and normal parameters are reflected as generic SOCK_VECTOR sockets
+ * on the node. Socket fixed input values need to be copied explicitly here for
+ * vector sockets, otherwise OSL will reject the value due to mismatching type.
+ */
+ foreach(ShaderInput *input, this->inputs) {
+ if(!input->link) {
+ /* no need for compatible_name here, OSL parameter names are always unique */
+ string param_name(input->name);
+ switch(input->type) {
+ case SHADER_SOCKET_VECTOR:
+ compiler.parameter_point(param_name.c_str(), input->value);
+ compiler.parameter_normal(param_name.c_str(), input->value);
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
if(!filepath.empty())
compiler.add(this, filepath.c_str(), true);
else
diff --git a/intern/cycles/render/nodes.h b/intern/cycles/render/nodes.h
index d58c6633a41..430c37158f4 100644
--- a/intern/cycles/render/nodes.h
+++ b/intern/cycles/render/nodes.h
@@ -43,6 +43,9 @@ public:
float3 min, max;
bool use_minmax;
+ enum Type { POINT = 0, TEXTURE = 1, VECTOR = 2, NORMAL = 3 };
+ Type type;
+
enum Mapping { NONE = 0, X = 1, Y = 2, Z = 3 };
Mapping x_mapping, y_mapping, z_mapping;
diff --git a/intern/cycles/render/shader.cpp b/intern/cycles/render/shader.cpp
index 385395e3752..26af60572f6 100644
--- a/intern/cycles/render/shader.cpp
+++ b/intern/cycles/render/shader.cpp
@@ -15,7 +15,6 @@
*/
#include "background.h"
-#include "bssrdf.h"
#include "blackbody.h"
#include "device.h"
#include "graph.h"
@@ -127,7 +126,6 @@ void Shader::tag_used(Scene *scene)
ShaderManager::ShaderManager()
{
need_update = true;
- bssrdf_table_offset = TABLE_OFFSET_INVALID;
blackbody_table_offset = TABLE_OFFSET_INVALID;
}
@@ -254,23 +252,6 @@ void ShaderManager::device_update_common(Device *device, DeviceScene *dscene, Sc
device->tex_alloc("__shader_flag", dscene->shader_flag);
- /* bssrdf lookup table */
- KernelBSSRDF *kbssrdf = &dscene->data.bssrdf;
-
- if(has_surface_bssrdf && bssrdf_table_offset == TABLE_OFFSET_INVALID) {
- vector<float> table;
-
- bssrdf_table_build(table);
- bssrdf_table_offset = scene->lookup_tables->add_table(dscene, table);
-
- kbssrdf->table_offset = (int)bssrdf_table_offset;
- kbssrdf->num_attempts = BSSRDF_MAX_ATTEMPTS;
- }
- else if(!has_surface_bssrdf && bssrdf_table_offset != TABLE_OFFSET_INVALID) {
- scene->lookup_tables->remove_table(bssrdf_table_offset);
- bssrdf_table_offset = TABLE_OFFSET_INVALID;
- }
-
/* blackbody lookup table */
KernelBlackbody *kblackbody = &dscene->data.blackbody;
@@ -289,11 +270,6 @@ void ShaderManager::device_update_common(Device *device, DeviceScene *dscene, Sc
void ShaderManager::device_free_common(Device *device, DeviceScene *dscene, Scene *scene)
{
- if(bssrdf_table_offset != TABLE_OFFSET_INVALID) {
- scene->lookup_tables->remove_table(bssrdf_table_offset);
- bssrdf_table_offset = TABLE_OFFSET_INVALID;
- }
-
if(blackbody_table_offset != TABLE_OFFSET_INVALID) {
scene->lookup_tables->remove_table(blackbody_table_offset);
blackbody_table_offset = TABLE_OFFSET_INVALID;
diff --git a/intern/cycles/render/shader.h b/intern/cycles/render/shader.h
index a58d6955da0..6869a651b46 100644
--- a/intern/cycles/render/shader.h
+++ b/intern/cycles/render/shader.h
@@ -141,7 +141,6 @@ protected:
typedef unordered_map<ustring, uint, ustringHash> AttributeIDMap;
AttributeIDMap unique_attribute_id;
- size_t bssrdf_table_offset;
size_t blackbody_table_offset;
};
diff --git a/intern/cycles/util/util_opengl.h b/intern/cycles/util/util_opengl.h
index 07a296ed1e6..04a3e039c9d 100644
--- a/intern/cycles/util/util_opengl.h
+++ b/intern/cycles/util/util_opengl.h
@@ -20,12 +20,7 @@
/* OpenGL header includes, used everywhere we use OpenGL, to deal with
* platform differences in one central place. */
-#ifdef __APPLE__
-#include <OpenGL/gl.h>
-#include <OpenGL/glu.h>
-#else
#include <GL/glew.h>
-#endif
#endif /* __UTIL_OPENGL_H__ */
diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h
index f48fd1e124b..a8f514864db 100644
--- a/intern/cycles/util/util_types.h
+++ b/intern/cycles/util/util_types.h
@@ -545,10 +545,7 @@ template<size_t i0, size_t i1, size_t i2, size_t i3> __device_inline const __m12
#ifdef __KERNEL_OPENCL__
-__device_inline void float4_store_half(half *h, const float4 *f, float scale)
-{
- vstore_half4(*f * scale, 0, h);
-}
+#define float4_store_half(h, f, scale) vstore_half4(*(f) * (scale), 0, h);
#else