diff options
Diffstat (limited to 'intern/cycles')
41 files changed, 415 insertions, 687 deletions
diff --git a/intern/cycles/blender/CMakeLists.txt b/intern/cycles/blender/CMakeLists.txt index 9b72efd90cf..25f91a0caea 100644 --- a/intern/cycles/blender/CMakeLists.txt +++ b/intern/cycles/blender/CMakeLists.txt @@ -45,6 +45,8 @@ set(ADDON_FILES addon/ui.py ) +add_definitions(-DGLEW_STATIC) + blender_add_lib(bf_intern_cycles "${SRC}" "${INC}" "${INC_SYS}") add_dependencies(bf_intern_cycles bf_rna) diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py index 5fa497776fe..7a03df4f35a 100644 --- a/intern/cycles/blender/addon/ui.py +++ b/intern/cycles/blender/addon/ui.py @@ -1085,6 +1085,8 @@ class CyclesTexture_PT_mapping(CyclesButtonsPanel, Panel): mapping = node.texture_mapping + layout.prop(mapping, "vector_type", expand=True) + row = layout.row() row.column().prop(mapping, "translation") @@ -1178,7 +1180,7 @@ class CyclesRender_PT_CurveRendering(CyclesButtonsPanel, Panel): scene = context.scene cscene = scene.cycles psys = context.particle_system - return CyclesButtonsPanel.poll(context) and psys + return CyclesButtonsPanel.poll(context) and psys and psys.settings.type == 'HAIR' def draw_header(self, context): ccscene = context.scene.cycles_curves @@ -1218,8 +1220,9 @@ class CyclesParticle_PT_CurveSettings(CyclesButtonsPanel, Panel): scene = context.scene cscene = scene.cycles ccscene = scene.cycles_curves - use_curves = ccscene.use_curves and context.particle_system - return CyclesButtonsPanel.poll(context) and use_curves + psys = context.particle_system + use_curves = ccscene.use_curves and psys + return CyclesButtonsPanel.poll(context) and use_curves and psys.settings.type == 'HAIR' def draw(self, context): layout = self.layout diff --git a/intern/cycles/blender/blender_curves.cpp b/intern/cycles/blender/blender_curves.cpp index 57ef9a1005c..1cddc25a22b 100644 --- a/intern/cycles/blender/blender_curves.cpp +++ b/intern/cycles/blender/blender_curves.cpp @@ -217,8 +217,8 @@ bool ObtainCacheParticleData(Mesh *mesh, BL::Mesh *b_mesh, BL::Object *b_ob, Par float3 pcKey; for(int step_no = 0; step_no <= ren_step; step_no++) { float nco[3]; - b_psys.co_hair(*b_ob, psmd, pa_no, step_no, nco); - float3 cKey = make_float3(nco[0],nco[1],nco[2]); + b_psys.co_hair(*b_ob, pa_no, step_no, nco); + float3 cKey = make_float3(nco[0], nco[1], nco[2]); cKey = transform_point(&itfm, cKey); if(step_no > 0) curve_length += len(cKey - pcKey); diff --git a/intern/cycles/blender/blender_mesh.cpp b/intern/cycles/blender/blender_mesh.cpp index caa61e0c5d7..940a923e5af 100644 --- a/intern/cycles/blender/blender_mesh.cpp +++ b/intern/cycles/blender/blender_mesh.cpp @@ -480,7 +480,7 @@ Mesh *BlenderSync::sync_mesh(BL::Object b_ob, bool object_updated, bool hide_tri BL::Mesh b_mesh = object_to_mesh(b_data, b_ob, b_scene, true, !preview, need_undeformed); if(b_mesh) { - if(render_layer.use_surfaces && !(hide_tris && experimental)) { + if(render_layer.use_surfaces && !hide_tris) { if(cmesh.data && experimental && RNA_boolean_get(&cmesh, "use_subdivision")) create_subd_mesh(mesh, b_mesh, &cmesh, used_shaders); else diff --git a/intern/cycles/blender/blender_shader.cpp b/intern/cycles/blender/blender_shader.cpp index 2007171642f..b576181d890 100644 --- a/intern/cycles/blender/blender_shader.cpp +++ b/intern/cycles/blender/blender_shader.cpp @@ -147,6 +147,7 @@ static void get_tex_mapping(TextureMapping *mapping, BL::TexMapping b_mapping) mapping->translation = get_float3(b_mapping.translation()); mapping->rotation = get_float3(b_mapping.rotation()); mapping->scale = get_float3(b_mapping.scale()); + mapping->type = (TextureMapping::Type)b_mapping.vector_type(); mapping->x_mapping = (TextureMapping::Mapping)b_mapping.mapping_x(); mapping->y_mapping = (TextureMapping::Mapping)b_mapping.mapping_y(); @@ -161,6 +162,7 @@ static void get_tex_mapping(TextureMapping *mapping, BL::ShaderNodeMapping b_map mapping->translation = get_float3(b_mapping.translation()); mapping->rotation = get_float3(b_mapping.rotation()); mapping->scale = get_float3(b_mapping.scale()); + mapping->type = (TextureMapping::Type)b_mapping.vector_type(); mapping->use_minmax = b_mapping.use_min() || b_mapping.use_max(); @@ -320,9 +322,6 @@ static ShaderNode *add_node(Scene *scene, BL::BlendData b_data, BL::Scene b_scen SubsurfaceScatteringNode *subsurface = new SubsurfaceScatteringNode(); switch(b_subsurface_node.falloff()) { - case BL::ShaderNodeSubsurfaceScattering::falloff_COMPATIBLE: - subsurface->closure = CLOSURE_BSSRDF_COMPATIBLE_ID; - break; case BL::ShaderNodeSubsurfaceScattering::falloff_CUBIC: subsurface->closure = CLOSURE_BSSRDF_CUBIC_ID; break; diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp index 337d075cce4..a349ddc5c3f 100644 --- a/intern/cycles/blender/blender_sync.cpp +++ b/intern/cycles/blender/blender_sync.cpp @@ -34,6 +34,7 @@ #include "util_debug.h" #include "util_foreach.h" +#include "util_opengl.h" CCL_NAMESPACE_BEGIN @@ -494,7 +495,7 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine b_engine, BL::Use params.shadingsystem = SessionParams::OSL; /* color managagement */ - params.display_buffer_linear = b_engine.support_display_space_shader(b_scene); + params.display_buffer_linear = GLEW_ARB_half_float_pixel && b_engine.support_display_space_shader(b_scene); return params; } diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp index 10d4112b57d..eb262a907a4 100644 --- a/intern/cycles/device/device.cpp +++ b/intern/cycles/device/device.cpp @@ -56,18 +56,6 @@ void Device::draw_pixels(device_memory& rgba, int y, int w, int h, int dy, int w { pixels_copy_from(rgba, y, w, h); - GLuint texid; - glGenTextures(1, &texid); - glBindTexture(GL_TEXTURE_2D, texid); - if(rgba.data_type == TYPE_HALF) - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F_ARB, w, h, 0, GL_RGBA, GL_HALF_FLOAT, (void*)rgba.data_pointer); - else - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, w, h, 0, GL_RGBA, GL_UNSIGNED_BYTE, (void*)rgba.data_pointer); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - - glEnable(GL_TEXTURE_2D); - if(transparent) { glEnable(GL_BLEND); glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA); @@ -75,30 +63,57 @@ void Device::draw_pixels(device_memory& rgba, int y, int w, int h, int dy, int w glColor3f(1.0f, 1.0f, 1.0f); - glPushMatrix(); - glTranslatef(0.0f, (float)dy, 0.0f); + if(rgba.data_type == TYPE_HALF) { + /* draw half float texture, GLSL shader for display transform assumed to be bound */ + GLuint texid; + glGenTextures(1, &texid); + glBindTexture(GL_TEXTURE_2D, texid); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F_ARB, w, h, 0, GL_RGBA, GL_HALF_FLOAT, (void*)rgba.data_pointer); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glBegin(GL_QUADS); - - glTexCoord2f(0.0f, 0.0f); - glVertex2f(0.0f, 0.0f); - glTexCoord2f(1.0f, 0.0f); - glVertex2f((float)width, 0.0f); - glTexCoord2f(1.0f, 1.0f); - glVertex2f((float)width, (float)height); - glTexCoord2f(0.0f, 1.0f); - glVertex2f(0.0f, (float)height); + glEnable(GL_TEXTURE_2D); + + glPushMatrix(); + glTranslatef(0.0f, (float)dy, 0.0f); + + glBegin(GL_QUADS); + + glTexCoord2f(0.0f, 0.0f); + glVertex2f(0.0f, 0.0f); + glTexCoord2f(1.0f, 0.0f); + glVertex2f((float)width, 0.0f); + glTexCoord2f(1.0f, 1.0f); + glVertex2f((float)width, (float)height); + glTexCoord2f(0.0f, 1.0f); + glVertex2f(0.0f, (float)height); - glEnd(); + glEnd(); - glPopMatrix(); + glPopMatrix(); + + glBindTexture(GL_TEXTURE_2D, 0); + glDisable(GL_TEXTURE_2D); + glDeleteTextures(1, &texid); + } + else { + /* fallback for old graphics cards that don't support GLSL, half float, + * and non-power-of-two textures */ + glPixelZoom((float)width/(float)w, (float)height/(float)h); + glRasterPos2f(0, dy); + + uint8_t *pixels = (uint8_t*)rgba.data_pointer; + + pixels += 4*y*w; + + glDrawPixels(w, h, GL_RGBA, GL_UNSIGNED_BYTE, pixels); + + glRasterPos2f(0.0f, 0.0f); + glPixelZoom(1.0f, 1.0f); + } if(transparent) glDisable(GL_BLEND); - - glBindTexture(GL_TEXTURE_2D, 0); - glDisable(GL_TEXTURE_2D); - glDeleteTextures(1, &texid); } Device *Device::create(DeviceInfo& info, Stats &stats, bool background) diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index b5eaa69bf0e..4ce7f6fd729 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -43,7 +43,9 @@ public: CUmodule cuModule; map<device_ptr, bool> tex_interp_map; int cuDevId; + int cuDevArchitecture; bool first_error; + bool use_texture_storage; struct PixelMem { GLuint cuPBO; @@ -173,6 +175,7 @@ public: { first_error = true; background = background_; + use_texture_storage = true; cuDevId = info.num; cuDevice = 0; @@ -203,6 +206,15 @@ public: if(cuda_error_(result, "cuCtxCreate")) return; + int major, minor; + cuDeviceComputeCapability(&major, &minor, cuDevId); + cuDevArchitecture = major*100 + minor*10; + + /* In order to use full 6GB of memory on Titan cards, use arrays instead + * of textures. On earlier cards this seems slower, but on Titan it is + * actually slightly faster in tests. */ + use_texture_storage = (cuDevArchitecture < 350); + cuda_pop_context(); } @@ -210,20 +222,17 @@ public: { task_pool.stop(); - cuda_push_context(); - cuda_assert(cuCtxDetach(cuContext)) + cuda_assert(cuCtxDestroy(cuContext)) } bool support_device(bool experimental) { - if(!experimental) { - int major, minor; - cuDeviceComputeCapability(&major, &minor, cuDevId); + int major, minor; + cuDeviceComputeCapability(&major, &minor, cuDevId); - if(major < 2) { - cuda_error_message(string_printf("CUDA device supported only with compute capability 2.0 or up, found %d.%d.", major, minor)); - return false; - } + if(major < 2) { + cuda_error_message(string_printf("CUDA device supported only with compute capability 2.0 or up, found %d.%d.", major, minor)); + return false; } return true; @@ -275,8 +284,12 @@ public: cuda_error_message("CUDA nvcc compiler version could not be parsed."); return ""; } + if(cuda_version < 50) { + printf("Unsupported CUDA version %d.%d detected, you need CUDA 5.0.\n", cuda_version/10, cuda_version%10); + return ""; + } - if(cuda_version != 50) + else if(cuda_version > 50) printf("CUDA version %d.%d detected, build may succeed but only CUDA 5.0 is officially supported.\n", cuda_version/10, cuda_version%10); /* compile */ @@ -285,36 +298,14 @@ public: const int machine = system_cpu_bits(); string arch_flags; - /* build flags depending on CUDA version and arch */ - if(cuda_version < 50) { - /* CUDA 4.x */ - if(major == 1) { - /* sm_1x */ - arch_flags = "--maxrregcount=24 --opencc-options -OPT:Olimit=0"; - } - else if(major == 2) { - /* sm_2x */ - arch_flags = "--maxrregcount=24"; - } - else { - /* sm_3x */ - arch_flags = "--maxrregcount=32"; - } + /* CUDA 5.x build flags for different archs */ + if(major == 2) { + /* sm_2x */ + arch_flags = "--maxrregcount=32 --use_fast_math"; } - else { - /* CUDA 5.x */ - if(major == 1) { - /* sm_1x */ - arch_flags = "--maxrregcount=24 --opencc-options -OPT:Olimit=0 --use_fast_math"; - } - else if(major == 2) { - /* sm_2x */ - arch_flags = "--maxrregcount=32 --use_fast_math"; - } - else { - /* sm_3x */ - arch_flags = "--maxrregcount=32 --use_fast_math"; - } + else if(major == 3) { + /* sm_3x */ + arch_flags = "--maxrregcount=32 --use_fast_math"; } double starttime = time_dt(); @@ -448,90 +439,118 @@ public: CUarray_format_enum format; size_t dsize = datatype_size(mem.data_type); size_t size = mem.memory_size(); + bool use_texture = interpolation || use_texture_storage; - switch(mem.data_type) { - case TYPE_UCHAR: format = CU_AD_FORMAT_UNSIGNED_INT8; break; - case TYPE_UINT: format = CU_AD_FORMAT_UNSIGNED_INT32; break; - case TYPE_INT: format = CU_AD_FORMAT_SIGNED_INT32; break; - case TYPE_FLOAT: format = CU_AD_FORMAT_FLOAT; break; - default: assert(0); return; - } - - CUtexref texref = NULL; - - cuda_push_context(); - cuda_assert(cuModuleGetTexRef(&texref, cuModule, name)) - - if(!texref) { - cuda_pop_context(); - return; - } + if(use_texture) { - if(interpolation) { - CUarray handle = NULL; - CUDA_ARRAY_DESCRIPTOR desc; + switch(mem.data_type) { + case TYPE_UCHAR: format = CU_AD_FORMAT_UNSIGNED_INT8; break; + case TYPE_UINT: format = CU_AD_FORMAT_UNSIGNED_INT32; break; + case TYPE_INT: format = CU_AD_FORMAT_SIGNED_INT32; break; + case TYPE_FLOAT: format = CU_AD_FORMAT_FLOAT; break; + default: assert(0); return; + } - desc.Width = mem.data_width; - desc.Height = mem.data_height; - desc.Format = format; - desc.NumChannels = mem.data_elements; + CUtexref texref = NULL; - cuda_assert(cuArrayCreate(&handle, &desc)) + cuda_push_context(); + cuda_assert(cuModuleGetTexRef(&texref, cuModule, name)) - if(!handle) { + if(!texref) { cuda_pop_context(); return; } - if(mem.data_height > 1) { - CUDA_MEMCPY2D param; - memset(¶m, 0, sizeof(param)); - param.dstMemoryType = CU_MEMORYTYPE_ARRAY; - param.dstArray = handle; - param.srcMemoryType = CU_MEMORYTYPE_HOST; - param.srcHost = (void*)mem.data_pointer; - param.srcPitch = mem.data_width*dsize*mem.data_elements; - param.WidthInBytes = param.srcPitch; - param.Height = mem.data_height; - - cuda_assert(cuMemcpy2D(¶m)) + if(interpolation) { + CUarray handle = NULL; + CUDA_ARRAY_DESCRIPTOR desc; + + desc.Width = mem.data_width; + desc.Height = mem.data_height; + desc.Format = format; + desc.NumChannels = mem.data_elements; + + cuda_assert(cuArrayCreate(&handle, &desc)) + + if(!handle) { + cuda_pop_context(); + return; + } + + if(mem.data_height > 1) { + CUDA_MEMCPY2D param; + memset(¶m, 0, sizeof(param)); + param.dstMemoryType = CU_MEMORYTYPE_ARRAY; + param.dstArray = handle; + param.srcMemoryType = CU_MEMORYTYPE_HOST; + param.srcHost = (void*)mem.data_pointer; + param.srcPitch = mem.data_width*dsize*mem.data_elements; + param.WidthInBytes = param.srcPitch; + param.Height = mem.data_height; + + cuda_assert(cuMemcpy2D(¶m)) + } + else + cuda_assert(cuMemcpyHtoA(handle, 0, (void*)mem.data_pointer, size)) + + cuda_assert(cuTexRefSetArray(texref, handle, CU_TRSA_OVERRIDE_FORMAT)) + + cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_LINEAR)) + cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_NORMALIZED_COORDINATES)) + + mem.device_pointer = (device_ptr)handle; + + stats.mem_alloc(size); } - else - cuda_assert(cuMemcpyHtoA(handle, 0, (void*)mem.data_pointer, size)) + else { + cuda_pop_context(); - cuda_assert(cuTexRefSetArray(texref, handle, CU_TRSA_OVERRIDE_FORMAT)) + mem_alloc(mem, MEM_READ_ONLY); + mem_copy_to(mem); - cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_LINEAR)) - cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_NORMALIZED_COORDINATES)) + cuda_push_context(); + + cuda_assert(cuTexRefSetAddress(NULL, texref, cuda_device_ptr(mem.device_pointer), size)) + cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_POINT)) + cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_READ_AS_INTEGER)) + } - mem.device_pointer = (device_ptr)handle; + if(periodic) { + cuda_assert(cuTexRefSetAddressMode(texref, 0, CU_TR_ADDRESS_MODE_WRAP)) + cuda_assert(cuTexRefSetAddressMode(texref, 1, CU_TR_ADDRESS_MODE_WRAP)) + } + else { + cuda_assert(cuTexRefSetAddressMode(texref, 0, CU_TR_ADDRESS_MODE_CLAMP)) + cuda_assert(cuTexRefSetAddressMode(texref, 1, CU_TR_ADDRESS_MODE_CLAMP)) + } + cuda_assert(cuTexRefSetFormat(texref, format, mem.data_elements)) - stats.mem_alloc(size); + cuda_pop_context(); } else { - cuda_pop_context(); - mem_alloc(mem, MEM_READ_ONLY); mem_copy_to(mem); cuda_push_context(); - cuda_assert(cuTexRefSetAddress(NULL, texref, cuda_device_ptr(mem.device_pointer), size)) - cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_POINT)) - cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_READ_AS_INTEGER)) - } + CUdeviceptr cumem; + size_t cubytes; - if(periodic) { - cuda_assert(cuTexRefSetAddressMode(texref, 0, CU_TR_ADDRESS_MODE_WRAP)) - cuda_assert(cuTexRefSetAddressMode(texref, 1, CU_TR_ADDRESS_MODE_WRAP)) - } - else { - cuda_assert(cuTexRefSetAddressMode(texref, 0, CU_TR_ADDRESS_MODE_CLAMP)) - cuda_assert(cuTexRefSetAddressMode(texref, 1, CU_TR_ADDRESS_MODE_CLAMP)) - } - cuda_assert(cuTexRefSetFormat(texref, format, mem.data_elements)) + cuda_assert(cuModuleGetGlobal(&cumem, &cubytes, cuModule, name)) - cuda_pop_context(); + if(cubytes == 8) { + /* 64 bit device pointer */ + uint64_t ptr = mem.device_pointer; + cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes)) + } + else { + /* 32 bit device pointer */ + uint32_t ptr = (uint32_t)mem.device_pointer; + cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes)) + } + + cuda_pop_context(); + } tex_interp_map[mem.device_pointer] = interpolation; } diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp index d723df70c89..11c7bc6f099 100644 --- a/intern/cycles/device/device_opencl.cpp +++ b/intern/cycles/device/device_opencl.cpp @@ -892,8 +892,16 @@ public: void tex_free(device_memory& mem) { - if(mem.data_pointer) + if(mem.device_pointer) { + foreach(const MemMap::value_type& value, mem_map) { + if(value.second == mem.device_pointer) { + mem_map.erase(value.first); + break; + } + } + mem_free(mem); + } } size_t global_size_round_up(int group_size, int global_size) @@ -1086,7 +1094,7 @@ public: tile.sample = sample + 1; - //task->update_progress(tile); + task->update_progress(tile); } task->release_tile(tile); diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index eaa4e304ebb..56ba0e08743 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -151,36 +151,16 @@ if(WITH_CYCLES_CUDA_BINARIES) set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${CUDA_VERSION}") - # build flags depending on CUDA version and arch - if(CUDA_VERSION LESS 50) - # CUDA 4.x - if(${arch} MATCHES "sm_1[0-9]") - # sm_1x - set(cuda_arch_flags "--maxrregcount=24 --opencc-options -OPT:Olimit=0") - elseif(${arch} MATCHES "sm_2[0-9]") - # sm_2x - set(cuda_arch_flags "--maxrregcount=24") - else() - # sm_3x - set(cuda_arch_flags "--maxrregcount=32") - endif() - - set(cuda_math_flags "") - else() - # CUDA 5.x - if(${arch} MATCHES "sm_1[0-9]") - # sm_1x - set(cuda_arch_flags "--maxrregcount=24 --opencc-options -OPT:Olimit=0") - elseif(${arch} MATCHES "sm_2[0-9]") - # sm_2x - set(cuda_arch_flags "--maxrregcount=32") - else() - # sm_3x - set(cuda_arch_flags "--maxrregcount=32") - endif() - - set(cuda_math_flags "--use_fast_math") + # CUDA 5.x build flags for different archs + if(${arch} MATCHES "sm_2[0-9]") + # sm_2x + set(cuda_arch_flags "--maxrregcount=32") + elseif(${arch} MATCHES "sm_3[0-9]") + # sm_3x + set(cuda_arch_flags "--maxrregcount=32") endif() + + set(cuda_math_flags "--use_fast_math") if(CUDA_VERSION LESS 50 AND ${arch} MATCHES "sm_35") message(WARNING "Can't build kernel for CUDA sm_35 architecture, skipping") diff --git a/intern/cycles/kernel/SConscript b/intern/cycles/kernel/SConscript index 6459c3ed183..a0522d9ba8e 100644 --- a/intern/cycles/kernel/SConscript +++ b/intern/cycles/kernel/SConscript @@ -86,33 +86,13 @@ if env['WITH_BF_CYCLES_CUDA_BINARIES']: for arch in cuda_archs: cubin_file = os.path.join(build_dir, "kernel_%s.cubin" % arch) - # build flags depending on CUDA version and arch - if cuda_version < 50: - if arch == "sm_35": - print("Can't build kernel for CUDA sm_35 architecture, skipping") - continue - - # CUDA 4.x - if arch.startswith("sm_1"): - # sm_1x - cuda_arch_flags = "--maxrregcount=24 --opencc-options -OPT:Olimit=0" - elif arch.startswith("sm_2"): - # sm_2x - cuda_arch_flags = "--maxrregcount=24" - else: - # sm_3x - cuda_arch_flags = "--maxrregcount=32" - else: - # CUDA 5.x - if arch.startswith("sm_1"): - # sm_1x - cuda_arch_flags = "--maxrregcount=24 --opencc-options -OPT:Olimit=0 --use_fast_math" - elif arch.startswith("sm_2"): - # sm_2x - cuda_arch_flags = "--maxrregcount=32 --use_fast_math" - else: - # sm_3x - cuda_arch_flags = "--maxrregcount=32 --use_fast_math" + # CUDA 5.x build flags for different archs + if arch.startswith("sm_2"): + # sm_2x + cuda_arch_flags = "--maxrregcount=32 --use_fast_math" + elif arch.startswith("sm_3"): + # sm_3x + cuda_arch_flags = "--maxrregcount=32 --use_fast_math" command = "\"%s\" -arch=%s %s %s \"%s\" -o \"%s\"" % (nvcc, arch, nvcc_flags, cuda_arch_flags, kernel_file, cubin_file) diff --git a/intern/cycles/kernel/kernel_bvh.h b/intern/cycles/kernel/kernel_bvh.h index 4b01f2eebcd..44a9822c103 100644 --- a/intern/cycles/kernel/kernel_bvh.h +++ b/intern/cycles/kernel/kernel_bvh.h @@ -809,11 +809,16 @@ __device_inline void bvh_triangle_intersect_subsurface(KernelGlobals *kg, Inters #include "kernel_bvh_subsurface.h" #endif - +/* to work around titan bug when using arrays instead of textures */ +#if !defined(__KERNEL_CUDA__) || defined(__KERNEL_CUDA_TEX_STORAGE__) +__device_inline +#else +__device_noinline +#endif #ifdef __HAIR__ -__device_inline bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect, uint *lcg_state, float difl, float extmax) +bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect, uint *lcg_state, float difl, float extmax) #else -__device_inline bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect) +bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect) #endif { #ifdef __OBJECT_MOTION__ @@ -851,8 +856,14 @@ __device_inline bool scene_intersect(KernelGlobals *kg, const Ray *ray, const ui #endif /* __KERNEL_CPU__ */ } +/* to work around titan bug when using arrays instead of textures */ #ifdef __SUBSURFACE__ -__device_inline uint scene_intersect_subsurface(KernelGlobals *kg, const Ray *ray, Intersection *isect, int subsurface_object, uint *lcg_state, int max_hits) +#if !defined(__KERNEL_CUDA__) || defined(__KERNEL_CUDA_TEX_STORAGE__) +__device_inline +#else +__device_noinline +#endif +uint scene_intersect_subsurface(KernelGlobals *kg, const Ray *ray, Intersection *isect, int subsurface_object, uint *lcg_state, int max_hits) { #ifdef __OBJECT_MOTION__ if(kernel_data.bvh.have_motion) { diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h index cb86ce8c4ae..44c2b9effe9 100644 --- a/intern/cycles/kernel/kernel_compat_cuda.h +++ b/intern/cycles/kernel/kernel_compat_cuda.h @@ -57,7 +57,18 @@ typedef texture<uchar4, 2, cudaReadModeNormalizedFloat> texture_image_uchar4; /* Macros to handle different memory storage on different devices */ +/* In order to use full 6GB of memory on Titan cards, use arrays instead + * of textures. On earlier cards this seems slower, but on Titan it is + * actually slightly faster in tests. */ +#if __CUDA_ARCH__ < 350 +#define __KERNEL_CUDA_TEX_STORAGE__ +#endif + +#ifdef __KERNEL_CUDA_TEX_STORAGE__ #define kernel_tex_fetch(t, index) tex1Dfetch(t, index) +#else +#define kernel_tex_fetch(t, index) t[(index)] +#endif #define kernel_tex_image_interp(t, x, y) tex2D(t, x, y) #define kernel_data __data diff --git a/intern/cycles/kernel/kernel_film.h b/intern/cycles/kernel/kernel_film.h index 8868a335037..721eceabc37 100644 --- a/intern/cycles/kernel/kernel_film.h +++ b/intern/cycles/kernel/kernel_film.h @@ -70,8 +70,8 @@ __device void kernel_film_convert_to_half_float(KernelGlobals *kg, /* buffer offset */ int index = offset + x + y*stride; - float4 *in = (float4*)(buffer + index*kernel_data.film.pass_stride); - half *out = (half*)rgba + index*4; + __global float4 *in = (__global float4*)(buffer + index*kernel_data.film.pass_stride); + __global half *out = (__global half*)rgba + index*4; float scale = kernel_data.film.exposure*sample_scale; float4_store_half(out, in, scale); diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h index ab0a717b592..b5e691eb615 100644 --- a/intern/cycles/kernel/kernel_globals.h +++ b/intern/cycles/kernel/kernel_globals.h @@ -66,7 +66,11 @@ typedef struct KernelGlobals { __constant__ KernelData __data; typedef struct KernelGlobals {} KernelGlobals; +#ifdef __KERNEL_CUDA_TEX_STORAGE__ #define KERNEL_TEX(type, ttype, name) ttype name; +#else +#define KERNEL_TEX(type, ttype, name) const __constant__ __device__ type *name; +#endif #define KERNEL_IMAGE_TEX(type, ttype, name) ttype name; #include "kernel_textures.h" diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index 1afe8480616..5354738d378 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -266,14 +266,9 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray if(sc) { uint lcg_state = lcg_init(*rng + rng_offset + sample*0x68bc21eb); - if(old_subsurface_scatter_use(&sd)) { - old_subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, false); - } - else { - float bssrdf_u, bssrdf_v; - path_rng_2D(kg, rng, sample, num_total_samples, rng_offset + PRNG_BSDF_U, &bssrdf_u, &bssrdf_v); - subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, bssrdf_u, bssrdf_v, false); - } + float bssrdf_u, bssrdf_v; + path_rng_2D(kg, rng, sample, num_total_samples, rng_offset + PRNG_BSDF_U, &bssrdf_u, &bssrdf_v); + subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, bssrdf_u, bssrdf_v, false); state.flag |= PATH_RAY_BSSRDF_ANCESTOR; } @@ -468,7 +463,9 @@ __device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, R float min_ray_pdf = FLT_MAX; float ray_pdf = 0.0f; +#ifdef __LAMP_MIS__ float ray_t = 0.0f; +#endif PathState state; int rng_offset = PRNG_BASE_NUM; #ifdef __CMJ__ @@ -662,41 +659,35 @@ __device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, R if(sc) { uint lcg_state = lcg_init(*rng + rng_offset + sample*0x68bc21eb); - if(old_subsurface_scatter_use(&sd)) { - old_subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, false); - } - else { - ShaderData bssrdf_sd[BSSRDF_MAX_HITS]; - float bssrdf_u, bssrdf_v; - path_rng_2D(kg, rng, sample, num_samples, rng_offset + PRNG_BSDF_U, &bssrdf_u, &bssrdf_v); - int num_hits = subsurface_scatter_multi_step(kg, &sd, bssrdf_sd, state.flag, sc, &lcg_state, bssrdf_u, bssrdf_v, false); - - /* compute lighting with the BSDF closure */ - for(int hit = 0; hit < num_hits; hit++) { - float3 tp = throughput; - PathState hit_state = state; - Ray hit_ray = ray; - float hit_ray_t = ray_t; - float hit_ray_pdf = ray_pdf; - float hit_min_ray_pdf = min_ray_pdf; - - hit_state.flag |= PATH_RAY_BSSRDF_ANCESTOR; - - if(kernel_path_integrate_lighting(kg, rng, sample, num_samples, &bssrdf_sd[hit], - &tp, &hit_min_ray_pdf, &hit_ray_pdf, &hit_state, rng_offset+PRNG_BOUNCE_NUM, &L, &hit_ray, &hit_ray_t)) { - kernel_path_indirect(kg, rng, sample, hit_ray, buffer, - tp, num_samples, num_samples, - hit_min_ray_pdf, hit_ray_pdf, hit_state, rng_offset+PRNG_BOUNCE_NUM*2, &L); - - /* for render passes, sum and reset indirect light pass variables - * for the next samples */ - path_radiance_sum_indirect(&L); - path_radiance_reset_indirect(&L); - } + ShaderData bssrdf_sd[BSSRDF_MAX_HITS]; + float bssrdf_u, bssrdf_v; + path_rng_2D(kg, rng, sample, num_samples, rng_offset + PRNG_BSDF_U, &bssrdf_u, &bssrdf_v); + int num_hits = subsurface_scatter_multi_step(kg, &sd, bssrdf_sd, state.flag, sc, &lcg_state, bssrdf_u, bssrdf_v, false); + + /* compute lighting with the BSDF closure */ + for(int hit = 0; hit < num_hits; hit++) { + float3 tp = throughput; + PathState hit_state = state; + Ray hit_ray = ray; + float hit_ray_t = ray_t; + float hit_ray_pdf = ray_pdf; + float hit_min_ray_pdf = min_ray_pdf; + + hit_state.flag |= PATH_RAY_BSSRDF_ANCESTOR; + + if(kernel_path_integrate_lighting(kg, rng, sample, num_samples, &bssrdf_sd[hit], + &tp, &hit_min_ray_pdf, &hit_ray_pdf, &hit_state, rng_offset+PRNG_BOUNCE_NUM, &L, &hit_ray, &hit_ray_t)) { + kernel_path_indirect(kg, rng, sample, hit_ray, buffer, + tp, num_samples, num_samples, + hit_min_ray_pdf, hit_ray_pdf, hit_state, rng_offset+PRNG_BOUNCE_NUM*2, &L); + + /* for render passes, sum and reset indirect light pass variables + * for the next samples */ + path_radiance_sum_indirect(&L); + path_radiance_reset_indirect(&L); } - - break; } + break; } } #endif @@ -1129,17 +1120,6 @@ __device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, int /* do subsurface scatter step with copy of shader data, this will * replace the BSSRDF with a diffuse BSDF closure */ for(int j = 0; j < num_samples; j++) { - if(old_subsurface_scatter_use(&sd)) { - ShaderData bssrdf_sd = sd; - old_subsurface_scatter_step(kg, &bssrdf_sd, state.flag, sc, &lcg_state, true); - - /* compute lighting with the BSDF closure */ - kernel_branched_path_integrate_lighting(kg, rng, sample*num_samples + j, - aa_samples*num_samples, - &bssrdf_sd, throughput, num_samples_inv, - ray_pdf, ray_pdf, state, rng_offset, &L, buffer); - } - else { ShaderData bssrdf_sd[BSSRDF_MAX_HITS]; float bssrdf_u, bssrdf_v; path_rng_2D(kg, &bssrdf_rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_BSDF_U, &bssrdf_u, &bssrdf_v); @@ -1151,7 +1131,6 @@ __device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, int aa_samples*num_samples, &bssrdf_sd[hit], throughput, num_samples_inv, ray_pdf, ray_pdf, state, rng_offset+PRNG_BOUNCE_NUM, &L, buffer); - } } state.flag &= ~PATH_RAY_BSSRDF_ANCESTOR; diff --git a/intern/cycles/kernel/kernel_primitive.h b/intern/cycles/kernel/kernel_primitive.h index 4a06dff84bf..636cfd06532 100644 --- a/intern/cycles/kernel/kernel_primitive.h +++ b/intern/cycles/kernel/kernel_primitive.h @@ -93,7 +93,11 @@ __device float3 primitive_tangent(KernelGlobals *kg, ShaderData *sd) { #ifdef __HAIR__ if(sd->segment != ~0) +#ifdef __DPDU__ return normalize(sd->dPdu); +#else + return make_float3(0.0f, 0.0f, 0.0f); +#endif #endif /* try to create spherical tangent from generated coordinates */ @@ -108,7 +112,11 @@ __device float3 primitive_tangent(KernelGlobals *kg, ShaderData *sd) } else { /* otherwise use surface derivatives */ +#ifdef __DPDU__ return normalize(sd->dPdu); +#else + return make_float3(0.0f, 0.0f, 0.0f); +#endif } } diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h index ee71a0cfcf4..81630caed9a 100644 --- a/intern/cycles/kernel/kernel_shader.h +++ b/intern/cycles/kernel/kernel_shader.h @@ -36,15 +36,8 @@ CCL_NAMESPACE_BEGIN /* ShaderData setup from incoming ray */ #ifdef __OBJECT_MOTION__ -#if defined(__KERNEL_CUDA_VERSION__) && __KERNEL_CUDA_VERSION__ <= 42 -__device_noinline -#else -__device -#endif -void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float time) +__device void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float time) { - /* note that this is a separate non-inlined function to work around crash - * on CUDA sm 2.0, otherwise kernel execution crashes (compiler bug?) */ if(sd->flag & SD_OBJECT_MOTION) { sd->ob_tfm = object_fetch_transform_motion(kg, sd->object, time); sd->ob_itfm= transform_quick_inverse(sd->ob_tfm); @@ -56,12 +49,7 @@ void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd, float tim } #endif -#if defined(__KERNEL_CUDA_VERSION__) && __KERNEL_CUDA_VERSION__ <= 42 -__device_noinline -#else -__device -#endif -void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd, +__device void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray, int bounce) { #ifdef __INSTANCING__ @@ -249,12 +237,7 @@ __device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderData /* ShaderData setup from position sampled on mesh */ -#if defined(__KERNEL_CUDA_VERSION__) && __KERNEL_CUDA_VERSION__ <= 42 -__device_noinline -#else -__device -#endif -void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd, +__device void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd, const float3 P, const float3 Ng, const float3 I, int shader, int object, int prim, float u, float v, float t, float time, int bounce, int segment) { diff --git a/intern/cycles/kernel/kernel_sse2.cpp b/intern/cycles/kernel/kernel_sse2.cpp index 862626d6899..9c69e519dca 100644 --- a/intern/cycles/kernel/kernel_sse2.cpp +++ b/intern/cycles/kernel/kernel_sse2.cpp @@ -20,7 +20,10 @@ #ifdef WITH_OPTIMIZED_KERNEL +/* SSE optimization disabled for now on 32 bit, see bug #36316 */ +#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) #define __KERNEL_SSE2__ +#endif #include "kernel.h" #include "kernel_compat_cpu.h" diff --git a/intern/cycles/kernel/kernel_sse3.cpp b/intern/cycles/kernel/kernel_sse3.cpp index c44098606a5..05877a41b4a 100644 --- a/intern/cycles/kernel/kernel_sse3.cpp +++ b/intern/cycles/kernel/kernel_sse3.cpp @@ -20,9 +20,12 @@ #ifdef WITH_OPTIMIZED_KERNEL +/* SSE optimization disabled for now on 32 bit, see bug #36316 */ +#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) #define __KERNEL_SSE2__ #define __KERNEL_SSE3__ #define __KERNEL_SSSE3__ +#endif #include "kernel.h" #include "kernel_compat_cpu.h" diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h index 4567f2ff0ce..d16b9328bf2 100644 --- a/intern/cycles/kernel/kernel_subsurface.h +++ b/intern/cycles/kernel/kernel_subsurface.h @@ -403,164 +403,5 @@ __device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, subsurface_scatter_setup_diffuse_bsdf(sd, eval, (num_hits > 0), N); } - -/* OLD BSSRDF */ - -__device float old_bssrdf_sample_distance(KernelGlobals *kg, float radius, float refl, float u) -{ - int table_offset = kernel_data.bssrdf.table_offset; - float r = lookup_table_read_2D(kg, u, refl, table_offset, BSSRDF_RADIUS_TABLE_SIZE, BSSRDF_REFL_TABLE_SIZE); - - return r*radius; -} - -#ifdef BSSRDF_MULTI_EVAL -__device float old_bssrdf_pdf(KernelGlobals *kg, float radius, float refl, float r) -{ - if(r >= radius) - return 0.0f; - - /* todo: when we use the real BSSRDF this will need to be divided by the maximum - * radius instead of the average radius */ - float t = r/radius; - - int table_offset = kernel_data.bssrdf.table_offset + BSSRDF_PDF_TABLE_OFFSET; - float pdf = lookup_table_read_2D(kg, t, refl, table_offset, BSSRDF_RADIUS_TABLE_SIZE, BSSRDF_REFL_TABLE_SIZE); - - pdf /= radius; - - return pdf; -} -#endif - -#ifdef BSSRDF_MULTI_EVAL -__device float3 old_subsurface_scatter_multi_eval(KernelGlobals *kg, ShaderData *sd, bool hit, float refl, float *r, int num_r, bool all) -{ - /* compute pdf */ - float3 eval_sum = make_float3(0.0f, 0.0f, 0.0f); - float pdf_sum = 0.0f; - float sample_weight_sum = 0.0f; - int num_bssrdf = 0; - - for(int i = 0; i < sd->num_closure; i++) { - ShaderClosure *sc = &sd->closure[i]; - - if(CLOSURE_IS_BSSRDF(sc->type)) { - float sample_weight = (all)? 1.0f: sc->sample_weight; - - /* compute pdf */ - float pdf = 1.0f; - for(int i = 0; i < num_r; i++) - pdf *= old_bssrdf_pdf(kg, sc->data0, refl, r[i]); - - eval_sum += sc->weight*pdf; - pdf_sum += sample_weight*pdf; - - sample_weight_sum += sample_weight; - num_bssrdf++; - } - } - - float inv_pdf_sum; - - if(pdf_sum > 0.0f) { - /* in case of branched path integrate we sample all bssrdf's once, - * for path trace we pick one, so adjust pdf for that */ - if(all) - inv_pdf_sum = 1.0f/pdf_sum; - else - inv_pdf_sum = sample_weight_sum/pdf_sum; - } - else - inv_pdf_sum = 0.0f; - - float3 weight = eval_sum * inv_pdf_sum; - - return weight; -} -#endif - -/* subsurface scattering step, from a point on the surface to another nearby point on the same object */ -__device void old_subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, int state_flag, ShaderClosure *sc, uint *lcg_state, bool all) -{ - float radius = sc->data0; - float refl = max(average(sc->weight)*3.0f, 0.0f); - float r = 0.0f; - bool hit = false; - float3 weight = make_float3(1.0f, 1.0f, 1.0f); -#ifdef BSSRDF_MULTI_EVAL - float r_attempts[BSSRDF_MAX_ATTEMPTS]; -#endif - int num_attempts; - - /* attempt to find a hit a given number of times before giving up */ - for(num_attempts = 0; num_attempts < kernel_data.bssrdf.num_attempts; num_attempts++) { - /* random numbers for sampling */ - float u1 = lcg_step_float(lcg_state); - float u2 = lcg_step_float(lcg_state); - float u3 = lcg_step_float(lcg_state); - float u4 = lcg_step_float(lcg_state); - float u5 = lcg_step_float(lcg_state); - - r = old_bssrdf_sample_distance(kg, radius, refl, u5); -#ifdef BSSRDF_MULTI_EVAL - r_attempts[num_attempts] = r; -#endif - - float3 p1 = sd->P + sample_uniform_sphere(u1, u2)*r; - float3 p2 = sd->P + sample_uniform_sphere(u3, u4)*r; - - /* create ray */ - Ray ray; - ray.P = p1; - ray.D = normalize_len(p2 - p1, &ray.t); - ray.dP = sd->dP; - ray.dD = differential3_zero(); - ray.time = sd->time; - - /* intersect with the same object. if multiple intersections are - * found it will randomly pick one of them */ - Intersection isect; - if(scene_intersect_subsurface(kg, &ray, &isect, sd->object, lcg_state, 1) == 0) - continue; - - /* setup new shading point */ - shader_setup_from_subsurface(kg, sd, &isect, &ray); - - hit = true; - num_attempts++; - break; - } - - /* evaluate subsurface scattering closures */ -#ifdef BSSRDF_MULTI_EVAL - weight *= old_subsurface_scatter_multi_eval(kg, sd, hit, refl, r_attempts, num_attempts, all); -#else - weight *= sc->weight; -#endif - - if(!hit) - weight = make_float3(0.0f, 0.0f, 0.0f); - - /* optionally blur colors and bump mapping */ - float3 N = sd->N; - subsurface_color_bump_blur(kg, sd, sd, state_flag, &weight, &N); - - /* replace closures with a single diffuse BSDF */ - subsurface_scatter_setup_diffuse_bsdf(sd, weight, hit, N); -} - -__device bool old_subsurface_scatter_use(ShaderData *sd) -{ - for(int i = 0; i < sd->num_closure; i++) { - ShaderClosure *sc = &sd->closure[i]; - - if(sc->type == CLOSURE_BSSRDF_COMPATIBLE_ID) - return true; - } - - return false; -} - CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index d039b708bd4..3371c580c71 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -35,12 +35,7 @@ CCL_NAMESPACE_BEGIN #define PARTICLE_SIZE 5 #define TIME_INVALID FLT_MAX -#define BSSRDF_RADIUS_TABLE_SIZE 1024 -#define BSSRDF_REFL_TABLE_SIZE 256 -#define BSSRDF_PDF_TABLE_OFFSET (BSSRDF_RADIUS_TABLE_SIZE*BSSRDF_REFL_TABLE_SIZE) -#define BSSRDF_LOOKUP_TABLE_SIZE (BSSRDF_RADIUS_TABLE_SIZE*BSSRDF_REFL_TABLE_SIZE*2) #define BSSRDF_MIN_RADIUS 1e-8f -#define BSSRDF_MAX_ATTEMPTS 8 #define BSSRDF_MAX_HITS 4 #define BB_DRAPPER 800.0f @@ -815,12 +810,6 @@ typedef struct KernelCurves { int pad2; } KernelCurves; -typedef struct KernelBSSRDF { - int table_offset; - int num_attempts; - int pad1, pad2; -} KernelBSSRDF; - typedef struct KernelBlackbody { int table_offset; int pad1, pad2, pad3; @@ -834,7 +823,6 @@ typedef struct KernelData { KernelIntegrator integrator; KernelBVH bvh; KernelCurves curve; - KernelBSSRDF bssrdf; KernelBlackbody blackbody; } KernelData; diff --git a/intern/cycles/kernel/osl/osl_shader.cpp b/intern/cycles/kernel/osl/osl_shader.cpp index 625ad263f7f..18e8fee4348 100644 --- a/intern/cycles/kernel/osl/osl_shader.cpp +++ b/intern/cycles/kernel/osl/osl_shader.cpp @@ -253,7 +253,7 @@ static void flatten_surface_closure_tree(ShaderData *sd, int path_flag, /* disable in case of diffuse ancestor, can't see it well then and * adds considerably noise due to probabilities of continuing path * getting lower and lower */ - if(sc.type != CLOSURE_BSSRDF_COMPATIBLE_ID && (path_flag & PATH_RAY_DIFFUSE_ANCESTOR)) + if(path_flag & PATH_RAY_DIFFUSE_ANCESTOR) bssrdf->radius = make_float3(0.0f, 0.0f, 0.0f); /* create one closure for each color channel */ diff --git a/intern/cycles/kernel/shaders/SConscript b/intern/cycles/kernel/shaders/SConscript index aad6e23e4d4..8bc1c2206e0 100644 --- a/intern/cycles/kernel/shaders/SConscript +++ b/intern/cycles/kernel/shaders/SConscript @@ -57,7 +57,7 @@ if env['WITH_BF_CYCLES_OSL']: osl_file = os.path.join(source_dir, f) oso_file = os.path.join(build_dir, f.replace('.osl', '.oso')) - command = "%s -q -O2 -I%s -o %s %s" % (osl_compiler, source_dir, oso_file, osl_file) + command = "\"%s\" -q -O2 -I\"%s\" -o \"%s\" \"%s\"" % (osl_compiler, source_dir, oso_file, osl_file) shaders.Command(oso_file, f, command) shaders.Depends(oso_file, [f] + dependencies) diff --git a/intern/cycles/kernel/shaders/node_environment_texture.osl b/intern/cycles/kernel/shaders/node_environment_texture.osl index 6b61c689066..136ccdf8b18 100644 --- a/intern/cycles/kernel/shaders/node_environment_texture.osl +++ b/intern/cycles/kernel/shaders/node_environment_texture.osl @@ -19,7 +19,7 @@ vector environment_texture_direction_to_equirectangular(vector dir) { - float u = -atan2(dir[1], dir[0]) / (2.0 * M_PI) + 0.5; + float u = -atan2(dir[1], dir[0]) / (M_2PI) + 0.5; float v = atan2(dir[2], hypot(dir[0], dir[1])) / M_PI + 0.5; return vector(u, v, 0.0); diff --git a/intern/cycles/kernel/shaders/node_hair_bsdf.osl b/intern/cycles/kernel/shaders/node_hair_bsdf.osl index d1d7d0fb6a6..54d4cb67c3b 100644 --- a/intern/cycles/kernel/shaders/node_hair_bsdf.osl +++ b/intern/cycles/kernel/shaders/node_hair_bsdf.osl @@ -19,8 +19,8 @@ #include "stdosl.h" shader node_hair_bsdf( - color Color = 0.8, - string component = "Reflection", + color Color = 0.8, + string component = "Reflection", float Offset = 0.0, float RoughnessU = 0.1, float RoughnessV = 1.0, @@ -28,29 +28,30 @@ shader node_hair_bsdf( output closure color BSDF = 0) { float IsStrand; - float roughnessh = clamp(RoughnessU, 0.001,1.0); - float roughnessv = clamp(RoughnessV, 0.001,1.0); + float roughnessh = clamp(RoughnessU, 0.001, 1.0); + float roughnessv = clamp(RoughnessV, 0.001, 1.0); getattribute("geom:is_curve", IsStrand); - if (!IsStrand) { - if (backfacing()) - BSDF = transparent(); - else { - if (component == "Reflection") - BSDF = Color * hair_reflection(Normal, roughnessh, roughnessv, normalize(dPdv), 0.0); - else - BSDF = Color * hair_transmission(Normal, roughnessh, roughnessv, normalize(dPdv), 0.0); + if (!IsStrand) { + if (backfacing()) { + BSDF = transparent(); + } + else { + if (component == "Reflection") + BSDF = Color * hair_reflection(Normal, roughnessh, roughnessv, normalize(dPdv), 0.0); + else + BSDF = Color * hair_transmission(Normal, roughnessh, roughnessv, normalize(dPdv), 0.0); } } - else { - if (backfacing()) - BSDF = transparent(); - else { - if (component == "Reflection") - BSDF = Color * hair_reflection(Normal, roughnessh, roughnessv, dPdu, -Offset); - else - BSDF = Color * hair_transmission(Normal, roughnessh, roughnessv, dPdu, -Offset); + else { + if (backfacing()) { + BSDF = transparent(); + } + else { + if (component == "Reflection") + BSDF = Color * hair_reflection(Normal, roughnessh, roughnessv, dPdu, -Offset); + else + BSDF = Color * hair_transmission(Normal, roughnessh, roughnessv, dPdu, -Offset); } } } - diff --git a/intern/cycles/kernel/shaders/node_sky_texture.osl b/intern/cycles/kernel/shaders/node_sky_texture.osl index 3ed791fdc92..85c2dbdb2c2 100644 --- a/intern/cycles/kernel/shaders/node_sky_texture.osl +++ b/intern/cycles/kernel/shaders/node_sky_texture.osl @@ -44,8 +44,8 @@ float sky_perez_function(float lam[9], float theta, float gamma) } color sky_radiance_old(normal dir, - float sunphi, float suntheta, color radiance, - float config_x[9], float config_y[9], float config_z[9]) + float sunphi, float suntheta, color radiance, + float config_x[9], float config_y[9], float config_z[9]) { /* convert vector to spherical coordinates */ vector spherical = sky_spherical_coordinates(dir); @@ -76,7 +76,7 @@ float sky_radiance_internal(float config[9], float theta, float gamma) float expM = exp(config[4] * gamma); float rayM = cgamma * cgamma; - float mieM = (1.0 + rayM) / pow((1.0 + config[8]*config[8] - 2.0*config[8]*cgamma), 1.5); + float mieM = (1.0 + rayM) / pow((1.0 + config[8] * config[8] - 2.0 * config[8] * cgamma), 1.5); float zenith = sqrt(ctheta); return (1.0 + config[0] * exp(config[1] / (ctheta + 0.01))) * @@ -84,8 +84,8 @@ float sky_radiance_internal(float config[9], float theta, float gamma) } color sky_radiance_new(normal dir, - float sunphi, float suntheta, color radiance, - float config_x[9], float config_y[9], float config_z[9]) + float sunphi, float suntheta, color radiance, + float config_x[9], float config_y[9], float config_z[9]) { /* convert vector to spherical coordinates */ vector spherical = sky_spherical_coordinates(dir); @@ -104,7 +104,7 @@ color sky_radiance_new(normal dir, float z = sky_radiance_internal(config_z, theta, gamma) * radiance[2]; /* convert to RGB and adjust strength */ - return xyz_to_rgb(x, y, z) * (M_2PI/683); + return xyz_to_rgb(x, y, z) * (M_2PI / 683); } shader node_sky_texture( diff --git a/intern/cycles/kernel/shaders/node_subsurface_scattering.osl b/intern/cycles/kernel/shaders/node_subsurface_scattering.osl index 5c1d1be0ee7..1c0cd74c0be 100644 --- a/intern/cycles/kernel/shaders/node_subsurface_scattering.osl +++ b/intern/cycles/kernel/shaders/node_subsurface_scattering.osl @@ -26,7 +26,7 @@ shader node_subsurface_scattering( normal Normal = N, output closure color BSSRDF = 0) { - if(Falloff == "Gaussian") + if (Falloff == "Gaussian") BSSRDF = Color * bssrdf_gaussian(N, Scale * Radius, TextureBlur); else BSSRDF = Color * bssrdf_cubic(N, Scale * Radius, TextureBlur, Sharpness); diff --git a/intern/cycles/kernel/shaders/node_ward_bsdf.osl b/intern/cycles/kernel/shaders/node_ward_bsdf.osl index a21dd24059d..2d360d594f2 100644 --- a/intern/cycles/kernel/shaders/node_ward_bsdf.osl +++ b/intern/cycles/kernel/shaders/node_ward_bsdf.osl @@ -29,7 +29,7 @@ shader node_ward_bsdf( vector T = Tangent; if (Rotation != 0.0) - T = rotate(T, Rotation * 2.0 * M_PI, point(0.0, 0.0, 0.0), Normal); + T = rotate(T, Rotation * M_2PI, point(0.0, 0.0, 0.0), Normal); /* compute roughness */ float RoughnessU, RoughnessV; diff --git a/intern/cycles/kernel/shaders/stdosl.h b/intern/cycles/kernel/shaders/stdosl.h index 424ca335903..47c5dc27f2b 100644 --- a/intern/cycles/kernel/shaders/stdosl.h +++ b/intern/cycles/kernel/shaders/stdosl.h @@ -134,12 +134,16 @@ normal mod (normal a, float b) { return a - b*floor(a/b); } color mod (color a, float b) { return a - b*floor(a/b); } float mod (float a, float b) { return a - b*floor(a/b); } PERCOMP2 (min) +int min (int a, int b) BUILTIN; PERCOMP2 (max) +int max (int a, int b) BUILTIN; normal clamp (normal x, normal minval, normal maxval) { return max(min(x,maxval),minval); } vector clamp (vector x, vector minval, vector maxval) { return max(min(x,maxval),minval); } point clamp (point x, point minval, point maxval) { return max(min(x,maxval),minval); } color clamp (color x, color minval, color maxval) { return max(min(x,maxval),minval); } float clamp (float x, float minval, float maxval) { return max(min(x,maxval),minval); } +int clamp (int x, int minval, int maxval) { return max(min(x,maxval),minval); } +#if 0 normal mix (normal x, normal y, normal a) { return x*(1-a) + y*a; } normal mix (normal x, normal y, float a) { return x*(1-a) + y*a; } vector mix (vector x, vector y, vector a) { return x*(1-a) + y*a; } @@ -149,6 +153,17 @@ point mix (point x, point y, float a) { return x*(1-a) + y*a; } color mix (color x, color y, color a) { return x*(1-a) + y*a; } color mix (color x, color y, float a) { return x*(1-a) + y*a; } float mix (float x, float y, float a) { return x*(1-a) + y*a; } +#else +normal mix (normal x, normal y, normal a) BUILTIN; +normal mix (normal x, normal y, float a) BUILTIN; +vector mix (vector x, vector y, vector a) BUILTIN; +vector mix (vector x, vector y, float a) BUILTIN; +point mix (point x, point y, point a) BUILTIN; +point mix (point x, point y, float a) BUILTIN; +color mix (color x, color y, color a) BUILTIN; +color mix (color x, color y, float a) BUILTIN; +float mix (float x, float y, float a) BUILTIN; +#endif int isnan (float x) BUILTIN; int isinf (float x) BUILTIN; int isfinite (float x) BUILTIN; @@ -418,8 +433,8 @@ int startswith (string s, string prefix) BUILTIN; int endswith (string s, string suffix) BUILTIN; string substr (string s, int start, int len) BUILTIN; string substr (string s, int start) { return substr (s, start, strlen(s)); } -float strtof (string str) BUILTIN; -int strtoi (string str) BUILTIN; +float stof (string str) BUILTIN; +int stoi (string str) BUILTIN; // Define concat in terms of shorter concat string concat (string a, string b, string c) { diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h index 6d9c4e215e6..0d4716ab078 100644 --- a/intern/cycles/kernel/svm/svm_closure.h +++ b/intern/cycles/kernel/svm/svm_closure.h @@ -378,7 +378,6 @@ __device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *st #endif #ifdef __SUBSURFACE__ - case CLOSURE_BSSRDF_COMPATIBLE_ID: case CLOSURE_BSSRDF_CUBIC_ID: case CLOSURE_BSSRDF_GAUSSIAN_ID: { ShaderClosure *sc = &sd->closure[sd->num_closure]; @@ -388,7 +387,7 @@ __device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *st /* disable in case of diffuse ancestor, can't see it well then and * adds considerably noise due to probabilities of continuing path * getting lower and lower */ - if(type != CLOSURE_BSSRDF_COMPATIBLE_ID && (path_flag & PATH_RAY_DIFFUSE_ANCESTOR)) + if(path_flag & PATH_RAY_DIFFUSE_ANCESTOR) param1 = 0.0f; if(sample_weight > 1e-5f && sd->num_closure+2 < MAX_CLOSURE) { diff --git a/intern/cycles/kernel/svm/svm_types.h b/intern/cycles/kernel/svm/svm_types.h index 50daf159f26..abd63530d63 100644 --- a/intern/cycles/kernel/svm/svm_types.h +++ b/intern/cycles/kernel/svm/svm_types.h @@ -377,7 +377,6 @@ typedef enum ClosureType { CLOSURE_BSDF_TRANSPARENT_ID, /* BSSRDF */ - CLOSURE_BSSRDF_COMPATIBLE_ID, CLOSURE_BSSRDF_CUBIC_ID, CLOSURE_BSSRDF_GAUSSIAN_ID, @@ -402,7 +401,7 @@ typedef enum ClosureType { #define CLOSURE_IS_BSDF_GLOSSY(type) (type >= CLOSURE_BSDF_GLOSSY_ID && type <= CLOSURE_BSDF_HAIR_REFLECTION_ID) #define CLOSURE_IS_BSDF_TRANSMISSION(type) (type >= CLOSURE_BSDF_TRANSMISSION_ID && type <= CLOSURE_BSDF_HAIR_TRANSMISSION_ID) #define CLOSURE_IS_BSDF_BSSRDF(type) (type == CLOSURE_BSDF_BSSRDF_ID) -#define CLOSURE_IS_BSSRDF(type) (type >= CLOSURE_BSSRDF_COMPATIBLE_ID && type <= CLOSURE_BSSRDF_GAUSSIAN_ID) +#define CLOSURE_IS_BSSRDF(type) (type >= CLOSURE_BSSRDF_CUBIC_ID && type <= CLOSURE_BSSRDF_GAUSSIAN_ID) #define CLOSURE_IS_VOLUME(type) (type >= CLOSURE_VOLUME_ID && type <= CLOSURE_VOLUME_ISOTROPIC_ID) #define CLOSURE_IS_EMISSION(type) (type == CLOSURE_EMISSION_ID) #define CLOSURE_IS_HOLDOUT(type) (type == CLOSURE_HOLDOUT_ID) diff --git a/intern/cycles/render/CMakeLists.txt b/intern/cycles/render/CMakeLists.txt index dbbfe2ee7c9..7d00ed92164 100644 --- a/intern/cycles/render/CMakeLists.txt +++ b/intern/cycles/render/CMakeLists.txt @@ -18,7 +18,6 @@ set(SRC background.cpp blackbody.cpp buffers.cpp - bssrdf.cpp camera.cpp film.cpp graph.cpp @@ -47,7 +46,6 @@ set(SRC_HEADERS background.h blackbody.h buffers.h - bssrdf.h camera.h film.h graph.h diff --git a/intern/cycles/render/bssrdf.cpp b/intern/cycles/render/bssrdf.cpp deleted file mode 100644 index 69cab5e1824..00000000000 --- a/intern/cycles/render/bssrdf.cpp +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License - */ - -#include "bssrdf.h" - -#include "util_algorithm.h" -#include "util_math.h" -#include "util_types.h" - -#include "kernel_types.h" -#include "kernel_montecarlo.h" - -CCL_NAMESPACE_BEGIN - -static float bssrdf_cubic(float ld, float r) -{ - if(ld == 0.0f) - return (r == 0.0f)? 1.0f: 0.0f; - - return powf(ld - min(r, ld), 3.0f) * 4.0f/powf(ld, 4.0f); -} - -/* Cumulative density function utilities */ - -static float cdf_lookup_inverse(const vector<float>& table, float2 range, float x) -{ - int index = upper_bound(table.begin(), table.end(), x) - table.begin(); - - if(index == 0) - return range[0]; - else if(index == table.size()) - return range[1]; - else - index--; - - float t = (x - table[index])/(table[index+1] - table[index]); - float y = ((index + t)/(table.size() - 1)); - - return y*(range[1] - range[0]) + range[0]; -} - -static void cdf_invert(vector<float>& to, float2 to_range, const vector<float>& from, float2 from_range) -{ - float step = 1.0f/(float)(to.size() - 1); - - for(int i = 0; i < to.size(); i++) { - float x = (i*step)*(from_range[1] - from_range[0]) + from_range[0]; - to[i] = cdf_lookup_inverse(from, to_range, x); - } -} - -/* BSSRDF */ - -static void bssrdf_lookup_table_create(float ld, vector<float>& sample_table, vector<float>& pdf_table) -{ - const int size = BSSRDF_RADIUS_TABLE_SIZE; - vector<float> cdf(size); - vector<float> pdf(size); - float step = 1.0f/(float)(size - 1); - float max_radius = ld; - float pdf_sum = 0.0f; - - /* compute the probability density function */ - for(int i = 0; i < pdf.size(); i++) { - float x = (i*step)*max_radius; - pdf[i] = bssrdf_cubic(ld, x); - pdf_sum += pdf[i]; - } - - /* adjust for area covered by each distance */ - for(int i = 0; i < pdf.size(); i++) { - float x = (i*step)*max_radius; - pdf[i] *= M_2PI_F*x; - } - - /* normalize pdf, we multiply in reflectance later */ - if(pdf_sum > 0.0f) - for(int i = 0; i < pdf.size(); i++) - pdf[i] /= pdf_sum; - - /* sum to account for sampling which uses overlapping sphere */ - for(int i = pdf.size() - 2; i >= 0; i--) - pdf[i] = pdf[i] + pdf[i+1]; - - /* compute the cumulative density function */ - cdf[0] = 0.0f; - - for(int i = 1; i < size; i++) - cdf[i] = cdf[i-1] + 0.5f*(pdf[i-1] + pdf[i])*step*max_radius; - - /* invert cumulative density function for importance sampling */ - float2 cdf_range = make_float2(0.0f, cdf[size - 1]); - float2 table_range = make_float2(0.0f, max_radius); - - cdf_invert(sample_table, table_range, cdf, cdf_range); - - /* copy pdf table */ - for(int i = 0; i < pdf.size(); i++) - pdf_table[i] = pdf[i]; -} - -void bssrdf_table_build(vector<float>& table) -{ - vector<float> sample_table(BSSRDF_RADIUS_TABLE_SIZE); - vector<float> pdf_table(BSSRDF_RADIUS_TABLE_SIZE); - - table.resize(BSSRDF_LOOKUP_TABLE_SIZE); - - /* create a 2D lookup table, for reflection x sample radius */ - for(int i = 0; i < BSSRDF_REFL_TABLE_SIZE; i++) { - float radius = 1.0f; - - bssrdf_lookup_table_create(radius, sample_table, pdf_table); - - memcpy(&table[i*BSSRDF_RADIUS_TABLE_SIZE], &sample_table[0], BSSRDF_RADIUS_TABLE_SIZE*sizeof(float)); - memcpy(&table[BSSRDF_PDF_TABLE_OFFSET + i*BSSRDF_RADIUS_TABLE_SIZE], &pdf_table[0], BSSRDF_RADIUS_TABLE_SIZE*sizeof(float)); - } -} - -CCL_NAMESPACE_END - diff --git a/intern/cycles/render/bssrdf.h b/intern/cycles/render/bssrdf.h deleted file mode 100644 index b8f0d44bc34..00000000000 --- a/intern/cycles/render/bssrdf.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License - */ - -#ifndef __BSSRDF_H__ -#define __BSSRDF_H__ - -#include "util_vector.h" - -CCL_NAMESPACE_BEGIN - -void bssrdf_table_build(vector<float>& table); - -CCL_NAMESPACE_END - -#endif /* __BSSRDF_H__ */ - diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp index 70cb5613e61..621d52bbbbf 100644 --- a/intern/cycles/render/nodes.cpp +++ b/intern/cycles/render/nodes.cpp @@ -20,6 +20,7 @@ #include "osl.h" #include "sky_model.h" +#include "util_foreach.h" #include "util_transform.h" CCL_NAMESPACE_BEGIN @@ -41,6 +42,8 @@ TextureMapping::TextureMapping() y_mapping = Y; z_mapping = Z; + type = TEXTURE; + projection = FLAT; } @@ -54,12 +57,52 @@ Transform TextureMapping::compute_transform() mmat[1][y_mapping-1] = 1.0f; if(z_mapping != NONE) mmat[2][z_mapping-1] = 1.0f; + + float3 scale_clamped = scale; - Transform smat = transform_scale(scale); + if(type == TEXTURE || type == NORMAL) { + /* keep matrix invertible */ + if(fabsf(scale.x) < 1e-5f) + scale_clamped.x = signf(scale.x)*1e-5f; + if(fabsf(scale.y) < 1e-5f) + scale_clamped.y = signf(scale.y)*1e-5f; + if(fabsf(scale.z) < 1e-5f) + scale_clamped.z = signf(scale.z)*1e-5f; + } + + Transform smat = transform_scale(scale_clamped); Transform rmat = transform_euler(rotation); Transform tmat = transform_translate(translation); - return tmat*rmat*smat*mmat; + Transform mat; + + switch(type) { + case TEXTURE: + /* inverse transform on texture coordinate gives + * forward transform on texture */ + mat = tmat*rmat*smat; + mat = transform_inverse(mat); + break; + case POINT: + /* full transform */ + mat = tmat*rmat*smat; + break; + case VECTOR: + /* no translation for vectors */ + mat = rmat*smat; + break; + case NORMAL: + /* no translation for normals, and inverse transpose */ + mat = rmat*smat; + mat = transform_inverse(mat); + mat = transform_transpose(mat); + break; + } + + /* projection last */ + mat = mat*mmat; + + return mat; } bool TextureMapping::skip() @@ -97,6 +140,11 @@ void TextureMapping::compile(SVMCompiler& compiler, int offset_in, int offset_ou compiler.add_node(float3_to_float4(min)); compiler.add_node(float3_to_float4(max)); } + + if(type == NORMAL) { + compiler.add_node(NODE_VECTOR_MATH, NODE_VECTOR_MATH_NORMALIZE, offset_out, offset_out); + compiler.add_node(NODE_VECTOR_MATH, SVM_STACK_INVALID, offset_out); + } } void TextureMapping::compile(OSLCompiler &compiler) @@ -472,6 +520,9 @@ static void sky_texture_precompute_new(SunSky *sunsky, float3 dir, float turbidi float theta = spherical.x; float phi = spherical.y; + /* Clamp Turbidity */ + turbidity = clamp(turbidity, 0.0f, 10.0f); + /* Clamp to Horizon */ theta = clamp(theta, 0.0f, M_PI_2_F); @@ -3677,6 +3728,26 @@ void OSLScriptNode::compile(SVMCompiler& compiler) void OSLScriptNode::compile(OSLCompiler& compiler) { + /* XXX fix for #36790: + * point and normal parameters are reflected as generic SOCK_VECTOR sockets + * on the node. Socket fixed input values need to be copied explicitly here for + * vector sockets, otherwise OSL will reject the value due to mismatching type. + */ + foreach(ShaderInput *input, this->inputs) { + if(!input->link) { + /* no need for compatible_name here, OSL parameter names are always unique */ + string param_name(input->name); + switch(input->type) { + case SHADER_SOCKET_VECTOR: + compiler.parameter_point(param_name.c_str(), input->value); + compiler.parameter_normal(param_name.c_str(), input->value); + break; + default: + break; + } + } + } + if(!filepath.empty()) compiler.add(this, filepath.c_str(), true); else diff --git a/intern/cycles/render/nodes.h b/intern/cycles/render/nodes.h index d58c6633a41..430c37158f4 100644 --- a/intern/cycles/render/nodes.h +++ b/intern/cycles/render/nodes.h @@ -43,6 +43,9 @@ public: float3 min, max; bool use_minmax; + enum Type { POINT = 0, TEXTURE = 1, VECTOR = 2, NORMAL = 3 }; + Type type; + enum Mapping { NONE = 0, X = 1, Y = 2, Z = 3 }; Mapping x_mapping, y_mapping, z_mapping; diff --git a/intern/cycles/render/shader.cpp b/intern/cycles/render/shader.cpp index 385395e3752..26af60572f6 100644 --- a/intern/cycles/render/shader.cpp +++ b/intern/cycles/render/shader.cpp @@ -15,7 +15,6 @@ */ #include "background.h" -#include "bssrdf.h" #include "blackbody.h" #include "device.h" #include "graph.h" @@ -127,7 +126,6 @@ void Shader::tag_used(Scene *scene) ShaderManager::ShaderManager() { need_update = true; - bssrdf_table_offset = TABLE_OFFSET_INVALID; blackbody_table_offset = TABLE_OFFSET_INVALID; } @@ -254,23 +252,6 @@ void ShaderManager::device_update_common(Device *device, DeviceScene *dscene, Sc device->tex_alloc("__shader_flag", dscene->shader_flag); - /* bssrdf lookup table */ - KernelBSSRDF *kbssrdf = &dscene->data.bssrdf; - - if(has_surface_bssrdf && bssrdf_table_offset == TABLE_OFFSET_INVALID) { - vector<float> table; - - bssrdf_table_build(table); - bssrdf_table_offset = scene->lookup_tables->add_table(dscene, table); - - kbssrdf->table_offset = (int)bssrdf_table_offset; - kbssrdf->num_attempts = BSSRDF_MAX_ATTEMPTS; - } - else if(!has_surface_bssrdf && bssrdf_table_offset != TABLE_OFFSET_INVALID) { - scene->lookup_tables->remove_table(bssrdf_table_offset); - bssrdf_table_offset = TABLE_OFFSET_INVALID; - } - /* blackbody lookup table */ KernelBlackbody *kblackbody = &dscene->data.blackbody; @@ -289,11 +270,6 @@ void ShaderManager::device_update_common(Device *device, DeviceScene *dscene, Sc void ShaderManager::device_free_common(Device *device, DeviceScene *dscene, Scene *scene) { - if(bssrdf_table_offset != TABLE_OFFSET_INVALID) { - scene->lookup_tables->remove_table(bssrdf_table_offset); - bssrdf_table_offset = TABLE_OFFSET_INVALID; - } - if(blackbody_table_offset != TABLE_OFFSET_INVALID) { scene->lookup_tables->remove_table(blackbody_table_offset); blackbody_table_offset = TABLE_OFFSET_INVALID; diff --git a/intern/cycles/render/shader.h b/intern/cycles/render/shader.h index a58d6955da0..6869a651b46 100644 --- a/intern/cycles/render/shader.h +++ b/intern/cycles/render/shader.h @@ -141,7 +141,6 @@ protected: typedef unordered_map<ustring, uint, ustringHash> AttributeIDMap; AttributeIDMap unique_attribute_id; - size_t bssrdf_table_offset; size_t blackbody_table_offset; }; diff --git a/intern/cycles/util/util_opengl.h b/intern/cycles/util/util_opengl.h index 07a296ed1e6..04a3e039c9d 100644 --- a/intern/cycles/util/util_opengl.h +++ b/intern/cycles/util/util_opengl.h @@ -20,12 +20,7 @@ /* OpenGL header includes, used everywhere we use OpenGL, to deal with * platform differences in one central place. */ -#ifdef __APPLE__ -#include <OpenGL/gl.h> -#include <OpenGL/glu.h> -#else #include <GL/glew.h> -#endif #endif /* __UTIL_OPENGL_H__ */ diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h index f48fd1e124b..a8f514864db 100644 --- a/intern/cycles/util/util_types.h +++ b/intern/cycles/util/util_types.h @@ -545,10 +545,7 @@ template<size_t i0, size_t i1, size_t i2, size_t i3> __device_inline const __m12 #ifdef __KERNEL_OPENCL__ -__device_inline void float4_store_half(half *h, const float4 *f, float scale) -{ - vstore_half4(*f * scale, 0, h); -} +#define float4_store_half(h, f, scale) vstore_half4(*(f) * (scale), 0, h); #else |