diff options
author | Antonio Vazquez <blendergit@gmail.com> | 2019-08-26 12:52:52 +0300 |
---|---|---|
committer | Antonio Vazquez <blendergit@gmail.com> | 2019-08-26 12:52:52 +0300 |
commit | 6129e20cec4639aebf335ff13b2ba0c59670662d (patch) | |
tree | a36691d3c487e376f0fa21676ca6e416f051e9a4 /intern/cycles | |
parent | 03bbd5f9dedf7b3dfea7119c172c61f0b50ae28c (diff) | |
parent | 27787549256410b6b2de1eca47a2719830af7f96 (diff) |
Merge branch 'master' into temp-gpencil-drw-engine
Conflicts:
source/blender/draw/engines/gpencil/gpencil_engine.c
Diffstat (limited to 'intern/cycles')
108 files changed, 1904 insertions, 1006 deletions
diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt index 87f88f7ed34..6a3ebd85378 100644 --- a/intern/cycles/CMakeLists.txt +++ b/intern/cycles/CMakeLists.txt @@ -178,11 +178,13 @@ if(CXX_HAS_AVX2) endif() if(WITH_CYCLES_OSL) + # LLVM and OSL need to build without RTTI if(WIN32 AND MSVC) set(RTTI_DISABLE_FLAGS "/GR- -DBOOST_NO_RTTI -DBOOST_NO_TYPEID") elseif(CMAKE_COMPILER_IS_GNUCC OR (CMAKE_C_COMPILER_ID MATCHES "Clang")) set(RTTI_DISABLE_FLAGS "-fno-rtti -DBOOST_NO_RTTI -DBOOST_NO_TYPEID") endif() + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${RTTI_DISABLE_FLAGS}") endif() # Definitions and Includes diff --git a/intern/cycles/blender/blender_curves.cpp b/intern/cycles/blender/blender_curves.cpp index 29a1408d85d..4dba8ffbe0e 100644 --- a/intern/cycles/blender/blender_curves.cpp +++ b/intern/cycles/blender/blender_curves.cpp @@ -656,7 +656,7 @@ static void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CDa } if (attr_random != NULL) { - attr_random->add(hash_int_01(num_curves)); + attr_random->add(hash_uint2_to_float(num_curves, 0)); } mesh->add_curve(num_keys, CData->psys_shader[sys]); diff --git a/intern/cycles/blender/blender_mesh.cpp b/intern/cycles/blender/blender_mesh.cpp index c672bc9f3e2..551866f7fce 100644 --- a/intern/cycles/blender/blender_mesh.cpp +++ b/intern/cycles/blender/blender_mesh.cpp @@ -1002,6 +1002,9 @@ Mesh *BlenderSync::sync_mesh(BL::Depsgraph &b_depsgraph, oldcurve_keys.steal_data(mesh->curve_keys); oldcurve_radius.steal_data(mesh->curve_radius); + /* ensure bvh rebuild (instead of refit) if has_voxel_attributes() changed */ + bool oldhas_voxel_attributes = mesh->has_voxel_attributes(); + mesh->clear(); mesh->used_shaders = used_shaders; mesh->name = ustring(b_ob_data.name().c_str()); @@ -1050,7 +1053,8 @@ Mesh *BlenderSync::sync_mesh(BL::Depsgraph &b_depsgraph, /* tag update */ bool rebuild = (oldtriangles != mesh->triangles) || (oldsubd_faces != mesh->subd_faces) || (oldsubd_face_corners != 
mesh->subd_face_corners) || - (oldcurve_keys != mesh->curve_keys) || (oldcurve_radius != mesh->curve_radius); + (oldcurve_keys != mesh->curve_keys) || (oldcurve_radius != mesh->curve_radius) || + (oldhas_voxel_attributes != mesh->has_voxel_attributes()); mesh->tag_update(scene, rebuild); diff --git a/intern/cycles/blender/blender_object.cpp b/intern/cycles/blender/blender_object.cpp index 7ccf8226e5b..b670922ac88 100644 --- a/intern/cycles/blender/blender_object.cpp +++ b/intern/cycles/blender/blender_object.cpp @@ -217,7 +217,7 @@ void BlenderSync::sync_light(BL::Object &b_parent, light->random_id = random_id; } else { - light->random_id = hash_int_2d(hash_string(b_ob.name().c_str()), 0); + light->random_id = hash_uint2(hash_string(b_ob.name().c_str()), 0); } if (light->type == LIGHT_AREA) @@ -444,6 +444,7 @@ Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph, if (object_updated || (object->mesh && object->mesh->need_update) || tfm != object->tfm) { object->name = b_ob.name().c_str(); object->pass_id = b_ob.pass_index(); + object->color = get_float3(b_ob.color()); object->tfm = tfm; object->motion.clear(); @@ -490,7 +491,7 @@ Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph, else { object->dupli_generated = make_float3(0.0f, 0.0f, 0.0f); object->dupli_uv = make_float2(0.0f, 0.0f); - object->random_id = hash_int_2d(hash_string(object->name.c_str()), 0); + object->random_id = hash_uint2(hash_string(object->name.c_str()), 0); } object->tag_update(scene); diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp index dcbb101b01d..9a798a4f979 100644 --- a/intern/cycles/blender/blender_session.cpp +++ b/intern/cycles/blender/blender_session.cpp @@ -527,7 +527,7 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_) builtin_images_load(); /* Attempt to free all data which is held by Blender side, since at this - * point we knwo that we've got everything to render current view layer. 
+ * point we know that we've got everything to render current view layer. */ /* At the moment we only free if we are not doing multi-view * (or if we are rendering the last view). See T58142/D4239 for discussion. @@ -539,8 +539,8 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_) /* Make sure all views have different noise patterns. - hardcoded value just to make it random */ if (view_index != 0) { - scene->integrator->seed += hash_int_2d(scene->integrator->seed, - hash_int(view_index * 0xdeadbeef)); + scene->integrator->seed += hash_uint2(scene->integrator->seed, + hash_uint2(view_index * 0xdeadbeef, 0)); scene->integrator->tag_update(scene); } @@ -1481,8 +1481,8 @@ void BlenderSession::update_resumable_tile_manager(int num_samples) /* Round after doing the multiplications with num_chunks and num_samples_per_chunk * to allow for many small chunks. */ - int rounded_range_start_sample = (int)floor(range_start_sample + 0.5f); - int rounded_range_num_samples = max((int)floor(range_num_samples + 0.5f), 1); + int rounded_range_start_sample = (int)floorf(range_start_sample + 0.5f); + int rounded_range_num_samples = max((int)floorf(range_num_samples + 0.5f), 1); /* Make sure we don't overshoot. 
*/ if (rounded_range_start_sample + rounded_range_num_samples > num_samples) { diff --git a/intern/cycles/blender/blender_shader.cpp b/intern/cycles/blender/blender_shader.cpp index f952b3025f0..720f521c589 100644 --- a/intern/cycles/blender/blender_shader.cpp +++ b/intern/cycles/blender/blender_shader.cpp @@ -315,18 +315,27 @@ static ShaderNode *add_node(Scene *scene, else if (b_node.is_a(&RNA_ShaderNodeRGBToBW)) { node = new RGBToBWNode(); } + else if (b_node.is_a(&RNA_ShaderNodeMapRange)) { + BL::ShaderNodeMapRange b_map_range_node(b_node); + MapRangeNode *map_range_node = new MapRangeNode(); + map_range_node->clamp = b_map_range_node.clamp(); + node = map_range_node; + } + else if (b_node.is_a(&RNA_ShaderNodeClamp)) { + node = new ClampNode(); + } else if (b_node.is_a(&RNA_ShaderNodeMath)) { BL::ShaderNodeMath b_math_node(b_node); - MathNode *math = new MathNode(); - math->type = (NodeMath)b_math_node.operation(); - math->use_clamp = b_math_node.use_clamp(); - node = math; + MathNode *math_node = new MathNode(); + math_node->type = (NodeMathType)b_math_node.operation(); + math_node->use_clamp = b_math_node.use_clamp(); + node = math_node; } else if (b_node.is_a(&RNA_ShaderNodeVectorMath)) { BL::ShaderNodeVectorMath b_vector_math_node(b_node); - VectorMathNode *vmath = new VectorMathNode(); - vmath->type = (NodeVectorMath)b_vector_math_node.operation(); - node = vmath; + VectorMathNode *vector_math_node = new VectorMathNode(); + vector_math_node->type = (NodeVectorMathType)b_vector_math_node.operation(); + node = vector_math_node; } else if (b_node.is_a(&RNA_ShaderNodeVectorTransform)) { BL::ShaderNodeVectorTransform b_vector_transform_node(b_node); @@ -598,6 +607,9 @@ static ShaderNode *add_node(Scene *scene, else if (b_node.is_a(&RNA_ShaderNodeHairInfo)) { node = new HairInfoNode(); } + else if (b_node.is_a(&RNA_ShaderNodeVolumeInfo)) { + node = new VolumeInfoNode(); + } else if (b_node.is_a(&RNA_ShaderNodeBump)) { BL::ShaderNodeBump b_bump_node(b_node); 
BumpNode *bump = new BumpNode(); @@ -835,6 +847,12 @@ static ShaderNode *add_node(Scene *scene, } node = ies; } + else if (b_node.is_a(&RNA_ShaderNodeTexWhiteNoise)) { + BL::ShaderNodeTexWhiteNoise b_tex_white_noise_node(b_node); + WhiteNoiseTextureNode *white_noise_node = new WhiteNoiseTextureNode(); + white_noise_node->dimensions = b_tex_white_noise_node.dimensions(); + node = white_noise_node; + } else if (b_node.is_a(&RNA_ShaderNodeNormalMap)) { BL::ShaderNodeNormalMap b_normal_map_node(b_node); NormalMapNode *nmap = new NormalMapNode(); diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp index bac571b02ce..aec21887088 100644 --- a/intern/cycles/blender/blender_sync.cpp +++ b/intern/cycles/blender/blender_sync.cpp @@ -255,13 +255,13 @@ void BlenderSync::sync_integrator() integrator->seed = get_int(cscene, "seed"); if (get_boolean(cscene, "use_animated_seed")) { - integrator->seed = hash_int_2d(b_scene.frame_current(), get_int(cscene, "seed")); + integrator->seed = hash_uint2(b_scene.frame_current(), get_int(cscene, "seed")); if (b_scene.frame_subframe() != 0.0f) { /* TODO(sergey): Ideally should be some sort of hash_merge, * but this is good enough for now. 
*/ - integrator->seed += hash_int_2d((int)(b_scene.frame_subframe() * (float)INT_MAX), - get_int(cscene, "seed")); + integrator->seed += hash_uint2((int)(b_scene.frame_subframe() * (float)INT_MAX), + get_int(cscene, "seed")); } } diff --git a/intern/cycles/blender/blender_util.h b/intern/cycles/blender/blender_util.h index c9d1dc67e54..3625dd45ae2 100644 --- a/intern/cycles/blender/blender_util.h +++ b/intern/cycles/blender/blender_util.h @@ -594,7 +594,7 @@ template<typename K, typename T> class id_map { T *find(const BL::ID &id) { - return find(id.ptr.id.data); + return find(id.ptr.owner_id); } T *find(const K &key) @@ -629,7 +629,7 @@ template<typename K, typename T> class id_map { bool sync(T **r_data, const BL::ID &id) { - return sync(r_data, id, id, id.ptr.id.data); + return sync(r_data, id, id, id.ptr.owner_id); } bool sync(T **r_data, const BL::ID &id, const BL::ID &parent, const K &key) diff --git a/intern/cycles/bvh/bvh4.cpp b/intern/cycles/bvh/bvh4.cpp index 850bdf5b8b4..b6df9024ffa 100644 --- a/intern/cycles/bvh/bvh4.cpp +++ b/intern/cycles/bvh/bvh4.cpp @@ -43,8 +43,7 @@ BVHNode *bvh_node_merge_children_recursively(const BVHNode *node) if (node->is_leaf()) { return new LeafNode(*reinterpret_cast<const LeafNode *>(node)); } - /* Collect nodes of one layer deeper, allowing us to have more childrem in - * an inner layer. */ + /* Collect nodes of one layer deeper, allowing us to have more children in an inner layer. 
*/ assert(node->num_children() <= 2); const BVHNode *children[4]; const BVHNode *child0 = node->get_child(0); diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp index 442b92100bb..79474fb0814 100644 --- a/intern/cycles/device/opencl/opencl_split.cpp +++ b/intern/cycles/device/opencl/opencl_split.cpp @@ -194,7 +194,7 @@ string OpenCLDevice::get_build_options(const DeviceRequestedFeatures &requested_ DeviceRequestedFeatures features(requested_features); enable_default_features(features); - /* Always turn off baking at this point. Baking is only usefull when building the bake kernel. + /* Always turn off baking at this point. Baking is only useful when building the bake kernel. * this also makes sure that the kernels that are build during baking can be reused * when not doing any baking. */ features.use_baking = false; diff --git a/intern/cycles/device/opencl/opencl_util.cpp b/intern/cycles/device/opencl/opencl_util.cpp index cc40ad42b06..dc9b4072841 100644 --- a/intern/cycles/device/opencl/opencl_util.cpp +++ b/intern/cycles/device/opencl/opencl_util.cpp @@ -746,7 +746,7 @@ bool OpenCLInfo::device_supported(const string &platform_name, const cl_device_i } VLOG(3) << "OpenCL driver version " << driver_major << "." << driver_minor; - /* It is possible tyo have Iris GPU on AMD/Apple OpenCL framework + /* It is possible to have Iris GPU on AMD/Apple OpenCL framework * (aka, it will not be on Intel framework). This isn't supported * and needs an explicit blacklist. 
*/ diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 8a8fee108ae..48439a8b68f 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -179,6 +179,7 @@ set(SRC_SVM_HEADERS svm/svm_blackbody.h svm/svm_bump.h svm/svm_camera.h + svm/svm_clamp.h svm/svm_closure.h svm/svm_convert.h svm/svm_checker.h @@ -198,6 +199,7 @@ set(SRC_SVM_HEADERS svm/svm_invert.h svm/svm_light_path.h svm/svm_magic.h + svm/svm_map_range.h svm/svm_mapping.h svm/svm_math.h svm/svm_math_util.h @@ -219,6 +221,7 @@ set(SRC_SVM_HEADERS svm/svm_voronoi.h svm/svm_voxel.h svm/svm_wave.h + svm/svm_white_noise.h ) set(SRC_GEOM_HEADERS @@ -486,6 +489,19 @@ endif() include_directories(${INC}) include_directories(SYSTEM ${INC_SYS}) +if(WITH_COMPILER_ASAN) + if(CMAKE_COMPILER_IS_GNUCC AND (NOT WITH_CYCLES_KERNEL_ASAN)) + # GCC hangs compiling the big kernel files with asan and release, so disable by default. + set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fno-sanitize=all") + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-sanitize=vptr") + elseif(CMAKE_C_COMPILER_ID MATCHES "Clang") + # With OSL, Cycles disables rtti in some modules, wich then breaks at linking + # when trying to use vptr sanitizer (included into 'undefined' general option). 
+ set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fno-sanitize=vptr") + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-sanitize=vptr") + endif() +endif() + set_source_files_properties(kernels/cpu/kernel.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_KERNEL_FLAGS}") set_source_files_properties(kernels/cpu/kernel_split.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_KERNEL_FLAGS}") set_source_files_properties(kernels/cpu/filter.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_KERNEL_FLAGS}") diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h index be0f05285e8..162b2fb5cdb 100644 --- a/intern/cycles/kernel/bvh/bvh.h +++ b/intern/cycles/kernel/bvh/bvh.h @@ -177,24 +177,23 @@ ccl_device_inline bool scene_intersect_valid(const Ray *ray) return isfinite_safe(ray->P.x) && isfinite_safe(ray->D.x); } -/* Note: ray is passed by value to work around a possible CUDA compiler bug. */ ccl_device_intersect bool scene_intersect(KernelGlobals *kg, - const Ray ray, + const Ray *ray, const uint visibility, Intersection *isect) { PROFILING_INIT(kg, PROFILING_INTERSECT); - if (!scene_intersect_valid(&ray)) { + if (!scene_intersect_valid(ray)) { return false; } #ifdef __EMBREE__ if (kernel_data.bvh.scene) { - isect->t = ray.t; + isect->t = ray->t; CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR); IntersectContext rtc_ctx(&ctx); RTCRayHit ray_hit; - kernel_embree_setup_rayhit(ray, ray_hit, visibility); + kernel_embree_setup_rayhit(*ray, ray_hit, visibility); rtcIntersect1(kernel_data.bvh.scene, &rtc_ctx.context, &ray_hit); if (ray_hit.hit.geomID != RTC_INVALID_GEOMETRY_ID && ray_hit.hit.primID != RTC_INVALID_GEOMETRY_ID) { @@ -207,42 +206,43 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg, #ifdef __OBJECT_MOTION__ if (kernel_data.bvh.have_motion) { # ifdef __HAIR__ - if (kernel_data.bvh.have_curves) - return bvh_intersect_hair_motion(kg, &ray, isect, visibility); + if (kernel_data.bvh.have_curves) { + return 
bvh_intersect_hair_motion(kg, ray, isect, visibility); + } # endif /* __HAIR__ */ - return bvh_intersect_motion(kg, &ray, isect, visibility); + return bvh_intersect_motion(kg, ray, isect, visibility); } #endif /* __OBJECT_MOTION__ */ #ifdef __HAIR__ - if (kernel_data.bvh.have_curves) - return bvh_intersect_hair(kg, &ray, isect, visibility); + if (kernel_data.bvh.have_curves) { + return bvh_intersect_hair(kg, ray, isect, visibility); + } #endif /* __HAIR__ */ #ifdef __KERNEL_CPU__ # ifdef __INSTANCING__ - if (kernel_data.bvh.have_instancing) - return bvh_intersect_instancing(kg, &ray, isect, visibility); + if (kernel_data.bvh.have_instancing) { + return bvh_intersect_instancing(kg, ray, isect, visibility); + } # endif /* __INSTANCING__ */ - - return bvh_intersect(kg, &ray, isect, visibility); + return bvh_intersect(kg, ray, isect, visibility); #else /* __KERNEL_CPU__ */ # ifdef __INSTANCING__ - return bvh_intersect_instancing(kg, &ray, isect, visibility); + return bvh_intersect_instancing(kg, ray, isect, visibility); # else - return bvh_intersect(kg, &ray, isect, visibility); + return bvh_intersect(kg, ray, isect, visibility); # endif /* __INSTANCING__ */ #endif /* __KERNEL_CPU__ */ } #ifdef __BVH_LOCAL__ -/* Note: ray is passed by value to work around a possible CUDA compiler bug. 
*/ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg, - const Ray ray, + const Ray *ray, LocalIntersection *local_isect, int local_object, uint *lcg_state, @@ -250,7 +250,7 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg, { PROFILING_INIT(kg, PROFILING_INTERSECT_LOCAL); - if (!scene_intersect_valid(&ray)) { + if (!scene_intersect_valid(ray)) { local_isect->num_hits = 0; return false; } @@ -264,19 +264,19 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg, ctx.sss_object_id = local_object; IntersectContext rtc_ctx(&ctx); RTCRay rtc_ray; - kernel_embree_setup_ray(ray, rtc_ray, PATH_RAY_ALL_VISIBILITY); + kernel_embree_setup_ray(*ray, rtc_ray, PATH_RAY_ALL_VISIBILITY); /* Get the Embree scene for this intersection. */ RTCGeometry geom = rtcGetGeometry(kernel_data.bvh.scene, local_object * 2); if (geom) { - float3 P = ray.P; - float3 dir = ray.D; - float3 idir = ray.D; + float3 P = ray->P; + float3 dir = ray->D; + float3 idir = ray->D; const int object_flag = kernel_tex_fetch(__object_flag, local_object); if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) { Transform ob_itfm; rtc_ray.tfar = bvh_instance_motion_push( - kg, local_object, &ray, &P, &dir, &idir, ray.t, &ob_itfm); + kg, local_object, ray, &P, &dir, &idir, ray->t, &ob_itfm); /* bvh_instance_motion_push() returns the inverse transform but * it's not needed here. 
*/ (void)ob_itfm; @@ -299,10 +299,10 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg, # endif /* __EMBREE__ */ # ifdef __OBJECT_MOTION__ if (kernel_data.bvh.have_motion) { - return bvh_intersect_local_motion(kg, &ray, local_isect, local_object, lcg_state, max_hits); + return bvh_intersect_local_motion(kg, ray, local_isect, local_object, lcg_state, max_hits); } # endif /* __OBJECT_MOTION__ */ - return bvh_intersect_local(kg, &ray, local_isect, local_object, lcg_state, max_hits); + return bvh_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits); } #endif @@ -377,15 +377,18 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals *kg, if (!scene_intersect_valid(ray)) { return false; } + # ifdef __OBJECT_MOTION__ if (kernel_data.bvh.have_motion) { return bvh_intersect_volume_motion(kg, ray, isect, visibility); } # endif /* __OBJECT_MOTION__ */ + # ifdef __KERNEL_CPU__ # ifdef __INSTANCING__ - if (kernel_data.bvh.have_instancing) + if (kernel_data.bvh.have_instancing) { return bvh_intersect_volume_instancing(kg, ray, isect, visibility); + } # endif /* __INSTANCING__ */ return bvh_intersect_volume(kg, ray, isect, visibility); # else /* __KERNEL_CPU__ */ @@ -422,15 +425,18 @@ ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals *kg, rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray); return rtc_ray.tfar == -INFINITY; } -# endif +# endif /* __EMBREE__ */ + # ifdef __OBJECT_MOTION__ if (kernel_data.bvh.have_motion) { return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility); } # endif /* __OBJECT_MOTION__ */ + # ifdef __INSTANCING__ - if (kernel_data.bvh.have_instancing) + if (kernel_data.bvh.have_instancing) { return bvh_intersect_volume_all_instancing(kg, ray, isect, max_hits, visibility); + } # endif /* __INSTANCING__ */ return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility); } diff --git a/intern/cycles/kernel/bvh/bvh_nodes.h 
b/intern/cycles/kernel/bvh/bvh_nodes.h index a33bc73e25b..db598d1c7fa 100644 --- a/intern/cycles/kernel/bvh/bvh_nodes.h +++ b/intern/cycles/kernel/bvh/bvh_nodes.h @@ -39,7 +39,9 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals *kg, { /* fetch node data */ +# ifdef __VISIBILITY_FLAG__ float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); +# endif float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr + 1); float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr + 2); float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr + 3); @@ -111,7 +113,9 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg, float dist[2]) { int mask = 0; +# ifdef __VISIBILITY_FLAG__ float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0); +# endif if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 0, &dist[0])) { # ifdef __VISIBILITY_FLAG__ if ((__float_as_uint(cnodes.x) & visibility)) diff --git a/intern/cycles/kernel/closure/bssrdf.h b/intern/cycles/kernel/closure/bssrdf.h index 0da4d6bd060..4d88a822821 100644 --- a/intern/cycles/kernel/closure/bssrdf.h +++ b/intern/cycles/kernel/closure/bssrdf.h @@ -231,7 +231,7 @@ ccl_device float bssrdf_burley_eval(const float d, float r) * NOTES: * - Surface albedo is already included into sc->weight, no need to * multiply by this term here. - * - This is normalized diffuse model, so the equation is mutliplied + * - This is normalized diffuse model, so the equation is multiplied * by 2*pi, which also matches cdf(). 
*/ float exp_r_3_d = expf(-r / (3.0f * d)); diff --git a/intern/cycles/kernel/filter/filter_transform.h b/intern/cycles/kernel/filter/filter_transform.h index 585c4b33787..880a661214e 100644 --- a/intern/cycles/kernel/filter/filter_transform.h +++ b/intern/cycles/kernel/filter/filter_transform.h @@ -70,9 +70,9 @@ ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buff filter_calculate_scale(feature_scale, use_time); /* === Generate the feature transformation. === - * This transformation maps the num_features-dimentional feature space to a reduced feature - * (r-feature) space which generally has fewer dimensions. This mainly helps to prevent - * overfitting. */ + * This transformation maps the num_features-dimensional feature space to a reduced feature + * (r-feature) space which generally has fewer dimensions. + * This mainly helps to prevent over-fitting. */ float feature_matrix[DENOISE_FEATURES * DENOISE_FEATURES]; math_matrix_zero(feature_matrix, num_features); FOR_PIXEL_WINDOW @@ -85,7 +85,7 @@ ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buff math_matrix_jacobi_eigendecomposition(feature_matrix, transform, num_features, 1); *rank = 0; - /* Prevent overfitting when a small window is used. */ + /* Prevent over-fitting when a small window is used. 
*/ int max_rank = min(num_features, num_pixels / 3); if (pca_threshold < 0.0f) { float threshold_energy = 0.0f; diff --git a/intern/cycles/kernel/geom/geom_object.h b/intern/cycles/kernel/geom/geom_object.h index f410e6e27e2..af4e6fbd89b 100644 --- a/intern/cycles/kernel/geom/geom_object.h +++ b/intern/cycles/kernel/geom/geom_object.h @@ -227,6 +227,17 @@ ccl_device_inline float object_surface_area(KernelGlobals *kg, int object) return kernel_tex_fetch(__objects, object).surface_area; } +/* Color of the object */ + +ccl_device_inline float3 object_color(KernelGlobals *kg, int object) +{ + if (object == OBJECT_NONE) + return make_float3(0.0f, 0.0f, 0.0f); + + const ccl_global KernelObject *kobject = &kernel_tex_fetch(__objects, object); + return make_float3(kobject->color[0], kobject->color[1], kobject->color[2]); +} + /* Pass ID number of object */ ccl_device_inline float object_pass_id(KernelGlobals *kg, int object) diff --git a/intern/cycles/kernel/kernel_accumulate.h b/intern/cycles/kernel/kernel_accumulate.h index b9d723222a1..46a51f5a560 100644 --- a/intern/cycles/kernel/kernel_accumulate.h +++ b/intern/cycles/kernel/kernel_accumulate.h @@ -319,10 +319,12 @@ ccl_device_inline void path_radiance_accum_ao(PathRadiance *L, float3 bsdf, float3 ao) { +#ifdef __PASSES__ /* Store AO pass. */ if (L->use_light_pass && state->bounce == 0) { L->ao += alpha * throughput * ao; } +#endif #ifdef __SHADOW_TRICKS__ /* For shadow catcher, accumulate ratio. 
*/ diff --git a/intern/cycles/kernel/kernel_camera.h b/intern/cycles/kernel/kernel_camera.h index 97cd3a3320c..62ce04ba48f 100644 --- a/intern/cycles/kernel/kernel_camera.h +++ b/intern/cycles/kernel/kernel_camera.h @@ -237,7 +237,9 @@ ccl_device void camera_sample_orthographic(KernelGlobals *kg, /* Panorama Camera */ ccl_device_inline void camera_sample_panorama(ccl_constant KernelCamera *cam, +#ifdef __CAMERA_MOTION__ const ccl_global DecomposedTransform *cam_motion, +#endif float raster_x, float raster_y, float lens_u, @@ -413,8 +415,12 @@ ccl_device_inline void camera_sample(KernelGlobals *kg, camera_sample_orthographic(kg, raster_x, raster_y, lens_u, lens_v, ray); } else { +#ifdef __CAMERA_MOTION__ const ccl_global DecomposedTransform *cam_motion = kernel_tex_array(__camera_motion); camera_sample_panorama(&kernel_data.cam, cam_motion, raster_x, raster_y, lens_u, lens_v, ray); +#else + camera_sample_panorama(&kernel_data.cam, raster_x, raster_y, lens_u, lens_v, ray); +#endif } } diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h index 469b81d120b..5075c434b10 100644 --- a/intern/cycles/kernel/kernel_compat_cuda.h +++ b/intern/cycles/kernel/kernel_compat_cuda.h @@ -58,6 +58,7 @@ __device__ half __float2half(const float f) # define ccl_device_forceinline __device__ __forceinline__ #endif #define ccl_device_noinline __device__ __noinline__ +#define ccl_device_noinline_cpu ccl_device #define ccl_global #define ccl_static_constant __constant__ #define ccl_constant const diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h index e040ea88d7c..1fe52c51ab0 100644 --- a/intern/cycles/kernel/kernel_compat_opencl.h +++ b/intern/cycles/kernel/kernel_compat_opencl.h @@ -35,6 +35,7 @@ #define ccl_device_inline ccl_device #define ccl_device_forceinline ccl_device #define ccl_device_noinline ccl_device ccl_noinline +#define ccl_device_noinline_cpu ccl_device #define ccl_may_alias 
#define ccl_static_constant static __constant #define ccl_constant __constant diff --git a/intern/cycles/kernel/kernel_emission.h b/intern/cycles/kernel/kernel_emission.h index 34300543f91..16d52b0c733 100644 --- a/intern/cycles/kernel/kernel_emission.h +++ b/intern/cycles/kernel/kernel_emission.h @@ -17,17 +17,17 @@ CCL_NAMESPACE_BEGIN /* Direction Emission */ -ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg, - ShaderData *emission_sd, - LightSample *ls, - ccl_addr_space PathState *state, - float3 I, - differential3 dI, - float t, - float time) +ccl_device_noinline_cpu float3 direct_emissive_eval(KernelGlobals *kg, + ShaderData *emission_sd, + LightSample *ls, + ccl_addr_space PathState *state, + float3 I, + differential3 dI, + float t, + float time) { /* setup shading at emitter */ - float3 eval; + float3 eval = make_float3(0.0f, 0.0f, 0.0f); if (shader_constant_emission_eval(kg, ls->shader, &eval)) { if ((ls->prim != PRIM_NONE) && dot(ls->Ng, I) < 0.0f) { @@ -98,15 +98,15 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg, return eval; } -ccl_device_noinline bool direct_emission(KernelGlobals *kg, - ShaderData *sd, - ShaderData *emission_sd, - LightSample *ls, - ccl_addr_space PathState *state, - Ray *ray, - BsdfEval *eval, - bool *is_lamp, - float rand_terminate) +ccl_device_noinline_cpu bool direct_emission(KernelGlobals *kg, + ShaderData *sd, + ShaderData *emission_sd, + LightSample *ls, + ccl_addr_space PathState *state, + Ray *ray, + BsdfEval *eval, + bool *is_lamp, + float rand_terminate) { if (ls->pdf == 0.0f) return false; @@ -208,7 +208,7 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg, /* Indirect Primitive Emission */ -ccl_device_noinline float3 indirect_primitive_emission( +ccl_device_noinline_cpu float3 indirect_primitive_emission( KernelGlobals *kg, ShaderData *sd, float t, int path_flag, float bsdf_pdf) { /* evaluate emissive closure */ @@ -234,11 +234,11 @@ ccl_device_noinline float3 
indirect_primitive_emission( /* Indirect Lamp Emission */ -ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg, - ShaderData *emission_sd, - ccl_addr_space PathState *state, - Ray *ray, - float3 *emission) +ccl_device_noinline_cpu bool indirect_lamp_emission(KernelGlobals *kg, + ShaderData *emission_sd, + ccl_addr_space PathState *state, + Ray *ray, + float3 *emission) { bool hit_lamp = false; @@ -293,10 +293,10 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg, /* Indirect Background */ -ccl_device_noinline float3 indirect_background(KernelGlobals *kg, - ShaderData *emission_sd, - ccl_addr_space PathState *state, - ccl_addr_space Ray *ray) +ccl_device_noinline_cpu float3 indirect_background(KernelGlobals *kg, + ShaderData *emission_sd, + ccl_addr_space PathState *state, + ccl_addr_space Ray *ray) { #ifdef __BACKGROUND__ int shader = kernel_data.background.surface_shader; @@ -314,7 +314,7 @@ ccl_device_noinline float3 indirect_background(KernelGlobals *kg, } /* Evaluate background shader. */ - float3 L; + float3 L = make_float3(0.0f, 0.0f, 0.0f); if (!shader_constant_emission_eval(kg, shader, &L)) { # ifdef __SPLIT_KERNEL__ Ray priv_ray = *ray; diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h index 9128bfa9d95..ce908ce0fe2 100644 --- a/intern/cycles/kernel/kernel_light.h +++ b/intern/cycles/kernel/kernel_light.h @@ -182,17 +182,7 @@ ccl_device float lamp_light_pdf(KernelGlobals *kg, const float3 Ng, const float3 #ifdef __BACKGROUND_MIS__ -/* TODO(sergey): In theory it should be all fine to use noinline for all - * devices, but we're so close to the release so better not screw things - * up for CPU at least. 
- */ -# ifdef __KERNEL_GPU__ -ccl_device_noinline -# else -ccl_device -# endif - float3 - background_map_sample(KernelGlobals *kg, float randu, float randv, float *pdf) +ccl_device float3 background_map_sample(KernelGlobals *kg, float randu, float randv, float *pdf) { /* for the following, the CDF values are actually a pair of floats, with the * function value as X and the actual CDF as Y. The last entry's function @@ -274,13 +264,7 @@ ccl_device /* TODO(sergey): Same as above, after the release we should consider using * 'noinline' for all devices. */ -# ifdef __KERNEL_GPU__ -ccl_device_noinline -# else -ccl_device -# endif - float - background_map_pdf(KernelGlobals *kg, float3 direction) +ccl_device float background_map_pdf(KernelGlobals *kg, float3 direction) { float2 uv = direction_to_equirectangular(direction); int res_x = kernel_data.integrator.pdf_background_res_x; @@ -1092,7 +1076,7 @@ ccl_device int light_distribution_sample(KernelGlobals *kg, float *randu) int len = kernel_data.integrator.num_distribution + 1; float r = *randu; - while (len > 0) { + do { int half_len = len >> 1; int middle = first + half_len; @@ -1103,7 +1087,7 @@ ccl_device int light_distribution_sample(KernelGlobals *kg, float *randu) first = middle + 1; len = len - half_len - 1; } - } + } while (len > 0); /* Clamping should not be needed but float rounding errors seem to * make this fail on rare occasions. 
*/ @@ -1120,42 +1104,49 @@ ccl_device int light_distribution_sample(KernelGlobals *kg, float *randu) /* Generic Light */ -ccl_device bool light_select_reached_max_bounces(KernelGlobals *kg, int index, int bounce) +ccl_device_inline bool light_select_reached_max_bounces(KernelGlobals *kg, int index, int bounce) { return (bounce > kernel_tex_fetch(__lights, index).max_bounces); } -ccl_device_noinline bool light_sample( - KernelGlobals *kg, float randu, float randv, float time, float3 P, int bounce, LightSample *ls) +ccl_device_noinline bool light_sample(KernelGlobals *kg, + int lamp, + float randu, + float randv, + float time, + float3 P, + int bounce, + LightSample *ls) { - /* sample index */ - int index = light_distribution_sample(kg, &randu); - - /* fetch light data */ - const ccl_global KernelLightDistribution *kdistribution = &kernel_tex_fetch(__light_distribution, - index); - int prim = kdistribution->prim; - - if (prim >= 0) { - int object = kdistribution->mesh_light.object_id; - int shader_flag = kdistribution->mesh_light.shader_flag; + if (lamp < 0) { + /* sample index */ + int index = light_distribution_sample(kg, &randu); + + /* fetch light data */ + const ccl_global KernelLightDistribution *kdistribution = &kernel_tex_fetch( + __light_distribution, index); + int prim = kdistribution->prim; + + if (prim >= 0) { + int object = kdistribution->mesh_light.object_id; + int shader_flag = kdistribution->mesh_light.shader_flag; + + triangle_light_sample(kg, prim, object, randu, randv, time, ls, P); + ls->shader |= shader_flag; + return (ls->pdf > 0.0f); + } - triangle_light_sample(kg, prim, object, randu, randv, time, ls, P); - ls->shader |= shader_flag; - return (ls->pdf > 0.0f); + lamp = -prim - 1; } - else { - int lamp = -prim - 1; - if (UNLIKELY(light_select_reached_max_bounces(kg, lamp, bounce))) { - return false; - } - - return lamp_light_sample(kg, lamp, randu, randv, P, ls); + if (UNLIKELY(light_select_reached_max_bounces(kg, lamp, bounce))) { + return 
false; } + + return lamp_light_sample(kg, lamp, randu, randv, P, ls); } -ccl_device int light_select_num_samples(KernelGlobals *kg, int index) +ccl_device_inline int light_select_num_samples(KernelGlobals *kg, int index) { return kernel_tex_fetch(__lights, index).samples; } diff --git a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h index e84937e2fd9..4a424866efe 100644 --- a/intern/cycles/kernel/kernel_passes.h +++ b/intern/cycles/kernel/kernel_passes.h @@ -114,14 +114,12 @@ ccl_device_inline void kernel_write_denoising_shadow(KernelGlobals *kg, float value = path_total_shaded / max(path_total, 1e-7f); kernel_write_pass_float(buffer + 2, value * value); } -#endif /* __DENOISING_FEATURES__ */ ccl_device_inline void kernel_update_denoising_features(KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, PathRadiance *L) { -#ifdef __DENOISING_FEATURES__ if (state->denoising_feature_weight == 0.0f) { return; } @@ -162,13 +160,8 @@ ccl_device_inline void kernel_update_denoising_features(KernelGlobals *kg, state->denoising_feature_weight = 0.0f; } -#else - (void)kg; - (void)sd; - (void)state; - (void)L; -#endif /* __DENOISING_FEATURES__ */ } +#endif /* __DENOISING_FEATURES__ */ #ifdef __KERNEL_DEBUG__ ccl_device_inline void kernel_write_debug_passes(KernelGlobals *kg, diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index f3e2a8a234a..63be0a7f505 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -65,7 +65,7 @@ ccl_device_forceinline bool kernel_path_scene_intersect(KernelGlobals *kg, ray->t = kernel_data.background.ao_distance; } - bool hit = scene_intersect(kg, *ray, visibility, isect); + bool hit = scene_intersect(kg, ray, visibility, isect); #ifdef __KERNEL_DEBUG__ if (state->flag & PATH_RAY_CAMERA) { @@ -103,7 +103,7 @@ ccl_device_forceinline void kernel_path_lamp_emission(KernelGlobals *kg, light_ray.dP = ray->dP; /* intersect with lamp */ - float3 
emission; + float3 emission = make_float3(0.0f, 0.0f, 0.0f); if (indirect_lamp_emission(kg, emission_sd, state, &light_ray, &emission)) path_radiance_accum_emission(L, state, throughput, emission); @@ -474,12 +474,10 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, # endif /* __SUBSURFACE__ */ # if defined(__EMISSION__) - if (kernel_data.integrator.use_direct_light) { - int all = (kernel_data.integrator.sample_all_lights_indirect) || - (state->flag & PATH_RAY_SHADOW_CATCHER); - kernel_branched_path_surface_connect_light( - kg, sd, emission_sd, state, throughput, 1.0f, L, all); - } + int all = (kernel_data.integrator.sample_all_lights_indirect) || + (state->flag & PATH_RAY_SHADOW_CATCHER); + kernel_branched_path_surface_connect_light( + kg, sd, emission_sd, state, throughput, 1.0f, L, all); # endif /* defined(__EMISSION__) */ # ifdef __VOLUME__ @@ -590,7 +588,9 @@ ccl_device_forceinline void kernel_path_integrate(KernelGlobals *kg, throughput /= probability; } +# ifdef __DENOISING_FEATURES__ kernel_update_denoising_features(kg, &sd, state, L); +# endif # ifdef __AO__ /* ambient occlusion */ @@ -610,8 +610,10 @@ ccl_device_forceinline void kernel_path_integrate(KernelGlobals *kg, } # endif /* __SUBSURFACE__ */ +# ifdef __EMISSION__ /* direct lighting */ kernel_path_surface_connect_light(kg, &sd, emission_sd, throughput, state, L); +# endif /* __EMISSION__ */ # ifdef __VOLUME__ } diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h index f3a1ea3f4fd..ea6b23e7eb4 100644 --- a/intern/cycles/kernel/kernel_path_branched.h +++ b/intern/cycles/kernel/kernel_path_branched.h @@ -198,14 +198,14 @@ ccl_device_forceinline void kernel_branched_path_volume(KernelGlobals *kg, # endif /* __VOLUME__ */ /* bounce off surface and integrate indirect light */ -ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGlobals *kg, - ShaderData *sd, - ShaderData *indirect_sd, - ShaderData *emission_sd, - float3 
throughput, - float num_samples_adjust, - PathState *state, - PathRadiance *L) +ccl_device_noinline_cpu void kernel_branched_path_surface_indirect_light(KernelGlobals *kg, + ShaderData *sd, + ShaderData *indirect_sd, + ShaderData *emission_sd, + float3 throughput, + float num_samples_adjust, + PathState *state, + PathRadiance *L) { float sum_sample_weight = 0.0f; # ifdef __DENOISING_FEATURES__ @@ -445,7 +445,9 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg, } } +# ifdef __DENOISING_FEATURES__ kernel_update_denoising_features(kg, &sd, &state, L); +# endif # ifdef __AO__ /* ambient occlusion */ diff --git a/intern/cycles/kernel/kernel_path_surface.h b/intern/cycles/kernel/kernel_path_surface.h index a1ab4951565..d299106ea96 100644 --- a/intern/cycles/kernel/kernel_path_surface.h +++ b/intern/cycles/kernel/kernel_path_surface.h @@ -20,7 +20,7 @@ CCL_NAMESPACE_BEGIN defined(__BAKING__) /* branched path tracing: connect path directly to position on one or more lights and add it to L */ -ccl_device_noinline void kernel_branched_path_surface_connect_light( +ccl_device_noinline_cpu void kernel_branched_path_surface_connect_light( KernelGlobals *kg, ShaderData *sd, ShaderData *emission_sd, @@ -32,140 +32,100 @@ ccl_device_noinline void kernel_branched_path_surface_connect_light( { # ifdef __EMISSION__ /* sample illumination from lights to find path contribution */ - if (!(sd->flag & SD_BSDF_HAS_EVAL)) - return; - - Ray light_ray; BsdfEval L_light; - bool is_lamp; - -# ifdef __OBJECT_MOTION__ - light_ray.time = sd->time; -# endif - - if (sample_all_lights) { - /* lamp sampling */ - for (int i = 0; i < kernel_data.integrator.num_all_lights; i++) { - if (UNLIKELY(light_select_reached_max_bounces(kg, i, state->bounce))) - continue; - - int num_samples = ceil_to_int(num_samples_adjust * light_select_num_samples(kg, i)); - float num_samples_inv = num_samples_adjust / - (num_samples * kernel_data.integrator.num_all_lights); - uint lamp_rng_hash = 
cmj_hash(state->rng_hash, i); - - for (int j = 0; j < num_samples; j++) { - float light_u, light_v; - path_branched_rng_2D( - kg, lamp_rng_hash, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v); - float terminate = path_branched_rng_light_termination( - kg, lamp_rng_hash, state, j, num_samples); - LightSample ls; - if (lamp_light_sample(kg, i, light_u, light_v, sd->P, &ls)) { - /* The sampling probability returned by lamp_light_sample assumes that all lights were - * sampled. - * However, this code only samples lamps, so if the scene also had mesh lights, the real - * probability is twice as high. */ - if (kernel_data.integrator.pdf_triangles != 0.0f) - ls.pdf *= 2.0f; + int num_lights = 0; + if (kernel_data.integrator.use_direct_light) { + if (sample_all_lights) { + num_lights = kernel_data.integrator.num_all_lights; + if (kernel_data.integrator.pdf_triangles != 0.0f) { + num_lights += 1; + } + } + else { + num_lights = 1; + } + } - if (direct_emission( - kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) { - /* trace shadow ray */ - float3 shadow; - - if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) { - /* accumulate */ - path_radiance_accum_light(L, - state, - throughput * num_samples_inv, - &L_light, - shadow, - num_samples_inv, - is_lamp); - } - else { - path_radiance_accum_total_light(L, state, throughput * num_samples_inv, &L_light); - } - } + for (int i = 0; i < num_lights; i++) { + /* sample one light at random */ + int num_samples = 1; + int num_all_lights = 1; + uint lamp_rng_hash = state->rng_hash; + bool double_pdf = false; + bool is_mesh_light = false; + bool is_lamp = false; + + if (sample_all_lights) { + /* lamp sampling */ + is_lamp = i < kernel_data.integrator.num_all_lights; + if (is_lamp) { + if (UNLIKELY(light_select_reached_max_bounces(kg, i, state->bounce))) { + continue; } + num_samples = ceil_to_int(num_samples_adjust * light_select_num_samples(kg, i)); + num_all_lights = 
kernel_data.integrator.num_all_lights; + lamp_rng_hash = cmj_hash(state->rng_hash, i); + double_pdf = kernel_data.integrator.pdf_triangles != 0.0f; + } + /* mesh light sampling */ + else { + num_samples = ceil_to_int(num_samples_adjust * kernel_data.integrator.mesh_light_samples); + double_pdf = kernel_data.integrator.num_all_lights != 0; + is_mesh_light = true; } } - /* mesh light sampling */ - if (kernel_data.integrator.pdf_triangles != 0.0f) { - int num_samples = ceil_to_int(num_samples_adjust * - kernel_data.integrator.mesh_light_samples); - float num_samples_inv = num_samples_adjust / num_samples; + float num_samples_inv = num_samples_adjust / (num_samples * num_all_lights); - for (int j = 0; j < num_samples; j++) { + for (int j = 0; j < num_samples; j++) { + Ray light_ray; + light_ray.t = 0.0f; /* reset ray */ +# ifdef __OBJECT_MOTION__ + light_ray.time = sd->time; +# endif + bool has_emission = false; + + if (kernel_data.integrator.use_direct_light && (sd->flag & SD_BSDF_HAS_EVAL)) { float light_u, light_v; path_branched_rng_2D( - kg, state->rng_hash, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v); + kg, lamp_rng_hash, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v); float terminate = path_branched_rng_light_termination( - kg, state->rng_hash, state, j, num_samples); + kg, lamp_rng_hash, state, j, num_samples); /* only sample triangle lights */ - if (kernel_data.integrator.num_all_lights) + if (is_mesh_light && double_pdf) { light_u = 0.5f * light_u; + } LightSample ls; - if (light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) { - /* Same as above, probability needs to be corrected since the sampling was forced to - * select a mesh light. */ - if (kernel_data.integrator.num_all_lights) + const int lamp = is_lamp ? i : -1; + if (light_sample(kg, lamp, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) { + /* The sampling probability returned by lamp_light_sample assumes that all lights were + * sampled. 
However, this code only samples lamps, so if the scene also had mesh lights, + * the real probability is twice as high. */ + if (double_pdf) { ls.pdf *= 2.0f; - - if (direct_emission( - kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) { - /* trace shadow ray */ - float3 shadow; - - if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) { - /* accumulate */ - path_radiance_accum_light(L, - state, - throughput * num_samples_inv, - &L_light, - shadow, - num_samples_inv, - is_lamp); - } - else { - path_radiance_accum_total_light(L, state, throughput * num_samples_inv, &L_light); - } } + + has_emission = direct_emission( + kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate); } } - } - } - else { - /* sample one light at random */ - float light_u, light_v; - path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v); - float terminate = path_state_rng_light_termination(kg, state); - LightSample ls; - if (light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) { - /* sample random light */ - if (direct_emission( - kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) { - /* trace shadow ray */ - float3 shadow; - - if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) { + /* trace shadow ray */ + float3 shadow; + + const bool blocked = shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow); + + if (has_emission) { + if (!blocked) { /* accumulate */ - path_radiance_accum_light(L, - state, - throughput * num_samples_adjust, - &L_light, - shadow, - num_samples_adjust, - is_lamp); + path_radiance_accum_light( + L, state, throughput * num_samples_inv, &L_light, shadow, num_samples_inv, is_lamp); } else { - path_radiance_accum_total_light(L, state, throughput * num_samples_adjust, &L_light); + path_radiance_accum_total_light(L, state, throughput * num_samples_inv, &L_light); } } } @@ -255,45 +215,48 @@ ccl_device_inline void 
kernel_path_surface_connect_light(KernelGlobals *kg, PROFILING_INIT(kg, PROFILING_CONNECT_LIGHT); #ifdef __EMISSION__ - if (!(kernel_data.integrator.use_direct_light && (sd->flag & SD_BSDF_HAS_EVAL))) - return; - # ifdef __SHADOW_TRICKS__ - if (state->flag & PATH_RAY_SHADOW_CATCHER) { - kernel_branched_path_surface_connect_light(kg, sd, emission_sd, state, throughput, 1.0f, L, 1); - return; - } -# endif - + int all = (state->flag & PATH_RAY_SHADOW_CATCHER); + kernel_branched_path_surface_connect_light(kg, sd, emission_sd, state, throughput, 1.0f, L, all); +# else /* sample illumination from lights to find path contribution */ - float light_u, light_v; - path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v); - Ray light_ray; BsdfEval L_light; - bool is_lamp; + bool is_lamp = false; + bool has_emission = false; -# ifdef __OBJECT_MOTION__ + light_ray.t = 0.0f; +# ifdef __OBJECT_MOTION__ light_ray.time = sd->time; -# endif +# endif - LightSample ls; - if (light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) { - float terminate = path_state_rng_light_termination(kg, state); - if (direct_emission( - kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) { - /* trace shadow ray */ - float3 shadow; + if (kernel_data.integrator.use_direct_light && (sd->flag & SD_BSDF_HAS_EVAL)) { + float light_u, light_v; + path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v); - if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) { - /* accumulate */ - path_radiance_accum_light(L, state, throughput, &L_light, shadow, 1.0f, is_lamp); - } - else { - path_radiance_accum_total_light(L, state, throughput, &L_light); - } + LightSample ls; + if (light_sample(kg, -1, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) { + float terminate = path_state_rng_light_termination(kg, state); + has_emission = direct_emission( + kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate); + } + } + + /* trace 
shadow ray */ + float3 shadow; + + const bool blocked = shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow); + + if (has_emission) { + if (!blocked) { + /* accumulate */ + path_radiance_accum_light(L, state, throughput, &L_light, shadow, 1.0f, is_lamp); + } + else { + path_radiance_accum_total_light(L, state, throughput, &L_light); } } +# endif #endif } diff --git a/intern/cycles/kernel/kernel_path_volume.h b/intern/cycles/kernel/kernel_path_volume.h index fea4dfc159d..6b62005d19a 100644 --- a/intern/cycles/kernel/kernel_path_volume.h +++ b/intern/cycles/kernel/kernel_path_volume.h @@ -26,49 +26,48 @@ ccl_device_inline void kernel_path_volume_connect_light(KernelGlobals *kg, PathRadiance *L) { # ifdef __EMISSION__ - if (!kernel_data.integrator.use_direct_light) - return; - /* sample illumination from lights to find path contribution */ - float light_u, light_v; - path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v); - Ray light_ray; BsdfEval L_light; - LightSample ls; - bool is_lamp; + bool is_lamp = false; + bool has_emission = false; + light_ray.t = 0.0f; +# ifdef __OBJECT_MOTION__ /* connect to light from given point where shader has been evaluated */ light_ray.time = sd->time; +# endif - if (light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) { - float terminate = path_state_rng_light_termination(kg, state); - if (direct_emission( - kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) { - /* trace shadow ray */ - float3 shadow; + if (kernel_data.integrator.use_direct_light) { + float light_u, light_v; + path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v); - if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) { - /* accumulate */ - path_radiance_accum_light(L, state, throughput, &L_light, shadow, 1.0f, is_lamp); - } + LightSample ls; + if (light_sample(kg, -1, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) { + float terminate = 
path_state_rng_light_termination(kg, state); + has_emission = direct_emission( + kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate); } } + + /* trace shadow ray */ + float3 shadow; + + const bool blocked = shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow); + + if (has_emission && !blocked) { + /* accumulate */ + path_radiance_accum_light(L, state, throughput, &L_light, shadow, 1.0f, is_lamp); + } # endif /* __EMISSION__ */ } -# ifdef __KERNEL_GPU__ -ccl_device_noinline -# else -ccl_device -# endif - bool - kernel_path_volume_bounce(KernelGlobals *kg, - ShaderData *sd, - ccl_addr_space float3 *throughput, - ccl_addr_space PathState *state, - PathRadianceState *L_state, - ccl_addr_space Ray *ray) +ccl_device_noinline_cpu bool kernel_path_volume_bounce(KernelGlobals *kg, + ShaderData *sd, + ccl_addr_space float3 *throughput, + ccl_addr_space PathState *state, + PathRadianceState *L_state, + ccl_addr_space Ray *ray) { /* sample phase function */ float phase_pdf; @@ -128,7 +127,7 @@ ccl_device return true; } -# ifndef __SPLIT_KERNEL__ +# if !defined(__SPLIT_KERNEL__) && (defined(__BRANCHED_PATH__) || defined(__VOLUME_DECOUPLED__)) ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, ShaderData *sd, ShaderData *emission_sd, @@ -140,96 +139,71 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, const VolumeSegment *segment) { # ifdef __EMISSION__ - if (!kernel_data.integrator.use_direct_light) - return; - - Ray light_ray; BsdfEval L_light; - bool is_lamp; - - light_ray.time = sd->time; + int num_lights = 1; if (sample_all_lights) { - /* lamp sampling */ - for (int i = 0; i < kernel_data.integrator.num_all_lights; i++) { - if (UNLIKELY(light_select_reached_max_bounces(kg, i, state->bounce))) - continue; - - int num_samples = light_select_num_samples(kg, i); - float num_samples_inv = 1.0f / (num_samples * kernel_data.integrator.num_all_lights); - uint lamp_rng_hash = 
cmj_hash(state->rng_hash, i); - - for (int j = 0; j < num_samples; j++) { - /* sample random position on given light */ - float light_u, light_v; - path_branched_rng_2D( - kg, lamp_rng_hash, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v); - - LightSample ls; - lamp_light_sample(kg, i, light_u, light_v, ray->P, &ls); - - float3 tp = throughput; - - /* sample position on volume segment */ - float rphase = path_branched_rng_1D( - kg, state->rng_hash, state, j, num_samples, PRNG_PHASE_CHANNEL); - float rscatter = path_branched_rng_1D( - kg, state->rng_hash, state, j, num_samples, PRNG_SCATTER_DISTANCE); - - VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg, - state, - ray, - sd, - &tp, - rphase, - rscatter, - segment, - (ls.t != FLT_MAX) ? &ls.P : - NULL, - false); + num_lights = kernel_data.integrator.num_all_lights; + if (kernel_data.integrator.pdf_triangles != 0.0f) { + num_lights += 1; + } + } - /* todo: split up light_sample so we don't have to call it again with new position */ - if (result == VOLUME_PATH_SCATTERED && - lamp_light_sample(kg, i, light_u, light_v, sd->P, &ls)) { - if (kernel_data.integrator.pdf_triangles != 0.0f) - ls.pdf *= 2.0f; - - float terminate = path_branched_rng_light_termination( - kg, state->rng_hash, state, j, num_samples); - if (direct_emission( - kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) { - /* trace shadow ray */ - float3 shadow; - - if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) { - /* accumulate */ - path_radiance_accum_light( - L, state, tp * num_samples_inv, &L_light, shadow, num_samples_inv, is_lamp); - } - } + for (int i = 0; i < num_lights; ++i) { + /* sample one light at random */ + int num_samples = 1; + int num_all_lights = 1; + uint lamp_rng_hash = state->rng_hash; + bool double_pdf = false; + bool is_mesh_light = false; + bool is_lamp = false; + + if (sample_all_lights) { + /* lamp sampling */ + is_lamp = i < 
kernel_data.integrator.num_all_lights; + if (is_lamp) { + if (UNLIKELY(light_select_reached_max_bounces(kg, i, state->bounce))) { + continue; } + num_samples = light_select_num_samples(kg, i); + num_all_lights = kernel_data.integrator.num_all_lights; + lamp_rng_hash = cmj_hash(state->rng_hash, i); + double_pdf = kernel_data.integrator.pdf_triangles != 0.0f; + } + /* mesh light sampling */ + else { + num_samples = kernel_data.integrator.mesh_light_samples; + double_pdf = kernel_data.integrator.num_all_lights != 0; + is_mesh_light = true; } } - /* mesh light sampling */ - if (kernel_data.integrator.pdf_triangles != 0.0f) { - int num_samples = kernel_data.integrator.mesh_light_samples; - float num_samples_inv = 1.0f / num_samples; + float num_samples_inv = 1.0f / (num_samples * num_all_lights); + + for (int j = 0; j < num_samples; j++) { + Ray light_ray; + light_ray.t = 0.0f; /* reset ray */ +# ifdef __OBJECT_MOTION__ + light_ray.time = sd->time; +# endif + bool has_emission = false; - for (int j = 0; j < num_samples; j++) { - /* sample random position on random triangle */ + float3 tp = throughput; + + if (kernel_data.integrator.use_direct_light) { + /* sample random position on random light/triangle */ float light_u, light_v; path_branched_rng_2D( - kg, state->rng_hash, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v); + kg, lamp_rng_hash, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v); /* only sample triangle lights */ - if (kernel_data.integrator.num_all_lights) + if (is_mesh_light && double_pdf) { light_u = 0.5f * light_u; + } LightSample ls; - light_sample(kg, light_u, light_v, sd->time, ray->P, state->bounce, &ls); - - float3 tp = throughput; + const int lamp = is_lamp ? 
i : -1; + light_sample(kg, lamp, light_u, light_v, sd->time, ray->P, state->bounce, &ls); /* sample position on volume segment */ float rphase = path_branched_rng_1D( @@ -249,69 +223,31 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, NULL, false); - /* todo: split up light_sample so we don't have to call it again with new position */ - if (result == VOLUME_PATH_SCATTERED && - light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) { - if (kernel_data.integrator.num_all_lights) - ls.pdf *= 2.0f; - - float terminate = path_branched_rng_light_termination( - kg, state->rng_hash, state, j, num_samples); - if (direct_emission( - kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) { - /* trace shadow ray */ - float3 shadow; - - if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) { - /* accumulate */ - path_radiance_accum_light( - L, state, tp * num_samples_inv, &L_light, shadow, num_samples_inv, is_lamp); + if (result == VOLUME_PATH_SCATTERED) { + /* todo: split up light_sample so we don't have to call it again with new position */ + if (light_sample(kg, lamp, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) { + if (double_pdf) { + ls.pdf *= 2.0f; } + + /* sample random light */ + float terminate = path_branched_rng_light_termination( + kg, state->rng_hash, state, j, num_samples); + has_emission = direct_emission( + kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate); } } } - } - } - else { - /* sample random position on random light */ - float light_u, light_v; - path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v); - LightSample ls; - light_sample(kg, light_u, light_v, sd->time, ray->P, state->bounce, &ls); - - float3 tp = throughput; - - /* sample position on volume segment */ - float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL); - float rscatter = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE); - - VolumeIntegrateResult 
result = kernel_volume_decoupled_scatter(kg, - state, - ray, - sd, - &tp, - rphase, - rscatter, - segment, - (ls.t != FLT_MAX) ? &ls.P : - NULL, - false); - - /* todo: split up light_sample so we don't have to call it again with new position */ - if (result == VOLUME_PATH_SCATTERED && - light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) { - /* sample random light */ - float terminate = path_state_rng_light_termination(kg, state); - if (direct_emission( - kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) { - /* trace shadow ray */ - float3 shadow; - - if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) { - /* accumulate */ - path_radiance_accum_light(L, state, tp, &L_light, shadow, 1.0f, is_lamp); - } + /* trace shadow ray */ + float3 shadow; + + const bool blocked = shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow); + + if (has_emission && !blocked) { + /* accumulate */ + path_radiance_accum_light( + L, state, tp * num_samples_inv, &L_light, shadow, num_samples_inv, is_lamp); } } } diff --git a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h index 78eafbfe3cb..a5ae427c2d3 100644 --- a/intern/cycles/kernel/kernel_random.h +++ b/intern/cycles/kernel/kernel_random.h @@ -130,7 +130,7 @@ ccl_device_inline void path_rng_init(KernelGlobals *kg, float *fy) { /* load state */ - *rng_hash = hash_int_2d(x, y); + *rng_hash = hash_uint2(x, y); *rng_hash ^= kernel_data.integrator.seed; #ifdef __DEBUG_CORRELATION__ diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h index 4963e012e15..4688857b718 100644 --- a/intern/cycles/kernel/kernel_shader.h +++ b/intern/cycles/kernel/kernel_shader.h @@ -686,8 +686,7 @@ ccl_device_inline const ShaderClosure *shader_bsdf_pick(ShaderData *sd, float *r if (r < next_sum) { sampled = i; - /* Rescale to reuse for direction sample, to better - * preserve stratification. 
*/ + /* Rescale to reuse for direction sample, to better preserve stratification. */ *randu = (r - partial_sum) / sc->sample_weight; break; } @@ -743,8 +742,7 @@ ccl_device_inline const ShaderClosure *shader_bssrdf_pick(ShaderData *sd, *throughput *= (sum_bsdf + sum_bssrdf) / sum_bssrdf; sampled = i; - /* Rescale to reuse for direction sample, to better - * preserve stratifaction. */ + /* Rescale to reuse for direction sample, to better preserve stratification. */ *randu = (r - partial_sum) / sc->sample_weight; break; } @@ -780,7 +778,7 @@ ccl_device_inline int shader_bsdf_sample(KernelGlobals *kg, kernel_assert(CLOSURE_IS_BSDF(sc->type)); int label; - float3 eval; + float3 eval = make_float3(0.0f, 0.0f, 0.0f); *pdf = 0.0f; label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf); @@ -810,7 +808,7 @@ ccl_device int shader_bsdf_sample_closure(KernelGlobals *kg, PROFILING_INIT(kg, PROFILING_CLOSURE_SAMPLE); int label; - float3 eval; + float3 eval = make_float3(0.0f, 0.0f, 0.0f); *pdf = 0.0f; label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf); @@ -1223,7 +1221,7 @@ ccl_device int shader_volume_phase_sample(KernelGlobals *kg, * depending on color channels, even if this is perhaps not a common case */ const ShaderClosure *sc = &sd->closure[sampled]; int label; - float3 eval; + float3 eval = make_float3(0.0f, 0.0f, 0.0f); *pdf = 0.0f; label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf); @@ -1248,7 +1246,7 @@ ccl_device int shader_phase_sample_closure(KernelGlobals *kg, PROFILING_INIT(kg, PROFILING_CLOSURE_VOLUME_SAMPLE); int label; - float3 eval; + float3 eval = make_float3(0.0f, 0.0f, 0.0f); *pdf = 0.0f; label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf); diff --git a/intern/cycles/kernel/kernel_shadow.h b/intern/cycles/kernel/kernel_shadow.h index 6640f64518a..c02d7d77faf 100644 --- a/intern/cycles/kernel/kernel_shadow.h +++ 
b/intern/cycles/kernel/kernel_shadow.h @@ -103,7 +103,7 @@ ccl_device bool shadow_blocked_opaque(KernelGlobals *kg, Intersection *isect, float3 *shadow) { - const bool blocked = scene_intersect(kg, *ray, visibility & PATH_RAY_SHADOW_OPAQUE, isect); + const bool blocked = scene_intersect(kg, ray, visibility & PATH_RAY_SHADOW_OPAQUE, isect); #ifdef __VOLUME__ if (!blocked && state->volume_stack[0].shader != SHADER_NONE) { /* Apply attenuation from current volume shader. */ @@ -318,7 +318,7 @@ ccl_device bool shadow_blocked_transparent_stepped_loop(KernelGlobals *kg, if (bounce >= kernel_data.integrator.transparent_max_bounce) { return true; } - if (!scene_intersect(kg, *ray, visibility & PATH_RAY_SHADOW_TRANSPARENT, isect)) { + if (!scene_intersect(kg, ray, visibility & PATH_RAY_SHADOW_TRANSPARENT, isect)) { break; } if (!shader_transparent_shadow(kg, isect)) { @@ -374,7 +374,7 @@ ccl_device bool shadow_blocked_transparent_stepped(KernelGlobals *kg, Intersection *isect, float3 *shadow) { - bool blocked = scene_intersect(kg, *ray, visibility & PATH_RAY_SHADOW_OPAQUE, isect); + bool blocked = scene_intersect(kg, ray, visibility & PATH_RAY_SHADOW_OPAQUE, isect); bool is_transparent_isect = blocked ? shader_transparent_shadow(kg, isect) : false; return shadow_blocked_transparent_stepped_loop( kg, sd, shadow_sd, state, visibility, ray, isect, blocked, is_transparent_isect, shadow); @@ -433,7 +433,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, * TODO(sergey): Check why using record-all behavior causes slowdown in such * cases. Could that be caused by a higher spill pressure? */ - const bool blocked = scene_intersect(kg, *ray, visibility & PATH_RAY_SHADOW_OPAQUE, &isect); + const bool blocked = scene_intersect(kg, ray, visibility & PATH_RAY_SHADOW_OPAQUE, &isect); const bool is_transparent_isect = blocked ? 
shader_transparent_shadow(kg, &isect) : false; if (!blocked || !is_transparent_isect || max_hits + 1 >= SHADOW_STACK_MAX_HITS) { return shadow_blocked_transparent_stepped_loop( diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h index 7510e50a962..8dc1904058d 100644 --- a/intern/cycles/kernel/kernel_subsurface.h +++ b/intern/cycles/kernel/kernel_subsurface.h @@ -222,7 +222,7 @@ ccl_device_inline int subsurface_scatter_disk(KernelGlobals *kg, /* intersect with the same object. if multiple intersections are found it * will use at most BSSRDF_MAX_HITS hits, a random subset of all hits */ - scene_intersect_local(kg, *ray, ss_isect, sd->object, lcg_state, BSSRDF_MAX_HITS); + scene_intersect_local(kg, ray, ss_isect, sd->object, lcg_state, BSSRDF_MAX_HITS); int num_eval_hits = min(ss_isect->num_hits, BSSRDF_MAX_HITS); for (int hit = 0; hit < num_eval_hits; hit++) { @@ -418,7 +418,7 @@ ccl_device_noinline bool subsurface_random_walk(KernelGlobals *kg, float t = -logf(1.0f - rdist) / sample_sigma_t; ray->t = t; - scene_intersect_local(kg, *ray, ss_isect, sd->object, NULL, 1); + scene_intersect_local(kg, ray, ss_isect, sd->object, NULL, 1); hit = (ss_isect->num_hits > 0); if (hit) { diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index a1d950bbc70..b3cb6ca7c19 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -115,7 +115,6 @@ CCL_NAMESPACE_BEGIN # define __LAMP_MIS__ # define __CAMERA_MOTION__ # define __OBJECT_MOTION__ -# define __HAIR__ # define __BAKING__ # define __PRINCIPLED__ # define __SUBSURFACE__ @@ -650,9 +649,8 @@ typedef struct Ray { * is fixed. 
*/ #ifndef __KERNEL_OPENCL_AMD__ - float3 P; /* origin */ - float3 D; /* direction */ - + float3 P; /* origin */ + float3 D; /* direction */ float t; /* length of the ray */ float time; /* time (for motion blur) */ #else @@ -1408,6 +1406,7 @@ typedef struct KernelObject { float surface_area; float pass_id; float random_number; + float color[3]; int particle_index; float dupli_generated[3]; @@ -1420,11 +1419,9 @@ typedef struct KernelObject { uint patch_map_offset; uint attribute_map_offset; uint motion_offset; - uint pad1; float cryptomatte_object; float cryptomatte_asset; - float pad2, pad3; } KernelObject; static_assert_align(KernelObject, 16); @@ -1523,7 +1520,7 @@ static_assert_align(KernelShader, 16); * Queue 1 - Active rays * Queue 2 - Background queue * Queue 3 - Shadow ray cast kernel - AO - * Queeu 4 - Shadow ray cast kernel - direct lighting + * Queue 4 - Shadow ray cast kernel - direct lighting */ /* Queue names */ diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h index cc85110bdd8..2705526abe4 100644 --- a/intern/cycles/kernel/kernel_volume.h +++ b/intern/cycles/kernel/kernel_volume.h @@ -187,7 +187,7 @@ ccl_device void kernel_volume_shadow_homogeneous(KernelGlobals *kg, ShaderData *sd, float3 *throughput) { - float3 sigma_t; + float3 sigma_t = make_float3(0.0f, 0.0f, 0.0f); if (volume_shader_extinction_sample(kg, sd, state, ray->P, &sigma_t)) *throughput *= volume_color_transmittance(sigma_t, ray->t); @@ -225,7 +225,7 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, } float3 new_P = ray->P + ray->D * (t + step_offset); - float3 sigma_t; + float3 sigma_t = make_float3(0.0f, 0.0f, 0.0f); /* compute attenuation over segment */ if (volume_shader_extinction_sample(kg, sd, state, new_P, &sigma_t)) { @@ -621,6 +621,7 @@ kernel_volume_integrate_heterogeneous_distance(KernelGlobals *kg, new_tp = tp * transmittance; } else { + transmittance = make_float3(0.0f, 0.0f, 0.0f); new_tp = tp; } @@ -671,7 
+672,7 @@ kernel_volume_integrate_heterogeneous_distance(KernelGlobals *kg, * ray, with the assumption that there are no surfaces blocking light * between the endpoints. distance sampling is used to decide if we will * scatter or not. */ -ccl_device_noinline VolumeIntegrateResult +ccl_device_noinline_cpu VolumeIntegrateResult kernel_volume_integrate(KernelGlobals *kg, ccl_addr_space PathState *state, ShaderData *sd, @@ -1275,7 +1276,7 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg, */ if (stack_index == 0 && kernel_data.background.volume_shader == SHADER_NONE) { stack[0].shader = kernel_data.background.volume_shader; - stack[0].object = PRIM_NONE; + stack[0].object = OBJECT_NONE; stack[1].shader = SHADER_NONE; } else { diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h index 4289e2bbb85..8f311baf010 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h @@ -19,6 +19,10 @@ CCL_NAMESPACE_BEGIN +/* Make template functions private so symbols don't conflict between kernels with different + * instruction sets. */ +namespace { + template<typename T> struct TextureInterpolator { #define SET_CUBIC_SPLINE_WEIGHTS(u, t) \ { \ @@ -523,6 +527,8 @@ ccl_device float4 kernel_tex_image_interp_3d( } } +} /* Namespace. 
*/ + CCL_NAMESPACE_END #endif // __KERNEL_CPU_IMAGE_H__ diff --git a/intern/cycles/kernel/osl/CMakeLists.txt b/intern/cycles/kernel/osl/CMakeLists.txt index 28d9ca854db..35cca2da8ad 100644 --- a/intern/cycles/kernel/osl/CMakeLists.txt +++ b/intern/cycles/kernel/osl/CMakeLists.txt @@ -29,8 +29,6 @@ set(LIB cycles_render ) -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${RTTI_DISABLE_FLAGS}") - include_directories(${INC}) include_directories(SYSTEM ${INC_SYS}) diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp index 316d24b0954..415de9cd66b 100644 --- a/intern/cycles/kernel/osl/osl_services.cpp +++ b/intern/cycles/kernel/osl/osl_services.cpp @@ -81,6 +81,7 @@ ustring OSLRenderServices::u_screen("screen"); ustring OSLRenderServices::u_raster("raster"); ustring OSLRenderServices::u_ndc("NDC"); ustring OSLRenderServices::u_object_location("object:location"); +ustring OSLRenderServices::u_object_color("object:color"); ustring OSLRenderServices::u_object_index("object:index"); ustring OSLRenderServices::u_geom_dupli_generated("geom:dupli_generated"); ustring OSLRenderServices::u_geom_dupli_uv("geom:dupli_uv"); @@ -668,6 +669,10 @@ bool OSLRenderServices::get_object_standard_attribute( float3 f = object_location(kg, sd); return set_attribute_float3(f, type, derivatives, val); } + else if (name == u_object_color) { + float3 f = object_color(kg, sd->object); + return set_attribute_float3(f, type, derivatives, val); + } else if (name == u_object_index) { float f = object_pass_id(kg, sd->object); return set_attribute_float(f, type, derivatives, val); @@ -697,7 +702,7 @@ bool OSLRenderServices::get_object_standard_attribute( } else if (name == u_particle_random) { int particle_id = object_particle_id(kg, sd->object); - float f = hash_int_01(particle_index(kg, particle_id)); + float f = hash_uint2_to_float(particle_index(kg, particle_id), 0); return set_attribute_float(f, type, derivatives, val); } @@ -1401,7 +1406,7 @@ bool 
OSLRenderServices::trace(TraceOpt &options, /* Raytrace, leaving out shadow opaque to avoid early exit. */ uint visibility = PATH_RAY_ALL_VISIBILITY - PATH_RAY_SHADOW_OPAQUE; - return scene_intersect(kg, ray, visibility, &tracedata->isect); + return scene_intersect(kg, &ray, visibility, &tracedata->isect); } bool OSLRenderServices::getmessage(OSL::ShaderGlobals *sg, diff --git a/intern/cycles/kernel/osl/osl_services.h b/intern/cycles/kernel/osl/osl_services.h index 024ef656be1..469c5188730 100644 --- a/intern/cycles/kernel/osl/osl_services.h +++ b/intern/cycles/kernel/osl/osl_services.h @@ -245,6 +245,7 @@ class OSLRenderServices : public OSL::RendererServices { static ustring u_raster; static ustring u_ndc; static ustring u_object_location; + static ustring u_object_color; static ustring u_object_index; static ustring u_geom_dupli_generated; static ustring u_geom_dupli_uv; diff --git a/intern/cycles/kernel/shaders/CMakeLists.txt b/intern/cycles/kernel/shaders/CMakeLists.txt index b42b9b2fe64..c50bffe27b2 100644 --- a/intern/cycles/kernel/shaders/CMakeLists.txt +++ b/intern/cycles/kernel/shaders/CMakeLists.txt @@ -13,6 +13,7 @@ set(SRC_OSL node_bump.osl node_camera.osl node_checker_texture.osl + node_clamp.osl node_combine_rgb.osl node_combine_hsv.osl node_combine_xyz.osl @@ -46,6 +47,7 @@ set(SRC_OSL node_light_falloff.osl node_light_path.osl node_magic_texture.osl + node_map_range.osl node_mapping.osl node_math.osl node_mix.osl @@ -83,6 +85,7 @@ set(SRC_OSL node_wavelength.osl node_blackbody.osl node_wave_texture.osl + node_white_noise_texture.osl node_wireframe.osl node_hair_bsdf.osl node_principled_hair_bsdf.osl diff --git a/intern/cycles/kernel/shaders/node_clamp.osl b/intern/cycles/kernel/shaders/node_clamp.osl new file mode 100644 index 00000000000..87dc1ccdb12 --- /dev/null +++ b/intern/cycles/kernel/shaders/node_clamp.osl @@ -0,0 +1,22 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); 
+ * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "stdosl.h" + +shader node_clamp(float Value = 1.0, float Min = 0.0, float Max = 1.0, output float Result = 0.0) +{ + Result = clamp(Value, Min, Max); +} diff --git a/intern/cycles/kernel/shaders/node_map_range.osl b/intern/cycles/kernel/shaders/node_map_range.osl new file mode 100644 index 00000000000..8a28edf5f35 --- /dev/null +++ b/intern/cycles/kernel/shaders/node_map_range.osl @@ -0,0 +1,29 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "stdosl.h" + +shader node_map_range(float Value = 1.0, + float FromMin = 0.0, + float FromMax = 1.0, + float ToMin = 0.0, + float ToMax = 1.0, + output float Result = 0.0) +{ + if (FromMax != FromMin) { + Result = ToMin + ((Value - FromMin) / (FromMax - FromMin)) * (ToMax - ToMin); + } +} diff --git a/intern/cycles/kernel/shaders/node_math.osl b/intern/cycles/kernel/shaders/node_math.osl index 8830339e05f..13e4c91ba10 100644 --- a/intern/cycles/kernel/shaders/node_math.osl +++ b/intern/cycles/kernel/shaders/node_math.osl @@ -18,56 +18,30 @@ float safe_divide(float a, float b) { - float result; - - if (b == 0.0) - result = 0.0; - else - result = a / b; - - return result; + return (b != 0.0) ? a / b : 0.0; } float safe_modulo(float a, float b) { - float result; - - if (b == 0.0) - result = 0.0; - else - result = fmod(a, b); - - return result; + return (b != 0.0) ? fmod(a, b) : 0.0; } float safe_sqrt(float a) { - float result; - - if (a > 0.0) - result = sqrt(a); - else - result = 0.0; - - return result; + return (a > 0.0) ? sqrt(a) : 0.0; } float safe_log(float a, float b) { - if (a < 0.0 || b < 0.0) - return 0.0; - - return log(a) / log(b); + return (a > 0.0 && b > 0.0) ? log(a) / log(b) : 0.0; } +/* OSL asin, acos, and pow functions are safe by default. 
*/ shader node_math(string type = "add", - int use_clamp = 0, - float Value1 = 0.0, - float Value2 = 0.0, + float Value1 = 0.5, + float Value2 = 0.5, output float Value = 0.0) { - /* OSL asin, acos, pow check for values that could give rise to nan */ - if (type == "add") Value = Value1 + Value2; else if (type == "subtract") @@ -76,47 +50,46 @@ shader node_math(string type = "add", Value = Value1 * Value2; else if (type == "divide") Value = safe_divide(Value1, Value2); - else if (type == "sine") - Value = sin(Value1); - else if (type == "cosine") - Value = cos(Value1); - else if (type == "tangent") - Value = tan(Value1); - else if (type == "arcsine") - Value = asin(Value1); - else if (type == "arccosine") - Value = acos(Value1); - else if (type == "arctangent") - Value = atan(Value1); else if (type == "power") Value = pow(Value1, Value2); else if (type == "logarithm") Value = safe_log(Value1, Value2); + else if (type == "sqrt") + Value = safe_sqrt(Value1); + else if (type == "absolute") + Value = fabs(Value1); else if (type == "minimum") Value = min(Value1, Value2); else if (type == "maximum") Value = max(Value1, Value2); - else if (type == "round") - Value = floor(Value1 + 0.5); else if (type == "less_than") Value = Value1 < Value2; else if (type == "greater_than") Value = Value1 > Value2; - else if (type == "modulo") - Value = safe_modulo(Value1, Value2); - else if (type == "absolute") - Value = fabs(Value1); - else if (type == "arctan2") - Value = atan2(Value1, Value2); + else if (type == "round") + Value = floor(Value1 + 0.5); else if (type == "floor") Value = floor(Value1); else if (type == "ceil") Value = ceil(Value1); - else if (type == "fract") + else if (type == "fraction") Value = Value1 - floor(Value1); - else if (type == "sqrt") - Value = safe_sqrt(Value1); - - if (use_clamp) - Value = clamp(Value, 0.0, 1.0); + else if (type == "modulo") + Value = safe_modulo(Value1, Value2); + else if (type == "sine") + Value = sin(Value1); + else if (type == "cosine") 
+ Value = cos(Value1); + else if (type == "tangent") + Value = tan(Value1); + else if (type == "arcsine") + Value = asin(Value1); + else if (type == "arccosine") + Value = acos(Value1); + else if (type == "arctangent") + Value = atan(Value1); + else if (type == "arctan2") + Value = atan2(Value1, Value2); + else + warning("%s", "Unknown math operator!"); } diff --git a/intern/cycles/kernel/shaders/node_object_info.osl b/intern/cycles/kernel/shaders/node_object_info.osl index 0904a30a53f..350404bb747 100644 --- a/intern/cycles/kernel/shaders/node_object_info.osl +++ b/intern/cycles/kernel/shaders/node_object_info.osl @@ -17,11 +17,13 @@ #include "stdosl.h" shader node_object_info(output point Location = point(0.0, 0.0, 0.0), + output color Color = color(1.0, 1.0, 1.0), output float ObjectIndex = 0.0, output float MaterialIndex = 0.0, output float Random = 0.0) { getattribute("object:location", Location); + getattribute("object:color", Color); getattribute("object:index", ObjectIndex); getattribute("material:index", MaterialIndex); getattribute("object:random", Random); diff --git a/intern/cycles/kernel/shaders/node_vector_math.osl b/intern/cycles/kernel/shaders/node_vector_math.osl index 10bb0c7283c..fd5e27aa144 100644 --- a/intern/cycles/kernel/shaders/node_vector_math.osl +++ b/intern/cycles/kernel/shaders/node_vector_math.osl @@ -16,34 +16,97 @@ #include "stdosl.h" +float safe_divide(float a, float b) +{ + return (b != 0.0) ? a / b : 0.0; +} + +vector safe_divide(vector a, vector b) +{ + return vector((b[0] != 0.0) ? a[0] / b[0] : 0.0, + (b[1] != 0.0) ? a[1] / b[1] : 0.0, + (b[2] != 0.0) ? a[2] / b[2] : 0.0); +} + +vector project(vector v, vector v_proj) +{ + float lenSquared = dot(v_proj, v_proj); + return (lenSquared != 0.0) ? 
(dot(v, v_proj) / lenSquared) * v_proj : vector(0.0); +} + +vector snap(vector a, vector b) +{ + return floor(safe_divide(a, b)) * b; +} + shader node_vector_math(string type = "add", vector Vector1 = vector(0.0, 0.0, 0.0), vector Vector2 = vector(0.0, 0.0, 0.0), + float Scale = 1.0, output float Value = 0.0, output vector Vector = vector(0.0, 0.0, 0.0)) { if (type == "add") { Vector = Vector1 + Vector2; - Value = (abs(Vector[0]) + abs(Vector[1]) + abs(Vector[2])) / 3.0; } else if (type == "subtract") { Vector = Vector1 - Vector2; - Value = (abs(Vector[0]) + abs(Vector[1]) + abs(Vector[2])) / 3.0; } - else if (type == "average") { - Value = length(Vector1 + Vector2); - Vector = normalize(Vector1 + Vector2); + else if (type == "multiply") { + Vector = Vector1 * Vector2; + } + else if (type == "divide") { + Vector = safe_divide(Vector1, Vector2); + } + else if (type == "cross_product") { + Vector = cross(Vector1, Vector2); + } + else if (type == "project") { + Vector = project(Vector1, Vector2); + } + else if (type == "reflect") { + Vector = reflect(Vector1, normalize(Vector2)); } else if (type == "dot_product") { Value = dot(Vector1, Vector2); } - else if (type == "cross_product") { - vector c = cross(Vector1, Vector2); - Value = length(c); - Vector = normalize(c); + else if (type == "distance") { + Value = distance(Vector1, Vector2); } - else if (type == "normalize") { + else if (type == "length") { Value = length(Vector1); + } + else if (type == "scale") { + Vector = Vector1 * Scale; + } + else if (type == "normalize") { Vector = normalize(Vector1); } + else if (type == "snap") { + Vector = snap(Vector1, Vector2); + } + else if (type == "floor") { + Vector = floor(Vector1); + } + else if (type == "ceil") { + Vector = ceil(Vector1); + } + else if (type == "modulo") { + Vector = mod(Vector1, Vector2); + } + else if (type == "fraction") { + Vector = Vector1 - floor(Vector1); + } + else if (type == "absolute") { + Vector = abs(Vector1); + } + else if (type == 
"minimum") { + Vector = min(Vector1, Vector2); + } + else if (type == "maximum") { + Vector = max(Vector1, Vector2); + } + else { + warning("%s", "Unknown vector math operator!"); + } } diff --git a/intern/cycles/kernel/shaders/node_white_noise_texture.osl b/intern/cycles/kernel/shaders/node_white_noise_texture.osl new file mode 100644 index 00000000000..f026fb4ab39 --- /dev/null +++ b/intern/cycles/kernel/shaders/node_white_noise_texture.osl @@ -0,0 +1,39 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "stdosl.h" + +shader node_white_noise_texture(string dimensions = "3D", + point Vector = point(0.0, 0.0, 0.0), + float W = 0.0, + output float Value = 0.0) +{ + if (dimensions == "1D") { + Value = noise("hash", W); + } + else if (dimensions == "2D") { + Value = noise("hash", Vector[0], Vector[1]); + } + else if (dimensions == "3D") { + Value = noise("hash", Vector); + } + else if (dimensions == "4D") { + Value = noise("hash", Vector, W); + } + else { + warning("%s", "Unknown dimension!"); + } +} diff --git a/intern/cycles/kernel/split/kernel_direct_lighting.h b/intern/cycles/kernel/split/kernel_direct_lighting.h index b2ca59d60cc..3be2b35812f 100644 --- a/intern/cycles/kernel/split/kernel_direct_lighting.h +++ b/intern/cycles/kernel/split/kernel_direct_lighting.h @@ -86,8 +86,7 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg, float terminate = path_state_rng_light_termination(kg, state); LightSample ls; - if (light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) { - + if (light_sample(kg, -1, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) { Ray light_ray; light_ray.time = sd->time; diff --git a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h index 5cd4131e2ae..56cdb22bba3 100644 --- a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h +++ b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h @@ -132,10 +132,12 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao( } } +#ifdef __DENOISING_FEATURES__ if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; kernel_update_denoising_features(kg, sd, state, L); } +#endif } #ifdef __AO__ diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h index 4a386afa5de..ab8570618ab 100644 --- 
a/intern/cycles/kernel/svm/svm.h +++ b/intern/cycles/kernel/svm/svm.h @@ -132,16 +132,25 @@ ccl_device_inline float4 fetch_node_float(KernelGlobals *kg, int offset) __uint_as_float(node.w)); } -ccl_device_inline void decode_node_uchar4(uint i, uint *x, uint *y, uint *z, uint *w) +ccl_device_forceinline void svm_unpack_node_uchar2(uint i, uint *x, uint *y) { - if (x) - *x = (i & 0xFF); - if (y) - *y = ((i >> 8) & 0xFF); - if (z) - *z = ((i >> 16) & 0xFF); - if (w) - *w = ((i >> 24) & 0xFF); + *x = (i & 0xFF); + *y = ((i >> 8) & 0xFF); +} + +ccl_device_forceinline void svm_unpack_node_uchar3(uint i, uint *x, uint *y, uint *z) +{ + *x = (i & 0xFF); + *y = ((i >> 8) & 0xFF); + *z = ((i >> 16) & 0xFF); +} + +ccl_device_forceinline void svm_unpack_node_uchar4(uint i, uint *x, uint *y, uint *z, uint *w) +{ + *x = (i & 0xFF); + *y = ((i >> 8) & 0xFF); + *z = ((i >> 16) & 0xFF); + *w = ((i >> 24) & 0xFF); } CCL_NAMESPACE_END @@ -192,6 +201,9 @@ CCL_NAMESPACE_END #include "kernel/svm/svm_vector_transform.h" #include "kernel/svm/svm_voxel.h" #include "kernel/svm/svm_bump.h" +#include "kernel/svm/svm_map_range.h" +#include "kernel/svm/svm_clamp.h" +#include "kernel/svm/svm_white_noise.h" #ifdef __SHADER_RAYTRACE__ # include "kernel/svm/svm_ao.h" @@ -430,6 +442,9 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, case NODE_TEX_BRICK: svm_node_tex_brick(kg, sd, stack, node, &offset); break; + case NODE_TEX_WHITE_NOISE: + svm_node_tex_white_noise(kg, sd, stack, node.y, node.z, node.w, &offset); + break; # endif /* __TEXTURES__ */ # ifdef __EXTRA_NODES__ case NODE_NORMAL: @@ -486,6 +501,12 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, case NODE_BLACKBODY: svm_node_blackbody(kg, sd, stack, node.y, node.z); break; + case NODE_MAP_RANGE: + svm_node_map_range(kg, sd, stack, node.y, node.z, node.w, &offset); + break; + case NODE_CLAMP: + svm_node_clamp(kg, sd, stack, node.y, node.z, node.w, &offset); + break; # endif /* __EXTRA_NODES__ */ # if 
NODES_FEATURE(NODE_FEATURE_VOLUME) case NODE_TEX_VOXEL: diff --git a/intern/cycles/kernel/svm/svm_ao.h b/intern/cycles/kernel/svm/svm_ao.h index 62413979201..4cb986b897a 100644 --- a/intern/cycles/kernel/svm/svm_ao.h +++ b/intern/cycles/kernel/svm/svm_ao.h @@ -16,6 +16,8 @@ CCL_NAMESPACE_BEGIN +#ifdef __SHADER_RAYTRACE__ + ccl_device_noinline float svm_ao(KernelGlobals *kg, ShaderData *sd, float3 N, @@ -64,13 +66,13 @@ ccl_device_noinline float svm_ao(KernelGlobals *kg, ray.dD = differential3_zero(); if (flags & NODE_AO_ONLY_LOCAL) { - if (!scene_intersect_local(kg, ray, NULL, sd->object, NULL, 0)) { + if (!scene_intersect_local(kg, &ray, NULL, sd->object, NULL, 0)) { unoccluded++; } } else { Intersection isect; - if (!scene_intersect(kg, ray, PATH_RAY_SHADOW_OPAQUE, &isect)) { + if (!scene_intersect(kg, &ray, PATH_RAY_SHADOW_OPAQUE, &isect)) { unoccluded++; } } @@ -83,10 +85,10 @@ ccl_device void svm_node_ao( KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, float *stack, uint4 node) { uint flags, dist_offset, normal_offset, out_ao_offset; - decode_node_uchar4(node.y, &flags, &dist_offset, &normal_offset, &out_ao_offset); + svm_unpack_node_uchar4(node.y, &flags, &dist_offset, &normal_offset, &out_ao_offset); uint color_offset, out_color_offset, samples; - decode_node_uchar4(node.z, &color_offset, &out_color_offset, &samples, NULL); + svm_unpack_node_uchar3(node.z, &color_offset, &out_color_offset, &samples); float dist = stack_load_float_default(stack, dist_offset, node.w); float3 normal = stack_valid(normal_offset) ? 
stack_load_float3(stack, normal_offset) : sd->N; @@ -102,4 +104,6 @@ ccl_device void svm_node_ao( } } +#endif /* __SHADER_RAYTRACE__ */ + CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_attribute.h b/intern/cycles/kernel/svm/svm_attribute.h index a67cfe91a30..eaee0f9e4ee 100644 --- a/intern/cycles/kernel/svm/svm_attribute.h +++ b/intern/cycles/kernel/svm/svm_attribute.h @@ -46,8 +46,8 @@ ccl_device AttributeDescriptor svm_node_attr_init( ccl_device void svm_node_attr(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) { - NodeAttributeType type; - uint out_offset; + NodeAttributeType type = NODE_ATTR_FLOAT; + uint out_offset = 0; AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset); /* fetch and store attribute */ @@ -80,16 +80,10 @@ ccl_device void svm_node_attr(KernelGlobals *kg, ShaderData *sd, float *stack, u } } -#ifndef __KERNEL_CUDA__ -ccl_device -#else -ccl_device_noinline -#endif - void - svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) +ccl_device void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) { - NodeAttributeType type; - uint out_offset; + NodeAttributeType type = NODE_ATTR_FLOAT; + uint out_offset = 0; AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset); /* fetch and store attribute */ @@ -125,16 +119,10 @@ ccl_device_noinline } } -#ifndef __KERNEL_CUDA__ -ccl_device -#else -ccl_device_noinline -#endif - void - svm_node_attr_bump_dy(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) +ccl_device void svm_node_attr_bump_dy(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) { - NodeAttributeType type; - uint out_offset; + NodeAttributeType type = NODE_ATTR_FLOAT; + uint out_offset = 0; AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset); /* fetch and store attribute */ diff --git a/intern/cycles/kernel/svm/svm_bevel.h b/intern/cycles/kernel/svm/svm_bevel.h 
index fcf28e96e98..434502f31f9 100644 --- a/intern/cycles/kernel/svm/svm_bevel.h +++ b/intern/cycles/kernel/svm/svm_bevel.h @@ -16,6 +16,8 @@ CCL_NAMESPACE_BEGIN +#ifdef __SHADER_RAYTRACE__ + /* Bevel shader averaging normals from nearby surfaces. * * Sampling strategy from: BSSRDF Importance Sampling, SIGGRAPH 2013 @@ -51,7 +53,7 @@ ccl_device_noinline float3 svm_bevel(KernelGlobals *kg, float3 sum_N = make_float3(0.0f, 0.0f, 0.0f); for (int sample = 0; sample < num_samples; sample++) { - float disk_u, disk_v; + float disk_u = 0.0f, disk_v = 0.0f; path_branched_rng_2D( kg, state->rng_hash, state, sample, num_samples, PRNG_BEVEL_U, &disk_u, &disk_v); @@ -110,7 +112,7 @@ ccl_device_noinline float3 svm_bevel(KernelGlobals *kg, /* Intersect with the same object. if multiple intersections are found it * will use at most LOCAL_MAX_HITS hits, a random subset of all hits. */ - scene_intersect_local(kg, *ray, &isect, sd->object, &lcg_state, LOCAL_MAX_HITS); + scene_intersect_local(kg, ray, &isect, sd->object, &lcg_state, LOCAL_MAX_HITS); int num_eval_hits = min(isect.num_hits, LOCAL_MAX_HITS); @@ -120,14 +122,14 @@ ccl_device_noinline float3 svm_bevel(KernelGlobals *kg, if (sd->type & PRIMITIVE_TRIANGLE) { hit_P = triangle_refine_local(kg, sd, &isect.hits[hit], ray); } -#ifdef __OBJECT_MOTION__ +# ifdef __OBJECT_MOTION__ else if (sd->type & PRIMITIVE_MOTION_TRIANGLE) { float3 verts[3]; motion_triangle_vertices( kg, sd->object, kernel_tex_fetch(__prim_index, isect.hits[hit].prim), sd->time, verts); hit_P = motion_triangle_refine_local(kg, sd, &isect.hits[hit], ray, verts); } -#endif /* __OBJECT_MOTION__ */ +# endif /* __OBJECT_MOTION__ */ /* Get geometric normal. 
*/ float3 hit_Ng = isect.Ng[hit]; @@ -151,11 +153,11 @@ ccl_device_noinline float3 svm_bevel(KernelGlobals *kg, if (sd->type & PRIMITIVE_TRIANGLE) { N = triangle_smooth_normal(kg, N, prim, u, v); } -#ifdef __OBJECT_MOTION__ +# ifdef __OBJECT_MOTION__ else if (sd->type & PRIMITIVE_MOTION_TRIANGLE) { N = motion_triangle_smooth_normal(kg, N, sd->object, prim, u, v, sd->time); } -#endif /* __OBJECT_MOTION__ */ +# endif /* __OBJECT_MOTION__ */ } /* Transform normals to world space. */ @@ -200,7 +202,7 @@ ccl_device void svm_node_bevel( KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, float *stack, uint4 node) { uint num_samples, radius_offset, normal_offset, out_offset; - decode_node_uchar4(node.y, &num_samples, &radius_offset, &normal_offset, &out_offset); + svm_unpack_node_uchar4(node.y, &num_samples, &radius_offset, &normal_offset, &out_offset); float radius = stack_load_float(stack, radius_offset); float3 bevel_N = svm_bevel(kg, sd, state, radius, num_samples); @@ -214,4 +216,6 @@ ccl_device void svm_node_bevel( stack_store_float3(stack, out_offset, bevel_N); } +#endif /* __SHADER_RAYTRACE__ */ + CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_brick.h b/intern/cycles/kernel/svm/svm_brick.h index b5cbfcc72df..6984afa30a5 100644 --- a/intern/cycles/kernel/svm/svm_brick.h +++ b/intern/cycles/kernel/svm/svm_brick.h @@ -18,7 +18,7 @@ CCL_NAMESPACE_BEGIN /* Brick */ -ccl_device_noinline float brick_noise(uint n) /* fast integer noise */ +ccl_device_inline float brick_noise(uint n) /* fast integer noise */ { uint nn; n = (n + 1013) & 0x7fffffff; @@ -27,16 +27,16 @@ ccl_device_noinline float brick_noise(uint n) /* fast integer noise */ return 0.5f * ((float)nn / 1073741824.0f); } -ccl_device_noinline float2 svm_brick(float3 p, - float mortar_size, - float mortar_smooth, - float bias, - float brick_width, - float row_height, - float offset_amount, - int offset_frequency, - float squash_amount, - int squash_frequency) +ccl_device_noinline_cpu 
float2 svm_brick(float3 p, + float mortar_size, + float mortar_smooth, + float bias, + float brick_width, + float row_height, + float offset_amount, + int offset_frequency, + float squash_amount, + int squash_frequency) { int bricknum, rownum; float offset = 0.0f; @@ -87,13 +87,13 @@ ccl_device void svm_node_tex_brick( /* RNA properties */ uint offset_frequency, squash_frequency; - decode_node_uchar4(node.y, &co_offset, &color1_offset, &color2_offset, &mortar_offset); - decode_node_uchar4( + svm_unpack_node_uchar4(node.y, &co_offset, &color1_offset, &color2_offset, &mortar_offset); + svm_unpack_node_uchar4( node.z, &scale_offset, &mortar_size_offset, &bias_offset, &brick_width_offset); - decode_node_uchar4( + svm_unpack_node_uchar4( node.w, &row_height_offset, &color_offset, &fac_offset, &mortar_smooth_offset); - decode_node_uchar4(node2.x, &offset_frequency, &squash_frequency, NULL, NULL); + svm_unpack_node_uchar2(node2.x, &offset_frequency, &squash_frequency); float3 co = stack_load_float3(stack, co_offset); diff --git a/intern/cycles/kernel/svm/svm_brightness.h b/intern/cycles/kernel/svm/svm_brightness.h index dcd75a2fe8f..9554b5946fb 100644 --- a/intern/cycles/kernel/svm/svm_brightness.h +++ b/intern/cycles/kernel/svm/svm_brightness.h @@ -22,7 +22,7 @@ ccl_device void svm_node_brightness( uint bright_offset, contrast_offset; float3 color = stack_load_float3(stack, in_color); - decode_node_uchar4(node, &bright_offset, &contrast_offset, NULL, NULL); + svm_unpack_node_uchar2(node, &bright_offset, &contrast_offset); float brightness = stack_load_float(stack, bright_offset); float contrast = stack_load_float(stack, contrast_offset); diff --git a/intern/cycles/kernel/svm/svm_checker.h b/intern/cycles/kernel/svm/svm_checker.h index 63b4d1e149b..d54cb73df91 100644 --- a/intern/cycles/kernel/svm/svm_checker.h +++ b/intern/cycles/kernel/svm/svm_checker.h @@ -18,7 +18,7 @@ CCL_NAMESPACE_BEGIN /* Checker */ -ccl_device_noinline float svm_checker(float3 p) +ccl_device float 
svm_checker(float3 p) { /* avoid precision issues on unit coordinates */ p.x = (p.x + 0.000001f) * 0.999999f; @@ -37,8 +37,8 @@ ccl_device void svm_node_tex_checker(KernelGlobals *kg, ShaderData *sd, float *s uint co_offset, color1_offset, color2_offset, scale_offset; uint color_offset, fac_offset; - decode_node_uchar4(node.y, &co_offset, &color1_offset, &color2_offset, &scale_offset); - decode_node_uchar4(node.z, &color_offset, &fac_offset, NULL, NULL); + svm_unpack_node_uchar4(node.y, &co_offset, &color1_offset, &color2_offset, &scale_offset); + svm_unpack_node_uchar2(node.z, &color_offset, &fac_offset); float3 co = stack_load_float3(stack, co_offset); float3 color1 = stack_load_float3(stack, color1_offset); diff --git a/intern/cycles/kernel/svm/svm_clamp.h b/intern/cycles/kernel/svm/svm_clamp.h new file mode 100644 index 00000000000..a45e70a3f15 --- /dev/null +++ b/intern/cycles/kernel/svm/svm_clamp.h @@ -0,0 +1,41 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +CCL_NAMESPACE_BEGIN + +/* Clamp Node */ + +ccl_device void svm_node_clamp(KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint value_stack_offset, + uint parameters_stack_offsets, + uint result_stack_offset, + int *offset) +{ + uint min_stack_offset, max_stack_offset; + svm_unpack_node_uchar2(parameters_stack_offsets, &min_stack_offset, &max_stack_offset); + + uint4 defaults = read_node(kg, offset); + + float value = stack_load_float(stack, value_stack_offset); + float min = stack_load_float_default(stack, min_stack_offset, defaults.x); + float max = stack_load_float_default(stack, max_stack_offset, defaults.y); + + stack_store_float(stack, result_stack_offset, clamp(value, min, max)); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h index 270fe4c8615..1511fc65835 100644 --- a/intern/cycles/kernel/svm/svm_closure.h +++ b/intern/cycles/kernel/svm/svm_closure.h @@ -85,7 +85,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, uint type, param1_offset, param2_offset; uint mix_weight_offset; - decode_node_uchar4(node.y, &type, &param1_offset, &param2_offset, &mix_weight_offset); + svm_unpack_node_uchar4(node.y, &type, &param1_offset, &param2_offset, &mix_weight_offset); float mix_weight = (stack_valid(mix_weight_offset) ? 
stack_load_float(stack, mix_weight_offset) : 1.0f); @@ -122,21 +122,21 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, uint4 data_node2 = read_node(kg, offset); float3 T = stack_load_float3(stack, data_node.y); - decode_node_uchar4(data_node.z, - &specular_offset, - &roughness_offset, - &specular_tint_offset, - &anisotropic_offset); - decode_node_uchar4(data_node.w, - &sheen_offset, - &sheen_tint_offset, - &clearcoat_offset, - &clearcoat_roughness_offset); - decode_node_uchar4(data_node2.x, - &eta_offset, - &transmission_offset, - &anisotropic_rotation_offset, - &transmission_roughness_offset); + svm_unpack_node_uchar4(data_node.z, + &specular_offset, + &roughness_offset, + &specular_tint_offset, + &anisotropic_offset); + svm_unpack_node_uchar4(data_node.w, + &sheen_offset, + &sheen_tint_offset, + &clearcoat_offset, + &clearcoat_roughness_offset); + svm_unpack_node_uchar4(data_node2.x, + &eta_offset, + &transmission_offset, + &anisotropic_rotation_offset, + &transmission_roughness_offset); // get Disney principled parameters float metallic = param1; @@ -793,19 +793,19 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, float3 weight = sd->svm_closure_weight * mix_weight; uint offset_ofs, ior_ofs, color_ofs, parametrization; - decode_node_uchar4(data_node.y, &offset_ofs, &ior_ofs, &color_ofs, &parametrization); + svm_unpack_node_uchar4(data_node.y, &offset_ofs, &ior_ofs, &color_ofs, &parametrization); float alpha = stack_load_float_default(stack, offset_ofs, data_node.z); float ior = stack_load_float_default(stack, ior_ofs, data_node.w); uint coat_ofs, melanin_ofs, melanin_redness_ofs, absorption_coefficient_ofs; - decode_node_uchar4(data_node2.x, - &coat_ofs, - &melanin_ofs, - &melanin_redness_ofs, - &absorption_coefficient_ofs); + svm_unpack_node_uchar4(data_node2.x, + &coat_ofs, + &melanin_ofs, + &melanin_redness_ofs, + &absorption_coefficient_ofs); uint tint_ofs, random_ofs, random_color_ofs, random_roughness_ofs; - decode_node_uchar4( + 
svm_unpack_node_uchar4( data_node3.x, &tint_ofs, &random_ofs, &random_color_ofs, &random_roughness_ofs); const AttributeDescriptor attr_descr_random = find_attribute(kg, sd, data_node4.y); @@ -982,7 +982,7 @@ ccl_device void svm_node_closure_volume( uint type, density_offset, anisotropy_offset; uint mix_weight_offset; - decode_node_uchar4(node.y, &type, &density_offset, &anisotropy_offset, &mix_weight_offset); + svm_unpack_node_uchar4(node.y, &type, &density_offset, &anisotropy_offset, &mix_weight_offset); float mix_weight = (stack_valid(mix_weight_offset) ? stack_load_float(stack, mix_weight_offset) : 1.0f); @@ -1040,7 +1040,7 @@ ccl_device void svm_node_principled_volume(KernelGlobals *kg, } uint density_offset, anisotropy_offset, absorption_color_offset, mix_weight_offset; - decode_node_uchar4( + svm_unpack_node_uchar4( node.y, &density_offset, &anisotropy_offset, &absorption_color_offset, &mix_weight_offset); float mix_weight = (stack_valid(mix_weight_offset) ? stack_load_float(stack, mix_weight_offset) : 1.0f); @@ -1099,7 +1099,7 @@ ccl_device void svm_node_principled_volume(KernelGlobals *kg, } uint emission_offset, emission_color_offset, blackbody_offset, temperature_offset; - decode_node_uchar4( + svm_unpack_node_uchar4( node.z, &emission_offset, &emission_color_offset, &blackbody_offset, &temperature_offset); float emission = (stack_valid(emission_offset)) ? 
stack_load_float(stack, emission_offset) : __uint_as_float(value_node.z); @@ -1229,7 +1229,8 @@ ccl_device void svm_node_mix_closure(ShaderData *sd, float *stack, uint4 node) /* fetch weight from blend input, previous mix closures, * and write to stack to be used by closure nodes later */ uint weight_offset, in_weight_offset, weight1_offset, weight2_offset; - decode_node_uchar4(node.y, &weight_offset, &in_weight_offset, &weight1_offset, &weight2_offset); + svm_unpack_node_uchar4( + node.y, &weight_offset, &in_weight_offset, &weight1_offset, &weight2_offset); float weight = stack_load_float(stack, weight_offset); weight = saturate(weight); diff --git a/intern/cycles/kernel/svm/svm_color_util.h b/intern/cycles/kernel/svm/svm_color_util.h index 12b59d2616b..3a6a5ba782f 100644 --- a/intern/cycles/kernel/svm/svm_color_util.h +++ b/intern/cycles/kernel/svm/svm_color_util.h @@ -264,7 +264,7 @@ ccl_device float3 svm_mix_clamp(float3 col) return outcol; } -ccl_device_noinline float3 svm_mix(NodeMix type, float fac, float3 c1, float3 c2) +ccl_device_noinline_cpu float3 svm_mix(NodeMix type, float fac, float3 c1, float3 c2) { float t = saturate(fac); diff --git a/intern/cycles/kernel/svm/svm_displace.h b/intern/cycles/kernel/svm/svm_displace.h index f16664a684c..250fac6bcb8 100644 --- a/intern/cycles/kernel/svm/svm_displace.h +++ b/intern/cycles/kernel/svm/svm_displace.h @@ -23,7 +23,7 @@ ccl_device void svm_node_set_bump(KernelGlobals *kg, ShaderData *sd, float *stac #ifdef __RAY_DIFFERENTIALS__ /* get normal input */ uint normal_offset, scale_offset, invert, use_object_space; - decode_node_uchar4(node.y, &normal_offset, &scale_offset, &invert, &use_object_space); + svm_unpack_node_uchar4(node.y, &normal_offset, &scale_offset, &invert, &use_object_space); float3 normal_in = stack_valid(normal_offset) ? 
stack_load_float3(stack, normal_offset) : sd->N; @@ -42,7 +42,7 @@ ccl_device void svm_node_set_bump(KernelGlobals *kg, ShaderData *sd, float *stac /* get bump values */ uint c_offset, x_offset, y_offset, strength_offset; - decode_node_uchar4(node.z, &c_offset, &x_offset, &y_offset, &strength_offset); + svm_unpack_node_uchar4(node.z, &c_offset, &x_offset, &y_offset, &strength_offset); float h_c = stack_load_float(stack, c_offset); float h_x = stack_load_float(stack, x_offset); @@ -95,7 +95,7 @@ ccl_device void svm_node_set_displacement(KernelGlobals *kg, ccl_device void svm_node_displacement(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) { uint height_offset, midlevel_offset, scale_offset, normal_offset; - decode_node_uchar4(node.y, &height_offset, &midlevel_offset, &scale_offset, &normal_offset); + svm_unpack_node_uchar4(node.y, &height_offset, &midlevel_offset, &scale_offset, &normal_offset); float height = stack_load_float(stack, height_offset); float midlevel = stack_load_float(stack, midlevel_offset); @@ -126,7 +126,7 @@ ccl_device void svm_node_vector_displacement( uint space = data_node.x; uint vector_offset, midlevel_offset, scale_offset, displacement_offset; - decode_node_uchar4( + svm_unpack_node_uchar4( node.y, &vector_offset, &midlevel_offset, &scale_offset, &displacement_offset); float3 vector = stack_load_float3(stack, vector_offset); diff --git a/intern/cycles/kernel/svm/svm_fresnel.h b/intern/cycles/kernel/svm/svm_fresnel.h index 03119991597..96d602e35bf 100644 --- a/intern/cycles/kernel/svm/svm_fresnel.h +++ b/intern/cycles/kernel/svm/svm_fresnel.h @@ -22,7 +22,7 @@ ccl_device void svm_node_fresnel( ShaderData *sd, float *stack, uint ior_offset, uint ior_value, uint node) { uint normal_offset, out_offset; - decode_node_uchar4(node, &normal_offset, &out_offset, NULL, NULL); + svm_unpack_node_uchar2(node, &normal_offset, &out_offset); float eta = (stack_valid(ior_offset)) ? 
stack_load_float(stack, ior_offset) : __uint_as_float(ior_value); float3 normal_in = stack_valid(normal_offset) ? stack_load_float3(stack, normal_offset) : sd->N; @@ -43,7 +43,7 @@ ccl_device void svm_node_layer_weight(ShaderData *sd, float *stack, uint4 node) uint blend_value = node.z; uint type, normal_offset, out_offset; - decode_node_uchar4(node.w, &type, &normal_offset, &out_offset, NULL); + svm_unpack_node_uchar3(node.w, &type, &normal_offset, &out_offset); float blend = (stack_valid(blend_offset)) ? stack_load_float(stack, blend_offset) : __uint_as_float(blend_value); diff --git a/intern/cycles/kernel/svm/svm_geometry.h b/intern/cycles/kernel/svm/svm_geometry.h index a9104643299..019c6294082 100644 --- a/intern/cycles/kernel/svm/svm_geometry.h +++ b/intern/cycles/kernel/svm/svm_geometry.h @@ -113,6 +113,10 @@ ccl_device void svm_node_object_info( stack_store_float3(stack, out_offset, object_location(kg, sd)); return; } + case NODE_INFO_OB_COLOR: { + stack_store_float3(stack, out_offset, object_color(kg, sd->object)); + return; + } case NODE_INFO_OB_INDEX: data = object_pass_id(kg, sd->object); break; @@ -149,7 +153,7 @@ ccl_device void svm_node_particle_info( } case NODE_INFO_PAR_RANDOM: { int particle_id = object_particle_id(kg, sd->object); - float random = hash_int_01(particle_index(kg, particle_id)); + float random = hash_uint2_to_float(particle_index(kg, particle_id), 0); stack_store_float(stack, out_offset, random); break; } diff --git a/intern/cycles/kernel/svm/svm_gradient.h b/intern/cycles/kernel/svm/svm_gradient.h index c315564fbc2..08304bc47e8 100644 --- a/intern/cycles/kernel/svm/svm_gradient.h +++ b/intern/cycles/kernel/svm/svm_gradient.h @@ -64,7 +64,7 @@ ccl_device void svm_node_tex_gradient(ShaderData *sd, float *stack, uint4 node) { uint type, co_offset, color_offset, fac_offset; - decode_node_uchar4(node.y, &type, &co_offset, &fac_offset, &color_offset); + svm_unpack_node_uchar4(node.y, &type, &co_offset, &fac_offset, &color_offset); float3 
co = stack_load_float3(stack, co_offset); diff --git a/intern/cycles/kernel/svm/svm_hsv.h b/intern/cycles/kernel/svm/svm_hsv.h index 72379fba870..1f7bd421869 100644 --- a/intern/cycles/kernel/svm/svm_hsv.h +++ b/intern/cycles/kernel/svm/svm_hsv.h @@ -24,8 +24,8 @@ ccl_device void svm_node_hsv( { uint in_color_offset, fac_offset, out_color_offset; uint hue_offset, sat_offset, val_offset; - decode_node_uchar4(node.y, &in_color_offset, &fac_offset, &out_color_offset, NULL); - decode_node_uchar4(node.z, &hue_offset, &sat_offset, &val_offset, NULL); + svm_unpack_node_uchar3(node.y, &in_color_offset, &fac_offset, &out_color_offset); + svm_unpack_node_uchar3(node.z, &hue_offset, &sat_offset, &val_offset); float fac = stack_load_float(stack, fac_offset); float3 in_color = stack_load_float3(stack, in_color_offset); diff --git a/intern/cycles/kernel/svm/svm_ies.h b/intern/cycles/kernel/svm/svm_ies.h index e57e54ef123..56c804b44d0 100644 --- a/intern/cycles/kernel/svm/svm_ies.h +++ b/intern/cycles/kernel/svm/svm_ies.h @@ -101,8 +101,8 @@ ccl_device_inline float kernel_ies_interp(KernelGlobals *kg, ccl_device void svm_node_ies( KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) { - uint vector_offset, strength_offset, fac_offset, dummy, slot = node.z; - decode_node_uchar4(node.y, &strength_offset, &vector_offset, &fac_offset, &dummy); + uint vector_offset, strength_offset, fac_offset, slot = node.z; + svm_unpack_node_uchar3(node.y, &strength_offset, &vector_offset, &fac_offset); float3 vector = stack_load_float3(stack, vector_offset); float strength = stack_load_float_default(stack, strength_offset, node.w); diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h index 2ef64662d0e..64abdd2d8b3 100644 --- a/intern/cycles/kernel/svm/svm_image.h +++ b/intern/cycles/kernel/svm/svm_image.h @@ -16,6 +16,8 @@ CCL_NAMESPACE_BEGIN +#ifdef __TEXTURES__ + ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float 
y, uint flags) { float4 r = kernel_tex_image_interp(kg, id, x, y); @@ -48,7 +50,7 @@ ccl_device void svm_node_tex_image(KernelGlobals *kg, ShaderData *sd, float *sta uint id = node.y; uint co_offset, out_offset, alpha_offset, flags; - decode_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &flags); + svm_unpack_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &flags); float3 co = stack_load_float3(stack, co_offset); float2 tex_co; @@ -143,7 +145,7 @@ ccl_device void svm_node_tex_image_box(KernelGlobals *kg, ShaderData *sd, float /* now fetch textures */ uint co_offset, out_offset, alpha_offset, flags; - decode_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &flags); + svm_unpack_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &flags); float3 co = stack_load_float3(stack, co_offset); uint id = node.y; @@ -179,7 +181,7 @@ ccl_device void svm_node_tex_environment(KernelGlobals *kg, uint co_offset, out_offset, alpha_offset, flags; uint projection = node.w; - decode_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &flags); + svm_unpack_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &flags); float3 co = stack_load_float3(stack, co_offset); float2 uv; @@ -199,4 +201,6 @@ ccl_device void svm_node_tex_environment(KernelGlobals *kg, stack_store_float(stack, alpha_offset, f.w); } +#endif /* __TEXTURES__ */ + CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_light_path.h b/intern/cycles/kernel/svm/svm_light_path.h index 65a9a284a17..768c65918cd 100644 --- a/intern/cycles/kernel/svm/svm_light_path.h +++ b/intern/cycles/kernel/svm/svm_light_path.h @@ -84,7 +84,7 @@ ccl_device void svm_node_light_falloff(ShaderData *sd, float *stack, uint4 node) { uint strength_offset, out_offset, smooth_offset; - decode_node_uchar4(node.z, &strength_offset, &smooth_offset, &out_offset, NULL); + svm_unpack_node_uchar3(node.z, &strength_offset, &smooth_offset, &out_offset); float strength = stack_load_float(stack, 
strength_offset); uint type = node.y; diff --git a/intern/cycles/kernel/svm/svm_magic.h b/intern/cycles/kernel/svm/svm_magic.h index 115d2e2fe4b..9c160e6d8cc 100644 --- a/intern/cycles/kernel/svm/svm_magic.h +++ b/intern/cycles/kernel/svm/svm_magic.h @@ -18,7 +18,7 @@ CCL_NAMESPACE_BEGIN /* Magic */ -ccl_device_noinline float3 svm_magic(float3 p, int n, float distortion) +ccl_device_noinline_cpu float3 svm_magic(float3 p, int n, float distortion) { float x = sinf((p.x + p.y + p.z) * 5.0f); float y = cosf((-p.x + p.y - p.z) * 5.0f); @@ -93,8 +93,8 @@ ccl_device void svm_node_tex_magic( uint depth; uint scale_offset, distortion_offset, co_offset, fac_offset, color_offset; - decode_node_uchar4(node.y, &depth, &color_offset, &fac_offset, NULL); - decode_node_uchar4(node.z, &co_offset, &scale_offset, &distortion_offset, NULL); + svm_unpack_node_uchar3(node.y, &depth, &color_offset, &fac_offset); + svm_unpack_node_uchar3(node.z, &co_offset, &scale_offset, &distortion_offset); uint4 node2 = read_node(kg, offset); float3 co = stack_load_float3(stack, co_offset); diff --git a/intern/cycles/kernel/svm/svm_map_range.h b/intern/cycles/kernel/svm/svm_map_range.h new file mode 100644 index 00000000000..f2a68adbe61 --- /dev/null +++ b/intern/cycles/kernel/svm/svm_map_range.h @@ -0,0 +1,54 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +CCL_NAMESPACE_BEGIN + +/* Map Range Node */ + +ccl_device void svm_node_map_range(KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint value_stack_offset, + uint parameters_stack_offsets, + uint result_stack_offset, + int *offset) +{ + uint from_min_stack_offset, from_max_stack_offset, to_min_stack_offset, to_max_stack_offset; + svm_unpack_node_uchar4(parameters_stack_offsets, + &from_min_stack_offset, + &from_max_stack_offset, + &to_min_stack_offset, + &to_max_stack_offset); + + uint4 defaults = read_node(kg, offset); + + float value = stack_load_float(stack, value_stack_offset); + float from_min = stack_load_float_default(stack, from_min_stack_offset, defaults.x); + float from_max = stack_load_float_default(stack, from_max_stack_offset, defaults.y); + float to_min = stack_load_float_default(stack, to_min_stack_offset, defaults.z); + float to_max = stack_load_float_default(stack, to_max_stack_offset, defaults.w); + + float result; + if (from_max != from_min) { + result = to_min + ((value - from_min) / (from_max - from_min)) * (to_max - to_min); + } + else { + result = 0.0f; + } + stack_store_float(stack, result_stack_offset, result); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_math.h b/intern/cycles/kernel/svm/svm_math.h index 5920913825b..d156dec497c 100644 --- a/intern/cycles/kernel/svm/svm_math.h +++ b/intern/cycles/kernel/svm/svm_math.h @@ -16,48 +16,50 @@ CCL_NAMESPACE_BEGIN -/* Nodes */ - ccl_device void svm_node_math(KernelGlobals *kg, ShaderData *sd, float *stack, - uint itype, - uint f1_offset, - uint f2_offset, + uint type, + uint inputs_stack_offsets, + uint result_stack_offset, int *offset) { - NodeMath type = (NodeMath)itype; - float f1 = stack_load_float(stack, f1_offset); - float f2 = stack_load_float(stack, f2_offset); - float f = svm_math(type, f1, f2); + uint a_stack_offset, b_stack_offset; + svm_unpack_node_uchar2(inputs_stack_offsets, &a_stack_offset, &b_stack_offset); - uint4 node1 = read_node(kg, offset); 
+ float a = stack_load_float(stack, a_stack_offset); + float b = stack_load_float(stack, b_stack_offset); + float result = svm_math((NodeMathType)type, a, b); - stack_store_float(stack, node1.y, f); + stack_store_float(stack, result_stack_offset, result); } ccl_device void svm_node_vector_math(KernelGlobals *kg, ShaderData *sd, float *stack, - uint itype, - uint v1_offset, - uint v2_offset, + uint type, + uint inputs_stack_offsets, + uint outputs_stack_offsets, int *offset) { - NodeVectorMath type = (NodeVectorMath)itype; - float3 v1 = stack_load_float3(stack, v1_offset); - float3 v2 = stack_load_float3(stack, v2_offset); - float f; - float3 v; + uint value_stack_offset, vector_stack_offset; + uint a_stack_offset, b_stack_offset, scale_stack_offset; + svm_unpack_node_uchar3( + inputs_stack_offsets, &a_stack_offset, &b_stack_offset, &scale_stack_offset); + svm_unpack_node_uchar2(outputs_stack_offsets, &value_stack_offset, &vector_stack_offset); - svm_vector_math(&f, &v, type, v1, v2); + float3 a = stack_load_float3(stack, a_stack_offset); + float3 b = stack_load_float3(stack, b_stack_offset); + float scale = stack_load_float(stack, scale_stack_offset); - uint4 node1 = read_node(kg, offset); + float value; + float3 vector; + svm_vector_math(&value, &vector, (NodeVectorMathType)type, a, b, scale); - if (stack_valid(node1.y)) - stack_store_float(stack, node1.y, f); - if (stack_valid(node1.z)) - stack_store_float3(stack, node1.z, v); + if (stack_valid(value_stack_offset)) + stack_store_float(stack, value_stack_offset, value); + if (stack_valid(vector_stack_offset)) + stack_store_float3(stack, vector_stack_offset, vector); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_math_util.h b/intern/cycles/kernel/svm/svm_math_util.h index e3544515f1b..c07a1e4ed98 100644 --- a/intern/cycles/kernel/svm/svm_math_util.h +++ b/intern/cycles/kernel/svm/svm_math_util.h @@ -16,99 +16,130 @@ CCL_NAMESPACE_BEGIN -ccl_device float average_fac(float3 v) -{ - return (fabsf(v.x) 
+ fabsf(v.y) + fabsf(v.z)) / 3.0f; -} - ccl_device void svm_vector_math( - float *Fac, float3 *Vector, NodeVectorMath type, float3 Vector1, float3 Vector2) + float *value, float3 *vector, NodeVectorMathType type, float3 a, float3 b, float scale) { - if (type == NODE_VECTOR_MATH_ADD) { - *Vector = Vector1 + Vector2; - *Fac = average_fac(*Vector); - } - else if (type == NODE_VECTOR_MATH_SUBTRACT) { - *Vector = Vector1 - Vector2; - *Fac = average_fac(*Vector); - } - else if (type == NODE_VECTOR_MATH_AVERAGE) { - *Vector = safe_normalize_len(Vector1 + Vector2, Fac); - } - else if (type == NODE_VECTOR_MATH_DOT_PRODUCT) { - *Fac = dot(Vector1, Vector2); - *Vector = make_float3(0.0f, 0.0f, 0.0f); - } - else if (type == NODE_VECTOR_MATH_CROSS_PRODUCT) { - *Vector = safe_normalize_len(cross(Vector1, Vector2), Fac); - } - else if (type == NODE_VECTOR_MATH_NORMALIZE) { - *Vector = safe_normalize_len(Vector1, Fac); - } - else { - *Fac = 0.0f; - *Vector = make_float3(0.0f, 0.0f, 0.0f); + switch (type) { + case NODE_VECTOR_MATH_ADD: + *vector = a + b; + break; + case NODE_VECTOR_MATH_SUBTRACT: + *vector = a - b; + break; + case NODE_VECTOR_MATH_MULTIPLY: + *vector = a * b; + break; + case NODE_VECTOR_MATH_DIVIDE: + *vector = safe_divide_float3_float3(a, b); + break; + case NODE_VECTOR_MATH_CROSS_PRODUCT: + *vector = cross(a, b); + break; + case NODE_VECTOR_MATH_PROJECT: + *vector = project(a, b); + break; + case NODE_VECTOR_MATH_REFLECT: + *vector = reflect(a, b); + break; + case NODE_VECTOR_MATH_DOT_PRODUCT: + *value = dot(a, b); + break; + case NODE_VECTOR_MATH_DISTANCE: + *value = distance(a, b); + break; + case NODE_VECTOR_MATH_LENGTH: + *value = len(a); + break; + case NODE_VECTOR_MATH_SCALE: + *vector = a * scale; + break; + case NODE_VECTOR_MATH_NORMALIZE: + *vector = safe_normalize(a); + break; + case NODE_VECTOR_MATH_SNAP: + *vector = floor(safe_divide_float3_float3(a, b)) * b; + break; + case NODE_VECTOR_MATH_FLOOR: + *vector = floor(a); + break; + case 
NODE_VECTOR_MATH_CEIL: + *vector = ceil(a); + break; + case NODE_VECTOR_MATH_MODULO: + *vector = make_float3(safe_modulo(a.x, b.x), safe_modulo(a.y, b.y), safe_modulo(a.z, b.z)); + break; + case NODE_VECTOR_MATH_FRACTION: + *vector = a - floor(a); + break; + case NODE_VECTOR_MATH_ABSOLUTE: + *vector = fabs(a); + break; + case NODE_VECTOR_MATH_MINIMUM: + *vector = min(a, b); + break; + case NODE_VECTOR_MATH_MAXIMUM: + *vector = max(a, b); + break; + default: + *vector = make_float3(0.0f, 0.0f, 0.0f); + *value = 0.0f; } } -ccl_device float svm_math(NodeMath type, float Fac1, float Fac2) +ccl_device float svm_math(NodeMathType type, float a, float b) { - float Fac; - - if (type == NODE_MATH_ADD) - Fac = Fac1 + Fac2; - else if (type == NODE_MATH_SUBTRACT) - Fac = Fac1 - Fac2; - else if (type == NODE_MATH_MULTIPLY) - Fac = Fac1 * Fac2; - else if (type == NODE_MATH_DIVIDE) - Fac = safe_divide(Fac1, Fac2); - else if (type == NODE_MATH_SINE) - Fac = sinf(Fac1); - else if (type == NODE_MATH_COSINE) - Fac = cosf(Fac1); - else if (type == NODE_MATH_TANGENT) - Fac = tanf(Fac1); - else if (type == NODE_MATH_ARCSINE) - Fac = safe_asinf(Fac1); - else if (type == NODE_MATH_ARCCOSINE) - Fac = safe_acosf(Fac1); - else if (type == NODE_MATH_ARCTANGENT) - Fac = atanf(Fac1); - else if (type == NODE_MATH_POWER) - Fac = safe_powf(Fac1, Fac2); - else if (type == NODE_MATH_LOGARITHM) - Fac = safe_logf(Fac1, Fac2); - else if (type == NODE_MATH_MINIMUM) - Fac = fminf(Fac1, Fac2); - else if (type == NODE_MATH_MAXIMUM) - Fac = fmaxf(Fac1, Fac2); - else if (type == NODE_MATH_ROUND) - Fac = floorf(Fac1 + 0.5f); - else if (type == NODE_MATH_LESS_THAN) - Fac = Fac1 < Fac2; - else if (type == NODE_MATH_GREATER_THAN) - Fac = Fac1 > Fac2; - else if (type == NODE_MATH_MODULO) - Fac = safe_modulo(Fac1, Fac2); - else if (type == NODE_MATH_ABSOLUTE) - Fac = fabsf(Fac1); - else if (type == NODE_MATH_ARCTAN2) - Fac = atan2f(Fac1, Fac2); - else if (type == NODE_MATH_FLOOR) - Fac = floorf(Fac1); - else if 
(type == NODE_MATH_CEIL) - Fac = ceilf(Fac1); - else if (type == NODE_MATH_FRACT) - Fac = Fac1 - floorf(Fac1); - else if (type == NODE_MATH_SQRT) - Fac = safe_sqrtf(Fac1); - else if (type == NODE_MATH_CLAMP) - Fac = saturate(Fac1); - else - Fac = 0.0f; - - return Fac; + switch (type) { + case NODE_MATH_ADD: + return a + b; + case NODE_MATH_SUBTRACT: + return a - b; + case NODE_MATH_MULTIPLY: + return a * b; + case NODE_MATH_DIVIDE: + return safe_divide(a, b); + case NODE_MATH_POWER: + return safe_powf(a, b); + case NODE_MATH_LOGARITHM: + return safe_logf(a, b); + case NODE_MATH_SQRT: + return safe_sqrtf(a); + case NODE_MATH_ABSOLUTE: + return fabsf(a); + case NODE_MATH_MINIMUM: + return fminf(a, b); + case NODE_MATH_MAXIMUM: + return fmaxf(a, b); + case NODE_MATH_LESS_THAN: + return a < b; + case NODE_MATH_GREATER_THAN: + return a > b; + case NODE_MATH_ROUND: + return floorf(a + 0.5f); + case NODE_MATH_FLOOR: + return floorf(a); + case NODE_MATH_CEIL: + return ceilf(a); + case NODE_MATH_FRACTION: + return a - floorf(a); + case NODE_MATH_MODULO: + return safe_modulo(a, b); + case NODE_MATH_SINE: + return sinf(a); + case NODE_MATH_COSINE: + return cosf(a); + case NODE_MATH_TANGENT: + return tanf(a); + case NODE_MATH_ARCSINE: + return safe_asinf(a); + case NODE_MATH_ARCCOSINE: + return safe_acosf(a); + case NODE_MATH_ARCTANGENT: + return atanf(a); + case NODE_MATH_ARCTAN2: + return atan2f(a, b); + default: + return 0.0f; + } } /* Calculate color in range 800..12000 using an approximation diff --git a/intern/cycles/kernel/svm/svm_musgrave.h b/intern/cycles/kernel/svm/svm_musgrave.h index 67fb5ca6241..9291c7e7295 100644 --- a/intern/cycles/kernel/svm/svm_musgrave.h +++ b/intern/cycles/kernel/svm/svm_musgrave.h @@ -25,7 +25,10 @@ CCL_NAMESPACE_BEGIN * from "Texturing and Modelling: A procedural approach" */ -ccl_device_noinline float noise_musgrave_fBm(float3 p, float H, float lacunarity, float octaves) +ccl_device_noinline_cpu float noise_musgrave_fBm(float3 p, + float 
H, + float lacunarity, + float octaves) { float rmd; float value = 0.0f; @@ -53,10 +56,10 @@ ccl_device_noinline float noise_musgrave_fBm(float3 p, float H, float lacunarity * octaves: number of frequencies in the fBm */ -ccl_device_noinline float noise_musgrave_multi_fractal(float3 p, - float H, - float lacunarity, - float octaves) +ccl_device_noinline_cpu float noise_musgrave_multi_fractal(float3 p, + float H, + float lacunarity, + float octaves) { float rmd; float value = 1.0f; @@ -85,7 +88,7 @@ ccl_device_noinline float noise_musgrave_multi_fractal(float3 p, * offset: raises the terrain from `sea level' */ -ccl_device_noinline float noise_musgrave_hetero_terrain( +ccl_device_noinline_cpu float noise_musgrave_hetero_terrain( float3 p, float H, float lacunarity, float octaves, float offset) { float value, increment, rmd; @@ -121,7 +124,7 @@ ccl_device_noinline float noise_musgrave_hetero_terrain( * offset: raises the terrain from `sea level' */ -ccl_device_noinline float noise_musgrave_hybrid_multi_fractal( +ccl_device_noinline_cpu float noise_musgrave_hybrid_multi_fractal( float3 p, float H, float lacunarity, float octaves, float offset, float gain) { float result, signal, weight, rmd; @@ -159,7 +162,7 @@ ccl_device_noinline float noise_musgrave_hybrid_multi_fractal( * offset: raises the terrain from `sea level' */ -ccl_device_noinline float noise_musgrave_ridged_multi_fractal( +ccl_device_noinline_cpu float noise_musgrave_ridged_multi_fractal( float3 p, float H, float lacunarity, float octaves, float offset, float gain) { float result, signal, weight; @@ -222,10 +225,10 @@ ccl_device void svm_node_tex_musgrave( uint dimension_offset, lacunarity_offset, detail_offset, offset_offset; uint gain_offset, scale_offset; - decode_node_uchar4(node.y, &type, &co_offset, &color_offset, &fac_offset); - decode_node_uchar4( + svm_unpack_node_uchar4(node.y, &type, &co_offset, &color_offset, &fac_offset); + svm_unpack_node_uchar4( node.z, &dimension_offset, &lacunarity_offset, 
&detail_offset, &offset_offset); - decode_node_uchar4(node.w, &gain_offset, &scale_offset, NULL, NULL); + svm_unpack_node_uchar2(node.w, &gain_offset, &scale_offset); float3 co = stack_load_float3(stack, co_offset); float dimension = stack_load_float_default(stack, dimension_offset, node2.x); diff --git a/intern/cycles/kernel/svm/svm_noise.h b/intern/cycles/kernel/svm/svm_noise.h index 322579ccfe3..dd375af27e5 100644 --- a/intern/cycles/kernel/svm/svm_noise.h +++ b/intern/cycles/kernel/svm/svm_noise.h @@ -41,42 +41,6 @@ ccl_device_inline ssei quick_floor_sse(const ssef &x) } #endif -ccl_device uint hash(uint kx, uint ky, uint kz) -{ - // define some handy macros -#define rot(x, k) (((x) << (k)) | ((x) >> (32 - (k)))) -#define final(a, b, c) \ - { \ - c ^= b; \ - c -= rot(b, 14); \ - a ^= c; \ - a -= rot(c, 11); \ - b ^= a; \ - b -= rot(a, 25); \ - c ^= b; \ - c -= rot(b, 16); \ - a ^= c; \ - a -= rot(c, 4); \ - b ^= a; \ - b -= rot(a, 14); \ - c ^= b; \ - c -= rot(b, 24); \ - } - // now hash the data! 
- uint a, b, c, len = 3; - a = b = c = 0xdeadbeef + (len << 2) + 13; - - c += kz; - b += ky; - a += kx; - final(a, b, c); - - return c; - // macros not needed anymore -#undef rot -#undef final -} - #ifdef __KERNEL_SSE2__ ccl_device_inline ssei hash_sse(const ssei &kx, const ssei &ky, const ssei &kz) { @@ -218,7 +182,7 @@ ccl_device_inline ssef scale3_sse(const ssef &result) #endif #ifndef __KERNEL_SSE2__ -ccl_device_noinline float perlin(float x, float y, float z) +ccl_device_noinline_cpu float perlin(float x, float y, float z) { int X; float fx = floorfrac(x, &X); @@ -236,17 +200,19 @@ ccl_device_noinline float perlin(float x, float y, float z) result = nerp( w, nerp(v, - nerp(u, grad(hash(X, Y, Z), fx, fy, fz), grad(hash(X + 1, Y, Z), fx - 1.0f, fy, fz)), nerp(u, - grad(hash(X, Y + 1, Z), fx, fy - 1.0f, fz), - grad(hash(X + 1, Y + 1, Z), fx - 1.0f, fy - 1.0f, fz))), + grad(hash_uint3(X, Y, Z), fx, fy, fz), + grad(hash_uint3(X + 1, Y, Z), fx - 1.0f, fy, fz)), + nerp(u, + grad(hash_uint3(X, Y + 1, Z), fx, fy - 1.0f, fz), + grad(hash_uint3(X + 1, Y + 1, Z), fx - 1.0f, fy - 1.0f, fz))), nerp(v, nerp(u, - grad(hash(X, Y, Z + 1), fx, fy, fz - 1.0f), - grad(hash(X + 1, Y, Z + 1), fx - 1.0f, fy, fz - 1.0f)), + grad(hash_uint3(X, Y, Z + 1), fx, fy, fz - 1.0f), + grad(hash_uint3(X + 1, Y, Z + 1), fx - 1.0f, fy, fz - 1.0f)), nerp(u, - grad(hash(X, Y + 1, Z + 1), fx, fy - 1.0f, fz - 1.0f), - grad(hash(X + 1, Y + 1, Z + 1), fx - 1.0f, fy - 1.0f, fz - 1.0f)))); + grad(hash_uint3(X, Y + 1, Z + 1), fx, fy - 1.0f, fz - 1.0f), + grad(hash_uint3(X + 1, Y + 1, Z + 1), fx - 1.0f, fy - 1.0f, fz - 1.0f)))); float r = scale3(result); /* can happen for big coordinates, things even out to 0.0 then anyway */ @@ -312,16 +278,16 @@ ccl_device float snoise(float3 p) ccl_device float cellnoise(float3 p) { int3 ip = quick_floor_to_int3(p); - return bits_to_01(hash(ip.x, ip.y, ip.z)); + return hash_uint3_to_float(ip.x, ip.y, ip.z); } ccl_device float3 cellnoise3(float3 p) { int3 ip = 
quick_floor_to_int3(p); #ifndef __KERNEL_SSE__ - float r = bits_to_01(hash(ip.x, ip.y, ip.z)); - float g = bits_to_01(hash(ip.y, ip.x, ip.z)); - float b = bits_to_01(hash(ip.y, ip.z, ip.x)); + float r = hash_uint3_to_float(ip.x, ip.y, ip.z); + float g = hash_uint3_to_float(ip.y, ip.x, ip.z); + float b = hash_uint3_to_float(ip.y, ip.z, ip.x); return make_float3(r, g, b); #else ssei ip_yxz = shuffle<1, 0, 2, 3>(ssei(ip.m128)); diff --git a/intern/cycles/kernel/svm/svm_noisetex.h b/intern/cycles/kernel/svm/svm_noisetex.h index 3324e86fcd8..91dc11691e6 100644 --- a/intern/cycles/kernel/svm/svm_noisetex.h +++ b/intern/cycles/kernel/svm/svm_noisetex.h @@ -23,8 +23,8 @@ ccl_device void svm_node_tex_noise( { uint co_offset, scale_offset, detail_offset, distortion_offset, fac_offset, color_offset; - decode_node_uchar4(node.y, &co_offset, &scale_offset, &detail_offset, &distortion_offset); - decode_node_uchar4(node.z, &color_offset, &fac_offset, NULL, NULL); + svm_unpack_node_uchar4(node.y, &co_offset, &scale_offset, &detail_offset, &distortion_offset); + svm_unpack_node_uchar2(node.z, &color_offset, &fac_offset); uint4 node2 = read_node(kg, offset); diff --git a/intern/cycles/kernel/svm/svm_ramp.h b/intern/cycles/kernel/svm/svm_ramp.h index 6084ee35a1f..85ccf39144b 100644 --- a/intern/cycles/kernel/svm/svm_ramp.h +++ b/intern/cycles/kernel/svm/svm_ramp.h @@ -59,7 +59,7 @@ ccl_device void svm_node_rgb_ramp( uint fac_offset, color_offset, alpha_offset; uint interpolate = node.z; - decode_node_uchar4(node.y, &fac_offset, &color_offset, &alpha_offset, NULL); + svm_unpack_node_uchar3(node.y, &fac_offset, &color_offset, &alpha_offset); uint table_size = read_node(kg, offset).x; @@ -78,7 +78,7 @@ ccl_device void svm_node_curves( KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) { uint fac_offset, color_offset, out_offset; - decode_node_uchar4(node.y, &fac_offset, &color_offset, &out_offset, NULL); + svm_unpack_node_uchar3(node.y, &fac_offset, &color_offset, 
&out_offset); uint table_size = read_node(kg, offset).x; diff --git a/intern/cycles/kernel/svm/svm_tex_coord.h b/intern/cycles/kernel/svm/svm_tex_coord.h index 1fb3e20f9e0..a876d6bc916 100644 --- a/intern/cycles/kernel/svm/svm_tex_coord.h +++ b/intern/cycles/kernel/svm/svm_tex_coord.h @@ -257,7 +257,7 @@ ccl_device void svm_node_tex_coord_bump_dy( ccl_device void svm_node_normal_map(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) { uint color_offset, strength_offset, normal_offset, space; - decode_node_uchar4(node.y, &color_offset, &strength_offset, &normal_offset, &space); + svm_unpack_node_uchar4(node.y, &color_offset, &strength_offset, &normal_offset, &space); float3 color = stack_load_float3(stack, color_offset); color = 2.0f * make_float3(color.x - 0.5f, color.y - 0.5f, color.z - 0.5f); @@ -349,7 +349,7 @@ ccl_device void svm_node_normal_map(KernelGlobals *kg, ShaderData *sd, float *st ccl_device void svm_node_tangent(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) { uint tangent_offset, direction_type, axis; - decode_node_uchar4(node.y, &tangent_offset, &direction_type, &axis, NULL); + svm_unpack_node_uchar3(node.y, &tangent_offset, &direction_type, &axis); float3 tangent; float3 attribute_value; diff --git a/intern/cycles/kernel/svm/svm_types.h b/intern/cycles/kernel/svm/svm_types.h index ea92fd7ce59..a3caa1ab68d 100644 --- a/intern/cycles/kernel/svm/svm_types.h +++ b/intern/cycles/kernel/svm/svm_types.h @@ -138,6 +138,9 @@ typedef enum ShaderNodeType { NODE_VECTOR_DISPLACEMENT, NODE_PRINCIPLED_VOLUME, NODE_IES, + NODE_MAP_RANGE, + NODE_CLAMP, + NODE_TEX_WHITE_NOISE, } ShaderNodeType; typedef enum NodeAttributeType { @@ -158,6 +161,7 @@ typedef enum NodeGeometry { typedef enum NodeObjectInfo { NODE_INFO_OB_LOCATION, + NODE_INFO_OB_COLOR, NODE_INFO_OB_INDEX, NODE_INFO_MAT_INDEX, NODE_INFO_OB_RANDOM @@ -242,7 +246,7 @@ typedef enum NodeMix { NODE_MIX_CLAMP /* used for the clamp UI option */ } NodeMix; -typedef enum NodeMath { +typedef 
enum NodeMathType { NODE_MATH_ADD, NODE_MATH_SUBTRACT, NODE_MATH_MULTIPLY, @@ -265,19 +269,35 @@ typedef enum NodeMath { NODE_MATH_ARCTAN2, NODE_MATH_FLOOR, NODE_MATH_CEIL, - NODE_MATH_FRACT, + NODE_MATH_FRACTION, NODE_MATH_SQRT, - NODE_MATH_CLAMP /* used for the clamp UI option */ -} NodeMath; +} NodeMathType; -typedef enum NodeVectorMath { +typedef enum NodeVectorMathType { NODE_VECTOR_MATH_ADD, NODE_VECTOR_MATH_SUBTRACT, - NODE_VECTOR_MATH_AVERAGE, - NODE_VECTOR_MATH_DOT_PRODUCT, + NODE_VECTOR_MATH_MULTIPLY, + NODE_VECTOR_MATH_DIVIDE, + NODE_VECTOR_MATH_CROSS_PRODUCT, - NODE_VECTOR_MATH_NORMALIZE -} NodeVectorMath; + NODE_VECTOR_MATH_PROJECT, + NODE_VECTOR_MATH_REFLECT, + NODE_VECTOR_MATH_DOT_PRODUCT, + + NODE_VECTOR_MATH_DISTANCE, + NODE_VECTOR_MATH_LENGTH, + NODE_VECTOR_MATH_SCALE, + NODE_VECTOR_MATH_NORMALIZE, + + NODE_VECTOR_MATH_SNAP, + NODE_VECTOR_MATH_FLOOR, + NODE_VECTOR_MATH_CEIL, + NODE_VECTOR_MATH_MODULO, + NODE_VECTOR_MATH_FRACTION, + NODE_VECTOR_MATH_ABSOLUTE, + NODE_VECTOR_MATH_MINIMUM, + NODE_VECTOR_MATH_MAXIMUM, +} NodeVectorMathType; typedef enum NodeVectorTransformType { NODE_VECTOR_TRANSFORM_TYPE_VECTOR, diff --git a/intern/cycles/kernel/svm/svm_vector_transform.h b/intern/cycles/kernel/svm/svm_vector_transform.h index 7ec0f07f2e4..1e95492cf1b 100644 --- a/intern/cycles/kernel/svm/svm_vector_transform.h +++ b/intern/cycles/kernel/svm/svm_vector_transform.h @@ -26,8 +26,8 @@ ccl_device void svm_node_vector_transform(KernelGlobals *kg, uint itype, ifrom, ito; uint vector_in, vector_out; - decode_node_uchar4(node.y, &itype, &ifrom, &ito, NULL); - decode_node_uchar4(node.z, &vector_in, &vector_out, NULL, NULL); + svm_unpack_node_uchar3(node.y, &itype, &ifrom, &ito); + svm_unpack_node_uchar2(node.z, &vector_in, &vector_out); float3 in = stack_load_float3(stack, vector_in); diff --git a/intern/cycles/kernel/svm/svm_voronoi.h b/intern/cycles/kernel/svm/svm_voronoi.h index 3e28a316169..3d7fa523968 100644 --- a/intern/cycles/kernel/svm/svm_voronoi.h 
+++ b/intern/cycles/kernel/svm/svm_voronoi.h @@ -116,8 +116,8 @@ ccl_device void svm_node_tex_voronoi( uint co_offset, coloring, distance, feature; uint scale_offset, e_offset, fac_offset, color_offset; - decode_node_uchar4(node.y, &co_offset, &coloring, &distance, &feature); - decode_node_uchar4(node.z, &scale_offset, &e_offset, &fac_offset, &color_offset); + svm_unpack_node_uchar4(node.y, &co_offset, &coloring, &distance, &feature); + svm_unpack_node_uchar4(node.z, &scale_offset, &e_offset, &fac_offset, &color_offset); float3 co = stack_load_float3(stack, co_offset); float scale = stack_load_float_default(stack, scale_offset, node2.x); diff --git a/intern/cycles/kernel/svm/svm_voxel.h b/intern/cycles/kernel/svm/svm_voxel.h index 26d8cc71d3b..b79be8e5bde 100644 --- a/intern/cycles/kernel/svm/svm_voxel.h +++ b/intern/cycles/kernel/svm/svm_voxel.h @@ -23,7 +23,7 @@ ccl_device void svm_node_tex_voxel( KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) { uint co_offset, density_out_offset, color_out_offset, space; - decode_node_uchar4(node.z, &co_offset, &density_out_offset, &color_out_offset, &space); + svm_unpack_node_uchar4(node.z, &co_offset, &density_out_offset, &color_out_offset, &space); #ifdef __VOLUME__ int id = node.y; float3 co = stack_load_float3(stack, co_offset); diff --git a/intern/cycles/kernel/svm/svm_wave.h b/intern/cycles/kernel/svm/svm_wave.h index 003ad7dc63a..baaa89ab0cb 100644 --- a/intern/cycles/kernel/svm/svm_wave.h +++ b/intern/cycles/kernel/svm/svm_wave.h @@ -18,12 +18,12 @@ CCL_NAMESPACE_BEGIN /* Wave */ -ccl_device_noinline float svm_wave(NodeWaveType type, - NodeWaveProfile profile, - float3 p, - float detail, - float distortion, - float dscale) +ccl_device_noinline_cpu float svm_wave(NodeWaveType type, + NodeWaveProfile profile, + float3 p, + float detail, + float distortion, + float dscale) { float n; @@ -54,8 +54,8 @@ ccl_device void svm_node_tex_wave( uint co_offset, scale_offset, detail_offset, dscale_offset, 
distortion_offset, color_offset, fac_offset; - decode_node_uchar4(node.y, &type, &color_offset, &fac_offset, &dscale_offset); - decode_node_uchar4(node.z, &co_offset, &scale_offset, &detail_offset, &distortion_offset); + svm_unpack_node_uchar4(node.y, &type, &color_offset, &fac_offset, &dscale_offset); + svm_unpack_node_uchar4(node.z, &co_offset, &scale_offset, &detail_offset, &distortion_offset); float3 co = stack_load_float3(stack, co_offset); float scale = stack_load_float_default(stack, scale_offset, node2.x); diff --git a/intern/cycles/kernel/svm/svm_white_noise.h b/intern/cycles/kernel/svm/svm_white_noise.h new file mode 100644 index 00000000000..71d4591d25d --- /dev/null +++ b/intern/cycles/kernel/svm/svm_white_noise.h @@ -0,0 +1,55 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +CCL_NAMESPACE_BEGIN + +ccl_device void svm_node_tex_white_noise(KernelGlobals *kg, + ShaderData *sd, + float *stack, + uint dimensions, + uint inputs_stack_offsets, + uint value_stack_offset, + int *offset) +{ + uint vector_stack_offset, w_stack_offset; + svm_unpack_node_uchar2(inputs_stack_offsets, &vector_stack_offset, &w_stack_offset); + + float3 vector = stack_load_float3(stack, vector_stack_offset); + float w = stack_load_float(stack, w_stack_offset); + + float value; + switch (dimensions) { + case 1: + value = hash_float_to_float(w); + break; + case 2: + value = hash_float2_to_float(make_float2(vector.x, vector.y)); + break; + case 3: + value = hash_float3_to_float(vector); + break; + case 4: + value = hash_float4_to_float(make_float4(vector.x, vector.y, vector.z, w)); + break; + default: + value = 0.0f; + kernel_assert(0); + break; + } + stack_store_float(stack, value_stack_offset, value); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_wireframe.h b/intern/cycles/kernel/svm/svm_wireframe.h index 55e61d0e8c7..49158bd86d5 100644 --- a/intern/cycles/kernel/svm/svm_wireframe.h +++ b/intern/cycles/kernel/svm/svm_wireframe.h @@ -93,7 +93,7 @@ ccl_device void svm_node_wireframe(KernelGlobals *kg, ShaderData *sd, float *sta uint in_size = node.y; uint out_fac = node.z; uint use_pixel_size, bump_offset; - decode_node_uchar4(node.w, &use_pixel_size, &bump_offset, NULL, NULL); + svm_unpack_node_uchar2(node.w, &use_pixel_size, &bump_offset); /* Input Data */ float size = stack_load_float(stack, in_size); diff --git a/intern/cycles/render/CMakeLists.txt b/intern/cycles/render/CMakeLists.txt index c79e5a23ea1..53196b013f6 100644 --- a/intern/cycles/render/CMakeLists.txt +++ b/intern/cycles/render/CMakeLists.txt @@ -85,8 +85,6 @@ if(WITH_CYCLES_OSL) ) endif() -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${RTTI_DISABLE_FLAGS}") - include_directories(${INC}) include_directories(SYSTEM ${INC_SYS}) diff --git a/intern/cycles/render/bake.cpp 
b/intern/cycles/render/bake.cpp index 73893921500..b906357b7b5 100644 --- a/intern/cycles/render/bake.cpp +++ b/intern/cycles/render/bake.cpp @@ -124,7 +124,7 @@ BakeData *BakeManager::init(const int object, const size_t tri_offset, const siz void BakeManager::set_shader_limit(const size_t x, const size_t y) { m_shader_limit = x * y; - m_shader_limit = (size_t)pow(2, ceil(log(m_shader_limit) / log(2))); + m_shader_limit = (size_t)pow(2, std::ceil(log(m_shader_limit) / log(2))); } bool BakeManager::bake(Device *device, diff --git a/intern/cycles/render/camera.cpp b/intern/cycles/render/camera.cpp index 7591d9dda0c..38306a63c74 100644 --- a/intern/cycles/render/camera.cpp +++ b/intern/cycles/render/camera.cpp @@ -565,8 +565,7 @@ float3 Camera::transform_raster_to_world(float raster_x, float raster_y) BoundBox Camera::viewplane_bounds_get() { /* TODO(sergey): This is all rather stupid, but is there a way to perform - * checks we need in a more clear and smart fasion? - */ + * checks we need in a more clear and smart fashion? 
*/ BoundBox bounds = BoundBox::empty; if (type == CAMERA_PANORAMA) { diff --git a/intern/cycles/render/constant_fold.cpp b/intern/cycles/render/constant_fold.cpp index e475ff60eef..851d4b71df8 100644 --- a/intern/cycles/render/constant_fold.cpp +++ b/intern/cycles/render/constant_fold.cpp @@ -301,7 +301,7 @@ void ConstantFolder::fold_mix(NodeMix type, bool clamp) const } } -void ConstantFolder::fold_math(NodeMath type, bool clamp) const +void ConstantFolder::fold_math(NodeMathType type) const { ShaderInput *value1_in = node->input("Value1"); ShaderInput *value2_in = node->input("Value2"); @@ -310,25 +310,25 @@ void ConstantFolder::fold_math(NodeMath type, bool clamp) const case NODE_MATH_ADD: /* X + 0 == 0 + X == X */ if (is_zero(value1_in)) { - try_bypass_or_make_constant(value2_in, clamp); + try_bypass_or_make_constant(value2_in); } else if (is_zero(value2_in)) { - try_bypass_or_make_constant(value1_in, clamp); + try_bypass_or_make_constant(value1_in); } break; case NODE_MATH_SUBTRACT: /* X - 0 == X */ if (is_zero(value2_in)) { - try_bypass_or_make_constant(value1_in, clamp); + try_bypass_or_make_constant(value1_in); } break; case NODE_MATH_MULTIPLY: /* X * 1 == 1 * X == X */ if (is_one(value1_in)) { - try_bypass_or_make_constant(value2_in, clamp); + try_bypass_or_make_constant(value2_in); } else if (is_one(value2_in)) { - try_bypass_or_make_constant(value1_in, clamp); + try_bypass_or_make_constant(value1_in); } /* X * 0 == 0 * X == 0 */ else if (is_zero(value1_in) || is_zero(value2_in)) { @@ -338,7 +338,7 @@ void ConstantFolder::fold_math(NodeMath type, bool clamp) const case NODE_MATH_DIVIDE: /* X / 1 == X */ if (is_one(value2_in)) { - try_bypass_or_make_constant(value1_in, clamp); + try_bypass_or_make_constant(value1_in); } /* 0 / X == 0 */ else if (is_zero(value1_in)) { @@ -352,17 +352,18 @@ void ConstantFolder::fold_math(NodeMath type, bool clamp) const } /* X ^ 1 == X */ else if (is_one(value2_in)) { - try_bypass_or_make_constant(value1_in, clamp); + 
try_bypass_or_make_constant(value1_in); } default: break; } } -void ConstantFolder::fold_vector_math(NodeVectorMath type) const +void ConstantFolder::fold_vector_math(NodeVectorMathType type) const { ShaderInput *vector1_in = node->input("Vector1"); ShaderInput *vector2_in = node->input("Vector2"); + ShaderInput *scale_in = node->input("Scale"); switch (type) { case NODE_VECTOR_MATH_ADD: @@ -380,6 +381,27 @@ void ConstantFolder::fold_vector_math(NodeVectorMath type) const try_bypass_or_make_constant(vector1_in); } break; + case NODE_VECTOR_MATH_MULTIPLY: + /* X * 0 == 0 * X == 0 */ + if (is_zero(vector1_in) || is_zero(vector2_in)) { + make_zero(); + } /* X * 1 == 1 * X == X */ + else if (is_one(vector1_in)) { + try_bypass_or_make_constant(vector2_in); + } + else if (is_one(vector2_in)) { + try_bypass_or_make_constant(vector1_in); + } + break; + case NODE_VECTOR_MATH_DIVIDE: + /* X / 0 == 0 / X == 0 */ + if (is_zero(vector1_in) || is_zero(vector2_in)) { + make_zero(); + } /* X / 1 == X */ + else if (is_one(vector2_in)) { + try_bypass_or_make_constant(vector1_in); + } + break; case NODE_VECTOR_MATH_DOT_PRODUCT: case NODE_VECTOR_MATH_CROSS_PRODUCT: /* X * 0 == 0 * X == 0 */ @@ -387,6 +409,21 @@ void ConstantFolder::fold_vector_math(NodeVectorMath type) const make_zero(); } break; + case NODE_VECTOR_MATH_LENGTH: + case NODE_VECTOR_MATH_ABSOLUTE: + if (is_zero(vector1_in)) { + make_zero(); + } + break; + case NODE_VECTOR_MATH_SCALE: + /* X * 0 == 0 * X == 0 */ + if (is_zero(vector1_in) || is_zero(scale_in)) { + make_zero(); + } /* X * 1 == X */ + else if (is_one(scale_in)) { + try_bypass_or_make_constant(vector1_in); + } + break; default: break; } diff --git a/intern/cycles/render/constant_fold.h b/intern/cycles/render/constant_fold.h index c14b94868dc..881636a9fe1 100644 --- a/intern/cycles/render/constant_fold.h +++ b/intern/cycles/render/constant_fold.h @@ -64,8 +64,8 @@ class ConstantFolder { /* Specific nodes. 
*/ void fold_mix(NodeMix type, bool clamp) const; - void fold_math(NodeMath type, bool clamp) const; - void fold_vector_math(NodeVectorMath type) const; + void fold_math(NodeMathType type) const; + void fold_vector_math(NodeVectorMathType type) const; }; CCL_NAMESPACE_END diff --git a/intern/cycles/render/integrator.cpp b/intern/cycles/render/integrator.cpp index 76258a292e8..b41b0b7b260 100644 --- a/intern/cycles/render/integrator.cpp +++ b/intern/cycles/render/integrator.cpp @@ -141,7 +141,7 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene kintegrator->caustics_refractive = caustics_refractive; kintegrator->filter_glossy = (filter_glossy == 0.0f) ? FLT_MAX : 1.0f / filter_glossy; - kintegrator->seed = hash_int(seed); + kintegrator->seed = hash_uint2(seed, 0); kintegrator->use_ambient_occlusion = ((Pass::contains(scene->film->passes, PASS_AO)) || dscene->data.background.ao_factor != 0.0f); diff --git a/intern/cycles/render/light.cpp b/intern/cycles/render/light.cpp index 5c3f1c35bdc..8c7a21da561 100644 --- a/intern/cycles/render/light.cpp +++ b/intern/cycles/render/light.cpp @@ -944,7 +944,7 @@ void LightManager::tag_update(Scene * /*scene*/) need_update = true; } -int LightManager::add_ies_from_file(ustring filename) +int LightManager::add_ies_from_file(const string &filename) { string content; @@ -953,10 +953,10 @@ int LightManager::add_ies_from_file(ustring filename) content = "\n"; } - return add_ies(ustring(content)); + return add_ies(content); } -int LightManager::add_ies(ustring content) +int LightManager::add_ies(const string &content) { uint hash = hash_string(content.c_str()); diff --git a/intern/cycles/render/light.h b/intern/cycles/render/light.h index 79450ea5f8d..6dd23374818 100644 --- a/intern/cycles/render/light.h +++ b/intern/cycles/render/light.h @@ -92,8 +92,8 @@ class LightManager { ~LightManager(); /* IES texture management */ - int add_ies(ustring ies); - int add_ies_from_file(ustring filename); + int 
add_ies(const string &ies); + int add_ies_from_file(const string &filename); void remove_ies(int slot); void device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress); diff --git a/intern/cycles/render/mesh.cpp b/intern/cycles/render/mesh.cpp index 91c3a772537..6ac66661859 100644 --- a/intern/cycles/render/mesh.cpp +++ b/intern/cycles/render/mesh.cpp @@ -1091,6 +1091,17 @@ bool Mesh::has_true_displacement() const return false; } +bool Mesh::has_voxel_attributes() const +{ + foreach (const Attribute &attr, attributes.attributes) { + if (attr.element == ATTR_ELEMENT_VOXEL) { + return true; + } + } + + return false; +} + float Mesh::motion_time(int step) const { return (motion_steps > 1) ? 2.0f * step / (motion_steps - 1) - 1.0f : 0.0f; @@ -2020,15 +2031,7 @@ void MeshManager::device_update_preprocess(Device *device, Scene *scene, Progres if (need_update && mesh->has_volume) { /* Create volume meshes if there is voxel data. */ - bool has_voxel_attributes = false; - - foreach (Attribute &attr, mesh->attributes.attributes) { - if (attr.element == ATTR_ELEMENT_VOXEL) { - has_voxel_attributes = true; - } - } - - if (has_voxel_attributes) { + if (mesh->has_voxel_attributes()) { if (!volume_images_updated) { progress.set_status("Updating Meshes Volume Bounds"); device_update_volume_images(device, scene, progress); diff --git a/intern/cycles/render/mesh.h b/intern/cycles/render/mesh.h index 05c67ccb3b7..5bb6ab328b7 100644 --- a/intern/cycles/render/mesh.h +++ b/intern/cycles/render/mesh.h @@ -318,6 +318,7 @@ class Mesh : public Node { bool has_motion_blur() const; bool has_true_displacement() const; + bool has_voxel_attributes() const; /* Convert between normalized -1..1 motion time and index * in the VERTEX_MOTION attribute. 
*/ diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp index 8e7969cfbaf..69c1c06f846 100644 --- a/intern/cycles/render/nodes.cpp +++ b/intern/cycles/render/nodes.cpp @@ -163,8 +163,10 @@ void TextureMapping::compile(SVMCompiler &compiler, int offset_in, int offset_ou } if (type == NORMAL) { - compiler.add_node(NODE_VECTOR_MATH, NODE_VECTOR_MATH_NORMALIZE, offset_out, offset_out); - compiler.add_node(NODE_VECTOR_MATH, SVM_STACK_INVALID, offset_out); + compiler.add_node(NODE_VECTOR_MATH, + NODE_VECTOR_MATH_NORMALIZE, + compiler.encode_uchar4(offset_out, offset_out, offset_out), + compiler.encode_uchar4(SVM_STACK_INVALID, offset_out)); } } @@ -1067,10 +1069,10 @@ void IESLightNode::get_slot() if (slot == -1) { if (ies.empty()) { - slot = light_manager->add_ies_from_file(filename); + slot = light_manager->add_ies_from_file(filename.string()); } else { - slot = light_manager->add_ies(ies); + slot = light_manager->add_ies(ies.string()); } } } @@ -1108,6 +1110,53 @@ void IESLightNode::compile(OSLCompiler &compiler) compiler.add(this, "node_ies_light"); } +/* White Noise Texture */ + +NODE_DEFINE(WhiteNoiseTextureNode) +{ + NodeType *type = NodeType::add("white_noise_texture", create, NodeType::SHADER); + + static NodeEnum dimensions_enum; + dimensions_enum.insert("1D", 1); + dimensions_enum.insert("2D", 2); + dimensions_enum.insert("3D", 3); + dimensions_enum.insert("4D", 4); + SOCKET_ENUM(dimensions, "Dimensions", dimensions_enum, 3); + + SOCKET_IN_POINT(vector, "Vector", make_float3(0.0f, 0.0f, 0.0f)); + SOCKET_IN_FLOAT(w, "W", 0.0f); + + SOCKET_OUT_FLOAT(value, "Value"); + + return type; +} + +WhiteNoiseTextureNode::WhiteNoiseTextureNode() : ShaderNode(node_type) +{ +} + +void WhiteNoiseTextureNode::compile(SVMCompiler &compiler) +{ + ShaderInput *vector_in = input("Vector"); + ShaderInput *w_in = input("W"); + ShaderOutput *value_out = output("Value"); + + int vector_stack_offset = compiler.stack_assign(vector_in); + int w_stack_offset = 
compiler.stack_assign(w_in); + int value_stack_offset = compiler.stack_assign(value_out); + + compiler.add_node(NODE_TEX_WHITE_NOISE, + dimensions, + compiler.encode_uchar4(vector_stack_offset, w_stack_offset), + value_stack_offset); +} + +void WhiteNoiseTextureNode::compile(OSLCompiler &compiler) +{ + compiler.parameter(this, "dimensions"); + compiler.add(this, "node_white_noise_texture"); +} + /* Musgrave Texture */ NODE_DEFINE(MusgraveTextureNode) @@ -3891,6 +3940,7 @@ NODE_DEFINE(ObjectInfoNode) NodeType *type = NodeType::add("object_info", create, NodeType::SHADER); SOCKET_OUT_VECTOR(location, "Location"); + SOCKET_OUT_COLOR(color, "Color"); SOCKET_OUT_FLOAT(object_index, "Object Index"); SOCKET_OUT_FLOAT(material_index, "Material Index"); SOCKET_OUT_FLOAT(random, "Random"); @@ -3909,6 +3959,11 @@ void ObjectInfoNode::compile(SVMCompiler &compiler) compiler.add_node(NODE_OBJECT_INFO, NODE_INFO_OB_LOCATION, compiler.stack_assign(out)); } + out = output("Color"); + if (!out->links.empty()) { + compiler.add_node(NODE_OBJECT_INFO, NODE_INFO_OB_COLOR, compiler.stack_assign(out)); + } + out = output("Object Index"); if (!out->links.empty()) { compiler.add_node(NODE_OBJECT_INFO, NODE_INFO_OB_INDEX, compiler.stack_assign(out)); @@ -4119,6 +4174,90 @@ void HairInfoNode::compile(OSLCompiler &compiler) compiler.add(this, "node_hair_info"); } +/* Volume Info */ + +NODE_DEFINE(VolumeInfoNode) +{ + NodeType *type = NodeType::add("volume_info", create, NodeType::SHADER); + + SOCKET_OUT_COLOR(color, "Color"); + SOCKET_OUT_FLOAT(density, "Density"); + SOCKET_OUT_FLOAT(flame, "Flame"); + SOCKET_OUT_FLOAT(temperature, "Temperature"); + + return type; +} + +VolumeInfoNode::VolumeInfoNode() : ShaderNode(node_type) +{ +} + +/* The requested attributes are not updated after node expansion. + * So we explicitly request the required attributes. 
+ */ +void VolumeInfoNode::attributes(Shader *shader, AttributeRequestSet *attributes) +{ + if (shader->has_volume) { + if (!output("Color")->links.empty()) { + attributes->add(ATTR_STD_VOLUME_COLOR); + } + if (!output("Density")->links.empty()) { + attributes->add(ATTR_STD_VOLUME_DENSITY); + } + if (!output("Flame")->links.empty()) { + attributes->add(ATTR_STD_VOLUME_FLAME); + } + if (!output("Temperature")->links.empty()) { + attributes->add(ATTR_STD_VOLUME_TEMPERATURE); + } + attributes->add(ATTR_STD_GENERATED_TRANSFORM); + } + ShaderNode::attributes(shader, attributes); +} + +void VolumeInfoNode::expand(ShaderGraph *graph) +{ + ShaderOutput *color_out = output("Color"); + if (!color_out->links.empty()) { + AttributeNode *attr = new AttributeNode(); + attr->attribute = "color"; + graph->add(attr); + graph->relink(color_out, attr->output("Color")); + } + + ShaderOutput *density_out = output("Density"); + if (!density_out->links.empty()) { + AttributeNode *attr = new AttributeNode(); + attr->attribute = "density"; + graph->add(attr); + graph->relink(density_out, attr->output("Fac")); + } + + ShaderOutput *flame_out = output("Flame"); + if (!flame_out->links.empty()) { + AttributeNode *attr = new AttributeNode(); + attr->attribute = "flame"; + graph->add(attr); + graph->relink(flame_out, attr->output("Fac")); + } + + ShaderOutput *temperature_out = output("Temperature"); + if (!temperature_out->links.empty()) { + AttributeNode *attr = new AttributeNode(); + attr->attribute = "temperature"; + graph->add(attr); + graph->relink(temperature_out, attr->output("Fac")); + } +} + +void VolumeInfoNode::compile(SVMCompiler &) +{ +} + +void VolumeInfoNode::compile(OSLCompiler &) +{ +} + /* Value */ NODE_DEFINE(ValueNode) @@ -5259,6 +5398,140 @@ void OutputNode::compile(OSLCompiler &compiler) compiler.add(this, "node_output_displacement"); } +/* Map Range Node */ + +NODE_DEFINE(MapRangeNode) +{ + NodeType *type = NodeType::add("map_range", create, NodeType::SHADER); + + 
SOCKET_IN_FLOAT(value, "Value", 1.0f); + SOCKET_IN_FLOAT(from_min, "From Min", 0.0f); + SOCKET_IN_FLOAT(from_max, "From Max", 1.0f); + SOCKET_IN_FLOAT(to_min, "To Min", 0.0f); + SOCKET_IN_FLOAT(to_max, "To Max", 1.0f); + + SOCKET_OUT_FLOAT(result, "Result"); + + return type; +} + +MapRangeNode::MapRangeNode() : ShaderNode(node_type) +{ +} + +void MapRangeNode::expand(ShaderGraph *graph) +{ + if (clamp) { + ShaderOutput *result_out = output("Result"); + if (!result_out->links.empty()) { + ClampNode *clamp_node = new ClampNode(); + clamp_node->min = to_min; + clamp_node->max = to_max; + graph->add(clamp_node); + graph->relink(result_out, clamp_node->output("Result")); + graph->connect(result_out, clamp_node->input("Value")); + } + } +} + +void MapRangeNode::constant_fold(const ConstantFolder &folder) +{ + if (folder.all_inputs_constant()) { + float result; + if (from_max != from_min) { + result = to_min + ((value - from_min) / (from_max - from_min)) * (to_max - to_min); + } + else { + result = 0.0f; + } + folder.make_constant(result); + } +} + +void MapRangeNode::compile(SVMCompiler &compiler) +{ + ShaderInput *value_in = input("Value"); + ShaderInput *from_min_in = input("From Min"); + ShaderInput *from_max_in = input("From Max"); + ShaderInput *to_min_in = input("To Min"); + ShaderInput *to_max_in = input("To Max"); + ShaderOutput *result_out = output("Result"); + + int value_stack_offset = compiler.stack_assign(value_in); + int from_min_stack_offset = compiler.stack_assign_if_linked(from_min_in); + int from_max_stack_offset = compiler.stack_assign_if_linked(from_max_in); + int to_min_stack_offset = compiler.stack_assign_if_linked(to_min_in); + int to_max_stack_offset = compiler.stack_assign_if_linked(to_max_in); + int result_stack_offset = compiler.stack_assign(result_out); + + compiler.add_node( + NODE_MAP_RANGE, + value_stack_offset, + compiler.encode_uchar4( + from_min_stack_offset, from_max_stack_offset, to_min_stack_offset, to_max_stack_offset), + 
result_stack_offset); + + compiler.add_node(__float_as_int(from_min), + __float_as_int(from_max), + __float_as_int(to_min), + __float_as_int(to_max)); +} + +void MapRangeNode::compile(OSLCompiler &compiler) +{ + compiler.add(this, "node_map_range"); +} + +/* Clamp Node */ + +NODE_DEFINE(ClampNode) +{ + NodeType *type = NodeType::add("clamp", create, NodeType::SHADER); + + SOCKET_IN_FLOAT(value, "Value", 1.0f); + SOCKET_IN_FLOAT(min, "Min", 0.0f); + SOCKET_IN_FLOAT(max, "Max", 1.0f); + + SOCKET_OUT_FLOAT(result, "Result"); + + return type; +} + +ClampNode::ClampNode() : ShaderNode(node_type) +{ +} + +void ClampNode::constant_fold(const ConstantFolder &folder) +{ + if (folder.all_inputs_constant()) { + folder.make_constant(clamp(value, min, max)); + } +} + +void ClampNode::compile(SVMCompiler &compiler) +{ + ShaderInput *value_in = input("Value"); + ShaderInput *min_in = input("Min"); + ShaderInput *max_in = input("Max"); + ShaderOutput *result_out = output("Result"); + + int value_stack_offset = compiler.stack_assign(value_in); + int min_stack_offset = compiler.stack_assign(min_in); + int max_stack_offset = compiler.stack_assign(max_in); + int result_stack_offset = compiler.stack_assign(result_out); + + compiler.add_node(NODE_CLAMP, + value_stack_offset, + compiler.encode_uchar4(min_stack_offset, max_stack_offset), + result_stack_offset); + compiler.add_node(__float_as_int(min), __float_as_int(max)); +} + +void ClampNode::compile(OSLCompiler &compiler) +{ + compiler.add(this, "node_clamp"); +} + /* Math */ NODE_DEFINE(MathNode) @@ -5288,14 +5561,14 @@ NODE_DEFINE(MathNode) type_enum.insert("arctan2", NODE_MATH_ARCTAN2); type_enum.insert("floor", NODE_MATH_FLOOR); type_enum.insert("ceil", NODE_MATH_CEIL); - type_enum.insert("fract", NODE_MATH_FRACT); + type_enum.insert("fraction", NODE_MATH_FRACTION); type_enum.insert("sqrt", NODE_MATH_SQRT); SOCKET_ENUM(type, "Type", type_enum, NODE_MATH_ADD); SOCKET_BOOLEAN(use_clamp, "Use Clamp", false); - SOCKET_IN_FLOAT(value1, 
"Value1", 0.0f); - SOCKET_IN_FLOAT(value2, "Value2", 0.0f); + SOCKET_IN_FLOAT(value1, "Value1", 0.5f); + SOCKET_IN_FLOAT(value2, "Value2", 0.5f); SOCKET_OUT_FLOAT(value, "Value"); @@ -5306,13 +5579,28 @@ MathNode::MathNode() : ShaderNode(node_type) { } +void MathNode::expand(ShaderGraph *graph) +{ + if (use_clamp) { + ShaderOutput *result_out = output("Value"); + if (!result_out->links.empty()) { + ClampNode *clamp_node = new ClampNode(); + clamp_node->min = 0.0f; + clamp_node->max = 1.0f; + graph->add(clamp_node); + graph->relink(result_out, clamp_node->output("Result")); + graph->connect(result_out, clamp_node->input("Value")); + } + } +} + void MathNode::constant_fold(const ConstantFolder &folder) { if (folder.all_inputs_constant()) { - folder.make_constant_clamp(svm_math(type, value1, value2), use_clamp); + folder.make_constant(svm_math(type, value1, value2)); } else { - folder.fold_math(type, use_clamp); + folder.fold_math(type); } } @@ -5322,20 +5610,19 @@ void MathNode::compile(SVMCompiler &compiler) ShaderInput *value2_in = input("Value2"); ShaderOutput *value_out = output("Value"); - compiler.add_node( - NODE_MATH, type, compiler.stack_assign(value1_in), compiler.stack_assign(value2_in)); - compiler.add_node(NODE_MATH, compiler.stack_assign(value_out)); + int value1_stack_offset = compiler.stack_assign(value1_in); + int value2_stack_offset = compiler.stack_assign(value2_in); + int value_stack_offset = compiler.stack_assign(value_out); - if (use_clamp) { - compiler.add_node(NODE_MATH, NODE_MATH_CLAMP, compiler.stack_assign(value_out)); - compiler.add_node(NODE_MATH, compiler.stack_assign(value_out)); - } + compiler.add_node(NODE_MATH, + type, + compiler.encode_uchar4(value1_stack_offset, value2_stack_offset), + value_stack_offset); } void MathNode::compile(OSLCompiler &compiler) { compiler.parameter(this, "type"); - compiler.parameter(this, "use_clamp"); compiler.add(this, "node_math"); } @@ -5348,14 +5635,32 @@ NODE_DEFINE(VectorMathNode) static NodeEnum 
type_enum; type_enum.insert("add", NODE_VECTOR_MATH_ADD); type_enum.insert("subtract", NODE_VECTOR_MATH_SUBTRACT); - type_enum.insert("average", NODE_VECTOR_MATH_AVERAGE); - type_enum.insert("dot_product", NODE_VECTOR_MATH_DOT_PRODUCT); + type_enum.insert("multiply", NODE_VECTOR_MATH_MULTIPLY); + type_enum.insert("divide", NODE_VECTOR_MATH_DIVIDE); + type_enum.insert("cross_product", NODE_VECTOR_MATH_CROSS_PRODUCT); + type_enum.insert("project", NODE_VECTOR_MATH_PROJECT); + type_enum.insert("reflect", NODE_VECTOR_MATH_REFLECT); + type_enum.insert("dot_product", NODE_VECTOR_MATH_DOT_PRODUCT); + + type_enum.insert("distance", NODE_VECTOR_MATH_DISTANCE); + type_enum.insert("length", NODE_VECTOR_MATH_LENGTH); + type_enum.insert("scale", NODE_VECTOR_MATH_SCALE); type_enum.insert("normalize", NODE_VECTOR_MATH_NORMALIZE); + + type_enum.insert("snap", NODE_VECTOR_MATH_SNAP); + type_enum.insert("floor", NODE_VECTOR_MATH_FLOOR); + type_enum.insert("ceil", NODE_VECTOR_MATH_CEIL); + type_enum.insert("modulo", NODE_VECTOR_MATH_MODULO); + type_enum.insert("fraction", NODE_VECTOR_MATH_FRACTION); + type_enum.insert("absolute", NODE_VECTOR_MATH_ABSOLUTE); + type_enum.insert("minimum", NODE_VECTOR_MATH_MINIMUM); + type_enum.insert("maximum", NODE_VECTOR_MATH_MAXIMUM); SOCKET_ENUM(type, "Type", type_enum, NODE_VECTOR_MATH_ADD); SOCKET_IN_VECTOR(vector1, "Vector1", make_float3(0.0f, 0.0f, 0.0f)); SOCKET_IN_VECTOR(vector2, "Vector2", make_float3(0.0f, 0.0f, 0.0f)); + SOCKET_IN_FLOAT(scale, "Scale", 1.0f); SOCKET_OUT_FLOAT(value, "Value"); SOCKET_OUT_VECTOR(vector, "Vector"); @@ -5373,8 +5678,7 @@ void VectorMathNode::constant_fold(const ConstantFolder &folder) float3 vector; if (folder.all_inputs_constant()) { - svm_vector_math(&value, &vector, type, vector1, vector2); - + svm_vector_math(&value, &vector, type, vector1, vector2, scale); if (folder.output == output("Value")) { folder.make_constant(value); } @@ -5391,15 +5695,21 @@ void VectorMathNode::compile(SVMCompiler &compiler) { 
ShaderInput *vector1_in = input("Vector1"); ShaderInput *vector2_in = input("Vector2"); + ShaderInput *scale_in = input("Scale"); ShaderOutput *value_out = output("Value"); ShaderOutput *vector_out = output("Vector"); - compiler.add_node(NODE_VECTOR_MATH, - type, - compiler.stack_assign(vector1_in), - compiler.stack_assign(vector2_in)); + int vector1_stack_offset = compiler.stack_assign(vector1_in); + int vector2_stack_offset = compiler.stack_assign(vector2_in); + int scale_stack_offset = compiler.stack_assign(scale_in); + int value_stack_offset = compiler.stack_assign_if_linked(value_out); + int vector_stack_offset = compiler.stack_assign_if_linked(vector_out); + compiler.add_node( - NODE_VECTOR_MATH, compiler.stack_assign(value_out), compiler.stack_assign(vector_out)); + NODE_VECTOR_MATH, + type, + compiler.encode_uchar4(vector1_stack_offset, vector2_stack_offset, scale_stack_offset), + compiler.encode_uchar4(value_stack_offset, vector_stack_offset)); } void VectorMathNode::compile(OSLCompiler &compiler) diff --git a/intern/cycles/render/nodes.h b/intern/cycles/render/nodes.h index 6b21be88663..fbed2ff0ef6 100644 --- a/intern/cycles/render/nodes.h +++ b/intern/cycles/render/nodes.h @@ -370,6 +370,19 @@ class IESLightNode : public TextureNode { void get_slot(); }; +class WhiteNoiseTextureNode : public ShaderNode { + public: + SHADER_NODE_CLASS(WhiteNoiseTextureNode) + virtual int get_group() + { + return NODE_GROUP_LEVEL_2; + } + + int dimensions; + float3 vector; + float w; +}; + class MappingNode : public ShaderNode { public: SHADER_NODE_CLASS(MappingNode) @@ -948,6 +961,21 @@ class HairInfoNode : public ShaderNode { } }; +class VolumeInfoNode : public ShaderNode { + public: + SHADER_NODE_CLASS(VolumeInfoNode) + void attributes(Shader *shader, AttributeRequestSet *attributes); + bool has_attribute_dependency() + { + return true; + } + bool has_spatial_varying() + { + return true; + } + void expand(ShaderGraph *graph); +}; + class ValueNode : public ShaderNode { 
public: SHADER_NODE_CLASS(ValueNode) @@ -1228,6 +1256,31 @@ class BlackbodyNode : public ShaderNode { float temperature; }; +class MapRangeNode : public ShaderNode { + public: + SHADER_NODE_CLASS(MapRangeNode) + void constant_fold(const ConstantFolder &folder); + virtual int get_group() + { + return NODE_GROUP_LEVEL_3; + } + void expand(ShaderGraph *graph); + + float value, from_min, from_max, to_min, to_max; + bool clamp; +}; + +class ClampNode : public ShaderNode { + public: + SHADER_NODE_CLASS(ClampNode) + void constant_fold(const ConstantFolder &folder); + virtual int get_group() + { + return NODE_GROUP_LEVEL_3; + } + float value, min, max; +}; + class MathNode : public ShaderNode { public: SHADER_NODE_CLASS(MathNode) @@ -1235,11 +1288,12 @@ class MathNode : public ShaderNode { { return NODE_GROUP_LEVEL_1; } + void expand(ShaderGraph *graph); void constant_fold(const ConstantFolder &folder); float value1; float value2; - NodeMath type; + NodeMathType type; bool use_clamp; }; @@ -1266,7 +1320,8 @@ class VectorMathNode : public ShaderNode { float3 vector1; float3 vector2; - NodeVectorMath type; + float scale; + NodeVectorMathType type; }; class VectorTransformNode : public ShaderNode { diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp index 6c6f8810412..849329a086d 100644 --- a/intern/cycles/render/object.cpp +++ b/intern/cycles/render/object.cpp @@ -90,6 +90,7 @@ NODE_DEFINE(Object) SOCKET_NODE(mesh, "Mesh", &Mesh::node_type); SOCKET_TRANSFORM(tfm, "Transform", transform_identity()); SOCKET_UINT(visibility, "Visibility", ~0); + SOCKET_COLOR(color, "Color", make_float3(0.0f, 0.0f, 0.0f)); SOCKET_UINT(random_id, "Random ID", 0); SOCKET_INT(pass_id, "Pass ID", 0); SOCKET_BOOLEAN(use_holdout, "Use Holdout", false); @@ -371,6 +372,7 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s */ float uniform_scale; float surface_area = 0.0f; + float3 color = ob->color; float pass_id = ob->pass_id; float 
random_number = (float)ob->random_id * (1.0f / (float)0xFFFFFFFF); int particle_index = (ob->particle_system) ? @@ -425,6 +427,9 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s kobject.tfm = tfm; kobject.itfm = itfm; kobject.surface_area = surface_area; + kobject.color[0] = color.x; + kobject.color[1] = color.y; + kobject.color[2] = color.z; kobject.pass_id = pass_id; kobject.random_number = random_number; kobject.particle_index = particle_index; diff --git a/intern/cycles/render/object.h b/intern/cycles/render/object.h index 2fd43900da1..cbbff0d4c6d 100644 --- a/intern/cycles/render/object.h +++ b/intern/cycles/render/object.h @@ -51,6 +51,7 @@ class Object : public Node { BoundBox bounds; uint random_id; int pass_id; + float3 color; ustring asset_name; vector<ParamValue> attributes; uint visibility; diff --git a/intern/cycles/render/sobol.cpp b/intern/cycles/render/sobol.cpp index 5fb3531b03b..c821249b239 100644 --- a/intern/cycles/render/sobol.cpp +++ b/intern/cycles/render/sobol.cpp @@ -62,7 +62,7 @@ typedef struct SobolDirectionNumbers { /* Keep simple alignment. 
*/ /* clang-format off */ -static SobolDirectionNumbers SOBOL_NUMBERS[SOBOL_MAX_DIMENSIONS - 1] = { +static const SobolDirectionNumbers SOBOL_NUMBERS[SOBOL_MAX_DIMENSIONS - 1] = { {2, 1, 0, {1}}, {3, 2, 1, {1, 3}}, {4, 3, 1, {1, 3, 1}}, @@ -21279,10 +21279,10 @@ void sobol_generate_direction_vectors(uint vectors[][SOBOL_BITS], int dimensions v[i] = 1 << (31 - i); // all m's = 1 for (int dim = 1; dim < dimensions; dim++) { - SobolDirectionNumbers *numbers = &SOBOL_NUMBERS[dim - 1]; - uint s = numbers->s; - uint a = numbers->a; - uint *m = numbers->m; + const SobolDirectionNumbers *numbers = &SOBOL_NUMBERS[dim - 1]; + const uint s = numbers->s; + const uint a = numbers->a; + const uint *m = numbers->m; v = vectors[dim]; diff --git a/intern/cycles/render/stats.h b/intern/cycles/render/stats.h index f1bf1903483..e45403a3754 100644 --- a/intern/cycles/render/stats.h +++ b/intern/cycles/render/stats.h @@ -29,7 +29,7 @@ CCL_NAMESPACE_BEGIN * semantic around the units of size, it just should be the same for all * entries. * - * This is a generic entry foi all size-related statistics, which helps + * This is a generic entry for all size-related statistics, which helps * avoiding duplicating code for things like sorting. */ class NamedSizeEntry { diff --git a/intern/cycles/subd/subd_dice.cpp b/intern/cycles/subd/subd_dice.cpp index fb96be5065b..914b408911e 100644 --- a/intern/cycles/subd/subd_dice.cpp +++ b/intern/cycles/subd/subd_dice.cpp @@ -323,8 +323,8 @@ void QuadDice::dice(SubPatch &sub, EdgeFactors &ef) float S = 1.0f; #endif - Mu = max((int)ceil(S * Mu), 2); // XXX handle 0 & 1? - Mv = max((int)ceil(S * Mv), 2); // XXX handle 0 & 1? + Mu = max((int)ceilf(S * Mu), 2); // XXX handle 0 & 1? + Mv = max((int)ceilf(S * Mv), 2); // XXX handle 0 & 1? 
/* reserve space for new verts */ int offset = params.mesh->verts.size(); diff --git a/intern/cycles/subd/subd_split.cpp b/intern/cycles/subd/subd_split.cpp index e6603632ba7..e5b85fcfd60 100644 --- a/intern/cycles/subd/subd_split.cpp +++ b/intern/cycles/subd/subd_split.cpp @@ -80,9 +80,9 @@ int DiagSplit::T(Patch *patch, float2 Pstart, float2 Pend) Plast = P; } - int tmin = (int)ceil(Lsum / params.dicing_rate); - int tmax = (int)ceil((params.test_steps - 1) * Lmax / - params.dicing_rate); // XXX paper says N instead of N-1, seems wrong? + int tmin = (int)ceilf(Lsum / params.dicing_rate); + int tmax = (int)ceilf((params.test_steps - 1) * Lmax / + params.dicing_rate); // XXX paper says N instead of N-1, seems wrong? if (tmax - tmin > params.split_threshold) return DSPLIT_NON_UNIFORM; @@ -99,7 +99,7 @@ void DiagSplit::partition_edge( *t1 = T(patch, *P, Pend); } else { - int I = (int)floor((float)t * 0.5f); + int I = (int)floorf((float)t * 0.5f); *P = interp(Pstart, Pend, (t == 0) ? 0 : I / (float)t); /* XXX is t faces or verts */ *t0 = I; *t1 = t - I; diff --git a/intern/cycles/test/render_graph_finalize_test.cpp b/intern/cycles/test/render_graph_finalize_test.cpp index 7fb92bfb862..ca93f8b02d0 100644 --- a/intern/cycles/test/render_graph_finalize_test.cpp +++ b/intern/cycles/test/render_graph_finalize_test.cpp @@ -960,6 +960,13 @@ TEST_F(RenderGraph, constant_fold_blackbody) graph.finalize(scene); } +/* A Note About The Math Node + * + * The clamp option is implemented using graph expansion, where a + * Clamp node named "clamp" is added and connected to the output. + * So the final result is actually from the node "clamp". + */ + /* * Tests: Math with all constant inputs (clamp false). 
*/ @@ -985,7 +992,7 @@ TEST_F(RenderGraph, constant_fold_math) TEST_F(RenderGraph, constant_fold_math_clamp) { EXPECT_ANY_MESSAGE(log); - CORRECT_INFO_MESSAGE(log, "Folding Math::Value to constant (1)."); + CORRECT_INFO_MESSAGE(log, "Folding clamp::Result to constant (1)."); builder .add_node(ShaderNodeBuilder<MathNode>("Math") @@ -1003,7 +1010,7 @@ TEST_F(RenderGraph, constant_fold_math_clamp) * Includes 2 tests: constant on each side. */ static void build_math_partial_test_graph(ShaderGraphBuilder &builder, - NodeMath type, + NodeMathType type, float constval) { builder @@ -1038,7 +1045,7 @@ TEST_F(RenderGraph, constant_fold_part_math_add_0) /* X + 0 == 0 + X == X */ CORRECT_INFO_MESSAGE(log, "Folding Math_Cx::Value to socket Attribute::Fac."); CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Value to socket Attribute::Fac."); - INVALID_INFO_MESSAGE(log, "Folding Out::"); + INVALID_INFO_MESSAGE(log, "Folding clamp::"); build_math_partial_test_graph(builder, NODE_MATH_ADD, 0.0f); graph.finalize(scene); @@ -1053,7 +1060,7 @@ TEST_F(RenderGraph, constant_fold_part_math_sub_0) /* X - 0 == X */ INVALID_INFO_MESSAGE(log, "Folding Math_Cx::"); CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Value to socket Attribute::Fac."); - INVALID_INFO_MESSAGE(log, "Folding Out::"); + INVALID_INFO_MESSAGE(log, "Folding clamp::"); build_math_partial_test_graph(builder, NODE_MATH_SUBTRACT, 0.0f); graph.finalize(scene); @@ -1068,7 +1075,7 @@ TEST_F(RenderGraph, constant_fold_part_math_mul_1) /* X * 1 == 1 * X == X */ CORRECT_INFO_MESSAGE(log, "Folding Math_Cx::Value to socket Attribute::Fac."); CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Value to socket Attribute::Fac."); - INVALID_INFO_MESSAGE(log, "Folding Out::"); + INVALID_INFO_MESSAGE(log, "Folding clamp::"); build_math_partial_test_graph(builder, NODE_MATH_MULTIPLY, 1.0f); graph.finalize(scene); @@ -1083,7 +1090,7 @@ TEST_F(RenderGraph, constant_fold_part_math_div_1) /* X / 1 == X */ INVALID_INFO_MESSAGE(log, "Folding Math_Cx::"); 
CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Value to socket Attribute::Fac."); - INVALID_INFO_MESSAGE(log, "Folding Out::"); + INVALID_INFO_MESSAGE(log, "Folding clamp::"); build_math_partial_test_graph(builder, NODE_MATH_DIVIDE, 1.0f); graph.finalize(scene); @@ -1098,7 +1105,7 @@ TEST_F(RenderGraph, constant_fold_part_math_mul_0) /* X * 0 == 0 * X == 0 */ CORRECT_INFO_MESSAGE(log, "Folding Math_Cx::Value to constant (0)."); CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Value to constant (0)."); - CORRECT_INFO_MESSAGE(log, "Folding Out::Value to constant (0)"); + CORRECT_INFO_MESSAGE(log, "Folding clamp::Result to constant (0)"); CORRECT_INFO_MESSAGE(log, "Discarding closure EmissionNode."); build_math_partial_test_graph(builder, NODE_MATH_MULTIPLY, 0.0f); @@ -1114,7 +1121,7 @@ TEST_F(RenderGraph, constant_fold_part_math_div_0) /* 0 / X == 0 */ CORRECT_INFO_MESSAGE(log, "Folding Math_Cx::Value to constant (0)."); INVALID_INFO_MESSAGE(log, "Folding Math_xC::"); - INVALID_INFO_MESSAGE(log, "Folding Out::"); + INVALID_INFO_MESSAGE(log, "Folding clamp::"); build_math_partial_test_graph(builder, NODE_MATH_DIVIDE, 0.0f); graph.finalize(scene); @@ -1129,7 +1136,7 @@ TEST_F(RenderGraph, constant_fold_part_math_pow_0) /* X ^ 0 == 1 */ INVALID_INFO_MESSAGE(log, "Folding Math_Cx::"); CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Value to constant (1)."); - INVALID_INFO_MESSAGE(log, "Folding Out::"); + INVALID_INFO_MESSAGE(log, "Folding clamp::"); build_math_partial_test_graph(builder, NODE_MATH_POWER, 0.0f); graph.finalize(scene); @@ -1144,7 +1151,7 @@ TEST_F(RenderGraph, constant_fold_part_math_pow_1) /* 1 ^ X == 1; X ^ 1 == X */ CORRECT_INFO_MESSAGE(log, "Folding Math_Cx::Value to constant (1)"); CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Value to socket Attribute::Fac."); - INVALID_INFO_MESSAGE(log, "Folding Out::"); + INVALID_INFO_MESSAGE(log, "Folding clamp::"); build_math_partial_test_graph(builder, NODE_MATH_POWER, 1.0f); graph.finalize(scene); @@ -1156,21 +1163,14 @@ 
TEST_F(RenderGraph, constant_fold_part_math_pow_1) TEST_F(RenderGraph, constant_fold_vector_math) { EXPECT_ANY_MESSAGE(log); - CORRECT_INFO_MESSAGE(log, "Folding VectorMath::Value to constant (1)."); CORRECT_INFO_MESSAGE(log, "Folding VectorMath::Vector to constant (3, 0, 0)."); - CORRECT_INFO_MESSAGE(log, "Folding convert_vector_to_float::value_float to constant (1)."); - CORRECT_INFO_MESSAGE(log, "Folding Math::Value to constant (2)."); - CORRECT_INFO_MESSAGE(log, "Folding convert_float_to_color::value_color to constant (2, 2, 2)."); builder .add_node(ShaderNodeBuilder<VectorMathNode>("VectorMath") .set(&VectorMathNode::type, NODE_VECTOR_MATH_SUBTRACT) .set("Vector1", make_float3(1.3f, 0.5f, 0.7f)) .set("Vector2", make_float3(-1.7f, 0.5f, 0.7f))) - .add_node(ShaderNodeBuilder<MathNode>("Math").set(&MathNode::type, NODE_MATH_ADD)) - .add_connection("VectorMath::Vector", "Math::Value1") - .add_connection("VectorMath::Value", "Math::Value2") - .output_color("Math::Value"); + .output_color("VectorMath::Vector"); graph.finalize(scene); } @@ -1180,7 +1180,7 @@ TEST_F(RenderGraph, constant_fold_vector_math) * Includes 2 tests: constant on each side. */ static void build_vecmath_partial_test_graph(ShaderGraphBuilder &builder, - NodeVectorMath type, + NodeVectorMathType type, float3 constval) { builder @@ -1234,22 +1234,6 @@ TEST_F(RenderGraph, constant_fold_part_vecmath_sub_0) } /* - * Tests: partial folding for Vector Math Dot Product with known 0. 
- */ -TEST_F(RenderGraph, constant_fold_part_vecmath_dot_0) -{ - EXPECT_ANY_MESSAGE(log); - /* X * 0 == 0 * X == X */ - CORRECT_INFO_MESSAGE(log, "Folding Math_Cx::Vector to constant (0, 0, 0)."); - CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Vector to constant (0, 0, 0)."); - CORRECT_INFO_MESSAGE(log, "Folding Out::Vector to constant (0, 0, 0)."); - CORRECT_INFO_MESSAGE(log, "Discarding closure EmissionNode."); - - build_vecmath_partial_test_graph(builder, NODE_VECTOR_MATH_DOT_PRODUCT, make_float3(0, 0, 0)); - graph.finalize(scene); -} - -/* * Tests: partial folding for Vector Math Cross Product with known 0. */ TEST_F(RenderGraph, constant_fold_part_vecmath_cross_0) diff --git a/intern/cycles/util/util_defines.h b/intern/cycles/util/util_defines.h index 7f3bead0a18..760985447a8 100644 --- a/intern/cycles/util/util_defines.h +++ b/intern/cycles/util/util_defines.h @@ -30,6 +30,7 @@ # ifndef __KERNEL_GPU__ # define ccl_device static inline # define ccl_device_noinline static +# define ccl_device_noinline_cpu ccl_device_noinline # define ccl_global # define ccl_static_constant static const # define ccl_constant const diff --git a/intern/cycles/util/util_hash.h b/intern/cycles/util/util_hash.h index 785482967db..2b1f26de03d 100644 --- a/intern/cycles/util/util_hash.h +++ b/intern/cycles/util/util_hash.h @@ -21,39 +21,196 @@ CCL_NAMESPACE_BEGIN -ccl_device_inline uint hash_int_2d(uint kx, uint ky) -{ +/* ***** Jenkins Lookup3 Hash Functions ***** */ + +/* Source: http://burtleburtle.net/bob/c/lookup3.c */ + #define rot(x, k) (((x) << (k)) | ((x) >> (32 - (k)))) +#define mix(a, b, c) \ + { \ + a -= c; \ + a ^= rot(c, 4); \ + c += b; \ + b -= a; \ + b ^= rot(a, 6); \ + a += c; \ + c -= b; \ + c ^= rot(b, 8); \ + b += a; \ + a -= c; \ + a ^= rot(c, 16); \ + c += b; \ + b -= a; \ + b ^= rot(a, 19); \ + a += c; \ + c -= b; \ + c ^= rot(b, 4); \ + b += a; \ + } + +#define final(a, b, c) \ + { \ + c ^= b; \ + c -= rot(b, 14); \ + a ^= c; \ + a -= rot(c, 11); \ + b ^= a; \ + 
b -= rot(a, 25); \ + c ^= b; \ + c -= rot(b, 16); \ + a ^= c; \ + a -= rot(c, 4); \ + b ^= a; \ + b -= rot(a, 14); \ + c ^= b; \ + c -= rot(b, 24); \ + } + +ccl_device_inline uint hash_uint(uint kx) +{ uint a, b, c; + a = b = c = 0xdeadbeef + (1 << 2) + 13; + + a += kx; + final(a, b, c); + return c; +} + +ccl_device_inline uint hash_uint2(uint kx, uint ky) +{ + uint a, b, c; a = b = c = 0xdeadbeef + (2 << 2) + 13; + + b += ky; + a += kx; + final(a, b, c); + + return c; +} + +ccl_device_inline uint hash_uint3(uint kx, uint ky, uint kz) +{ + uint a, b, c; + a = b = c = 0xdeadbeef + (3 << 2) + 13; + + c += kz; + b += ky; + a += kx; + final(a, b, c); + + return c; +} + +ccl_device_inline uint hash_uint4(uint kx, uint ky, uint kz, uint kw) +{ + uint a, b, c; + a = b = c = 0xdeadbeef + (4 << 2) + 13; + a += kx; b += ky; + c += kz; + mix(a, b, c); - c ^= b; - c -= rot(b, 14); - a ^= c; - a -= rot(c, 11); - b ^= a; - b -= rot(a, 25); - c ^= b; - c -= rot(b, 16); - a ^= c; - a -= rot(c, 4); - b ^= a; - b -= rot(a, 14); - c ^= b; - c -= rot(b, 24); + a += kw; + final(a, b, c); return c; +} #undef rot +#undef final +#undef mix + +/* Hashing uint or uint[234] into a float in the range [0, 1]. */ + +ccl_device_inline float hash_uint_to_float(uint kx) +{ + return (float)hash_uint(kx) / (float)0xFFFFFFFFu; +} + +ccl_device_inline float hash_uint2_to_float(uint kx, uint ky) +{ + return (float)hash_uint2(kx, ky) / (float)0xFFFFFFFFu; +} + +ccl_device_inline float hash_uint3_to_float(uint kx, uint ky, uint kz) +{ + return (float)hash_uint3(kx, ky, kz) / (float)0xFFFFFFFFu; +} + +ccl_device_inline float hash_uint4_to_float(uint kx, uint ky, uint kz, uint kw) +{ + return (float)hash_uint4(kx, ky, kz, kw) / (float)0xFFFFFFFFu; +} + +/* Hashing float or float[234] into a float in the range [0, 1]. 
*/ + +ccl_device_inline float hash_float_to_float(float k) +{ + return hash_uint_to_float(__float_as_uint(k)); +} + +ccl_device_inline float hash_float2_to_float(float2 k) +{ + return hash_uint2_to_float(__float_as_uint(k.x), __float_as_uint(k.y)); +} + +ccl_device_inline float hash_float3_to_float(float3 k) +{ + return hash_uint3_to_float(__float_as_uint(k.x), __float_as_uint(k.y), __float_as_uint(k.z)); +} + +ccl_device_inline float hash_float4_to_float(float4 k) +{ + return hash_uint4_to_float( + __float_as_uint(k.x), __float_as_uint(k.y), __float_as_uint(k.z), __float_as_uint(k.w)); +} + +/* Hashing float[234] into float[234] of components in the range [0, 1]. */ + +ccl_device_inline float2 hash_float2_to_float2(float2 k) +{ + return make_float2(hash_float2_to_float(k), hash_float3_to_float(make_float3(k.x, k.y, 1.0))); +} + +ccl_device_inline float3 hash_float3_to_float3(float3 k) +{ + return make_float3(hash_float3_to_float(k), + hash_float4_to_float(make_float4(k.x, k.y, k.z, 1.0)), + hash_float4_to_float(make_float4(k.x, k.y, k.z, 2.0))); } -ccl_device_inline uint hash_int(uint k) +ccl_device_inline float4 hash_float4_to_float4(float4 k) +{ + return make_float4(hash_float4_to_float(k), + hash_float4_to_float(make_float4(k.w, k.x, k.y, k.z)), + hash_float4_to_float(make_float4(k.z, k.w, k.x, k.y)), + hash_float4_to_float(make_float4(k.y, k.z, k.w, k.x))); +} + +/* Hashing float or float[234] into float3 of components in range [0, 1]. 
*/ + +ccl_device_inline float3 hash_float_to_float3(float k) { - return hash_int_2d(k, 0); + return make_float3(hash_float_to_float(k), + hash_float2_to_float(make_float2(k, 1.0)), + hash_float2_to_float(make_float2(k, 2.0))); +} + +ccl_device_inline float3 hash_float2_to_float3(float2 k) +{ + return make_float3(hash_float2_to_float(k), + hash_float3_to_float(make_float3(k.x, k.y, 1.0)), + hash_float3_to_float(make_float3(k.x, k.y, 2.0))); +} + +ccl_device_inline float3 hash_float4_to_float3(float4 k) +{ + return make_float3(hash_float4_to_float(k), + hash_float4_to_float(make_float4(k.z, k.x, k.w, k.y)), + hash_float4_to_float(make_float4(k.w, k.z, k.y, k.x))); } #ifndef __KERNEL_GPU__ @@ -68,11 +225,6 @@ static inline uint hash_string(const char *str) } #endif -ccl_device_inline float hash_int_01(uint k) -{ - return (float)hash_int(k) * (1.0f / (float)0xFFFFFFFF); -} - CCL_NAMESPACE_END #endif /* __UTIL_HASH_H__ */ diff --git a/intern/cycles/util/util_ies.cpp b/intern/cycles/util/util_ies.cpp index 7c24a4ec28c..62d3d42186d 100644 --- a/intern/cycles/util/util_ies.cpp +++ b/intern/cycles/util/util_ies.cpp @@ -14,6 +14,8 @@ * limitations under the License. */ +#include <algorithm> + #include "util/util_foreach.h" #include "util/util_ies.h" #include "util/util_math.h" @@ -28,7 +30,7 @@ CCL_NAMESPACE_BEGIN // issue. 
template class GuardedAllocator<char>; -bool IESFile::load(ustring ies) +bool IESFile::load(const string &ies) { clear(); if (!parse(ies) || !process()) { @@ -76,7 +78,7 @@ class IESTextParser { vector<char> text; char *data; - IESTextParser(ustring str) : text(str.begin(), str.end()) + IESTextParser(const string &str) : text(str.begin(), str.end()) { std::replace(text.begin(), text.end(), ',', ' '); data = strstr(&text[0], "\nTILT="); @@ -116,7 +118,7 @@ class IESTextParser { } }; -bool IESFile::parse(ustring ies) +bool IESFile::parse(const string &ies) { if (ies.empty()) { return false; diff --git a/intern/cycles/util/util_ies.h b/intern/cycles/util/util_ies.h index ab1b9ea57cf..95473103614 100644 --- a/intern/cycles/util/util_ies.h +++ b/intern/cycles/util/util_ies.h @@ -17,7 +17,7 @@ #ifndef __UTIL_IES_H__ #define __UTIL_IES_H__ -#include "util/util_param.h" +#include "util/util_string.h" #include "util/util_vector.h" CCL_NAMESPACE_BEGIN @@ -32,11 +32,11 @@ class IESFile { int packed_size(); void pack(float *data); - bool load(ustring ies); + bool load(const string &ies); void clear(); protected: - bool parse(ustring ies); + bool parse(const string &ies); bool process(); bool process_type_b(); bool process_type_c(); diff --git a/intern/cycles/util/util_math_float3.h b/intern/cycles/util/util_math_float3.h index 554b7408148..c9a5b34aa58 100644 --- a/intern/cycles/util/util_math_float3.h +++ b/intern/cycles/util/util_math_float3.h @@ -47,6 +47,7 @@ ccl_device_inline float3 operator/=(float3 &a, float f); ccl_device_inline bool operator==(const float3 &a, const float3 &b); ccl_device_inline bool operator!=(const float3 &a, const float3 &b); +ccl_device_inline float distance(const float3 &a, const float3 &b); ccl_device_inline float dot(const float3 &a, const float3 &b); ccl_device_inline float dot_xy(const float3 &a, const float3 &b); ccl_device_inline float3 cross(const float3 &a, const float3 &b); @@ -58,6 +59,8 @@ ccl_device_inline float3 fabs(const float3 &a); 
ccl_device_inline float3 mix(const float3 &a, const float3 &b, float t); ccl_device_inline float3 rcp(const float3 &a); ccl_device_inline float3 sqrt(const float3 &a); +ccl_device_inline float3 floor(const float3 &a); +ccl_device_inline float3 ceil(const float3 &a); #endif /* !__KERNEL_OPENCL__ */ ccl_device_inline float min3(float3 a); @@ -65,10 +68,15 @@ ccl_device_inline float max3(float3 a); ccl_device_inline float len(const float3 a); ccl_device_inline float len_squared(const float3 a); +ccl_device_inline float3 reflect(const float3 incident, const float3 normal); +ccl_device_inline float3 project(const float3 v, const float3 v_proj); + ccl_device_inline float3 saturate3(float3 a); ccl_device_inline float3 safe_normalize(const float3 a); ccl_device_inline float3 normalize_len(const float3 a, float *t); ccl_device_inline float3 safe_normalize_len(const float3 a, float *t); +ccl_device_inline float3 safe_divide_float3_float3(const float3 a, const float3 b); +ccl_device_inline float3 safe_divide_float3_float(const float3 a, const float b); ccl_device_inline float3 interp(float3 a, float3 b, float t); ccl_device_inline float3 sqr3(float3 a); @@ -205,6 +213,11 @@ ccl_device_inline bool operator!=(const float3 &a, const float3 &b) return !(a == b); } +ccl_device_inline float distance(const float3 &a, const float3 &b) +{ + return len(a - b); +} + ccl_device_inline float dot(const float3 &a, const float3 &b) { # if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) @@ -281,6 +294,24 @@ ccl_device_inline float3 sqrt(const float3 &a) # endif } +ccl_device_inline float3 floor(const float3 &a) +{ +# ifdef __KERNEL_SSE__ + return float3(_mm_floor_ps(a)); +# else + return make_float3(floorf(a.x), floorf(a.y), floorf(a.z)); +# endif +} + +ccl_device_inline float3 ceil(const float3 &a) +{ +# ifdef __KERNEL_SSE__ + return float3(_mm_ceil_ps(a)); +# else + return make_float3(ceilf(a.x), ceilf(a.y), ceilf(a.z)); +# endif +} + ccl_device_inline float3 mix(const float3 &a, const 
float3 &b, float t) { return a + t * (b - a); @@ -321,6 +352,19 @@ ccl_device_inline float len_squared(const float3 a) return dot(a, a); } +ccl_device_inline float3 reflect(const float3 incident, const float3 normal) +{ + float3 unit_normal = normalize(normal); + return incident - 2.0f * unit_normal * dot(incident, unit_normal); +} + +ccl_device_inline float3 project(const float3 v, const float3 v_proj) +{ + float len_squared = dot(v_proj, v_proj); + return (len_squared != 0.0f) ? (dot(v, v_proj) / len_squared) * v_proj : + make_float3(0.0f, 0.0f, 0.0f); +} + ccl_device_inline float3 saturate3(float3 a) { return make_float3(saturate(a.x), saturate(a.y), saturate(a.z)); @@ -345,6 +389,18 @@ ccl_device_inline float3 safe_normalize_len(const float3 a, float *t) return (*t != 0.0f) ? a / (*t) : a; } +ccl_device_inline float3 safe_divide_float3_float3(const float3 a, const float3 b) +{ + return make_float3((b.x != 0.0f) ? a.x / b.x : 0.0f, + (b.y != 0.0f) ? a.y / b.y : 0.0f, + (b.z != 0.0f) ? a.z / b.z : 0.0f); +} + +ccl_device_inline float3 safe_divide_float3_float(const float3 a, const float b) +{ + return (b != 0.0f) ? 
a / b : make_float3(0.0f, 0.0f, 0.0f); +} + ccl_device_inline float3 interp(float3 a, float3 b, float t) { return a + t * (b - a); diff --git a/intern/cycles/util/util_simd.h b/intern/cycles/util/util_simd.h index 8fcaadc5f53..f49cfb4184d 100644 --- a/intern/cycles/util/util_simd.h +++ b/intern/cycles/util/util_simd.h @@ -45,7 +45,7 @@ # endif -# if defined(__x86_64__) || defined(__i386__) || defined(_M_X64) || defined(_M_IX86) +# if defined(__x86_64__) || defined(_M_X64) # define SIMD_SET_FLUSH_TO_ZERO \ _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); \ _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); diff --git a/intern/cycles/util/util_task.cpp b/intern/cycles/util/util_task.cpp index fdbc3800806..24286116dfb 100644 --- a/intern/cycles/util/util_task.cpp +++ b/intern/cycles/util/util_task.cpp @@ -283,8 +283,8 @@ vector<int> distribute_threads_on_nodes(const int num_threads) } ++current_node_index; } - /* Second pass: keep scheduling threads to each node one by one, uniformly - * fillign them in. + /* Second pass: keep scheduling threads to each node one by one, + * uniformly filling them in. * This is where things becomes tricky to predict for the maximum * performance: on the one hand this avoids too much threading overhead on * few nodes, but for the final performance having all the overhead on one diff --git a/intern/cycles/util/util_vector.h b/intern/cycles/util/util_vector.h index 437478d64d3..04fb33368d9 100644 --- a/intern/cycles/util/util_vector.h +++ b/intern/cycles/util/util_vector.h @@ -27,7 +27,7 @@ CCL_NAMESPACE_BEGIN -/* Own subclass-ed vestion of std::vector. Subclass is needed because: +/* Own subclass-ed version of std::vector. Subclass is needed because: * * - Use own allocator which keeps track of used/peak memory. * - Have method to ensure capacity is re-set to 0. |