diff options
author | Bastien Montagne <montagne29@wanadoo.fr> | 2016-12-12 18:17:57 +0300 |
---|---|---|
committer | Bastien Montagne <montagne29@wanadoo.fr> | 2016-12-12 18:17:57 +0300 |
commit | 9be6d5ff18fd593e853647d9eec8b3fb074acd7c (patch) | |
tree | 485fb9203c310b5119a549e698db241012ed39ff /intern | |
parent | 54528079e3cfaf74eaa119615386564820b45276 (diff) | |
parent | 5f852a4324212221500d11b2c7594f5e0ca894c6 (diff) |
Merge branch 'master' into blender2.8
Conflicts:
source/blender/blenkernel/intern/depsgraph.c
source/blender/blenloader/intern/versioning_270.c
source/blender/depsgraph/intern/builder/deg_builder_relations.cc
source/blender/makesrna/intern/rna_main_api.c
source/blender/makesrna/intern/rna_particle.c
Diffstat (limited to 'intern')
38 files changed, 674 insertions, 297 deletions
diff --git a/intern/cycles/app/cycles_standalone.cpp b/intern/cycles/app/cycles_standalone.cpp index b21e8630cdb..9816d614a7c 100644 --- a/intern/cycles/app/cycles_standalone.cpp +++ b/intern/cycles/app/cycles_standalone.cpp @@ -72,20 +72,17 @@ static void session_print(const string& str) static void session_print_status() { - int sample, tile; - double total_time, sample_time, render_time; string status, substatus; /* get status */ - sample = options.session->progress.get_sample(); - options.session->progress.get_tile(tile, total_time, sample_time, render_time); + float progress = options.session->progress.get_progress(); options.session->progress.get_status(status, substatus); if(substatus != "") status += ": " + substatus; /* print status */ - status = string_printf("Sample %d %s", sample, status.c_str()); + status = string_printf("Progress %05.2f %s", (double) progress*100, status.c_str()); session_print(status); } @@ -167,13 +164,12 @@ static void display_info(Progress& progress) latency = (elapsed - last); last = elapsed; - int sample, tile; - double total_time, sample_time, render_time; + double total_time, sample_time; string status, substatus; - sample = progress.get_sample(); - progress.get_tile(tile, total_time, sample_time, render_time); + progress.get_time(total_time, sample_time); progress.get_status(status, substatus); + float progress_val = progress.get_progress(); if(substatus != "") status += ": " + substatus; @@ -184,10 +180,10 @@ static void display_info(Progress& progress) "%s" " Time: %.2f" " Latency: %.4f" - " Sample: %d" + " Progress: %05.2f" " Average: %.4f" " Interactive: %s", - status.c_str(), total_time, latency, sample, sample_time, interactive.c_str()); + status.c_str(), total_time, latency, (double) progress_val*100, sample_time, interactive.c_str()); view_display_info(str.c_str()); diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py index 3346beea3b2..cbff5a537dc 100644 --- a/intern/cycles/blender/addon/properties.py +++ b/intern/cycles/blender/addon/properties.py @@ -288,7 +288,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): description="Probabilistically terminate light samples when the light contribution is below this threshold (more noise but faster rendering). " "Zero disables the test and never ignores lights", min=0.0, max=1.0, - default=0.05, + default=0.01, ) cls.caustics_reflective = BoolProperty( diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp index e16cea0ebaf..71c1eefe65f 100644 --- a/intern/cycles/blender/blender_session.cpp +++ b/intern/cycles/blender/blender_session.cpp @@ -930,38 +930,13 @@ void BlenderSession::get_status(string& status, string& substatus) void BlenderSession::get_progress(float& progress, double& total_time, double& render_time) { - double tile_time; - int tile, sample, samples_per_tile; - int tile_total = session->tile_manager.state.num_tiles; - int samples = session->tile_manager.state.sample + 1; - int total_samples = session->tile_manager.get_num_effective_samples(); - - session->progress.get_tile(tile, total_time, render_time, tile_time); - - sample = session->progress.get_sample(); - samples_per_tile = session->tile_manager.get_num_effective_samples(); - - if(background && samples_per_tile && tile_total) - progress = ((float)sample / (float)(tile_total * samples_per_tile)); - else if(!background && samples > 0 && total_samples != INT_MAX) - progress = ((float)samples) / total_samples; - else - progress = 0.0; + session->progress.get_time(total_time, render_time); + progress = session->progress.get_progress(); } void BlenderSession::update_bake_progress() { - float progress; - int sample, samples_per_task, parts_total; - - sample = session->progress.get_sample(); - samples_per_task = scene->bake_manager->num_samples; - parts_total = scene->bake_manager->num_parts; - - if(samples_per_task) - progress = ((float)sample / (float)(parts_total * samples_per_task)); - else - progress = 0.0; + float progress = session->progress.get_progress(); if(progress != last_progress) { b_engine.update_progress(progress); diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp index ff9387b0a8a..31c99f49d6d 100644 --- a/intern/cycles/device/device.cpp +++ b/intern/cycles/device/device.cpp @@ -64,6 +64,8 @@ std::ostream& operator <<(std::ostream &os, << string_from_bool(requested_features.use_integrator_branched) << std::endl; os << "Use Patch Evaluation: " << string_from_bool(requested_features.use_patch_evaluation) << std::endl; + os << "Use Transparent Shadows: " + << string_from_bool(requested_features.use_transparent) << std::endl; return os; } diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index b9bdffa2618..ccee25ae34e 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -117,6 +117,9 @@ public: /* Use OpenSubdiv patch evaluation */ bool use_patch_evaluation; + + /* Use Transparent shadows */ + bool use_transparent; DeviceRequestedFeatures() { @@ -133,6 +136,7 @@ public: use_volume = false; use_integrator_branched = false; use_patch_evaluation = false; + use_transparent = false; } bool modified(const DeviceRequestedFeatures& requested_features) @@ -148,7 +152,8 @@ public: use_subsurface == requested_features.use_subsurface && use_volume == requested_features.use_volume && use_integrator_branched == requested_features.use_integrator_branched && - use_patch_evaluation == requested_features.use_patch_evaluation); + use_patch_evaluation == requested_features.use_patch_evaluation && + use_transparent == requested_features.use_transparent); } /* Convert the requested features structure to a build options, @@ -189,6 +194,9 @@ public: if(!use_patch_evaluation) { build_options += " -D__NO_PATCH_EVAL__"; } + if(!use_transparent) { + build_options += " -D__NO_TRANSPARENT__"; + } return build_options; } }; @@ -220,6 +228,7 @@ public: DeviceInfo info; virtual const string& error_message() { return error_msg; } bool have_error() { return !error_message().empty(); } + virtual bool show_samples() const { return false; } /* statistics */ Stats &stats; diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index aed86d8d853..c8e001ec2fd 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -112,6 +112,11 @@ public: task_pool.stop(); } + virtual bool show_samples() const + { + return (TaskScheduler::num_threads() == 1); + } + void mem_alloc(device_memory& mem, MemoryType /*type*/) { mem.device_pointer = mem.data_pointer; @@ -275,7 +280,7 @@ public: tile.sample = sample + 1; - task.update_progress(&tile); + task.update_progress(&tile, tile.w*tile.h); } task.release_tile(tile); diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index fbb97f78e70..233f94be1bf 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -115,6 +115,12 @@ public: return path_exists(cubins_path); } + virtual bool show_samples() const + { + /* The CUDADevice only processes one tile at a time, so showing samples is fine. */ + return true; + } + /*#ifdef NDEBUG #define cuda_abort() #else @@ -1267,7 +1273,7 @@ public: tile.sample = sample + 1; - task->update_progress(&tile); + task->update_progress(&tile, tile.w*tile.h); } task->release_tile(tile); diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp index 48fd159d508..31b800640d3 100644 --- a/intern/cycles/device/device_multi.cpp +++ b/intern/cycles/device/device_multi.cpp @@ -89,6 +89,14 @@ public: return error_msg; } + virtual bool show_samples() const + { + if(devices.size() > 1) { + return false; + } + return devices.front().device->show_samples(); + } + bool load_kernels(const DeviceRequestedFeatures& requested_features) { foreach(SubDevice& sub, devices) diff --git a/intern/cycles/device/device_network.cpp b/intern/cycles/device/device_network.cpp index 3eb5ad2d2db..53eef6cf199 100644 --- a/intern/cycles/device/device_network.cpp +++ b/intern/cycles/device/device_network.cpp @@ -51,6 +51,11 @@ public: thread_mutex rpc_lock; + virtual bool show_samples() const + { + return false; + } + NetworkDevice(DeviceInfo& info, Stats &stats, const char *address) : Device(info, stats, true), socket(io_service) { diff --git a/intern/cycles/device/device_task.cpp b/intern/cycles/device/device_task.cpp index 1f1128a28f8..48d18035c13 100644 --- a/intern/cycles/device/device_task.cpp +++ b/intern/cycles/device/device_task.cpp @@ -19,6 +19,8 @@ #include "device_task.h" +#include "buffers.h" + #include "util_algorithm.h" #include "util_time.h" @@ -99,14 +101,18 @@ void DeviceTask::split(list<DeviceTask>& tasks, int num, int max_size) } } -void DeviceTask::update_progress(RenderTile *rtile) +void DeviceTask::update_progress(RenderTile *rtile, int pixel_samples) { if((type != PATH_TRACE) && (type != SHADER)) return; - if(update_progress_sample) - update_progress_sample(); + if(update_progress_sample) { + if(pixel_samples == -1) { + pixel_samples = shader_w; + } + update_progress_sample(pixel_samples, rtile? rtile->sample : 0); + } if(update_tile_sample) { double current_time = time_dt(); diff --git a/intern/cycles/device/device_task.h b/intern/cycles/device/device_task.h index 8423e83bdfd..8bd54c3d2b0 100644 --- a/intern/cycles/device/device_task.h +++ b/intern/cycles/device/device_task.h @@ -56,10 +56,10 @@ public: int get_subtask_count(int num, int max_size = 0); void split(list<DeviceTask>& tasks, int num, int max_size = 0); - void update_progress(RenderTile *rtile); + void update_progress(RenderTile *rtile, int pixel_samples = -1); function<bool(Device *device, RenderTile&)> acquire_tile; - function<void(void)> update_progress_sample; + function<void(long, int)> update_progress_sample; function<void(RenderTile&)> update_tile_sample; function<void(RenderTile&)> release_tile; function<bool(void)> get_cancel; diff --git a/intern/cycles/device/opencl/opencl_mega.cpp b/intern/cycles/device/opencl/opencl_mega.cpp index 369c086df57..6ea7619e022 100644 --- a/intern/cycles/device/opencl/opencl_mega.cpp +++ b/intern/cycles/device/opencl/opencl_mega.cpp @@ -39,6 +39,10 @@ public: { } + virtual bool show_samples() const { + return true; + } + virtual void load_kernels(const DeviceRequestedFeatures& /*requested_features*/, vector<OpenCLProgram*> &programs) { @@ -120,7 +124,7 @@ public: tile.sample = sample + 1; - task->update_progress(&tile); + task->update_progress(&tile, tile.w*tile.h); } /* Complete kernel execution before release tile */ diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp index 239e73a40fd..3c3c2150128 100644 --- a/intern/cycles/device/opencl/opencl_split.cpp +++ b/intern/cycles/device/opencl/opencl_split.cpp @@ -247,6 +247,10 @@ public: } } + virtual bool show_samples() const { + return false; + } + /* Split kernel utility functions. */ size_t get_tex_size(const char *tex_name) { diff --git a/intern/cycles/kernel/closure/bsdf_hair.h b/intern/cycles/kernel/closure/bsdf_hair.h index bede5f45e7e..daaa26dc6ad 100644 --- a/intern/cycles/kernel/closure/bsdf_hair.h +++ b/intern/cycles/kernel/closure/bsdf_hair.h @@ -267,7 +267,10 @@ ccl_device int bsdf_hair_transmission_sample(const ShaderClosure *sc, float3 Ng, *eval = make_float3(*pdf, *pdf, *pdf); - kernel_assert(dot(locy, *omega_in) < 0.0f); + /* TODO(sergey): Should always be negative, but seems some precision issue + * is involved here. + */ + kernel_assert(dot(locy, *omega_in) < 1e-4f); return LABEL_TRANSMIT|LABEL_GLOSSY; } diff --git a/intern/cycles/kernel/geom/geom_curve.h b/intern/cycles/kernel/geom/geom_curve.h index 84aaaab7453..636dbcc71e0 100644 --- a/intern/cycles/kernel/geom/geom_curve.h +++ b/intern/cycles/kernel/geom/geom_curve.h @@ -255,6 +255,17 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte int ka = max(k0 - 1, v00.x); int kb = min(k1 + 1, v00.x + v00.y - 1); +#ifdef __KERNEL_AVX2__ + avxf P_curve_0_1, P_curve_2_3; + if(type & PRIMITIVE_CURVE) { + P_curve_0_1 = _mm256_loadu2_m128(&kg->__curve_keys.data[k0].x, &kg->__curve_keys.data[ka].x); + P_curve_2_3 = _mm256_loadu2_m128(&kg->__curve_keys.data[kb].x, &kg->__curve_keys.data[k1].x); + } + else { + int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, curveAddr) : object; + motion_cardinal_curve_keys_avx(kg, fobject, prim, time, ka, k0, k1, kb, &P_curve_0_1,&P_curve_2_3); + } +#else /* __KERNEL_AVX2__ */ ssef P_curve[4]; if(type & PRIMITIVE_CURVE) { @@ -267,6 +278,7 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, curveAddr): object; motion_cardinal_curve_keys(kg, fobject, prim, time, ka, k0, k1, kb, (float4*)&P_curve); } +#endif /* __KERNEL_AVX2__ */ ssef rd_sgn = set_sign_bit<0, 1, 1, 1>(shuffle<0>(rd_ss)); ssef mul_zxxy = shuffle<2, 0, 0, 1>(vdir) * rd_sgn; @@ -278,6 +290,33 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte ssef htfm1 = shuffle<1, 0, 1, 3>(load1f_first(extract<0>(d_ss)), vdir0); ssef htfm2 = shuffle<1, 3, 2, 3>(mul_shuf, vdir0); +#ifdef __KERNEL_AVX2__ + const avxf vPP = _mm256_broadcast_ps(&P.m128); + const avxf htfm00 = avxf(htfm0.m128, htfm0.m128); + const avxf htfm11 = avxf(htfm1.m128, htfm1.m128); + const avxf htfm22 = avxf(htfm2.m128, htfm2.m128); + + const avxf p01 = madd(shuffle<0>(P_curve_0_1 - vPP), + htfm00, + madd(shuffle<1>(P_curve_0_1 - vPP), + htfm11, + shuffle<2>(P_curve_0_1 - vPP) * htfm22)); + const avxf p23 = madd(shuffle<0>(P_curve_2_3 - vPP), + htfm00, + madd(shuffle<1>(P_curve_2_3 - vPP), + htfm11, + shuffle<2>(P_curve_2_3 - vPP)*htfm22)); + + const ssef p0 = _mm256_castps256_ps128(p01); + const ssef p1 = _mm256_extractf128_ps(p01, 1); + const ssef p2 = _mm256_castps256_ps128(p23); + const ssef p3 = _mm256_extractf128_ps(p23, 1); + + const ssef P_curve_1 = _mm256_extractf128_ps(P_curve_0_1, 1); + r_st = ((float4 &)P_curve_1).w; + const ssef P_curve_2 = _mm256_castps256_ps128(P_curve_2_3); + r_en = ((float4 &)P_curve_2).w; +#else /* __KERNEL_AVX2__ */ ssef htfm[] = { htfm0, htfm1, htfm2 }; ssef vP = load4f(P); ssef p0 = transform_point_T3(htfm, P_curve[0] - vP); @@ -285,6 +324,10 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte ssef p2 = transform_point_T3(htfm, P_curve[2] - vP); ssef p3 = transform_point_T3(htfm, P_curve[3] - vP); + r_st = ((float4 &)P_curve[1]).w; + r_en = ((float4 &)P_curve[2]).w; +#endif /* __KERNEL_AVX2__ */ + float fc = 0.71f; ssef vfc = ssef(fc); ssef vfcxp3 = vfc * p3; @@ -294,8 +337,6 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte vcurve_coef[2] = madd(ssef(fc * 2.0f), p0, madd(ssef(fc - 3.0f), p1, msub(ssef(3.0f - 2.0f * fc), p2, vfcxp3))); vcurve_coef[3] = msub(ssef(fc - 2.0f), p2 - p1, msub(vfc, p0, vfcxp3)); - r_st = ((float4 &)P_curve[1]).w; - r_en = ((float4 &)P_curve[2]).w; } #else float3 curve_coef[4]; @@ -383,8 +424,9 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte /* begin loop */ while(!(tree >> (depth))) { - float i_st = tree * resol; - float i_en = i_st + (level * resol); + const float i_st = tree * resol; + const float i_en = i_st + (level * resol); + #ifdef __KERNEL_SSE2__ ssef vi_st = ssef(i_st), vi_en = ssef(i_en); ssef vp_st = madd(madd(madd(vcurve_coef[3], vi_st, vcurve_coef[2]), vi_st, vcurve_coef[1]), vi_st, vcurve_coef[0]); @@ -458,13 +500,23 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte if(flags & CURVE_KN_RIBBONS) { float3 tg = (p_en - p_st); +#ifdef __KERNEL_SSE__ + const float3 tg_sq = tg * tg; + float w = tg_sq.x + tg_sq.y; +#else float w = tg.x * tg.x + tg.y * tg.y; +#endif if(w == 0) { tree++; level = tree & -tree; continue; } +#ifdef __KERNEL_SSE__ + const float3 p_sttg = p_st * tg; + w = -(p_sttg.x + p_sttg.y) / w; +#else w = -(p_st.x * tg.x + p_st.y * tg.y) / w; +#endif w = saturate(w); /* compute u on the curve segment */ @@ -496,7 +548,13 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte if(difl != 0.0f) { mw_extension = min(difl * fabsf(bmaxz), extmax); r_ext = mw_extension + r_curr; +#ifdef __KERNEL_SSE__ + const float3 p_curr_sq = p_curr * p_curr; + const float3 dxxx = _mm_sqrt_ss(_mm_hadd_ps(p_curr_sq.m128, p_curr_sq.m128)); + float d = dxxx.x; +#else float d = sqrtf(p_curr.x * p_curr.x + p_curr.y * p_curr.y); +#endif float d0 = d - r_curr; float d1 = d + r_curr; float inv_mw_extension = 1.0f/mw_extension; diff --git a/intern/cycles/kernel/geom/geom_motion_curve.h b/intern/cycles/kernel/geom/geom_motion_curve.h index 6de5aa7ea99..80b33fad68b 100644 --- a/intern/cycles/kernel/geom/geom_motion_curve.h +++ b/intern/cycles/kernel/geom/geom_motion_curve.h @@ -118,7 +118,12 @@ ccl_device_inline void motion_cardinal_curve_keys_for_step(KernelGlobals *kg, in } /* return 2 curve key locations */ -ccl_device_inline void motion_cardinal_curve_keys(KernelGlobals *kg, int object, int prim, float time, int k0, int k1, int k2, int k3, float4 keys[4]) +ccl_device_inline void motion_cardinal_curve_keys(KernelGlobals *kg, + int object, + int prim, + float time, + int k0, int k1, int k2, int k3, + float4 keys[4]) { /* get motion info */ int numsteps, numkeys; @@ -147,6 +152,65 @@ ccl_device_inline void motion_cardinal_curve_keys(KernelGlobals *kg, int object, keys[3] = (1.0f - t)*keys[3] + t*next_keys[3]; } +#ifdef __KERNEL_AVX2__ +/* Similar to above, but returns keys as pair of two AVX registers with each + * holding two float4. + */ +ccl_device_inline void motion_cardinal_curve_keys_avx(KernelGlobals *kg, + int object, + int prim, + float time, + int k0, int k1, + int k2, int k3, + avxf *out_keys_0_1, + avxf *out_keys_2_3) +{ + /* Get motion info. */ + int numsteps, numkeys; + object_motion_info(kg, object, &numsteps, NULL, &numkeys); + + /* Figure out which steps we need to fetch and their interpolation factor. */ + int maxstep = numsteps * 2; + int step = min((int)(time*maxstep), maxstep - 1); + float t = time*maxstep - step; + + /* Find attribute. */ + AttributeElement elem; + int offset = find_attribute_curve_motion(kg, + object, + ATTR_STD_MOTION_VERTEX_POSITION, + &elem); + kernel_assert(offset != ATTR_STD_NOT_FOUND); + + /* Fetch key coordinates. */ + float4 next_keys[4]; + float4 keys[4]; + motion_cardinal_curve_keys_for_step(kg, + offset, + numkeys, + numsteps, + step, + k0, k1, k2, k3, + keys); + motion_cardinal_curve_keys_for_step(kg, + offset, + numkeys, + numsteps, + step + 1, + k0, k1, k2, k3, + next_keys); + + const avxf keys_0_1 = avxf(keys[0].m128, keys[1].m128); + const avxf keys_2_3 = avxf(keys[2].m128, keys[3].m128); + const avxf next_keys_0_1 = avxf(next_keys[0].m128, next_keys[1].m128); + const avxf next_keys_2_3 = avxf(next_keys[2].m128, next_keys[3].m128); + + /* Interpolate between steps. */ + *out_keys_0_1 = (1.0f - t) * keys_0_1 + t*next_keys_0_1; + *out_keys_2_3 = (1.0f - t) * keys_2_3 + t*next_keys_2_3; +} +#endif + #endif CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/geom/geom_motion_triangle.h b/intern/cycles/kernel/geom/geom_motion_triangle.h index 3cbe59aaece..538c332c63a 100644 --- a/intern/cycles/kernel/geom/geom_motion_triangle.h +++ b/intern/cycles/kernel/geom/geom_motion_triangle.h @@ -323,11 +323,11 @@ ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals *kg, ShaderD * time and do a ray intersection with the resulting triangle */ ccl_device_inline bool motion_triangle_intersect(KernelGlobals *kg, Intersection *isect, - float3 P, float3 dir, float time, uint visibility, int object, int triAddr) + float3 P, float3 dir, float time, uint visibility, int object, int prim_addr) { /* primitive index for vertex location lookup */ - int prim = kernel_tex_fetch(__prim_index, triAddr); - int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, triAddr): object; + int prim = kernel_tex_fetch(__prim_index, prim_addr); + int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object; /* get vertex locations for intersection */ float3 verts[3]; @@ -340,13 +340,13 @@ ccl_device_inline bool motion_triangle_intersect(KernelGlobals *kg, Intersection #ifdef __VISIBILITY_FLAG__ /* visibility flag test. we do it here under the assumption * that most triangles are culled by node flags */ - if(kernel_tex_fetch(__prim_visibility, triAddr) & visibility) + if(kernel_tex_fetch(__prim_visibility, prim_addr) & visibility) #endif { isect->t = t; isect->u = u; isect->v = v; - isect->prim = triAddr; + isect->prim = prim_addr; isect->object = object; isect->type = PRIMITIVE_MOTION_TRIANGLE; @@ -369,14 +369,14 @@ ccl_device_inline void motion_triangle_intersect_subsurface( float3 dir, float time, int object, - int triAddr, + int prim_addr, float tmax, uint *lcg_state, int max_hits) { /* primitive index for vertex location lookup */ - int prim = kernel_tex_fetch(__prim_index, triAddr); - int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, triAddr): object; + int prim = kernel_tex_fetch(__prim_index, prim_addr); + int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object; /* get vertex locations for intersection */ float3 verts[3]; @@ -413,7 +413,7 @@ ccl_device_inline void motion_triangle_intersect_subsurface( isect->t = t; isect->u = u; isect->v = v; - isect->prim = triAddr; + isect->prim = prim_addr; isect->object = object; isect->type = PRIMITIVE_MOTION_TRIANGLE; diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h index eb7340583c8..4db121d94f4 100644 --- a/intern/cycles/kernel/geom/geom_triangle_intersect.h +++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h @@ -108,7 +108,7 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg, float3 P, uint visibility, int object, - int triAddr) + int prim_addr) { const int kx = isect_precalc->kx; const int ky = isect_precalc->ky; @@ -118,7 +118,7 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg, const float Sz = isect_precalc->Sz; /* Calculate vertices relative to ray origin. */ - const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, triAddr); + const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr); #if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__) const avxf avxf_P(P.m128, P.m128); @@ -129,10 +129,10 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg, const avxf AB = tri_ab - avxf_P; const avxf BC = tri_bc - avxf_P; - const __m256i permuteMask = _mm256_set_epi32(0x3, kz, ky, kx, 0x3, kz, ky, kx); + const __m256i permute_mask = _mm256_set_epi32(0x3, kz, ky, kx, 0x3, kz, ky, kx); - const avxf AB_k = shuffle(AB, permuteMask); - const avxf BC_k = shuffle(BC, permuteMask); + const avxf AB_k = shuffle(AB, permute_mask); + const avxf BC_k = shuffle(BC, permute_mask); /* Akz, Akz, Bkz, Bkz, Bkz, Bkz, Ckz, Ckz */ const avxf ABBC_kz = shuffle<2>(AB_k, BC_k); @@ -155,14 +155,14 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg, /* By, Bx, Cy, Cx, By, Bx, Ay, Ax */ const avxf BCBA_yx = permute<3,2,7,6,3,2,1,0>(ABBC_xy); - const avxf negMask(0,0,0,0,0x80000000, 0x80000000, 0x80000000, 0x80000000); + const avxf neg_mask(0,0,0,0,0x80000000, 0x80000000, 0x80000000, 0x80000000); /* W U V * (AxBy-AyBx) (BxCy-ByCx) XX XX (BxBy-ByBx) (CxAy-CyAx) XX XX */ - const avxf WUxxxxVxx_neg = _mm256_hsub_ps(ABBC_xy * BCBA_yx, negMask /* Dont care */); + const avxf WUxxxxVxx_neg = _mm256_hsub_ps(ABBC_xy * BCBA_yx, neg_mask /* Dont care */); - const avxf WUVWnegWUVW = permute<0,1,5,0,0,1,5,0>(WUxxxxVxx_neg) ^ negMask; + const avxf WUVWnegWUVW = permute<0,1,5,0,0,1,5,0>(WUxxxxVxx_neg) ^ neg_mask; /* Calculate scaled barycentric coordinates. */ float WUVW_array[4]; @@ -231,7 +231,7 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg, #ifdef __VISIBILITY_FLAG__ /* visibility flag test. we do it here under the assumption * that most triangles are culled by node flags */ - if(kernel_tex_fetch(__prim_visibility, triAddr) & visibility) + if(kernel_tex_fetch(__prim_visibility, prim_addr) & visibility) #endif { #ifdef __KERNEL_CUDA__ @@ -241,7 +241,7 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg, #endif /* Normalize U, V, W, and T. */ const float inv_det = 1.0f / det; - isect->prim = triAddr; + isect->prim = prim_addr; isect->object = object; isect->type = PRIMITIVE_TRIANGLE; isect->u = U * inv_det; @@ -264,7 +264,7 @@ ccl_device_inline void triangle_intersect_subsurface( SubsurfaceIntersection *ss_isect, float3 P, int object, - int triAddr, + int prim_addr, float tmax, uint *lcg_state, int max_hits) @@ -277,7 +277,7 @@ ccl_device_inline void triangle_intersect_subsurface( const float Sz = isect_precalc->Sz; /* Calculate vertices relative to ray origin. */ - const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, triAddr); + const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr); const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex+0), tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex+1), tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex+2); @@ -415,7 +415,7 @@ ccl_device_inline void triangle_intersect_subsurface( /* record intersection */ Intersection *isect = &ss_isect->hits[hit]; - isect->prim = triAddr; + isect->prim = prim_addr; isect->object = object; isect->type = PRIMITIVE_TRIANGLE; isect->u = U * inv_det; diff --git a/intern/cycles/kernel/kernel_jitter.h b/intern/cycles/kernel/kernel_jitter.h index aec7bc33acd..67546131746 100644 --- a/intern/cycles/kernel/kernel_jitter.h +++ b/intern/cycles/kernel/kernel_jitter.h @@ -149,6 +149,15 @@ ccl_device_inline uint cmj_hash(uint i, uint p) return i; } +ccl_device_inline uint cmj_hash_simple(uint i, uint p) +{ + i = (i ^ 61) ^ p; + i += i << 3; + i ^= i >> 4; + i *= 0x27d4eb2d; + return i; +} + ccl_device_inline float cmj_randfloat(uint i, uint p) { return cmj_hash(i, p) * (1.0f / 4294967808.0f); diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index 6d89a89ed5b..6a36c68d69f 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -141,6 +141,10 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, #endif /* __LAMP_MIS__ */ #ifdef __VOLUME__ + /* Sanitize volume stack. */ + if(!hit) { + kernel_volume_clean_stack(kg, state->volume_stack); + } /* volume attenuation, emission, scatter */ if(state->volume_stack[0].shader != SHADER_NONE) { Ray volume_ray = *ray; @@ -658,6 +662,10 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg, #endif /* __LAMP_MIS__ */ #ifdef __VOLUME__ + /* Sanitize volume stack. */ + if(!hit) { + kernel_volume_clean_stack(kg, state.volume_stack); + } /* volume attenuation, emission, scatter */ if(state.volume_stack[0].shader != SHADER_NONE) { Ray volume_ray = ray; diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h index c84727ace99..10174e1c4ce 100644 --- a/intern/cycles/kernel/kernel_path_branched.h +++ b/intern/cycles/kernel/kernel_path_branched.h @@ -294,6 +294,10 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in #endif /* __KERNEL_DEBUG__ */ #ifdef __VOLUME__ + /* Sanitize volume stack. */ + if(!hit) { + kernel_volume_clean_stack(kg, state.volume_stack); + } /* volume attenuation, emission, scatter */ if(state.volume_stack[0].shader != SHADER_NONE) { Ray volume_ray = ray; diff --git a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h index 2b767da5041..e773753396f 100644 --- a/intern/cycles/kernel/kernel_random.h +++ b/intern/cycles/kernel/kernel_random.h @@ -120,13 +120,11 @@ ccl_device_forceinline float path_rng_1D(KernelGlobals *kg, ccl_addr_space RNG * /* Cranly-Patterson rotation using rng seed */ float shift; - /* using the same *rng value to offset seems to give correlation issues, - * we could hash it with the dimension but this has a performance impact, - * we need to find a solution for this */ - if(dimension & 1) - shift = (*rng >> 16) * (1.0f/(float)0xFFFF); - else - shift = (*rng & 0xFFFF) * (1.0f/(float)0xFFFF); + /* Hash rng with dimension to solve correlation issues. + * See T38710, T50116. + */ + RNG tmp_rng = cmj_hash_simple(dimension, *rng); + shift = tmp_rng * (1.0f/(float)0xFFFFFFFF); return r + shift - floorf(r + shift); #endif diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index a6c31d4a518..fd961836ec9 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -192,6 +192,9 @@ CCL_NAMESPACE_BEGIN #ifdef __NO_PATCH_EVAL__ # undef __PATCH_EVAL__ #endif +#ifdef __NO_TRANSPARENT__ +# undef __TRANSPARENT_SHADOWS__ +#endif /* Random Numbers */ diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h index dd7b0d9812d..c7cb29b5af2 100644 --- a/intern/cycles/kernel/kernel_volume.h +++ b/intern/cycles/kernel/kernel_volume.h @@ -582,17 +582,12 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance( ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(KernelGlobals *kg, PathState *state, ShaderData *sd, Ray *ray, PathRadiance *L, float3 *throughput, RNG *rng, bool heterogeneous) { - /* workaround to fix correlation bug in T38710, can find better solution - * in random number generator later, for now this is done here to not impact - * performance of rendering without volumes */ - RNG tmp_rng = cmj_hash(*rng, state->rng_offset); - shader_setup_from_volume(kg, sd, ray); if(heterogeneous) - return kernel_volume_integrate_heterogeneous_distance(kg, state, ray, sd, L, throughput, &tmp_rng); + return kernel_volume_integrate_heterogeneous_distance(kg, state, ray, sd, L, throughput, rng); else - return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, &tmp_rng, true); + return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, rng, true); } /* Decoupled Volume Sampling @@ -1267,4 +1262,30 @@ ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg, } #endif +/* Clean stack after the last bounce. + * + * It is expected that all volumes are closed manifolds, so at the time when ray + * hits nothing (for example, it is a last bounce which goes to environment) the + * only expected volume in the stack is the world's one. All the rest volume + * entries should have been exited already. + * + * This isn't always true because of ray intersection precision issues, which + * could lead us to an infinite non-world volume in the stack, causing render + * artifacts. + * + * Use this function after the last bounce to get rid of all volumes apart from + * the world's one after the last bounce to avoid render artifacts. + */ +ccl_device_inline void kernel_volume_clean_stack(KernelGlobals *kg, + VolumeStack *volume_stack) +{ + if(kernel_data.background.volume_shader != SHADER_NONE) { + /* Keep the world's volume in stack. */ + volume_stack[1].shader = SHADER_NONE; + } + else { + volume_stack[0].shader = SHADER_NONE; + } +} + CCL_NAMESPACE_END diff --git a/intern/cycles/render/bake.cpp b/intern/cycles/render/bake.cpp index 13310a61761..d9a297002c6 100644 --- a/intern/cycles/render/bake.cpp +++ b/intern/cycles/render/bake.cpp @@ -135,20 +135,16 @@ bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progre { size_t num_pixels = bake_data->size(); - progress.reset_sample(); - this->num_parts = 0; + int num_samples = is_aa_pass(shader_type)? scene->integrator->aa_samples : 1; - /* calculate the total parts for the progress bar */ + /* calculate the total pixel samples for the progress bar */ + total_pixel_samples = 0; for(size_t shader_offset = 0; shader_offset < num_pixels; shader_offset += m_shader_limit) { size_t shader_size = (size_t)fminf(num_pixels - shader_offset, m_shader_limit); - - DeviceTask task(DeviceTask::SHADER); - task.shader_w = shader_size; - - this->num_parts += device->get_split_task_count(task); + total_pixel_samples += shader_size * num_samples; } - - this->num_samples = is_aa_pass(shader_type)? scene->integrator->aa_samples : 1; + progress.reset_sample(); + progress.set_total_pixel_samples(total_pixel_samples); for(size_t shader_offset = 0; shader_offset < num_pixels; shader_offset += m_shader_limit) { size_t shader_size = (size_t)fminf(num_pixels - shader_offset, m_shader_limit); @@ -187,9 +183,9 @@ bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progre task.shader_x = 0; task.offset = shader_offset; task.shader_w = d_output.size(); - task.num_samples = this->num_samples; + task.num_samples = num_samples; task.get_cancel = function_bind(&Progress::get_cancel, &progress); - task.update_progress_sample = function_bind(&Progress::increment_sample_update, &progress); + task.update_progress_sample = function_bind(&Progress::add_samples_update, &progress, _1, _2); device->task_add(task); device->task_wait(); diff --git a/intern/cycles/render/bake.h b/intern/cycles/render/bake.h index 8377e387197..25f5eb3c897 100644 --- a/intern/cycles/render/bake.h +++ b/intern/cycles/render/bake.h @@ -73,8 +73,7 @@ public: bool need_update; - int num_samples; - int num_parts; + int total_pixel_samples; private: BakeData *m_bake_data; diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp index 3fb2bb1cf92..c7f37a13fba 100644 --- a/intern/cycles/render/nodes.cpp +++ b/intern/cycles/render/nodes.cpp @@ -1442,14 +1442,14 @@ void PointDensityTextureNode::compile(SVMCompiler& compiler) else { if(use_density) { compiler.add_node(NODE_VALUE_F, - __float_as_int(0.0f), - compiler.stack_assign(density_out)); + __float_as_int(0.0f), + compiler.stack_assign(density_out)); } if(use_color) { compiler.add_node(NODE_VALUE_V, compiler.stack_assign(color_out)); compiler.add_node(NODE_VALUE_V, make_float3(TEX_IMAGE_MISSING_R, - TEX_IMAGE_MISSING_G, - TEX_IMAGE_MISSING_B)); + TEX_IMAGE_MISSING_G, + TEX_IMAGE_MISSING_B)); } } } @@ -2421,7 +2421,7 @@ void BackgroundNode::compile(SVMCompiler& compiler) if(color_in->link || strength_in->link) { compiler.add_node(NODE_EMISSION_WEIGHT, compiler.stack_assign(color_in), - compiler.stack_assign(strength_in)); + compiler.stack_assign(strength_in)); } else compiler.add_node(NODE_CLOSURE_SET_WEIGHT, color*strength); diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp index 9d8c9fed7af..33721048722 100644 --- a/intern/cycles/render/session.cpp +++ b/intern/cycles/render/session.cpp @@ -67,10 +67,7 @@ Session::Session(const SessionParams& params_) session_thread = NULL; scene = NULL; - start_time = 0.0; reset_time = 0.0; - preview_time = 0.0; - paused_time = 0.0; last_update_time = 0.0; delayed_reset.do_reset = false; @@ -201,12 +198,10 @@ void Session::run_gpu() { bool tiles_written = false; - start_time = time_dt(); reset_time = time_dt(); - paused_time = 0.0; last_update_time = time_dt(); - progress.set_render_start_time(start_time + paused_time); + progress.set_render_start_time(); while(!progress.get_cancel()) { /* advance to next tile */ @@ -233,13 +228,9 @@ void Session::run_gpu() update_status_time(pause, no_tiles); while(1) { - double pause_start = time_dt(); + scoped_timer pause_timer; pause_cond.wait(pause_lock); - paused_time += time_dt() - pause_start; - - if(!params.background) - progress.set_start_time(start_time + paused_time); - progress.set_render_start_time(start_time + paused_time); + progress.add_skip_time(pause_timer, params.background); update_status_time(pause, no_tiles); progress.set_update(); @@ -255,7 +246,9 @@ void Session::run_gpu() if(!no_tiles) { /* update scene */ + scoped_timer update_timer; update_scene(); + progress.add_skip_time(update_timer, params.background); if(!device->error_message().empty()) progress.set_error(device->error_message()); @@ -523,13 +516,9 @@ void Session::run_cpu() update_status_time(pause, no_tiles); while(1) { - double pause_start = time_dt(); + scoped_timer pause_timer; pause_cond.wait(pause_lock); - paused_time += time_dt() - pause_start; - - if(!params.background) - progress.set_start_time(start_time + paused_time); - progress.set_render_start_time(start_time + paused_time); + progress.add_skip_time(pause_timer, params.background); update_status_time(pause, no_tiles); progress.set_update(); @@ -550,7 +539,9 @@ void Session::run_cpu() thread_scoped_lock buffers_lock(buffers_mutex); /* update scene */ + scoped_timer update_timer; update_scene(); + progress.add_skip_time(update_timer, params.background); if(!device->error_message().empty()) progress.set_error(device->error_message()); @@ -645,6 +636,7 @@ DeviceRequestedFeatures Session::get_requested_device_features() BakeManager *bake_manager = scene->bake_manager; requested_features.use_baking = bake_manager->get_baking(); requested_features.use_integrator_branched = (scene->integrator->method == Integrator::BRANCHED_PATH); + requested_features.use_transparent &= scene->integrator->transparent_shadows; return requested_features; } @@ -718,14 +710,14 @@ void Session::reset_(BufferParams& buffer_params, int samples) } tile_manager.reset(buffer_params, samples); + progress.reset_sample(); - start_time = time_dt(); - preview_time = 0.0; - paused_time = 0.0; + bool show_progress = params.background || tile_manager.get_num_effective_samples() != INT_MAX; + progress.set_total_pixel_samples(show_progress? tile_manager.state.total_pixel_samples : 0); if(!params.background) - progress.set_start_time(start_time); - progress.set_render_start_time(start_time); + progress.set_start_time(); + progress.set_render_start_time(); } void Session::reset(BufferParams& buffer_params, int samples) @@ -827,61 +819,40 @@ void Session::update_scene() void Session::update_status_time(bool show_pause, bool show_done) { - int sample = tile_manager.state.sample; - int resolution = tile_manager.state.resolution_divider; - int num_tiles = tile_manager.state.num_tiles; + int progressive_sample = tile_manager.state.sample; + int num_samples = tile_manager.get_num_effective_samples(); + int tile = tile_manager.state.num_rendered_tiles; + int num_tiles = tile_manager.state.num_tiles; /* update status */ string status, substatus; if(!params.progressive) { - const int progress_sample = progress.get_sample(), - num_samples = tile_manager.get_num_effective_samples(); - const bool is_gpu = params.device.type == DEVICE_CUDA || params.device.type == DEVICE_OPENCL; - const bool is_multidevice = params.device.multi_devices.size() > 1; const bool is_cpu = params.device.type == DEVICE_CPU; - const bool is_last_tile = (num_samples * num_tiles - progress_sample) < num_samples; + const bool is_last_tile = (progress.get_finished_tiles() + 1) == num_tiles; substatus = string_printf("Path Tracing Tile %d/%d", tile, num_tiles); - if((is_gpu && !is_multidevice && !device->info.use_split_kernel) || - (is_cpu && (num_tiles == 1 || is_last_tile))) + if(device->show_samples() || (is_cpu && is_last_tile)) { - /* When using split-kernel (OpenCL) each thread in a tile will be working on a different - * sample. Can't display sample number when device uses split-kernel + /* Some devices automatically support showing the sample number: + * - CUDADevice + * - OpenCLDevice when using the megakernel (the split kernel renders multiple samples at the same time, so the current sample isn't really defined) + * - CPUDevice when using one thread + * For these devices, the current sample is always shown. + * + * The other option is when the last tile is currently being rendered by the CPU. */ - - /* when rendering on GPU multithreading happens within single tile, as in - * tiles are handling sequentially and in this case we could display - * currently rendering sample number - * this helps a lot from feedback point of view. - * also display the info on CPU, when using 1 tile only - */ - - int status_sample = progress_sample; - if(tile > 1) { - /* sample counter is global for all tiles, subtract samples - * from already finished tiles to get sample counter for - * current tile only - */ - if(is_cpu && is_last_tile && num_tiles > 1) { - status_sample = num_samples - (num_samples * num_tiles - progress_sample); - } - else { - status_sample -= (tile - 1) * num_samples; - } - } - - substatus += string_printf(", Sample %d/%d", status_sample, num_samples); + substatus += string_printf(", Sample %d/%d", progress.get_current_sample(), num_samples); } } else if(tile_manager.num_samples == INT_MAX) - substatus = string_printf("Path Tracing Sample %d", sample+1); + substatus = string_printf("Path Tracing Sample %d", progressive_sample+1); else substatus = string_printf("Path Tracing Sample %d/%d", - sample+1, - tile_manager.get_num_effective_samples()); + progressive_sample+1, + num_samples); if(show_pause) { status = "Paused"; @@ -895,22 +866,6 @@ void Session::update_status_time(bool show_pause, bool show_done) } progress.set_status(status, substatus); - - /* update timing */ - if(preview_time == 0.0 && resolution == 1) - preview_time = time_dt(); - - double tile_time = (tile == 0 || sample == 0)? 0.0: (time_dt() - preview_time - paused_time) / sample; - - /* negative can happen when we pause a bit before rendering, can discard that */ - if(preview_time < 0.0) preview_time = 0.0; - - progress.set_tile(tile, tile_time); -} - -void Session::update_progress_sample() -{ - progress.increment_sample(); } void Session::path_trace() @@ -922,7 +877,7 @@ void Session::path_trace() task.release_tile = function_bind(&Session::release_tile, this, _1); task.get_cancel = function_bind(&Progress::get_cancel, &this->progress); task.update_tile_sample = function_bind(&Session::update_tile_sample, this, _1); - task.update_progress_sample = function_bind(&Session::update_progress_sample, this); + task.update_progress_sample = function_bind(&Progress::add_samples, &this->progress, _1, _2); task.need_finish_queue = params.progressive_refine; task.integrator_branched = scene->integrator->method == Integrator::BRANCHED_PATH; task.requested_tile_size = params.tile_size; diff --git a/intern/cycles/render/session.h b/intern/cycles/render/session.h index 1db4692e171..c7ff1446171 100644 --- a/intern/cycles/render/session.h +++ b/intern/cycles/render/session.h @@ -145,6 +145,10 @@ public: void device_free(); + /* Returns the rendering progress or 0 if no progress can be determined + * (for example, when rendering with unlimited samples). */ + float get_progress(); + protected: struct DelayedReset { thread_mutex mutex; @@ -173,8 +177,6 @@ protected: void update_tile_sample(RenderTile& tile); void release_tile(RenderTile& tile); - void update_progress_sample(); - bool device_use_gl; thread *session_thread; @@ -194,10 +196,7 @@ protected: bool kernels_loaded; - double start_time; double reset_time; - double preview_time; - double paused_time; /* progressive refine */ double last_update_time; diff --git a/intern/cycles/render/shader.cpp b/intern/cycles/render/shader.cpp index 06b6dd969d8..335edcbe609 100644 --- a/intern/cycles/render/shader.cpp +++ b/intern/cycles/render/shader.cpp @@ -571,6 +571,9 @@ void ShaderManager::get_requested_graph_features(ShaderGraph *graph, if(node->has_surface_bssrdf()) { requested_features->use_subsurface = true; } + if(node->has_surface_transparent()) { + requested_features->use_transparent = true; + } } } diff --git a/intern/cycles/render/tile.cpp b/intern/cycles/render/tile.cpp index 3a6dfea11a7..e59d0c843a3 100644 --- a/intern/cycles/render/tile.cpp +++ b/intern/cycles/render/tile.cpp @@ -108,36 +108,57 @@ TileManager::~TileManager() { } -void TileManager::reset(BufferParams& params_, int num_samples_) +static int get_divider(int w, int h, int start_resolution) { - params = params_; - int divider = 1; - int w = params.width, h = params.height; - if(start_resolution != INT_MAX) { while(w*h > start_resolution*start_resolution) { w = max(1, w/2); h = max(1, h/2); - divider *= 2; + divider <<= 1; } } + return divider; +} - num_samples = num_samples_; +void TileManager::reset(BufferParams& params_, int num_samples_) +{ + params = params_; + + set_samples(num_samples_); state.buffer = BufferParams(); state.sample = range_start_sample - 1; state.num_tiles = 0; state.num_rendered_tiles = 0; state.num_samples = 0; - state.resolution_divider = divider; + state.resolution_divider = get_divider(params.width, params.height, start_resolution); state.tiles.clear(); } void TileManager::set_samples(int num_samples_) { num_samples = num_samples_; + + /* No real progress indication is possible when using unlimited samples. */ + if(num_samples == INT_MAX) { + state.total_pixel_samples = 0; + } + else { + uint64_t pixel_samples = 0; + /* While rendering in the viewport, the initial preview resolution is increased to the native resolution + * before the actual rendering begins. Therefore, additional pixel samples will be rendered. */ + int divider = get_divider(params.width, params.height, start_resolution) / 2; + while(divider > 1) { + int image_w = max(1, params.width/divider); + int image_h = max(1, params.height/divider); + pixel_samples += image_w * image_h; + divider >>= 1; + } + + state.total_pixel_samples = pixel_samples + get_num_effective_samples() * params.width*params.height; + } } /* If sliced is false, splits image into tiles and assigns equal amount of tiles to every render device. diff --git a/intern/cycles/render/tile.h b/intern/cycles/render/tile.h index af1b1ed8b0f..5d92ebac355 100644 --- a/intern/cycles/render/tile.h +++ b/intern/cycles/render/tile.h @@ -64,6 +64,10 @@ public: int resolution_divider; int num_tiles; int num_rendered_tiles; + + /* Total samples over all pixels: Generally num_samples*num_pixels, + * but can be higher due to the initial resolution division for previews. */ + uint64_t total_pixel_samples; /* This vector contains a list of tiles for every logical device in the session. * In each list, the tiles are sorted according to the tile order setting. */ vector<list<Tile> > tiles; @@ -91,7 +95,7 @@ public: /* Number to samples in the rendering range. */ int range_num_samples; - /* get number of actual samples to render. */ + /* Get number of actual samples to render. */ int get_num_effective_samples(); protected: diff --git a/intern/cycles/util/util_avxf.h b/intern/cycles/util/util_avxf.h index 2db2c4dad1a..2451213963a 100644 --- a/intern/cycles/util/util_avxf.h +++ b/intern/cycles/util/util_avxf.h @@ -180,6 +180,14 @@ __forceinline const avxf nmadd(const avxf& a, const avxf& b, const avxf& c) { } #endif +#ifndef _mm256_set_m128 +# define _mm256_set_m128(/* __m128 */ hi, /* __m128 */ lo) \ + _mm256_insertf128_ps(_mm256_castps128_ps256(lo), (hi), 0x1) +#endif + +#define _mm256_loadu2_m128(/* float const* */ hiaddr, /* float const* */ loaddr) \ + _mm256_set_m128(_mm_loadu_ps(hiaddr), _mm_loadu_ps(loaddr)) + CCL_NAMESPACE_END #endif diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h index 3f4d3e06c0b..6cb68b53d16 100644 --- a/intern/cycles/util/util_math.h +++ b/intern/cycles/util/util_math.h @@ -596,8 +596,7 @@ ccl_device_inline float len_squared(const float4& a) ccl_device_inline float3 normalize(const float3& a) { - /* TODO(sergey): Disabled for now, causes crashes in certain cases. */ -#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) && 0 +#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) __m128 norm = _mm_sqrt_ps(_mm_dp_ps(a.m128, a.m128, 0x7F)); return _mm_div_ps(a.m128, norm); #else @@ -798,8 +797,7 @@ ccl_device_inline float4 operator-(const float4& a) ccl_device_inline float4 operator*(const float4& a, const float4& b) { - /* TODO(sergey): Disabled for now, causes crashes in certain cases. */ -#if defined(__KERNEL_SSE__) && 0 +#ifdef __KERNEL_SSE__ return _mm_mul_ps(a.m128, b.m128); #else return make_float4(a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w); @@ -847,8 +845,7 @@ ccl_device_inline float4 operator/(const float4& a, const float4& b) ccl_device_inline float4 operator+(const float4& a, const float4& b) { - /* TODO(sergey): Disabled for now, causes crashes in certain cases. */ -#if defined(__KERNEL_SSE__) && 0 +#ifdef __KERNEL_SSE__ return _mm_add_ps(a.m128, b.m128); #else return make_float4(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w); diff --git a/intern/cycles/util/util_progress.h b/intern/cycles/util/util_progress.h index 4ae1d61dd17..14215056840 100644 --- a/intern/cycles/util/util_progress.h +++ b/intern/cycles/util/util_progress.h @@ -34,12 +34,12 @@ class Progress { public: Progress() { - tile = 0; - sample = 0; + pixel_samples = 0; + total_pixel_samples = 0; + current_tile_sample = 0; + finished_tiles = 0; start_time = time_dt(); - total_time = 0.0; - render_time = 0.0; - tile_time = 0.0; + render_start_time = time_dt(); status = "Initializing"; substatus = ""; sync_status = ""; @@ -62,22 +62,22 @@ public: thread_scoped_lock lock(progress.progress_mutex); progress.get_status(status, substatus); - progress.get_tile(tile, total_time, render_time, tile_time); - sample = progress.get_sample(); + pixel_samples = progress.pixel_samples; + total_pixel_samples = progress.total_pixel_samples; + current_tile_sample = progress.get_current_sample(); return *this; } void reset() { - tile = 0; - sample = 0; + pixel_samples = 0; + total_pixel_samples = 0; + current_tile_sample = 0; + finished_tiles = 0; start_time = time_dt(); render_start_time = time_dt(); - total_time = 0.0; - render_time = 0.0; - tile_time = 0.0; status = "Initializing"; substatus = ""; sync_status = ""; @@ -139,69 +139,93 @@ public: /* tile and timing information */ - void set_start_time(double start_time_) + void set_start_time() { thread_scoped_lock lock(progress_mutex); - start_time = start_time_; + start_time = time_dt(); } - void set_render_start_time(double render_start_time_) + void set_render_start_time() { thread_scoped_lock lock(progress_mutex); - render_start_time = render_start_time_; + render_start_time = time_dt(); } - void set_tile(int tile_, double tile_time_) + void add_skip_time(const scoped_timer &start_timer, bool only_render) { - thread_scoped_lock lock(progress_mutex); + double skip_time = time_dt() - start_timer.get_start(); - tile = tile_; - total_time = time_dt() - start_time; - render_time = time_dt() - render_start_time; - tile_time = tile_time_; + render_start_time += skip_time; + if(!only_render) { + start_time += skip_time; + } } - void get_tile(int& tile_, double& total_time_, double& render_time_, double& tile_time_) + void get_time(double& total_time_, double& render_time_) { thread_scoped_lock lock(progress_mutex); - tile_ = tile; - total_time_ = (total_time > 0.0)? total_time: 0.0; - render_time_ = (render_time > 0.0)? render_time: 0.0; - tile_time_ = tile_time; + total_time_ = time_dt() - start_time; + render_time_ = time_dt() - render_start_time; } - void get_time(double& total_time_, double& render_time_) + void reset_sample() { - total_time_ = (total_time > 0.0)? total_time: 0.0; - render_time_ = (render_time > 0.0)? render_time: 0.0; + thread_scoped_lock lock(progress_mutex); + + pixel_samples = 0; + current_tile_sample = 0; + finished_tiles = 0; } - void reset_sample() + void set_total_pixel_samples(uint64_t total_pixel_samples_) { thread_scoped_lock lock(progress_mutex); - sample = 0; + total_pixel_samples = total_pixel_samples_; } - void increment_sample() + float get_progress() + { + if(total_pixel_samples > 0) { + return ((float) pixel_samples) / total_pixel_samples; + } + return 0.0f; + } + + void add_samples(uint64_t pixel_samples_, int tile_sample) { thread_scoped_lock lock(progress_mutex); - sample++; + pixel_samples += pixel_samples_; + current_tile_sample = tile_sample; } - void increment_sample_update() + void add_samples_update(uint64_t pixel_samples_, int tile_sample) { - increment_sample(); + add_samples(pixel_samples_, tile_sample); set_update(); } - int get_sample() + void add_finished_tile() + { + thread_scoped_lock lock(progress_mutex); + + finished_tiles++; + } + + int get_current_sample() + { + /* Note that the value here always belongs to the last tile that updated, + * so it's only useful if there is only one active tile. */ + return current_tile_sample; + } + + int get_finished_tiles() { - return sample; + return finished_tiles; } /* status messages */ @@ -212,8 +236,6 @@ public: thread_scoped_lock lock(progress_mutex); status = status_; substatus = substatus_; - total_time = time_dt() - start_time; - render_time = time_dt() - render_start_time; } set_update(); @@ -224,8 +246,6 @@ public: { thread_scoped_lock lock(progress_mutex); substatus = substatus_; - total_time = time_dt() - start_time; - render_time = time_dt() - render_start_time; } set_update(); @@ -237,8 +257,6 @@ public: thread_scoped_lock lock(progress_mutex); sync_status = status_; sync_substatus = substatus_; - total_time = time_dt() - start_time; - render_time = time_dt() - render_start_time; } set_update(); @@ -250,8 +268,6 @@ public: { thread_scoped_lock lock(progress_mutex); sync_substatus = substatus_; - total_time = time_dt() - start_time; - render_time = time_dt() - render_start_time; } set_update(); @@ -292,12 +308,19 @@ protected: function<void(void)> update_cb; function<void(void)> cancel_cb; - int tile; /* counter for rendered tiles */ - int sample; /* counter of rendered samples, global for all tiles */ + /* pixel_samples counts how many samples have been rendered over all pixel, not just per pixel. + * This makes the progress estimate more accurate when tiles with different sizes are used. + * + * total_pixel_samples is the total amount of pixel samples that will be rendered. */ + uint64_t pixel_samples, total_pixel_samples; + /* Stores the current sample count of the last tile that called the update function. + * It's used to display the sample count if only one tile is active. */ + int current_tile_sample; + /* Stores the number of tiles that's already finished. + * Used to determine whether all but the last tile are finished rendering, in which case the current_tile_sample is displayed. */ + int finished_tiles; double start_time, render_start_time; - double total_time, render_time; - double tile_time; string status; string substatus; diff --git a/intern/cycles/util/util_time.h b/intern/cycles/util/util_time.h index a5b074bffa0..65798244111 100644 --- a/intern/cycles/util/util_time.h +++ b/intern/cycles/util/util_time.h @@ -29,7 +29,7 @@ void time_sleep(double t); class scoped_timer { public: - explicit scoped_timer(double *value) : value_(value) + explicit scoped_timer(double *value = NULL) : value_(value) { time_start_ = time_dt(); } @@ -40,6 +40,12 @@ public: *value_ = time_dt() - time_start_; } } + + double get_start() const + { + return time_start_; + } + protected: double *value_; double time_start_; diff --git a/intern/cycles/util/util_transform.h b/intern/cycles/util/util_transform.h index ea5eb3b25b0..a0695f20488 100644 --- a/intern/cycles/util/util_transform.h +++ b/intern/cycles/util/util_transform.h @@ -74,7 +74,7 @@ ccl_device_inline float3 transform_perspective(const Transform *t, const float3 ccl_device_inline float3 transform_point(const Transform *t, const float3 a) { /* TODO(sergey): Disabled for now, causes crashes in certain cases. */ -#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__) && 0 +#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__) ssef x, y, z, w, aa; aa = a.m128; @@ -103,8 +103,7 @@ ccl_device_inline float3 transform_point(const Transform *t, const float3 a) ccl_device_inline float3 transform_direction(const Transform *t, const float3 a) { - /* TODO(sergey): Disabled for now, causes crashes in certain cases. */ -#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__) && 0 +#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__) ssef x, y, z, w, aa; aa = a.m128; x = _mm_loadu_ps(&t->x.x); diff --git a/intern/opencolorio/fallback_impl.cc b/intern/opencolorio/fallback_impl.cc index d0a129360b0..87629422013 100644 --- a/intern/opencolorio/fallback_impl.cc +++ b/intern/opencolorio/fallback_impl.cc @@ -23,18 +23,26 @@ * ***** END GPL LICENSE BLOCK ***** */ -#include <string.h> +#include <algorithm> +#include <cstring> #include "MEM_guardedalloc.h" #include "BLI_math_color.h" +#include "BLI_math_vector.h" #include "ocio_impl.h" +using std::max; + #define CONFIG_DEFAULT ((OCIO_ConstConfigRcPtr*)1) -#define PROCESSOR_LINEAR_TO_SRGB ((OCIO_ConstProcessorRcPtr*)1) -#define PROCESSOR_SRGB_TO_LINEAR ((OCIO_ConstProcessorRcPtr*)2) -#define PROCESSOR_UNKNOWN ((OCIO_ConstProcessorRcPtr*)3) +enum TransformType { + TRANSFORM_LINEAR_TO_SRGB, + TRANSFORM_SRGB_TO_LINEAR, + TRANSFORM_MATRIX, + TRANSFORM_EXPONENT, + TRANSFORM_UNKNOWN, +}; #define COLORSPACE_LINEAR ((OCIO_ConstColorSpaceRcPtr*)1) #define COLORSPACE_SRGB ((OCIO_ConstColorSpaceRcPtr*)2) @@ -49,6 +57,145 @@ typedef struct OCIO_PackedImageDescription { long yStrideBytes; } OCIO_PackedImageDescription; +struct FallbackTransform { + FallbackTransform() + : type(TRANSFORM_UNKNOWN), + linear_transform(NULL), + display_transform(NULL) + { + } + + ~FallbackTransform() + { + delete linear_transform; + delete display_transform; + } + + void applyRGB(float *pixel) + { + if (type == TRANSFORM_LINEAR_TO_SRGB) { + applyLinearRGB(pixel); + linearrgb_to_srgb_v3_v3(pixel, pixel); + applyDisplayRGB(pixel); + } + else if (type == TRANSFORM_SRGB_TO_LINEAR) { + srgb_to_linearrgb_v3_v3(pixel, pixel); + } + else if (type == TRANSFORM_EXPONENT) { + pixel[0] = powf(max(0.0f, pixel[0]), exponent[0]); + pixel[1] = powf(max(0.0f, pixel[1]), exponent[1]); + pixel[2] = powf(max(0.0f, pixel[2]), exponent[2]); + } + else if (type == TRANSFORM_MATRIX) { + float r = pixel[0]; + float g = pixel[1]; + float b = pixel[2]; + pixel[0] = r*matrix[0] + g*matrix[1] + b*matrix[2]; + pixel[1] = r*matrix[4] + g*matrix[5] + b*matrix[6]; + pixel[2] = r*matrix[8] + g*matrix[9] + b*matrix[10]; + pixel[0] += offset[0]; + pixel[1] += offset[1]; + pixel[2] += offset[2]; + } + } + + void applyRGBA(float *pixel) + { + if (type == TRANSFORM_LINEAR_TO_SRGB) { + applyLinearRGBA(pixel); + linearrgb_to_srgb_v4(pixel, pixel); + applyDisplayRGBA(pixel); + } + else if (type == TRANSFORM_SRGB_TO_LINEAR) { + srgb_to_linearrgb_v4(pixel, pixel); + } + else if (type == TRANSFORM_EXPONENT) { + pixel[0] = powf(max(0.0f, pixel[0]), exponent[0]); + pixel[1] = powf(max(0.0f, pixel[1]), exponent[1]); + pixel[2] = powf(max(0.0f, pixel[2]), exponent[2]); + pixel[3] = powf(max(0.0f, pixel[3]), exponent[3]); + } + else if (type == TRANSFORM_MATRIX) { + float r = pixel[0]; + float g = pixel[1]; + float b = pixel[2]; + float a = pixel[3]; + pixel[0] = r*matrix[0] + g*matrix[1] + b*matrix[2] + a*matrix[3]; + pixel[1] = r*matrix[4] + g*matrix[5] + b*matrix[6] + a*matrix[7]; + pixel[2] = r*matrix[8] + g*matrix[9] + b*matrix[10] + a*matrix[11]; + pixel[3] = r*matrix[12] + g*matrix[13] + b*matrix[14] + a*matrix[15]; + pixel[0] += offset[0]; + pixel[1] += offset[1]; + pixel[2] += offset[2]; + pixel[3] += offset[3]; + } + } + + void applyLinearRGB(float *pixel) + { + if (linear_transform != NULL) { + linear_transform->applyRGB(pixel); + } + } + + void applyLinearRGBA(float *pixel) + { + if (linear_transform != NULL) { + linear_transform->applyRGBA(pixel); + } + } + + void applyDisplayRGB(float *pixel) + { + if (display_transform != NULL) { + display_transform->applyRGB(pixel); + } + } + + void applyDisplayRGBA(float *pixel) + { + if (display_transform != NULL) { + display_transform->applyRGBA(pixel); + } + } + + TransformType type; + FallbackTransform *linear_transform; + FallbackTransform *display_transform; + /* Exponent transform. */ + float exponent[4]; + /* Matrix transform. */ + float matrix[16]; + float offset[4]; + + MEM_CXX_CLASS_ALLOC_FUNCS("FallbackProcessor"); +}; + +struct FallbackProcessor { + FallbackProcessor() + : transform(NULL) + { + } + + ~FallbackProcessor() { + delete transform; + } + + void applyRGB(float *pixel) + { + transform->applyRGB(pixel); + } + + void applyRGBA(float *pixel) + { + transform->applyRGBA(pixel); + } + + FallbackTransform *transform; + + MEM_CXX_CLASS_ALLOC_FUNCS("FallbackProcessor"); +}; + OCIO_ConstConfigRcPtr *FallbackImpl::getCurrentConfig(void) { return CONFIG_DEFAULT; @@ -233,19 +380,27 @@ OCIO_ConstProcessorRcPtr *FallbackImpl::configGetProcessorWithNames( { OCIO_ConstColorSpaceRcPtr *cs_src = configGetColorSpace(config, srcName); OCIO_ConstColorSpaceRcPtr *cs_dst = configGetColorSpace(config, dstName); + FallbackTransform *transform = new FallbackTransform(); if (cs_src == COLORSPACE_LINEAR && cs_dst == COLORSPACE_SRGB) { - return PROCESSOR_LINEAR_TO_SRGB; + transform->type = TRANSFORM_LINEAR_TO_SRGB; } else if (cs_src == COLORSPACE_SRGB && cs_dst == COLORSPACE_LINEAR) { - return PROCESSOR_SRGB_TO_LINEAR; + transform->type = TRANSFORM_SRGB_TO_LINEAR; } - return 0; + else { + transform->type = TRANSFORM_UNKNOWN; + } + FallbackProcessor *processor = new FallbackProcessor(); + processor->transform = transform; + return (OCIO_ConstProcessorRcPtr *)processor; } OCIO_ConstProcessorRcPtr *FallbackImpl::configGetProcessor(OCIO_ConstConfigRcPtr * /*config*/, - OCIO_ConstTransformRcPtr *tfm) + OCIO_ConstTransformRcPtr *transform) { - return (OCIO_ConstProcessorRcPtr*)tfm; + FallbackProcessor *processor = new FallbackProcessor(); + processor->transform = (FallbackTransform *)transform; + return (OCIO_ConstProcessorRcPtr *)processor; } void FallbackImpl::processorApply(OCIO_ConstProcessorRcPtr *processor, @@ -297,21 +452,13 @@ void FallbackImpl::processorApply_predivide(OCIO_ConstProcessorRcPtr *processor, void FallbackImpl::processorApplyRGB(OCIO_ConstProcessorRcPtr *processor, float *pixel) { - if (processor == PROCESSOR_LINEAR_TO_SRGB) { - linearrgb_to_srgb_v3_v3(pixel, pixel); - } - else if (processor == PROCESSOR_SRGB_TO_LINEAR) { - srgb_to_linearrgb_v3_v3(pixel, pixel); - } + ((FallbackProcessor *)processor)->applyRGB(pixel); } void FallbackImpl::processorApplyRGBA(OCIO_ConstProcessorRcPtr *processor, float *pixel) { - if (processor == PROCESSOR_LINEAR_TO_SRGB) - linearrgb_to_srgb_v4(pixel, pixel); - else if (processor == PROCESSOR_SRGB_TO_LINEAR) - srgb_to_linearrgb_v4(pixel, pixel); + ((FallbackProcessor *)processor)->applyRGBA(pixel); } void FallbackImpl::processorApplyRGBA_predivide(OCIO_ConstProcessorRcPtr *processor, @@ -338,8 +485,9 @@ void FallbackImpl::processorApplyRGBA_predivide(OCIO_ConstProcessorRcPtr *proces } } -void FallbackImpl::processorRelease(OCIO_ConstProcessorRcPtr * /*p*/) +void FallbackImpl::processorRelease(OCIO_ConstProcessorRcPtr *processor) { + delete (FallbackProcessor*)(processor); } const char *FallbackImpl::colorSpaceGetName(OCIO_ConstColorSpaceRcPtr *cs) @@ -365,7 +513,9 @@ const char *FallbackImpl::colorSpaceGetFamily(OCIO_ConstColorSpaceRcPtr * /*cs*/ OCIO_DisplayTransformRcPtr *FallbackImpl::createDisplayTransform(void) { - return (OCIO_DisplayTransformRcPtr*)PROCESSOR_LINEAR_TO_SRGB; + FallbackTransform *transform = new FallbackTransform(); + transform->type = TRANSFORM_LINEAR_TO_SRGB; + return (OCIO_DisplayTransformRcPtr*)transform; } void FallbackImpl::displayTransformSetInputColorSpaceName(OCIO_DisplayTransformRcPtr * /*dt*/, @@ -383,14 +533,18 @@ void FallbackImpl::displayTransformSetView(OCIO_DisplayTransformRcPtr * /*dt*/, { } -void FallbackImpl::displayTransformSetDisplayCC(OCIO_DisplayTransformRcPtr * /*dt*/, - OCIO_ConstTransformRcPtr * /*et*/) +void FallbackImpl::displayTransformSetDisplayCC(OCIO_DisplayTransformRcPtr *dt, + OCIO_ConstTransformRcPtr *et) { + FallbackTransform *transform = (FallbackTransform *)dt; + transform->display_transform = (FallbackTransform *)et; } -void FallbackImpl::displayTransformSetLinearCC(OCIO_DisplayTransformRcPtr * /*dt*/, - OCIO_ConstTransformRcPtr * /*et*/) +void FallbackImpl::displayTransformSetLinearCC(OCIO_DisplayTransformRcPtr *dt, + OCIO_ConstTransformRcPtr *et) { + FallbackTransform *transform = (FallbackTransform *)dt; + transform->linear_transform = (FallbackTransform *)et; } void FallbackImpl::displayTransformSetLooksOverride(OCIO_DisplayTransformRcPtr * /*dt*/, @@ -432,12 +586,16 @@ void FallbackImpl::OCIO_PackedImageDescRelease(OCIO_PackedImageDesc* id) OCIO_ExponentTransformRcPtr *FallbackImpl::createExponentTransform(void) { - return (OCIO_ExponentTransformRcPtr*)PROCESSOR_UNKNOWN; + FallbackTransform *transform = new FallbackTransform(); + transform->type = TRANSFORM_EXPONENT; + return (OCIO_ExponentTransformRcPtr *)transform; } -void FallbackImpl::exponentTransformSetValue(OCIO_ExponentTransformRcPtr * /*et*/, - const float * /*exponent*/) +void FallbackImpl::exponentTransformSetValue(OCIO_ExponentTransformRcPtr *et, + const float *exponent) { + FallbackTransform *transform = (FallbackTransform *)et; + copy_v4_v4(transform->exponent, exponent); } void FallbackImpl::exponentTransformRelease(OCIO_ExponentTransformRcPtr * /*et*/) @@ -446,23 +604,44 @@ void FallbackImpl::exponentTransformRelease(OCIO_ExponentTransformRcPtr * /*et*/ OCIO_MatrixTransformRcPtr *FallbackImpl::createMatrixTransform(void) { - return (OCIO_MatrixTransformRcPtr*)PROCESSOR_UNKNOWN; + FallbackTransform *transform = new FallbackTransform(); + transform->type = TRANSFORM_MATRIX; + return (OCIO_MatrixTransformRcPtr *)transform; } -void FallbackImpl::matrixTransformSetValue(OCIO_MatrixTransformRcPtr * /*mt*/, - const float * /*m44*/, - const float * /*offset4*/) +void FallbackImpl::matrixTransformSetValue(OCIO_MatrixTransformRcPtr *mt, + const float *m44, + const float *offset4) { + FallbackTransform *transform = (FallbackTransform *)mt; + copy_m4_m4((float (*)[4])transform->matrix, (float (*)[4])m44); + copy_v4_v4(transform->offset, offset4); } void FallbackImpl::matrixTransformRelease(OCIO_MatrixTransformRcPtr * /*mt*/) { } -void FallbackImpl::matrixTransformScale(float * /*m44*/, - float * /*offset44*/, - const float * /*scale4*/) +void FallbackImpl::matrixTransformScale(float *m44, + float *offset4, + const float *scale4) { + if (scale4 == NULL) { + return; + } + if (m44 != NULL) { + memset(m44, 0, 16*sizeof(float)); + m44[0] = scale4[0]; + m44[5] = scale4[1]; + m44[10] = scale4[2]; + m44[15] = scale4[3]; + } + if (offset4 != NULL) { + offset4[0] = 0.0f; + offset4[1] = 0.0f; + offset4[2] = 0.0f; + offset4[3] = 0.0f; + } } bool FallbackImpl::supportGLSLDraw(void) |