Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/intern
diff options
context:
space:
mode:
author Bastien Montagne <montagne29@wanadoo.fr> 2016-12-12 18:17:57 +0300
committer Bastien Montagne <montagne29@wanadoo.fr> 2016-12-12 18:17:57 +0300
commit 9be6d5ff18fd593e853647d9eec8b3fb074acd7c (patch)
tree 485fb9203c310b5119a549e698db241012ed39ff /intern
parent 54528079e3cfaf74eaa119615386564820b45276 (diff)
parent 5f852a4324212221500d11b2c7594f5e0ca894c6 (diff)
Merge branch 'master' into blender2.8
Conflicts: source/blender/blenkernel/intern/depsgraph.c source/blender/blenloader/intern/versioning_270.c source/blender/depsgraph/intern/builder/deg_builder_relations.cc source/blender/makesrna/intern/rna_main_api.c source/blender/makesrna/intern/rna_particle.c
Diffstat (limited to 'intern')
-rw-r--r--intern/cycles/app/cycles_standalone.cpp18
-rw-r--r--intern/cycles/blender/addon/properties.py2
-rw-r--r--intern/cycles/blender/blender_session.cpp31
-rw-r--r--intern/cycles/device/device.cpp2
-rw-r--r--intern/cycles/device/device.h11
-rw-r--r--intern/cycles/device/device_cpu.cpp7
-rw-r--r--intern/cycles/device/device_cuda.cpp8
-rw-r--r--intern/cycles/device/device_multi.cpp8
-rw-r--r--intern/cycles/device/device_network.cpp5
-rw-r--r--intern/cycles/device/device_task.cpp12
-rw-r--r--intern/cycles/device/device_task.h4
-rw-r--r--intern/cycles/device/opencl/opencl_mega.cpp6
-rw-r--r--intern/cycles/device/opencl/opencl_split.cpp4
-rw-r--r--intern/cycles/kernel/closure/bsdf_hair.h5
-rw-r--r--intern/cycles/kernel/geom/geom_curve.h66
-rw-r--r--intern/cycles/kernel/geom/geom_motion_curve.h66
-rw-r--r--intern/cycles/kernel/geom/geom_motion_triangle.h18
-rw-r--r--intern/cycles/kernel/geom/geom_triangle_intersect.h26
-rw-r--r--intern/cycles/kernel/kernel_jitter.h9
-rw-r--r--intern/cycles/kernel/kernel_path.h8
-rw-r--r--intern/cycles/kernel/kernel_path_branched.h4
-rw-r--r--intern/cycles/kernel/kernel_random.h12
-rw-r--r--intern/cycles/kernel/kernel_types.h3
-rw-r--r--intern/cycles/kernel/kernel_volume.h35
-rw-r--r--intern/cycles/render/bake.cpp20
-rw-r--r--intern/cycles/render/bake.h3
-rw-r--r--intern/cycles/render/nodes.cpp10
-rw-r--r--intern/cycles/render/session.cpp111
-rw-r--r--intern/cycles/render/session.h9
-rw-r--r--intern/cycles/render/shader.cpp3
-rw-r--r--intern/cycles/render/tile.cpp37
-rw-r--r--intern/cycles/render/tile.h6
-rw-r--r--intern/cycles/util/util_avxf.h8
-rw-r--r--intern/cycles/util/util_math.h9
-rw-r--r--intern/cycles/util/util_progress.h123
-rw-r--r--intern/cycles/util/util_time.h8
-rw-r--r--intern/cycles/util/util_transform.h5
-rw-r--r--intern/opencolorio/fallback_impl.cc249
38 files changed, 674 insertions, 297 deletions
diff --git a/intern/cycles/app/cycles_standalone.cpp b/intern/cycles/app/cycles_standalone.cpp
index b21e8630cdb..9816d614a7c 100644
--- a/intern/cycles/app/cycles_standalone.cpp
+++ b/intern/cycles/app/cycles_standalone.cpp
@@ -72,20 +72,17 @@ static void session_print(const string& str)
static void session_print_status()
{
- int sample, tile;
- double total_time, sample_time, render_time;
string status, substatus;
/* get status */
- sample = options.session->progress.get_sample();
- options.session->progress.get_tile(tile, total_time, sample_time, render_time);
+ float progress = options.session->progress.get_progress();
options.session->progress.get_status(status, substatus);
if(substatus != "")
status += ": " + substatus;
/* print status */
- status = string_printf("Sample %d %s", sample, status.c_str());
+ status = string_printf("Progress %05.2f %s", (double) progress*100, status.c_str());
session_print(status);
}
@@ -167,13 +164,12 @@ static void display_info(Progress& progress)
latency = (elapsed - last);
last = elapsed;
- int sample, tile;
- double total_time, sample_time, render_time;
+ double total_time, sample_time;
string status, substatus;
- sample = progress.get_sample();
- progress.get_tile(tile, total_time, sample_time, render_time);
+ progress.get_time(total_time, sample_time);
progress.get_status(status, substatus);
+ float progress_val = progress.get_progress();
if(substatus != "")
status += ": " + substatus;
@@ -184,10 +180,10 @@ static void display_info(Progress& progress)
"%s"
" Time: %.2f"
" Latency: %.4f"
- " Sample: %d"
+ " Progress: %05.2f"
" Average: %.4f"
" Interactive: %s",
- status.c_str(), total_time, latency, sample, sample_time, interactive.c_str());
+ status.c_str(), total_time, latency, (double) progress_val*100, sample_time, interactive.c_str());
view_display_info(str.c_str());
diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py
index 3346beea3b2..cbff5a537dc 100644
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -288,7 +288,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
description="Probabilistically terminate light samples when the light contribution is below this threshold (more noise but faster rendering). "
"Zero disables the test and never ignores lights",
min=0.0, max=1.0,
- default=0.05,
+ default=0.01,
)
cls.caustics_reflective = BoolProperty(
diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp
index e16cea0ebaf..71c1eefe65f 100644
--- a/intern/cycles/blender/blender_session.cpp
+++ b/intern/cycles/blender/blender_session.cpp
@@ -930,38 +930,13 @@ void BlenderSession::get_status(string& status, string& substatus)
void BlenderSession::get_progress(float& progress, double& total_time, double& render_time)
{
- double tile_time;
- int tile, sample, samples_per_tile;
- int tile_total = session->tile_manager.state.num_tiles;
- int samples = session->tile_manager.state.sample + 1;
- int total_samples = session->tile_manager.get_num_effective_samples();
-
- session->progress.get_tile(tile, total_time, render_time, tile_time);
-
- sample = session->progress.get_sample();
- samples_per_tile = session->tile_manager.get_num_effective_samples();
-
- if(background && samples_per_tile && tile_total)
- progress = ((float)sample / (float)(tile_total * samples_per_tile));
- else if(!background && samples > 0 && total_samples != INT_MAX)
- progress = ((float)samples) / total_samples;
- else
- progress = 0.0;
+ session->progress.get_time(total_time, render_time);
+ progress = session->progress.get_progress();
}
void BlenderSession::update_bake_progress()
{
- float progress;
- int sample, samples_per_task, parts_total;
-
- sample = session->progress.get_sample();
- samples_per_task = scene->bake_manager->num_samples;
- parts_total = scene->bake_manager->num_parts;
-
- if(samples_per_task)
- progress = ((float)sample / (float)(parts_total * samples_per_task));
- else
- progress = 0.0;
+ float progress = session->progress.get_progress();
if(progress != last_progress) {
b_engine.update_progress(progress);
diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp
index ff9387b0a8a..31c99f49d6d 100644
--- a/intern/cycles/device/device.cpp
+++ b/intern/cycles/device/device.cpp
@@ -64,6 +64,8 @@ std::ostream& operator <<(std::ostream &os,
<< string_from_bool(requested_features.use_integrator_branched) << std::endl;
os << "Use Patch Evaluation: "
<< string_from_bool(requested_features.use_patch_evaluation) << std::endl;
+ os << "Use Transparent Shadows: "
+ << string_from_bool(requested_features.use_transparent) << std::endl;
return os;
}
diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index b9bdffa2618..ccee25ae34e 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -117,6 +117,9 @@ public:
/* Use OpenSubdiv patch evaluation */
bool use_patch_evaluation;
+
+ /* Use Transparent shadows */
+ bool use_transparent;
DeviceRequestedFeatures()
{
@@ -133,6 +136,7 @@ public:
use_volume = false;
use_integrator_branched = false;
use_patch_evaluation = false;
+ use_transparent = false;
}
bool modified(const DeviceRequestedFeatures& requested_features)
@@ -148,7 +152,8 @@ public:
use_subsurface == requested_features.use_subsurface &&
use_volume == requested_features.use_volume &&
use_integrator_branched == requested_features.use_integrator_branched &&
- use_patch_evaluation == requested_features.use_patch_evaluation);
+ use_patch_evaluation == requested_features.use_patch_evaluation &&
+ use_transparent == requested_features.use_transparent);
}
/* Convert the requested features structure to a build options,
@@ -189,6 +194,9 @@ public:
if(!use_patch_evaluation) {
build_options += " -D__NO_PATCH_EVAL__";
}
+ if(!use_transparent) {
+ build_options += " -D__NO_TRANSPARENT__";
+ }
return build_options;
}
};
@@ -220,6 +228,7 @@ public:
DeviceInfo info;
virtual const string& error_message() { return error_msg; }
bool have_error() { return !error_message().empty(); }
+ virtual bool show_samples() const { return false; }
/* statistics */
Stats &stats;
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index aed86d8d853..c8e001ec2fd 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -112,6 +112,11 @@ public:
task_pool.stop();
}
+ virtual bool show_samples() const
+ {
+ return (TaskScheduler::num_threads() == 1);
+ }
+
void mem_alloc(device_memory& mem, MemoryType /*type*/)
{
mem.device_pointer = mem.data_pointer;
@@ -275,7 +280,7 @@ public:
tile.sample = sample + 1;
- task.update_progress(&tile);
+ task.update_progress(&tile, tile.w*tile.h);
}
task.release_tile(tile);
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index fbb97f78e70..233f94be1bf 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -115,6 +115,12 @@ public:
return path_exists(cubins_path);
}
+ virtual bool show_samples() const
+ {
+ /* The CUDADevice only processes one tile at a time, so showing samples is fine. */
+ return true;
+ }
+
/*#ifdef NDEBUG
#define cuda_abort()
#else
@@ -1267,7 +1273,7 @@ public:
tile.sample = sample + 1;
- task->update_progress(&tile);
+ task->update_progress(&tile, tile.w*tile.h);
}
task->release_tile(tile);
diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp
index 48fd159d508..31b800640d3 100644
--- a/intern/cycles/device/device_multi.cpp
+++ b/intern/cycles/device/device_multi.cpp
@@ -89,6 +89,14 @@ public:
return error_msg;
}
+ virtual bool show_samples() const
+ {
+ if(devices.size() > 1) {
+ return false;
+ }
+ return devices.front().device->show_samples();
+ }
+
bool load_kernels(const DeviceRequestedFeatures& requested_features)
{
foreach(SubDevice& sub, devices)
diff --git a/intern/cycles/device/device_network.cpp b/intern/cycles/device/device_network.cpp
index 3eb5ad2d2db..53eef6cf199 100644
--- a/intern/cycles/device/device_network.cpp
+++ b/intern/cycles/device/device_network.cpp
@@ -51,6 +51,11 @@ public:
thread_mutex rpc_lock;
+ virtual bool show_samples() const
+ {
+ return false;
+ }
+
NetworkDevice(DeviceInfo& info, Stats &stats, const char *address)
: Device(info, stats, true), socket(io_service)
{
diff --git a/intern/cycles/device/device_task.cpp b/intern/cycles/device/device_task.cpp
index 1f1128a28f8..48d18035c13 100644
--- a/intern/cycles/device/device_task.cpp
+++ b/intern/cycles/device/device_task.cpp
@@ -19,6 +19,8 @@
#include "device_task.h"
+#include "buffers.h"
+
#include "util_algorithm.h"
#include "util_time.h"
@@ -99,14 +101,18 @@ void DeviceTask::split(list<DeviceTask>& tasks, int num, int max_size)
}
}
-void DeviceTask::update_progress(RenderTile *rtile)
+void DeviceTask::update_progress(RenderTile *rtile, int pixel_samples)
{
if((type != PATH_TRACE) &&
(type != SHADER))
return;
- if(update_progress_sample)
- update_progress_sample();
+ if(update_progress_sample) {
+ if(pixel_samples == -1) {
+ pixel_samples = shader_w;
+ }
+ update_progress_sample(pixel_samples, rtile? rtile->sample : 0);
+ }
if(update_tile_sample) {
double current_time = time_dt();
diff --git a/intern/cycles/device/device_task.h b/intern/cycles/device/device_task.h
index 8423e83bdfd..8bd54c3d2b0 100644
--- a/intern/cycles/device/device_task.h
+++ b/intern/cycles/device/device_task.h
@@ -56,10 +56,10 @@ public:
int get_subtask_count(int num, int max_size = 0);
void split(list<DeviceTask>& tasks, int num, int max_size = 0);
- void update_progress(RenderTile *rtile);
+ void update_progress(RenderTile *rtile, int pixel_samples = -1);
function<bool(Device *device, RenderTile&)> acquire_tile;
- function<void(void)> update_progress_sample;
+ function<void(long, int)> update_progress_sample;
function<void(RenderTile&)> update_tile_sample;
function<void(RenderTile&)> release_tile;
function<bool(void)> get_cancel;
diff --git a/intern/cycles/device/opencl/opencl_mega.cpp b/intern/cycles/device/opencl/opencl_mega.cpp
index 369c086df57..6ea7619e022 100644
--- a/intern/cycles/device/opencl/opencl_mega.cpp
+++ b/intern/cycles/device/opencl/opencl_mega.cpp
@@ -39,6 +39,10 @@ public:
{
}
+ virtual bool show_samples() const {
+ return true;
+ }
+
virtual void load_kernels(const DeviceRequestedFeatures& /*requested_features*/,
vector<OpenCLProgram*> &programs)
{
@@ -120,7 +124,7 @@ public:
tile.sample = sample + 1;
- task->update_progress(&tile);
+ task->update_progress(&tile, tile.w*tile.h);
}
/* Complete kernel execution before release tile */
diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp
index 239e73a40fd..3c3c2150128 100644
--- a/intern/cycles/device/opencl/opencl_split.cpp
+++ b/intern/cycles/device/opencl/opencl_split.cpp
@@ -247,6 +247,10 @@ public:
}
}
+ virtual bool show_samples() const {
+ return false;
+ }
+
/* Split kernel utility functions. */
size_t get_tex_size(const char *tex_name)
{
diff --git a/intern/cycles/kernel/closure/bsdf_hair.h b/intern/cycles/kernel/closure/bsdf_hair.h
index bede5f45e7e..daaa26dc6ad 100644
--- a/intern/cycles/kernel/closure/bsdf_hair.h
+++ b/intern/cycles/kernel/closure/bsdf_hair.h
@@ -267,7 +267,10 @@ ccl_device int bsdf_hair_transmission_sample(const ShaderClosure *sc, float3 Ng,
*eval = make_float3(*pdf, *pdf, *pdf);
- kernel_assert(dot(locy, *omega_in) < 0.0f);
+ /* TODO(sergey): Should always be negative, but a precision issue seems
+ * to be involved here, so allow a small positive tolerance.
+ */
+ kernel_assert(dot(locy, *omega_in) < 1e-4f);
return LABEL_TRANSMIT|LABEL_GLOSSY;
}
diff --git a/intern/cycles/kernel/geom/geom_curve.h b/intern/cycles/kernel/geom/geom_curve.h
index 84aaaab7453..636dbcc71e0 100644
--- a/intern/cycles/kernel/geom/geom_curve.h
+++ b/intern/cycles/kernel/geom/geom_curve.h
@@ -255,6 +255,17 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
int ka = max(k0 - 1, v00.x);
int kb = min(k1 + 1, v00.x + v00.y - 1);
+#ifdef __KERNEL_AVX2__
+ avxf P_curve_0_1, P_curve_2_3;
+ if(type & PRIMITIVE_CURVE) {
+ P_curve_0_1 = _mm256_loadu2_m128(&kg->__curve_keys.data[k0].x, &kg->__curve_keys.data[ka].x);
+ P_curve_2_3 = _mm256_loadu2_m128(&kg->__curve_keys.data[kb].x, &kg->__curve_keys.data[k1].x);
+ }
+ else {
+ int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, curveAddr) : object;
+ motion_cardinal_curve_keys_avx(kg, fobject, prim, time, ka, k0, k1, kb, &P_curve_0_1,&P_curve_2_3);
+ }
+#else /* __KERNEL_AVX2__ */
ssef P_curve[4];
if(type & PRIMITIVE_CURVE) {
@@ -267,6 +278,7 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, curveAddr): object;
motion_cardinal_curve_keys(kg, fobject, prim, time, ka, k0, k1, kb, (float4*)&P_curve);
}
+#endif /* __KERNEL_AVX2__ */
ssef rd_sgn = set_sign_bit<0, 1, 1, 1>(shuffle<0>(rd_ss));
ssef mul_zxxy = shuffle<2, 0, 0, 1>(vdir) * rd_sgn;
@@ -278,6 +290,33 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
ssef htfm1 = shuffle<1, 0, 1, 3>(load1f_first(extract<0>(d_ss)), vdir0);
ssef htfm2 = shuffle<1, 3, 2, 3>(mul_shuf, vdir0);
+#ifdef __KERNEL_AVX2__
+ const avxf vPP = _mm256_broadcast_ps(&P.m128);
+ const avxf htfm00 = avxf(htfm0.m128, htfm0.m128);
+ const avxf htfm11 = avxf(htfm1.m128, htfm1.m128);
+ const avxf htfm22 = avxf(htfm2.m128, htfm2.m128);
+
+ const avxf p01 = madd(shuffle<0>(P_curve_0_1 - vPP),
+ htfm00,
+ madd(shuffle<1>(P_curve_0_1 - vPP),
+ htfm11,
+ shuffle<2>(P_curve_0_1 - vPP) * htfm22));
+ const avxf p23 = madd(shuffle<0>(P_curve_2_3 - vPP),
+ htfm00,
+ madd(shuffle<1>(P_curve_2_3 - vPP),
+ htfm11,
+ shuffle<2>(P_curve_2_3 - vPP)*htfm22));
+
+ const ssef p0 = _mm256_castps256_ps128(p01);
+ const ssef p1 = _mm256_extractf128_ps(p01, 1);
+ const ssef p2 = _mm256_castps256_ps128(p23);
+ const ssef p3 = _mm256_extractf128_ps(p23, 1);
+
+ const ssef P_curve_1 = _mm256_extractf128_ps(P_curve_0_1, 1);
+ r_st = ((float4 &)P_curve_1).w;
+ const ssef P_curve_2 = _mm256_castps256_ps128(P_curve_2_3);
+ r_en = ((float4 &)P_curve_2).w;
+#else /* __KERNEL_AVX2__ */
ssef htfm[] = { htfm0, htfm1, htfm2 };
ssef vP = load4f(P);
ssef p0 = transform_point_T3(htfm, P_curve[0] - vP);
@@ -285,6 +324,10 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
ssef p2 = transform_point_T3(htfm, P_curve[2] - vP);
ssef p3 = transform_point_T3(htfm, P_curve[3] - vP);
+ r_st = ((float4 &)P_curve[1]).w;
+ r_en = ((float4 &)P_curve[2]).w;
+#endif /* __KERNEL_AVX2__ */
+
float fc = 0.71f;
ssef vfc = ssef(fc);
ssef vfcxp3 = vfc * p3;
@@ -294,8 +337,6 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
vcurve_coef[2] = madd(ssef(fc * 2.0f), p0, madd(ssef(fc - 3.0f), p1, msub(ssef(3.0f - 2.0f * fc), p2, vfcxp3)));
vcurve_coef[3] = msub(ssef(fc - 2.0f), p2 - p1, msub(vfc, p0, vfcxp3));
- r_st = ((float4 &)P_curve[1]).w;
- r_en = ((float4 &)P_curve[2]).w;
}
#else
float3 curve_coef[4];
@@ -383,8 +424,9 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
/* begin loop */
while(!(tree >> (depth))) {
- float i_st = tree * resol;
- float i_en = i_st + (level * resol);
+ const float i_st = tree * resol;
+ const float i_en = i_st + (level * resol);
+
#ifdef __KERNEL_SSE2__
ssef vi_st = ssef(i_st), vi_en = ssef(i_en);
ssef vp_st = madd(madd(madd(vcurve_coef[3], vi_st, vcurve_coef[2]), vi_st, vcurve_coef[1]), vi_st, vcurve_coef[0]);
@@ -458,13 +500,23 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
if(flags & CURVE_KN_RIBBONS) {
float3 tg = (p_en - p_st);
+#ifdef __KERNEL_SSE__
+ const float3 tg_sq = tg * tg;
+ float w = tg_sq.x + tg_sq.y;
+#else
float w = tg.x * tg.x + tg.y * tg.y;
+#endif
if(w == 0) {
tree++;
level = tree & -tree;
continue;
}
+#ifdef __KERNEL_SSE__
+ const float3 p_sttg = p_st * tg;
+ w = -(p_sttg.x + p_sttg.y) / w;
+#else
w = -(p_st.x * tg.x + p_st.y * tg.y) / w;
+#endif
w = saturate(w);
/* compute u on the curve segment */
@@ -496,7 +548,13 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
if(difl != 0.0f) {
mw_extension = min(difl * fabsf(bmaxz), extmax);
r_ext = mw_extension + r_curr;
+#ifdef __KERNEL_SSE__
+ const float3 p_curr_sq = p_curr * p_curr;
+ const float3 dxxx = _mm_sqrt_ss(_mm_hadd_ps(p_curr_sq.m128, p_curr_sq.m128));
+ float d = dxxx.x;
+#else
float d = sqrtf(p_curr.x * p_curr.x + p_curr.y * p_curr.y);
+#endif
float d0 = d - r_curr;
float d1 = d + r_curr;
float inv_mw_extension = 1.0f/mw_extension;
diff --git a/intern/cycles/kernel/geom/geom_motion_curve.h b/intern/cycles/kernel/geom/geom_motion_curve.h
index 6de5aa7ea99..80b33fad68b 100644
--- a/intern/cycles/kernel/geom/geom_motion_curve.h
+++ b/intern/cycles/kernel/geom/geom_motion_curve.h
@@ -118,7 +118,12 @@ ccl_device_inline void motion_cardinal_curve_keys_for_step(KernelGlobals *kg, in
}
/* return 2 curve key locations */
-ccl_device_inline void motion_cardinal_curve_keys(KernelGlobals *kg, int object, int prim, float time, int k0, int k1, int k2, int k3, float4 keys[4])
+ccl_device_inline void motion_cardinal_curve_keys(KernelGlobals *kg,
+ int object,
+ int prim,
+ float time,
+ int k0, int k1, int k2, int k3,
+ float4 keys[4])
{
/* get motion info */
int numsteps, numkeys;
@@ -147,6 +152,65 @@ ccl_device_inline void motion_cardinal_curve_keys(KernelGlobals *kg, int object,
keys[3] = (1.0f - t)*keys[3] + t*next_keys[3];
}
+#ifdef __KERNEL_AVX2__
+/* Similar to the above, but returns the keys as a pair of AVX registers, each
+ * holding two float4 values.
+ */
+ccl_device_inline void motion_cardinal_curve_keys_avx(KernelGlobals *kg,
+ int object,
+ int prim,
+ float time,
+ int k0, int k1,
+ int k2, int k3,
+ avxf *out_keys_0_1,
+ avxf *out_keys_2_3)
+{
+ /* Get motion info. */
+ int numsteps, numkeys;
+ object_motion_info(kg, object, &numsteps, NULL, &numkeys);
+
+ /* Figure out which steps we need to fetch and their interpolation factor. */
+ int maxstep = numsteps * 2;
+ int step = min((int)(time*maxstep), maxstep - 1);
+ float t = time*maxstep - step;
+
+ /* Find attribute. */
+ AttributeElement elem;
+ int offset = find_attribute_curve_motion(kg,
+ object,
+ ATTR_STD_MOTION_VERTEX_POSITION,
+ &elem);
+ kernel_assert(offset != ATTR_STD_NOT_FOUND);
+
+ /* Fetch key coordinates. */
+ float4 next_keys[4];
+ float4 keys[4];
+ motion_cardinal_curve_keys_for_step(kg,
+ offset,
+ numkeys,
+ numsteps,
+ step,
+ k0, k1, k2, k3,
+ keys);
+ motion_cardinal_curve_keys_for_step(kg,
+ offset,
+ numkeys,
+ numsteps,
+ step + 1,
+ k0, k1, k2, k3,
+ next_keys);
+
+ const avxf keys_0_1 = avxf(keys[0].m128, keys[1].m128);
+ const avxf keys_2_3 = avxf(keys[2].m128, keys[3].m128);
+ const avxf next_keys_0_1 = avxf(next_keys[0].m128, next_keys[1].m128);
+ const avxf next_keys_2_3 = avxf(next_keys[2].m128, next_keys[3].m128);
+
+ /* Interpolate between steps. */
+ *out_keys_0_1 = (1.0f - t) * keys_0_1 + t*next_keys_0_1;
+ *out_keys_2_3 = (1.0f - t) * keys_2_3 + t*next_keys_2_3;
+}
+#endif
+
#endif
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/geom/geom_motion_triangle.h b/intern/cycles/kernel/geom/geom_motion_triangle.h
index 3cbe59aaece..538c332c63a 100644
--- a/intern/cycles/kernel/geom/geom_motion_triangle.h
+++ b/intern/cycles/kernel/geom/geom_motion_triangle.h
@@ -323,11 +323,11 @@ ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals *kg, ShaderD
* time and do a ray intersection with the resulting triangle */
ccl_device_inline bool motion_triangle_intersect(KernelGlobals *kg, Intersection *isect,
- float3 P, float3 dir, float time, uint visibility, int object, int triAddr)
+ float3 P, float3 dir, float time, uint visibility, int object, int prim_addr)
{
/* primitive index for vertex location lookup */
- int prim = kernel_tex_fetch(__prim_index, triAddr);
- int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, triAddr): object;
+ int prim = kernel_tex_fetch(__prim_index, prim_addr);
+ int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
/* get vertex locations for intersection */
float3 verts[3];
@@ -340,13 +340,13 @@ ccl_device_inline bool motion_triangle_intersect(KernelGlobals *kg, Intersection
#ifdef __VISIBILITY_FLAG__
/* visibility flag test. we do it here under the assumption
* that most triangles are culled by node flags */
- if(kernel_tex_fetch(__prim_visibility, triAddr) & visibility)
+ if(kernel_tex_fetch(__prim_visibility, prim_addr) & visibility)
#endif
{
isect->t = t;
isect->u = u;
isect->v = v;
- isect->prim = triAddr;
+ isect->prim = prim_addr;
isect->object = object;
isect->type = PRIMITIVE_MOTION_TRIANGLE;
@@ -369,14 +369,14 @@ ccl_device_inline void motion_triangle_intersect_subsurface(
float3 dir,
float time,
int object,
- int triAddr,
+ int prim_addr,
float tmax,
uint *lcg_state,
int max_hits)
{
/* primitive index for vertex location lookup */
- int prim = kernel_tex_fetch(__prim_index, triAddr);
- int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, triAddr): object;
+ int prim = kernel_tex_fetch(__prim_index, prim_addr);
+ int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
/* get vertex locations for intersection */
float3 verts[3];
@@ -413,7 +413,7 @@ ccl_device_inline void motion_triangle_intersect_subsurface(
isect->t = t;
isect->u = u;
isect->v = v;
- isect->prim = triAddr;
+ isect->prim = prim_addr;
isect->object = object;
isect->type = PRIMITIVE_MOTION_TRIANGLE;
diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h
index eb7340583c8..4db121d94f4 100644
--- a/intern/cycles/kernel/geom/geom_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h
@@ -108,7 +108,7 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
float3 P,
uint visibility,
int object,
- int triAddr)
+ int prim_addr)
{
const int kx = isect_precalc->kx;
const int ky = isect_precalc->ky;
@@ -118,7 +118,7 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
const float Sz = isect_precalc->Sz;
/* Calculate vertices relative to ray origin. */
- const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, triAddr);
+ const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr);
#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
const avxf avxf_P(P.m128, P.m128);
@@ -129,10 +129,10 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
const avxf AB = tri_ab - avxf_P;
const avxf BC = tri_bc - avxf_P;
- const __m256i permuteMask = _mm256_set_epi32(0x3, kz, ky, kx, 0x3, kz, ky, kx);
+ const __m256i permute_mask = _mm256_set_epi32(0x3, kz, ky, kx, 0x3, kz, ky, kx);
- const avxf AB_k = shuffle(AB, permuteMask);
- const avxf BC_k = shuffle(BC, permuteMask);
+ const avxf AB_k = shuffle(AB, permute_mask);
+ const avxf BC_k = shuffle(BC, permute_mask);
/* Akz, Akz, Bkz, Bkz, Bkz, Bkz, Ckz, Ckz */
const avxf ABBC_kz = shuffle<2>(AB_k, BC_k);
@@ -155,14 +155,14 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
/* By, Bx, Cy, Cx, By, Bx, Ay, Ax */
const avxf BCBA_yx = permute<3,2,7,6,3,2,1,0>(ABBC_xy);
- const avxf negMask(0,0,0,0,0x80000000, 0x80000000, 0x80000000, 0x80000000);
+ const avxf neg_mask(0,0,0,0,0x80000000, 0x80000000, 0x80000000, 0x80000000);
/* W U V
* (AxBy-AyBx) (BxCy-ByCx) XX XX (BxBy-ByBx) (CxAy-CyAx) XX XX
*/
- const avxf WUxxxxVxx_neg = _mm256_hsub_ps(ABBC_xy * BCBA_yx, negMask /* Dont care */);
+ const avxf WUxxxxVxx_neg = _mm256_hsub_ps(ABBC_xy * BCBA_yx, neg_mask /* Dont care */);
- const avxf WUVWnegWUVW = permute<0,1,5,0,0,1,5,0>(WUxxxxVxx_neg) ^ negMask;
+ const avxf WUVWnegWUVW = permute<0,1,5,0,0,1,5,0>(WUxxxxVxx_neg) ^ neg_mask;
/* Calculate scaled barycentric coordinates. */
float WUVW_array[4];
@@ -231,7 +231,7 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
#ifdef __VISIBILITY_FLAG__
/* visibility flag test. we do it here under the assumption
* that most triangles are culled by node flags */
- if(kernel_tex_fetch(__prim_visibility, triAddr) & visibility)
+ if(kernel_tex_fetch(__prim_visibility, prim_addr) & visibility)
#endif
{
#ifdef __KERNEL_CUDA__
@@ -241,7 +241,7 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
#endif
/* Normalize U, V, W, and T. */
const float inv_det = 1.0f / det;
- isect->prim = triAddr;
+ isect->prim = prim_addr;
isect->object = object;
isect->type = PRIMITIVE_TRIANGLE;
isect->u = U * inv_det;
@@ -264,7 +264,7 @@ ccl_device_inline void triangle_intersect_subsurface(
SubsurfaceIntersection *ss_isect,
float3 P,
int object,
- int triAddr,
+ int prim_addr,
float tmax,
uint *lcg_state,
int max_hits)
@@ -277,7 +277,7 @@ ccl_device_inline void triangle_intersect_subsurface(
const float Sz = isect_precalc->Sz;
/* Calculate vertices relative to ray origin. */
- const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, triAddr);
+ const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr);
const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex+0),
tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex+1),
tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex+2);
@@ -415,7 +415,7 @@ ccl_device_inline void triangle_intersect_subsurface(
/* record intersection */
Intersection *isect = &ss_isect->hits[hit];
- isect->prim = triAddr;
+ isect->prim = prim_addr;
isect->object = object;
isect->type = PRIMITIVE_TRIANGLE;
isect->u = U * inv_det;
diff --git a/intern/cycles/kernel/kernel_jitter.h b/intern/cycles/kernel/kernel_jitter.h
index aec7bc33acd..67546131746 100644
--- a/intern/cycles/kernel/kernel_jitter.h
+++ b/intern/cycles/kernel/kernel_jitter.h
@@ -149,6 +149,15 @@ ccl_device_inline uint cmj_hash(uint i, uint p)
return i;
}
+ccl_device_inline uint cmj_hash_simple(uint i, uint p)
+{
+ i = (i ^ 61) ^ p;
+ i += i << 3;
+ i ^= i >> 4;
+ i *= 0x27d4eb2d;
+ return i;
+}
+
ccl_device_inline float cmj_randfloat(uint i, uint p)
{
return cmj_hash(i, p) * (1.0f / 4294967808.0f);
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index 6d89a89ed5b..6a36c68d69f 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -141,6 +141,10 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
#endif /* __LAMP_MIS__ */
#ifdef __VOLUME__
+ /* Sanitize volume stack. */
+ if(!hit) {
+ kernel_volume_clean_stack(kg, state->volume_stack);
+ }
/* volume attenuation, emission, scatter */
if(state->volume_stack[0].shader != SHADER_NONE) {
Ray volume_ray = *ray;
@@ -658,6 +662,10 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
#endif /* __LAMP_MIS__ */
#ifdef __VOLUME__
+ /* Sanitize volume stack. */
+ if(!hit) {
+ kernel_volume_clean_stack(kg, state.volume_stack);
+ }
/* volume attenuation, emission, scatter */
if(state.volume_stack[0].shader != SHADER_NONE) {
Ray volume_ray = ray;
diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h
index c84727ace99..10174e1c4ce 100644
--- a/intern/cycles/kernel/kernel_path_branched.h
+++ b/intern/cycles/kernel/kernel_path_branched.h
@@ -294,6 +294,10 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
#endif /* __KERNEL_DEBUG__ */
#ifdef __VOLUME__
+ /* Sanitize volume stack. */
+ if(!hit) {
+ kernel_volume_clean_stack(kg, state.volume_stack);
+ }
/* volume attenuation, emission, scatter */
if(state.volume_stack[0].shader != SHADER_NONE) {
Ray volume_ray = ray;
diff --git a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h
index 2b767da5041..e773753396f 100644
--- a/intern/cycles/kernel/kernel_random.h
+++ b/intern/cycles/kernel/kernel_random.h
@@ -120,13 +120,11 @@ ccl_device_forceinline float path_rng_1D(KernelGlobals *kg, ccl_addr_space RNG *
/* Cranly-Patterson rotation using rng seed */
float shift;
- /* using the same *rng value to offset seems to give correlation issues,
- * we could hash it with the dimension but this has a performance impact,
- * we need to find a solution for this */
- if(dimension & 1)
- shift = (*rng >> 16) * (1.0f/(float)0xFFFF);
- else
- shift = (*rng & 0xFFFF) * (1.0f/(float)0xFFFF);
+ /* Hash rng with dimension to solve correlation issues.
+ * See T38710, T50116.
+ */
+ RNG tmp_rng = cmj_hash_simple(dimension, *rng);
+ shift = tmp_rng * (1.0f/(float)0xFFFFFFFF);
return r + shift - floorf(r + shift);
#endif
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index a6c31d4a518..fd961836ec9 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -192,6 +192,9 @@ CCL_NAMESPACE_BEGIN
#ifdef __NO_PATCH_EVAL__
# undef __PATCH_EVAL__
#endif
+#ifdef __NO_TRANSPARENT__
+# undef __TRANSPARENT_SHADOWS__
+#endif
/* Random Numbers */
diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h
index dd7b0d9812d..c7cb29b5af2 100644
--- a/intern/cycles/kernel/kernel_volume.h
+++ b/intern/cycles/kernel/kernel_volume.h
@@ -582,17 +582,12 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance(
ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(KernelGlobals *kg,
PathState *state, ShaderData *sd, Ray *ray, PathRadiance *L, float3 *throughput, RNG *rng, bool heterogeneous)
{
- /* workaround to fix correlation bug in T38710, can find better solution
- * in random number generator later, for now this is done here to not impact
- * performance of rendering without volumes */
- RNG tmp_rng = cmj_hash(*rng, state->rng_offset);
-
shader_setup_from_volume(kg, sd, ray);
if(heterogeneous)
- return kernel_volume_integrate_heterogeneous_distance(kg, state, ray, sd, L, throughput, &tmp_rng);
+ return kernel_volume_integrate_heterogeneous_distance(kg, state, ray, sd, L, throughput, rng);
else
- return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, &tmp_rng, true);
+ return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, rng, true);
}
/* Decoupled Volume Sampling
@@ -1267,4 +1262,30 @@ ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
}
#endif
+/* Truncate the volume stack once a ray has left the scene.
+ *
+ * Volumes are expected to be closed manifolds, so when a ray hits nothing
+ * (for example, the last bounce escapes to the environment) every object
+ * volume should already have been exited and at most the world's volume
+ * should remain in the stack.
+ *
+ * Ray intersection precision issues can break that expectation and leave a
+ * non-world volume in the stack, which then acts as an infinite volume and
+ * causes render artifacts.
+ *
+ * Call this after the last bounce: it writes SHADER_NONE right after the
+ * world's entry (or at the very start of the stack when the world has no
+ * volume shader), discarding any such leftover entries.
+ */
+ccl_device_inline void kernel_volume_clean_stack(KernelGlobals *kg,
+                                                 VolumeStack *volume_stack)
+{
+	/* Entry 0 is kept for the world's volume when the background has one. */
+	const int end_index = (kernel_data.background.volume_shader != SHADER_NONE)? 1: 0;
+	volume_stack[end_index].shader = SHADER_NONE;
+}
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/render/bake.cpp b/intern/cycles/render/bake.cpp
index 13310a61761..d9a297002c6 100644
--- a/intern/cycles/render/bake.cpp
+++ b/intern/cycles/render/bake.cpp
@@ -135,20 +135,16 @@ bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progre
{
size_t num_pixels = bake_data->size();
- progress.reset_sample();
- this->num_parts = 0;
+ int num_samples = is_aa_pass(shader_type)? scene->integrator->aa_samples : 1;
- /* calculate the total parts for the progress bar */
+ /* calculate the total pixel samples for the progress bar */
+ total_pixel_samples = 0;
for(size_t shader_offset = 0; shader_offset < num_pixels; shader_offset += m_shader_limit) {
size_t shader_size = (size_t)fminf(num_pixels - shader_offset, m_shader_limit);
-
- DeviceTask task(DeviceTask::SHADER);
- task.shader_w = shader_size;
-
- this->num_parts += device->get_split_task_count(task);
+ total_pixel_samples += shader_size * num_samples;
}
-
- this->num_samples = is_aa_pass(shader_type)? scene->integrator->aa_samples : 1;
+ progress.reset_sample();
+ progress.set_total_pixel_samples(total_pixel_samples);
for(size_t shader_offset = 0; shader_offset < num_pixels; shader_offset += m_shader_limit) {
size_t shader_size = (size_t)fminf(num_pixels - shader_offset, m_shader_limit);
@@ -187,9 +183,9 @@ bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progre
task.shader_x = 0;
task.offset = shader_offset;
task.shader_w = d_output.size();
- task.num_samples = this->num_samples;
+ task.num_samples = num_samples;
task.get_cancel = function_bind(&Progress::get_cancel, &progress);
- task.update_progress_sample = function_bind(&Progress::increment_sample_update, &progress);
+ task.update_progress_sample = function_bind(&Progress::add_samples_update, &progress, _1, _2);
device->task_add(task);
device->task_wait();
diff --git a/intern/cycles/render/bake.h b/intern/cycles/render/bake.h
index 8377e387197..25f5eb3c897 100644
--- a/intern/cycles/render/bake.h
+++ b/intern/cycles/render/bake.h
@@ -73,8 +73,7 @@ public:
bool need_update;
- int num_samples;
- int num_parts;
+ int total_pixel_samples;
private:
BakeData *m_bake_data;
diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp
index 3fb2bb1cf92..c7f37a13fba 100644
--- a/intern/cycles/render/nodes.cpp
+++ b/intern/cycles/render/nodes.cpp
@@ -1442,14 +1442,14 @@ void PointDensityTextureNode::compile(SVMCompiler& compiler)
else {
if(use_density) {
compiler.add_node(NODE_VALUE_F,
- __float_as_int(0.0f),
- compiler.stack_assign(density_out));
+ __float_as_int(0.0f),
+ compiler.stack_assign(density_out));
}
if(use_color) {
compiler.add_node(NODE_VALUE_V, compiler.stack_assign(color_out));
compiler.add_node(NODE_VALUE_V, make_float3(TEX_IMAGE_MISSING_R,
- TEX_IMAGE_MISSING_G,
- TEX_IMAGE_MISSING_B));
+ TEX_IMAGE_MISSING_G,
+ TEX_IMAGE_MISSING_B));
}
}
}
@@ -2421,7 +2421,7 @@ void BackgroundNode::compile(SVMCompiler& compiler)
if(color_in->link || strength_in->link) {
compiler.add_node(NODE_EMISSION_WEIGHT,
compiler.stack_assign(color_in),
- compiler.stack_assign(strength_in));
+ compiler.stack_assign(strength_in));
}
else
compiler.add_node(NODE_CLOSURE_SET_WEIGHT, color*strength);
diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp
index 9d8c9fed7af..33721048722 100644
--- a/intern/cycles/render/session.cpp
+++ b/intern/cycles/render/session.cpp
@@ -67,10 +67,7 @@ Session::Session(const SessionParams& params_)
session_thread = NULL;
scene = NULL;
- start_time = 0.0;
reset_time = 0.0;
- preview_time = 0.0;
- paused_time = 0.0;
last_update_time = 0.0;
delayed_reset.do_reset = false;
@@ -201,12 +198,10 @@ void Session::run_gpu()
{
bool tiles_written = false;
- start_time = time_dt();
reset_time = time_dt();
- paused_time = 0.0;
last_update_time = time_dt();
- progress.set_render_start_time(start_time + paused_time);
+ progress.set_render_start_time();
while(!progress.get_cancel()) {
/* advance to next tile */
@@ -233,13 +228,9 @@ void Session::run_gpu()
update_status_time(pause, no_tiles);
while(1) {
- double pause_start = time_dt();
+ scoped_timer pause_timer;
pause_cond.wait(pause_lock);
- paused_time += time_dt() - pause_start;
-
- if(!params.background)
- progress.set_start_time(start_time + paused_time);
- progress.set_render_start_time(start_time + paused_time);
+ progress.add_skip_time(pause_timer, params.background);
update_status_time(pause, no_tiles);
progress.set_update();
@@ -255,7 +246,9 @@ void Session::run_gpu()
if(!no_tiles) {
/* update scene */
+ scoped_timer update_timer;
update_scene();
+ progress.add_skip_time(update_timer, params.background);
if(!device->error_message().empty())
progress.set_error(device->error_message());
@@ -523,13 +516,9 @@ void Session::run_cpu()
update_status_time(pause, no_tiles);
while(1) {
- double pause_start = time_dt();
+ scoped_timer pause_timer;
pause_cond.wait(pause_lock);
- paused_time += time_dt() - pause_start;
-
- if(!params.background)
- progress.set_start_time(start_time + paused_time);
- progress.set_render_start_time(start_time + paused_time);
+ progress.add_skip_time(pause_timer, params.background);
update_status_time(pause, no_tiles);
progress.set_update();
@@ -550,7 +539,9 @@ void Session::run_cpu()
thread_scoped_lock buffers_lock(buffers_mutex);
/* update scene */
+ scoped_timer update_timer;
update_scene();
+ progress.add_skip_time(update_timer, params.background);
if(!device->error_message().empty())
progress.set_error(device->error_message());
@@ -645,6 +636,7 @@ DeviceRequestedFeatures Session::get_requested_device_features()
BakeManager *bake_manager = scene->bake_manager;
requested_features.use_baking = bake_manager->get_baking();
requested_features.use_integrator_branched = (scene->integrator->method == Integrator::BRANCHED_PATH);
+ requested_features.use_transparent &= scene->integrator->transparent_shadows;
return requested_features;
}
@@ -718,14 +710,14 @@ void Session::reset_(BufferParams& buffer_params, int samples)
}
tile_manager.reset(buffer_params, samples);
+ progress.reset_sample();
- start_time = time_dt();
- preview_time = 0.0;
- paused_time = 0.0;
+ bool show_progress = params.background || tile_manager.get_num_effective_samples() != INT_MAX;
+ progress.set_total_pixel_samples(show_progress? tile_manager.state.total_pixel_samples : 0);
if(!params.background)
- progress.set_start_time(start_time);
- progress.set_render_start_time(start_time);
+ progress.set_start_time();
+ progress.set_render_start_time();
}
void Session::reset(BufferParams& buffer_params, int samples)
@@ -827,61 +819,40 @@ void Session::update_scene()
void Session::update_status_time(bool show_pause, bool show_done)
{
- int sample = tile_manager.state.sample;
- int resolution = tile_manager.state.resolution_divider;
- int num_tiles = tile_manager.state.num_tiles;
+ int progressive_sample = tile_manager.state.sample;
+ int num_samples = tile_manager.get_num_effective_samples();
+
int tile = tile_manager.state.num_rendered_tiles;
+ int num_tiles = tile_manager.state.num_tiles;
/* update status */
string status, substatus;
if(!params.progressive) {
- const int progress_sample = progress.get_sample(),
- num_samples = tile_manager.get_num_effective_samples();
- const bool is_gpu = params.device.type == DEVICE_CUDA || params.device.type == DEVICE_OPENCL;
- const bool is_multidevice = params.device.multi_devices.size() > 1;
const bool is_cpu = params.device.type == DEVICE_CPU;
- const bool is_last_tile = (num_samples * num_tiles - progress_sample) < num_samples;
+ const bool is_last_tile = (progress.get_finished_tiles() + 1) == num_tiles;
substatus = string_printf("Path Tracing Tile %d/%d", tile, num_tiles);
- if((is_gpu && !is_multidevice && !device->info.use_split_kernel) ||
- (is_cpu && (num_tiles == 1 || is_last_tile)))
+ if(device->show_samples() || (is_cpu && is_last_tile))
{
- /* When using split-kernel (OpenCL) each thread in a tile will be working on a different
- * sample. Can't display sample number when device uses split-kernel
+ /* Some devices automatically support showing the sample number:
+ * - CUDADevice
+ * - OpenCLDevice when using the megakernel (the split kernel renders multiple samples at the same time, so the current sample isn't really defined)
+ * - CPUDevice when using one thread
+ * For these devices, the current sample is always shown.
+ *
+ * The other option is when the last tile is currently being rendered by the CPU.
*/
-
- /* when rendering on GPU multithreading happens within single tile, as in
- * tiles are handling sequentially and in this case we could display
- * currently rendering sample number
- * this helps a lot from feedback point of view.
- * also display the info on CPU, when using 1 tile only
- */
-
- int status_sample = progress_sample;
- if(tile > 1) {
- /* sample counter is global for all tiles, subtract samples
- * from already finished tiles to get sample counter for
- * current tile only
- */
- if(is_cpu && is_last_tile && num_tiles > 1) {
- status_sample = num_samples - (num_samples * num_tiles - progress_sample);
- }
- else {
- status_sample -= (tile - 1) * num_samples;
- }
- }
-
- substatus += string_printf(", Sample %d/%d", status_sample, num_samples);
+ substatus += string_printf(", Sample %d/%d", progress.get_current_sample(), num_samples);
}
}
else if(tile_manager.num_samples == INT_MAX)
- substatus = string_printf("Path Tracing Sample %d", sample+1);
+ substatus = string_printf("Path Tracing Sample %d", progressive_sample+1);
else
substatus = string_printf("Path Tracing Sample %d/%d",
- sample+1,
- tile_manager.get_num_effective_samples());
+ progressive_sample+1,
+ num_samples);
if(show_pause) {
status = "Paused";
@@ -895,22 +866,6 @@ void Session::update_status_time(bool show_pause, bool show_done)
}
progress.set_status(status, substatus);
-
- /* update timing */
- if(preview_time == 0.0 && resolution == 1)
- preview_time = time_dt();
-
- double tile_time = (tile == 0 || sample == 0)? 0.0: (time_dt() - preview_time - paused_time) / sample;
-
- /* negative can happen when we pause a bit before rendering, can discard that */
- if(preview_time < 0.0) preview_time = 0.0;
-
- progress.set_tile(tile, tile_time);
-}
-
-void Session::update_progress_sample()
-{
- progress.increment_sample();
}
void Session::path_trace()
@@ -922,7 +877,7 @@ void Session::path_trace()
task.release_tile = function_bind(&Session::release_tile, this, _1);
task.get_cancel = function_bind(&Progress::get_cancel, &this->progress);
task.update_tile_sample = function_bind(&Session::update_tile_sample, this, _1);
- task.update_progress_sample = function_bind(&Session::update_progress_sample, this);
+ task.update_progress_sample = function_bind(&Progress::add_samples, &this->progress, _1, _2);
task.need_finish_queue = params.progressive_refine;
task.integrator_branched = scene->integrator->method == Integrator::BRANCHED_PATH;
task.requested_tile_size = params.tile_size;
diff --git a/intern/cycles/render/session.h b/intern/cycles/render/session.h
index 1db4692e171..c7ff1446171 100644
--- a/intern/cycles/render/session.h
+++ b/intern/cycles/render/session.h
@@ -145,6 +145,10 @@ public:
void device_free();
+ /* Returns the rendering progress or 0 if no progress can be determined
+ * (for example, when rendering with unlimited samples). */
+ float get_progress();
+
protected:
struct DelayedReset {
thread_mutex mutex;
@@ -173,8 +177,6 @@ protected:
void update_tile_sample(RenderTile& tile);
void release_tile(RenderTile& tile);
- void update_progress_sample();
-
bool device_use_gl;
thread *session_thread;
@@ -194,10 +196,7 @@ protected:
bool kernels_loaded;
- double start_time;
double reset_time;
- double preview_time;
- double paused_time;
/* progressive refine */
double last_update_time;
diff --git a/intern/cycles/render/shader.cpp b/intern/cycles/render/shader.cpp
index 06b6dd969d8..335edcbe609 100644
--- a/intern/cycles/render/shader.cpp
+++ b/intern/cycles/render/shader.cpp
@@ -571,6 +571,9 @@ void ShaderManager::get_requested_graph_features(ShaderGraph *graph,
if(node->has_surface_bssrdf()) {
requested_features->use_subsurface = true;
}
+ if(node->has_surface_transparent()) {
+ requested_features->use_transparent = true;
+ }
}
}
diff --git a/intern/cycles/render/tile.cpp b/intern/cycles/render/tile.cpp
index 3a6dfea11a7..e59d0c843a3 100644
--- a/intern/cycles/render/tile.cpp
+++ b/intern/cycles/render/tile.cpp
@@ -108,36 +108,57 @@ TileManager::~TileManager()
{
}
-void TileManager::reset(BufferParams& params_, int num_samples_)
+static int get_divider(int w, int h, int start_resolution)
{
- params = params_;
-
int divider = 1;
- int w = params.width, h = params.height;
-
if(start_resolution != INT_MAX) {
while(w*h > start_resolution*start_resolution) {
w = max(1, w/2);
h = max(1, h/2);
- divider *= 2;
+ divider <<= 1;
}
}
+ return divider;
+}
- num_samples = num_samples_;
+void TileManager::reset(BufferParams& params_, int num_samples_)
+{
+ params = params_;
+
+ set_samples(num_samples_);
state.buffer = BufferParams();
state.sample = range_start_sample - 1;
state.num_tiles = 0;
state.num_rendered_tiles = 0;
state.num_samples = 0;
- state.resolution_divider = divider;
+ state.resolution_divider = get_divider(params.width, params.height, start_resolution);
state.tiles.clear();
}
+/* Set the number of samples to render and recompute
+ * state.total_pixel_samples, the total used for progress reporting.
+ * The total is 0 when the sample count is unlimited (INT_MAX). */
void TileManager::set_samples(int num_samples_)
{
num_samples = num_samples_;
+
+	/* No real progress indication is possible when using unlimited samples. */
+	if(num_samples == INT_MAX) {
+		state.total_pixel_samples = 0;
+	}
+	else {
+		uint64_t pixel_samples = 0;
+		/* While rendering in the viewport, the initial preview resolution is
+		 * increased to the native resolution before the actual rendering
+		 * begins. Therefore, additional pixel samples will be rendered. */
+		int divider = get_divider(params.width, params.height, start_resolution) / 2;
+		while(divider > 1) {
+			int image_w = max(1, params.width/divider);
+			int image_h = max(1, params.height/divider);
+			/* Widen before multiplying so the product can't overflow int. */
+			pixel_samples += (uint64_t)image_w * image_h;
+			divider >>= 1;
+		}
+
+		/* Same here: samples * width * height easily exceeds INT_MAX
+		 * (e.g. 4096x4096 at 128 samples), so compute it in 64 bits. */
+		state.total_pixel_samples = pixel_samples +
+		        (uint64_t)get_num_effective_samples() * params.width * params.height;
+	}
}
/* If sliced is false, splits image into tiles and assigns equal amount of tiles to every render device.
diff --git a/intern/cycles/render/tile.h b/intern/cycles/render/tile.h
index af1b1ed8b0f..5d92ebac355 100644
--- a/intern/cycles/render/tile.h
+++ b/intern/cycles/render/tile.h
@@ -64,6 +64,10 @@ public:
int resolution_divider;
int num_tiles;
int num_rendered_tiles;
+
+ /* Total samples over all pixels: Generally num_samples*num_pixels,
+ * but can be higher due to the initial resolution division for previews. */
+ uint64_t total_pixel_samples;
/* This vector contains a list of tiles for every logical device in the session.
* In each list, the tiles are sorted according to the tile order setting. */
vector<list<Tile> > tiles;
@@ -91,7 +95,7 @@ public:
/* Number to samples in the rendering range. */
int range_num_samples;
- /* get number of actual samples to render. */
+ /* Get number of actual samples to render. */
int get_num_effective_samples();
protected:
diff --git a/intern/cycles/util/util_avxf.h b/intern/cycles/util/util_avxf.h
index 2db2c4dad1a..2451213963a 100644
--- a/intern/cycles/util/util_avxf.h
+++ b/intern/cycles/util/util_avxf.h
@@ -180,6 +180,14 @@ __forceinline const avxf nmadd(const avxf& a, const avxf& b, const avxf& c) {
}
#endif
+/* Fallback for toolchains where _mm256_set_m128 is not available as a macro.
+ * Builds a 256-bit float vector from two 128-bit ones: lo becomes the lower
+ * half and hi is inserted into the upper half (lane index 0x1). */
+#ifndef _mm256_set_m128
+# define _mm256_set_m128(/* __m128 */ hi, /* __m128 */ lo) \
+ _mm256_insertf128_ps(_mm256_castps128_ps256(lo), (hi), 0x1)
+#endif
+
+/* Loads four floats from each address with unaligned loads and packs them
+ * into one 256-bit vector (hiaddr -> upper half, loaddr -> lower half). */
+#define _mm256_loadu2_m128(/* float const* */ hiaddr, /* float const* */ loaddr) \
+ _mm256_set_m128(_mm_loadu_ps(hiaddr), _mm_loadu_ps(loaddr))
+
CCL_NAMESPACE_END
#endif
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h
index 3f4d3e06c0b..6cb68b53d16 100644
--- a/intern/cycles/util/util_math.h
+++ b/intern/cycles/util/util_math.h
@@ -596,8 +596,7 @@ ccl_device_inline float len_squared(const float4& a)
ccl_device_inline float3 normalize(const float3& a)
{
- /* TODO(sergey): Disabled for now, causes crashes in certain cases. */
-#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) && 0
+#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
__m128 norm = _mm_sqrt_ps(_mm_dp_ps(a.m128, a.m128, 0x7F));
return _mm_div_ps(a.m128, norm);
#else
@@ -798,8 +797,7 @@ ccl_device_inline float4 operator-(const float4& a)
ccl_device_inline float4 operator*(const float4& a, const float4& b)
{
- /* TODO(sergey): Disabled for now, causes crashes in certain cases. */
-#if defined(__KERNEL_SSE__) && 0
+#ifdef __KERNEL_SSE__
return _mm_mul_ps(a.m128, b.m128);
#else
return make_float4(a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w);
@@ -847,8 +845,7 @@ ccl_device_inline float4 operator/(const float4& a, const float4& b)
ccl_device_inline float4 operator+(const float4& a, const float4& b)
{
- /* TODO(sergey): Disabled for now, causes crashes in certain cases. */
-#if defined(__KERNEL_SSE__) && 0
+#ifdef __KERNEL_SSE__
return _mm_add_ps(a.m128, b.m128);
#else
return make_float4(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w);
diff --git a/intern/cycles/util/util_progress.h b/intern/cycles/util/util_progress.h
index 4ae1d61dd17..14215056840 100644
--- a/intern/cycles/util/util_progress.h
+++ b/intern/cycles/util/util_progress.h
@@ -34,12 +34,12 @@ class Progress {
public:
Progress()
{
- tile = 0;
- sample = 0;
+ pixel_samples = 0;
+ total_pixel_samples = 0;
+ current_tile_sample = 0;
+ finished_tiles = 0;
start_time = time_dt();
- total_time = 0.0;
- render_time = 0.0;
- tile_time = 0.0;
+ render_start_time = time_dt();
status = "Initializing";
substatus = "";
sync_status = "";
@@ -62,22 +62,22 @@ public:
thread_scoped_lock lock(progress.progress_mutex);
progress.get_status(status, substatus);
- progress.get_tile(tile, total_time, render_time, tile_time);
- sample = progress.get_sample();
+ pixel_samples = progress.pixel_samples;
+ total_pixel_samples = progress.total_pixel_samples;
+ current_tile_sample = progress.get_current_sample();
return *this;
}
void reset()
{
- tile = 0;
- sample = 0;
+ pixel_samples = 0;
+ total_pixel_samples = 0;
+ current_tile_sample = 0;
+ finished_tiles = 0;
start_time = time_dt();
render_start_time = time_dt();
- total_time = 0.0;
- render_time = 0.0;
- tile_time = 0.0;
status = "Initializing";
substatus = "";
sync_status = "";
@@ -139,69 +139,93 @@ public:
/* tile and timing information */
- void set_start_time(double start_time_)
+ void set_start_time()
{
thread_scoped_lock lock(progress_mutex);
- start_time = start_time_;
+ start_time = time_dt();
}
- void set_render_start_time(double render_start_time_)
+ void set_render_start_time()
{
thread_scoped_lock lock(progress_mutex);
- render_start_time = render_start_time_;
+ render_start_time = time_dt();
}
- void set_tile(int tile_, double tile_time_)
+ void add_skip_time(const scoped_timer &start_timer, bool only_render)
{
- thread_scoped_lock lock(progress_mutex);
+ double skip_time = time_dt() - start_timer.get_start();
- tile = tile_;
- total_time = time_dt() - start_time;
- render_time = time_dt() - render_start_time;
- tile_time = tile_time_;
+ render_start_time += skip_time;
+ if(!only_render) {
+ start_time += skip_time;
+ }
}
- void get_tile(int& tile_, double& total_time_, double& render_time_, double& tile_time_)
+ void get_time(double& total_time_, double& render_time_)
{
thread_scoped_lock lock(progress_mutex);
- tile_ = tile;
- total_time_ = (total_time > 0.0)? total_time: 0.0;
- render_time_ = (render_time > 0.0)? render_time: 0.0;
- tile_time_ = tile_time;
+ total_time_ = time_dt() - start_time;
+ render_time_ = time_dt() - render_start_time;
}
- void get_time(double& total_time_, double& render_time_)
+ void reset_sample()
{
- total_time_ = (total_time > 0.0)? total_time: 0.0;
- render_time_ = (render_time > 0.0)? render_time: 0.0;
+ thread_scoped_lock lock(progress_mutex);
+
+ pixel_samples = 0;
+ current_tile_sample = 0;
+ finished_tiles = 0;
}
- void reset_sample()
+ void set_total_pixel_samples(uint64_t total_pixel_samples_)
{
thread_scoped_lock lock(progress_mutex);
- sample = 0;
+ total_pixel_samples = total_pixel_samples_;
}
- void increment_sample()
+	/* Fraction of the expected pixel samples that has been rendered so far.
+	 * Returns 0 while no total has been set (total_pixel_samples is 0).
+	 * Note: reads both counters without taking progress_mutex. */
+	float get_progress()
+	{
+		if(total_pixel_samples == 0) {
+			return 0.0f;
+		}
+		return ((float) pixel_samples) / total_pixel_samples;
+	}
+
+ void add_samples(uint64_t pixel_samples_, int tile_sample)
{
thread_scoped_lock lock(progress_mutex);
- sample++;
+ pixel_samples += pixel_samples_;
+ current_tile_sample = tile_sample;
}
- void increment_sample_update()
+ void add_samples_update(uint64_t pixel_samples_, int tile_sample)
{
- increment_sample();
+ add_samples(pixel_samples_, tile_sample);
set_update();
}
- int get_sample()
+ void add_finished_tile()
+ {
+ thread_scoped_lock lock(progress_mutex);
+
+ finished_tiles++;
+ }
+
+ int get_current_sample()
+ {
+ /* Note that the value here always belongs to the last tile that updated,
+ * so it's only useful if there is only one active tile. */
+ return current_tile_sample;
+ }
+
+ int get_finished_tiles()
{
- return sample;
+ return finished_tiles;
}
/* status messages */
@@ -212,8 +236,6 @@ public:
thread_scoped_lock lock(progress_mutex);
status = status_;
substatus = substatus_;
- total_time = time_dt() - start_time;
- render_time = time_dt() - render_start_time;
}
set_update();
@@ -224,8 +246,6 @@ public:
{
thread_scoped_lock lock(progress_mutex);
substatus = substatus_;
- total_time = time_dt() - start_time;
- render_time = time_dt() - render_start_time;
}
set_update();
@@ -237,8 +257,6 @@ public:
thread_scoped_lock lock(progress_mutex);
sync_status = status_;
sync_substatus = substatus_;
- total_time = time_dt() - start_time;
- render_time = time_dt() - render_start_time;
}
set_update();
@@ -250,8 +268,6 @@ public:
{
thread_scoped_lock lock(progress_mutex);
sync_substatus = substatus_;
- total_time = time_dt() - start_time;
- render_time = time_dt() - render_start_time;
}
set_update();
@@ -292,12 +308,19 @@ protected:
function<void(void)> update_cb;
function<void(void)> cancel_cb;
- int tile; /* counter for rendered tiles */
- int sample; /* counter of rendered samples, global for all tiles */
+ /* pixel_samples counts how many samples have been rendered over all pixels, not just per pixel.
+ * This makes the progress estimate more accurate when tiles with different sizes are used.
+ *
+ * total_pixel_samples is the total amount of pixel samples that will be rendered. */
+ uint64_t pixel_samples, total_pixel_samples;
+ /* Stores the current sample count of the last tile that called the update function.
+ * It's used to display the sample count if only one tile is active. */
+ int current_tile_sample;
+ /* Stores the number of tiles that have already finished.
+ * Used to determine whether all but the last tile are finished rendering, in which case the current_tile_sample is displayed. */
+ int finished_tiles;
double start_time, render_start_time;
- double total_time, render_time;
- double tile_time;
string status;
string substatus;
diff --git a/intern/cycles/util/util_time.h b/intern/cycles/util/util_time.h
index a5b074bffa0..65798244111 100644
--- a/intern/cycles/util/util_time.h
+++ b/intern/cycles/util/util_time.h
@@ -29,7 +29,7 @@ void time_sleep(double t);
class scoped_timer {
public:
- explicit scoped_timer(double *value) : value_(value)
+ explicit scoped_timer(double *value = NULL) : value_(value)
{
time_start_ = time_dt();
}
@@ -40,6 +40,12 @@ public:
*value_ = time_dt() - time_start_;
}
}
+
+ /* Returns the time_dt() value recorded when this timer was constructed. */
+ double get_start() const
+ {
+ return time_start_;
+ }
+
protected:
double *value_;
double time_start_;
diff --git a/intern/cycles/util/util_transform.h b/intern/cycles/util/util_transform.h
index ea5eb3b25b0..a0695f20488 100644
--- a/intern/cycles/util/util_transform.h
+++ b/intern/cycles/util/util_transform.h
@@ -74,7 +74,7 @@ ccl_device_inline float3 transform_perspective(const Transform *t, const float3
ccl_device_inline float3 transform_point(const Transform *t, const float3 a)
{
/* TODO(sergey): Disabled for now, causes crashes in certain cases. */
-#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__) && 0
+#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__)
ssef x, y, z, w, aa;
aa = a.m128;
@@ -103,8 +103,7 @@ ccl_device_inline float3 transform_point(const Transform *t, const float3 a)
ccl_device_inline float3 transform_direction(const Transform *t, const float3 a)
{
- /* TODO(sergey): Disabled for now, causes crashes in certain cases. */
-#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__) && 0
+#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__)
ssef x, y, z, w, aa;
aa = a.m128;
x = _mm_loadu_ps(&t->x.x);
diff --git a/intern/opencolorio/fallback_impl.cc b/intern/opencolorio/fallback_impl.cc
index d0a129360b0..87629422013 100644
--- a/intern/opencolorio/fallback_impl.cc
+++ b/intern/opencolorio/fallback_impl.cc
@@ -23,18 +23,26 @@
* ***** END GPL LICENSE BLOCK *****
*/
-#include <string.h>
+#include <algorithm>
+#include <cstring>
#include "MEM_guardedalloc.h"
#include "BLI_math_color.h"
+#include "BLI_math_vector.h"
#include "ocio_impl.h"
+using std::max;
+
#define CONFIG_DEFAULT ((OCIO_ConstConfigRcPtr*)1)
-#define PROCESSOR_LINEAR_TO_SRGB ((OCIO_ConstProcessorRcPtr*)1)
-#define PROCESSOR_SRGB_TO_LINEAR ((OCIO_ConstProcessorRcPtr*)2)
-#define PROCESSOR_UNKNOWN ((OCIO_ConstProcessorRcPtr*)3)
+/* Kinds of color transform the fallback implementation knows how to apply. */
+enum TransformType {
+ TRANSFORM_LINEAR_TO_SRGB, /* Linear -> sRGB conversion. */
+ TRANSFORM_SRGB_TO_LINEAR, /* sRGB -> linear conversion. */
+ TRANSFORM_MATRIX, /* Matrix multiplication followed by an offset. */
+ TRANSFORM_EXPONENT, /* Per-channel power function. */
+ TRANSFORM_UNKNOWN, /* Default type of a newly constructed transform. */
+};
#define COLORSPACE_LINEAR ((OCIO_ConstColorSpaceRcPtr*)1)
#define COLORSPACE_SRGB ((OCIO_ConstColorSpaceRcPtr*)2)
@@ -49,6 +57,145 @@ typedef struct OCIO_PackedImageDescription {
long yStrideBytes;
} OCIO_PackedImageDescription;
+/* A color transform as implemented by the OCIO fallback.
+ *
+ * `type` selects the operation performed by applyRGB()/applyRGBA(); a
+ * transform of type TRANSFORM_UNKNOWN leaves pixels untouched.
+ *
+ * `linear_transform` and `display_transform` are optional, owned by this
+ * object (deleted in the destructor), and only used by
+ * TRANSFORM_LINEAR_TO_SRGB, where they run respectively before and after the
+ * linear -> sRGB conversion.
+ */
+struct FallbackTransform {
+	FallbackTransform()
+	    : type(TRANSFORM_UNKNOWN),
+	      linear_transform(NULL),
+	      display_transform(NULL)
+	{
+	}
+
+	~FallbackTransform()
+	{
+		delete linear_transform;
+		delete display_transform;
+	}
+
+	/* Apply the transform in place to a 3-component (RGB) pixel. */
+	void applyRGB(float *pixel)
+	{
+		if (type == TRANSFORM_LINEAR_TO_SRGB) {
+			applyLinearRGB(pixel);
+			linearrgb_to_srgb_v3_v3(pixel, pixel);
+			applyDisplayRGB(pixel);
+		}
+		else if (type == TRANSFORM_SRGB_TO_LINEAR) {
+			srgb_to_linearrgb_v3_v3(pixel, pixel);
+		}
+		else if (type == TRANSFORM_EXPONENT) {
+			/* Negative inputs are clamped to 0 before raising to the power. */
+			pixel[0] = powf(max(0.0f, pixel[0]), exponent[0]);
+			pixel[1] = powf(max(0.0f, pixel[1]), exponent[1]);
+			pixel[2] = powf(max(0.0f, pixel[2]), exponent[2]);
+		}
+		else if (type == TRANSFORM_MATRIX) {
+			/* Copy the inputs first: the pixel is overwritten in place. */
+			float r = pixel[0];
+			float g = pixel[1];
+			float b = pixel[2];
+			/* Only the upper-left 3x3 part of the 4x4 matrix is used. */
+			pixel[0] = r*matrix[0] + g*matrix[1] + b*matrix[2];
+			pixel[1] = r*matrix[4] + g*matrix[5] + b*matrix[6];
+			pixel[2] = r*matrix[8] + g*matrix[9] + b*matrix[10];
+			pixel[0] += offset[0];
+			pixel[1] += offset[1];
+			pixel[2] += offset[2];
+		}
+	}
+
+	/* Apply the transform in place to a 4-component (RGBA) pixel. */
+	void applyRGBA(float *pixel)
+	{
+		if (type == TRANSFORM_LINEAR_TO_SRGB) {
+			applyLinearRGBA(pixel);
+			linearrgb_to_srgb_v4(pixel, pixel);
+			applyDisplayRGBA(pixel);
+		}
+		else if (type == TRANSFORM_SRGB_TO_LINEAR) {
+			srgb_to_linearrgb_v4(pixel, pixel);
+		}
+		else if (type == TRANSFORM_EXPONENT) {
+			/* Negative inputs are clamped to 0 before raising to the power. */
+			pixel[0] = powf(max(0.0f, pixel[0]), exponent[0]);
+			pixel[1] = powf(max(0.0f, pixel[1]), exponent[1]);
+			pixel[2] = powf(max(0.0f, pixel[2]), exponent[2]);
+			pixel[3] = powf(max(0.0f, pixel[3]), exponent[3]);
+		}
+		else if (type == TRANSFORM_MATRIX) {
+			/* Copy the inputs first: the pixel is overwritten in place. */
+			float r = pixel[0];
+			float g = pixel[1];
+			float b = pixel[2];
+			float a = pixel[3];
+			pixel[0] = r*matrix[0] + g*matrix[1] + b*matrix[2] + a*matrix[3];
+			pixel[1] = r*matrix[4] + g*matrix[5] + b*matrix[6] + a*matrix[7];
+			pixel[2] = r*matrix[8] + g*matrix[9] + b*matrix[10] + a*matrix[11];
+			pixel[3] = r*matrix[12] + g*matrix[13] + b*matrix[14] + a*matrix[15];
+			pixel[0] += offset[0];
+			pixel[1] += offset[1];
+			pixel[2] += offset[2];
+			pixel[3] += offset[3];
+		}
+	}
+
+	/* The helpers below forward to the optional sub-transforms and do
+	 * nothing when the corresponding sub-transform is not set. */
+
+	void applyLinearRGB(float *pixel)
+	{
+		if (linear_transform != NULL) {
+			linear_transform->applyRGB(pixel);
+		}
+	}
+
+	void applyLinearRGBA(float *pixel)
+	{
+		if (linear_transform != NULL) {
+			linear_transform->applyRGBA(pixel);
+		}
+	}
+
+	void applyDisplayRGB(float *pixel)
+	{
+		if (display_transform != NULL) {
+			display_transform->applyRGB(pixel);
+		}
+	}
+
+	void applyDisplayRGBA(float *pixel)
+	{
+		if (display_transform != NULL) {
+			display_transform->applyRGBA(pixel);
+		}
+	}
+
+	TransformType type;
+	FallbackTransform *linear_transform;
+	FallbackTransform *display_transform;
+	/* NOTE: the arrays below are not initialized by the constructor; they
+	 * must be filled in before a transform of the matching type is applied. */
+	/* Exponent transform. */
+	float exponent[4];
+	/* Matrix transform. */
+	float matrix[16];
+	float offset[4];
+
+	/* Allocation tag names this struct, so guarded-allocator reports
+	 * attribute the memory to the right type. */
+	MEM_CXX_CLASS_ALLOC_FUNCS("FallbackTransform");
+};
+
+/* Processor handle returned by the fallback implementation.
+ *
+ * Wraps a single FallbackTransform, which it owns and deletes on
+ * destruction. Applying a processor that has no transform is a no-op.
+ */
+struct FallbackProcessor {
+	FallbackProcessor()
+	    : transform(NULL)
+	{
+	}
+
+	~FallbackProcessor()
+	{
+		delete transform;
+	}
+
+	/* Apply the wrapped transform in place to an RGB pixel. */
+	void applyRGB(float *pixel)
+	{
+		/* transform starts out NULL and is assigned by the caller, so guard
+		 * against it never having been set. */
+		if (transform != NULL) {
+			transform->applyRGB(pixel);
+		}
+	}
+
+	/* Apply the wrapped transform in place to an RGBA pixel. */
+	void applyRGBA(float *pixel)
+	{
+		if (transform != NULL) {
+			transform->applyRGBA(pixel);
+		}
+	}
+
+	FallbackTransform *transform;
+
+	MEM_CXX_CLASS_ALLOC_FUNCS("FallbackProcessor");
+};
+
OCIO_ConstConfigRcPtr *FallbackImpl::getCurrentConfig(void)
{
return CONFIG_DEFAULT;
@@ -233,19 +380,27 @@ OCIO_ConstProcessorRcPtr *FallbackImpl::configGetProcessorWithNames(
{
OCIO_ConstColorSpaceRcPtr *cs_src = configGetColorSpace(config, srcName);
OCIO_ConstColorSpaceRcPtr *cs_dst = configGetColorSpace(config, dstName);
+ FallbackTransform *transform = new FallbackTransform();
if (cs_src == COLORSPACE_LINEAR && cs_dst == COLORSPACE_SRGB) {
- return PROCESSOR_LINEAR_TO_SRGB;
+ transform->type = TRANSFORM_LINEAR_TO_SRGB;
}
else if (cs_src == COLORSPACE_SRGB && cs_dst == COLORSPACE_LINEAR) {
- return PROCESSOR_SRGB_TO_LINEAR;
+ transform->type = TRANSFORM_SRGB_TO_LINEAR;
}
- return 0;
+ else {
+ transform->type = TRANSFORM_UNKNOWN;
+ }
+ FallbackProcessor *processor = new FallbackProcessor();
+ processor->transform = transform;
+ return (OCIO_ConstProcessorRcPtr *)processor;
}
OCIO_ConstProcessorRcPtr *FallbackImpl::configGetProcessor(OCIO_ConstConfigRcPtr * /*config*/,
- OCIO_ConstTransformRcPtr *tfm)
+ OCIO_ConstTransformRcPtr *transform)
{
- return (OCIO_ConstProcessorRcPtr*)tfm;
+ FallbackProcessor *processor = new FallbackProcessor();
+ processor->transform = (FallbackTransform *)transform;
+ return (OCIO_ConstProcessorRcPtr *)processor;
}
void FallbackImpl::processorApply(OCIO_ConstProcessorRcPtr *processor,
@@ -297,21 +452,13 @@ void FallbackImpl::processorApply_predivide(OCIO_ConstProcessorRcPtr *processor,
void FallbackImpl::processorApplyRGB(OCIO_ConstProcessorRcPtr *processor,
float *pixel)
{ /* Applies the processor to a single RGB pixel. */
- if (processor == PROCESSOR_LINEAR_TO_SRGB) {
- linearrgb_to_srgb_v3_v3(pixel, pixel);
- }
- else if (processor == PROCESSOR_SRGB_TO_LINEAR) {
- srgb_to_linearrgb_v3_v3(pixel, pixel);
- }
+ ((FallbackProcessor *)processor)->applyRGB(pixel); /* The handle is treated as a FallbackProcessor, which forwards to its transform. */
}
void FallbackImpl::processorApplyRGBA(OCIO_ConstProcessorRcPtr *processor,
float *pixel)
{ /* Applies the processor to a single RGBA pixel. */
- if (processor == PROCESSOR_LINEAR_TO_SRGB)
- linearrgb_to_srgb_v4(pixel, pixel);
- else if (processor == PROCESSOR_SRGB_TO_LINEAR)
- srgb_to_linearrgb_v4(pixel, pixel);
+ ((FallbackProcessor *)processor)->applyRGBA(pixel); /* The handle is treated as a FallbackProcessor, which forwards to its transform. */
}
void FallbackImpl::processorApplyRGBA_predivide(OCIO_ConstProcessorRcPtr *processor,
@@ -338,8 +485,9 @@ void FallbackImpl::processorApplyRGBA_predivide(OCIO_ConstProcessorRcPtr *proces
}
}
-void FallbackImpl::processorRelease(OCIO_ConstProcessorRcPtr * /*p*/)
+void FallbackImpl::processorRelease(OCIO_ConstProcessorRcPtr *processor)
{ /* Destroys a processor handle. */
+ delete (FallbackProcessor*)(processor); /* ~FallbackProcessor also deletes the transform it holds. */
}
const char *FallbackImpl::colorSpaceGetName(OCIO_ConstColorSpaceRcPtr *cs)
@@ -365,7 +513,9 @@ const char *FallbackImpl::colorSpaceGetFamily(OCIO_ConstColorSpaceRcPtr * /*cs*/
OCIO_DisplayTransformRcPtr *FallbackImpl::createDisplayTransform(void)
{ /* Allocates a FallbackTransform of type linear-to-sRGB and returns it as an opaque display-transform handle. */
- return (OCIO_DisplayTransformRcPtr*)PROCESSOR_LINEAR_TO_SRGB;
+ FallbackTransform *transform = new FallbackTransform();
+ transform->type = TRANSFORM_LINEAR_TO_SRGB;
+ return (OCIO_DisplayTransformRcPtr*)transform;
}
void FallbackImpl::displayTransformSetInputColorSpaceName(OCIO_DisplayTransformRcPtr * /*dt*/,
@@ -383,14 +533,18 @@ void FallbackImpl::displayTransformSetView(OCIO_DisplayTransformRcPtr * /*dt*/,
{
}
-void FallbackImpl::displayTransformSetDisplayCC(OCIO_DisplayTransformRcPtr * /*dt*/,
- OCIO_ConstTransformRcPtr * /*et*/)
+void FallbackImpl::displayTransformSetDisplayCC(OCIO_DisplayTransformRcPtr *dt,
+ OCIO_ConstTransformRcPtr *et)
{ /* Stores `et` as the display_transform of `dt`; both handles are treated as FallbackTransform pointers. */
+ FallbackTransform *transform = (FallbackTransform *)dt;
+ transform->display_transform = (FallbackTransform *)et; /* Only the pointer is stored; no copy is made. */
}
-void FallbackImpl::displayTransformSetLinearCC(OCIO_DisplayTransformRcPtr * /*dt*/,
- OCIO_ConstTransformRcPtr * /*et*/)
+void FallbackImpl::displayTransformSetLinearCC(OCIO_DisplayTransformRcPtr *dt,
+ OCIO_ConstTransformRcPtr *et)
{ /* Stores `et` as the linear_transform of `dt`; both handles are treated as FallbackTransform pointers. */
+ FallbackTransform *transform = (FallbackTransform *)dt;
+ transform->linear_transform = (FallbackTransform *)et; /* Only the pointer is stored; no copy is made. */
}
void FallbackImpl::displayTransformSetLooksOverride(OCIO_DisplayTransformRcPtr * /*dt*/,
@@ -432,12 +586,16 @@ void FallbackImpl::OCIO_PackedImageDescRelease(OCIO_PackedImageDesc* id)
OCIO_ExponentTransformRcPtr *FallbackImpl::createExponentTransform(void)
{ /* Allocates a FallbackTransform of type TRANSFORM_EXPONENT and returns it as an opaque handle. */
- return (OCIO_ExponentTransformRcPtr*)PROCESSOR_UNKNOWN;
+ FallbackTransform *transform = new FallbackTransform();
+ transform->type = TRANSFORM_EXPONENT; /* The exponent values are set separately via exponentTransformSetValue(). */
+ return (OCIO_ExponentTransformRcPtr *)transform;
}
-void FallbackImpl::exponentTransformSetValue(OCIO_ExponentTransformRcPtr * /*et*/,
- const float * /*exponent*/)
+void FallbackImpl::exponentTransformSetValue(OCIO_ExponentTransformRcPtr *et,
+ const float *exponent)
{ /* Stores the given exponent values in the transform behind `et`. */
+ FallbackTransform *transform = (FallbackTransform *)et;
+ copy_v4_v4(transform->exponent, exponent); /* Destination is float[4]; presumably `exponent` must point to at least 4 floats - confirm against callers. */
}
void FallbackImpl::exponentTransformRelease(OCIO_ExponentTransformRcPtr * /*et*/)
@@ -446,23 +604,44 @@ void FallbackImpl::exponentTransformRelease(OCIO_ExponentTransformRcPtr * /*et*/
OCIO_MatrixTransformRcPtr *FallbackImpl::createMatrixTransform(void)
{ /* Allocates a FallbackTransform of type TRANSFORM_MATRIX and returns it as an opaque handle. */
- return (OCIO_MatrixTransformRcPtr*)PROCESSOR_UNKNOWN;
+ FallbackTransform *transform = new FallbackTransform();
+ transform->type = TRANSFORM_MATRIX; /* Matrix and offset are set separately via matrixTransformSetValue(). */
+ return (OCIO_MatrixTransformRcPtr *)transform;
}
-void FallbackImpl::matrixTransformSetValue(OCIO_MatrixTransformRcPtr * /*mt*/,
- const float * /*m44*/,
- const float * /*offset4*/)
+void FallbackImpl::matrixTransformSetValue(OCIO_MatrixTransformRcPtr *mt,
+ const float *m44,
+ const float *offset4)
{ /* Stores the given matrix and offset in the transform behind `mt`. */
+ FallbackTransform *transform = (FallbackTransform *)mt;
+ copy_m4_m4((float (*)[4])transform->matrix, (float (*)[4])m44); /* Flat float arrays are reinterpreted as rows of 4 floats for copy_m4_m4; the cast also drops const from m44. */
+ copy_v4_v4(transform->offset, offset4); /* Destination is float[4]. */
}
void FallbackImpl::matrixTransformRelease(OCIO_MatrixTransformRcPtr * /*mt*/)
{ /* Empty body: nothing is freed here. NOTE(review): createMatrixTransform() allocates with new - confirm the transform is always released through a processor that holds it. */
}
-void FallbackImpl::matrixTransformScale(float * /*m44*/,
- float * /*offset44*/,
- const float * /*scale4*/)
+void FallbackImpl::matrixTransformScale(float *m44,
+ float *offset4,
+ const float *scale4)
{ /* Fills m44 with a diagonal scale matrix and clears offset4; either output may be NULL and is then skipped. */
+ if (scale4 == NULL) { /* Without scale factors both outputs are left untouched. */
+ return;
+ }
+ if (m44 != NULL) {
+ memset(m44, 0, 16*sizeof(float)); /* Zero all 16 entries before setting the diagonal. */
+ m44[0] = scale4[0]; /* The diagonal of a flat 4x4 matrix sits at indices 0, 5, 10 and 15. */
+ m44[5] = scale4[1];
+ m44[10] = scale4[2];
+ m44[15] = scale4[3];
+ }
+ if (offset4 != NULL) {
+ offset4[0] = 0.0f; /* All four offset components are set to zero. */
+ offset4[1] = 0.0f;
+ offset4[2] = 0.0f;
+ offset4[3] = 0.0f;
+ }
}
bool FallbackImpl::supportGLSLDraw(void)