Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/kernel')
-rw-r--r--intern/cycles/kernel/CMakeLists.txt6
-rw-r--r--intern/cycles/kernel/kernel.h13
-rw-r--r--intern/cycles/kernel/kernel_accumulate.h8
-rw-r--r--intern/cycles/kernel/kernel_bvh.h24
-rw-r--r--intern/cycles/kernel/kernel_emission.h13
-rw-r--r--intern/cycles/kernel/kernel_light.h42
-rw-r--r--intern/cycles/kernel/kernel_path.h30
-rw-r--r--intern/cycles/kernel/kernel_sse2.cpp60
-rw-r--r--intern/cycles/kernel/kernel_sse3.cpp (renamed from intern/cycles/kernel/kernel_optimized.cpp)6
-rw-r--r--intern/cycles/kernel/kernel_types.h10
-rw-r--r--intern/cycles/kernel/osl/SConscript1
-rw-r--r--intern/cycles/kernel/shaders/node_musgrave_texture.osl24
-rw-r--r--intern/cycles/kernel/shaders/node_texture.h14
13 files changed, 148 insertions, 103 deletions
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 6d5b9a063a0..e83756b7c8a 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -12,7 +12,8 @@ set(INC_SYS
set(SRC
kernel.cpp
- kernel_optimized.cpp
+ kernel_sse2.cpp
+ kernel_sse3.cpp
kernel.cl
kernel.cu
)
@@ -149,7 +150,8 @@ include_directories(SYSTEM ${INC_SYS})
add_library(cycles_kernel ${SRC} ${SRC_HEADERS} ${SRC_CLOSURE_HEADERS} ${SRC_SVM_HEADERS})
if(WITH_CYCLES_OPTIMIZED_KERNEL)
- set_source_files_properties(kernel_optimized.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_OPTIMIZED_KERNEL_FLAGS}")
+ set_source_files_properties(kernel_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
+ set_source_files_properties(kernel_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
endif()
if(WITH_CYCLES_CUDA)
diff --git a/intern/cycles/kernel/kernel.h b/intern/cycles/kernel/kernel.h
index 26c0bcd6d1a..20ea5a61906 100644
--- a/intern/cycles/kernel/kernel.h
+++ b/intern/cycles/kernel/kernel.h
@@ -44,11 +44,18 @@ void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output,
int type, int i);
#ifdef WITH_OPTIMIZED_KERNEL
-void kernel_cpu_optimized_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state,
+void kernel_cpu_sse2_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state,
int sample, int x, int y, int offset, int stride);
-void kernel_cpu_optimized_tonemap(KernelGlobals *kg, uchar4 *rgba, float *buffer,
+void kernel_cpu_sse2_tonemap(KernelGlobals *kg, uchar4 *rgba, float *buffer,
int sample, int resolution, int x, int y, int offset, int stride);
-void kernel_cpu_optimized_shader(KernelGlobals *kg, uint4 *input, float4 *output,
+void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output,
+ int type, int i);
+
+void kernel_cpu_sse3_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state,
+ int sample, int x, int y, int offset, int stride);
+void kernel_cpu_sse3_tonemap(KernelGlobals *kg, uchar4 *rgba, float *buffer,
+ int sample, int resolution, int x, int y, int offset, int stride);
+void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output,
int type, int i);
#endif
diff --git a/intern/cycles/kernel/kernel_accumulate.h b/intern/cycles/kernel/kernel_accumulate.h
index 86177301357..1cf230634fc 100644
--- a/intern/cycles/kernel/kernel_accumulate.h
+++ b/intern/cycles/kernel/kernel_accumulate.h
@@ -220,7 +220,7 @@ __device_inline void path_radiance_accum_ao(PathRadiance *L, float3 throughput,
#endif
}
-__device_inline void path_radiance_accum_light(PathRadiance *L, float3 throughput, BsdfEval *bsdf_eval, float3 shadow, int bounce, bool is_lamp)
+__device_inline void path_radiance_accum_light(PathRadiance *L, float3 throughput, BsdfEval *bsdf_eval, float3 shadow, float shadow_fac, int bounce, bool is_lamp)
{
#ifdef __PASSES__
if(L->use_light_pass) {
@@ -233,9 +233,9 @@ __device_inline void path_radiance_accum_light(PathRadiance *L, float3 throughpu
if(is_lamp) {
float3 sum = throughput*(bsdf_eval->diffuse + bsdf_eval->glossy + bsdf_eval->transmission);
- L->shadow.x += shadow.x;
- L->shadow.y += shadow.y;
- L->shadow.z += shadow.z;
+ L->shadow.x += shadow.x*shadow_fac;
+ L->shadow.y += shadow.y*shadow_fac;
+ L->shadow.z += shadow.z*shadow_fac;
L->shadow.w += average(sum);
}
}
diff --git a/intern/cycles/kernel/kernel_bvh.h b/intern/cycles/kernel/kernel_bvh.h
index 1a85b5bbefd..2b9ebf35d0c 100644
--- a/intern/cycles/kernel/kernel_bvh.h
+++ b/intern/cycles/kernel/kernel_bvh.h
@@ -126,21 +126,21 @@ __device_inline void bvh_node_intersect(KernelGlobals *kg,
/* intersect ray against child nodes */
float3 ood = P * idir;
- float c0lox = n0xy.x * idir.x - ood.x;
- float c0hix = n0xy.y * idir.x - ood.x;
- float c0loy = n0xy.z * idir.y - ood.y;
- float c0hiy = n0xy.w * idir.y - ood.y;
- float c0loz = nz.x * idir.z - ood.z;
- float c0hiz = nz.y * idir.z - ood.z;
+ NO_EXTENDED_PRECISION float c0lox = n0xy.x * idir.x - ood.x;
+ NO_EXTENDED_PRECISION float c0hix = n0xy.y * idir.x - ood.x;
+ NO_EXTENDED_PRECISION float c0loy = n0xy.z * idir.y - ood.y;
+ NO_EXTENDED_PRECISION float c0hiy = n0xy.w * idir.y - ood.y;
+ NO_EXTENDED_PRECISION float c0loz = nz.x * idir.z - ood.z;
+ NO_EXTENDED_PRECISION float c0hiz = nz.y * idir.z - ood.z;
NO_EXTENDED_PRECISION float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
NO_EXTENDED_PRECISION float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
- float c1loz = nz.z * idir.z - ood.z;
- float c1hiz = nz.w * idir.z - ood.z;
- float c1lox = n1xy.x * idir.x - ood.x;
- float c1hix = n1xy.y * idir.x - ood.x;
- float c1loy = n1xy.z * idir.y - ood.y;
- float c1hiy = n1xy.w * idir.y - ood.y;
+ NO_EXTENDED_PRECISION float c1loz = nz.z * idir.z - ood.z;
+ NO_EXTENDED_PRECISION float c1hiz = nz.w * idir.z - ood.z;
+ NO_EXTENDED_PRECISION float c1lox = n1xy.x * idir.x - ood.x;
+ NO_EXTENDED_PRECISION float c1hix = n1xy.y * idir.x - ood.x;
+ NO_EXTENDED_PRECISION float c1loy = n1xy.z * idir.y - ood.y;
+ NO_EXTENDED_PRECISION float c1hiy = n1xy.w * idir.y - ood.y;
NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
diff --git a/intern/cycles/kernel/kernel_emission.h b/intern/cycles/kernel/kernel_emission.h
index 54bc0717b60..4048bbd9dfc 100644
--- a/intern/cycles/kernel/kernel_emission.h
+++ b/intern/cycles/kernel/kernel_emission.h
@@ -76,7 +76,7 @@ __device float3 direct_emissive_eval(KernelGlobals *kg, float rando,
__device bool direct_emission(KernelGlobals *kg, ShaderData *sd, int lindex,
float randt, float rando, float randu, float randv, Ray *ray, BsdfEval *eval,
- int *lamp)
+ bool *is_lamp)
{
LightSample ls;
@@ -92,12 +92,6 @@ __device bool direct_emission(KernelGlobals *kg, ShaderData *sd, int lindex,
light_sample(kg, randt, randu, randv, sd->time, sd->P, &ls);
}
- /* return lamp index for MIS */
- if(ls.use_mis)
- *lamp = ls.lamp;
- else
- *lamp= ~0;
-
if(ls.pdf == 0.0f)
return false;
@@ -114,7 +108,7 @@ __device bool direct_emission(KernelGlobals *kg, ShaderData *sd, int lindex,
shader_bsdf_eval(kg, sd, ls.D, eval, &bsdf_pdf);
- if(ls.use_mis) {
+ if(ls.shader & SHADER_USE_MIS) {
/* multiple importance sampling */
float mis_weight = power_heuristic(ls.pdf, bsdf_pdf);
light_eval *= mis_weight;
@@ -146,6 +140,9 @@ __device bool direct_emission(KernelGlobals *kg, ShaderData *sd, int lindex,
ray->t = 0.0f;
}
+ /* return if it's a lamp for shadow pass */
+ *is_lamp = (ls.prim == ~0);
+
return true;
}
diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h
index 6ba3e439329..10a32226f17 100644
--- a/intern/cycles/kernel/kernel_light.h
+++ b/intern/cycles/kernel/kernel_light.h
@@ -31,7 +31,6 @@ typedef struct LightSample {
int prim; /* primitive id for triangle/curve ligths */
int shader; /* shader id */
int lamp; /* lamp id */
- int use_mis; /* for lamps with size zero */
LightType type; /* type of light */
} LightSample;
@@ -218,11 +217,10 @@ __device void lamp_light_sample(KernelGlobals *kg, int lamp,
LightType type = (LightType)__float_as_int(data0.x);
ls->type = type;
-#ifdef __LAMP_MIS__
- ls->use_mis = true;
-#else
- ls->use_mis = false;
-#endif
+ ls->shader = __float_as_int(data1.x);
+ ls->object = ~0;
+ ls->prim = ~0;
+ ls->lamp = lamp;
if(type == LIGHT_DISTANT) {
/* distant light */
@@ -233,10 +231,6 @@ __device void lamp_light_sample(KernelGlobals *kg, int lamp,
if(radius > 0.0f)
D = distant_light_sample(D, radius, randu, randv);
-#ifdef __LAMP_MIS__
- else
- ls->use_mis = false;
-#endif
ls->P = D;
ls->Ng = D;
@@ -257,9 +251,6 @@ __device void lamp_light_sample(KernelGlobals *kg, int lamp,
ls->D = -D;
ls->t = FLT_MAX;
ls->eval_fac = 1.0f;
-#ifndef __LAMP_MIS__
- ls->use_mis = true;
-#endif
}
#endif
else {
@@ -271,10 +262,6 @@ __device void lamp_light_sample(KernelGlobals *kg, int lamp,
if(radius > 0.0f)
/* sphere light */
ls->P += sphere_light_sample(P, ls->P, radius, randu, randv);
-#ifdef __LAMP_MIS__
- else
- ls->use_mis = false;
-#endif
ls->D = normalize_len(ls->P - P, &ls->t);
ls->Ng = -ls->D;
@@ -304,13 +291,6 @@ __device void lamp_light_sample(KernelGlobals *kg, int lamp,
float invarea = data2.x;
- if(invarea == 0.0f) {
-#ifdef __LAMP_MIS__
- ls->use_mis = false;
-#endif
- invarea = 1.0f;
- }
-
ls->eval_fac = 0.25f*invarea;
ls->pdf = invarea;
}
@@ -318,11 +298,6 @@ __device void lamp_light_sample(KernelGlobals *kg, int lamp,
ls->eval_fac *= kernel_data.integrator.inv_pdf_lights;
ls->pdf *= lamp_light_pdf(kg, ls->Ng, -ls->D, ls->t);
}
-
- ls->shader = __float_as_int(data1.x);
- ls->object = ~0;
- ls->prim = ~0;
- ls->lamp = lamp;
}
__device bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D, float t, LightSample *ls)
@@ -336,7 +311,9 @@ __device bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D, f
ls->object = ~0;
ls->prim = ~0;
ls->lamp = lamp;
- ls->use_mis = false; /* flag not used for eval */
+
+ if(!(ls->shader & SHADER_USE_MIS))
+ return false;
if(type == LIGHT_DISTANT) {
/* distant light */
@@ -475,7 +452,7 @@ __device void triangle_light_sample(KernelGlobals *kg, int prim, int object,
ls->object = object;
ls->prim = prim;
ls->lamp = ~0;
- ls->use_mis = true;
+ ls->shader |= SHADER_USE_MIS;
ls->t = 0.0f;
ls->type = LIGHT_AREA;
ls->eval_fac = 1.0f;
@@ -529,11 +506,10 @@ __device void curve_segment_light_sample(KernelGlobals *kg, int prim, int object
ls->object = object;
ls->prim = prim;
ls->lamp = ~0;
- ls->use_mis = true;
ls->t = 0.0f;
ls->type = LIGHT_STRAND;
ls->eval_fac = 1.0f;
- ls->shader = __float_as_int(v00.z);
+ ls->shader = __float_as_int(v00.z) | SHADER_USE_MIS;
object_transform_light_sample(kg, ls, object, time);
}
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index 1a5df66e6c2..9b13f4d2a42 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -254,7 +254,7 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
bool hit = scene_intersect(kg, &ray, visibility, &isect);
#ifdef __LAMP_MIS__
- if(kernel_data.integrator.pdf_lights > 0.0f && !(state.flag & PATH_RAY_CAMERA)) {
+ if(kernel_data.integrator.use_lamp_mis && !(state.flag & PATH_RAY_CAMERA)) {
/* ray starting from previous non-transparent bounce */
Ray light_ray;
@@ -399,20 +399,19 @@ __device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample,
Ray light_ray;
BsdfEval L_light;
- int lamp;
+ bool is_lamp;
#ifdef __OBJECT_MOTION__
light_ray.time = sd.time;
#endif
- if(direct_emission(kg, &sd, -1, light_t, light_o, light_u, light_v, &light_ray, &L_light, &lamp)) {
+ if(direct_emission(kg, &sd, -1, light_t, light_o, light_u, light_v, &light_ray, &L_light, &is_lamp)) {
/* trace shadow ray */
float3 shadow;
if(!shadow_blocked(kg, &state, &light_ray, &shadow)) {
/* accumulate */
- bool is_lamp = (lamp != ~0);
- path_radiance_accum_light(&L, throughput, &L_light, shadow, state.bounce, is_lamp);
+ path_radiance_accum_light(&L, throughput, &L_light, shadow, 1.0f, state.bounce, is_lamp);
}
}
}
@@ -501,7 +500,7 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray
bool hit = scene_intersect(kg, &ray, visibility, &isect);
#ifdef __LAMP_MIS__
- if(kernel_data.integrator.pdf_lights > 0.0f && !(state.flag & PATH_RAY_CAMERA)) {
+ if(kernel_data.integrator.use_lamp_mis && !(state.flag & PATH_RAY_CAMERA)) {
/* ray starting from previous non-transparent bounce */
Ray light_ray;
@@ -612,21 +611,20 @@ __device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray
Ray light_ray;
BsdfEval L_light;
- int lamp;
+ bool is_lamp;
#ifdef __OBJECT_MOTION__
light_ray.time = sd.time;
#endif
/* sample random light */
- if(direct_emission(kg, &sd, -1, light_t, light_o, light_u, light_v, &light_ray, &L_light, &lamp)) {
+ if(direct_emission(kg, &sd, -1, light_t, light_o, light_u, light_v, &light_ray, &L_light, &is_lamp)) {
/* trace shadow ray */
float3 shadow;
if(!shadow_blocked(kg, &state, &light_ray, &shadow)) {
/* accumulate */
- bool is_lamp = (lamp != ~0);
- path_radiance_accum_light(L, throughput, &L_light, shadow, state.bounce, is_lamp);
+ path_radiance_accum_light(L, throughput, &L_light, shadow, 1.0f, state.bounce, is_lamp);
}
}
}
@@ -819,7 +817,7 @@ __device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sam
if(sd.flag & SD_BSDF_HAS_EVAL) {
Ray light_ray;
BsdfEval L_light;
- int lamp;
+ bool is_lamp;
#ifdef __OBJECT_MOTION__
light_ray.time = sd.time;
@@ -837,14 +835,13 @@ __device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sam
float light_u = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_LIGHT_U);
float light_v = path_rng(kg, rng, sample*num_samples + j, rng_offset + PRNG_LIGHT_V);
- if(direct_emission(kg, &sd, i, 0.0f, 0.0f, light_u, light_v, &light_ray, &L_light, &lamp)) {
+ if(direct_emission(kg, &sd, i, 0.0f, 0.0f, light_u, light_v, &light_ray, &L_light, &is_lamp)) {
/* trace shadow ray */
float3 shadow;
if(!shadow_blocked(kg, &state, &light_ray, &shadow)) {
/* accumulate */
- bool is_lamp = (lamp != ~0);
- path_radiance_accum_light(&L, throughput*num_samples_inv, &L_light, shadow, state.bounce, is_lamp);
+ path_radiance_accum_light(&L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state.bounce, is_lamp);
}
}
}
@@ -867,14 +864,13 @@ __device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sam
if(kernel_data.integrator.num_all_lights)
light_t = 0.5f*light_t;
- if(direct_emission(kg, &sd, -1, light_t, 0.0f, light_u, light_v, &light_ray, &L_light, &lamp)) {
+ if(direct_emission(kg, &sd, -1, light_t, 0.0f, light_u, light_v, &light_ray, &L_light, &is_lamp)) {
/* trace shadow ray */
float3 shadow;
if(!shadow_blocked(kg, &state, &light_ray, &shadow)) {
/* accumulate */
- bool is_lamp = (lamp != ~0);
- path_radiance_accum_light(&L, throughput*num_samples_inv, &L_light, shadow, state.bounce, is_lamp);
+ path_radiance_accum_light(&L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state.bounce, is_lamp);
}
}
}
diff --git a/intern/cycles/kernel/kernel_sse2.cpp b/intern/cycles/kernel/kernel_sse2.cpp
new file mode 100644
index 00000000000..e9d482ae5cf
--- /dev/null
+++ b/intern/cycles/kernel/kernel_sse2.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/* Optimized CPU kernel entry points. This file is compiled with SSE2
+ * optimization flags and nearly all functions inlined, while kernel.cpp
+ * is compiled without for other CPU's. */
+
+#ifdef WITH_OPTIMIZED_KERNEL
+
+#include "kernel.h"
+#include "kernel_compat_cpu.h"
+#include "kernel_math.h"
+#include "kernel_types.h"
+#include "kernel_globals.h"
+#include "kernel_film.h"
+#include "kernel_path.h"
+#include "kernel_displace.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Path Tracing */
+
+void kernel_cpu_sse2_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride)
+{
+ kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
+}
+
+/* Tonemapping */
+
+void kernel_cpu_sse2_tonemap(KernelGlobals *kg, uchar4 *rgba, float *buffer, int sample, int resolution, int x, int y, int offset, int stride)
+{
+ kernel_film_tonemap(kg, rgba, buffer, sample, resolution, x, y, offset, stride);
+}
+
+/* Shader Evaluate */
+
+void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i)
+{
+ kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
+}
+
+CCL_NAMESPACE_END
+
+#endif
+
diff --git a/intern/cycles/kernel/kernel_optimized.cpp b/intern/cycles/kernel/kernel_sse3.cpp
index 0b662095133..9a8b389cf68 100644
--- a/intern/cycles/kernel/kernel_optimized.cpp
+++ b/intern/cycles/kernel/kernel_sse3.cpp
@@ -35,21 +35,21 @@ CCL_NAMESPACE_BEGIN
/* Path Tracing */
-void kernel_cpu_optimized_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride)
+void kernel_cpu_sse3_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride)
{
kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
}
/* Tonemapping */
-void kernel_cpu_optimized_tonemap(KernelGlobals *kg, uchar4 *rgba, float *buffer, int sample, int resolution, int x, int y, int offset, int stride)
+void kernel_cpu_sse3_tonemap(KernelGlobals *kg, uchar4 *rgba, float *buffer, int sample, int resolution, int x, int y, int offset, int stride)
{
kernel_film_tonemap(kg, rgba, buffer, sample, resolution, x, y, offset, stride);
}
/* Shader Evaluate */
-void kernel_cpu_optimized_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i)
+void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i)
{
kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
}
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 1236f43e018..52bdddc72fb 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -44,7 +44,6 @@ CCL_NAMESPACE_BEGIN
#define __KERNEL_SHADING__
#define __KERNEL_ADV_SHADING__
#define __NON_PROGRESSIVE__
-#define __LAMP_MIS__
#define __HAIR__
#ifdef WITH_OSL
#define __OSL__
@@ -56,9 +55,6 @@ CCL_NAMESPACE_BEGIN
#if __CUDA_ARCH__ >= 200
#define __KERNEL_ADV_SHADING__
#endif
-#if __CUDA_ARCH__ >= 210
-#define __LAMP_MIS__
-#endif
#endif
#ifdef __KERNEL_OPENCL__
@@ -116,6 +112,7 @@ CCL_NAMESPACE_BEGIN
#define __TRANSPARENT_SHADOWS__
#define __PASSES__
#define __BACKGROUND_MIS__
+#define __LAMP_MIS__
#define __AO__
#define __ANISOTROPIC__
#define __CAMERA_MOTION__
@@ -288,8 +285,9 @@ typedef enum ShaderFlag {
SHADER_SMOOTH_NORMAL = (1 << 31),
SHADER_CAST_SHADOW = (1 << 30),
SHADER_AREA_LIGHT = (1 << 29),
+ SHADER_USE_MIS = (1 << 28),
- SHADER_MASK = ~(SHADER_SMOOTH_NORMAL|SHADER_CAST_SHADOW|SHADER_AREA_LIGHT)
+ SHADER_MASK = ~(SHADER_SMOOTH_NORMAL|SHADER_CAST_SHADOW|SHADER_AREA_LIGHT|SHADER_USE_MIS)
} ShaderFlag;
/* Light Type */
@@ -680,7 +678,7 @@ typedef struct KernelIntegrator {
int transmission_samples;
int ao_samples;
int mesh_light_samples;
- int pad1;
+ int use_lamp_mis;
} KernelIntegrator;
typedef struct KernelBVH {
diff --git a/intern/cycles/kernel/osl/SConscript b/intern/cycles/kernel/osl/SConscript
index fe7fec463a7..09899567128 100644
--- a/intern/cycles/kernel/osl/SConscript
+++ b/intern/cycles/kernel/osl/SConscript
@@ -46,6 +46,7 @@ defs.append('WITH_OSL')
if env['OURPLATFORM'] in ('win32-vc', 'win64-vc'):
cxxflags.append('-DBOOST_NO_RTTI -DBOOST_NO_TYPEID'.split())
incs.append(env['BF_PTHREADS_INC'])
+ defs.append('OSL_STATIC_LIBRARY')
else:
cxxflags.append('-fno-rtti -DBOOST_NO_RTTI -DBOOST_NO_TYPEID'.split())
diff --git a/intern/cycles/kernel/shaders/node_musgrave_texture.osl b/intern/cycles/kernel/shaders/node_musgrave_texture.osl
index 38232ea0aeb..8675c23d0d8 100644
--- a/intern/cycles/kernel/shaders/node_musgrave_texture.osl
+++ b/intern/cycles/kernel/shaders/node_musgrave_texture.osl
@@ -37,14 +37,14 @@ float noise_musgrave_fBm(point p, string basis, float H, float lacunarity, float
int i;
for (i = 0; i < (int)octaves; i++) {
- value += safe_noise(p) * pwr;
+ value += safe_noise(p, 0) * pwr;
pwr *= pwHL;
p *= lacunarity;
}
rmd = octaves - floor(octaves);
if (rmd != 0.0)
- value += rmd * safe_noise(p) * pwr;
+ value += rmd * safe_noise(p, 0) * pwr;
return value;
}
@@ -65,14 +65,14 @@ float noise_musgrave_multi_fractal(point p, string basis, float H, float lacunar
int i;
for (i = 0; i < (int)octaves; i++) {
- value *= (pwr * safe_noise(p) + 1.0);
+ value *= (pwr * safe_noise(p, 0) + 1.0);
pwr *= pwHL;
p *= lacunarity;
}
rmd = octaves - floor(octaves);
if (rmd != 0.0)
- value *= (rmd * pwr * safe_noise(p) + 1.0); /* correct? */
+ value *= (rmd * pwr * safe_noise(p, 0) + 1.0); /* correct? */
return value;
}
@@ -93,11 +93,11 @@ float noise_musgrave_hetero_terrain(point p, string basis, float H, float lacuna
int i;
/* first unscaled octave of function; later octaves are scaled */
- value = offset + safe_noise(p);
+ value = offset + safe_noise(p, 0);
p *= lacunarity;
for (i = 1; i < (int)octaves; i++) {
- increment = (safe_noise(p) + offset) * pwr * value;
+ increment = (safe_noise(p, 0) + offset) * pwr * value;
value += increment;
pwr *= pwHL;
p *= lacunarity;
@@ -105,7 +105,7 @@ float noise_musgrave_hetero_terrain(point p, string basis, float H, float lacuna
rmd = octaves - floor(octaves);
if (rmd != 0.0) {
- increment = (safe_noise(p) + offset) * pwr * value;
+ increment = (safe_noise(p, 0) + offset) * pwr * value;
value += rmd * increment;
}
@@ -128,7 +128,7 @@ float noise_musgrave_hybrid_multi_fractal(point p, string basis, float H,
float pwr = pwHL;
int i;
- result = safe_noise(p) + offset;
+ result = safe_noise(p, 0) + offset;
weight = gain * result;
p *= lacunarity;
@@ -136,7 +136,7 @@ float noise_musgrave_hybrid_multi_fractal(point p, string basis, float H,
if (weight > 1.0)
weight = 1.0;
- signal = (safe_noise(p) + offset) * pwr;
+ signal = (safe_noise(p, 0) + offset) * pwr;
pwr *= pwHL;
result += weight * signal;
weight *= gain * signal;
@@ -145,7 +145,7 @@ float noise_musgrave_hybrid_multi_fractal(point p, string basis, float H,
rmd = octaves - floor(octaves);
if (rmd != 0.0)
- result += rmd * ((safe_noise(p) + offset) * pwr);
+ result += rmd * ((safe_noise(p, 0) + offset) * pwr);
return result;
}
@@ -166,7 +166,7 @@ float noise_musgrave_ridged_multi_fractal(point p, string basis, float H,
float pwr = pwHL;
int i;
- signal = offset - fabs(safe_noise(p));
+ signal = offset - fabs(safe_noise(p, 0));
signal *= signal;
result = signal;
weight = 1.0;
@@ -174,7 +174,7 @@ float noise_musgrave_ridged_multi_fractal(point p, string basis, float H,
for (i = 1; i < (int)octaves; i++) {
p *= lacunarity;
weight = clamp(signal * gain, 0.0, 1.0);
- signal = offset - fabs(safe_noise(p));
+ signal = offset - fabs(safe_noise(p, 0));
signal *= signal;
signal *= weight;
result += signal * pwr;
diff --git a/intern/cycles/kernel/shaders/node_texture.h b/intern/cycles/kernel/shaders/node_texture.h
index 2de0fc0ea57..463c68a6c27 100644
--- a/intern/cycles/kernel/shaders/node_texture.h
+++ b/intern/cycles/kernel/shaders/node_texture.h
@@ -151,9 +151,17 @@ float voronoi_CrS(point p) { return 2.0 * voronoi_Cr(p) - 1.0; }
/* Noise Bases */
-float safe_noise(point p)
+float safe_noise(point p, int type)
{
- float f = noise(p);
+ float f = 0.0;
+
+ /* Perlin noise in range -1..1 */
+ if (type == 0)
+ f = noise("perlin", p);
+
+ /* Perlin noise in range 0..1 */
+ else
+ f = noise(p);
/* can happen for big coordinates, things even out to 0.5 then anyway */
if(!isfinite(f))
@@ -167,7 +175,7 @@ float noise_basis(point p, string basis)
float result = 0.0;
if (basis == "Perlin")
- result = safe_noise(p); /* returns perlin noise in range 0..1 */
+ result = safe_noise(p, 1);
if (basis == "Voronoi F1")
result = voronoi_F1S(p);
if (basis == "Voronoi F2")