git.blender.org/blender.git
author     Lukas Stockner <lukas.stockner@freenet.de>   2017-05-03 19:33:02 +0300
committer  Lukas Stockner <lukas.stockner@freenet.de>   2017-05-03 19:33:02 +0300
commit     82e242cc7287ce56d10f88a76de5418a7b20e239 (patch)
tree       c513d6537cfd3d3a1b3d098a8a4f45c55611f165 /intern
parent     2564d929fc5f75403bc5ca051fb33bc7b8874646 (diff)
parent     15189baa5231d70c1363192eb01b83fa946f36f0 (diff)
Merge branch 'master' into blender2.8
Diffstat (limited to 'intern')
-rw-r--r--  intern/cycles/blender/addon/__init__.py | 3
-rw-r--r--  intern/cycles/blender/addon/engine.py | 33
-rw-r--r--  intern/cycles/blender/addon/properties.py | 35
-rw-r--r--  intern/cycles/blender/addon/ui.py | 14
-rw-r--r--  intern/cycles/blender/blender_curves.cpp | 14
-rw-r--r--  intern/cycles/blender/blender_mesh.cpp | 10
-rw-r--r--  intern/cycles/blender/blender_python.cpp | 8
-rw-r--r--  intern/cycles/blender/blender_session.cpp | 117
-rw-r--r--  intern/cycles/blender/blender_sync.cpp | 88
-rw-r--r--  intern/cycles/blender/blender_sync.h | 4
-rw-r--r--  intern/cycles/device/device_split_kernel.cpp | 13
-rw-r--r--  intern/cycles/device/device_split_kernel.h | 2
-rw-r--r--  intern/cycles/device/opencl/opencl.h | 3
-rw-r--r--  intern/cycles/kernel/CMakeLists.txt | 7
-rw-r--r--  intern/cycles/kernel/kernel_path.h | 6
-rw-r--r--  intern/cycles/kernel/kernel_path_branched.h | 5
-rw-r--r--  intern/cycles/kernel/kernel_path_surface.h | 2
-rw-r--r--  intern/cycles/kernel/kernel_subsurface.h | 4
-rw-r--r--  intern/cycles/kernel/kernel_types.h | 74
-rw-r--r--  intern/cycles/kernel/kernels/cpu/kernel_cpu.h | 2
-rw-r--r--  intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h | 10
-rw-r--r--  intern/cycles/kernel/kernels/cuda/kernel_split.cu | 8
-rw-r--r--  intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl | 3
-rw-r--r--  intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl | 27
-rw-r--r--  intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl | 28
-rw-r--r--  intern/cycles/kernel/kernels/opencl/kernel_split.cl | 2
-rw-r--r--  intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl | 3
-rw-r--r--  intern/cycles/kernel/split/kernel_branched.h | 150
-rw-r--r--  intern/cycles/kernel/split/kernel_data_init.h | 19
-rw-r--r--  intern/cycles/kernel/split/kernel_direct_lighting.h | 66
-rw-r--r--  intern/cycles/kernel/split/kernel_do_volume.h | 180
-rw-r--r--  intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h | 105
-rw-r--r--  intern/cycles/kernel/split/kernel_indirect_background.h | 6
-rw-r--r--  intern/cycles/kernel/split/kernel_indirect_subsurface.h | 37
-rw-r--r--  intern/cycles/kernel/split/kernel_next_iteration_setup.h | 220
-rw-r--r--  intern/cycles/kernel/split/kernel_shader_eval.h | 69
-rw-r--r--  intern/cycles/kernel/split/kernel_shader_setup.h | 70
-rw-r--r--  intern/cycles/kernel/split/kernel_shader_sort.h | 97
-rw-r--r--  intern/cycles/kernel/split/kernel_shadow_blocked_ao.h | 42
-rw-r--r--  intern/cycles/kernel/split/kernel_shadow_blocked_dl.h | 79
-rw-r--r--  intern/cycles/kernel/split/kernel_split_common.h | 61
-rw-r--r--  intern/cycles/kernel/split/kernel_split_data_types.h | 50
-rw-r--r--  intern/cycles/kernel/split/kernel_subsurface_scatter.h | 309
-rw-r--r--  intern/cycles/kernel/svm/svm_image.h | 46
-rw-r--r--  intern/cycles/util/util_color.h | 43
45 files changed, 1581 insertions, 593 deletions
diff --git a/intern/cycles/blender/addon/__init__.py b/intern/cycles/blender/addon/__init__.py
index 7c58df68011..1cb7835d14d 100644
--- a/intern/cycles/blender/addon/__init__.py
+++ b/intern/cycles/blender/addon/__init__.py
@@ -102,6 +102,9 @@ class CyclesRender(bpy.types.RenderEngine):
else:
self.report({'ERROR'}, "OSL support disabled in this build.")
+ def update_render_passes(self, scene, srl):
+ engine.register_passes(self, scene, srl)
+
def engine_exit():
engine.exit()
diff --git a/intern/cycles/blender/addon/engine.py b/intern/cycles/blender/addon/engine.py
index a418cdd6821..d74ae5f3061 100644
--- a/intern/cycles/blender/addon/engine.py
+++ b/intern/cycles/blender/addon/engine.py
@@ -208,3 +208,36 @@ def with_network():
def system_info():
import _cycles
return _cycles.system_info()
+
+def register_passes(engine, scene, srl):
+ engine.register_pass(scene, srl, "Combined", 4, "RGBA", 'COLOR')
+
+ if srl.use_pass_z: engine.register_pass(scene, srl, "Depth", 1, "Z", 'VALUE')
+ if srl.use_pass_mist: engine.register_pass(scene, srl, "Mist", 1, "Z", 'VALUE')
+ if srl.use_pass_normal: engine.register_pass(scene, srl, "Normal", 3, "XYZ", 'VECTOR')
+ if srl.use_pass_vector: engine.register_pass(scene, srl, "Vector", 4, "XYZW", 'VECTOR')
+ if srl.use_pass_uv: engine.register_pass(scene, srl, "UV", 3, "UVA", 'VECTOR')
+ if srl.use_pass_object_index: engine.register_pass(scene, srl, "IndexOB", 1, "X", 'VALUE')
+ if srl.use_pass_material_index: engine.register_pass(scene, srl, "IndexMA", 1, "X", 'VALUE')
+ if srl.use_pass_shadow: engine.register_pass(scene, srl, "Shadow", 3, "RGB", 'COLOR')
+ if srl.use_pass_ambient_occlusion: engine.register_pass(scene, srl, "AO", 3, "RGB", 'COLOR')
+ if srl.use_pass_diffuse_direct: engine.register_pass(scene, srl, "DiffDir", 3, "RGB", 'COLOR')
+ if srl.use_pass_diffuse_indirect: engine.register_pass(scene, srl, "DiffInd", 3, "RGB", 'COLOR')
+ if srl.use_pass_diffuse_color: engine.register_pass(scene, srl, "DiffCol", 3, "RGB", 'COLOR')
+ if srl.use_pass_glossy_direct: engine.register_pass(scene, srl, "GlossDir", 3, "RGB", 'COLOR')
+ if srl.use_pass_glossy_indirect: engine.register_pass(scene, srl, "GlossInd", 3, "RGB", 'COLOR')
+ if srl.use_pass_glossy_color: engine.register_pass(scene, srl, "GlossCol", 3, "RGB", 'COLOR')
+ if srl.use_pass_transmission_direct: engine.register_pass(scene, srl, "TransDir", 3, "RGB", 'COLOR')
+ if srl.use_pass_transmission_indirect: engine.register_pass(scene, srl, "TransInd", 3, "RGB", 'COLOR')
+ if srl.use_pass_transmission_color: engine.register_pass(scene, srl, "TransCol", 3, "RGB", 'COLOR')
+ if srl.use_pass_subsurface_direct: engine.register_pass(scene, srl, "SubsurfaceDir", 3, "RGB", 'COLOR')
+ if srl.use_pass_subsurface_indirect: engine.register_pass(scene, srl, "SubsurfaceInd", 3, "RGB", 'COLOR')
+ if srl.use_pass_subsurface_color: engine.register_pass(scene, srl, "SubsurfaceCol", 3, "RGB", 'COLOR')
+ if srl.use_pass_emit: engine.register_pass(scene, srl, "Emit", 3, "RGB", 'COLOR')
+ if srl.use_pass_environment: engine.register_pass(scene, srl, "Env", 3, "RGB", 'COLOR')
+
+ crl = srl.cycles
+ if crl.pass_debug_bvh_traversed_nodes: engine.register_pass(scene, srl, "Debug BVH Traversed Nodes", 1, "X", 'VALUE')
+ if crl.pass_debug_bvh_traversed_instances: engine.register_pass(scene, srl, "Debug BVH Traversed Instances", 1, "X", 'VALUE')
+ if crl.pass_debug_bvh_intersections: engine.register_pass(scene, srl, "Debug BVH Intersections", 1, "X", 'VALUE')
+ if crl.pass_debug_ray_bounces: engine.register_pass(scene, srl, "Debug Ray Bounces", 1, "X", 'VALUE')
diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py
index 53740efb627..a8a0f0bfc70 100644
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -1166,6 +1166,39 @@ class CyclesCurveRenderSettings(bpy.types.PropertyGroup):
def unregister(cls):
del bpy.types.Scene.cycles_curves
+class CyclesRenderLayerSettings(bpy.types.PropertyGroup):
+ @classmethod
+ def register(cls):
+ bpy.types.SceneRenderLayer.cycles = PointerProperty(
+ name="Cycles SceneRenderLayer Settings",
+ description="Cycles SceneRenderLayer Settings",
+ type=cls,
+ )
+ cls.pass_debug_bvh_traversed_nodes = BoolProperty(
+ name="Debug BVH Traversed Nodes",
+ description="Store Debug BVH Traversed Nodes pass",
+ default=False,
+ )
+ cls.pass_debug_bvh_traversed_instances = BoolProperty(
+ name="Debug BVH Traversed Instances",
+ description="Store Debug BVH Traversed Instances pass",
+ default=False,
+ )
+ cls.pass_debug_bvh_intersections = BoolProperty(
+ name="Debug BVH Intersections",
+ description="Store Debug BVH Intersections",
+ default=False,
+ )
+ cls.pass_debug_ray_bounces = BoolProperty(
+ name="Debug Ray Bounces",
+ description="Store Debug Ray Bounces pass",
+ default=False,
+ )
+
+ @classmethod
+ def unregister(cls):
+ del bpy.types.SceneRenderLayer.cycles
+
class CyclesCurveSettings(bpy.types.PropertyGroup):
@classmethod
@@ -1324,6 +1357,7 @@ def register():
bpy.utils.register_class(CyclesCurveSettings)
bpy.utils.register_class(CyclesDeviceSettings)
bpy.utils.register_class(CyclesPreferences)
+ bpy.utils.register_class(CyclesRenderLayerSettings)
def unregister():
@@ -1339,3 +1373,4 @@ def unregister():
bpy.utils.unregister_class(CyclesCurveSettings)
bpy.utils.unregister_class(CyclesDeviceSettings)
bpy.utils.unregister_class(CyclesPreferences)
+ bpy.utils.unregister_class(CyclesRenderLayerSettings)
diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py
index 4423f3c266b..e81f7ba52b3 100644
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -78,7 +78,7 @@ def use_cuda(context):
def use_branched_path(context):
cscene = context.scene.cycles
- return (cscene.progressive == 'BRANCHED_PATH' and not use_opencl(context))
+ return (cscene.progressive == 'BRANCHED_PATH')
def use_sample_all_lights(context):
@@ -156,7 +156,6 @@ class CyclesRender_PT_sampling(CyclesButtonsPanel, Panel):
row = layout.row()
sub = row.row()
- sub.active = get_device_type(context) != 'OPENCL' or use_cpu(context)
sub.prop(cscene, "progressive", text="")
row.prop(cscene, "use_square_samples")
@@ -477,11 +476,14 @@ class CyclesRender_PT_layer_passes(CyclesButtonsPanel, Panel):
bl_options = {'DEFAULT_CLOSED'}
def draw(self, context):
+ import _cycles
+
layout = self.layout
scene = context.scene
rd = scene.render
rl = rd.layers.active
+ crl = rl.cycles
split = layout.split()
@@ -528,8 +530,12 @@ class CyclesRender_PT_layer_passes(CyclesButtonsPanel, Panel):
col.prop(rl, "use_pass_emit", text="Emission")
col.prop(rl, "use_pass_environment")
- if hasattr(rd, "debug_pass_type"):
- layout.prop(rd, "debug_pass_type")
+ if _cycles.with_cycles_debug:
+ col = layout.column()
+ col.prop(crl, "pass_debug_bvh_traversed_nodes")
+ col.prop(crl, "pass_debug_bvh_traversed_instances")
+ col.prop(crl, "pass_debug_bvh_intersections")
+ col.prop(crl, "pass_debug_ray_bounces")
class CyclesRender_PT_views(CyclesButtonsPanel, Panel):
diff --git a/intern/cycles/blender/blender_curves.cpp b/intern/cycles/blender/blender_curves.cpp
index ed361cc971e..42b985305ea 100644
--- a/intern/cycles/blender/blender_curves.cpp
+++ b/intern/cycles/blender/blender_curves.cpp
@@ -776,17 +776,17 @@ static void ExportCurveTriangleVcol(ParticleCurveData *CData,
for(int curvekey = CData->curve_firstkey[curve]; curvekey < CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1; curvekey++) {
for(int section = 0; section < resol; section++) {
- cdata[vertexindex] = color_float_to_byte(color_srgb_to_scene_linear(CData->curve_vcol[curve]));
+ cdata[vertexindex] = color_float_to_byte(color_srgb_to_scene_linear_v3(CData->curve_vcol[curve]));
vertexindex++;
- cdata[vertexindex] = color_float_to_byte(color_srgb_to_scene_linear(CData->curve_vcol[curve]));
+ cdata[vertexindex] = color_float_to_byte(color_srgb_to_scene_linear_v3(CData->curve_vcol[curve]));
vertexindex++;
- cdata[vertexindex] = color_float_to_byte(color_srgb_to_scene_linear(CData->curve_vcol[curve]));
+ cdata[vertexindex] = color_float_to_byte(color_srgb_to_scene_linear_v3(CData->curve_vcol[curve]));
vertexindex++;
- cdata[vertexindex] = color_float_to_byte(color_srgb_to_scene_linear(CData->curve_vcol[curve]));
+ cdata[vertexindex] = color_float_to_byte(color_srgb_to_scene_linear_v3(CData->curve_vcol[curve]));
vertexindex++;
- cdata[vertexindex] = color_float_to_byte(color_srgb_to_scene_linear(CData->curve_vcol[curve]));
+ cdata[vertexindex] = color_float_to_byte(color_srgb_to_scene_linear_v3(CData->curve_vcol[curve]));
vertexindex++;
- cdata[vertexindex] = color_float_to_byte(color_srgb_to_scene_linear(CData->curve_vcol[curve]));
+ cdata[vertexindex] = color_float_to_byte(color_srgb_to_scene_linear_v3(CData->curve_vcol[curve]));
vertexindex++;
}
}
@@ -1004,7 +1004,7 @@ void BlenderSync::sync_curves(Mesh *mesh,
for(size_t curve = 0; curve < CData.curve_vcol.size(); curve++)
if(!(CData.curve_keynum[curve] <= 1 || CData.curve_length[curve] == 0.0f))
- fdata[i++] = color_srgb_to_scene_linear(CData.curve_vcol[curve]);
+ fdata[i++] = color_srgb_to_scene_linear_v3(CData.curve_vcol[curve]);
}
}
}
diff --git a/intern/cycles/blender/blender_mesh.cpp b/intern/cycles/blender/blender_mesh.cpp
index 54571b1fea1..f13b9db7013 100644
--- a/intern/cycles/blender/blender_mesh.cpp
+++ b/intern/cycles/blender/blender_mesh.cpp
@@ -356,7 +356,7 @@ static void attr_create_vertex_color(Scene *scene,
int n = p->loop_total();
for(int i = 0; i < n; i++) {
float3 color = get_float3(l->data[p->loop_start() + i].color());
- *(cdata++) = color_float_to_byte(color_srgb_to_scene_linear(color));
+ *(cdata++) = color_float_to_byte(color_srgb_to_scene_linear_v3(color));
}
}
}
@@ -380,11 +380,11 @@ static void attr_create_vertex_color(Scene *scene,
face_split_tri_indices(nverts[i], face_flags[i], tri_a, tri_b);
uchar4 colors[4];
- colors[0] = color_float_to_byte(color_srgb_to_scene_linear(get_float3(c->color1())));
- colors[1] = color_float_to_byte(color_srgb_to_scene_linear(get_float3(c->color2())));
- colors[2] = color_float_to_byte(color_srgb_to_scene_linear(get_float3(c->color3())));
+ colors[0] = color_float_to_byte(color_srgb_to_scene_linear_v3(get_float3(c->color1())));
+ colors[1] = color_float_to_byte(color_srgb_to_scene_linear_v3(get_float3(c->color2())));
+ colors[2] = color_float_to_byte(color_srgb_to_scene_linear_v3(get_float3(c->color3())));
if(nverts[i] == 4) {
- colors[3] = color_float_to_byte(color_srgb_to_scene_linear(get_float3(c->color4())));
+ colors[3] = color_float_to_byte(color_srgb_to_scene_linear_v3(get_float3(c->color4())));
}
cdata[0] = colors[tri_a[0]];
diff --git a/intern/cycles/blender/blender_python.cpp b/intern/cycles/blender/blender_python.cpp
index 92393c10893..3c769881bb7 100644
--- a/intern/cycles/blender/blender_python.cpp
+++ b/intern/cycles/blender/blender_python.cpp
@@ -815,6 +815,14 @@ void *CCL_python_module_init()
PyModule_AddStringConstant(mod, "osl_version_string", "unknown");
#endif
+#ifdef WITH_CYCLES_DEBUG
+ PyModule_AddObject(mod, "with_cycles_debug", Py_True);
+ Py_INCREF(Py_True);
+#else
+ PyModule_AddObject(mod, "with_cycles_debug", Py_False);
+ Py_INCREF(Py_False);
+#endif
+
#ifdef WITH_NETWORK
PyModule_AddObject(mod, "with_network", Py_True);
Py_INCREF(Py_True);
diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp
index 3e851adbf54..6f2e7065d97 100644
--- a/intern/cycles/blender/blender_session.cpp
+++ b/intern/cycles/blender/blender_session.cpp
@@ -247,90 +247,6 @@ void BlenderSession::free_session()
delete session;
}
-static PassType get_pass_type(BL::RenderPass& b_pass)
-{
- switch(b_pass.type()) {
- case BL::RenderPass::type_COMBINED:
- return PASS_COMBINED;
-
- case BL::RenderPass::type_Z:
- return PASS_DEPTH;
- case BL::RenderPass::type_MIST:
- return PASS_MIST;
- case BL::RenderPass::type_NORMAL:
- return PASS_NORMAL;
- case BL::RenderPass::type_OBJECT_INDEX:
- return PASS_OBJECT_ID;
- case BL::RenderPass::type_UV:
- return PASS_UV;
- case BL::RenderPass::type_VECTOR:
- return PASS_MOTION;
- case BL::RenderPass::type_MATERIAL_INDEX:
- return PASS_MATERIAL_ID;
-
- case BL::RenderPass::type_DIFFUSE_DIRECT:
- return PASS_DIFFUSE_DIRECT;
- case BL::RenderPass::type_GLOSSY_DIRECT:
- return PASS_GLOSSY_DIRECT;
- case BL::RenderPass::type_TRANSMISSION_DIRECT:
- return PASS_TRANSMISSION_DIRECT;
- case BL::RenderPass::type_SUBSURFACE_DIRECT:
- return PASS_SUBSURFACE_DIRECT;
-
- case BL::RenderPass::type_DIFFUSE_INDIRECT:
- return PASS_DIFFUSE_INDIRECT;
- case BL::RenderPass::type_GLOSSY_INDIRECT:
- return PASS_GLOSSY_INDIRECT;
- case BL::RenderPass::type_TRANSMISSION_INDIRECT:
- return PASS_TRANSMISSION_INDIRECT;
- case BL::RenderPass::type_SUBSURFACE_INDIRECT:
- return PASS_SUBSURFACE_INDIRECT;
-
- case BL::RenderPass::type_DIFFUSE_COLOR:
- return PASS_DIFFUSE_COLOR;
- case BL::RenderPass::type_GLOSSY_COLOR:
- return PASS_GLOSSY_COLOR;
- case BL::RenderPass::type_TRANSMISSION_COLOR:
- return PASS_TRANSMISSION_COLOR;
- case BL::RenderPass::type_SUBSURFACE_COLOR:
- return PASS_SUBSURFACE_COLOR;
-
- case BL::RenderPass::type_EMIT:
- return PASS_EMISSION;
- case BL::RenderPass::type_ENVIRONMENT:
- return PASS_BACKGROUND;
- case BL::RenderPass::type_AO:
- return PASS_AO;
- case BL::RenderPass::type_SHADOW:
- return PASS_SHADOW;
-
- case BL::RenderPass::type_DIFFUSE:
- case BL::RenderPass::type_COLOR:
- case BL::RenderPass::type_REFRACTION:
- case BL::RenderPass::type_SPECULAR:
- case BL::RenderPass::type_REFLECTION:
- return PASS_NONE;
-#ifdef WITH_CYCLES_DEBUG
- case BL::RenderPass::type_DEBUG:
- {
- switch(b_pass.debug_type()) {
- case BL::RenderPass::debug_type_BVH_TRAVERSED_NODES:
- return PASS_BVH_TRAVERSED_NODES;
- case BL::RenderPass::debug_type_BVH_TRAVERSED_INSTANCES:
- return PASS_BVH_TRAVERSED_INSTANCES;
- case BL::RenderPass::debug_type_BVH_INTERSECTIONS:
- return PASS_BVH_INTERSECTIONS;
- case BL::RenderPass::debug_type_RAY_BOUNCES:
- return PASS_RAY_BOUNCES;
- }
- break;
- }
-#endif
- }
-
- return PASS_NONE;
-}
-
static ShaderEvalType get_shader_type(const string& pass_type)
{
const char *shader_type = pass_type.c_str();
@@ -487,22 +403,11 @@ void BlenderSession::render()
/* add passes */
array<Pass> passes;
- Pass::add(PASS_COMBINED, passes);
-
if(session_params.device.advanced_shading) {
-
- /* loop over passes */
- BL::RenderLayer::passes_iterator b_pass_iter;
-
- for(b_rlay.passes.begin(b_pass_iter); b_pass_iter != b_rlay.passes.end(); ++b_pass_iter) {
- BL::RenderPass b_pass(*b_pass_iter);
- PassType pass_type = get_pass_type(b_pass);
-
- if(pass_type == PASS_MOTION && scene->integrator->motion_blur)
- continue;
- if(pass_type != PASS_NONE)
- Pass::add(pass_type, passes);
- }
+ passes = sync->sync_render_passes(b_rlay, *b_layer_iter);
+ }
+ else {
+ Pass::add(PASS_COMBINED, passes);
}
buffer_params.passes = passes;
@@ -757,19 +662,25 @@ void BlenderSession::do_write_update_render_result(BL::RenderResult& b_rr,
BL::RenderPass b_pass(*b_iter);
/* find matching pass type */
- PassType pass_type = get_pass_type(b_pass);
+ PassType pass_type = BlenderSync::get_pass_type(b_pass);
int components = b_pass.channels();
- /* copy pixels */
- if(!buffers->get_pass_rect(pass_type, exposure, sample, components, &pixels[0]))
+ bool read = false;
+ if(pass_type != PASS_NONE) {
+ /* copy pixels */
+ read = buffers->get_pass_rect(pass_type, exposure, sample, components, &pixels[0]);
+ }
+
+ if(!read) {
memset(&pixels[0], 0, pixels.size()*sizeof(float));
+ }
b_pass.rect(&pixels[0]);
}
}
else {
/* copy combined pass */
- BL::RenderPass b_combined_pass(b_rlay.passes.find_by_type(BL::RenderPass::type_COMBINED, b_rview_name.c_str()));
+ BL::RenderPass b_combined_pass(b_rlay.passes.find_by_name("Combined", b_rview_name.c_str()));
if(buffers->get_pass_rect(PASS_COMBINED, exposure, sample, 4, &pixels[0]))
b_combined_pass.rect(&pixels[0]);
}
diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp
index abec9616cff..08120a2ca28 100644
--- a/intern/cycles/blender/blender_sync.cpp
+++ b/intern/cycles/blender/blender_sync.cpp
@@ -481,6 +481,94 @@ void BlenderSync::sync_images()
}
}
+/* Passes */
+PassType BlenderSync::get_pass_type(BL::RenderPass& b_pass)
+{
+ string name = b_pass.name();
+#define MAP_PASS(passname, passtype) if(name == passname) return passtype;
+ /* NOTE: Keep in sync with defined names from DNA_scene_types.h */
+ MAP_PASS("Combined", PASS_COMBINED);
+ MAP_PASS("Depth", PASS_DEPTH);
+ MAP_PASS("Mist", PASS_MIST);
+ MAP_PASS("Normal", PASS_NORMAL);
+ MAP_PASS("IndexOB", PASS_OBJECT_ID);
+ MAP_PASS("UV", PASS_UV);
+ MAP_PASS("Vector", PASS_MOTION);
+ MAP_PASS("IndexMA", PASS_MATERIAL_ID);
+
+ MAP_PASS("DiffDir", PASS_DIFFUSE_DIRECT);
+ MAP_PASS("GlossDir", PASS_GLOSSY_DIRECT);
+ MAP_PASS("TransDir", PASS_TRANSMISSION_DIRECT);
+ MAP_PASS("SubsurfaceDir", PASS_SUBSURFACE_DIRECT);
+
+ MAP_PASS("DiffInd", PASS_DIFFUSE_INDIRECT);
+ MAP_PASS("GlossInd", PASS_GLOSSY_INDIRECT);
+ MAP_PASS("TransInd", PASS_TRANSMISSION_INDIRECT);
+ MAP_PASS("SubsurfaceInd", PASS_SUBSURFACE_INDIRECT);
+
+ MAP_PASS("DiffCol", PASS_DIFFUSE_COLOR);
+ MAP_PASS("GlossCol", PASS_GLOSSY_COLOR);
+ MAP_PASS("TransCol", PASS_TRANSMISSION_COLOR);
+ MAP_PASS("SubsurfaceCol", PASS_SUBSURFACE_COLOR);
+
+ MAP_PASS("Emit", PASS_EMISSION);
+ MAP_PASS("Env", PASS_BACKGROUND);
+ MAP_PASS("AO", PASS_AO);
+ MAP_PASS("Shadow", PASS_SHADOW);
+
+#ifdef __KERNEL_DEBUG__
+ MAP_PASS("Debug BVH Traversed Nodes", PASS_BVH_TRAVERSED_NODES);
+ MAP_PASS("Debug BVH Traversed Instances", PASS_BVH_TRAVERSED_INSTANCES);
+ MAP_PASS("Debug BVH Intersections", PASS_BVH_INTERSECTIONS);
+ MAP_PASS("Debug Ray Bounces", PASS_RAY_BOUNCES);
+#endif
+#undef MAP_PASS
+
+ return PASS_NONE;
+}
+
+array<Pass> BlenderSync::sync_render_passes(BL::RenderLayer& b_rlay,
+ BL::SceneRenderLayer& b_srlay)
+{
+ array<Pass> passes;
+ Pass::add(PASS_COMBINED, passes);
+
+ /* loop over passes */
+ BL::RenderLayer::passes_iterator b_pass_iter;
+
+ for(b_rlay.passes.begin(b_pass_iter); b_pass_iter != b_rlay.passes.end(); ++b_pass_iter) {
+ BL::RenderPass b_pass(*b_pass_iter);
+ PassType pass_type = get_pass_type(b_pass);
+
+ if(pass_type == PASS_MOTION && scene->integrator->motion_blur)
+ continue;
+ if(pass_type != PASS_NONE)
+ Pass::add(pass_type, passes);
+ }
+
+#ifdef __KERNEL_DEBUG__
+ PointerRNA crp = RNA_pointer_get(&b_srlay.ptr, "cycles");
+ if(get_boolean(crp, "pass_debug_bvh_traversed_nodes")) {
+ b_engine.add_pass("Debug BVH Traversed Nodes", 1, "X", b_srlay.name().c_str());
+ Pass::add(PASS_BVH_TRAVERSED_NODES, passes);
+ }
+ if(get_boolean(crp, "pass_debug_bvh_traversed_instances")) {
+ b_engine.add_pass("Debug BVH Traversed Instances", 1, "X", b_srlay.name().c_str());
+ Pass::add(PASS_BVH_TRAVERSED_INSTANCES, passes);
+ }
+ if(get_boolean(crp, "pass_debug_bvh_intersections")) {
+ b_engine.add_pass("Debug BVH Intersections", 1, "X", b_srlay.name().c_str());
+ Pass::add(PASS_BVH_INTERSECTIONS, passes);
+ }
+ if(get_boolean(crp, "pass_debug_ray_bounces")) {
+ b_engine.add_pass("Debug Ray Bounces", 1, "X", b_srlay.name().c_str());
+ Pass::add(PASS_RAY_BOUNCES, passes);
+ }
+#endif
+
+ return passes;
+}
+
/* Scene Parameters */
SceneParams BlenderSync::get_scene_params(BL::Scene& b_scene,
diff --git a/intern/cycles/blender/blender_sync.h b/intern/cycles/blender/blender_sync.h
index 1388e59f36d..bc7a4de3b36 100644
--- a/intern/cycles/blender/blender_sync.h
+++ b/intern/cycles/blender/blender_sync.h
@@ -68,6 +68,8 @@ public:
void **python_thread_state,
const char *layer = 0);
void sync_render_layers(BL::SpaceView3D& b_v3d, const char *layer);
+ array<Pass> sync_render_passes(BL::RenderLayer& b_rlay,
+ BL::SceneRenderLayer& b_srlay);
void sync_integrator();
void sync_camera(BL::RenderSettings& b_render,
BL::Object& b_override,
@@ -94,6 +96,8 @@ public:
Camera *cam,
int width, int height);
+ static PassType get_pass_type(BL::RenderPass& b_pass);
+
private:
/* sync */
void sync_lamps(bool update_all);
diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp
index 71d52bb8097..9118793aad6 100644
--- a/intern/cycles/device/device_split_kernel.cpp
+++ b/intern/cycles/device/device_split_kernel.cpp
@@ -19,6 +19,7 @@
#include "kernel/kernel_types.h"
#include "kernel/split/kernel_split_data_types.h"
+#include "util/util_logging.h"
#include "util/util_time.h"
CCL_NAMESPACE_BEGIN
@@ -38,6 +39,8 @@ DeviceSplitKernel::DeviceSplitKernel(Device *device) : device(device)
kernel_do_volume = NULL;
kernel_queue_enqueue = NULL;
kernel_indirect_background = NULL;
+ kernel_shader_setup = NULL;
+ kernel_shader_sort = NULL;
kernel_shader_eval = NULL;
kernel_holdout_emission_blurring_pathtermination_ao = NULL;
kernel_subsurface_scatter = NULL;
@@ -63,6 +66,8 @@ DeviceSplitKernel::~DeviceSplitKernel()
delete kernel_do_volume;
delete kernel_queue_enqueue;
delete kernel_indirect_background;
+ delete kernel_shader_setup;
+ delete kernel_shader_sort;
delete kernel_shader_eval;
delete kernel_holdout_emission_blurring_pathtermination_ao;
delete kernel_subsurface_scatter;
@@ -88,6 +93,8 @@ bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_fe
LOAD_KERNEL(do_volume);
LOAD_KERNEL(queue_enqueue);
LOAD_KERNEL(indirect_background);
+ LOAD_KERNEL(shader_setup);
+ LOAD_KERNEL(shader_sort);
LOAD_KERNEL(shader_eval);
LOAD_KERNEL(holdout_emission_blurring_pathtermination_ao);
LOAD_KERNEL(subsurface_scatter);
@@ -108,6 +115,9 @@ bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_fe
size_t DeviceSplitKernel::max_elements_for_max_buffer_size(device_memory& kg, device_memory& data, uint64_t max_buffer_size)
{
uint64_t size_per_element = state_buffer_size(kg, data, 1024) / 1024;
+ VLOG(1) << "Split state element size: "
+ << string_human_readable_number(size_per_element) << " bytes. ("
+ << string_human_readable_size(size_per_element) << ").";
return max_buffer_size / size_per_element;
}
@@ -237,9 +247,12 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
ENQUEUE_SPLIT_KERNEL(do_volume, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(indirect_background, global_size, local_size);
+ ENQUEUE_SPLIT_KERNEL(shader_setup, global_size, local_size);
+ ENQUEUE_SPLIT_KERNEL(shader_sort, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(shader_eval, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(holdout_emission_blurring_pathtermination_ao, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(subsurface_scatter, global_size, local_size);
+ ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(direct_lighting, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(shadow_blocked_ao, global_size, local_size);
ENQUEUE_SPLIT_KERNEL(shadow_blocked_dl, global_size, local_size);
diff --git a/intern/cycles/device/device_split_kernel.h b/intern/cycles/device/device_split_kernel.h
index 55548122c0c..58c2fdbb077 100644
--- a/intern/cycles/device/device_split_kernel.h
+++ b/intern/cycles/device/device_split_kernel.h
@@ -61,6 +61,8 @@ private:
SplitKernelFunction *kernel_do_volume;
SplitKernelFunction *kernel_queue_enqueue;
SplitKernelFunction *kernel_indirect_background;
+ SplitKernelFunction *kernel_shader_setup;
+ SplitKernelFunction *kernel_shader_sort;
SplitKernelFunction *kernel_shader_eval;
SplitKernelFunction *kernel_holdout_emission_blurring_pathtermination_ao;
SplitKernelFunction *kernel_subsurface_scatter;
diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h
index 764216d0dfa..d061973dcb7 100644
--- a/intern/cycles/device/opencl/opencl.h
+++ b/intern/cycles/device/opencl/opencl.h
@@ -31,19 +31,16 @@ CCL_NAMESPACE_BEGIN
/* Work around AMD driver hangs by ensuring each command is finished before doing anything else. */
# undef clEnqueueNDRangeKernel
# define clEnqueueNDRangeKernel(a, b, c, d, e, f, g, h, i) \
- clFinish(a); \
CLEW_GET_FUN(__clewEnqueueNDRangeKernel)(a, b, c, d, e, f, g, h, i); \
clFinish(a);
# undef clEnqueueWriteBuffer
# define clEnqueueWriteBuffer(a, b, c, d, e, f, g, h, i) \
- clFinish(a); \
CLEW_GET_FUN(__clewEnqueueWriteBuffer)(a, b, c, d, e, f, g, h, i); \
clFinish(a);
# undef clEnqueueReadBuffer
# define clEnqueueReadBuffer(a, b, c, d, e, f, g, h, i) \
- clFinish(a); \
CLEW_GET_FUN(__clewEnqueueReadBuffer)(a, b, c, d, e, f, g, h, i); \
clFinish(a);
#endif /* CYCLES_DISABLE_DRIVER_WORKAROUNDS */
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 3750225571d..9bb0455b9d5 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -21,6 +21,8 @@ set(SRC
kernels/opencl/kernel_lamp_emission.cl
kernels/opencl/kernel_do_volume.cl
kernels/opencl/kernel_indirect_background.cl
+ kernels/opencl/kernel_shader_setup.cl
+ kernels/opencl/kernel_shader_sort.cl
kernels/opencl/kernel_shader_eval.cl
kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl
kernels/opencl/kernel_subsurface_scatter.cl
@@ -235,6 +237,7 @@ set(SRC_UTIL_HEADERS
)
set(SRC_SPLIT_HEADERS
+ split/kernel_branched.h
split/kernel_buffer_update.h
split/kernel_data_init.h
split/kernel_direct_lighting.h
@@ -247,6 +250,8 @@ set(SRC_SPLIT_HEADERS
split/kernel_path_init.h
split/kernel_queue_enqueue.h
split/kernel_scene_intersect.h
+ split/kernel_shader_setup.h
+ split/kernel_shader_sort.h
split/kernel_shader_eval.h
split/kernel_shadow_blocked_ao.h
split/kernel_shadow_blocked_dl.h
@@ -456,6 +461,8 @@ delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_scene_interse
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_lamp_emission.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_do_volume.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_indirect_background.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shader_setup.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shader_sort.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shader_eval.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_subsurface_scatter.cl" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl)
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index e7957042182..58da141aed3 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -58,7 +58,7 @@ ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
ShaderData *sd,
ShaderData *emission_sd,
PathRadiance *L,
- PathState *state,
+ ccl_addr_space PathState *state,
RNG *rng,
float3 throughput,
float3 ao_alpha)
@@ -98,6 +98,8 @@ ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
}
}
+#ifndef __SPLIT_KERNEL__
+
ccl_device void kernel_path_indirect(KernelGlobals *kg,
ShaderData *sd,
ShaderData *emission_sd,
@@ -818,5 +820,7 @@ ccl_device void kernel_path_trace(KernelGlobals *kg,
path_rng_end(kg, rng_state, rng);
}
+#endif /* __SPLIT_KERNEL__ */
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h
index 085eb42325d..ddcb57161ea 100644
--- a/intern/cycles/kernel/kernel_path_branched.h
+++ b/intern/cycles/kernel/kernel_path_branched.h
@@ -22,7 +22,7 @@ ccl_device_inline void kernel_branched_path_ao(KernelGlobals *kg,
ShaderData *sd,
ShaderData *emission_sd,
PathRadiance *L,
- PathState *state,
+ ccl_addr_space PathState *state,
RNG *rng,
float3 throughput)
{
@@ -65,6 +65,7 @@ ccl_device_inline void kernel_branched_path_ao(KernelGlobals *kg,
}
}
+#ifndef __SPLIT_KERNEL__
/* bounce off surface and integrate indirect light */
ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGlobals *kg,
@@ -648,6 +649,8 @@ ccl_device void kernel_branched_path_trace(KernelGlobals *kg,
path_rng_end(kg, rng_state, rng);
}
+#endif /* __SPLIT_KERNEL__ */
+
#endif /* __BRANCHED_PATH__ */
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_path_surface.h b/intern/cycles/kernel/kernel_path_surface.h
index 076c82f3853..bd4ba775b4d 100644
--- a/intern/cycles/kernel/kernel_path_surface.h
+++ b/intern/cycles/kernel/kernel_path_surface.h
@@ -155,7 +155,7 @@ ccl_device bool kernel_branched_path_surface_bounce(
ccl_addr_space float3 *throughput,
ccl_addr_space PathState *state,
PathRadiance *L,
- Ray *ray)
+ ccl_addr_space Ray *ray)
{
/* sample BSDF */
float bsdf_pdf;
diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h
index baf629342b9..274713addc2 100644
--- a/intern/cycles/kernel/kernel_subsurface.h
+++ b/intern/cycles/kernel/kernel_subsurface.h
@@ -417,9 +417,8 @@ ccl_device_noinline void subsurface_scatter_multi_setup(
subsurface_scatter_setup_diffuse_bsdf(sd, sc, weight, true, N);
}
-#ifndef __SPLIT_KERNEL__
/* subsurface scattering step, from a point on the surface to another nearby point on the same object */
-ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, PathState *state,
+ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, ccl_global PathState *state,
int state_flag, ShaderClosure *sc, uint *lcg_state, float disk_u, float disk_v, bool all)
{
float3 eval = make_float3(0.0f, 0.0f, 0.0f);
@@ -507,7 +506,6 @@ ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, PathS
/* setup diffuse bsdf */
subsurface_scatter_setup_diffuse_bsdf(sd, sc, eval, (ss_isect.num_hits > 0), N);
}
-#endif /* ! __SPLIT_KERNEL__ */
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index b1269cdb6b4..9b354457b91 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -64,6 +64,18 @@ CCL_NAMESPACE_BEGIN
# define WORK_POOL_SIZE WORK_POOL_SIZE_CPU
#endif
+
+#define SHADER_SORT_BLOCK_SIZE 2048
+
+#ifdef __KERNEL_OPENCL__
+# define SHADER_SORT_LOCAL_SIZE 64
+#elif defined(__KERNEL_CUDA__)
+# define SHADER_SORT_LOCAL_SIZE 32
+#else
+# define SHADER_SORT_LOCAL_SIZE 1
+#endif
+
+
/* device capabilities */
#ifdef __KERNEL_CPU__
# ifdef __KERNEL_SSE2__
@@ -71,22 +83,18 @@ CCL_NAMESPACE_BEGIN
# endif
# define __KERNEL_SHADING__
# define __KERNEL_ADV_SHADING__
-# ifndef __SPLIT_KERNEL__
-# define __BRANCHED_PATH__
-# endif
+# define __BRANCHED_PATH__
# ifdef WITH_OSL
# define __OSL__
# endif
-# define __SUBSURFACE__
# define __PRINCIPLED__
+# define __SUBSURFACE__
# define __CMJ__
# define __VOLUME__
# define __VOLUME_SCATTER__
# define __SHADOW_RECORD_ALL__
-# ifndef __SPLIT_KERNEL__
-# define __VOLUME_DECOUPLED__
-# define __VOLUME_RECORD_ALL__
-# endif
+# define __VOLUME_DECOUPLED__
+# define __VOLUME_RECORD_ALL__
#endif /* __KERNEL_CPU__ */
#ifdef __KERNEL_CUDA__
@@ -138,6 +146,7 @@ CCL_NAMESPACE_BEGIN
# define __VOLUME_SCATTER__
# define __SHADOW_RECORD_ALL__
# define __CMJ__
+# define __BRANCHED_PATH__
# endif /* __KERNEL_OPENCL_AMD__ */
# ifdef __KERNEL_OPENCL_INTEL_CPU__
@@ -1300,7 +1309,6 @@ typedef ccl_addr_space struct DebugData {
* Queue 3 - Shadow ray cast kernel - AO
* Queue 4 - Shadow ray cast kernel - direct lighting
*/
-#define NUM_QUEUES 4
/* Queue names */
enum QueueNumber {
@@ -1313,22 +1321,40 @@ enum QueueNumber {
* 3. Rays to be regenerated
* are enqueued here.
*/
- QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS = 1,
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
/* All rays for which a shadow ray should be cast to determine radiance
* contribution for AO are enqueued here.
*/
- QUEUE_SHADOW_RAY_CAST_AO_RAYS = 2,
+ QUEUE_SHADOW_RAY_CAST_AO_RAYS,
/* All rays for which a shadow ray should be cast to determine radiance
* contributing for direct lighting are enqueued here.
*/
- QUEUE_SHADOW_RAY_CAST_DL_RAYS = 3,
+ QUEUE_SHADOW_RAY_CAST_DL_RAYS,
+
+ /* Rays sorted according to shader->id */
+ QUEUE_SHADER_SORTED_RAYS,
+
+#ifdef __BRANCHED_PATH__
+ /* All rays moving to next iteration of the indirect loop for light */
+ QUEUE_LIGHT_INDIRECT_ITER,
+# ifdef __VOLUME__
+ /* All rays moving to next iteration of the indirect loop for volumes */
+ QUEUE_VOLUME_INDIRECT_ITER,
+# endif
+# ifdef __SUBSURFACE__
+ /* All rays moving to next iteration of the indirect loop for subsurface */
+ QUEUE_SUBSURFACE_INDIRECT_ITER,
+# endif
+#endif /* __BRANCHED_PATH__ */
+
+ NUM_QUEUES
};
-/* We use RAY_STATE_MASK to get ray_state (enums 0 to 5) */
-#define RAY_STATE_MASK 0x007
-#define RAY_FLAG_MASK 0x0F8
+/* We use RAY_STATE_MASK to get ray_state */
+#define RAY_STATE_MASK 0x0F
+#define RAY_FLAG_MASK 0xF0
enum RayState {
RAY_INVALID = 0,
/* Denotes ray is actively involved in path-iteration. */
@@ -1343,14 +1369,22 @@ enum RayState {
RAY_TO_REGENERATE,
/* Denotes ray has been regenerated */
RAY_REGENERATED,
- /* Flag's ray has to execute shadow blocked function in AO part */
- RAY_SHADOW_RAY_CAST_AO = 16,
- /* Flag's ray has to execute shadow blocked function in direct lighting part. */
- RAY_SHADOW_RAY_CAST_DL = 32,
+ /* Denotes ray is moving to next iteration of the branched indirect loop */
+ RAY_LIGHT_INDIRECT_NEXT_ITER,
+ RAY_VOLUME_INDIRECT_NEXT_ITER,
+ RAY_SUBSURFACE_INDIRECT_NEXT_ITER,
+
+ /* Ray flags */
+
+ /* Flags to denote that the ray is currently evaluating the branched indirect loop */
+ RAY_BRANCHED_LIGHT_INDIRECT = (1 << 4),
+ RAY_BRANCHED_VOLUME_INDIRECT = (1 << 5),
+ RAY_BRANCHED_SUBSURFACE_INDIRECT = (1 << 6),
+ RAY_BRANCHED_INDIRECT = (RAY_BRANCHED_LIGHT_INDIRECT | RAY_BRANCHED_VOLUME_INDIRECT | RAY_BRANCHED_SUBSURFACE_INDIRECT),
};
#define ASSIGN_RAY_STATE(ray_state, ray_index, state) (ray_state[ray_index] = ((ray_state[ray_index] & RAY_FLAG_MASK) | state))
-#define IS_STATE(ray_state, ray_index, state) ((ray_state[ray_index] & RAY_STATE_MASK) == state)
+#define IS_STATE(ray_state, ray_index, state) ((ray_index) != QUEUE_EMPTY_SLOT && ((ray_state)[(ray_index)] & RAY_STATE_MASK) == (state))
#define ADD_RAY_FLAG(ray_state, ray_index, flag) (ray_state[ray_index] = (ray_state[ray_index] | flag))
#define REMOVE_RAY_FLAG(ray_state, ray_index, flag) (ray_state[ray_index] = (ray_state[ray_index] & (~flag)))
#define IS_FLAG(ray_state, ray_index, flag) (ray_state[ray_index] & flag)
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
index 896b80d783e..39c9a9cf33c 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
@@ -77,6 +77,8 @@ DECLARE_SPLIT_KERNEL_FUNCTION(lamp_emission)
DECLARE_SPLIT_KERNEL_FUNCTION(do_volume)
DECLARE_SPLIT_KERNEL_FUNCTION(queue_enqueue)
DECLARE_SPLIT_KERNEL_FUNCTION(indirect_background)
+DECLARE_SPLIT_KERNEL_FUNCTION(shader_setup)
+DECLARE_SPLIT_KERNEL_FUNCTION(shader_sort)
DECLARE_SPLIT_KERNEL_FUNCTION(shader_eval)
DECLARE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao)
DECLARE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
index 148b2eef568..8c05dd1d9ef 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
@@ -44,6 +44,8 @@
# include "kernel/split/kernel_do_volume.h"
# include "kernel/split/kernel_queue_enqueue.h"
# include "kernel/split/kernel_indirect_background.h"
+# include "kernel/split/kernel_shader_setup.h"
+# include "kernel/split/kernel_shader_sort.h"
# include "kernel/split/kernel_shader_eval.h"
# include "kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h"
# include "kernel/split/kernel_subsurface_scatter.h"
@@ -181,9 +183,11 @@ DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission)
DEFINE_SPLIT_KERNEL_FUNCTION(do_volume)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(queue_enqueue, QueueEnqueueLocals)
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background)
-DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_eval, uint)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_setup, uint)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_sort, ShaderSortLocals)
+DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(holdout_emission_blurring_pathtermination_ao, BackgroundAOLocals)
-DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(subsurface_scatter, uint)
+DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint)
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao)
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl)
@@ -209,6 +213,8 @@ void KERNEL_FUNCTION_FULL_NAME(register_functions)(void(*reg)(const char* name,
REGISTER(do_volume);
REGISTER(queue_enqueue);
REGISTER(indirect_background);
+ REGISTER(shader_setup);
+ REGISTER(shader_sort);
REGISTER(shader_eval);
REGISTER(holdout_emission_blurring_pathtermination_ao);
REGISTER(subsurface_scatter);
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_split.cu b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
index a679eff8409..8b7f1a8d405 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_split.cu
+++ b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
@@ -31,6 +31,8 @@
#include "kernel/split/kernel_do_volume.h"
#include "kernel/split/kernel_queue_enqueue.h"
#include "kernel/split/kernel_indirect_background.h"
+#include "kernel/split/kernel_shader_setup.h"
+#include "kernel/split/kernel_shader_sort.h"
#include "kernel/split/kernel_shader_eval.h"
#include "kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h"
#include "kernel/split/kernel_subsurface_scatter.h"
@@ -108,9 +110,11 @@ DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission)
DEFINE_SPLIT_KERNEL_FUNCTION(do_volume)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(queue_enqueue, QueueEnqueueLocals)
DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background)
-DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_eval, uint)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_setup, uint)
+DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(shader_sort, ShaderSortLocals)
+DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(holdout_emission_blurring_pathtermination_ao, BackgroundAOLocals)
-DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(subsurface_scatter, uint)
+DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
DEFINE_SPLIT_KERNEL_FUNCTION_LOCALS(direct_lighting, uint)
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_ao)
DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked_dl)
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl
index 6baee460986..5bfb31b193a 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl
@@ -22,6 +22,5 @@ __kernel void kernel_ocl_path_trace_shader_eval(
ccl_global char *kg,
ccl_constant KernelData *data)
{
- ccl_local unsigned int local_queue_atomics;
- kernel_shader_eval((KernelGlobals*)kg, &local_queue_atomics);
+ kernel_shader_eval((KernelGlobals*)kg);
}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl
new file mode 100644
index 00000000000..38bfd04ad4c
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_shader_setup.cl
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel/kernel_compat_opencl.h"
+#include "kernel/split/kernel_split_common.h"
+#include "kernel/split/kernel_shader_setup.h"
+
+__kernel void kernel_ocl_path_trace_shader_setup(
+ ccl_global char *kg,
+ ccl_constant KernelData *data)
+{
+ ccl_local unsigned int local_queue_atomics;
+ kernel_shader_setup((KernelGlobals*)kg, &local_queue_atomics);
+}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl b/intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl
new file mode 100644
index 00000000000..6f722915d45
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_shader_sort.cl
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel/kernel_compat_opencl.h"
+#include "kernel/split/kernel_split_common.h"
+#include "kernel/split/kernel_shader_sort.h"
+
+__attribute__((reqd_work_group_size(64, 1, 1)))
+__kernel void kernel_ocl_path_trace_shader_sort(
+ ccl_global char *kg,
+ ccl_constant KernelData *data)
+{
+ ccl_local ShaderSortLocals locals;
+ kernel_shader_sort((KernelGlobals*)kg, &locals);
+}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_split.cl b/intern/cycles/kernel/kernels/opencl/kernel_split.cl
index 732cda30115..8de82db7afe 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_split.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_split.cl
@@ -23,6 +23,8 @@
#include "kernel/kernels/opencl/kernel_do_volume.cl"
#include "kernel/kernels/opencl/kernel_indirect_background.cl"
#include "kernel/kernels/opencl/kernel_queue_enqueue.cl"
+#include "kernel/kernels/opencl/kernel_shader_setup.cl"
+#include "kernel/kernels/opencl/kernel_shader_sort.cl"
#include "kernel/kernels/opencl/kernel_shader_eval.cl"
#include "kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl"
#include "kernel/kernels/opencl/kernel_subsurface_scatter.cl"
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl b/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl
index 7a1838e485f..99b74a1802b 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl
@@ -22,6 +22,5 @@ __kernel void kernel_ocl_path_trace_subsurface_scatter(
ccl_global char *kg,
ccl_constant KernelData *data)
{
- ccl_local unsigned int local_queue_atomics;
- kernel_subsurface_scatter((KernelGlobals*)kg, &local_queue_atomics);
+ kernel_subsurface_scatter((KernelGlobals*)kg);
}
diff --git a/intern/cycles/kernel/split/kernel_branched.h b/intern/cycles/kernel/split/kernel_branched.h
new file mode 100644
index 00000000000..c7bc1b4df0a
--- /dev/null
+++ b/intern/cycles/kernel/split/kernel_branched.h
@@ -0,0 +1,150 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+#ifdef __BRANCHED_PATH__
+
+/* sets up the various state needed to do an indirect loop */
+ccl_device_inline void kernel_split_branched_path_indirect_loop_init(KernelGlobals *kg, int ray_index)
+{
+ SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+
+ /* save a copy of the state to restore later */
+#define BRANCHED_STORE(name) \
+ branched_state->name = kernel_split_state.name[ray_index];
+
+ BRANCHED_STORE(path_state);
+ BRANCHED_STORE(throughput);
+ BRANCHED_STORE(ray);
+ BRANCHED_STORE(sd);
+ BRANCHED_STORE(isect);
+ BRANCHED_STORE(ray_state);
+
+#undef BRANCHED_STORE
+
+ /* set loop counters to initial position */
+ branched_state->next_closure = 0;
+ branched_state->next_sample = 0;
+}
+
+/* ends an indirect loop and restores the previous state */
+ccl_device_inline void kernel_split_branched_path_indirect_loop_end(KernelGlobals *kg, int ray_index)
+{
+ SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+
+ /* restore state */
+#define BRANCHED_RESTORE(name) \
+ kernel_split_state.name[ray_index] = branched_state->name;
+
+ BRANCHED_RESTORE(path_state);
+ BRANCHED_RESTORE(throughput);
+ BRANCHED_RESTORE(ray);
+ BRANCHED_RESTORE(sd);
+ BRANCHED_RESTORE(isect);
+ BRANCHED_RESTORE(ray_state);
+
+#undef BRANCHED_RESTORE
+
+ /* leave indirect loop */
+ REMOVE_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT);
+}
+
+/* bounce off surface and integrate indirect light */
+ccl_device_noinline bool kernel_split_branched_path_surface_indirect_light_iter(KernelGlobals *kg,
+ int ray_index,
+ float num_samples_adjust,
+ ShaderData *saved_sd,
+ bool reset_path_state)
+{
+ SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+
+ ShaderData *sd = saved_sd;
+ RNG rng = kernel_split_state.rng[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ float3 throughput = branched_state->throughput;
+
+ for(int i = branched_state->next_closure; i < sd->num_closure; i++) {
+ const ShaderClosure *sc = &sd->closure[i];
+
+ if(!CLOSURE_IS_BSDF(sc->type))
+ continue;
+ /* transparency is not handled here, but in outer loop */
+ if(sc->type == CLOSURE_BSDF_TRANSPARENT_ID)
+ continue;
+
+ int num_samples;
+
+ if(CLOSURE_IS_BSDF_DIFFUSE(sc->type))
+ num_samples = kernel_data.integrator.diffuse_samples;
+ else if(CLOSURE_IS_BSDF_BSSRDF(sc->type))
+ num_samples = 1;
+ else if(CLOSURE_IS_BSDF_GLOSSY(sc->type))
+ num_samples = kernel_data.integrator.glossy_samples;
+ else
+ num_samples = kernel_data.integrator.transmission_samples;
+
+ num_samples = ceil_to_int(num_samples_adjust*num_samples);
+
+ float num_samples_inv = num_samples_adjust/num_samples;
+ RNG bsdf_rng = cmj_hash(rng, i);
+
+ for(int j = branched_state->next_sample; j < num_samples; j++) {
+ ccl_global PathState *ps = &kernel_split_state.path_state[ray_index];
+ if(reset_path_state) {
+ *ps = branched_state->path_state;
+ }
+
+ ccl_global float3 *tp = &kernel_split_state.throughput[ray_index];
+ *tp = throughput;
+
+ ccl_global Ray *bsdf_ray = &kernel_split_state.ray[ray_index];
+
+ if(!kernel_branched_path_surface_bounce(kg,
+ &bsdf_rng,
+ sd,
+ sc,
+ j,
+ num_samples,
+ tp,
+ ps,
+ L,
+ bsdf_ray))
+ {
+ continue;
+ }
+
+ /* update state for next iteration */
+ branched_state->next_closure = i;
+ branched_state->next_sample = j+1;
+ branched_state->num_samples = num_samples;
+
+ /* start the indirect path */
+ *tp *= num_samples_inv;
+
+ return true;
+ }
+
+ branched_state->next_sample = 0;
+ }
+
+ return false;
+}
+
+#endif /* __BRANCHED_PATH__ */
+
+CCL_NAMESPACE_END
+
diff --git a/intern/cycles/kernel/split/kernel_data_init.h b/intern/cycles/kernel/split/kernel_data_init.h
index 9d3d01fff75..642ccac8239 100644
--- a/intern/cycles/kernel/split/kernel_data_init.h
+++ b/intern/cycles/kernel/split/kernel_data_init.h
@@ -105,21 +105,16 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
/* Initialize queue data and queue index. */
if(thread_index < queuesize) {
- /* Initialize active ray queue. */
- kernel_split_state.queue_data[QUEUE_ACTIVE_AND_REGENERATED_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
- /* Initialize background and buffer update queue. */
- kernel_split_state.queue_data[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
- /* Initialize shadow ray cast of AO queue. */
- kernel_split_state.queue_data[QUEUE_SHADOW_RAY_CAST_AO_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
- /* Initialize shadow ray cast of direct lighting queue. */
- kernel_split_state.queue_data[QUEUE_SHADOW_RAY_CAST_DL_RAYS * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
+ for(int i = 0; i < NUM_QUEUES; i++) {
+ kernel_split_state.queue_data[i * queuesize + thread_index] = QUEUE_EMPTY_SLOT;
+ }
}
if(thread_index == 0) {
- Queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
- Queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
- Queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS] = 0;
- Queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS] = 0;
+ for(int i = 0; i < NUM_QUEUES; i++) {
+ Queue_index[i] = 0;
+ }
+
/* The scene-intersect kernel should not use the queues very first time.
* since the queue would be empty.
*/
diff --git a/intern/cycles/kernel/split/kernel_direct_lighting.h b/intern/cycles/kernel/split/kernel_direct_lighting.h
index bdbf7387b95..3336c968a44 100644
--- a/intern/cycles/kernel/split/kernel_direct_lighting.h
+++ b/intern/cycles/kernel/split/kernel_direct_lighting.h
@@ -56,23 +56,6 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg,
kernel_split_params.queue_size,
0);
-#ifdef __COMPUTE_DEVICE_GPU__
- /* If we are executing on a GPU device, we exit all threads that are not
- * required.
- *
- * If we are executing on a CPU device, then we need to keep all threads
- * active since we have barrier() calls later in the kernel. CPU devices,
- * expect all threads to execute barrier statement.
- */
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
-#endif
-
-#ifndef __COMPUTE_DEVICE_GPU__
- if(ray_index != QUEUE_EMPTY_SLOT) {
-#endif
-
if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
ShaderData *sd = &kernel_split_state.sd[ray_index];
@@ -80,25 +63,24 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg,
/* direct lighting */
#ifdef __EMISSION__
RNG rng = kernel_split_state.rng[ray_index];
+
bool flag = (kernel_data.integrator.use_direct_light &&
(sd->flag & SD_BSDF_HAS_EVAL));
+
+# ifdef __BRANCHED_PATH__
+ if(flag && kernel_data.integrator.branched) {
+ flag = false;
+ enqueue_flag = 1;
+ }
+# endif /* __BRANCHED_PATH__ */
+
# ifdef __SHADOW_TRICKS__
if(flag && state->flag & PATH_RAY_SHADOW_CATCHER) {
flag = false;
- ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
- float3 throughput = kernel_split_state.throughput[ray_index];
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- kernel_branched_path_surface_connect_light(kg,
- &rng,
- sd,
- emission_sd,
- state,
- throughput,
- 1.0f,
- L,
- 1);
+ enqueue_flag = 1;
}
# endif /* __SHADOW_TRICKS__ */
+
if(flag) {
/* Sample illumination from lights to find path contribution. */
float light_t = path_state_rng_1D(kg, &rng, state, PRNG_LIGHT);
@@ -129,7 +111,6 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg,
kernel_split_state.bsdf_eval[ray_index] = L_light;
kernel_split_state.is_lamp[ray_index] = is_lamp;
/* Mark ray state for next shadow kernel. */
- ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL);
enqueue_flag = 1;
}
}
@@ -138,10 +119,6 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg,
#endif /* __EMISSION__ */
}
-#ifndef __COMPUTE_DEVICE_GPU__
- }
-#endif
-
#ifdef __EMISSION__
/* Enqueue RAY_SHADOW_RAY_CAST_DL rays. */
enqueue_ray_index_local(ray_index,
@@ -152,6 +129,27 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg,
kernel_split_state.queue_data,
kernel_split_params.queue_index);
#endif
+
+#ifdef __BRANCHED_PATH__
+ /* Enqueue RAY_LIGHT_INDIRECT_NEXT_ITER rays
+ * this is the last kernel before next_iteration_setup that uses local atomics so we do this here
+ */
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ *local_queue_atomics = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+ ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ enqueue_ray_index_local(ray_index,
+ QUEUE_LIGHT_INDIRECT_ITER,
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER),
+ kernel_split_params.queue_size,
+ local_queue_atomics,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
+
+#endif /* __BRANCHED_PATH__ */
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_do_volume.h b/intern/cycles/kernel/split/kernel_do_volume.h
index 47d3c280831..182e6c6e4fa 100644
--- a/intern/cycles/kernel/split/kernel_do_volume.h
+++ b/intern/cycles/kernel/split/kernel_do_volume.h
@@ -16,6 +16,81 @@
CCL_NAMESPACE_BEGIN
+#if defined(__BRANCHED_PATH__) && defined(__VOLUME__)
+
+ccl_device_inline void kernel_split_branched_path_volume_indirect_light_init(KernelGlobals *kg, int ray_index)
+{
+ kernel_split_branched_path_indirect_loop_init(kg, ray_index);
+
+ ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_VOLUME_INDIRECT);
+}
+
+ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(KernelGlobals *kg, int ray_index)
+{
+ SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+
+ ShaderData *sd = &kernel_split_state.sd[ray_index];
+ RNG rng = kernel_split_state.rng[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
+
+ /* GPU: no decoupled ray marching, scatter probabilistically */
+ int num_samples = kernel_data.integrator.volume_samples;
+ float num_samples_inv = 1.0f/num_samples;
+
+ Ray volume_ray = branched_state->ray;
+ volume_ray.t = (!IS_STATE(&branched_state->ray_state, 0, RAY_HIT_BACKGROUND)) ? branched_state->isect.t : FLT_MAX;
+
+ bool heterogeneous = volume_stack_is_heterogeneous(kg, branched_state->path_state.volume_stack);
+
+ for(int j = branched_state->next_sample; j < num_samples; j++) {
+ ccl_global PathState *ps = &kernel_split_state.path_state[ray_index];
+ *ps = branched_state->path_state;
+
+ ccl_global Ray *pray = &kernel_split_state.ray[ray_index];
+ *pray = branched_state->ray;
+
+ ccl_global float3 *tp = &kernel_split_state.throughput[ray_index];
+ *tp = branched_state->throughput * num_samples_inv;
+
+ /* branch RNG state */
+ path_state_branch(ps, j, num_samples);
+
+ /* integrate along volume segment with distance sampling */
+ VolumeIntegrateResult result = kernel_volume_integrate(
+ kg, ps, sd, &volume_ray, L, tp, &rng, heterogeneous);
+
+# ifdef __VOLUME_SCATTER__
+ if(result == VOLUME_PATH_SCATTERED) {
+ /* direct lighting */
+ kernel_path_volume_connect_light(kg, &rng, sd, emission_sd, *tp, &branched_state->path_state, L);
+
+ /* indirect light bounce */
+ if(!kernel_path_volume_bounce(kg, &rng, sd, tp, ps, L, pray)) {
+ continue;
+ }
+
+ /* start the indirect path */
+ branched_state->next_closure = 0;
+ branched_state->next_sample = j+1;
+ branched_state->num_samples = num_samples;
+
+ return true;
+ }
+# endif
+ }
+
+ kernel_split_branched_path_indirect_loop_end(kg, ray_index);
+
+ /* todo: avoid this calculation using decoupled ray marching */
+ float3 throughput = kernel_split_state.throughput[ray_index];
+ kernel_volume_shadow(kg, emission_sd, &kernel_split_state.path_state[ray_index], &volume_ray, &throughput);
+ kernel_split_state.throughput[ray_index] = throughput;
+
+ return false;
+}
+
+#endif /* __BRANCHED_PATH__ && __VOLUME__ */
ccl_device void kernel_do_volume(KernelGlobals *kg)
{
@@ -23,37 +98,37 @@ ccl_device void kernel_do_volume(KernelGlobals *kg)
/* We will empty this queue in this kernel. */
if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
+# ifdef __BRANCHED_PATH__
+ kernel_split_params.queue_index[QUEUE_VOLUME_INDIRECT_ITER] = 0;
+# endif /* __BRANCHED_PATH__ */
}
- /* Fetch use_queues_flag. */
- char local_use_queues_flag = *kernel_split_params.use_queues_flag;
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
- if(local_use_queues_flag) {
+
+ if(*kernel_split_params.use_queues_flag) {
ray_index = get_ray_index(kg, ray_index,
QUEUE_ACTIVE_AND_REGENERATED_RAYS,
kernel_split_state.queue_data,
kernel_split_params.queue_size,
1);
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
}
- if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) ||
- IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND)) {
+ ccl_global char *ray_state = kernel_split_state.ray_state;
- bool hit = ! IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND);
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+ ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+ RNG rng = kernel_split_state.rng[ray_index];
+ ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
+ ShaderData *sd = &kernel_split_state.sd[ray_index];
+ ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
- ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
- ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
- RNG rng = kernel_split_state.rng[ray_index];
- ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
- ShaderData *sd = &kernel_split_state.sd[ray_index];
- ShaderData *sd_input = &kernel_split_state.sd_DL_shadow[ray_index];
+ if(IS_STATE(ray_state, ray_index, RAY_ACTIVE) ||
+ IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
+
+ bool hit = !IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND);
/* Sanitize volume stack. */
if(!hit) {
@@ -64,31 +139,68 @@ ccl_device void kernel_do_volume(KernelGlobals *kg)
Ray volume_ray = *ray;
volume_ray.t = (hit)? isect->t: FLT_MAX;
- bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
+# ifdef __BRANCHED_PATH__
+ if(!kernel_data.integrator.branched || IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+# endif /* __BRANCHED_PATH__ */
+ bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
- {
- /* integrate along volume segment with distance sampling */
- VolumeIntegrateResult result = kernel_volume_integrate(
- kg, state, sd, &volume_ray, L, throughput, &rng, heterogeneous);
+ {
+ /* integrate along volume segment with distance sampling */
+ VolumeIntegrateResult result = kernel_volume_integrate(
+ kg, state, sd, &volume_ray, L, throughput, &rng, heterogeneous);
# ifdef __VOLUME_SCATTER__
- if(result == VOLUME_PATH_SCATTERED) {
- /* direct lighting */
- kernel_path_volume_connect_light(kg, &rng, sd, sd_input, *throughput, state, L);
-
- /* indirect light bounce */
- if(kernel_path_volume_bounce(kg, &rng, sd, throughput, state, L, ray))
- ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED);
- else
- ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER);
+ if(result == VOLUME_PATH_SCATTERED) {
+ /* direct lighting */
+ kernel_path_volume_connect_light(kg, &rng, sd, emission_sd, *throughput, state, L);
+
+ /* indirect light bounce */
+ if(kernel_path_volume_bounce(kg, &rng, sd, throughput, state, L, ray)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
+ else {
+ kernel_split_path_end(kg, ray_index);
+ }
+ }
+# endif /* __VOLUME_SCATTER__ */
+ }
+
+# ifdef __BRANCHED_PATH__
+ }
+ else {
+ kernel_split_branched_path_volume_indirect_light_init(kg, ray_index);
+
+ if(kernel_split_branched_path_volume_indirect_light_iter(kg, ray_index)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
}
-# endif
}
+# endif /* __BRANCHED_PATH__ */
}
+
kernel_split_state.rng[ray_index] = rng;
}
-#endif
+# ifdef __BRANCHED_PATH__
+ /* iter loop */
+ ray_index = get_ray_index(kg, ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0),
+ QUEUE_VOLUME_INDIRECT_ITER,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+
+ if(IS_STATE(ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER)) {
+ /* for render passes, sum and reset indirect light pass variables
+ * for the next samples */
+ path_radiance_sum_indirect(&kernel_split_state.path_radiance[ray_index]);
+ path_radiance_reset_indirect(&kernel_split_state.path_radiance[ray_index]);
+
+ if(kernel_split_branched_path_volume_indirect_light_iter(kg, ray_index)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
+ }
+# endif /* __BRANCHED_PATH__ */
+
+#endif /* __VOLUME__ */
}
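kernel_split_branched_path_volume_indirect_light_iter() above turns a nested sampling loop into something that can be suspended and resumed: the loop counter lives in the persistent SplitBranchedState, the function returns true as soon as one indirect bounce is spawned, and a ray that finishes that bounce is later marked RAY_VOLUME_INDIRECT_NEXT_ITER so the tail of kernel_do_volume can pick the loop up again. A minimal sketch of that resumable-loop idea follows; BranchedState and spawn_bounce are hypothetical names, not the Cycles API.

// Illustrative sketch of the resumable loop used by the branched volume
// indirect light iteration: the sample counter lives in persistent per-ray
// state so the kernel can stop as soon as one indirect bounce is spawned and
// continue from the same place on the next pass.
#include <cstdio>

struct BranchedState {
  int next_sample = 0;
  int num_samples = 4;
};

// Returns true when an indirect path was started and the ray should be
// re-queued as RAY_REGENERATED; false when all samples are finished.
bool indirect_light_iter(BranchedState *bs, bool (*spawn_bounce)(int))
{
  for(int j = bs->next_sample; j < bs->num_samples; j++) {
    if(spawn_bounce(j)) {
      bs->next_sample = j + 1;   /* resume after this sample next time */
      return true;
    }
  }
  bs->next_sample = 0;           /* loop finished, normal path state is restored */
  return false;
}

int main()
{
  BranchedState bs;
  auto spawn = [](int j) { return j % 2 == 0; };  /* pretend even samples scatter */
  while(indirect_light_iter(&bs, spawn))
    printf("regenerated at sample %d\n", bs.next_sample - 1);
  printf("volume indirect loop done\n");
  return 0;
}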
diff --git a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
index 89adeb64c8a..87498910d38 100644
--- a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
+++ b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
@@ -52,6 +52,7 @@ CCL_NAMESPACE_BEGIN
* - QUEUE_SHADOW_RAY_CAST_AO_RAYS will be filled with rays marked with
* flag RAY_SHADOW_RAY_CAST_AO
*/
+
ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
KernelGlobals *kg,
ccl_local_param BackgroundAOLocals *locals)
@@ -62,8 +63,9 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
}
ccl_barrier(CCL_LOCAL_MEM_FENCE);
+#ifdef __AO__
char enqueue_flag = 0;
- char enqueue_flag_AO_SHADOW_RAY_CAST = 0;
+#endif
int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
ray_index = get_ray_index(kg, ray_index,
QUEUE_ACTIVE_AND_REGENERATED_RAYS,
@@ -155,8 +157,7 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
kernel_split_state.L_transparent[ray_index] += average(holdout_weight*throughput);
}
if(sd->object_flag & SD_OBJECT_HOLDOUT_MASK) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
- enqueue_flag = 1;
+ kernel_split_path_end(kg, ray_index);
}
}
#endif /* __HOLDOUT__ */
@@ -164,18 +165,31 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
- /* Holdout mask objects do not write data passes. */
- kernel_write_data_passes(kg,
- buffer,
- L,
- sd,
- sample,
- state,
- throughput);
+
+#ifdef __BRANCHED_PATH__
+ if(!IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT))
+#endif /* __BRANCHED_PATH__ */
+ {
+ /* Holdout mask objects do not write data passes. */
+ kernel_write_data_passes(kg,
+ buffer,
+ L,
+ sd,
+ sample,
+ state,
+ throughput);
+ }
+
/* Blurring of bsdf after bounces, for rays that have a small likelihood
* of following this particular path (diffuse, rough glossy).
*/
- if(kernel_data.integrator.filter_glossy != FLT_MAX) {
+#ifndef __BRANCHED_PATH__
+ if(kernel_data.integrator.filter_glossy != FLT_MAX)
+#else
+ if(kernel_data.integrator.filter_glossy != FLT_MAX &&
+ (!kernel_data.integrator.branched || IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)))
+#endif /* __BRANCHED_PATH__ */
+ {
float blur_pdf = kernel_data.integrator.filter_glossy*state->min_ray_pdf;
if(blur_pdf < 1.0f) {
float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
@@ -201,19 +215,32 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
* mainly due to the mixed in MIS that we use. gives too many unneeded
* shader evaluations, only need emission if we are going to terminate.
*/
+#ifndef __BRANCHED_PATH__
float probability = path_state_terminate_probability(kg, state, throughput);
+#else
+ float probability = 1.0f;
+
+ if(!kernel_data.integrator.branched) {
+ probability = path_state_terminate_probability(kg, state, throughput);
+ }
+ else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+ int num_samples = kernel_split_state.branched_state[ray_index].num_samples;
+ probability = path_state_terminate_probability(kg, state, throughput*num_samples);
+ }
+ else if(state->flag & PATH_RAY_TRANSPARENT) {
+ probability = path_state_terminate_probability(kg, state, throughput);
+ }
+#endif
if(probability == 0.0f) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
- enqueue_flag = 1;
+ kernel_split_path_end(kg, ray_index);
}
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
if(probability != 1.0f) {
float terminate = path_state_rng_1D_for_decision(kg, &rng, state, PRNG_TERMINATE);
if(terminate >= probability) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
- enqueue_flag = 1;
+ kernel_split_path_end(kg, ray_index);
}
else {
kernel_split_state.throughput[ray_index] = throughput/probability;
@@ -225,61 +252,23 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
#ifdef __AO__
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
/* ambient occlusion */
- if(kernel_data.integrator.use_ambient_occlusion ||
- (sd->flag & SD_AO))
- {
- /* todo: solve correlation */
- float bsdf_u, bsdf_v;
- path_state_rng_2D(kg, &rng, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
-
- float ao_factor = kernel_data.background.ao_factor;
- float3 ao_N;
- kernel_split_state.ao_bsdf[ray_index] = shader_bsdf_ao(kg, sd, ao_factor, &ao_N);
- kernel_split_state.ao_alpha[ray_index] = shader_bsdf_alpha(kg, sd);
-
- float3 ao_D;
- float ao_pdf;
- sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
-
- if(dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f) {
- Ray _ray;
- _ray.P = ray_offset(sd->P, sd->Ng);
- _ray.D = ao_D;
- _ray.t = kernel_data.background.ao_distance;
-#ifdef __OBJECT_MOTION__
- _ray.time = sd->time;
-#endif
- _ray.dP = sd->dP;
- _ray.dD = differential3_zero();
- kernel_split_state.ao_light_ray[ray_index] = _ray;
-
- ADD_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO);
- enqueue_flag_AO_SHADOW_RAY_CAST = 1;
- }
+ if(kernel_data.integrator.use_ambient_occlusion || (sd->flag & SD_AO)) {
+ enqueue_flag = 1;
}
}
#endif /* __AO__ */
- kernel_split_state.rng[ray_index] = rng;
+ kernel_split_state.rng[ray_index] = rng;
#ifndef __COMPUTE_DEVICE_GPU__
}
#endif
- /* Enqueue RAY_UPDATE_BUFFER rays. */
- enqueue_ray_index_local(ray_index,
- QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
- enqueue_flag,
- kernel_split_params.queue_size,
- &locals->queue_atomics_bg,
- kernel_split_state.queue_data,
- kernel_split_params.queue_index);
-
#ifdef __AO__
/* Enqueue to-shadow-ray-cast rays. */
enqueue_ray_index_local(ray_index,
QUEUE_SHADOW_RAY_CAST_AO_RAYS,
- enqueue_flag_AO_SHADOW_RAY_CAST,
+ enqueue_flag,
kernel_split_params.queue_size,
&locals->queue_atomics_ao,
kernel_split_state.queue_data,
diff --git a/intern/cycles/kernel/split/kernel_indirect_background.h b/intern/cycles/kernel/split/kernel_indirect_background.h
index 8192528622e..6fbc888e358 100644
--- a/intern/cycles/kernel/split/kernel_indirect_background.h
+++ b/intern/cycles/kernel/split/kernel_indirect_background.h
@@ -34,7 +34,7 @@ ccl_device void kernel_indirect_background(KernelGlobals *kg)
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
if(state->bounce > kernel_data.integrator.ao_bounces) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
+ kernel_split_path_end(kg, ray_index);
}
}
}
@@ -63,7 +63,7 @@ ccl_device void kernel_indirect_background(KernelGlobals *kg)
#ifdef __PASSES__
if(!(kernel_data.film.pass_flag & PASS_BACKGROUND))
#endif
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
+ kernel_split_path_end(kg, ray_index);
}
if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
@@ -72,7 +72,7 @@ ccl_device void kernel_indirect_background(KernelGlobals *kg)
float3 L_background = indirect_background(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, ray);
path_radiance_accum_background(L, state, (*throughput), L_background);
#endif
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
+ kernel_split_path_end(kg, ray_index);
}
}
diff --git a/intern/cycles/kernel/split/kernel_indirect_subsurface.h b/intern/cycles/kernel/split/kernel_indirect_subsurface.h
index a56e85abeb9..82bc2f01fd7 100644
--- a/intern/cycles/kernel/split/kernel_indirect_subsurface.h
+++ b/intern/cycles/kernel/split/kernel_indirect_subsurface.h
@@ -49,26 +49,29 @@ ccl_device void kernel_indirect_subsurface(KernelGlobals *kg)
ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
- if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
- ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
- kernel_path_subsurface_accum_indirect(ss_indirect, L);
+#ifdef __BRANCHED_PATH__
+ if(!kernel_data.integrator.branched) {
+#endif
+ if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
+ ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
+ kernel_path_subsurface_accum_indirect(ss_indirect, L);
- /* Trace indirect subsurface rays by restarting the loop. this uses less
- * stack memory than invoking kernel_path_indirect.
- */
- if(ss_indirect->num_rays) {
- kernel_path_subsurface_setup_indirect(kg,
- ss_indirect,
- state,
- ray,
- L,
- throughput);
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
- }
- else {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
+ /* Trace indirect subsurface rays by restarting the loop. this uses less
+ * stack memory than invoking kernel_path_indirect.
+ */
+ if(ss_indirect->num_rays) {
+ kernel_path_subsurface_setup_indirect(kg,
+ ss_indirect,
+ state,
+ ray,
+ L,
+ throughput);
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
}
+#ifdef __BRANCHED_PATH__
}
+#endif
#endif /* __SUBSURFACE__ */
diff --git a/intern/cycles/kernel/split/kernel_next_iteration_setup.h b/intern/cycles/kernel/split/kernel_next_iteration_setup.h
index 1bebc16e25b..71017fed19e 100644
--- a/intern/cycles/kernel/split/kernel_next_iteration_setup.h
+++ b/intern/cycles/kernel/split/kernel_next_iteration_setup.h
@@ -44,6 +44,52 @@ CCL_NAMESPACE_BEGIN
* - QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with
* RAY_TO_REGENERATE and more RAY_UPDATE_BUFFER rays.
*/
+
+#ifdef __BRANCHED_PATH__
+ccl_device_inline void kernel_split_branched_indirect_light_init(KernelGlobals *kg, int ray_index)
+{
+ kernel_split_branched_path_indirect_loop_init(kg, ray_index);
+
+ ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_LIGHT_INDIRECT);
+}
+
+ccl_device void kernel_split_branched_indirect_light_end(KernelGlobals *kg, int ray_index)
+{
+ kernel_split_branched_path_indirect_loop_end(kg, ray_index);
+
+ ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+ ShaderData *sd = &kernel_split_state.sd[ray_index];
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+
+ /* continue in case of transparency */
+ *throughput *= shader_bsdf_transparency(kg, sd);
+
+ if(is_zero(*throughput)) {
+ kernel_split_path_end(kg, ray_index);
+ }
+ else {
+ /* Update Path State */
+ state->flag |= PATH_RAY_TRANSPARENT;
+ state->transparent_bounce++;
+
+ ray->P = ray_offset(sd->P, -sd->Ng);
+ ray->t -= sd->ray_length; /* clipping works through transparent */
+
+# ifdef __RAY_DIFFERENTIALS__
+ ray->dP = sd->dP;
+ ray->dD.dx = -sd->dI.dx;
+ ray->dD.dy = -sd->dI.dy;
+# endif /* __RAY_DIFFERENTIALS__ */
+
+# ifdef __VOLUME__
+ /* enter/exit volume */
+ kernel_volume_stack_enter_exit(kg, sd, state->volume_stack);
+# endif /* __VOLUME__ */
+ }
+}
+#endif /* __BRANCHED_PATH__ */
+
ccl_device void kernel_next_iteration_setup(KernelGlobals *kg,
ccl_local_param unsigned int *local_queue_atomics)
{
@@ -67,7 +113,6 @@ ccl_device void kernel_next_iteration_setup(KernelGlobals *kg,
kernel_split_params.queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS] = 0;
}
- char enqueue_flag = 0;
int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
ray_index = get_ray_index(kg, ray_index,
QUEUE_ACTIVE_AND_REGENERATED_RAYS,
@@ -75,102 +120,125 @@ ccl_device void kernel_next_iteration_setup(KernelGlobals *kg,
kernel_split_params.queue_size,
0);
-#ifdef __COMPUTE_DEVICE_GPU__
- /* If we are executing on a GPU device, we exit all threads that are not
- * required.
- *
- * If we are executing on a CPU device, then we need to keep all threads
- * active since we have barrier() calls later in the kernel. CPU devices,
- * expect all threads to execute barrier statement.
- */
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
-#endif
-
-#ifndef __COMPUTE_DEVICE_GPU__
- if(ray_index != QUEUE_EMPTY_SLOT) {
-#endif
-
- /* Load ShaderData structure. */
- PathRadiance *L = NULL;
- ccl_global PathState *state = NULL;
ccl_global char *ray_state = kernel_split_state.ray_state;
- /* Path radiance update for AO/Direct_lighting's shadow blocked. */
- if(IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL) ||
- IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO))
- {
- state = &kernel_split_state.path_state[ray_index];
- L = &kernel_split_state.path_radiance[ray_index];
- float3 _throughput = kernel_split_state.throughput[ray_index];
-
- if(IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO)) {
- float3 shadow = kernel_split_state.ao_light_ray[ray_index].P;
- // TODO(mai): investigate correctness here
- char update_path_radiance = (char)kernel_split_state.ao_light_ray[ray_index].t;
- if(update_path_radiance) {
- path_radiance_accum_ao(L,
- _throughput,
- kernel_split_state.ao_alpha[ray_index],
- kernel_split_state.ao_bsdf[ray_index],
- shadow,
- state->bounce);
- }
- else {
- path_radiance_accum_total_ao(L, _throughput, kernel_split_state.ao_bsdf[ray_index]);
+ bool active = IS_STATE(ray_state, ray_index, RAY_ACTIVE);
+ if(active) {
+ ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+ ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+ RNG rng = kernel_split_state.rng[ray_index];
+ ShaderData *sd = &kernel_split_state.sd[ray_index];
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+
+#ifdef __BRANCHED_PATH__
+ if(!kernel_data.integrator.branched || IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+#endif
+ /* Compute direct lighting and next bounce. */
+ if(!kernel_path_surface_bounce(kg, &rng, sd, throughput, state, L, ray)) {
+ kernel_split_path_end(kg, ray_index);
}
- REMOVE_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO);
+#ifdef __BRANCHED_PATH__
}
-
- if(IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL)) {
- float3 shadow = kernel_split_state.light_ray[ray_index].P;
- // TODO(mai): investigate correctness here
- char update_path_radiance = (char)kernel_split_state.light_ray[ray_index].t;
- BsdfEval L_light = kernel_split_state.bsdf_eval[ray_index];
- if(update_path_radiance) {
- path_radiance_accum_light(L,
- _throughput,
- &L_light,
- shadow,
- 1.0f,
- state->bounce,
- kernel_split_state.is_lamp[ray_index]);
+ else {
+ kernel_split_branched_indirect_light_init(kg, ray_index);
+
+ if(kernel_split_branched_path_surface_indirect_light_iter(kg,
+ ray_index,
+ 1.0f,
+ &kernel_split_state.branched_state[ray_index].sd,
+ true))
+ {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
}
else {
- path_radiance_accum_total_light(L, _throughput, &L_light);
+ kernel_split_branched_indirect_light_end(kg, ray_index);
}
- REMOVE_RAY_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL);
}
+#endif /* __BRANCHED_PATH__ */
+
+ kernel_split_state.rng[ray_index] = rng;
}
- if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
- ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
- ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
- RNG rng = kernel_split_state.rng[ray_index];
- state = &kernel_split_state.path_state[ray_index];
- L = &kernel_split_state.path_radiance[ray_index];
+ /* Enqueue RAY_UPDATE_BUFFER rays. */
+ enqueue_ray_index_local(ray_index,
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+ IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER) && active,
+ kernel_split_params.queue_size,
+ local_queue_atomics,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
+
+#ifdef __BRANCHED_PATH__
+ /* iter loop */
+ if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
+ kernel_split_params.queue_index[QUEUE_LIGHT_INDIRECT_ITER] = 0;
+ }
- /* Compute direct lighting and next bounce. */
- if(!kernel_path_surface_bounce(kg, &rng, &kernel_split_state.sd[ray_index], throughput, state, L, ray)) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
- enqueue_flag = 1;
+ ray_index = get_ray_index(kg, ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0),
+ QUEUE_LIGHT_INDIRECT_ITER,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+
+ if(IS_STATE(ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER)) {
+ /* for render passes, sum and reset indirect light pass variables
+ * for the next samples */
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+
+ path_radiance_sum_indirect(L);
+ path_radiance_reset_indirect(L);
+
+ if(kernel_split_branched_path_surface_indirect_light_iter(kg,
+ ray_index,
+ 1.0f,
+ &kernel_split_state.branched_state[ray_index].sd,
+ true))
+ {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
+ else {
+ kernel_split_branched_indirect_light_end(kg, ray_index);
}
- kernel_split_state.rng[ray_index] = rng;
}
-#ifndef __COMPUTE_DEVICE_GPU__
+# ifdef __VOLUME__
+ /* Enqueue RAY_VOLUME_INDIRECT_NEXT_ITER rays */
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ *local_queue_atomics = 0;
}
-#endif
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
- /* Enqueue RAY_UPDATE_BUFFER rays. */
+ ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
enqueue_ray_index_local(ray_index,
- QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
- enqueue_flag,
+ QUEUE_VOLUME_INDIRECT_ITER,
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER),
+ kernel_split_params.queue_size,
+ local_queue_atomics,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
+
+# endif /* __VOLUME__ */
+
+# ifdef __SUBSURFACE__
+ /* Enqueue RAY_SUBSURFACE_INDIRECT_NEXT_ITER rays */
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ *local_queue_atomics = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+ ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ enqueue_ray_index_local(ray_index,
+ QUEUE_SUBSURFACE_INDIRECT_ITER,
+ IS_STATE(kernel_split_state.ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER),
kernel_split_params.queue_size,
local_queue_atomics,
kernel_split_state.queue_data,
kernel_split_params.queue_index);
+# endif /* __SUBSURFACE__ */
+#endif /* __BRANCHED_PATH__ */
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_shader_eval.h b/intern/cycles/kernel/split/kernel_shader_eval.h
index 0f1696e34a0..2801b32f285 100644
--- a/intern/cycles/kernel/split/kernel_shader_eval.h
+++ b/intern/cycles/kernel/split/kernel_shader_eval.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2011-2015 Blender Foundation
+ * Copyright 2011-2017 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -16,54 +16,61 @@
CCL_NAMESPACE_BEGIN
-/* This kernel sets up the ShaderData structure from the values computed
+/* This kernel evaluates the ShaderData structure from the values computed
* by the previous kernels.
- *
- * It also identifies the rays of state RAY_TO_REGENERATE and enqueues them
- * in QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue.
*/
-ccl_device void kernel_shader_eval(KernelGlobals *kg,
- ccl_local_param unsigned int *local_queue_atomics)
+ccl_device void kernel_shader_eval(KernelGlobals *kg)
{
- /* Enqeueue RAY_TO_REGENERATE rays into QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue. */
- if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- *local_queue_atomics = 0;
- }
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ /* Sorting on the CUDA split kernel is not implemented. */
+#ifdef __KERNEL_CUDA__
+ int queue_index = kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS];
+#else
+ int queue_index = kernel_split_params.queue_index[QUEUE_SHADER_SORTED_RAYS];
+#endif
+ if(ray_index >= queue_index) {
+ return;
+ }
ray_index = get_ray_index(kg, ray_index,
+#ifdef __KERNEL_CUDA__
QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+#else
+ QUEUE_SHADER_SORTED_RAYS,
+#endif
kernel_split_state.queue_data,
kernel_split_params.queue_size,
0);
- char enqueue_flag = 0;
- if((ray_index != QUEUE_EMPTY_SLOT) && IS_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE)) {
- enqueue_flag = 1;
+ if(ray_index == QUEUE_EMPTY_SLOT) {
+ return;
}
- enqueue_ray_index_local(ray_index,
- QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
- enqueue_flag,
- kernel_split_params.queue_size,
- local_queue_atomics,
- kernel_split_state.queue_data,
- kernel_split_params.queue_index);
-
- /* Continue on with shader evaluation. */
- if((ray_index != QUEUE_EMPTY_SLOT) && IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
- Intersection isect = kernel_split_state.isect[ray_index];
+ ccl_global char *ray_state = kernel_split_state.ray_state;
+ if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
RNG rng = kernel_split_state.rng[ray_index];
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- Ray ray = kernel_split_state.ray[ray_index];
- shader_setup_from_ray(kg,
- &kernel_split_state.sd[ray_index],
- &isect,
- &ray);
+#ifndef __BRANCHED_PATH__
float rbsdf = path_state_rng_1D_for_decision(kg, &rng, state, PRNG_BSDF);
shader_eval_surface(kg, &kernel_split_state.sd[ray_index], &rng, state, rbsdf, state->flag, SHADER_CONTEXT_MAIN);
+#else
+ ShaderContext ctx = SHADER_CONTEXT_MAIN;
+ float rbsdf = 0.0f;
+
+ if(!kernel_data.integrator.branched || IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+ rbsdf = path_state_rng_1D_for_decision(kg, &rng, state, PRNG_BSDF);
+
+ }
+
+ if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+ ctx = SHADER_CONTEXT_INDIRECT;
+ }
+
+ shader_eval_surface(kg, &kernel_split_state.sd[ray_index], &rng, state, rbsdf, state->flag, ctx);
+ shader_merge_closures(&kernel_split_state.sd[ray_index]);
+#endif /* __BRANCHED_PATH__ */
+
kernel_split_state.rng[ray_index] = rng;
}
}
diff --git a/intern/cycles/kernel/split/kernel_shader_setup.h b/intern/cycles/kernel/split/kernel_shader_setup.h
new file mode 100644
index 00000000000..0432689d9fa
--- /dev/null
+++ b/intern/cycles/kernel/split/kernel_shader_setup.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+/* This kernel sets up the ShaderData structure from the values computed
+ * by the previous kernels.
+ *
+ * It also identifies the rays of state RAY_TO_REGENERATE and enqueues them
+ * in QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue.
+ */
+ccl_device void kernel_shader_setup(KernelGlobals *kg,
+ ccl_local_param unsigned int *local_queue_atomics)
+{
+ /* Enqueue RAY_TO_REGENERATE rays into the QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue. */
+ if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+ *local_queue_atomics = 0;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+ int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ int queue_index = kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS];
+ if(ray_index >= queue_index) {
+ return;
+ }
+ ray_index = get_ray_index(kg, ray_index,
+ QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 0);
+
+ if(ray_index == QUEUE_EMPTY_SLOT) {
+ return;
+ }
+
+ char enqueue_flag = (IS_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE)) ? 1 : 0;
+ enqueue_ray_index_local(ray_index,
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+ enqueue_flag,
+ kernel_split_params.queue_size,
+ local_queue_atomics,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_index);
+
+ /* Continue on with shader evaluation. */
+ if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
+ Intersection isect = kernel_split_state.isect[ray_index];
+ Ray ray = kernel_split_state.ray[ray_index];
+
+ shader_setup_from_ray(kg,
+ &kernel_split_state.sd[ray_index],
+ &isect,
+ &ray);
+ }
+}
+
+CCL_NAMESPACE_END
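kernel_shader_setup() (like most of the kernels above) goes from its thread index to a ray index through get_ray_index(): the thread reads its own slot from the chosen queue, and the final argument says whether the slot is consumed (the kernels that reset the corresponding queue_index to 0 pass 1). The sketch below is a guess at semantics consistent with that usage, not the real helper from kernel_queues.h.

// Hypothetical re-creation of the get_ray_index() contract as used above:
// queues are stored back to back in one flat array, and a thread owns the
// slot matching its thread index. Passing clear_slot consumes the entry.
#include <cstdio>

enum { QUEUE_EMPTY_SLOT = -1 };

int get_ray_index(int thread_index, int queue_number,
                  int *queue_data, int queue_size, bool clear_slot)
{
  int ray_index = queue_data[queue_number * queue_size + thread_index];
  if(clear_slot) {
    queue_data[queue_number * queue_size + thread_index] = QUEUE_EMPTY_SLOT;
  }
  return ray_index;
}

int main()
{
  int queue_data[2 * 4] = { 5, 9, QUEUE_EMPTY_SLOT, 2,      /* queue 0 */
                            7, QUEUE_EMPTY_SLOT, 1, 3 };    /* queue 1 */
  printf("thread 1, queue 0 -> ray %d\n", get_ray_index(1, 0, queue_data, 4, true));
  printf("same slot again   -> %d (consumed)\n", get_ray_index(1, 0, queue_data, 4, false));
  return 0;
}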
diff --git a/intern/cycles/kernel/split/kernel_shader_sort.h b/intern/cycles/kernel/split/kernel_shader_sort.h
new file mode 100644
index 00000000000..297decb0bc2
--- /dev/null
+++ b/intern/cycles/kernel/split/kernel_shader_sort.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+
+ccl_device void kernel_shader_sort(KernelGlobals *kg,
+ ccl_local_param ShaderSortLocals *locals)
+{
+#ifndef __KERNEL_CUDA__
+ int tid = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ uint qsize = kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS];
+ if(tid == 0) {
+ kernel_split_params.queue_index[QUEUE_SHADER_SORTED_RAYS] = qsize;
+ }
+
+ uint offset = (tid/SHADER_SORT_LOCAL_SIZE)*SHADER_SORT_BLOCK_SIZE;
+ if(offset >= qsize) {
+ return;
+ }
+
+ int lid = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0);
+ uint input = QUEUE_ACTIVE_AND_REGENERATED_RAYS * (kernel_split_params.queue_size);
+ uint output = QUEUE_SHADER_SORTED_RAYS * (kernel_split_params.queue_size);
+ ccl_local uint *local_value = &locals->local_value[0];
+ ccl_local ushort *local_index = &locals->local_index[0];
+
+ /* copy to local memory */
+ for(uint i = 0; i < SHADER_SORT_BLOCK_SIZE; i += SHADER_SORT_LOCAL_SIZE) {
+ uint idx = offset + i + lid;
+ uint add = input + idx;
+ uint value = (~0);
+ if(idx < qsize) {
+ int ray_index = kernel_split_state.queue_data[add];
+ bool valid = (ray_index != QUEUE_EMPTY_SLOT) && IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE);
+ if(valid) {
+ value = kernel_split_state.sd[ray_index].shader & SHADER_MASK;
+ }
+ }
+ local_value[i + lid] = value;
+ local_index[i + lid] = i + lid;
+ }
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+ /* skip sorting for the CPU split kernel */
+# ifdef __KERNEL_OPENCL__
+
+ /* bitonic sort */
+ for(uint length = 1; length < SHADER_SORT_BLOCK_SIZE; length <<= 1) {
+ for(uint inc = length; inc > 0; inc >>= 1) {
+ for(uint ii = 0; ii < SHADER_SORT_BLOCK_SIZE; ii += SHADER_SORT_LOCAL_SIZE) {
+ uint i = lid + ii;
+ bool direction = ((i & (length << 1)) != 0);
+ uint j = i ^ inc;
+ ushort ioff = local_index[i];
+ ushort joff = local_index[j];
+ uint iKey = local_value[ioff];
+ uint jKey = local_value[joff];
+ bool smaller = (jKey < iKey) || (jKey == iKey && j < i);
+ bool swap = smaller ^ (j < i) ^ direction;
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ local_index[i] = (swap) ? joff : ioff;
+ local_index[j] = (swap) ? ioff : joff;
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
+ }
+ }
+ }
+# endif /* __KERNEL_OPENCL__ */
+
+ /* copy to destination */
+ for(uint i = 0; i < SHADER_SORT_BLOCK_SIZE; i += SHADER_SORT_LOCAL_SIZE) {
+ uint idx = offset + i + lid;
+ uint lidx = local_index[i + lid];
+ uint outi = output + idx;
+ uint ini = input + offset + lidx;
+ uint value = local_value[lidx];
+ if(idx < qsize) {
+ kernel_split_state.queue_data[outi] = (value == (~0)) ? QUEUE_EMPTY_SLOT : kernel_split_state.queue_data[ini];
+ }
+ }
+#endif /* __KERNEL_CUDA__ */
+}
+
+CCL_NAMESPACE_END
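kernel_shader_sort() above keys the sort on the shader ID (sd.shader & SHADER_MASK) but only permutes a local index table with a bitonic network; the queue entries themselves are moved once in the final copy, and empty slots carry the ~0 key so they sink to the end of each block. A serial reference version of the same key/index bitonic sort follows; it assumes a power-of-two block size and uses plain names instead of the kernel's macros.

// Serial reference for the bitonic key/index sort: shader IDs are the keys and
// only the index table is permuted, so rays with the same shader end up
// adjacent in the sorted queue. Size must be a power of two; empty slots use
// the ~0 key so they sort to the end, as in the kernel.
#include <cstdint>
#include <cstdio>
#include <vector>
#include <utility>

void bitonic_sort_indices(const std::vector<uint32_t>& key, std::vector<int>& idx)
{
  const int n = (int)idx.size();  /* power of two */
  for(int length = 1; length < n; length <<= 1) {
    for(int inc = length; inc > 0; inc >>= 1) {
      for(int i = 0; i < n; i++) {
        const int j = i ^ inc;
        if(j <= i) continue;                       /* handle each pair once */
        const bool descending = (i & (length << 1)) != 0;
        const bool wrong_order = descending ? key[idx[i]] < key[idx[j]]
                                            : key[idx[i]] > key[idx[j]];
        if(wrong_order) std::swap(idx[i], idx[j]);
      }
    }
  }
}

int main()
{
  std::vector<uint32_t> shader_key = {7, 2, ~0u, 2, 5, 7, 1, ~0u};
  std::vector<int> index = {0, 1, 2, 3, 4, 5, 6, 7};
  bitonic_sort_indices(shader_key, index);
  for(int i : index) printf("%d ", i);             /* indices grouped by shader id */
  printf("\n");
  return 0;
}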
diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
index 4243e18de72..474286285a9 100644
--- a/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
+++ b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h
@@ -29,31 +29,29 @@ ccl_device void kernel_shadow_blocked_ao(KernelGlobals *kg)
kernel_split_state.queue_data, kernel_split_params.queue_size, 1);
}
- if(ray_index == QUEUE_EMPTY_SLOT)
+ if(ray_index == QUEUE_EMPTY_SLOT) {
return;
+ }
- /* Flag determining if we need to update L. */
- char update_path_radiance = 0;
-
- if(IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO)) {
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- ccl_global Ray *light_ray_global = &kernel_split_state.ao_light_ray[ray_index];
-
- float3 shadow;
- Ray ray = *light_ray_global;
- update_path_radiance = !(shadow_blocked(kg,
- &kernel_split_state.sd_DL_shadow[ray_index],
- state,
- &ray,
- &shadow));
-
- *light_ray_global = ray;
- /* We use light_ray_global's P and t to store shadow and
- * update_path_radiance.
- */
- light_ray_global->P = shadow;
- light_ray_global->t = update_path_radiance;
+ ShaderData *sd = &kernel_split_state.sd[ray_index];
+ ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ RNG rng = kernel_split_state.rng[ray_index];
+ float3 throughput = kernel_split_state.throughput[ray_index];
+
+#ifdef __BRANCHED_PATH__
+ if(!kernel_data.integrator.branched || IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+#endif
+ kernel_path_ao(kg, sd, emission_sd, L, state, &rng, throughput, shader_bsdf_alpha(kg, sd));
+#ifdef __BRANCHED_PATH__
+ }
+ else {
+ kernel_branched_path_ao(kg, sd, emission_sd, L, state, &rng, throughput);
}
+#endif
+
+ kernel_split_state.rng[ray_index] = rng;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
index bb8f0157965..452b6e45a36 100644
--- a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
+++ b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h
@@ -32,28 +32,71 @@ ccl_device void kernel_shadow_blocked_dl(KernelGlobals *kg)
if(ray_index == QUEUE_EMPTY_SLOT)
return;
- /* Flag determining if we need to update L. */
- char update_path_radiance = 0;
+ ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+ Ray ray = kernel_split_state.light_ray[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ShaderData *sd = &kernel_split_state.sd[ray_index];
+ float3 throughput = kernel_split_state.throughput[ray_index];
+ RNG rng = kernel_split_state.rng[ray_index];
- if(IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL)) {
- ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
- ccl_global Ray *light_ray_global = &kernel_split_state.light_ray[ray_index];
+ BsdfEval L_light = kernel_split_state.bsdf_eval[ray_index];
+ ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
+ bool is_lamp = kernel_split_state.is_lamp[ray_index];
+# if defined(__BRANCHED_PATH__) || defined(__SHADOW_TRICKS__)
+ bool use_branched = false;
+ int all = 0;
+
+ if(state->flag & PATH_RAY_SHADOW_CATCHER) {
+ use_branched = true;
+ all = 1;
+ }
+# if defined(__BRANCHED_PATH__)
+ else if(kernel_data.integrator.branched) {
+ use_branched = true;
+
+ if(IS_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+ all = (kernel_data.integrator.sample_all_lights_indirect);
+ }
+ else
+ {
+ all = (kernel_data.integrator.sample_all_lights_direct);
+ }
+ }
+# endif /* __BRANCHED_PATH__ */
+
+ if(use_branched) {
+ kernel_branched_path_surface_connect_light(kg,
+ &rng,
+ sd,
+ emission_sd,
+ state,
+ throughput,
+ 1.0f,
+ L,
+ all);
+ }
+ else
+# endif /* defined(__BRANCHED_PATH__) || defined(__SHADOW_TRICKS__) */
+ {
+ /* trace shadow ray */
float3 shadow;
- Ray ray = *light_ray_global;
- update_path_radiance = !(shadow_blocked(kg,
- &kernel_split_state.sd_DL_shadow[ray_index],
- state,
- &ray,
- &shadow));
-
- *light_ray_global = ray;
- /* We use light_ray_global's P and t to store shadow and
- * update_path_radiance.
- */
- light_ray_global->P = shadow;
- light_ray_global->t = update_path_radiance;
+
+ if(!shadow_blocked(kg,
+ emission_sd,
+ state,
+ &ray,
+ &shadow))
+ {
+ /* accumulate */
+ path_radiance_accum_light(L, throughput, &L_light, shadow, 1.0f, state->bounce, is_lamp);
+ }
+ else {
+ path_radiance_accum_total_light(L, throughput, &L_light);
+ }
}
+
+ kernel_split_state.rng[ray_index] = rng;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_split_common.h b/intern/cycles/kernel/split/kernel_split_common.h
index 4303ba0a905..57f070d51e0 100644
--- a/intern/cycles/kernel/split/kernel_split_common.h
+++ b/intern/cycles/kernel/split/kernel_split_common.h
@@ -37,41 +37,42 @@
#include "util/util_atomic.h"
-#include "kernel/kernel_random.h"
-#include "kernel/kernel_projection.h"
-#include "kernel/kernel_montecarlo.h"
-#include "kernel/kernel_differential.h"
-#include "kernel/kernel_camera.h"
-
-#include "kernel/geom/geom.h"
-#include "kernel/bvh/bvh.h"
-
-#include "kernel/kernel_accumulate.h"
-#include "kernel/kernel_shader.h"
-#include "kernel/kernel_light.h"
-#include "kernel/kernel_passes.h"
-
-#ifdef __SUBSURFACE__
-# include "kernel/kernel_subsurface.h"
+#include "kernel/kernel_path.h"
+#ifdef __BRANCHED_PATH__
+# include "kernel/kernel_path_branched.h"
#endif
-#ifdef __VOLUME__
-# include "kernel/kernel_volume.h"
-#endif
+#include "kernel/kernel_queues.h"
+#include "kernel/kernel_work_stealing.h"
-#include "kernel/kernel_path_state.h"
-#include "kernel/kernel_shadow.h"
-#include "kernel/kernel_emission.h"
-#include "kernel/kernel_path_common.h"
-#include "kernel/kernel_path_surface.h"
-#include "kernel/kernel_path_volume.h"
-#include "kernel/kernel_path_subsurface.h"
+#ifdef __BRANCHED_PATH__
+# include "kernel/split/kernel_branched.h"
+#endif
-#ifdef __KERNEL_DEBUG__
-# include "kernel/kernel_debug.h"
+CCL_NAMESPACE_BEGIN
+
+ccl_device_inline void kernel_split_path_end(KernelGlobals *kg, int ray_index)
+{
+ ccl_global char *ray_state = kernel_split_state.ray_state;
+
+#ifdef __BRANCHED_PATH__
+ if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_LIGHT_INDIRECT)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_LIGHT_INDIRECT_NEXT_ITER);
+ }
+ else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_VOLUME_INDIRECT)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER);
+ }
+ else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_SUBSURFACE_INDIRECT)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER);
+ }
+ else {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
+ }
+#else
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
#endif
+}
-#include "kernel/kernel_queues.h"
-#include "kernel/kernel_work_stealing.h"
+CCL_NAMESPACE_END
#endif /* __KERNEL_SPLIT_H__ */
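kernel_split_path_end() above relies on ray_state packing one exclusive state together with independent RAY_BRANCHED_* flags: a path that terminates inside a branched indirect loop is redirected to the matching *_NEXT_ITER state instead of RAY_UPDATE_BUFFER. The sketch below assumes a simple low-bits-state / high-bits-flags layout purely to illustrate the macro semantics; the actual bit layout in Cycles may differ.

// Sketch of the flag/state split assumed by kernel_split_path_end(): low bits
// hold one exclusive state, high bits hold independent flags. Names mirror the
// kernel but the layout here is illustrative only.
#include <cstdio>

enum { STATE_MASK = 0x0F, FLAG_SHIFT = 4 };
enum RayState { RAY_ACTIVE = 0, RAY_UPDATE_BUFFER = 1, RAY_LIGHT_INDIRECT_NEXT_ITER = 2 };
enum RayFlag  { RAY_BRANCHED_LIGHT_INDIRECT = 1 << (FLAG_SHIFT + 0) };

static inline void assign_state(unsigned char *rs, int i, int s) { rs[i] = (rs[i] & ~STATE_MASK) | s; }
static inline bool is_state(const unsigned char *rs, int i, int s) { return (rs[i] & STATE_MASK) == s; }
static inline void add_flag(unsigned char *rs, int i, int f) { rs[i] |= f; }
static inline bool is_flag(const unsigned char *rs, int i, int f) { return (rs[i] & f) != 0; }

void split_path_end(unsigned char *rs, int i)
{
  if(is_flag(rs, i, RAY_BRANCHED_LIGHT_INDIRECT))
    assign_state(rs, i, RAY_LIGHT_INDIRECT_NEXT_ITER);  /* resume the branched loop */
  else
    assign_state(rs, i, RAY_UPDATE_BUFFER);             /* really terminate the path */
}

int main()
{
  unsigned char ray_state[2] = {RAY_ACTIVE, RAY_ACTIVE};
  add_flag(ray_state, 1, RAY_BRANCHED_LIGHT_INDIRECT);
  split_path_end(ray_state, 0);
  split_path_end(ray_state, 1);
  printf("%d %d\n", is_state(ray_state, 0, RAY_UPDATE_BUFFER),
                    is_state(ray_state, 1, RAY_LIGHT_INDIRECT_NEXT_ITER));  /* prints 1 1 */
  return 0;
}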
diff --git a/intern/cycles/kernel/split/kernel_split_data_types.h b/intern/cycles/kernel/split/kernel_split_data_types.h
index 0af8bfc89d5..913e0dfd08d 100644
--- a/intern/cycles/kernel/split/kernel_split_data_types.h
+++ b/intern/cycles/kernel/split/kernel_split_data_types.h
@@ -62,7 +62,46 @@ typedef struct SplitParams {
SPLIT_DATA_ENTRY(DebugData, debug_data, 1)
#else
# define SPLIT_DATA_DEBUG_ENTRIES
-#endif
+#endif /* DEBUG */
+
+#ifdef __BRANCHED_PATH__
+
+typedef ccl_global struct SplitBranchedState {
+ /* various state that must be kept and restored after an indirect loop */
+ PathState path_state;
+ float3 throughput;
+ Ray ray;
+
+ struct ShaderData sd;
+ Intersection isect;
+
+ char ray_state;
+
+ /* indirect loop state */
+ int next_closure;
+ int next_sample;
+ int num_samples;
+
+#ifdef __SUBSURFACE__
+ int ss_next_closure;
+ int ss_next_sample;
+ int next_hit;
+ int num_hits;
+
+ uint lcg_state;
+ SubsurfaceIntersection ss_isect;
+
+# ifdef __VOLUME__
+ VolumeStack volume_stack[VOLUME_STACK_SIZE];
+# endif /* __VOLUME__ */
+#endif /* __SUBSURFACE__ */
+} SplitBranchedState;
+
+#define SPLIT_DATA_BRANCHED_ENTRIES \
+ SPLIT_DATA_ENTRY(SplitBranchedState, branched_state, 1)
+#else
+#define SPLIT_DATA_BRANCHED_ENTRIES
+#endif /* __BRANCHED_PATH__ */
#define SPLIT_DATA_ENTRIES \
SPLIT_DATA_ENTRY(ccl_global RNG, rng, 1) \
@@ -72,9 +111,6 @@ typedef struct SplitParams {
SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \
SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \
SPLIT_DATA_ENTRY(ccl_global Intersection, isect, 1) \
- SPLIT_DATA_ENTRY(ccl_global float3, ao_alpha, 1) \
- SPLIT_DATA_ENTRY(ccl_global float3, ao_bsdf, 1) \
- SPLIT_DATA_ENTRY(ccl_global Ray, ao_light_ray, 1) \
SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \
SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \
SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \
@@ -82,6 +118,7 @@ typedef struct SplitParams {
SPLIT_DATA_ENTRY(ccl_global uint, work_array, 1) \
SPLIT_DATA_ENTRY(ShaderData, sd, 1) \
SPLIT_DATA_ENTRY(ShaderData, sd_DL_shadow, 1) \
+ SPLIT_DATA_BRANCHED_ENTRIES \
SPLIT_DATA_DEBUG_ENTRIES \
/* struct that holds pointers to data in the shared state buffer */
@@ -125,6 +162,11 @@ typedef struct BackgroundAOLocals {
uint queue_atomics_ao;
} BackgroundAOLocals;
+typedef struct ShaderSortLocals {
+ uint local_value[SHADER_SORT_BLOCK_SIZE];
+ ushort local_index[SHADER_SORT_BLOCK_SIZE];
+} ShaderSortLocals;
+
CCL_NAMESPACE_END
#endif /* __KERNEL_SPLIT_DATA_TYPES_H__ */
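The SPLIT_DATA_ENTRY / SPLIT_DATA_ENTRIES pair above is an X-macro: the same entry list is expanded once into struct members and once into the per-ray buffer size calculation, which is why adding SPLIT_DATA_BRANCHED_ENTRIES grows both automatically. A stripped-down sketch of the technique follows; the real macros also handle alignment and address-space qualifiers, which is omitted here, and the member types are placeholders.

// Simplified sketch of the SPLIT_DATA_ENTRY X-macro idea: one entry list,
// expanded once into a struct of pointers and once into a size computation.
#include <cstdio>

struct PathState { int flag, bounce; };
struct Intersection { float t; int prim; };

#define SPLIT_DATA_ENTRIES \
  SPLIT_DATA_ENTRY(PathState, path_state, 1) \
  SPLIT_DATA_ENTRY(Intersection, isect, 1) \
  SPLIT_DATA_ENTRY(char, ray_state, 1)

struct SplitData {
#define SPLIT_DATA_ENTRY(type, name, num) type *name;
  SPLIT_DATA_ENTRIES
#undef SPLIT_DATA_ENTRY
};

size_t split_data_buffer_size(size_t num_rays)
{
  size_t size = 0;
#define SPLIT_DATA_ENTRY(type, name, num) size += sizeof(type) * (num) * num_rays;
  SPLIT_DATA_ENTRIES
#undef SPLIT_DATA_ENTRY
  return size;
}

int main()
{
  printf("pointer table: %zu bytes, per-ray state for 1024 rays: %zu bytes\n",
         sizeof(SplitData), split_data_buffer_size(1024));
  return 0;
}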
diff --git a/intern/cycles/kernel/split/kernel_subsurface_scatter.h b/intern/cycles/kernel/split/kernel_subsurface_scatter.h
index 0b4d50c70ee..8364f185d75 100644
--- a/intern/cycles/kernel/split/kernel_subsurface_scatter.h
+++ b/intern/cycles/kernel/split/kernel_subsurface_scatter.h
@@ -16,42 +16,206 @@
CCL_NAMESPACE_BEGIN
+#if defined(__BRANCHED_PATH__) && defined(__SUBSURFACE__)
-ccl_device void kernel_subsurface_scatter(KernelGlobals *kg,
- ccl_local_param unsigned int* local_queue_atomics)
+ccl_device_inline void kernel_split_branched_path_subsurface_indirect_light_init(KernelGlobals *kg, int ray_index)
{
-#ifdef __SUBSURFACE__
- if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
- *local_queue_atomics = 0;
+ kernel_split_branched_path_indirect_loop_init(kg, ray_index);
+
+ SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+
+ branched_state->ss_next_closure = 0;
+ branched_state->ss_next_sample = 0;
+
+ branched_state->num_hits = 0;
+ branched_state->next_hit = 0;
+
+ ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_SUBSURFACE_INDIRECT);
+}
+
+ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_iter(KernelGlobals *kg, int ray_index)
+{
+ SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
+
+ ShaderData *sd = &branched_state->sd;
+ RNG rng = kernel_split_state.rng[ray_index];
+ PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+ ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
+
+ for(int i = branched_state->ss_next_closure; i < sd->num_closure; i++) {
+ ShaderClosure *sc = &sd->closure[i];
+
+ if(!CLOSURE_IS_BSSRDF(sc->type))
+ continue;
+
+ /* set up random number generator */
+ if(branched_state->ss_next_sample == 0 && branched_state->next_hit == 0 &&
+ branched_state->next_closure == 0 && branched_state->next_sample == 0)
+ {
+ branched_state->lcg_state = lcg_state_init(&rng,
+ branched_state->path_state.rng_offset,
+ branched_state->path_state.sample,
+ 0x68bc21eb);
+ }
+ int num_samples = kernel_data.integrator.subsurface_samples;
+ float num_samples_inv = 1.0f/num_samples;
+ RNG bssrdf_rng = cmj_hash(rng, i);
+
+ /* do the subsurface scatter step with a copy of the shader data; this will
+ * replace the BSSRDF with a diffuse BSDF closure */
+ for(int j = branched_state->ss_next_sample; j < num_samples; j++) {
+ ccl_global SubsurfaceIntersection *ss_isect = &branched_state->ss_isect;
+ float bssrdf_u, bssrdf_v;
+ path_branched_rng_2D(kg,
+ &bssrdf_rng,
+ &branched_state->path_state,
+ j,
+ num_samples,
+ PRNG_BSDF_U,
+ &bssrdf_u,
+ &bssrdf_v);
+
+ /* intersection is expensive, so avoid doing it multiple times for the same input */
+ if(branched_state->next_hit == 0 && branched_state->next_closure == 0 && branched_state->next_sample == 0) {
+ RNG lcg_state = branched_state->lcg_state;
+ SubsurfaceIntersection ss_isect_private;
+
+ branched_state->num_hits = subsurface_scatter_multi_intersect(kg,
+ &ss_isect_private,
+ sd,
+ sc,
+ &lcg_state,
+ bssrdf_u, bssrdf_v,
+ true);
+
+ branched_state->lcg_state = lcg_state;
+ *ss_isect = ss_isect_private;
+ }
+
+#ifdef __VOLUME__
+ Ray volume_ray = branched_state->ray;
+ bool need_update_volume_stack =
+ kernel_data.integrator.use_volumes &&
+ sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME;
+#endif /* __VOLUME__ */
+
+ /* compute lighting with the BSDF closure */
+ for(int hit = branched_state->next_hit; hit < branched_state->num_hits; hit++) {
+ ShaderData *bssrdf_sd = &kernel_split_state.sd[ray_index];
+ *bssrdf_sd = *sd; /* note: the copy happens on each iteration of the inner loop;
+ * this is important as the indirect path will write into bssrdf_sd */
+
+ SubsurfaceIntersection ss_isect_private = *ss_isect;
+ subsurface_scatter_multi_setup(kg,
+ &ss_isect_private,
+ hit,
+ bssrdf_sd,
+ &branched_state->path_state,
+ branched_state->path_state.flag,
+ sc,
+ true);
+ *ss_isect = ss_isect_private;
+
+ ccl_global PathState *hit_state = &kernel_split_state.path_state[ray_index];
+ *hit_state = branched_state->path_state;
+
+ path_state_branch(hit_state, j, num_samples);
+
+#ifdef __VOLUME__
+ if(need_update_volume_stack) {
+ /* Setup ray from previous surface point to the new one. */
+ float3 P = ray_offset(bssrdf_sd->P, -bssrdf_sd->Ng);
+ volume_ray.D = normalize_len(P - volume_ray.P, &volume_ray.t);
+
+ /* this next part is expensive as it does scene intersection, so only do it once */
+ if(branched_state->next_closure == 0 && branched_state->next_sample == 0) {
+ for(int k = 0; k < VOLUME_STACK_SIZE; k++) {
+ branched_state->volume_stack[k] = hit_state->volume_stack[k];
+ }
+
+ kernel_volume_stack_update_for_subsurface(kg,
+ emission_sd,
+ &volume_ray,
+ branched_state->volume_stack);
+ }
+
+ for(int k = 0; k < VOLUME_STACK_SIZE; k++) {
+ hit_state->volume_stack[k] = branched_state->volume_stack[k];
+ }
+ }
+#endif /* __VOLUME__ */
+
+#ifdef __EMISSION__
+ if(branched_state->next_closure == 0 && branched_state->next_sample == 0) {
+ /* direct light */
+ if(kernel_data.integrator.use_direct_light) {
+ int all = (kernel_data.integrator.sample_all_lights_direct) ||
+ (branched_state->path_state.flag & PATH_RAY_SHADOW_CATCHER);
+ kernel_branched_path_surface_connect_light(kg,
+ &rng,
+ bssrdf_sd,
+ emission_sd,
+ hit_state,
+ branched_state->throughput,
+ num_samples_inv,
+ L,
+ all);
+ }
+ }
+#endif /* __EMISSION__ */
+
+ /* indirect light */
+ if(kernel_split_branched_path_surface_indirect_light_iter(kg,
+ ray_index,
+ num_samples_inv,
+ bssrdf_sd,
+ false))
+ {
+ branched_state->ss_next_closure = i;
+ branched_state->ss_next_sample = j;
+ branched_state->next_hit = hit;
+
+ return true;
+ }
+
+ branched_state->next_closure = 0;
+ }
+
+ branched_state->next_hit = 0;
+ }
+
+ branched_state->ss_next_sample = 0;
+ }
+
+ kernel_split_branched_path_indirect_loop_end(kg, ray_index);
+
+ return false;
+}
+
+#endif /* __BRANCHED_PATH__ && __SUBSURFACE__ */
+
+ccl_device void kernel_subsurface_scatter(KernelGlobals *kg)
+{
+ int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+ if(thread_index == 0) {
+ /* We will empty both queues in this kernel. */
+ kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
+ kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
}
- ccl_barrier(CCL_LOCAL_MEM_FENCE);
int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
ray_index = get_ray_index(kg, ray_index,
QUEUE_ACTIVE_AND_REGENERATED_RAYS,
kernel_split_state.queue_data,
kernel_split_params.queue_size,
- 0);
-
-#ifdef __COMPUTE_DEVICE_GPU__
- /* If we are executing on a GPU device, we exit all threads that are not
- * required.
- *
- * If we are executing on a CPU device, then we need to keep all threads
- * active since we have barrier() calls later in the kernel. CPU devices,
- * expect all threads to execute barrier statement.
- */
- if(ray_index == QUEUE_EMPTY_SLOT) {
- return;
- }
-#endif
-
- char enqueue_flag = 0;
-
-#ifndef __COMPUTE_DEVICE_GPU__
- if(ray_index != QUEUE_EMPTY_SLOT) {
-#endif
+ 1);
+ get_ray_index(kg, thread_index,
+ QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+#ifdef __SUBSURFACE__
ccl_global char *ray_state = kernel_split_state.ray_state;
ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
@@ -64,34 +228,85 @@ ccl_device void kernel_subsurface_scatter(KernelGlobals *kg,
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
if(sd->flag & SD_BSSRDF) {
- if(kernel_path_subsurface_scatter(kg,
- sd,
- emission_sd,
- L,
- state,
- &rng,
- ray,
- throughput,
- ss_indirect)) {
- ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
- enqueue_flag = 1;
+
+#ifdef __BRANCHED_PATH__
+ if(!kernel_data.integrator.branched) {
+#endif
+ if(kernel_path_subsurface_scatter(kg,
+ sd,
+ emission_sd,
+ L,
+ state,
+ &rng,
+ ray,
+ throughput,
+ ss_indirect)) {
+ kernel_split_path_end(kg, ray_index);
+ }
+#ifdef __BRANCHED_PATH__
+ }
+ else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
+ float bssrdf_probability;
+ ShaderClosure *sc = subsurface_scatter_pick_closure(kg, sd, &bssrdf_probability);
+
+ /* modify throughput for picking bssrdf or bsdf */
+ *throughput *= bssrdf_probability;
+
+ /* do bssrdf scatter step if we picked a bssrdf closure */
+ if(sc) {
+ uint lcg_state = lcg_state_init(&rng, state->rng_offset, state->sample, 0x68bc21eb);
+
+ float bssrdf_u, bssrdf_v;
+ path_state_rng_2D(kg,
+ &rng,
+ state,
+ PRNG_BSDF_U,
+ &bssrdf_u, &bssrdf_v);
+ subsurface_scatter_step(kg,
+ sd,
+ state,
+ state->flag,
+ sc,
+ &lcg_state,
+ bssrdf_u, bssrdf_v,
+ false);
+ }
+ }
+ else {
+ kernel_split_branched_path_subsurface_indirect_light_init(kg, ray_index);
+
+ if(kernel_split_branched_path_subsurface_indirect_light_iter(kg, ray_index)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
}
+#endif
}
kernel_split_state.rng[ray_index] = rng;
}
-#ifndef __COMPUTE_DEVICE_GPU__
+# ifdef __BRANCHED_PATH__
+ if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
+ kernel_split_params.queue_index[QUEUE_SUBSURFACE_INDIRECT_ITER] = 0;
}
-#endif
- /* Enqueue RAY_UPDATE_BUFFER rays. */
- enqueue_ray_index_local(ray_index,
- QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
- enqueue_flag,
- kernel_split_params.queue_size,
- local_queue_atomics,
- kernel_split_state.queue_data,
- kernel_split_params.queue_index);
+ /* iter loop */
+ ray_index = get_ray_index(kg, ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0),
+ QUEUE_SUBSURFACE_INDIRECT_ITER,
+ kernel_split_state.queue_data,
+ kernel_split_params.queue_size,
+ 1);
+
+ if(IS_STATE(ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER)) {
+ /* for render passes, sum and reset indirect light pass variables
+ * for the next samples */
+ path_radiance_sum_indirect(&kernel_split_state.path_radiance[ray_index]);
+ path_radiance_reset_indirect(&kernel_split_state.path_radiance[ray_index]);
+
+ if(kernel_split_branched_path_subsurface_indirect_light_iter(kg, ray_index)) {
+ ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+ }
+ }
+# endif /* __BRANCHED_PATH__ */
#endif /* __SUBSURFACE__ */
diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h
index 4b5e4ebac00..328ff79223b 100644
--- a/intern/cycles/kernel/svm/svm_image.h
+++ b/intern/cycles/kernel/svm/svm_image.h
@@ -32,13 +32,7 @@ CCL_NAMESPACE_BEGIN
ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, uint srgb, uint use_alpha)
{
#ifdef __KERNEL_CPU__
-# ifdef __KERNEL_SSE2__
- ssef r_ssef;
- float4 &r = (float4 &)r_ssef;
- r = kernel_tex_image_interp(id, x, y);
-# else
float4 r = kernel_tex_image_interp(id, x, y);
-# endif
#elif defined(__KERNEL_OPENCL__)
float4 r = kernel_tex_image_interp(kg, id, x, y);
#else
@@ -152,7 +146,10 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
CUtexObject tex = kernel_tex_fetch(__bindless_mapping, id);
/* float4, byte4 and half4 */
const int texture_type = kernel_tex_type(id);
- if(texture_type == IMAGE_DATA_TYPE_FLOAT4 || texture_type == IMAGE_DATA_TYPE_BYTE4 || texture_type == IMAGE_DATA_TYPE_HALF4) {
+ if(texture_type == IMAGE_DATA_TYPE_FLOAT4 ||
+ texture_type == IMAGE_DATA_TYPE_BYTE4 ||
+ texture_type == IMAGE_DATA_TYPE_HALF4)
+ {
r = kernel_tex_image_interp_float4(tex, x, y);
}
/* float, byte and half */
@@ -163,43 +160,22 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
# endif
#endif
-#ifdef __KERNEL_SSE2__
- float alpha = r.w;
+ const float alpha = r.w;
if(use_alpha && alpha != 1.0f && alpha != 0.0f) {
- r_ssef = r_ssef / ssef(alpha);
+ r /= alpha;
const int texture_type = kernel_tex_type(id);
- if(texture_type == IMAGE_DATA_TYPE_BYTE4 || texture_type == IMAGE_DATA_TYPE_BYTE) {
- r_ssef = min(r_ssef, ssef(1.0f));
+ if(texture_type == IMAGE_DATA_TYPE_BYTE4 ||
+ texture_type == IMAGE_DATA_TYPE_BYTE)
+ {
+ r = min(r, make_float4(1.0f, 1.0f, 1.0f, 1.0f));
}
r.w = alpha;
}
if(srgb) {
- r_ssef = color_srgb_to_scene_linear(r_ssef);
- r.w = alpha;
+ r = color_srgb_to_scene_linear_v4(r);
}
-#else
- if(use_alpha && r.w != 1.0f && r.w != 0.0f) {
- float invw = 1.0f/r.w;
- r.x *= invw;
- r.y *= invw;
- r.z *= invw;
-
- const int texture_type = kernel_tex_type(id);
- if(texture_type == IMAGE_DATA_TYPE_BYTE4 || texture_type == IMAGE_DATA_TYPE_BYTE) {
- r.x = min(r.x, 1.0f);
- r.y = min(r.y, 1.0f);
- r.z = min(r.z, 1.0f);
- }
- }
-
- if(srgb) {
- r.x = color_srgb_to_scene_linear(r.x);
- r.y = color_srgb_to_scene_linear(r.y);
- r.z = color_srgb_to_scene_linear(r.z);
- }
-#endif
return r;
}
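svm_image_texture() above now defers the sRGB conversion to color_srgb_to_scene_linear_v4() from util_color.h (changed below). For reference, the scalar conversion those vector helpers are built from is the standard sRGB transfer function; the snippet is a minimal standalone version for illustration, not the Cycles scalar helper itself, which lives elsewhere in util_color.h.

// Standard sRGB-to-linear transfer function, standalone for reference.
#include <cmath>
#include <cstdio>

float srgb_to_linear(float c)
{
  return (c <= 0.04045f) ? c * (1.0f / 12.92f)
                         : powf((c + 0.055f) * (1.0f / 1.055f), 2.4f);
}

int main()
{
  const float vals[] = {0.0f, 0.04045f, 0.5f, 1.0f};
  for(float c : vals)
    printf("srgb %.5f -> linear %.5f\n", c, srgb_to_linear(c));
  return 0;
}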
diff --git a/intern/cycles/util/util_color.h b/intern/cycles/util/util_color.h
index 4d673dc34d8..c73beab98dc 100644
--- a/intern/cycles/util/util_color.h
+++ b/intern/cycles/util/util_color.h
@@ -157,16 +157,6 @@ ccl_device float3 xyz_to_rgb(float x, float y, float z)
0.055648f * x + -0.204043f * y + 1.057311f * z);
}
-#ifndef __KERNEL_OPENCL__
-
-ccl_device float3 color_srgb_to_scene_linear(float3 c)
-{
- return make_float3(
- color_srgb_to_scene_linear(c.x),
- color_srgb_to_scene_linear(c.y),
- color_srgb_to_scene_linear(c.z));
-}
-
#ifdef __KERNEL_SSE2__
/*
* Calculate initial guess for arg^exp based on float representation
@@ -222,17 +212,38 @@ ccl_device ssef color_srgb_to_scene_linear(const ssef &c)
ssef gte = fastpow24(gtebase);
return select(cmp, lt, gte);
}
-#endif
+#endif /* __KERNEL_SSE2__ */
-ccl_device float3 color_scene_linear_to_srgb(float3 c)
+ccl_device float3 color_srgb_to_scene_linear_v3(float3 c)
{
- return make_float3(
- color_scene_linear_to_srgb(c.x),
- color_scene_linear_to_srgb(c.y),
- color_scene_linear_to_srgb(c.z));
+ return make_float3(color_srgb_to_scene_linear(c.x),
+ color_srgb_to_scene_linear(c.y),
+ color_srgb_to_scene_linear(c.z));
}
+ccl_device float3 color_scene_linear_to_srgb_v3(float3 c)
+{
+ return make_float3(color_scene_linear_to_srgb(c.x),
+ color_scene_linear_to_srgb(c.y),
+ color_scene_linear_to_srgb(c.z));
+}
+
+ccl_device float4 color_srgb_to_scene_linear_v4(float4 c)
+{
+#ifdef __KERNEL_SSE2__
+ ssef r_ssef;
+ float4 &r = (float4 &)r_ssef;
+ r = c;
+ r_ssef = color_srgb_to_scene_linear(r_ssef);
+ r.w = c.w;
+ return r;
+#else
+ return make_float4(color_srgb_to_scene_linear(c.x),
+ color_srgb_to_scene_linear(c.y),
+ color_srgb_to_scene_linear(c.z),
+ c.w);
#endif
+}
ccl_device float linear_rgb_to_gray(float3 c)
{