Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
diff options
context:
space:
mode:
author Sergey Sharybin <sergey.vfx@gmail.com> 2017-10-11 11:14:16 +0300
committer Sergey Sharybin <sergey.vfx@gmail.com> 2017-10-11 11:14:16 +0300
commit dc95c79971d48dae9418e0047f14c01e6dcab13c (patch)
tree f6da183a7e7b3d0adf691c5b270ccab4f8801e35 /intern/cycles
parent ef1918d3128b1f56d69c189a838b648dc88de4c9 (diff)
parent 4782000fd5b2a1ae3041884f64ab192dbcb853c0 (diff)
Merge branch 'master' into blender2.8
Diffstat (limited to 'intern/cycles')
-rw-r--r-- intern/cycles/blender/blender_session.cpp 13
-rw-r--r-- intern/cycles/blender/blender_sync.cpp 17
-rw-r--r-- intern/cycles/blender/blender_sync.h 7
-rw-r--r-- intern/cycles/device/device.cpp 48
-rw-r--r-- intern/cycles/device/device.h 5
-rw-r--r-- intern/cycles/device/device_cpu.cpp 2
-rw-r--r-- intern/cycles/device/device_cuda.cpp 23
-rw-r--r-- intern/cycles/device/device_network.cpp 6
-rw-r--r-- intern/cycles/device/device_opencl.cpp 2
-rw-r--r-- intern/cycles/kernel/kernel_path.h 125
-rw-r--r-- intern/cycles/kernel/kernel_path_branched.h 305
-rw-r--r-- intern/cycles/kernel/kernel_types.h 2
-rw-r--r-- intern/cycles/kernel/kernel_volume.h 3
-rw-r--r-- intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h 2
-rw-r--r-- intern/cycles/render/integrator.cpp 1
-rw-r--r-- intern/cycles/render/mesh.cpp 16
-rw-r--r-- intern/cycles/render/mesh.h 3
-rw-r--r-- intern/cycles/render/scene.h 2
-rw-r--r-- intern/cycles/render/shader.cpp 10
19 files changed, 324 insertions, 268 deletions
diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp
index f4e9da43454..75cc06ecd82 100644
--- a/intern/cycles/blender/blender_session.cpp
+++ b/intern/cycles/blender/blender_session.cpp
@@ -121,8 +121,7 @@ void BlenderSession::create()
void BlenderSession::create_session()
{
SessionParams session_params = BlenderSync::get_session_params(b_engine, b_userpref, b_scene, background);
- bool is_cpu = session_params.device.type == DEVICE_CPU;
- SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background, is_cpu);
+ SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background);
bool session_pause = BlenderSync::get_session_pause(b_scene, background);
/* reset status/progress */
@@ -147,7 +146,7 @@ void BlenderSession::create_session()
session->set_pause(session_pause);
/* create sync */
- sync = new BlenderSync(b_engine, b_data, b_depsgraph, b_scene, scene, !background, session->progress, is_cpu);
+ sync = new BlenderSync(b_engine, b_data, b_depsgraph, b_scene, scene, !background, session->progress);
BL::Object b_camera_override(b_engine.camera_override());
if(b_v3d) {
if(session_pause == false) {
@@ -185,8 +184,7 @@ void BlenderSession::reset_session(BL::BlendData& b_data_, BL::Scene& b_scene_)
b_scene = b_scene_;
SessionParams session_params = BlenderSync::get_session_params(b_engine, b_userpref, b_scene, background);
- const bool is_cpu = session_params.device.type == DEVICE_CPU;
- SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background, is_cpu);
+ SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background);
width = render_resolution_x(b_render);
height = render_resolution_y(b_render);
@@ -217,7 +215,7 @@ void BlenderSession::reset_session(BL::BlendData& b_data_, BL::Scene& b_scene_)
session->stats.mem_peak = session->stats.mem_used;
/* sync object should be re-created */
- sync = new BlenderSync(b_engine, b_data, b_depsgraph, b_scene, scene, !background, session->progress, is_cpu);
+ sync = new BlenderSync(b_engine, b_data, b_depsgraph, b_scene, scene, !background, session->progress);
/* for final render we will do full data sync per render layer, only
* do some basic syncing here, no objects or materials for speed */
@@ -742,8 +740,7 @@ void BlenderSession::synchronize()
/* on session/scene parameter changes, we recreate session entirely */
SessionParams session_params = BlenderSync::get_session_params(b_engine, b_userpref, b_scene, background);
- const bool is_cpu = session_params.device.type == DEVICE_CPU;
- SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background, is_cpu);
+ SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background);
bool session_pause = BlenderSync::get_session_pause(b_scene, background);
if(session->params.modified(session_params) ||
diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp
index 432f67f3b5e..c1783045f66 100644
--- a/intern/cycles/blender/blender_sync.cpp
+++ b/intern/cycles/blender/blender_sync.cpp
@@ -48,8 +48,7 @@ BlenderSync::BlenderSync(BL::RenderEngine& b_engine,
BL::Scene& b_scene,
Scene *scene,
bool preview,
- Progress &progress,
- bool is_cpu)
+ Progress &progress)
: b_engine(b_engine),
b_data(b_data),
b_depsgraph(b_depsgraph),
@@ -65,7 +64,6 @@ BlenderSync::BlenderSync(BL::RenderEngine& b_engine,
scene(scene),
preview(preview),
experimental(false),
- is_cpu(is_cpu),
dicing_rate(1.0f),
max_subdivisions(12),
progress(progress)
@@ -598,8 +596,7 @@ array<Pass> BlenderSync::sync_render_passes(BL::RenderLayer& b_rlay,
/* Scene Parameters */
SceneParams BlenderSync::get_scene_params(BL::Scene& b_scene,
- bool background,
- bool is_cpu)
+ bool background)
{
BL::RenderSettings r = b_scene.render();
SceneParams params;
@@ -639,15 +636,7 @@ SceneParams BlenderSync::get_scene_params(BL::Scene& b_scene,
params.texture_limit = 0;
}
-#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
- if(is_cpu) {
- params.use_qbvh = DebugFlags().cpu.qbvh && system_cpu_support_sse2();
- }
- else
-#endif
- {
- params.use_qbvh = false;
- }
+ params.use_qbvh = DebugFlags().cpu.qbvh;
return params;
}
diff --git a/intern/cycles/blender/blender_sync.h b/intern/cycles/blender/blender_sync.h
index 69fee9551dd..544836f0979 100644
--- a/intern/cycles/blender/blender_sync.h
+++ b/intern/cycles/blender/blender_sync.h
@@ -56,8 +56,7 @@ public:
BL::Scene& b_scene,
Scene *scene,
bool preview,
- Progress &progress,
- bool is_cpu);
+ Progress &progress);
~BlenderSync();
/* sync */
@@ -85,8 +84,7 @@ public:
/* get parameters */
static SceneParams get_scene_params(BL::Scene& b_scene,
- bool background,
- bool is_cpu);
+ bool background);
static SessionParams get_session_params(BL::RenderEngine& b_engine,
BL::UserPreferences& b_userpref,
BL::Scene& b_scene,
@@ -182,7 +180,6 @@ private:
Scene *scene;
bool preview;
bool experimental;
- bool is_cpu;
float dicing_rate;
int max_subdivisions;
diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp
index 5ae83b56fcd..19c4bec55a8 100644
--- a/intern/cycles/device/device.cpp
+++ b/intern/cycles/device/device.cpp
@@ -35,6 +35,7 @@ CCL_NAMESPACE_BEGIN
bool Device::need_types_update = true;
bool Device::need_devices_update = true;
+thread_mutex Device::device_mutex;
vector<DeviceType> Device::types;
vector<DeviceInfo> Device::devices;
@@ -443,53 +444,49 @@ string Device::string_from_type(DeviceType type)
vector<DeviceType>& Device::available_types()
{
+ thread_scoped_lock lock(device_mutex);
if(need_types_update) {
types.clear();
types.push_back(DEVICE_CPU);
-
#ifdef WITH_CUDA
- if(device_cuda_init())
+ if(device_cuda_init()) {
types.push_back(DEVICE_CUDA);
+ }
#endif
-
#ifdef WITH_OPENCL
- if(device_opencl_init())
+ if(device_opencl_init()) {
types.push_back(DEVICE_OPENCL);
+ }
#endif
-
#ifdef WITH_NETWORK
types.push_back(DEVICE_NETWORK);
#endif
-
need_types_update = false;
}
-
return types;
}
vector<DeviceInfo>& Device::available_devices()
{
+ thread_scoped_lock lock(device_mutex);
if(need_devices_update) {
devices.clear();
-#ifdef WITH_CUDA
- if(device_cuda_init())
- device_cuda_info(devices);
-#endif
-
#ifdef WITH_OPENCL
- if(device_opencl_init())
+ if(device_opencl_init()) {
device_opencl_info(devices);
+ }
+#endif
+#ifdef WITH_CUDA
+ if(device_cuda_init()) {
+ device_cuda_info(devices);
+ }
#endif
-
device_cpu_info(devices);
-
#ifdef WITH_NETWORK
device_network_info(devices);
#endif
-
need_devices_update = false;
}
-
return devices;
}
@@ -497,12 +494,6 @@ string Device::device_capabilities()
{
string capabilities = "CPU device capabilities: ";
capabilities += device_cpu_capabilities() + "\n";
-#ifdef WITH_CUDA
- if(device_cuda_init()) {
- capabilities += "\nCUDA device capabilities:\n";
- capabilities += device_cuda_capabilities();
- }
-#endif
#ifdef WITH_OPENCL
if(device_opencl_init()) {
@@ -511,6 +502,13 @@ string Device::device_capabilities()
}
#endif
+#ifdef WITH_CUDA
+ if(device_cuda_init()) {
+ capabilities += "\nCUDA device capabilities:\n";
+ capabilities += device_cuda_capabilities();
+ }
+#endif
+
return capabilities;
}
@@ -526,10 +524,14 @@ DeviceInfo Device::get_multi_device(vector<DeviceInfo> subdevices)
info.num = 0;
info.has_bindless_textures = true;
+ info.has_volume_decoupled = true;
+ info.has_qbvh = true;
foreach(DeviceInfo &device, subdevices) {
assert(device.type == info.multi_devices[0].type);
info.has_bindless_textures &= device.has_bindless_textures;
+ info.has_volume_decoupled &= device.has_volume_decoupled;
+ info.has_qbvh &= device.has_qbvh;
}
return info;
diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index fe0bcc5b91f..29803abd153 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -55,6 +55,8 @@ public:
bool display_device;
bool advanced_shading;
bool has_bindless_textures; /* flag for GPU and Multi device */
+ bool has_volume_decoupled;
+ bool has_qbvh;
bool use_split_kernel; /* Denotes if the device is going to run cycles using split-kernel */
vector<DeviceInfo> multi_devices;
@@ -66,6 +68,8 @@ public:
display_device = false;
advanced_shading = true;
has_bindless_textures = false;
+ has_volume_decoupled = false;
+ has_qbvh = false;
use_split_kernel = false;
}
@@ -364,6 +368,7 @@ public:
private:
/* Indicted whether device types and devices lists were initialized. */
static bool need_types_update, need_devices_update;
+ static thread_mutex device_mutex;
static vector<DeviceType> types;
static vector<DeviceInfo> devices;
};
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index ac6d3246d38..a17caabc850 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -1024,6 +1024,8 @@ void device_cpu_info(vector<DeviceInfo>& devices)
info.id = "CPU";
info.num = 0;
info.advanced_shading = true;
+ info.has_qbvh = system_cpu_support_sse2();
+ info.has_volume_decoupled = true;
devices.insert(devices.begin(), info);
}
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 3d209e5560c..066be82d55b 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -1313,9 +1313,14 @@ public:
CUdeviceptr d_work_tiles = cuda_device_ptr(work_tiles.device_pointer);
/* Prepare work size. More step samples render faster, but for now we
- * remain conservative to avoid driver timeouts. */
+ * remain conservative for GPUs connected to a display to avoid driver
+ * timeouts and display freezing. */
int min_blocks, num_threads_per_block;
cuda_assert(cuOccupancyMaxPotentialBlockSize(&min_blocks, &num_threads_per_block, cuPathTrace, NULL, 0, 0));
+ if(!info.display_device) {
+ min_blocks *= 8;
+ }
+
uint step_samples = divide_up(min_blocks * num_threads_per_block, wtile->w * wtile->h);;
/* Render all samples. */
@@ -2130,7 +2135,6 @@ void device_cuda_info(vector<DeviceInfo>& devices)
for(int num = 0; num < count; num++) {
char name[256];
- int attr;
if(cuDeviceGetName(name, 256, num) != CUDA_SUCCESS)
continue;
@@ -2149,6 +2153,8 @@ void device_cuda_info(vector<DeviceInfo>& devices)
info.advanced_shading = (major >= 2);
info.has_bindless_textures = (major >= 3);
+ info.has_volume_decoupled = false;
+ info.has_qbvh = false;
int pci_location[3] = {0, 0, 0};
cuDeviceGetAttribute(&pci_location[0], CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, num);
@@ -2160,14 +2166,21 @@ void device_cuda_info(vector<DeviceInfo>& devices)
(unsigned int)pci_location[1],
(unsigned int)pci_location[2]);
- /* if device has a kernel timeout, assume it is used for display */
- if(cuDeviceGetAttribute(&attr, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, num) == CUDA_SUCCESS && attr == 1) {
+ /* If device has a kernel timeout and no compute preemption, we assume
+ * it is connected to a display and will freeze the display while doing
+ * computations. */
+ int timeout_attr = 0, preempt_attr = 0;
+ cuDeviceGetAttribute(&timeout_attr, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, num);
+ cuDeviceGetAttribute(&preempt_attr, CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED, num);
+
+ if(timeout_attr && !preempt_attr) {
info.description += " (Display)";
info.display_device = true;
display_devices.push_back(info);
}
- else
+ else {
devices.push_back(info);
+ }
}
if(!display_devices.empty())
diff --git a/intern/cycles/device/device_network.cpp b/intern/cycles/device/device_network.cpp
index deea59f1d23..ced10c98dc9 100644
--- a/intern/cycles/device/device_network.cpp
+++ b/intern/cycles/device/device_network.cpp
@@ -343,7 +343,11 @@ void device_network_info(vector<DeviceInfo>& devices)
info.description = "Network Device";
info.id = "NETWORK";
info.num = 0;
- info.advanced_shading = true; /* todo: get this info from device */
+
+ /* todo: get this info from device */
+ info.advanced_shading = true;
+ info.has_volume_decoupled = false;
+ info.has_qbvh = false;
devices.push_back(info);
}
diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index 9d89decaaaf..5808a31e605 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -123,6 +123,8 @@ void device_opencl_info(vector<DeviceInfo>& devices)
info.advanced_shading = OpenCLInfo::kernel_use_advanced_shading(platform_name);
info.use_split_kernel = OpenCLInfo::kernel_use_split(platform_name,
device_type);
+ info.has_volume_decoupled = false;
+ info.has_qbvh = false;
info.id = string("OPENCL_") + platform_name + "_" + device_name + "_" + hardware_id;
devices.push_back(info);
num_devices++;
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index 6b6c5603b70..652777a77a0 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -170,87 +170,90 @@ ccl_device_forceinline VolumeIntegrateResult kernel_path_volume(
if(!hit) {
kernel_volume_clean_stack(kg, state->volume_stack);
}
+
+ if(state->volume_stack[0].shader == SHADER_NONE) {
+ return VOLUME_PATH_ATTENUATED;
+ }
+
/* volume attenuation, emission, scatter */
- if(state->volume_stack[0].shader != SHADER_NONE) {
- Ray volume_ray = *ray;
- volume_ray.t = (hit)? isect->t: FLT_MAX;
+ Ray volume_ray = *ray;
+ volume_ray.t = (hit)? isect->t: FLT_MAX;
- bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
+ bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
# ifdef __VOLUME_DECOUPLED__
- int sampling_method = volume_stack_sampling_method(kg, state->volume_stack);
- bool direct = (state->flag & PATH_RAY_CAMERA) != 0;
- bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, direct, sampling_method);
+ int sampling_method = volume_stack_sampling_method(kg, state->volume_stack);
+ bool direct = (state->flag & PATH_RAY_CAMERA) != 0;
+ bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, direct, sampling_method);
- if(decoupled) {
- /* cache steps along volume for repeated sampling */
- VolumeSegment volume_segment;
+ if(decoupled) {
+ /* cache steps along volume for repeated sampling */
+ VolumeSegment volume_segment;
- shader_setup_from_volume(kg, sd, &volume_ray);
- kernel_volume_decoupled_record(kg, state,
- &volume_ray, sd, &volume_segment, heterogeneous);
+ shader_setup_from_volume(kg, sd, &volume_ray);
+ kernel_volume_decoupled_record(kg, state,
+ &volume_ray, sd, &volume_segment, heterogeneous);
- volume_segment.sampling_method = sampling_method;
+ volume_segment.sampling_method = sampling_method;
- /* emission */
- if(volume_segment.closure_flag & SD_EMISSION)
- path_radiance_accum_emission(L, state, *throughput, volume_segment.accum_emission);
+ /* emission */
+ if(volume_segment.closure_flag & SD_EMISSION)
+ path_radiance_accum_emission(L, state, *throughput, volume_segment.accum_emission);
- /* scattering */
- VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED;
+ /* scattering */
+ VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED;
- if(volume_segment.closure_flag & SD_SCATTER) {
- int all = kernel_data.integrator.sample_all_lights_indirect;
+ if(volume_segment.closure_flag & SD_SCATTER) {
+ int all = kernel_data.integrator.sample_all_lights_indirect;
- /* direct light sampling */
- kernel_branched_path_volume_connect_light(kg, sd,
- emission_sd, *throughput, state, L, all,
- &volume_ray, &volume_segment);
+ /* direct light sampling */
+ kernel_branched_path_volume_connect_light(kg, sd,
+ emission_sd, *throughput, state, L, all,
+ &volume_ray, &volume_segment);
- /* indirect sample. if we use distance sampling and take just
- * one sample for direct and indirect light, we could share
- * this computation, but makes code a bit complex */
- float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
- float rscatter = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
+ /* indirect sample. if we use distance sampling and take just
+ * one sample for direct and indirect light, we could share
+ * this computation, but makes code a bit complex */
+ float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
+ float rscatter = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
- result = kernel_volume_decoupled_scatter(kg,
- state, &volume_ray, sd, throughput,
- rphase, rscatter, &volume_segment, NULL, true);
- }
+ result = kernel_volume_decoupled_scatter(kg,
+ state, &volume_ray, sd, throughput,
+ rphase, rscatter, &volume_segment, NULL, true);
+ }
- /* free cached steps */
- kernel_volume_decoupled_free(kg, &volume_segment);
+ /* free cached steps */
+ kernel_volume_decoupled_free(kg, &volume_segment);
- if(result == VOLUME_PATH_SCATTERED) {
- if(kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray))
- return VOLUME_PATH_SCATTERED;
- else
- return VOLUME_PATH_MISSED;
- }
- else {
- *throughput *= volume_segment.accum_transmittance;
- }
+ if(result == VOLUME_PATH_SCATTERED) {
+ if(kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray))
+ return VOLUME_PATH_SCATTERED;
+ else
+ return VOLUME_PATH_MISSED;
}
- else
+ else {
+ *throughput *= volume_segment.accum_transmittance;
+ }
+ }
+ else
# endif /* __VOLUME_DECOUPLED__ */
- {
- /* integrate along volume segment with distance sampling */
- VolumeIntegrateResult result = kernel_volume_integrate(
- kg, state, sd, &volume_ray, L, throughput, heterogeneous);
+ {
+ /* integrate along volume segment with distance sampling */
+ VolumeIntegrateResult result = kernel_volume_integrate(
+ kg, state, sd, &volume_ray, L, throughput, heterogeneous);
# ifdef __VOLUME_SCATTER__
- if(result == VOLUME_PATH_SCATTERED) {
- /* direct lighting */
- kernel_path_volume_connect_light(kg, sd, emission_sd, *throughput, state, L);
-
- /* indirect light bounce */
- if(kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray))
- return VOLUME_PATH_SCATTERED;
- else
- return VOLUME_PATH_MISSED;
- }
-# endif /* __VOLUME_SCATTER__ */
+ if(result == VOLUME_PATH_SCATTERED) {
+ /* direct lighting */
+ kernel_path_volume_connect_light(kg, sd, emission_sd, *throughput, state, L);
+
+ /* indirect light bounce */
+ if(kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray))
+ return VOLUME_PATH_SCATTERED;
+ else
+ return VOLUME_PATH_MISSED;
}
+# endif /* __VOLUME_SCATTER__ */
}
return VOLUME_PATH_ATTENUATED;
diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h
index 2597d684a36..42df7e85b41 100644
--- a/intern/cycles/kernel/kernel_path_branched.h
+++ b/intern/cycles/kernel/kernel_path_branched.h
@@ -64,6 +64,164 @@ ccl_device_inline void kernel_branched_path_ao(KernelGlobals *kg,
#ifndef __SPLIT_KERNEL__
+#ifdef __VOLUME__
+ccl_device_forceinline void kernel_branched_path_volume(
+ KernelGlobals *kg,
+ ShaderData *sd,
+ PathState *state,
+ Ray *ray,
+ float3 *throughput,
+ ccl_addr_space Intersection *isect,
+ bool hit,
+ ShaderData *indirect_sd,
+ ShaderData *emission_sd,
+ PathRadiance *L)
+{
+ /* Sanitize volume stack. */
+ if(!hit) {
+ kernel_volume_clean_stack(kg, state->volume_stack);
+ }
+
+ if(state->volume_stack[0].shader == SHADER_NONE) {
+ return;
+ }
+
+ /* volume attenuation, emission, scatter */
+ Ray volume_ray = *ray;
+ volume_ray.t = (hit)? isect->t: FLT_MAX;
+
+ bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
+
+# ifdef __VOLUME_DECOUPLED__
+ /* decoupled ray marching only supported on CPU */
+ if(kernel_data.integrator.volume_decoupled) {
+ /* cache steps along volume for repeated sampling */
+ VolumeSegment volume_segment;
+
+ shader_setup_from_volume(kg, sd, &volume_ray);
+ kernel_volume_decoupled_record(kg, state,
+ &volume_ray, sd, &volume_segment, heterogeneous);
+
+ /* direct light sampling */
+ if(volume_segment.closure_flag & SD_SCATTER) {
+ volume_segment.sampling_method = volume_stack_sampling_method(kg, state->volume_stack);
+
+ int all = kernel_data.integrator.sample_all_lights_direct;
+
+ kernel_branched_path_volume_connect_light(kg, sd,
+ emission_sd, *throughput, state, L, all,
+ &volume_ray, &volume_segment);
+
+ /* indirect light sampling */
+ int num_samples = kernel_data.integrator.volume_samples;
+ float num_samples_inv = 1.0f/num_samples;
+
+ for(int j = 0; j < num_samples; j++) {
+ PathState ps = *state;
+ Ray pray = *ray;
+ float3 tp = *throughput;
+
+ /* branch RNG state */
+ path_state_branch(&ps, j, num_samples);
+
+ /* scatter sample. if we use distance sampling and take just one
+ * sample for direct and indirect light, we could share this
+ * computation, but makes code a bit complex */
+ float rphase = path_state_rng_1D(kg, &ps, PRNG_PHASE_CHANNEL);
+ float rscatter = path_state_rng_1D(kg, &ps, PRNG_SCATTER_DISTANCE);
+
+ VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
+ &ps, &pray, sd, &tp, rphase, rscatter, &volume_segment, NULL, false);
+
+ if(result == VOLUME_PATH_SCATTERED &&
+ kernel_path_volume_bounce(kg,
+ sd,
+ &tp,
+ &ps,
+ &L->state,
+ &pray))
+ {
+ kernel_path_indirect(kg,
+ indirect_sd,
+ emission_sd,
+ &pray,
+ tp*num_samples_inv,
+ &ps,
+ L);
+
+ /* for render passes, sum and reset indirect light pass variables
+ * for the next samples */
+ path_radiance_sum_indirect(L);
+ path_radiance_reset_indirect(L);
+ }
+ }
+ }
+
+ /* emission and transmittance */
+ if(volume_segment.closure_flag & SD_EMISSION)
+ path_radiance_accum_emission(L, state, *throughput, volume_segment.accum_emission);
+ *throughput *= volume_segment.accum_transmittance;
+
+ /* free cached steps */
+ kernel_volume_decoupled_free(kg, &volume_segment);
+ }
+ else
+# endif /* __VOLUME_DECOUPLED__ */
+ {
+ /* GPU: no decoupled ray marching, scatter probalistically */
+ int num_samples = kernel_data.integrator.volume_samples;
+ float num_samples_inv = 1.0f/num_samples;
+
+ /* todo: we should cache the shader evaluations from stepping
+ * through the volume, for now we redo them multiple times */
+
+ for(int j = 0; j < num_samples; j++) {
+ PathState ps = *state;
+ Ray pray = *ray;
+ float3 tp = (*throughput) * num_samples_inv;
+
+ /* branch RNG state */
+ path_state_branch(&ps, j, num_samples);
+
+ VolumeIntegrateResult result = kernel_volume_integrate(
+ kg, &ps, sd, &volume_ray, L, &tp, heterogeneous);
+
+# ifdef __VOLUME_SCATTER__
+ if(result == VOLUME_PATH_SCATTERED) {
+ /* todo: support equiangular, MIS and all light sampling.
+ * alternatively get decoupled ray marching working on the GPU */
+ kernel_path_volume_connect_light(kg, sd, emission_sd, tp, state, L);
+
+ if(kernel_path_volume_bounce(kg,
+ sd,
+ &tp,
+ &ps,
+ &L->state,
+ &pray))
+ {
+ kernel_path_indirect(kg,
+ indirect_sd,
+ emission_sd,
+ &pray,
+ tp,
+ &ps,
+ L);
+
+ /* for render passes, sum and reset indirect light pass variables
+ * for the next samples */
+ path_radiance_sum_indirect(L);
+ path_radiance_reset_indirect(L);
+ }
+ }
+# endif /* __VOLUME_SCATTER__ */
+ }
+
+ /* todo: avoid this calculation using decoupled ray marching */
+ kernel_volume_shadow(kg, emission_sd, state, &volume_ray, throughput);
+ }
+}
+#endif /* __VOLUME__ */
+
/* bounce off surface and integrate indirect light */
ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGlobals *kg,
ShaderData *sd, ShaderData *indirect_sd, ShaderData *emission_sd,
@@ -293,142 +451,17 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
bool hit = kernel_path_scene_intersect(kg, &state, &ray, &isect, L);
#ifdef __VOLUME__
- /* Sanitize volume stack. */
- if(!hit) {
- kernel_volume_clean_stack(kg, state.volume_stack);
- }
- /* volume attenuation, emission, scatter */
- if(state.volume_stack[0].shader != SHADER_NONE) {
- Ray volume_ray = ray;
- volume_ray.t = (hit)? isect.t: FLT_MAX;
-
- bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack);
-
-#ifdef __VOLUME_DECOUPLED__
- /* decoupled ray marching only supported on CPU */
-
- /* cache steps along volume for repeated sampling */
- VolumeSegment volume_segment;
-
- shader_setup_from_volume(kg, &sd, &volume_ray);
- kernel_volume_decoupled_record(kg, &state,
- &volume_ray, &sd, &volume_segment, heterogeneous);
-
- /* direct light sampling */
- if(volume_segment.closure_flag & SD_SCATTER) {
- volume_segment.sampling_method = volume_stack_sampling_method(kg, state.volume_stack);
-
- int all = kernel_data.integrator.sample_all_lights_direct;
-
- kernel_branched_path_volume_connect_light(kg, &sd,
- &emission_sd, throughput, &state, L, all,
- &volume_ray, &volume_segment);
-
- /* indirect light sampling */
- int num_samples = kernel_data.integrator.volume_samples;
- float num_samples_inv = 1.0f/num_samples;
-
- for(int j = 0; j < num_samples; j++) {
- PathState ps = state;
- Ray pray = ray;
- float3 tp = throughput;
-
- /* branch RNG state */
- path_state_branch(&ps, j, num_samples);
-
- /* scatter sample. if we use distance sampling and take just one
- * sample for direct and indirect light, we could share this
- * computation, but makes code a bit complex */
- float rphase = path_state_rng_1D(kg, &ps, PRNG_PHASE_CHANNEL);
- float rscatter = path_state_rng_1D(kg, &ps, PRNG_SCATTER_DISTANCE);
-
- VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
- &ps, &pray, &sd, &tp, rphase, rscatter, &volume_segment, NULL, false);
-
- if(result == VOLUME_PATH_SCATTERED &&
- kernel_path_volume_bounce(kg,
- &sd,
- &tp,
- &ps,
- &L->state,
- &pray))
- {
- kernel_path_indirect(kg,
- &indirect_sd,
- &emission_sd,
- &pray,
- tp*num_samples_inv,
- &ps,
- L);
-
- /* for render passes, sum and reset indirect light pass variables
- * for the next samples */
- path_radiance_sum_indirect(L);
- path_radiance_reset_indirect(L);
- }
- }
- }
-
- /* emission and transmittance */
- if(volume_segment.closure_flag & SD_EMISSION)
- path_radiance_accum_emission(L, &state, throughput, volume_segment.accum_emission);
- throughput *= volume_segment.accum_transmittance;
-
- /* free cached steps */
- kernel_volume_decoupled_free(kg, &volume_segment);
-#else
- /* GPU: no decoupled ray marching, scatter probalistically */
- int num_samples = kernel_data.integrator.volume_samples;
- float num_samples_inv = 1.0f/num_samples;
-
- /* todo: we should cache the shader evaluations from stepping
- * through the volume, for now we redo them multiple times */
-
- for(int j = 0; j < num_samples; j++) {
- PathState ps = state;
- Ray pray = ray;
- float3 tp = throughput * num_samples_inv;
-
- /* branch RNG state */
- path_state_branch(&ps, j, num_samples);
-
- VolumeIntegrateResult result = kernel_volume_integrate(
- kg, &ps, &sd, &volume_ray, L, &tp, heterogeneous);
-
-#ifdef __VOLUME_SCATTER__
- if(result == VOLUME_PATH_SCATTERED) {
- /* todo: support equiangular, MIS and all light sampling.
- * alternatively get decoupled ray marching working on the GPU */
- kernel_path_volume_connect_light(kg, &sd, &emission_sd, tp, &state, L);
-
- if(kernel_path_volume_bounce(kg,
- &sd,
- &tp,
- &ps,
- &L->state,
- &pray))
- {
- kernel_path_indirect(kg,
- &indirect_sd,
- &emission_sd,
- &pray,
- tp,
- &ps,
- L);
-
- /* for render passes, sum and reset indirect light pass variables
- * for the next samples */
- path_radiance_sum_indirect(L);
- path_radiance_reset_indirect(L);
- }
- }
-#endif /* __VOLUME_SCATTER__ */
- }
-
- /* todo: avoid this calculation using decoupled ray marching */
- kernel_volume_shadow(kg, &emission_sd, &state, &volume_ray, &throughput);
-#endif /* __VOLUME_DECOUPLED__ */
- }
+ /* Volume integration. */
+ kernel_branched_path_volume(kg,
+ &sd,
+ &state,
+ &ray,
+ &throughput,
+ &isect,
+ hit,
+ &indirect_sd,
+ &emission_sd,
+ L);
#endif /* __VOLUME__ */
/* Shade background. */
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 19c77c1ed4f..f76d6c2e556 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -1262,6 +1262,7 @@ typedef struct KernelIntegrator {
/* branched path */
int branched;
+ int volume_decoupled;
int diffuse_samples;
int glossy_samples;
int transmission_samples;
@@ -1287,7 +1288,6 @@ typedef struct KernelIntegrator {
float light_inv_rr_threshold;
int start_sample;
- int pad1;
} KernelIntegrator;
static_assert_align(KernelIntegrator, 16);
diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h
index d9c310a893e..5905fb3bf12 100644
--- a/intern/cycles/kernel/kernel_volume.h
+++ b/intern/cycles/kernel/kernel_volume.h
@@ -1026,6 +1026,9 @@ ccl_device bool kernel_volume_use_decoupled(KernelGlobals *kg, bool heterogeneou
/* decoupled ray marching for heterogeneous volumes not supported on the GPU,
* which also means equiangular and multiple importance sampling is not
* support for that case */
+ if(!kernel_data.integrator.volume_decoupled)
+ return false;
+
#ifdef __KERNEL_GPU__
if(heterogeneous)
return false;
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
index 269e74f6164..b7be4fe4409 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
+++ b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
@@ -293,7 +293,7 @@ ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, fl
return r;
}
-ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z)
+ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, InterpolationType interp)
{
float4 r;
switch(id) {
diff --git a/intern/cycles/render/integrator.cpp b/intern/cycles/render/integrator.cpp
index 15b728d6e02..b268478e6d3 100644
--- a/intern/cycles/render/integrator.cpp
+++ b/intern/cycles/render/integrator.cpp
@@ -145,6 +145,7 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene
kintegrator->sample_clamp_indirect = (sample_clamp_indirect == 0.0f)? FLT_MAX: sample_clamp_indirect*3.0f;
kintegrator->branched = (method == BRANCHED_PATH);
+ kintegrator->volume_decoupled = device->info.has_volume_decoupled;
kintegrator->diffuse_samples = diffuse_samples;
kintegrator->glossy_samples = glossy_samples;
kintegrator->transmission_samples = transmission_samples;
diff --git a/intern/cycles/render/mesh.cpp b/intern/cycles/render/mesh.cpp
index 84537bf5993..c02a5222463 100644
--- a/intern/cycles/render/mesh.cpp
+++ b/intern/cycles/render/mesh.cpp
@@ -1016,7 +1016,8 @@ void Mesh::pack_patches(uint *patch_data, uint vert_offset, uint face_offset, ui
}
}
-void Mesh::compute_bvh(DeviceScene *dscene,
+void Mesh::compute_bvh(Device *device,
+ DeviceScene *dscene,
SceneParams *params,
Progress *progress,
int n,
@@ -1050,7 +1051,7 @@ void Mesh::compute_bvh(DeviceScene *dscene,
BVHParams bparams;
bparams.use_spatial_split = params->use_bvh_spatial_split;
- bparams.use_qbvh = params->use_qbvh;
+ bparams.use_qbvh = params->use_qbvh && device->info.has_qbvh;
bparams.use_unaligned_nodes = dscene->data.bvh.have_curves &&
params->use_bvh_unaligned_nodes;
bparams.num_motion_triangle_steps = params->num_bvh_time_steps;
@@ -1814,18 +1815,18 @@ void MeshManager::device_update_bvh(Device *device, DeviceScene *dscene, Scene *
/* bvh build */
progress.set_status("Updating Scene BVH", "Building");
- VLOG(1) << (scene->params.use_qbvh ? "Using QBVH optimization structure"
- : "Using regular BVH optimization structure");
-
BVHParams bparams;
bparams.top_level = true;
- bparams.use_qbvh = scene->params.use_qbvh;
+ bparams.use_qbvh = scene->params.use_qbvh && device->info.has_qbvh;
bparams.use_spatial_split = scene->params.use_bvh_spatial_split;
bparams.use_unaligned_nodes = dscene->data.bvh.have_curves &&
scene->params.use_bvh_unaligned_nodes;
bparams.num_motion_triangle_steps = scene->params.num_bvh_time_steps;
bparams.num_motion_curve_steps = scene->params.num_bvh_time_steps;
+ VLOG(1) << (bparams.use_qbvh ? "Using QBVH optimization structure"
+ : "Using regular BVH optimization structure");
+
delete bvh;
bvh = BVH::create(bparams, scene->objects);
bvh->build(progress);
@@ -1879,7 +1880,7 @@ void MeshManager::device_update_bvh(Device *device, DeviceScene *dscene, Scene *
}
dscene->data.bvh.root = pack.root_index;
- dscene->data.bvh.use_qbvh = scene->params.use_qbvh;
+ dscene->data.bvh.use_qbvh = bparams.use_qbvh;
dscene->data.bvh.use_bvh_steps = (scene->params.num_bvh_time_steps != 0);
}
@@ -2084,6 +2085,7 @@ void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scen
if(mesh->need_update) {
pool.push(function_bind(&Mesh::compute_bvh,
mesh,
+ device,
dscene,
&scene->params,
&progress,
diff --git a/intern/cycles/render/mesh.h b/intern/cycles/render/mesh.h
index 043ce9d0ffc..9a51ca73950 100644
--- a/intern/cycles/render/mesh.h
+++ b/intern/cycles/render/mesh.h
@@ -282,7 +282,8 @@ public:
void pack_curves(Scene *scene, float4 *curve_key_co, float4 *curve_data, size_t curvekey_offset);
void pack_patches(uint *patch_data, uint vert_offset, uint face_offset, uint corner_offset);
- void compute_bvh(DeviceScene *dscene,
+ void compute_bvh(Device *device,
+ DeviceScene *dscene,
SceneParams *params,
Progress *progress,
int n,
diff --git a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h
index 0194327f567..a1966afd23b 100644
--- a/intern/cycles/render/scene.h
+++ b/intern/cycles/render/scene.h
@@ -149,7 +149,7 @@ public:
use_bvh_spatial_split = false;
use_bvh_unaligned_nodes = true;
num_bvh_time_steps = 0;
- use_qbvh = false;
+ use_qbvh = true;
persistent_data = false;
texture_limit = 0;
}
diff --git a/intern/cycles/render/shader.cpp b/intern/cycles/render/shader.cpp
index 864875361c0..3992ada2e85 100644
--- a/intern/cycles/render/shader.cpp
+++ b/intern/cycles/render/shader.cpp
@@ -451,10 +451,12 @@ void ShaderManager::device_update_common(Device *device,
flag |= SD_HETEROGENEOUS_VOLUME;
if(shader->has_bssrdf_bump)
flag |= SD_HAS_BSSRDF_BUMP;
- if(shader->volume_sampling_method == VOLUME_SAMPLING_EQUIANGULAR)
- flag |= SD_VOLUME_EQUIANGULAR;
- if(shader->volume_sampling_method == VOLUME_SAMPLING_MULTIPLE_IMPORTANCE)
- flag |= SD_VOLUME_MIS;
+ if(device->info.has_volume_decoupled) {
+ if(shader->volume_sampling_method == VOLUME_SAMPLING_EQUIANGULAR)
+ flag |= SD_VOLUME_EQUIANGULAR;
+ if(shader->volume_sampling_method == VOLUME_SAMPLING_MULTIPLE_IMPORTANCE)
+ flag |= SD_VOLUME_MIS;
+ }
if(shader->volume_interpolation_method == VOLUME_INTERPOLATION_CUBIC)
flag |= SD_VOLUME_CUBIC;
if(shader->has_bump)