From cdb0b3b1dcd4e9962426422868b2f40535670a5c Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Sun, 8 Oct 2017 04:32:25 +0200 Subject: Code refactor: use DeviceInfo to enable QBVH and decoupled volume shading. --- intern/cycles/blender/blender_session.cpp | 13 +- intern/cycles/blender/blender_sync.cpp | 17 +- intern/cycles/blender/blender_sync.h | 7 +- intern/cycles/device/device.cpp | 4 + intern/cycles/device/device.h | 4 + intern/cycles/device/device_cpu.cpp | 2 + intern/cycles/device/device_cuda.cpp | 2 + intern/cycles/device/device_network.cpp | 6 +- intern/cycles/device/device_opencl.cpp | 2 + intern/cycles/kernel/kernel_path.h | 125 ++++++------ intern/cycles/kernel/kernel_path_branched.h | 305 +++++++++++++++------------- intern/cycles/kernel/kernel_types.h | 2 +- intern/cycles/kernel/kernel_volume.h | 3 + intern/cycles/render/integrator.cpp | 1 + intern/cycles/render/mesh.cpp | 16 +- intern/cycles/render/mesh.h | 3 +- intern/cycles/render/scene.h | 2 +- intern/cycles/render/shader.cpp | 10 +- 18 files changed, 285 insertions(+), 239 deletions(-) diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp index 12de3da063f..9e54b7de573 100644 --- a/intern/cycles/blender/blender_session.cpp +++ b/intern/cycles/blender/blender_session.cpp @@ -115,8 +115,7 @@ void BlenderSession::create() void BlenderSession::create_session() { SessionParams session_params = BlenderSync::get_session_params(b_engine, b_userpref, b_scene, background); - bool is_cpu = session_params.device.type == DEVICE_CPU; - SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background, is_cpu); + SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background); bool session_pause = BlenderSync::get_session_pause(b_scene, background); /* reset status/progress */ @@ -141,7 +140,7 @@ void BlenderSession::create_session() session->set_pause(session_pause); /* create sync */ - sync = new BlenderSync(b_engine, b_data, b_scene, scene, !background, session->progress, is_cpu); + sync = new BlenderSync(b_engine, b_data, b_scene, scene, !background, session->progress); BL::Object b_camera_override(b_engine.camera_override()); if(b_v3d) { if(session_pause == false) { @@ -179,8 +178,7 @@ void BlenderSession::reset_session(BL::BlendData& b_data_, BL::Scene& b_scene_) b_scene = b_scene_; SessionParams session_params = BlenderSync::get_session_params(b_engine, b_userpref, b_scene, background); - const bool is_cpu = session_params.device.type == DEVICE_CPU; - SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background, is_cpu); + SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background); width = render_resolution_x(b_render); height = render_resolution_y(b_render); @@ -211,7 +209,7 @@ void BlenderSession::reset_session(BL::BlendData& b_data_, BL::Scene& b_scene_) session->stats.mem_peak = session->stats.mem_used; /* sync object should be re-created */ - sync = new BlenderSync(b_engine, b_data, b_scene, scene, !background, session->progress, is_cpu); + sync = new BlenderSync(b_engine, b_data, b_scene, scene, !background, session->progress); /* for final render we will do full data sync per render layer, only * do some basic syncing here, no objects or materials for speed */ @@ -736,8 +734,7 @@ void BlenderSession::synchronize() /* on session/scene parameter changes, we recreate session entirely */ SessionParams session_params = BlenderSync::get_session_params(b_engine, b_userpref, b_scene, background); - const bool is_cpu = session_params.device.type == DEVICE_CPU; - SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background, is_cpu); + SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background); bool session_pause = BlenderSync::get_session_pause(b_scene, background); if(session->params.modified(session_params) || diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp index 42e3721883f..2e3301c4209 100644 --- a/intern/cycles/blender/blender_sync.cpp +++ b/intern/cycles/blender/blender_sync.cpp @@ -47,8 +47,7 @@ BlenderSync::BlenderSync(BL::RenderEngine& b_engine, BL::Scene& b_scene, Scene *scene, bool preview, - Progress &progress, - bool is_cpu) + Progress &progress) : b_engine(b_engine), b_data(b_data), b_scene(b_scene), @@ -62,7 +61,6 @@ BlenderSync::BlenderSync(BL::RenderEngine& b_engine, scene(scene), preview(preview), experimental(false), - is_cpu(is_cpu), dicing_rate(1.0f), max_subdivisions(12), progress(progress) @@ -613,8 +611,7 @@ array BlenderSync::sync_render_passes(BL::RenderLayer& b_rlay, /* Scene Parameters */ SceneParams BlenderSync::get_scene_params(BL::Scene& b_scene, - bool background, - bool is_cpu) + bool background) { BL::RenderSettings r = b_scene.render(); SceneParams params; @@ -654,15 +651,7 @@ SceneParams BlenderSync::get_scene_params(BL::Scene& b_scene, params.texture_limit = 0; } -#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) - if(is_cpu) { - params.use_qbvh = DebugFlags().cpu.qbvh && system_cpu_support_sse2(); - } - else -#endif - { - params.use_qbvh = false; - } + params.use_qbvh = DebugFlags().cpu.qbvh; return params; } diff --git a/intern/cycles/blender/blender_sync.h b/intern/cycles/blender/blender_sync.h index 4ec46424b5a..11e279b81c4 100644 --- a/intern/cycles/blender/blender_sync.h +++ b/intern/cycles/blender/blender_sync.h @@ -54,8 +54,7 @@ public: BL::Scene& b_scene, Scene *scene, bool preview, - Progress &progress, - bool is_cpu); + Progress &progress); ~BlenderSync(); /* sync */ @@ -83,8 +82,7 @@ public: /* get parameters */ static SceneParams get_scene_params(BL::Scene& b_scene, - bool background, - bool is_cpu); + bool background); static SessionParams get_session_params(BL::RenderEngine& b_engine, BL::UserPreferences& b_userpref, BL::Scene& b_scene, @@ -177,7 +175,6 @@ private: Scene *scene; bool preview; bool experimental; - bool is_cpu; float dicing_rate; int max_subdivisions; diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp index f64436aec7b..533294407ea 100644 --- a/intern/cycles/device/device.cpp +++ b/intern/cycles/device/device.cpp @@ -379,10 +379,14 @@ DeviceInfo Device::get_multi_device(vector subdevices) info.num = 0; info.has_bindless_textures = true; + info.has_volume_decoupled = true; + info.has_qbvh = true; foreach(DeviceInfo &device, subdevices) { assert(device.type == info.multi_devices[0].type); info.has_bindless_textures &= device.has_bindless_textures; + info.has_volume_decoupled &= device.has_volume_decoupled; + info.has_qbvh &= device.has_qbvh; } return info; diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index 0e0a0079209..c134fc9411e 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -55,6 +55,8 @@ public: bool display_device; bool advanced_shading; bool has_bindless_textures; /* flag for GPU and Multi device */ + bool has_volume_decoupled; + bool has_qbvh; bool use_split_kernel; /* Denotes if the device is going to run cycles using split-kernel */ vector multi_devices; @@ -66,6 +68,8 @@ public: display_device = false; advanced_shading = true; has_bindless_textures = false; + has_volume_decoupled = false; + has_qbvh = false; use_split_kernel = false; } diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index ac6d3246d38..a17caabc850 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -1024,6 +1024,8 @@ void device_cpu_info(vector& devices) info.id = "CPU"; info.num = 0; info.advanced_shading = true; + info.has_qbvh = system_cpu_support_sse2(); + info.has_volume_decoupled = true; devices.insert(devices.begin(), info); } diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index dcbe6033bcc..56a56c5217c 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -2128,6 +2128,8 @@ void device_cuda_info(vector& devices) info.advanced_shading = (major >= 2); info.has_bindless_textures = (major >= 3); + info.has_volume_decoupled = false; + info.has_qbvh = false; int pci_location[3] = {0, 0, 0}; cuDeviceGetAttribute(&pci_location[0], CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, num); diff --git a/intern/cycles/device/device_network.cpp b/intern/cycles/device/device_network.cpp index deea59f1d23..ced10c98dc9 100644 --- a/intern/cycles/device/device_network.cpp +++ b/intern/cycles/device/device_network.cpp @@ -343,7 +343,11 @@ void device_network_info(vector& devices) info.description = "Network Device"; info.id = "NETWORK"; info.num = 0; - info.advanced_shading = true; /* todo: get this info from device */ + + /* todo: get this info from device */ + info.advanced_shading = true; + info.has_volume_decoupled = false; + info.has_qbvh = false; devices.push_back(info); } diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp index 9d89decaaaf..5808a31e605 100644 --- a/intern/cycles/device/device_opencl.cpp +++ b/intern/cycles/device/device_opencl.cpp @@ -123,6 +123,8 @@ void device_opencl_info(vector& devices) info.advanced_shading = OpenCLInfo::kernel_use_advanced_shading(platform_name); info.use_split_kernel = OpenCLInfo::kernel_use_split(platform_name, device_type); + info.has_volume_decoupled = false; + info.has_qbvh = false; info.id = string("OPENCL_") + platform_name + "_" + device_name + "_" + hardware_id; devices.push_back(info); num_devices++; diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index 6b6c5603b70..652777a77a0 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -170,87 +170,90 @@ ccl_device_forceinline VolumeIntegrateResult kernel_path_volume( if(!hit) { kernel_volume_clean_stack(kg, state->volume_stack); } + + if(state->volume_stack[0].shader == SHADER_NONE) { + return VOLUME_PATH_ATTENUATED; + } + /* volume attenuation, emission, scatter */ - if(state->volume_stack[0].shader != SHADER_NONE) { - Ray volume_ray = *ray; - volume_ray.t = (hit)? isect->t: FLT_MAX; + Ray volume_ray = *ray; + volume_ray.t = (hit)? isect->t: FLT_MAX; - bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack); + bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack); # ifdef __VOLUME_DECOUPLED__ - int sampling_method = volume_stack_sampling_method(kg, state->volume_stack); - bool direct = (state->flag & PATH_RAY_CAMERA) != 0; - bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, direct, sampling_method); + int sampling_method = volume_stack_sampling_method(kg, state->volume_stack); + bool direct = (state->flag & PATH_RAY_CAMERA) != 0; + bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, direct, sampling_method); - if(decoupled) { - /* cache steps along volume for repeated sampling */ - VolumeSegment volume_segment; + if(decoupled) { + /* cache steps along volume for repeated sampling */ + VolumeSegment volume_segment; - shader_setup_from_volume(kg, sd, &volume_ray); - kernel_volume_decoupled_record(kg, state, - &volume_ray, sd, &volume_segment, heterogeneous); + shader_setup_from_volume(kg, sd, &volume_ray); + kernel_volume_decoupled_record(kg, state, + &volume_ray, sd, &volume_segment, heterogeneous); - volume_segment.sampling_method = sampling_method; + volume_segment.sampling_method = sampling_method; - /* emission */ - if(volume_segment.closure_flag & SD_EMISSION) - path_radiance_accum_emission(L, state, *throughput, volume_segment.accum_emission); + /* emission */ + if(volume_segment.closure_flag & SD_EMISSION) + path_radiance_accum_emission(L, state, *throughput, volume_segment.accum_emission); - /* scattering */ - VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED; + /* scattering */ + VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED; - if(volume_segment.closure_flag & SD_SCATTER) { - int all = kernel_data.integrator.sample_all_lights_indirect; + if(volume_segment.closure_flag & SD_SCATTER) { + int all = kernel_data.integrator.sample_all_lights_indirect; - /* direct light sampling */ - kernel_branched_path_volume_connect_light(kg, sd, - emission_sd, *throughput, state, L, all, - &volume_ray, &volume_segment); + /* direct light sampling */ + kernel_branched_path_volume_connect_light(kg, sd, + emission_sd, *throughput, state, L, all, + &volume_ray, &volume_segment); - /* indirect sample. if we use distance sampling and take just - * one sample for direct and indirect light, we could share - * this computation, but makes code a bit complex */ - float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL); - float rscatter = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE); + /* indirect sample. if we use distance sampling and take just + * one sample for direct and indirect light, we could share + * this computation, but makes code a bit complex */ + float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL); + float rscatter = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE); - result = kernel_volume_decoupled_scatter(kg, - state, &volume_ray, sd, throughput, - rphase, rscatter, &volume_segment, NULL, true); - } + result = kernel_volume_decoupled_scatter(kg, + state, &volume_ray, sd, throughput, + rphase, rscatter, &volume_segment, NULL, true); + } - /* free cached steps */ - kernel_volume_decoupled_free(kg, &volume_segment); + /* free cached steps */ + kernel_volume_decoupled_free(kg, &volume_segment); - if(result == VOLUME_PATH_SCATTERED) { - if(kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray)) - return VOLUME_PATH_SCATTERED; - else - return VOLUME_PATH_MISSED; - } - else { - *throughput *= volume_segment.accum_transmittance; - } + if(result == VOLUME_PATH_SCATTERED) { + if(kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray)) + return VOLUME_PATH_SCATTERED; + else + return VOLUME_PATH_MISSED; } - else + else { + *throughput *= volume_segment.accum_transmittance; + } + } + else # endif /* __VOLUME_DECOUPLED__ */ - { - /* integrate along volume segment with distance sampling */ - VolumeIntegrateResult result = kernel_volume_integrate( - kg, state, sd, &volume_ray, L, throughput, heterogeneous); + { + /* integrate along volume segment with distance sampling */ + VolumeIntegrateResult result = kernel_volume_integrate( + kg, state, sd, &volume_ray, L, throughput, heterogeneous); # ifdef __VOLUME_SCATTER__ - if(result == VOLUME_PATH_SCATTERED) { - /* direct lighting */ - kernel_path_volume_connect_light(kg, sd, emission_sd, *throughput, state, L); - - /* indirect light bounce */ - if(kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray)) - return VOLUME_PATH_SCATTERED; - else - return VOLUME_PATH_MISSED; - } -# endif /* __VOLUME_SCATTER__ */ + if(result == VOLUME_PATH_SCATTERED) { + /* direct lighting */ + kernel_path_volume_connect_light(kg, sd, emission_sd, *throughput, state, L); + + /* indirect light bounce */ + if(kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray)) + return VOLUME_PATH_SCATTERED; + else + return VOLUME_PATH_MISSED; } +# endif /* __VOLUME_SCATTER__ */ } return VOLUME_PATH_ATTENUATED; diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h index 2597d684a36..42df7e85b41 100644 --- a/intern/cycles/kernel/kernel_path_branched.h +++ b/intern/cycles/kernel/kernel_path_branched.h @@ -64,6 +64,164 @@ ccl_device_inline void kernel_branched_path_ao(KernelGlobals *kg, #ifndef __SPLIT_KERNEL__ +#ifdef __VOLUME__ +ccl_device_forceinline void kernel_branched_path_volume( + KernelGlobals *kg, + ShaderData *sd, + PathState *state, + Ray *ray, + float3 *throughput, + ccl_addr_space Intersection *isect, + bool hit, + ShaderData *indirect_sd, + ShaderData *emission_sd, + PathRadiance *L) +{ + /* Sanitize volume stack. */ + if(!hit) { + kernel_volume_clean_stack(kg, state->volume_stack); + } + + if(state->volume_stack[0].shader == SHADER_NONE) { + return; + } + + /* volume attenuation, emission, scatter */ + Ray volume_ray = *ray; + volume_ray.t = (hit)? isect->t: FLT_MAX; + + bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack); + +# ifdef __VOLUME_DECOUPLED__ + /* decoupled ray marching only supported on CPU */ + if(kernel_data.integrator.volume_decoupled) { + /* cache steps along volume for repeated sampling */ + VolumeSegment volume_segment; + + shader_setup_from_volume(kg, sd, &volume_ray); + kernel_volume_decoupled_record(kg, state, + &volume_ray, sd, &volume_segment, heterogeneous); + + /* direct light sampling */ + if(volume_segment.closure_flag & SD_SCATTER) { + volume_segment.sampling_method = volume_stack_sampling_method(kg, state->volume_stack); + + int all = kernel_data.integrator.sample_all_lights_direct; + + kernel_branched_path_volume_connect_light(kg, sd, + emission_sd, *throughput, state, L, all, + &volume_ray, &volume_segment); + + /* indirect light sampling */ + int num_samples = kernel_data.integrator.volume_samples; + float num_samples_inv = 1.0f/num_samples; + + for(int j = 0; j < num_samples; j++) { + PathState ps = *state; + Ray pray = *ray; + float3 tp = *throughput; + + /* branch RNG state */ + path_state_branch(&ps, j, num_samples); + + /* scatter sample. if we use distance sampling and take just one + * sample for direct and indirect light, we could share this + * computation, but makes code a bit complex */ + float rphase = path_state_rng_1D(kg, &ps, PRNG_PHASE_CHANNEL); + float rscatter = path_state_rng_1D(kg, &ps, PRNG_SCATTER_DISTANCE); + + VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg, + &ps, &pray, sd, &tp, rphase, rscatter, &volume_segment, NULL, false); + + if(result == VOLUME_PATH_SCATTERED && + kernel_path_volume_bounce(kg, + sd, + &tp, + &ps, + &L->state, + &pray)) + { + kernel_path_indirect(kg, + indirect_sd, + emission_sd, + &pray, + tp*num_samples_inv, + &ps, + L); + + /* for render passes, sum and reset indirect light pass variables + * for the next samples */ + path_radiance_sum_indirect(L); + path_radiance_reset_indirect(L); + } + } + } + + /* emission and transmittance */ + if(volume_segment.closure_flag & SD_EMISSION) + path_radiance_accum_emission(L, state, *throughput, volume_segment.accum_emission); + *throughput *= volume_segment.accum_transmittance; + + /* free cached steps */ + kernel_volume_decoupled_free(kg, &volume_segment); + } + else +# endif /* __VOLUME_DECOUPLED__ */ + { + /* GPU: no decoupled ray marching, scatter probalistically */ + int num_samples = kernel_data.integrator.volume_samples; + float num_samples_inv = 1.0f/num_samples; + + /* todo: we should cache the shader evaluations from stepping + * through the volume, for now we redo them multiple times */ + + for(int j = 0; j < num_samples; j++) { + PathState ps = *state; + Ray pray = *ray; + float3 tp = (*throughput) * num_samples_inv; + + /* branch RNG state */ + path_state_branch(&ps, j, num_samples); + + VolumeIntegrateResult result = kernel_volume_integrate( + kg, &ps, sd, &volume_ray, L, &tp, heterogeneous); + +# ifdef __VOLUME_SCATTER__ + if(result == VOLUME_PATH_SCATTERED) { + /* todo: support equiangular, MIS and all light sampling. + * alternatively get decoupled ray marching working on the GPU */ + kernel_path_volume_connect_light(kg, sd, emission_sd, tp, state, L); + + if(kernel_path_volume_bounce(kg, + sd, + &tp, + &ps, + &L->state, + &pray)) + { + kernel_path_indirect(kg, + indirect_sd, + emission_sd, + &pray, + tp, + &ps, + L); + + /* for render passes, sum and reset indirect light pass variables + * for the next samples */ + path_radiance_sum_indirect(L); + path_radiance_reset_indirect(L); + } + } +# endif /* __VOLUME_SCATTER__ */ + } + + /* todo: avoid this calculation using decoupled ray marching */ + kernel_volume_shadow(kg, emission_sd, state, &volume_ray, throughput); + } +} +#endif /* __VOLUME__ */ + /* bounce off surface and integrate indirect light */ ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGlobals *kg, ShaderData *sd, ShaderData *indirect_sd, ShaderData *emission_sd, @@ -293,142 +451,17 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg, bool hit = kernel_path_scene_intersect(kg, &state, &ray, &isect, L); #ifdef __VOLUME__ - /* Sanitize volume stack. */ - if(!hit) { - kernel_volume_clean_stack(kg, state.volume_stack); - } - /* volume attenuation, emission, scatter */ - if(state.volume_stack[0].shader != SHADER_NONE) { - Ray volume_ray = ray; - volume_ray.t = (hit)? isect.t: FLT_MAX; - - bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack); - -#ifdef __VOLUME_DECOUPLED__ - /* decoupled ray marching only supported on CPU */ - - /* cache steps along volume for repeated sampling */ - VolumeSegment volume_segment; - - shader_setup_from_volume(kg, &sd, &volume_ray); - kernel_volume_decoupled_record(kg, &state, - &volume_ray, &sd, &volume_segment, heterogeneous); - - /* direct light sampling */ - if(volume_segment.closure_flag & SD_SCATTER) { - volume_segment.sampling_method = volume_stack_sampling_method(kg, state.volume_stack); - - int all = kernel_data.integrator.sample_all_lights_direct; - - kernel_branched_path_volume_connect_light(kg, &sd, - &emission_sd, throughput, &state, L, all, - &volume_ray, &volume_segment); - - /* indirect light sampling */ - int num_samples = kernel_data.integrator.volume_samples; - float num_samples_inv = 1.0f/num_samples; - - for(int j = 0; j < num_samples; j++) { - PathState ps = state; - Ray pray = ray; - float3 tp = throughput; - - /* branch RNG state */ - path_state_branch(&ps, j, num_samples); - - /* scatter sample. if we use distance sampling and take just one - * sample for direct and indirect light, we could share this - * computation, but makes code a bit complex */ - float rphase = path_state_rng_1D(kg, &ps, PRNG_PHASE_CHANNEL); - float rscatter = path_state_rng_1D(kg, &ps, PRNG_SCATTER_DISTANCE); - - VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg, - &ps, &pray, &sd, &tp, rphase, rscatter, &volume_segment, NULL, false); - - if(result == VOLUME_PATH_SCATTERED && - kernel_path_volume_bounce(kg, - &sd, - &tp, - &ps, - &L->state, - &pray)) - { - kernel_path_indirect(kg, - &indirect_sd, - &emission_sd, - &pray, - tp*num_samples_inv, - &ps, - L); - - /* for render passes, sum and reset indirect light pass variables - * for the next samples */ - path_radiance_sum_indirect(L); - path_radiance_reset_indirect(L); - } - } - } - - /* emission and transmittance */ - if(volume_segment.closure_flag & SD_EMISSION) - path_radiance_accum_emission(L, &state, throughput, volume_segment.accum_emission); - throughput *= volume_segment.accum_transmittance; - - /* free cached steps */ - kernel_volume_decoupled_free(kg, &volume_segment); -#else - /* GPU: no decoupled ray marching, scatter probalistically */ - int num_samples = kernel_data.integrator.volume_samples; - float num_samples_inv = 1.0f/num_samples; - - /* todo: we should cache the shader evaluations from stepping - * through the volume, for now we redo them multiple times */ - - for(int j = 0; j < num_samples; j++) { - PathState ps = state; - Ray pray = ray; - float3 tp = throughput * num_samples_inv; - - /* branch RNG state */ - path_state_branch(&ps, j, num_samples); - - VolumeIntegrateResult result = kernel_volume_integrate( - kg, &ps, &sd, &volume_ray, L, &tp, heterogeneous); - -#ifdef __VOLUME_SCATTER__ - if(result == VOLUME_PATH_SCATTERED) { - /* todo: support equiangular, MIS and all light sampling. - * alternatively get decoupled ray marching working on the GPU */ - kernel_path_volume_connect_light(kg, &sd, &emission_sd, tp, &state, L); - - if(kernel_path_volume_bounce(kg, - &sd, - &tp, - &ps, - &L->state, - &pray)) - { - kernel_path_indirect(kg, - &indirect_sd, - &emission_sd, - &pray, - tp, - &ps, - L); - - /* for render passes, sum and reset indirect light pass variables - * for the next samples */ - path_radiance_sum_indirect(L); - path_radiance_reset_indirect(L); - } - } -#endif /* __VOLUME_SCATTER__ */ - } - - /* todo: avoid this calculation using decoupled ray marching */ - kernel_volume_shadow(kg, &emission_sd, &state, &volume_ray, &throughput); -#endif /* __VOLUME_DECOUPLED__ */ - } + /* Volume integration. */ + kernel_branched_path_volume(kg, + &sd, + &state, + &ray, + &throughput, + &isect, + hit, + &indirect_sd, + &emission_sd, + L); #endif /* __VOLUME__ */ /* Shade background. */ diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index 19c77c1ed4f..f76d6c2e556 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -1262,6 +1262,7 @@ typedef struct KernelIntegrator { /* branched path */ int branched; + int volume_decoupled; int diffuse_samples; int glossy_samples; int transmission_samples; @@ -1287,7 +1288,6 @@ typedef struct KernelIntegrator { float light_inv_rr_threshold; int start_sample; - int pad1; } KernelIntegrator; static_assert_align(KernelIntegrator, 16); diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h index d9c310a893e..5905fb3bf12 100644 --- a/intern/cycles/kernel/kernel_volume.h +++ b/intern/cycles/kernel/kernel_volume.h @@ -1026,6 +1026,9 @@ ccl_device bool kernel_volume_use_decoupled(KernelGlobals *kg, bool heterogeneou /* decoupled ray marching for heterogeneous volumes not supported on the GPU, * which also means equiangular and multiple importance sampling is not * support for that case */ + if(!kernel_data.integrator.volume_decoupled) + return false; + #ifdef __KERNEL_GPU__ if(heterogeneous) return false; diff --git a/intern/cycles/render/integrator.cpp b/intern/cycles/render/integrator.cpp index 15b728d6e02..b268478e6d3 100644 --- a/intern/cycles/render/integrator.cpp +++ b/intern/cycles/render/integrator.cpp @@ -145,6 +145,7 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene kintegrator->sample_clamp_indirect = (sample_clamp_indirect == 0.0f)? FLT_MAX: sample_clamp_indirect*3.0f; kintegrator->branched = (method == BRANCHED_PATH); + kintegrator->volume_decoupled = device->info.has_volume_decoupled; kintegrator->diffuse_samples = diffuse_samples; kintegrator->glossy_samples = glossy_samples; kintegrator->transmission_samples = transmission_samples; diff --git a/intern/cycles/render/mesh.cpp b/intern/cycles/render/mesh.cpp index 84537bf5993..c02a5222463 100644 --- a/intern/cycles/render/mesh.cpp +++ b/intern/cycles/render/mesh.cpp @@ -1016,7 +1016,8 @@ void Mesh::pack_patches(uint *patch_data, uint vert_offset, uint face_offset, ui } } -void Mesh::compute_bvh(DeviceScene *dscene, +void Mesh::compute_bvh(Device *device, + DeviceScene *dscene, SceneParams *params, Progress *progress, int n, @@ -1050,7 +1051,7 @@ void Mesh::compute_bvh(DeviceScene *dscene, BVHParams bparams; bparams.use_spatial_split = params->use_bvh_spatial_split; - bparams.use_qbvh = params->use_qbvh; + bparams.use_qbvh = params->use_qbvh && device->info.has_qbvh; bparams.use_unaligned_nodes = dscene->data.bvh.have_curves && params->use_bvh_unaligned_nodes; bparams.num_motion_triangle_steps = params->num_bvh_time_steps; @@ -1814,18 +1815,18 @@ void MeshManager::device_update_bvh(Device *device, DeviceScene *dscene, Scene * /* bvh build */ progress.set_status("Updating Scene BVH", "Building"); - VLOG(1) << (scene->params.use_qbvh ? "Using QBVH optimization structure" - : "Using regular BVH optimization structure"); - BVHParams bparams; bparams.top_level = true; - bparams.use_qbvh = scene->params.use_qbvh; + bparams.use_qbvh = scene->params.use_qbvh && device->info.has_qbvh; bparams.use_spatial_split = scene->params.use_bvh_spatial_split; bparams.use_unaligned_nodes = dscene->data.bvh.have_curves && scene->params.use_bvh_unaligned_nodes; bparams.num_motion_triangle_steps = scene->params.num_bvh_time_steps; bparams.num_motion_curve_steps = scene->params.num_bvh_time_steps; + VLOG(1) << (bparams.use_qbvh ? "Using QBVH optimization structure" + : "Using regular BVH optimization structure"); + delete bvh; bvh = BVH::create(bparams, scene->objects); bvh->build(progress); @@ -1879,7 +1880,7 @@ void MeshManager::device_update_bvh(Device *device, DeviceScene *dscene, Scene * } dscene->data.bvh.root = pack.root_index; - dscene->data.bvh.use_qbvh = scene->params.use_qbvh; + dscene->data.bvh.use_qbvh = bparams.use_qbvh; dscene->data.bvh.use_bvh_steps = (scene->params.num_bvh_time_steps != 0); } @@ -2084,6 +2085,7 @@ void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scen if(mesh->need_update) { pool.push(function_bind(&Mesh::compute_bvh, mesh, + device, dscene, &scene->params, &progress, diff --git a/intern/cycles/render/mesh.h b/intern/cycles/render/mesh.h index 043ce9d0ffc..9a51ca73950 100644 --- a/intern/cycles/render/mesh.h +++ b/intern/cycles/render/mesh.h @@ -282,7 +282,8 @@ public: void pack_curves(Scene *scene, float4 *curve_key_co, float4 *curve_data, size_t curvekey_offset); void pack_patches(uint *patch_data, uint vert_offset, uint face_offset, uint corner_offset); - void compute_bvh(DeviceScene *dscene, + void compute_bvh(Device *device, + DeviceScene *dscene, SceneParams *params, Progress *progress, int n, diff --git a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h index 0194327f567..a1966afd23b 100644 --- a/intern/cycles/render/scene.h +++ b/intern/cycles/render/scene.h @@ -149,7 +149,7 @@ public: use_bvh_spatial_split = false; use_bvh_unaligned_nodes = true; num_bvh_time_steps = 0; - use_qbvh = false; + use_qbvh = true; persistent_data = false; texture_limit = 0; } diff --git a/intern/cycles/render/shader.cpp b/intern/cycles/render/shader.cpp index 864875361c0..3992ada2e85 100644 --- a/intern/cycles/render/shader.cpp +++ b/intern/cycles/render/shader.cpp @@ -451,10 +451,12 @@ void ShaderManager::device_update_common(Device *device, flag |= SD_HETEROGENEOUS_VOLUME; if(shader->has_bssrdf_bump) flag |= SD_HAS_BSSRDF_BUMP; - if(shader->volume_sampling_method == VOLUME_SAMPLING_EQUIANGULAR) - flag |= SD_VOLUME_EQUIANGULAR; - if(shader->volume_sampling_method == VOLUME_SAMPLING_MULTIPLE_IMPORTANCE) - flag |= SD_VOLUME_MIS; + if(device->info.has_volume_decoupled) { + if(shader->volume_sampling_method == VOLUME_SAMPLING_EQUIANGULAR) + flag |= SD_VOLUME_EQUIANGULAR; + if(shader->volume_sampling_method == VOLUME_SAMPLING_MULTIPLE_IMPORTANCE) + flag |= SD_VOLUME_MIS; + } if(shader->volume_interpolation_method == VOLUME_INTERPOLATION_CUBIC) flag |= SD_VOLUME_CUBIC; if(shader->has_bump) -- cgit v1.2.3