diff options
author | Sergey Sharybin <sergey.vfx@gmail.com> | 2017-11-09 12:59:15 +0300 |
---|---|---|
committer | Sergey Sharybin <sergey.vfx@gmail.com> | 2017-11-09 12:59:15 +0300 |
commit | c99481b6320a77e4793c812403f7d37dfc2d5ced (patch) | |
tree | 448226ab76b4f2d92263cc115828c23f1f753540 /intern | |
parent | ffe76ae9f4abe2a64d4c749623b99f70b3746d87 (diff) | |
parent | 8d7ec519dff93b04fdec548aeef4b90137d788c8 (diff) |
Merge branch 'master' into blender2.8
Diffstat (limited to 'intern')
28 files changed, 87 insertions, 59 deletions
diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp index 3e052bb926e..0364f809f8c 100644 --- a/intern/cycles/device/device.cpp +++ b/intern/cycles/device/device.cpp @@ -47,7 +47,6 @@ std::ostream& operator <<(std::ostream &os, { os << "Experimental features: " << (requested_features.experimental ? "On" : "Off") << std::endl; - os << "Max closure count: " << requested_features.max_closure << std::endl; os << "Max nodes group: " << requested_features.max_nodes_group << std::endl; /* TODO(sergey): Decode bitflag into list of names. */ os << "Nodes features: " << requested_features.nodes_features << std::endl; diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index a2cd3e23c79..70f56165f8a 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -91,9 +91,6 @@ public: /* Use experimental feature set. */ bool experimental; - /* Maximum number of closures in shader trees. */ - int max_closure; - /* Selective nodes compilation. */ /* Identifier of a node group up to which all the nodes needs to be @@ -146,7 +143,6 @@ public: { /* TODO(sergey): Find more meaningful defaults. */ experimental = false; - max_closure = 0; max_nodes_group = 0; nodes_features = 0; use_hair = false; @@ -167,7 +163,6 @@ public: bool modified(const DeviceRequestedFeatures& requested_features) { return !(experimental == requested_features.experimental && - max_closure == requested_features.max_closure && max_nodes_group == requested_features.max_nodes_group && nodes_features == requested_features.nodes_features && use_hair == requested_features.use_hair && @@ -198,7 +193,6 @@ public: string_printf("%d", max_nodes_group); build_options += " -D__NODES_FEATURES__=" + string_printf("%d", nodes_features); - build_options += string_printf(" -D__MAX_CLOSURE__=%d", max_closure); if(!use_hair) { build_options += " -D__NO_HAIR__"; } diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index 1a54c3380ee..0f4001ab1a6 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -760,7 +760,6 @@ public: CPUSplitKernel *split_kernel = NULL; if(use_split_kernel) { split_kernel = new CPUSplitKernel(this); - requested_features.max_closure = MAX_CLOSURE; if(!split_kernel->load_kernels(requested_features)) { thread_kernel_globals_free((KernelGlobals*)kgbuffer.device_pointer); kgbuffer.free(); diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 3334c2b9f91..2f52bd49b16 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -1882,10 +1882,6 @@ public: DeviceRequestedFeatures requested_features; if(use_split_kernel()) { - if(!use_adaptive_compilation()) { - requested_features.max_closure = 64; - } - if(split_kernel == NULL) { split_kernel = new CUDASplitKernel(this); split_kernel->load_kernels(requested_features); diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp index 115273d9f0a..566d4020b33 100644 --- a/intern/cycles/device/device_split_kernel.cpp +++ b/intern/cycles/device/device_split_kernel.cpp @@ -34,7 +34,6 @@ DeviceSplitKernel::DeviceSplitKernel(Device *device) use_queues_flag(device, "use_queues_flag"), work_pool_wgs(device, "work_pool_wgs") { - current_max_closure = -1; first_tile = true; avg_time_per_sample = 0.0; @@ -116,8 +115,6 @@ bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_fe #undef LOAD_KERNEL - current_max_closure = requested_features.max_closure; - return true; } diff --git a/intern/cycles/device/device_split_kernel.h b/intern/cycles/device/device_split_kernel.h index 0647c664447..2ec0261e847 100644 --- a/intern/cycles/device/device_split_kernel.h +++ b/intern/cycles/device/device_split_kernel.h @@ -92,9 +92,6 @@ private: /* Work pool with respect to each work group. */ device_only_memory<unsigned int> work_pool_wgs; - /* clos_max value for which the kernels have been loaded currently. */ - int current_max_closure; - /* Marked True in constructor and marked false at the end of path_trace(). */ bool first_tile; diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h index 73cddeb27f7..8788e89c40e 100644 --- a/intern/cycles/kernel/kernel_bake.h +++ b/intern/cycles/kernel/kernel_bake.h @@ -51,7 +51,7 @@ ccl_device_inline void compute_light_pass(KernelGlobals *kg, path_state_init(kg, &emission_sd, &state, rng_hash, sample, NULL); /* evaluate surface shader */ - shader_eval_surface(kg, sd, &state, state.flag, MAX_CLOSURE); + shader_eval_surface(kg, sd, &state, state.flag, kernel_data.integrator.max_closures); /* TODO, disable more closures we don't need besides transparent */ shader_bsdf_disable_transparency(kg, sd); @@ -228,12 +228,12 @@ ccl_device float3 kernel_bake_evaluate_direct_indirect(KernelGlobals *kg, } else { /* surface color of the pass only */ - shader_eval_surface(kg, sd, state, 0, MAX_CLOSURE); + shader_eval_surface(kg, sd, state, 0, kernel_data.integrator.max_closures); return kernel_bake_shader_bsdf(kg, sd, type); } } else { - shader_eval_surface(kg, sd, state, 0, MAX_CLOSURE); + shader_eval_surface(kg, sd, state, 0, kernel_data.integrator.max_closures); color = kernel_bake_shader_bsdf(kg, sd, type); } @@ -333,7 +333,7 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, { float3 N = sd.N; if((sd.flag & SD_HAS_BUMP)) { - shader_eval_surface(kg, &sd, &state, 0, MAX_CLOSURE); + shader_eval_surface(kg, &sd, &state, 0, kernel_data.integrator.max_closures); N = shader_bsdf_average_normal(kg, &sd); } diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index 8519e0682e1..207ba741e6f 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -443,7 +443,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, sd, &isect, ray); - shader_eval_surface(kg, sd, state, state->flag, MAX_CLOSURE); + shader_eval_surface(kg, sd, state, state->flag, kernel_data.integrator.max_closures); shader_prepare_closures(sd, state); /* Apply shadow catcher, holdout, emission. */ @@ -594,7 +594,7 @@ ccl_device_forceinline void kernel_path_integrate( /* Setup and evaluate shader. */ shader_setup_from_ray(kg, &sd, &isect, ray); - shader_eval_surface(kg, &sd, state, state->flag, MAX_CLOSURE); + shader_eval_surface(kg, &sd, state, state->flag, kernel_data.integrator.max_closures); shader_prepare_closures(&sd, state); /* Apply shadow catcher, holdout, emission. */ diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h index b37bc65f4df..9996f52f9a4 100644 --- a/intern/cycles/kernel/kernel_path_branched.h +++ b/intern/cycles/kernel/kernel_path_branched.h @@ -474,7 +474,7 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg, /* Setup and evaluate shader. */ shader_setup_from_ray(kg, &sd, &isect, &ray); - shader_eval_surface(kg, &sd, &state, state.flag, MAX_CLOSURE); + shader_eval_surface(kg, &sd, &state, state.flag, kernel_data.integrator.max_closures); shader_merge_closures(&sd); /* Apply shadow catcher, holdout, emission. */ diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h index 87e7d7ff398..616ad71af3c 100644 --- a/intern/cycles/kernel/kernel_subsurface.h +++ b/intern/cycles/kernel/kernel_subsurface.h @@ -76,11 +76,11 @@ ccl_device_inline float3 subsurface_scatter_eval(ShaderData *sd, } /* replace closures with a single diffuse bsdf closure after scatter step */ -ccl_device void subsurface_scatter_setup_diffuse_bsdf(ShaderData *sd, const ShaderClosure *sc, float3 weight, bool hit, float3 N) +ccl_device void subsurface_scatter_setup_diffuse_bsdf(KernelGlobals *kg, ShaderData *sd, const ShaderClosure *sc, float3 weight, bool hit, float3 N) { sd->flag &= ~SD_CLOSURE_FLAGS; sd->num_closure = 0; - sd->num_closure_left = MAX_CLOSURE; + sd->num_closure_left = kernel_data.integrator.max_closures; if(hit) { Bssrdf *bssrdf = (Bssrdf *)sc; @@ -154,7 +154,7 @@ ccl_device void subsurface_color_bump_blur(KernelGlobals *kg, if(bump || texture_blur > 0.0f) { /* average color and normal at incoming point */ - shader_eval_surface(kg, sd, state, state_flag, MAX_CLOSURE); + shader_eval_surface(kg, sd, state, state_flag, kernel_data.integrator.max_closures); float3 in_color = shader_bssrdf_sum(sd, (bump)? N: NULL, NULL); /* we simply divide out the average color and multiply with the average @@ -342,7 +342,7 @@ ccl_device_noinline void subsurface_scatter_multi_setup( subsurface_color_bump_blur(kg, sd, state, state_flag, &weight, &N); /* Setup diffuse BSDF. */ - subsurface_scatter_setup_diffuse_bsdf(sd, sc, weight, true, N); + subsurface_scatter_setup_diffuse_bsdf(kg, sd, sc, weight, true, N); } /* subsurface scattering step, from a point on the surface to another nearby point on the same object */ @@ -439,7 +439,7 @@ ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, ccl_a subsurface_color_bump_blur(kg, sd, state, state_flag, &eval, &N); /* setup diffuse bsdf */ - subsurface_scatter_setup_diffuse_bsdf(sd, sc, eval, (ss_isect.num_hits > 0), N); + subsurface_scatter_setup_diffuse_bsdf(kg, sd, sc, eval, (ss_isect.num_hits > 0), N); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index fc3e7b3da98..919dafbc780 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -782,10 +782,14 @@ typedef struct AttributeDescriptor { /* Closure data */ #ifdef __MULTI_CLOSURE__ -# ifndef __MAX_CLOSURE__ -# define MAX_CLOSURE 64 +# ifdef __SPLIT_KERNEL__ +# define MAX_CLOSURE 1 # else -# define MAX_CLOSURE __MAX_CLOSURE__ +# ifndef __MAX_CLOSURE__ +# define MAX_CLOSURE 64 +# else +# define MAX_CLOSURE __MAX_CLOSURE__ +# endif # endif #else # define MAX_CLOSURE 1 @@ -1313,7 +1317,8 @@ typedef struct KernelIntegrator { int volume_samples; int start_sample; - int pad; + + int max_closures; } KernelIntegrator; static_assert_align(KernelIntegrator, 16); diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h index fb3c5437275..5604d8e5163 100644 --- a/intern/cycles/kernel/kernel_volume.h +++ b/intern/cycles/kernel/kernel_volume.h @@ -62,7 +62,7 @@ ccl_device_inline bool volume_shader_sample(KernelGlobals *kg, VolumeShaderCoefficients *coeff) { sd->P = P; - shader_eval_volume(kg, sd, state, state->volume_stack, state->flag, MAX_CLOSURE); + shader_eval_volume(kg, sd, state, state->volume_stack, state->flag, kernel_data.integrator.max_closures); if(!(sd->flag & (SD_EXTINCTION|SD_SCATTER|SD_EMISSION))) return false; diff --git a/intern/cycles/kernel/split/kernel_branched.h b/intern/cycles/kernel/split/kernel_branched.h index 2313feac089..6456636caaa 100644 --- a/intern/cycles/kernel/split/kernel_branched.h +++ b/intern/cycles/kernel/split/kernel_branched.h @@ -30,10 +30,14 @@ ccl_device_inline void kernel_split_branched_path_indirect_loop_init(KernelGloba BRANCHED_STORE(path_state); BRANCHED_STORE(throughput); BRANCHED_STORE(ray); - BRANCHED_STORE(sd); BRANCHED_STORE(isect); BRANCHED_STORE(ray_state); + branched_state->sd = *kernel_split_sd(sd, ray_index); + for(int i = 0; i < branched_state->sd.num_closure; i++) { + branched_state->sd.closure[i] = kernel_split_sd(sd, ray_index)->closure[i]; + } + #undef BRANCHED_STORE /* set loop counters to intial position */ @@ -53,10 +57,14 @@ ccl_device_inline void kernel_split_branched_path_indirect_loop_end(KernelGlobal BRANCHED_RESTORE(path_state); BRANCHED_RESTORE(throughput); BRANCHED_RESTORE(ray); - BRANCHED_RESTORE(sd); BRANCHED_RESTORE(isect); BRANCHED_RESTORE(ray_state); + *kernel_split_sd(sd, ray_index) = branched_state->sd; + for(int i = 0; i < branched_state->sd.num_closure; i++) { + kernel_split_sd(sd, ray_index)->closure[i] = branched_state->sd.closure[i]; + } + #undef BRANCHED_RESTORE /* leave indirect loop */ diff --git a/intern/cycles/kernel/split/kernel_direct_lighting.h b/intern/cycles/kernel/split/kernel_direct_lighting.h index 832b0e5b265..ca79602c565 100644 --- a/intern/cycles/kernel/split/kernel_direct_lighting.h +++ b/intern/cycles/kernel/split/kernel_direct_lighting.h @@ -58,7 +58,7 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg, if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) { ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; - ShaderData *sd = &kernel_split_state.sd[ray_index]; + ShaderData *sd = kernel_split_sd(sd, ray_index); /* direct lighting */ #ifdef __EMISSION__ diff --git a/intern/cycles/kernel/split/kernel_do_volume.h b/intern/cycles/kernel/split/kernel_do_volume.h index 02881da6c04..fb5bd3d48dd 100644 --- a/intern/cycles/kernel/split/kernel_do_volume.h +++ b/intern/cycles/kernel/split/kernel_do_volume.h @@ -29,7 +29,7 @@ ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(K { SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index]; - ShaderData *sd = &kernel_split_state.sd[ray_index]; + ShaderData *sd = kernel_split_sd(sd, ray_index); PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); @@ -140,7 +140,7 @@ ccl_device void kernel_do_volume(KernelGlobals *kg) ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; ccl_global Intersection *isect = &kernel_split_state.isect[ray_index]; - ShaderData *sd = &kernel_split_state.sd[ray_index]; + ShaderData *sd = kernel_split_sd(sd, ray_index); ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); bool hit = ! IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND); diff --git a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h index bc8ca3aa3ca..88919f47c7a 100644 --- a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h +++ b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h @@ -94,7 +94,7 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao( float3 throughput; ccl_global char *ray_state = kernel_split_state.ray_state; - ShaderData *sd = &kernel_split_state.sd[ray_index]; + ShaderData *sd = kernel_split_sd(sd, ray_index); if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { uint buffer_offset = kernel_split_state.buffer_offset[ray_index]; diff --git a/intern/cycles/kernel/split/kernel_indirect_background.h b/intern/cycles/kernel/split/kernel_indirect_background.h index 0c894bd3d71..4cf88a02590 100644 --- a/intern/cycles/kernel/split/kernel_indirect_background.h +++ b/intern/cycles/kernel/split/kernel_indirect_background.h @@ -55,7 +55,7 @@ ccl_device void kernel_indirect_background(KernelGlobals *kg) PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; float3 throughput = kernel_split_state.throughput[ray_index]; - ShaderData *sd = &kernel_split_state.sd[ray_index]; + ShaderData *sd = kernel_split_sd(sd, ray_index); kernel_path_background(kg, state, ray, throughput, sd, L); kernel_split_path_end(kg, ray_index); diff --git a/intern/cycles/kernel/split/kernel_lamp_emission.h b/intern/cycles/kernel/split/kernel_lamp_emission.h index d5099ac66e6..c14f66f664f 100644 --- a/intern/cycles/kernel/split/kernel_lamp_emission.h +++ b/intern/cycles/kernel/split/kernel_lamp_emission.h @@ -58,7 +58,7 @@ ccl_device void kernel_lamp_emission(KernelGlobals *kg) float3 throughput = kernel_split_state.throughput[ray_index]; Ray ray = kernel_split_state.ray[ray_index]; ccl_global Intersection *isect = &kernel_split_state.isect[ray_index]; - ShaderData *sd = &kernel_split_state.sd[ray_index]; + ShaderData *sd = kernel_split_sd(sd, ray_index); kernel_path_lamp_emission(kg, state, &ray, throughput, isect, sd, L); } diff --git a/intern/cycles/kernel/split/kernel_next_iteration_setup.h b/intern/cycles/kernel/split/kernel_next_iteration_setup.h index c3373174582..bb6bf1cc7e6 100644 --- a/intern/cycles/kernel/split/kernel_next_iteration_setup.h +++ b/intern/cycles/kernel/split/kernel_next_iteration_setup.h @@ -58,7 +58,7 @@ ccl_device void kernel_split_branched_indirect_light_end(KernelGlobals *kg, int kernel_split_branched_path_indirect_loop_end(kg, ray_index); ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; - ShaderData *sd = &kernel_split_state.sd[ray_index]; + ShaderData *sd = kernel_split_sd(sd, ray_index); ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; @@ -126,7 +126,7 @@ ccl_device void kernel_next_iteration_setup(KernelGlobals *kg, if(active) { ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; - ShaderData *sd = &kernel_split_state.sd[ray_index]; + ShaderData *sd = kernel_split_sd(sd, ray_index); ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; diff --git a/intern/cycles/kernel/split/kernel_shader_eval.h b/intern/cycles/kernel/split/kernel_shader_eval.h index 22602537524..2409d1ba28b 100644 --- a/intern/cycles/kernel/split/kernel_shader_eval.h +++ b/intern/cycles/kernel/split/kernel_shader_eval.h @@ -50,15 +50,15 @@ ccl_device void kernel_shader_eval(KernelGlobals *kg) if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) { ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; - shader_eval_surface(kg, &kernel_split_state.sd[ray_index], state, state->flag, MAX_CLOSURE); + shader_eval_surface(kg, kernel_split_sd(sd, ray_index), state, state->flag, kernel_data.integrator.max_closures); #ifdef __BRANCHED_PATH__ if(kernel_data.integrator.branched) { - shader_merge_closures(&kernel_split_state.sd[ray_index]); + shader_merge_closures(kernel_split_sd(sd, ray_index)); } else #endif { - shader_prepare_closures(&kernel_split_state.sd[ray_index], state); + shader_prepare_closures(kernel_split_sd(sd, ray_index), state); } } } diff --git a/intern/cycles/kernel/split/kernel_shader_setup.h b/intern/cycles/kernel/split/kernel_shader_setup.h index 0432689d9fa..9d428ee8139 100644 --- a/intern/cycles/kernel/split/kernel_shader_setup.h +++ b/intern/cycles/kernel/split/kernel_shader_setup.h @@ -61,7 +61,7 @@ ccl_device void kernel_shader_setup(KernelGlobals *kg, Ray ray = kernel_split_state.ray[ray_index]; shader_setup_from_ray(kg, - &kernel_split_state.sd[ray_index], + kernel_split_sd(sd, ray_index), &isect, &ray); } diff --git a/intern/cycles/kernel/split/kernel_shader_sort.h b/intern/cycles/kernel/split/kernel_shader_sort.h index 5a55b680695..2132c42220f 100644 --- a/intern/cycles/kernel/split/kernel_shader_sort.h +++ b/intern/cycles/kernel/split/kernel_shader_sort.h @@ -47,7 +47,7 @@ ccl_device void kernel_shader_sort(KernelGlobals *kg, int ray_index = kernel_split_state.queue_data[add]; bool valid = (ray_index != QUEUE_EMPTY_SLOT) && IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE); if(valid) { - value = kernel_split_state.sd[ray_index].shader & SHADER_MASK; + value = kernel_split_sd(sd, ray_index)->shader & SHADER_MASK; } } local_value[i + lid] = value; diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h index b50de615fc8..a4cffd77eff 100644 --- a/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h +++ b/intern/cycles/kernel/split/kernel_shadow_blocked_ao.h @@ -33,7 +33,7 @@ ccl_device void kernel_shadow_blocked_ao(KernelGlobals *kg) return; } - ShaderData *sd = &kernel_split_state.sd[ray_index]; + ShaderData *sd = kernel_split_sd(sd, ray_index); ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h index 9a6bdfbdffe..da072fd5f1a 100644 --- a/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h +++ b/intern/cycles/kernel/split/kernel_shadow_blocked_dl.h @@ -43,7 +43,7 @@ ccl_device void kernel_shadow_blocked_dl(KernelGlobals *kg) ccl_global PathState *state = &kernel_split_state.path_state[ray_index]; Ray ray = kernel_split_state.light_ray[ray_index]; PathRadiance *L = &kernel_split_state.path_radiance[ray_index]; - ShaderData *sd = &kernel_split_state.sd[ray_index]; + ShaderData *sd = kernel_split_sd(sd, ray_index); float3 throughput = kernel_split_state.throughput[ray_index]; BsdfEval L_light = kernel_split_state.bsdf_eval[ray_index]; diff --git a/intern/cycles/kernel/split/kernel_split_data.h b/intern/cycles/kernel/split/kernel_split_data.h index eac22050a38..fa2f0b20a83 100644 --- a/intern/cycles/kernel/split/kernel_split_data.h +++ b/intern/cycles/kernel/split/kernel_split_data.h @@ -31,6 +31,14 @@ ccl_device_inline uint64_t split_data_buffer_size(KernelGlobals *kg, size_t num_ size = size SPLIT_DATA_ENTRIES; #undef SPLIT_DATA_ENTRY + uint64_t closure_size = sizeof(ShaderClosure) * (kernel_data.integrator.max_closures-1); + +#ifdef __BRANCHED_PATH__ + size += align_up(closure_size * num_elements, 16); +#endif + + size += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16); + return size; } @@ -49,6 +57,15 @@ ccl_device_inline void split_data_init(KernelGlobals *kg, SPLIT_DATA_ENTRIES; #undef SPLIT_DATA_ENTRY + uint64_t closure_size = sizeof(ShaderClosure) * (kernel_data.integrator.max_closures-1); + +#ifdef __BRANCHED_PATH__ + p += align_up(closure_size * num_elements, 16); +#endif + + split_data->_sd = (ShaderData*)p; + p += align_up(num_elements * (sizeof(ShaderData) + closure_size), 16); + split_data->ray_state = ray_state; } diff --git a/intern/cycles/kernel/split/kernel_split_data_types.h b/intern/cycles/kernel/split/kernel_split_data_types.h index 5c2aadcf4ec..9ac3f904819 100644 --- a/intern/cycles/kernel/split/kernel_split_data_types.h +++ b/intern/cycles/kernel/split/kernel_split_data_types.h @@ -51,7 +51,6 @@ typedef ccl_global struct SplitBranchedState { float3 throughput; Ray ray; - struct ShaderData sd; Intersection isect; char ray_state; @@ -77,6 +76,9 @@ typedef ccl_global struct SplitBranchedState { int shared_sample_count; /* number of branched samples shared with other threads */ int original_ray; /* index of original ray when sharing branched samples */ bool waiting_on_shared_samples; + + /* Must be last in to allow for dynamic size of closures */ + struct ShaderData sd; } SplitBranchedState; #define SPLIT_DATA_BRANCHED_ENTRIES \ @@ -110,11 +112,11 @@ typedef ccl_global struct SplitBranchedState { SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \ SPLIT_DATA_ENTRY(ccl_global int, queue_data, (NUM_QUEUES*2)) /* TODO(mai): this is too large? */ \ SPLIT_DATA_ENTRY(ccl_global uint, buffer_offset, 1) \ - SPLIT_DATA_ENTRY(ShaderData, sd, 1) \ SPLIT_DATA_ENTRY(ShaderDataTinyStorage, sd_DL_shadow, 1) \ SPLIT_DATA_SUBSURFACE_ENTRIES \ SPLIT_DATA_VOLUME_ENTRIES \ SPLIT_DATA_BRANCHED_ENTRIES \ + SPLIT_DATA_ENTRY(ShaderData, _sd, 0) /* entries to be copied to inactive rays when sharing branched samples (TODO: which are actually needed?) */ #define SPLIT_DATA_ENTRIES_BRANCHED_SHARED \ @@ -126,11 +128,11 @@ typedef ccl_global struct SplitBranchedState { SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \ SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \ SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \ - SPLIT_DATA_ENTRY(ShaderData, sd, 1) \ SPLIT_DATA_ENTRY(ShaderDataTinyStorage, sd_DL_shadow, 1) \ SPLIT_DATA_SUBSURFACE_ENTRIES \ SPLIT_DATA_VOLUME_ENTRIES \ SPLIT_DATA_BRANCHED_ENTRIES \ + SPLIT_DATA_ENTRY(ShaderData, _sd, 0) /* struct that holds pointers to data in the shared state buffer */ typedef struct SplitData { @@ -154,6 +156,12 @@ __device__ SplitParams __split_param_data; # define kernel_split_params (__split_param_data) #endif /* __KERNEL_CUDA__ */ +#define kernel_split_sd(sd, ray_index) ((ShaderData*) \ + ( \ + ((ccl_global char*)kernel_split_state._##sd) + \ + (sizeof(ShaderData) + sizeof(ShaderClosure)*(kernel_data.integrator.max_closures-1)) * (ray_index) \ + )) + /* Local storage for queue_enqueue kernel. */ typedef struct QueueEnqueueLocals { uint queue_atomics[2]; diff --git a/intern/cycles/kernel/split/kernel_subsurface_scatter.h b/intern/cycles/kernel/split/kernel_subsurface_scatter.h index c5504d0a89b..887c3e313d1 100644 --- a/intern/cycles/kernel/split/kernel_subsurface_scatter.h +++ b/intern/cycles/kernel/split/kernel_subsurface_scatter.h @@ -98,7 +98,7 @@ ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_it /* compute lighting with the BSDF closure */ for(int hit = branched_state->next_hit; hit < branched_state->num_hits; hit++) { - ShaderData *bssrdf_sd = &kernel_split_state.sd[ray_index]; + ShaderData *bssrdf_sd = kernel_split_sd(sd, ray_index); *bssrdf_sd = *sd; /* note: copy happens each iteration of inner loop, this is * important as the indirect path will write into bssrdf_sd */ @@ -228,7 +228,7 @@ ccl_device void kernel_subsurface_scatter(KernelGlobals *kg) ccl_global Ray *ray = &kernel_split_state.ray[ray_index]; ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index]; ccl_global SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index]; - ShaderData *sd = &kernel_split_state.sd[ray_index]; + ShaderData *sd = kernel_split_sd(sd, ray_index); ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]); if(sd->flag & SD_BSSRDF) { diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp index 74cfd02e1a4..e8d9558c38d 100644 --- a/intern/cycles/render/session.cpp +++ b/intern/cycles/render/session.cpp @@ -642,13 +642,11 @@ DeviceRequestedFeatures Session::get_requested_device_features() DeviceRequestedFeatures requested_features; requested_features.experimental = params.experimental; - requested_features.max_closure = get_max_closure_count(); scene->shader_manager->get_requested_features( scene, &requested_features); if(!params.background) { /* Avoid too much re-compilations for viewport render. */ - requested_features.max_closure = 64; requested_features.max_nodes_group = NODE_GROUP_LEVEL_MAX; requested_features.nodes_features = NODE_FEATURE_ALL; } @@ -858,6 +856,16 @@ void Session::update_scene() if(scene->need_update()) { load_kernels(false); + /* Update max_closures. */ + KernelIntegrator *kintegrator = &scene->dscene.data.integrator; + if(params.background) { + kintegrator->max_closures = get_max_closure_count(); + } + else { + /* Currently viewport render is faster with higher max_closures, needs investigating. */ + kintegrator->max_closures = 64; + } + progress.set_status("Updating Scene"); MEM_GUARDED_CALL(&progress, scene->device_update, device, progress); } |