diff options
author | Bastien Montagne <montagne29@wanadoo.fr> | 2017-11-10 12:13:19 +0300 |
---|---|---|
committer | Bastien Montagne <montagne29@wanadoo.fr> | 2017-11-10 12:13:19 +0300 |
commit | 7a6ad2901cce23d47c3a0618b4791d4eb034a6c5 (patch) | |
tree | abf9644577d5b7adcfb196ea74b81c5ee9e7e4d1 /intern/cycles | |
parent | 7defb27f084288e0218f068aec74a693a0ca9b6a (diff) | |
parent | a466d7ae248b2807b4e2f8693b458d3da509e9bc (diff) |
Merge branch 'master' into blender2.8
Diffstat (limited to 'intern/cycles')
-rw-r--r-- | intern/cycles/device/device_cpu.cpp | 8 | ||||
-rw-r--r-- | intern/cycles/device/device_cuda.cpp | 11 | ||||
-rw-r--r-- | intern/cycles/device/device_denoising.cpp | 58 | ||||
-rw-r--r-- | intern/cycles/device/device_denoising.h | 16 | ||||
-rw-r--r-- | intern/cycles/device/device_memory.h | 20 | ||||
-rw-r--r-- | intern/cycles/device/opencl/opencl.h | 2 | ||||
-rw-r--r-- | intern/cycles/device/opencl/opencl_base.cpp | 4 | ||||
-rw-r--r-- | intern/cycles/device/opencl/opencl_mega.cpp | 4 | ||||
-rw-r--r-- | intern/cycles/device/opencl/opencl_split.cpp | 3 | ||||
-rw-r--r-- | intern/cycles/kernel/closure/alloc.h | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_volume.h | 97 | ||||
-rw-r--r-- | intern/cycles/kernel/osl/osl_closures.cpp | 102 | ||||
-rw-r--r-- | intern/cycles/kernel/svm/svm_closure.h | 232 | ||||
-rw-r--r-- | intern/cycles/render/mesh.cpp | 2 | ||||
-rw-r--r-- | intern/cycles/render/object.cpp | 1 |
15 files changed, 337 insertions, 225 deletions
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index 0f4001ab1a6..ce02a5a932e 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -712,12 +712,10 @@ public: } } - void denoise(DeviceTask &task, RenderTile &tile) + void denoise(DeviceTask &task, DenoisingTask& denoising, RenderTile &tile) { tile.sample = tile.start_sample + tile.num_samples; - DenoisingTask denoising(this); - denoising.functions.construct_transform = function_bind(&CPUDevice::denoising_construct_transform, this, &denoising); denoising.functions.reconstruct = function_bind(&CPUDevice::denoising_reconstruct, this, _1, _2, _3, &denoising); denoising.functions.divide_shadow = function_bind(&CPUDevice::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising); @@ -769,6 +767,8 @@ public: } RenderTile tile; + DenoisingTask denoising(this); + while(task.acquire_tile(this, tile)) { if(tile.task == RenderTile::PATH_TRACE) { if(use_split_kernel) { @@ -780,7 +780,7 @@ public: } } else if(tile.task == RenderTile::DENOISE) { - denoise(task, tile); + denoise(task, denoising, tile); } task.release_tile(tile); diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 2f52bd49b16..e602cba00b8 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -1408,10 +1408,8 @@ public: return !have_error(); } - void denoise(RenderTile &rtile, const DeviceTask &task) + void denoise(RenderTile &rtile, DenoisingTask& denoising, const DeviceTask &task) { - DenoisingTask denoising(this); - denoising.functions.construct_transform = function_bind(&CUDADevice::denoising_construct_transform, this, &denoising); denoising.functions.reconstruct = function_bind(&CUDADevice::denoising_reconstruct, this, _1, _2, _3, &denoising); denoising.functions.divide_shadow = function_bind(&CUDADevice::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising); @@ -1878,8 +1876,6 @@ 
public: CUDAContextScope scope(this); if(task->type == DeviceTask::RENDER) { - RenderTile tile; - DeviceRequestedFeatures requested_features; if(use_split_kernel()) { if(split_kernel == NULL) { @@ -1891,6 +1887,9 @@ public: device_vector<WorkTile> work_tiles(this, "work_tiles", MEM_READ_ONLY); /* keep rendering tiles until done */ + RenderTile tile; + DenoisingTask denoising(this); + while(task->acquire_tile(this, tile)) { if(tile.task == RenderTile::PATH_TRACE) { if(use_split_kernel()) { @@ -1904,7 +1903,7 @@ public: else if(tile.task == RenderTile::DENOISE) { tile.sample = tile.start_sample + tile.num_samples; - denoise(tile, *task); + denoise(tile, denoising, *task); task->update_progress(&tile, tile.w*tile.h); } diff --git a/intern/cycles/device/device_denoising.cpp b/intern/cycles/device/device_denoising.cpp index 2d39721e3d3..69c43e4a8cf 100644 --- a/intern/cycles/device/device_denoising.cpp +++ b/intern/cycles/device/device_denoising.cpp @@ -20,6 +20,27 @@ CCL_NAMESPACE_BEGIN +DenoisingTask::DenoisingTask(Device *device) +: tiles_mem(device, "denoising tiles_mem", MEM_READ_WRITE), + storage(device), + buffer(device), + device(device) +{ +} + +DenoisingTask::~DenoisingTask() +{ + storage.XtWX.free(); + storage.XtWY.free(); + storage.transform.free(); + storage.rank.free(); + storage.temporary_1.free(); + storage.temporary_2.free(); + storage.temporary_color.free(); + buffer.mem.free(); + tiles_mem.free(); +} + void DenoisingTask::init_from_devicetask(const DeviceTask &task) { radius = task.denoising_radius; @@ -75,7 +96,7 @@ bool DenoisingTask::run_denoising() buffer.w = align_up(rect.z - rect.x, 4); buffer.h = rect.w - rect.y; buffer.pass_stride = align_up(buffer.w * buffer.h, divide_up(device->mem_address_alignment(), sizeof(float))); - buffer.mem.alloc_to_device(buffer.pass_stride * buffer.passes); + buffer.mem.alloc_to_device(buffer.pass_stride * buffer.passes, false); device_ptr null_ptr = (device_ptr) 0; @@ -159,11 +180,10 @@ bool 
DenoisingTask::run_denoising() int variance_to[] = {11, 12, 13}; int num_color_passes = 3; - device_only_memory<float> temp_color(device, "Denoising temporary color"); - temp_color.alloc_to_device(3*buffer.pass_stride); + storage.temporary_color.alloc_to_device(3*buffer.pass_stride, false); for(int pass = 0; pass < num_color_passes; pass++) { - device_sub_ptr color_pass(temp_color, pass*buffer.pass_stride, buffer.pass_stride); + device_sub_ptr color_pass(storage.temporary_color, pass*buffer.pass_stride, buffer.pass_stride); device_sub_ptr color_var_pass(buffer.mem, variance_to[pass]*buffer.pass_stride, buffer.pass_stride); functions.get_feature(mean_from[pass], variance_from[pass], *color_pass, *color_var_pass); } @@ -172,28 +192,24 @@ bool DenoisingTask::run_denoising() device_sub_ptr depth_pass (buffer.mem, 0, buffer.pass_stride); device_sub_ptr color_var_pass(buffer.mem, variance_to[0]*buffer.pass_stride, 3*buffer.pass_stride); device_sub_ptr output_pass (buffer.mem, mean_to[0]*buffer.pass_stride, 3*buffer.pass_stride); - functions.detect_outliers(temp_color.device_pointer, *color_var_pass, *depth_pass, *output_pass); + functions.detect_outliers(storage.temporary_color.device_pointer, *color_var_pass, *depth_pass, *output_pass); } - - temp_color.free(); } storage.w = filter_area.z; storage.h = filter_area.w; - storage.transform.alloc_to_device(storage.w*storage.h*TRANSFORM_SIZE); - storage.rank.alloc_to_device(storage.w*storage.h); + storage.transform.alloc_to_device(storage.w*storage.h*TRANSFORM_SIZE, false); + storage.rank.alloc_to_device(storage.w*storage.h, false); functions.construct_transform(); - device_only_memory<float> temporary_1(device, "Denoising NLM temporary 1"); - device_only_memory<float> temporary_2(device, "Denoising NLM temporary 2"); - temporary_1.alloc_to_device(buffer.w*buffer.h); - temporary_2.alloc_to_device(buffer.w*buffer.h); - reconstruction_state.temporary_1_ptr = temporary_1.device_pointer; - reconstruction_state.temporary_2_ptr = 
temporary_2.device_pointer; + storage.temporary_1.alloc_to_device(buffer.w*buffer.h, false); + storage.temporary_2.alloc_to_device(buffer.w*buffer.h, false); + reconstruction_state.temporary_1_ptr = storage.temporary_1.device_pointer; + reconstruction_state.temporary_2_ptr = storage.temporary_2.device_pointer; - storage.XtWX.alloc_to_device(storage.w*storage.h*XTWX_SIZE); - storage.XtWY.alloc_to_device(storage.w*storage.h*XTWY_SIZE); + storage.XtWX.alloc_to_device(storage.w*storage.h*XTWX_SIZE, false); + storage.XtWY.alloc_to_device(storage.w*storage.h*XTWY_SIZE, false); reconstruction_state.filter_rect = make_int4(filter_area.x-rect.x, filter_area.y-rect.y, storage.w, storage.h); int tile_coordinate_offset = filter_area.y*render_buffer.stride + filter_area.x; @@ -210,14 +226,6 @@ bool DenoisingTask::run_denoising() functions.reconstruct(*color_ptr, *color_var_ptr, render_buffer.ptr); } - storage.XtWX.free(); - storage.XtWY.free(); - storage.transform.free(); - storage.rank.free(); - temporary_1.free(); - temporary_2.free(); - buffer.mem.free(); - tiles_mem.free(); return true; } diff --git a/intern/cycles/device/device_denoising.h b/intern/cycles/device/device_denoising.h index 606f7422ac8..ec4e7933cdc 100644 --- a/intern/cycles/device/device_denoising.h +++ b/intern/cycles/device/device_denoising.h @@ -121,6 +121,9 @@ public: device_only_memory<int> rank; device_only_memory<float> XtWX; device_only_memory<float3> XtWY; + device_only_memory<float> temporary_1; + device_only_memory<float> temporary_2; + device_only_memory<float> temporary_color; int w; int h; @@ -128,16 +131,15 @@ public: : transform(device, "denoising transform"), rank(device, "denoising rank"), XtWX(device, "denoising XtWX"), - XtWY(device, "denoising XtWY") + XtWY(device, "denoising XtWY"), + temporary_1(device, "denoising NLM temporary 1"), + temporary_2(device, "denoising NLM temporary 2"), + temporary_color(device, "denoising temporary color") {} } storage; - DenoisingTask(Device *device) - : 
tiles_mem(device, "denoising tiles_mem", MEM_READ_WRITE), - storage(device), - buffer(device), - device(device) - {} + DenoisingTask(Device *device); + ~DenoisingTask(); void init_from_devicetask(const DeviceTask &task); diff --git a/intern/cycles/device/device_memory.h b/intern/cycles/device/device_memory.h index a2866ae3984..453dab9bfb3 100644 --- a/intern/cycles/device/device_memory.h +++ b/intern/cycles/device/device_memory.h @@ -243,15 +243,29 @@ public: free(); } - void alloc_to_device(size_t num) + void alloc_to_device(size_t num, bool shrink_to_fit = true) { - data_size = num*sizeof(T); - device_alloc(); + size_t new_size = num*sizeof(T); + bool reallocate; + + if(shrink_to_fit) { + reallocate = (data_size != new_size); + } + else { + reallocate = (data_size < new_size); + } + + if(reallocate) { + device_free(); + data_size = new_size; + device_alloc(); + } } void free() { device_free(); + data_size = 0; } void zero_to_device() diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h index 55848c8112d..c02f8ffafe6 100644 --- a/intern/cycles/device/opencl/opencl.h +++ b/intern/cycles/device/opencl/opencl.h @@ -360,7 +360,7 @@ public: void film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half); void shader(DeviceTask& task); - void denoise(RenderTile& tile, const DeviceTask& task); + void denoise(RenderTile& tile, DenoisingTask& denoising, const DeviceTask& task); class OpenCLDeviceTask : public DeviceTask { public: diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp index d4af392fdd2..f43177247ef 100644 --- a/intern/cycles/device/opencl/opencl_base.cpp +++ b/intern/cycles/device/opencl/opencl_base.cpp @@ -1066,10 +1066,8 @@ bool OpenCLDeviceBase::denoising_set_tiles(device_ptr *buffers, return true; } -void OpenCLDeviceBase::denoise(RenderTile &rtile, const DeviceTask &task) +void OpenCLDeviceBase::denoise(RenderTile &rtile, 
DenoisingTask& denoising, const DeviceTask &task) { - DenoisingTask denoising(this); - denoising.functions.set_tiles = function_bind(&OpenCLDeviceBase::denoising_set_tiles, this, _1, &denoising); denoising.functions.construct_transform = function_bind(&OpenCLDeviceBase::denoising_construct_transform, this, &denoising); denoising.functions.reconstruct = function_bind(&OpenCLDeviceBase::denoising_reconstruct, this, _1, _2, _3, &denoising); diff --git a/intern/cycles/device/opencl/opencl_mega.cpp b/intern/cycles/device/opencl/opencl_mega.cpp index f4555eaba4f..575ab73330e 100644 --- a/intern/cycles/device/opencl/opencl_mega.cpp +++ b/intern/cycles/device/opencl/opencl_mega.cpp @@ -105,6 +105,8 @@ public: } else if(task->type == DeviceTask::RENDER) { RenderTile tile; + DenoisingTask denoising(this); + /* Keep rendering tiles until done. */ while(task->acquire_tile(this, tile)) { if(tile.task == RenderTile::PATH_TRACE) { @@ -137,7 +139,7 @@ public: } else if(tile.task == RenderTile::DENOISE) { tile.sample = tile.start_sample + tile.num_samples; - denoise(tile, *task); + denoise(tile, denoising, *task); task->update_progress(&tile, tile.w*tile.h); } diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp index 2125f3d126f..1073cfa6bf6 100644 --- a/intern/cycles/device/opencl/opencl_split.cpp +++ b/intern/cycles/device/opencl/opencl_split.cpp @@ -128,6 +128,7 @@ public: } else if(task->type == DeviceTask::RENDER) { RenderTile tile; + DenoisingTask denoising(this); /* Allocate buffer for kernel globals */ device_only_memory<KernelGlobalsDummy> kgbuffer(this, "kernel_globals"); @@ -155,7 +156,7 @@ public: } else if(tile.task == RenderTile::DENOISE) { tile.sample = tile.start_sample + tile.num_samples; - denoise(tile, *task); + denoise(tile, denoising, *task); task->update_progress(&tile, tile.w*tile.h); } diff --git a/intern/cycles/kernel/closure/alloc.h b/intern/cycles/kernel/closure/alloc.h index 48a60405b5a..b903aeb8073 
100644 --- a/intern/cycles/kernel/closure/alloc.h +++ b/intern/cycles/kernel/closure/alloc.h @@ -45,7 +45,7 @@ ccl_device ccl_addr_space void *closure_alloc_extra(ShaderData *sd, int size) int num_extra = ((size + sizeof(ShaderClosure) - 1) / sizeof(ShaderClosure)); if(num_extra > sd->num_closure_left) { - /* Remove previous closure. */ + /* Remove previous closure if it was allocated. */ sd->num_closure--; sd->num_closure_left++; return NULL; diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h index 5604d8e5163..2af4c9a5e7a 100644 --- a/intern/cycles/kernel/kernel_volume.h +++ b/intern/cycles/kernel/kernel_volume.h @@ -74,7 +74,8 @@ ccl_device_inline bool volume_shader_sample(KernelGlobals *kg, make_float3(0.0f, 0.0f, 0.0f); if(sd->flag & SD_SCATTER) { - if(state->volume_bounce < kernel_data.integrator.max_volume_bounce) { + if(state->bounce < kernel_data.integrator.max_bounce && + state->volume_bounce < kernel_data.integrator.max_volume_bounce) { for(int i = 0; i < sd->num_closure; i++) { const ShaderClosure *sc = &sd->closure[i]; @@ -340,6 +341,34 @@ ccl_device float3 kernel_volume_emission_integrate(VolumeShaderCoefficients *coe /* Volume Path */ +ccl_device int kernel_volume_sample_channel(float3 albedo, float3 throughput, float rand, float3 *pdf) +{ + /* Sample color channel proportional to throughput and single scattering + * albedo, to significantly reduce noise with many bounce, following: + * + * "Practical and Controllable Subsurface Scattering for Production Path + * Tracing". Matt Jen-Yuan Chiang, Peter Kutz, Brent Burley. SIGGRAPH 2016. 
*/ + float3 weights = fabs(throughput * albedo); + float sum_weights = weights.x + weights.y + weights.z; + + if(sum_weights > 0.0f) { + *pdf = weights/sum_weights; + } + else { + *pdf = make_float3(1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f); + } + + if(rand < pdf->x) { + return 0; + } + else if(rand < pdf->x + pdf->y) { + return 1; + } + else { + return 2; + } +} + /* homogeneous volume: assume shader evaluation at the start gives * the volume shading coefficient for the entire line segment */ ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous( @@ -363,20 +392,18 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous( #ifdef __VOLUME_SCATTER__ /* randomly scatter, and if we do t is shortened */ if(closure_flag & SD_SCATTER) { - /* extinction coefficient */ - float3 sigma_t = coeff.sigma_t; - - /* pick random color channel, we use the Veach one-sample - * model with balance heuristic for the channels */ + /* Sample channel, use MIS with balance heuristic. */ float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL); - int channel = (int)(rphase*3.0f); + float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t); + float3 channel_pdf; + int channel = kernel_volume_sample_channel(albedo, *throughput, rphase, &channel_pdf); /* decide if we will hit or miss */ bool scatter = true; float xi = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE); if(probalistic_scatter) { - float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel); + float sample_sigma_t = kernel_volume_channel_get(coeff.sigma_t, channel); float sample_transmittance = expf(-sample_sigma_t * t); if(1.0f - xi >= sample_transmittance) { @@ -397,19 +424,19 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous( float sample_t; /* distance sampling */ - sample_t = kernel_volume_distance_sample(ray->t, sigma_t, channel, xi, &transmittance, &pdf); + sample_t = kernel_volume_distance_sample(ray->t, coeff.sigma_t, channel, xi, &transmittance, &pdf); /* 
modify pdf for hit/miss decision */ if(probalistic_scatter) - pdf *= make_float3(1.0f, 1.0f, 1.0f) - volume_color_transmittance(sigma_t, t); + pdf *= make_float3(1.0f, 1.0f, 1.0f) - volume_color_transmittance(coeff.sigma_t, t); - new_tp = *throughput * coeff.sigma_s * transmittance / average(pdf); + new_tp = *throughput * coeff.sigma_s * transmittance / dot(channel_pdf, pdf); t = sample_t; } else { /* no scattering */ - float3 transmittance = volume_color_transmittance(sigma_t, t); - float pdf = average(transmittance); + float3 transmittance = volume_color_transmittance(coeff.sigma_t, t); + float pdf = dot(channel_pdf, transmittance); new_tp = *throughput * transmittance / pdf; } } @@ -423,8 +450,7 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous( /* integrate emission attenuated by extinction */ if(L && (closure_flag & SD_EMISSION)) { - float3 sigma_t = coeff.sigma_t; - float3 transmittance = volume_color_transmittance(sigma_t, ray->t); + float3 transmittance = volume_color_transmittance(coeff.sigma_t, ray->t); float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, ray->t); path_radiance_accum_emission(L, state, *throughput, emission); } @@ -473,7 +499,6 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance( * model with balance heuristic for the channels */ float xi = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE); float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL); - int channel = (int)(rphase*3.0f); bool has_scatter = false; for(int i = 0; i < max_steps; i++) { @@ -500,32 +525,34 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance( if((closure_flag & SD_SCATTER) || (has_scatter && (closure_flag & SD_EXTINCTION))) { has_scatter = true; - float3 sigma_t = coeff.sigma_t; - float3 sigma_s = coeff.sigma_s; + /* Sample channel, use MIS with balance heuristic. 
*/ + float3 albedo = safe_divide_color(coeff.sigma_s, coeff.sigma_t); + float3 channel_pdf; + int channel = kernel_volume_sample_channel(albedo, tp, rphase, &channel_pdf); /* compute transmittance over full step */ - transmittance = volume_color_transmittance(sigma_t, dt); + transmittance = volume_color_transmittance(coeff.sigma_t, dt); /* decide if we will scatter or continue */ float sample_transmittance = kernel_volume_channel_get(transmittance, channel); if(1.0f - xi >= sample_transmittance) { /* compute sampling distance */ - float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel); + float sample_sigma_t = kernel_volume_channel_get(coeff.sigma_t, channel); float new_dt = -logf(1.0f - xi)/sample_sigma_t; new_t = t + new_dt; /* transmittance and pdf */ - float3 new_transmittance = volume_color_transmittance(sigma_t, new_dt); - float3 pdf = sigma_t * new_transmittance; + float3 new_transmittance = volume_color_transmittance(coeff.sigma_t, new_dt); + float3 pdf = coeff.sigma_t * new_transmittance; /* throughput */ - new_tp = tp * sigma_s * new_transmittance / average(pdf); + new_tp = tp * coeff.sigma_s * new_transmittance / dot(channel_pdf, pdf); scatter = true; } else { /* throughput */ - float pdf = average(transmittance); + float pdf = dot(channel_pdf, transmittance); new_tp = tp * transmittance / pdf; /* remap xi so we can reuse it and keep thing stratified */ @@ -632,6 +659,7 @@ typedef struct VolumeSegment { float3 accum_emission; /* accumulated emission at end of segment */ float3 accum_transmittance; /* accumulated transmittance at end of segment */ + float3 accum_albedo; /* accumulated average albedo over segment */ int sampling_method; /* volume sampling method */ } VolumeSegment; @@ -698,6 +726,7 @@ ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *sta /* init accumulation variables */ float3 accum_emission = make_float3(0.0f, 0.0f, 0.0f); float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f); + float3 
accum_albedo = make_float3(0.0f, 0.0f, 0.0f); float3 cdf_distance = make_float3(0.0f, 0.0f, 0.0f); float t = 0.0f; @@ -724,6 +753,11 @@ ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *sta int closure_flag = sd->flag; float3 sigma_t = coeff.sigma_t; + /* compute average albedo for channel sampling */ + if(closure_flag & SD_SCATTER) { + accum_albedo += dt * safe_divide_color(coeff.sigma_s, sigma_t); + } + /* compute accumulated transmittance */ float3 transmittance = volume_color_transmittance(sigma_t, dt); @@ -783,6 +817,7 @@ ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *sta /* store total emission and transmittance */ segment->accum_emission = accum_emission; segment->accum_transmittance = accum_transmittance; + segment->accum_albedo = accum_albedo; /* normalize cumulative density function for distance sampling */ VolumeStep *last_step = segment->steps + segment->numsteps - 1; @@ -825,9 +860,13 @@ ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter( { kernel_assert(segment->closure_flag & SD_SCATTER); - /* pick random color channel, we use the Veach one-sample - * model with balance heuristic for the channels */ - int channel = (int)(rphase*3.0f); + /* Sample color channel, use MIS with balance heuristic. 
*/ + float3 channel_pdf; + int channel = kernel_volume_sample_channel(segment->accum_albedo, + *throughput, + rphase, + &channel_pdf); + float xi = rscatter; /* probabilistic scattering decision based on transmittance */ @@ -914,7 +953,7 @@ ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter( if(probalistic_scatter) distance_pdf *= make_float3(1.0f, 1.0f, 1.0f) - segment->accum_transmittance; - pdf = average(distance_pdf * step_pdf_distance); + pdf = dot(channel_pdf, distance_pdf * step_pdf_distance); /* multiple importance sampling */ if(use_mis) { @@ -977,7 +1016,7 @@ ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter( /* multiple importance sampling */ if(use_mis) { float3 distance_pdf3 = kernel_volume_distance_pdf(step_t, step->sigma_t, step_sample_t); - float distance_pdf = average(distance_pdf3 * step_pdf_distance); + float distance_pdf = dot(channel_pdf, distance_pdf3 * step_pdf_distance); mis_weight = 2.0f*power_heuristic(pdf, distance_pdf); } } diff --git a/intern/cycles/kernel/osl/osl_closures.cpp b/intern/cycles/kernel/osl/osl_closures.cpp index 68c707e6c3e..e3e85705ebc 100644 --- a/intern/cycles/kernel/osl/osl_closures.cpp +++ b/intern/cycles/kernel/osl/osl_closures.cpp @@ -186,22 +186,22 @@ public: MicrofacetBsdf *alloc(ShaderData *sd, int path_flag, float3 weight) { MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc_osl(sd, sizeof(MicrofacetBsdf), weight, &params); - MicrofacetExtra *extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); - if(bsdf && extra) { - bsdf->extra = extra; - - bsdf->ior = 1.5f; - - bsdf->alpha_x = clearcoat_roughness; - bsdf->alpha_y = clearcoat_roughness; - - bsdf->extra->cspec0 = make_float3(0.04f, 0.04f, 0.04f); - bsdf->extra->clearcoat = clearcoat; + if(!bsdf) { + return NULL; + } - return bsdf; + MicrofacetExtra *extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); + if(!extra) { + return NULL; } - return NULL; + bsdf->extra = extra; + bsdf->ior = 1.5f; +
bsdf->alpha_x = clearcoat_roughness; + bsdf->alpha_y = clearcoat_roughness; + bsdf->extra->cspec0 = make_float3(0.04f, 0.04f, 0.04f); + bsdf->extra->clearcoat = clearcoat; + return bsdf; } void setup(ShaderData *sd, int path_flag, float3 weight) @@ -359,18 +359,24 @@ public: /* Technically, the MultiGGX Glass closure may also transmit. However, * since this is set statically and only used for caustic flags, this * is probably as good as it gets. */ - if(!skip(sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) { - MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc_osl(sd, sizeof(MicrofacetBsdf), weight, &params); - MicrofacetExtra *extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); - if(bsdf && extra) { - bsdf->extra = extra; - bsdf->extra->color = color; - bsdf->extra->cspec0 = cspec0; - return bsdf; - } + if(skip(sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) { + return NULL; + } + + MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc_osl(sd, sizeof(MicrofacetBsdf), weight, &params); + if(!bsdf) { + return NULL; + } + + MicrofacetExtra *extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); + if(!extra) { + return NULL; } - return NULL; + bsdf->extra = extra; + bsdf->extra->color = color; + bsdf->extra->cspec0 = cspec0; + return bsdf; } }; @@ -437,17 +443,23 @@ public: /* Technically, the MultiGGX closure may also transmit. However, * since this is set statically and only used for caustic flags, this * is probably as good as it gets.
*/ - if(!skip(sd, path_flag, LABEL_GLOSSY|LABEL_REFLECT)) { - MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc_osl(sd, sizeof(MicrofacetBsdf), weight, &params); - MicrofacetExtra *extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); - if(bsdf && extra) { - bsdf->extra = extra; - bsdf->extra->color = color; - return bsdf; - } + if(skip(sd, path_flag, LABEL_GLOSSY|LABEL_REFLECT)) { + return NULL; } - return NULL; + MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc_osl(sd, sizeof(MicrofacetBsdf), weight, &params); + if(!bsdf) { + return NULL; + } + + MicrofacetExtra *extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); + if(!extra) { + return NULL; + } + + bsdf->extra = extra; + bsdf->extra->color = color; + return bsdf; } }; @@ -536,18 +548,24 @@ public: /* Technically, the MultiGGX closure may also transmit. However, * since this is set statically and only used for caustic flags, this * is probably as good as it gets. */ - if(!skip(sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) { - MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc_osl(sd, sizeof(MicrofacetBsdf), weight, &params); - MicrofacetExtra *extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); - if(bsdf && extra) { - bsdf->extra = extra; - bsdf->extra->color = color; - bsdf->extra->cspec0 = cspec0; - return bsdf; - } + if(skip(sd, path_flag, LABEL_GLOSSY | LABEL_REFLECT)) { + return NULL; + } + + MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc_osl(sd, sizeof(MicrofacetBsdf), weight, &params); + if(!bsdf) { + return NULL; + } + + MicrofacetExtra *extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); + if(!extra) { + return NULL; } - return NULL; + bsdf->extra = extra; + bsdf->extra->color = color; + bsdf->extra->cspec0 = cspec0; + return bsdf; } }; diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h index 4afb91e732b..f04c46ef7f9 100644 --- a/intern/cycles/kernel/svm/svm_closure.h +++
b/intern/cycles/kernel/svm/svm_closure.h @@ -269,33 +269,38 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * float3 spec_weight = weight * specular_weight; MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), spec_weight); + if(!bsdf){ + break; + } + MicrofacetExtra *extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); + if(!extra) { + break; + } - if(bsdf && extra) { - bsdf->N = N; - bsdf->ior = (2.0f / (1.0f - safe_sqrtf(0.08f * specular))) - 1.0f; - bsdf->T = T; - bsdf->extra = extra; + bsdf->N = N; + bsdf->ior = (2.0f / (1.0f - safe_sqrtf(0.08f * specular))) - 1.0f; + bsdf->T = T; + bsdf->extra = extra; - float aspect = safe_sqrtf(1.0f - anisotropic * 0.9f); - float r2 = roughness * roughness; + float aspect = safe_sqrtf(1.0f - anisotropic * 0.9f); + float r2 = roughness * roughness; - bsdf->alpha_x = r2 / aspect; - bsdf->alpha_y = r2 * aspect; + bsdf->alpha_x = r2 / aspect; + bsdf->alpha_y = r2 * aspect; - float m_cdlum = 0.3f * base_color.x + 0.6f * base_color.y + 0.1f * base_color.z; // luminance approx. - float3 m_ctint = m_cdlum > 0.0f ? base_color / m_cdlum : make_float3(0.0f, 0.0f, 0.0f); // normalize lum. to isolate hue+sat - float3 tmp_col = make_float3(1.0f, 1.0f, 1.0f) * (1.0f - specular_tint) + m_ctint * specular_tint; + float m_cdlum = 0.3f * base_color.x + 0.6f * base_color.y + 0.1f * base_color.z; // luminance approx. + float3 m_ctint = m_cdlum > 0.0f ? base_color / m_cdlum : make_float3(0.0f, 0.0f, 0.0f); // normalize lum. 
to isolate hue+sat + float3 tmp_col = make_float3(1.0f, 1.0f, 1.0f) * (1.0f - specular_tint) + m_ctint * specular_tint; - bsdf->extra->cspec0 = (specular * 0.08f * tmp_col) * (1.0f - metallic) + base_color * metallic; - bsdf->extra->color = base_color; + bsdf->extra->cspec0 = (specular * 0.08f * tmp_col) * (1.0f - metallic) + base_color * metallic; + bsdf->extra->color = base_color; - /* setup bsdf */ - if(distribution == CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID || roughness <= 0.075f) /* use single-scatter GGX */ - sd->flag |= bsdf_microfacet_ggx_aniso_fresnel_setup(bsdf, sd); - else /* use multi-scatter GGX */ - sd->flag |= bsdf_microfacet_multi_ggx_aniso_fresnel_setup(bsdf, sd); - } + /* setup bsdf */ + if(distribution == CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID || roughness <= 0.075f) /* use single-scatter GGX */ + sd->flag |= bsdf_microfacet_ggx_aniso_fresnel_setup(bsdf, sd); + else /* use multi-scatter GGX */ + sd->flag |= bsdf_microfacet_multi_ggx_aniso_fresnel_setup(bsdf, sd); } #ifdef __CAUSTICS_TRICKS__ } @@ -318,22 +323,27 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * #endif { MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), glass_weight*fresnel); + if(!bsdf) { + break; + } + MicrofacetExtra *extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); + if(!extra) { + break; + } - if(bsdf && extra) { - bsdf->N = N; - bsdf->extra = extra; + bsdf->N = N; + bsdf->extra = extra; - bsdf->alpha_x = refl_roughness * refl_roughness; - bsdf->alpha_y = refl_roughness * refl_roughness; - bsdf->ior = ior; + bsdf->alpha_x = refl_roughness * refl_roughness; + bsdf->alpha_y = refl_roughness * refl_roughness; + bsdf->ior = ior; - bsdf->extra->color = base_color; - bsdf->extra->cspec0 = cspec0; + bsdf->extra->color = base_color; + bsdf->extra->cspec0 = cspec0; - /* setup bsdf */ - sd->flag |= bsdf_microfacet_ggx_fresnel_setup(bsdf, sd); - } + /* setup bsdf */ + sd->flag |= 
bsdf_microfacet_ggx_fresnel_setup(bsdf, sd); } /* refraction */ @@ -342,43 +352,49 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * #endif { MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), base_color*glass_weight*(1.0f - fresnel)); + if(!bsdf) { + break; + } - if(bsdf) { - bsdf->N = N; + bsdf->N = N; - if(distribution == CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID) - transmission_roughness = 1.0f - (1.0f - refl_roughness) * (1.0f - transmission_roughness); - else - transmission_roughness = refl_roughness; + if(distribution == CLOSURE_BSDF_MICROFACET_GGX_GLASS_ID) + transmission_roughness = 1.0f - (1.0f - refl_roughness) * (1.0f - transmission_roughness); + else + transmission_roughness = refl_roughness; - bsdf->alpha_x = transmission_roughness * transmission_roughness; - bsdf->alpha_y = transmission_roughness * transmission_roughness; - bsdf->ior = ior; + bsdf->alpha_x = transmission_roughness * transmission_roughness; + bsdf->alpha_y = transmission_roughness * transmission_roughness; + bsdf->ior = ior; - /* setup bsdf */ - sd->flag |= bsdf_microfacet_ggx_refraction_setup(bsdf); - } + /* setup bsdf */ + sd->flag |= bsdf_microfacet_ggx_refraction_setup(bsdf); } } else { /* use multi-scatter GGX */ MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), glass_weight); + if(!bsdf) { + break; + } + MicrofacetExtra *extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); + if(!extra) { + break; + } - if(bsdf && extra) { - bsdf->N = N; - bsdf->extra = extra; - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); + bsdf->N = N; + bsdf->extra = extra; + bsdf->T = make_float3(0.0f, 0.0f, 0.0f); - bsdf->alpha_x = roughness * roughness; - bsdf->alpha_y = roughness * roughness; - bsdf->ior = ior; + bsdf->alpha_x = roughness * roughness; + bsdf->alpha_y = roughness * roughness; + bsdf->ior = ior; - bsdf->extra->color = base_color; - bsdf->extra->cspec0 = cspec0; + bsdf->extra->color = 
base_color; + bsdf->extra->cspec0 = cspec0; - /* setup bsdf */ - sd->flag |= bsdf_microfacet_multi_ggx_glass_fresnel_setup(bsdf, sd); - } + /* setup bsdf */ + sd->flag |= bsdf_microfacet_multi_ggx_glass_fresnel_setup(bsdf, sd); } } #ifdef __CAUSTICS_TRICKS__ @@ -391,22 +407,27 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * #endif if(clearcoat > CLOSURE_WEIGHT_CUTOFF) { MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight); + if(!bsdf) { + break; + } + MicrofacetExtra *extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); + if(!extra) { + break; + } - if(bsdf && extra) { - bsdf->N = clearcoat_normal; - bsdf->ior = 1.5f; - bsdf->extra = extra; + bsdf->N = clearcoat_normal; + bsdf->ior = 1.5f; + bsdf->extra = extra; - bsdf->alpha_x = clearcoat_roughness * clearcoat_roughness; - bsdf->alpha_y = clearcoat_roughness * clearcoat_roughness; + bsdf->alpha_x = clearcoat_roughness * clearcoat_roughness; + bsdf->alpha_y = clearcoat_roughness * clearcoat_roughness; - bsdf->extra->cspec0 = make_float3(0.04f, 0.04f, 0.04f); - bsdf->extra->clearcoat = clearcoat; + bsdf->extra->cspec0 = make_float3(0.04f, 0.04f, 0.04f); + bsdf->extra->clearcoat = clearcoat; - /* setup bsdf */ - sd->flag |= bsdf_microfacet_ggx_clearcoat_setup(bsdf, sd); - } + /* setup bsdf */ + sd->flag |= bsdf_microfacet_ggx_clearcoat_setup(bsdf, sd); } #ifdef __CAUSTICS_TRICKS__ } @@ -461,30 +482,33 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * float3 weight = sd->svm_closure_weight * mix_weight; MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight); - if(bsdf) { - bsdf->N = N; - bsdf->alpha_x = param1; - bsdf->alpha_y = param1; - bsdf->ior = 0.0f; - bsdf->extra = NULL; + if(!bsdf) { + break; + } - /* setup bsdf */ - if(type == CLOSURE_BSDF_REFLECTION_ID) - sd->flag |= bsdf_reflection_setup(bsdf); - else if(type == CLOSURE_BSDF_MICROFACET_BECKMANN_ID) - 
sd->flag |= bsdf_microfacet_beckmann_setup(bsdf); - else if(type == CLOSURE_BSDF_MICROFACET_GGX_ID) - sd->flag |= bsdf_microfacet_ggx_setup(bsdf); - else if(type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID) { - kernel_assert(stack_valid(data_node.z)); - bsdf->extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); - if(bsdf->extra) { - bsdf->extra->color = stack_load_float3(stack, data_node.z); - sd->flag |= bsdf_microfacet_multi_ggx_setup(bsdf); - } + bsdf->N = N; + bsdf->alpha_x = param1; + bsdf->alpha_y = param1; + bsdf->ior = 0.0f; + bsdf->extra = NULL; + + /* setup bsdf */ + if(type == CLOSURE_BSDF_REFLECTION_ID) + sd->flag |= bsdf_reflection_setup(bsdf); + else if(type == CLOSURE_BSDF_MICROFACET_BECKMANN_ID) + sd->flag |= bsdf_microfacet_beckmann_setup(bsdf); + else if(type == CLOSURE_BSDF_MICROFACET_GGX_ID) + sd->flag |= bsdf_microfacet_ggx_setup(bsdf); + else if(type == CLOSURE_BSDF_MICROFACET_MULTI_GGX_ID) { + kernel_assert(stack_valid(data_node.z)); + bsdf->extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); + if(bsdf->extra) { + bsdf->extra->color = stack_load_float3(stack, data_node.z); + sd->flag |= bsdf_microfacet_multi_ggx_setup(bsdf); } - else - sd->flag |= bsdf_ashikhmin_shirley_setup(bsdf); + } + else { + sd->flag |= bsdf_ashikhmin_shirley_setup(bsdf); } break; @@ -586,25 +610,29 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * #endif float3 weight = sd->svm_closure_weight * mix_weight; MicrofacetBsdf *bsdf = (MicrofacetBsdf*)bsdf_alloc(sd, sizeof(MicrofacetBsdf), weight); - MicrofacetExtra *extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); + if(!bsdf) { + break; + } - if(bsdf && extra) { - bsdf->N = N; - bsdf->extra = extra; - bsdf->T = make_float3(0.0f, 0.0f, 0.0f); + MicrofacetExtra *extra = (MicrofacetExtra*)closure_alloc_extra(sd, sizeof(MicrofacetExtra)); + if(!extra) { + break; + } - bsdf->alpha_x = param1; - bsdf->alpha_y = param1; - float 
eta = fmaxf(param2, 1e-5f); - bsdf->ior = (sd->flag & SD_BACKFACING)? 1.0f/eta: eta; + bsdf->N = N; + bsdf->extra = extra; + bsdf->T = make_float3(0.0f, 0.0f, 0.0f); - kernel_assert(stack_valid(data_node.z)); - bsdf->extra->color = stack_load_float3(stack, data_node.z); + bsdf->alpha_x = param1; + bsdf->alpha_y = param1; + float eta = fmaxf(param2, 1e-5f); + bsdf->ior = (sd->flag & SD_BACKFACING)? 1.0f/eta: eta; - /* setup bsdf */ - sd->flag |= bsdf_microfacet_multi_ggx_glass_setup(bsdf); - } + kernel_assert(stack_valid(data_node.z)); + bsdf->extra->color = stack_load_float3(stack, data_node.z); + /* setup bsdf */ + sd->flag |= bsdf_microfacet_multi_ggx_glass_setup(bsdf); break; } case CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID: diff --git a/intern/cycles/render/mesh.cpp b/intern/cycles/render/mesh.cpp index 189ba80ad2a..4353fd4b819 100644 --- a/intern/cycles/render/mesh.cpp +++ b/intern/cycles/render/mesh.cpp @@ -2170,6 +2170,8 @@ void MeshManager::device_free(Device *device, DeviceScene *dscene) og->attribute_map.clear(); og->object_names.clear(); } +#else + (void)device; #endif } diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp index c70111d15b0..aef7fc29573 100644 --- a/intern/cycles/render/object.cpp +++ b/intern/cycles/render/object.cpp @@ -420,6 +420,7 @@ void ObjectManager::device_update_object_transform(UpdateObejctTransformState *s objects[offset+9] = make_float4(ob->dupli_generated[0], ob->dupli_generated[1], ob->dupli_generated[2], __int_as_float(numkeys)); objects[offset+10] = make_float4(ob->dupli_uv[0], ob->dupli_uv[1], __int_as_float(numsteps), __int_as_float(numverts)); + objects[offset+11] = make_float4(0.0f, 0.0f, 0.0f, 0.0f); /* Object flag. */ if(ob->use_holdout) { |