diff options
author | Brecht Van Lommel <brechtvanlommel@gmail.com> | 2018-08-28 20:15:08 +0300 |
---|---|---|
committer | Brecht Van Lommel <brechtvanlommel@gmail.com> | 2018-08-28 20:15:08 +0300 |
commit | 871b7ba8922650c34b487fec29a9d677b1aeadfd (patch) | |
tree | a13c8635e43919aed3ad48afe096f6501bbeb9a2 /intern | |
parent | 55263c8a4c03c1add138e5191f98a8b68591289f (diff) | |
parent | 9fd0060c0f3458e53b38003d1388f16cb56f45d8 (diff) |
Merge branch 'master' into blender2.8
Diffstat (limited to 'intern')
36 files changed, 203 insertions, 175 deletions
diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt index 00ac8e7e182..0147a4306f4 100644 --- a/intern/cycles/CMakeLists.txt +++ b/intern/cycles/CMakeLists.txt @@ -200,6 +200,9 @@ endif() if(WITH_CYCLES_OSL) add_definitions(-DWITH_OSL) + #osl 1.9.x + add_definitions(-DOSL_STATIC_BUILD) + #pre 1.9 add_definitions(-DOSL_STATIC_LIBRARY) include_directories( SYSTEM diff --git a/intern/cycles/app/cycles_cubin_cc.cpp b/intern/cycles/app/cycles_cubin_cc.cpp index 01a1234531b..da8ca53c8df 100644 --- a/intern/cycles/app/cycles_cubin_cc.cpp +++ b/intern/cycles/app/cycles_cubin_cc.cpp @@ -159,8 +159,7 @@ bool link_ptxas(CompilationSettings &settings) " --gpu-name sm_" + std::to_string(settings.target_arch) + " -m" + std::to_string(settings.bits); - if (settings.verbose) - { + if(settings.verbose) { ptx += " --verbose"; printf("%s\n", ptx.c_str()); } diff --git a/intern/cycles/blender/addon/engine.py b/intern/cycles/blender/addon/engine.py index 476cf975737..1604422211b 100644 --- a/intern/cycles/blender/addon/engine.py +++ b/intern/cycles/blender/addon/engine.py @@ -268,3 +268,10 @@ def register_passes(engine, scene, srl): engine.register_pass(scene, srl, "Denoising Shadow B", 3, "XYV", 'VECTOR') engine.register_pass(scene, srl, "Denoising Image", 3, "RGB", 'COLOR') engine.register_pass(scene, srl, "Denoising Image Variance", 3, "RGB", 'COLOR') + + clean_options = ("denoising_diffuse_direct", "denoising_diffuse_indirect", + "denoising_glossy_direct", "denoising_glossy_indirect", + "denoising_transmission_direct", "denoising_transmission_indirect", + "denoising_subsurface_direct", "denoising_subsurface_indirect") + if any(getattr(crl, option) for option in clean_options): + engine.register_pass(scene, srl, "Denoising Clean", 3, "RGB", 'COLOR') diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp index 05adb6f5fe0..4ac0e1f21c1 100644 --- a/intern/cycles/blender/blender_session.cpp +++ b/intern/cycles/blender/blender_session.cpp @@ -412,21 +412,19 @@ void BlenderSession::render(BL::Depsgraph& b_depsgraph_) PointerRNA crl = RNA_pointer_get(&b_view_layer.ptr, "cycles"); bool use_denoising = get_boolean(crl, "use_denoising"); - buffer_params.denoising_data_pass = use_denoising; + session->tile_manager.schedule_denoising = use_denoising; + buffer_params.denoising_data_pass = use_denoising; + buffer_params.denoising_clean_pass = (scene->film->denoising_flags & DENOISING_CLEAN_ALL_PASSES); + session->params.use_denoising = use_denoising; + session->params.denoising_radius = get_int(crl, "denoising_radius"); + session->params.denoising_strength = get_float(crl, "denoising_strength"); + session->params.denoising_feature_strength = get_float(crl, "denoising_feature_strength"); + session->params.denoising_relative_pca = get_boolean(crl, "denoising_relative_pca"); + scene->film->denoising_data_pass = buffer_params.denoising_data_pass; - scene->film->denoising_flags = 0; - if(!get_boolean(crl, "denoising_diffuse_direct")) scene->film->denoising_flags |= DENOISING_CLEAN_DIFFUSE_DIR; - if(!get_boolean(crl, "denoising_diffuse_indirect")) scene->film->denoising_flags |= DENOISING_CLEAN_DIFFUSE_IND; - if(!get_boolean(crl, "denoising_glossy_direct")) scene->film->denoising_flags |= DENOISING_CLEAN_GLOSSY_DIR; - if(!get_boolean(crl, "denoising_glossy_indirect")) scene->film->denoising_flags |= DENOISING_CLEAN_GLOSSY_IND; - if(!get_boolean(crl, "denoising_transmission_direct")) scene->film->denoising_flags |= DENOISING_CLEAN_TRANSMISSION_DIR; - if(!get_boolean(crl, "denoising_transmission_indirect")) scene->film->denoising_flags |= DENOISING_CLEAN_TRANSMISSION_IND; - if(!get_boolean(crl, "denoising_subsurface_direct")) scene->film->denoising_flags |= DENOISING_CLEAN_SUBSURFACE_DIR; - if(!get_boolean(crl, "denoising_subsurface_indirect")) scene->film->denoising_flags |= DENOISING_CLEAN_SUBSURFACE_IND; - scene->film->denoising_clean_pass = (scene->film->denoising_flags & DENOISING_CLEAN_ALL_PASSES); - buffer_params.denoising_clean_pass = scene->film->denoising_clean_pass; + scene->film->denoising_clean_pass = buffer_params.denoising_clean_pass; session->params.denoising_radius = get_int(crl, "denoising_radius"); session->params.denoising_strength = get_float(crl, "denoising_strength"); session->params.denoising_feature_strength = get_float(crl, "denoising_feature_strength"); diff --git a/intern/cycles/blender/blender_shader.cpp b/intern/cycles/blender/blender_shader.cpp index 62c160ca503..439f6bdd32e 100644 --- a/intern/cycles/blender/blender_shader.cpp +++ b/intern/cycles/blender/blender_shader.cpp @@ -1038,7 +1038,7 @@ static void add_nodes(Scene *scene, for(b_node->internal_links.begin(b_link); b_link != b_node->internal_links.end(); ++b_link) { BL::NodeSocket to_socket(b_link->to_socket()); SocketType::Type to_socket_type = convert_socket_type(to_socket); - if (to_socket_type == SocketType::UNDEFINED) { + if(to_socket_type == SocketType::UNDEFINED) { continue; } @@ -1065,7 +1065,7 @@ static void add_nodes(Scene *scene, */ for(b_node->inputs.begin(b_input); b_input != b_node->inputs.end(); ++b_input) { SocketType::Type input_type = convert_socket_type(*b_input); - if (input_type == SocketType::UNDEFINED) { + if(input_type == SocketType::UNDEFINED) { continue; } @@ -1081,7 +1081,7 @@ static void add_nodes(Scene *scene, } for(b_node->outputs.begin(b_output); b_output != b_node->outputs.end(); ++b_output) { SocketType::Type output_type = convert_socket_type(*b_output); - if (output_type == SocketType::UNDEFINED) { + if(output_type == SocketType::UNDEFINED) { continue; } diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp index d679ad1ee2e..0ea0e37e180 100644 --- a/intern/cycles/blender/blender_sync.cpp +++ b/intern/cycles/blender/blender_sync.cpp @@ -473,6 +473,7 @@ int BlenderSync::get_denoising_pass(BL::RenderPass& b_pass) MAP_PASS("Shadow B", DENOISING_PASS_SHADOW_B); MAP_PASS("Image", DENOISING_PASS_COLOR); MAP_PASS("Image Variance", DENOISING_PASS_COLOR_VAR); + MAP_PASS("Clean", DENOISING_PASS_CLEAN); #undef MAP_PASS return -1; @@ -502,6 +503,7 @@ array<Pass> BlenderSync::sync_render_passes(BL::RenderLayer& b_rlay, Pass::add(pass_type, passes); } + scene->film->denoising_flags = 0; PointerRNA crp = RNA_pointer_get(&b_view_layer.ptr, "cycles"); if(get_boolean(crp, "denoising_store_passes") && get_boolean(crp, "use_denoising")) @@ -516,6 +518,21 @@ array<Pass> BlenderSync::sync_render_passes(BL::RenderLayer& b_rlay, b_engine.add_pass("Denoising Shadow B", 3, "XYV", b_view_layer.name().c_str()); b_engine.add_pass("Denoising Image", 3, "RGB", b_view_layer.name().c_str()); b_engine.add_pass("Denoising Image Variance", 3, "RGB", b_view_layer.name().c_str()); + +#define MAP_OPTION(name, flag) if(!get_boolean(crp, name)) scene->film->denoising_flags |= flag; + MAP_OPTION("denoising_diffuse_direct", DENOISING_CLEAN_DIFFUSE_DIR); + MAP_OPTION("denoising_diffuse_indirect", DENOISING_CLEAN_DIFFUSE_IND); + MAP_OPTION("denoising_glossy_direct", DENOISING_CLEAN_GLOSSY_DIR); + MAP_OPTION("denoising_glossy_indirect", DENOISING_CLEAN_GLOSSY_IND); + MAP_OPTION("denoising_transmission_direct", DENOISING_CLEAN_TRANSMISSION_DIR); + MAP_OPTION("denoising_transmission_indirect", DENOISING_CLEAN_TRANSMISSION_IND); + MAP_OPTION("denoising_subsurface_direct", DENOISING_CLEAN_SUBSURFACE_DIR); + MAP_OPTION("denoising_subsurface_indirect", DENOISING_CLEAN_SUBSURFACE_IND); +#undef MAP_OPTION + + if(scene->film->denoising_flags & DENOISING_CLEAN_ALL_PASSES) { + b_engine.add_pass("Denoising Clean", 3, "RGB", b_view_layer.name().c_str()); + } } #ifdef __KERNEL_DEBUG__ if(get_boolean(crp, "pass_debug_bvh_traversed_nodes")) { diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index be0dcc20755..e92bbbfa6e6 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -471,9 +471,10 @@ public: int w = align_up(rect.z-rect.x, 4); int h = rect.w-rect.y; - float *blurDifference = (float*) task->nlm_state.temporary_1_ptr; - float *difference = (float*) task->nlm_state.temporary_2_ptr; - float *weightAccum = (float*) task->nlm_state.temporary_3_ptr; + float *temporary_mem = (float*) task->buffer.temporary_mem.device_pointer; + float *blurDifference = temporary_mem; + float *difference = temporary_mem + task->buffer.pass_stride; + float *weightAccum = temporary_mem + 2*task->buffer.pass_stride; memset(weightAccum, 0, sizeof(float)*w*h); memset((float*) out_ptr, 0, sizeof(float)*w*h); @@ -537,8 +538,9 @@ public: mem_zero(task->storage.XtWX); mem_zero(task->storage.XtWY); - float *difference = (float*) task->reconstruction_state.temporary_1_ptr; - float *blurDifference = (float*) task->reconstruction_state.temporary_2_ptr; + float *temporary_mem = (float*) task->buffer.temporary_mem.device_pointer; + float *difference = temporary_mem; + float *blurDifference = temporary_mem + task->buffer.pass_stride; int r = task->radius; for(int i = 0; i < (2*r+1)*(2*r+1); i++) { @@ -713,6 +715,7 @@ public: denoising.filter_area = make_int4(tile.x, tile.y, tile.w, tile.h); denoising.render_buffer.samples = tile.sample; + denoising.buffer.gpu_temporary_mem = false; denoising.run_denoising(&tile); } @@ -1025,7 +1028,7 @@ void device_cpu_info(vector<DeviceInfo>& devices) info.num = 0; info.advanced_shading = true; info.bvh_layout_mask = BVH_LAYOUT_BVH2; - if (system_cpu_support_sse2()) { + if(system_cpu_support_sse2()) { info.bvh_layout_mask |= BVH_LAYOUT_BVH4; } info.has_volume_decoupled = true; diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index c9ec1cc1219..830cc207d54 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -1294,23 +1294,19 @@ public: float a = task->nlm_state.a; float k_2 = task->nlm_state.k_2; - int shift_stride = stride*h; + int pass_stride = task->buffer.pass_stride; int num_shifts = (2*r+1)*(2*r+1); - int mem_size = sizeof(float)*shift_stride*num_shifts; int channel_offset = 0; - device_only_memory<uchar> temporary_mem(this, "Denoising temporary_mem"); - temporary_mem.alloc_to_device(2*mem_size); - if(have_error()) return false; - CUdeviceptr difference = cuda_device_ptr(temporary_mem.device_pointer); - CUdeviceptr blurDifference = difference + mem_size; + CUdeviceptr difference = cuda_device_ptr(task->buffer.temporary_mem.device_pointer); + CUdeviceptr blurDifference = difference + sizeof(float)*pass_stride*num_shifts; + CUdeviceptr weightAccum = difference + 2*sizeof(float)*pass_stride*num_shifts; - CUdeviceptr weightAccum = task->nlm_state.temporary_3_ptr; - cuda_assert(cuMemsetD8(weightAccum, 0, sizeof(float)*shift_stride)); - cuda_assert(cuMemsetD8(out_ptr, 0, sizeof(float)*shift_stride)); + cuda_assert(cuMemsetD8(weightAccum, 0, sizeof(float)*pass_stride)); + cuda_assert(cuMemsetD8(out_ptr, 0, sizeof(float)*pass_stride)); { CUfunction cuNLMCalcDifference, cuNLMBlur, cuNLMCalcWeight, cuNLMUpdateOutput; @@ -1326,10 +1322,10 @@ public: CUDA_GET_BLOCKSIZE_1D(cuNLMCalcDifference, w*h, num_shifts); - void *calc_difference_args[] = {&guide_ptr, &variance_ptr, &difference, &w, &h, &stride, &shift_stride, &r, &channel_offset, &a, &k_2}; - void *blur_args[] = {&difference, &blurDifference, &w, &h, &stride, &shift_stride, &r, &f}; - void *calc_weight_args[] = {&blurDifference, &difference, &w, &h, &stride, &shift_stride, &r, &f}; - void *update_output_args[] = {&blurDifference, &image_ptr, &out_ptr, &weightAccum, &w, &h, &stride, &shift_stride, &r, &f}; + void *calc_difference_args[] = {&guide_ptr, &variance_ptr, &difference, &w, &h, &stride, &pass_stride, &r, &channel_offset, &a, &k_2}; + void *blur_args[] = {&difference, &blurDifference, &w, &h, &stride, &pass_stride, &r, &f}; + void *calc_weight_args[] = {&blurDifference, &difference, &w, &h, &stride, &pass_stride, &r, &f}; + void *update_output_args[] = {&blurDifference, &image_ptr, &out_ptr, &weightAccum, &w, &h, &stride, &pass_stride, &r, &f}; CUDA_LAUNCH_KERNEL_1D(cuNLMCalcDifference, calc_difference_args); CUDA_LAUNCH_KERNEL_1D(cuNLMBlur, blur_args); @@ -1338,8 +1334,6 @@ public: CUDA_LAUNCH_KERNEL_1D(cuNLMUpdateOutput, update_output_args); } - temporary_mem.free(); - { CUfunction cuNLMNormalize; cuda_assert(cuModuleGetFunction(&cuNLMNormalize, cuFilterModule, "kernel_cuda_filter_nlm_normalize")); @@ -1614,6 +1608,7 @@ public: denoising.filter_area = make_int4(rtile.x, rtile.y, rtile.w, rtile.h); denoising.render_buffer.samples = rtile.sample; + denoising.buffer.gpu_temporary_mem = true; denoising.run_denoising(&rtile); } diff --git a/intern/cycles/device/device_denoising.cpp b/intern/cycles/device/device_denoising.cpp index c0d4634262d..23c18fa15b2 100644 --- a/intern/cycles/device/device_denoising.cpp +++ b/intern/cycles/device/device_denoising.cpp @@ -51,10 +51,8 @@ DenoisingTask::~DenoisingTask() storage.XtWY.free(); storage.transform.free(); storage.rank.free(); - storage.temporary_1.free(); - storage.temporary_2.free(); - storage.temporary_color.free(); buffer.mem.free(); + buffer.temporary_mem.free(); tile_info_mem.free(); } @@ -99,6 +97,16 @@ void DenoisingTask::setup_denoising_buffer() /* Pad the total size by four floats since the SIMD kernels might go a bit over the end. */ int mem_size = align_up(buffer.pass_stride * buffer.passes + 4, alignment_floats); buffer.mem.alloc_to_device(mem_size, false); + + /* CPUs process shifts sequentially while GPUs process them in parallel. */ + int num_shifts = 1; + if(buffer.gpu_temporary_mem) { + /* Shadowing prefiltering uses a radius of 6, so allocate at least that much. */ + int max_radius = max(radius, 6); + num_shifts = (2*max_radius + 1) * (2*max_radius + 1); + } + /* Allocate two layers per shift as well as one for the weight accumulation. */ + buffer.temporary_mem.alloc_to_device((2*num_shifts + 1) * buffer.pass_stride); } void DenoisingTask::prefilter_shadowing() @@ -111,13 +119,6 @@ void DenoisingTask::prefilter_shadowing() device_sub_ptr sample_var_var (buffer.mem, 3*buffer.pass_stride, buffer.pass_stride); device_sub_ptr buffer_var (buffer.mem, 5*buffer.pass_stride, buffer.pass_stride); device_sub_ptr filtered_var (buffer.mem, 6*buffer.pass_stride, buffer.pass_stride); - device_sub_ptr nlm_temporary_1(buffer.mem, 7*buffer.pass_stride, buffer.pass_stride); - device_sub_ptr nlm_temporary_2(buffer.mem, 8*buffer.pass_stride, buffer.pass_stride); - device_sub_ptr nlm_temporary_3(buffer.mem, 9*buffer.pass_stride, buffer.pass_stride); - - nlm_state.temporary_1_ptr = *nlm_temporary_1; - nlm_state.temporary_2_ptr = *nlm_temporary_2; - nlm_state.temporary_3_ptr = *nlm_temporary_3; /* Get the A/B unfiltered passes, the combined sample variance, the estimated variance of the sample variance and the buffer variance. */ functions.divide_shadow(*unfiltered_a, *unfiltered_b, *sample_var, *sample_var_var, *buffer_var); @@ -154,13 +155,6 @@ void DenoisingTask::prefilter_features() { device_sub_ptr unfiltered (buffer.mem, 8*buffer.pass_stride, buffer.pass_stride); device_sub_ptr variance (buffer.mem, 9*buffer.pass_stride, buffer.pass_stride); - device_sub_ptr nlm_temporary_1(buffer.mem, 10*buffer.pass_stride, buffer.pass_stride); - device_sub_ptr nlm_temporary_2(buffer.mem, 11*buffer.pass_stride, buffer.pass_stride); - device_sub_ptr nlm_temporary_3(buffer.mem, 12*buffer.pass_stride, buffer.pass_stride); - - nlm_state.temporary_1_ptr = *nlm_temporary_1; - nlm_state.temporary_2_ptr = *nlm_temporary_2; - nlm_state.temporary_3_ptr = *nlm_temporary_3; int mean_from[] = { 0, 1, 2, 12, 6, 7, 8 }; int variance_from[] = { 3, 4, 5, 13, 9, 10, 11}; @@ -183,17 +177,11 @@ void DenoisingTask::prefilter_color() int variance_to[] = {11, 12, 13}; int num_color_passes = 3; - storage.temporary_color.alloc_to_device(3*buffer.pass_stride, false); - device_sub_ptr nlm_temporary_1(storage.temporary_color, 0*buffer.pass_stride, buffer.pass_stride); - device_sub_ptr nlm_temporary_2(storage.temporary_color, 1*buffer.pass_stride, buffer.pass_stride); - device_sub_ptr nlm_temporary_3(storage.temporary_color, 2*buffer.pass_stride, buffer.pass_stride); - - nlm_state.temporary_1_ptr = *nlm_temporary_1; - nlm_state.temporary_2_ptr = *nlm_temporary_2; - nlm_state.temporary_3_ptr = *nlm_temporary_3; + device_only_memory<float> temporary_color(device, "denoising temporary color"); + temporary_color.alloc_to_device(3*buffer.pass_stride, false); for(int pass = 0; pass < num_color_passes; pass++) { - device_sub_ptr color_pass(storage.temporary_color, pass*buffer.pass_stride, buffer.pass_stride); + device_sub_ptr color_pass(temporary_color, pass*buffer.pass_stride, buffer.pass_stride); device_sub_ptr color_var_pass(buffer.mem, variance_to[pass]*buffer.pass_stride, buffer.pass_stride); functions.get_feature(mean_from[pass], variance_from[pass], *color_pass, *color_var_pass); } @@ -201,9 +189,7 @@ void DenoisingTask::prefilter_color() device_sub_ptr depth_pass (buffer.mem, 0, buffer.pass_stride); device_sub_ptr color_var_pass(buffer.mem, variance_to[0]*buffer.pass_stride, 3*buffer.pass_stride); device_sub_ptr output_pass (buffer.mem, mean_to[0]*buffer.pass_stride, 3*buffer.pass_stride); - functions.detect_outliers(storage.temporary_color.device_pointer, *color_var_pass, *depth_pass, *output_pass); - - storage.temporary_color.free(); + functions.detect_outliers(temporary_color.device_pointer, *color_var_pass, *depth_pass, *output_pass); } void DenoisingTask::construct_transform() @@ -219,14 +205,6 @@ void DenoisingTask::construct_transform() void DenoisingTask::reconstruct() { - - device_only_memory<float> temporary_1(device, "Denoising NLM temporary 1"); - device_only_memory<float> temporary_2(device, "Denoising NLM temporary 2"); - temporary_1.alloc_to_device(buffer.pass_stride, false); - temporary_2.alloc_to_device(buffer.pass_stride, false); - reconstruction_state.temporary_1_ptr = temporary_1.device_pointer; - reconstruction_state.temporary_2_ptr = temporary_2.device_pointer; - storage.XtWX.alloc_to_device(storage.w*storage.h*XTWX_SIZE, false); storage.XtWY.alloc_to_device(storage.w*storage.h*XTWY_SIZE, false); diff --git a/intern/cycles/device/device_denoising.h b/intern/cycles/device/device_denoising.h index e0da52867f1..7474f71ff78 100644 --- a/intern/cycles/device/device_denoising.h +++ b/intern/cycles/device/device_denoising.h @@ -96,9 +96,6 @@ public: /* Stores state of the current Reconstruction operation, * which is accessed by the device in order to perform the operation. */ struct ReconstructionState { - device_ptr temporary_1_ptr; /* There two images are used as temporary storage. */ - device_ptr temporary_2_ptr; - int4 filter_window; int4 buffer_params; @@ -109,10 +106,6 @@ public: /* Stores state of the current NLM operation, * which is accessed by the device in order to perform the operation. */ struct NLMState { - device_ptr temporary_1_ptr; /* There three images are used as temporary storage. */ - device_ptr temporary_2_ptr; - device_ptr temporary_3_ptr; - int r; /* Search radius of the filter. */ int f; /* Patch size of the filter. */ float a; /* Variance compensation factor in the MSE estimation. */ @@ -126,9 +119,6 @@ public: device_only_memory<int> rank; device_only_memory<float> XtWX; device_only_memory<float3> XtWY; - device_only_memory<float> temporary_1; - device_only_memory<float> temporary_2; - device_only_memory<float> temporary_color; int w; int h; @@ -136,10 +126,7 @@ public: : transform(device, "denoising transform"), rank(device, "denoising rank"), XtWX(device, "denoising XtWX"), - XtWY(device, "denoising XtWY"), - temporary_1(device, "denoising NLM temporary 1"), - temporary_2(device, "denoising NLM temporary 2"), - temporary_color(device, "denoising temporary color") + XtWY(device, "denoising XtWY") {} } storage; @@ -155,9 +142,13 @@ public: int h; int width; device_only_memory<float> mem; + device_only_memory<float> temporary_mem; + + bool gpu_temporary_mem; DenoiseBuffers(Device *device) - : mem(device, "denoising pixel buffer") + : mem(device, "denoising pixel buffer"), + temporary_mem(device, "denoising temporary mem") {} } buffer; diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp index 9a50d217321..69a2bd7adcb 100644 --- a/intern/cycles/device/opencl/opencl_base.cpp +++ b/intern/cycles/device/opencl/opencl_base.cpp @@ -738,7 +738,6 @@ bool OpenCLDeviceBase::denoising_non_local_means(device_ptr image_ptr, device_ptr out_ptr, DenoisingTask *task) { - int stride = task->buffer.stride; int w = task->buffer.width; int h = task->buffer.h; @@ -747,24 +746,23 @@ bool OpenCLDeviceBase::denoising_non_local_means(device_ptr image_ptr, float a = task->nlm_state.a; float k_2 = task->nlm_state.k_2; - int shift_stride = stride*h; + int pass_stride = task->buffer.pass_stride; int num_shifts = (2*r+1)*(2*r+1); - int mem_size = sizeof(float)*shift_stride*num_shifts; - cl_mem weightAccum = CL_MEM_PTR(task->nlm_state.temporary_3_ptr); - - cl_mem difference = clCreateBuffer(cxContext, CL_MEM_READ_WRITE, mem_size, NULL, &ciErr); - opencl_assert_err(ciErr, "clCreateBuffer denoising_non_local_means"); - cl_mem blurDifference = clCreateBuffer(cxContext, CL_MEM_READ_WRITE, mem_size, NULL, &ciErr); - opencl_assert_err(ciErr, "clCreateBuffer denoising_non_local_means"); + device_sub_ptr difference(task->buffer.temporary_mem, 0, pass_stride*num_shifts); + device_sub_ptr blurDifference(task->buffer.temporary_mem, pass_stride*num_shifts, pass_stride*num_shifts); + device_sub_ptr weightAccum(task->buffer.temporary_mem, 2*pass_stride*num_shifts, pass_stride); + cl_mem weightAccum_mem = CL_MEM_PTR(*weightAccum); + cl_mem difference_mem = CL_MEM_PTR(*difference); + cl_mem blurDifference_mem = CL_MEM_PTR(*blurDifference); cl_mem image_mem = CL_MEM_PTR(image_ptr); cl_mem guide_mem = CL_MEM_PTR(guide_ptr); cl_mem variance_mem = CL_MEM_PTR(variance_ptr); cl_mem out_mem = CL_MEM_PTR(out_ptr); - mem_zero_kernel(task->nlm_state.temporary_3_ptr, sizeof(float)*w*h); - mem_zero_kernel(out_ptr, sizeof(float)*w*h); + mem_zero_kernel(*difference, sizeof(float)*pass_stride); + mem_zero_kernel(out_ptr, sizeof(float)*pass_stride); cl_kernel ckNLMCalcDifference = denoising_program(ustring("filter_nlm_calc_difference")); cl_kernel ckNLMBlur = denoising_program(ustring("filter_nlm_blur")); @@ -775,29 +773,29 @@ bool OpenCLDeviceBase::denoising_non_local_means(device_ptr image_ptr, kernel_set_args(ckNLMCalcDifference, 0, guide_mem, variance_mem, - difference, + difference_mem, w, h, stride, - shift_stride, + pass_stride, r, 0, a, k_2); kernel_set_args(ckNLMBlur, 0, - difference, - blurDifference, + difference_mem, + blurDifference_mem, w, h, stride, - shift_stride, + pass_stride, r, f); kernel_set_args(ckNLMCalcWeight, 0, - blurDifference, - difference, + blurDifference_mem, + difference_mem, w, h, stride, - shift_stride, + pass_stride, r, f); kernel_set_args(ckNLMUpdateOutput, 0, - blurDifference, + blurDifference_mem, image_mem, out_mem, - weightAccum, + weightAccum_mem, w, h, stride, - shift_stride, + pass_stride, r, f); enqueue_kernel(ckNLMCalcDifference, w*h, num_shifts, true); @@ -806,11 +804,8 @@ bool OpenCLDeviceBase::denoising_non_local_means(device_ptr image_ptr, enqueue_kernel(ckNLMBlur, w*h, num_shifts, true); enqueue_kernel(ckNLMUpdateOutput, w*h, num_shifts, true); - opencl_assert(clReleaseMemObject(difference)); - opencl_assert(clReleaseMemObject(blurDifference)); - kernel_set_args(ckNLMNormalize, 0, - out_mem, weightAccum, w, h, stride); + out_mem, weightAccum_mem, w, h, stride); enqueue_kernel(ckNLMNormalize, w, h); return true; @@ -1081,6 +1076,7 @@ void OpenCLDeviceBase::denoise(RenderTile &rtile, DenoisingTask& denoising) denoising.filter_area = make_int4(rtile.x, rtile.y, rtile.w, rtile.h); denoising.render_buffer.samples = rtile.sample; + denoising.buffer.gpu_temporary_mem = true; denoising.run_denoising(&rtile); } diff --git a/intern/cycles/device/opencl/opencl_util.cpp b/intern/cycles/device/opencl/opencl_util.cpp index d6304ba688a..895e4149a3a 100644 --- a/intern/cycles/device/opencl/opencl_util.cpp +++ b/intern/cycles/device/opencl/opencl_util.cpp @@ -1139,13 +1139,13 @@ string OpenCLInfo::get_readable_device_name(cl_device_id device_id) /* Special exception for AMD Vega, need to be able to tell * Vega 56 from 64 apart. */ - if (name == "Radeon RX Vega") { + if(name == "Radeon RX Vega") { cl_int max_compute_units = 0; - if (clGetDeviceInfo(device_id, - CL_DEVICE_MAX_COMPUTE_UNITS, - sizeof(max_compute_units), - &max_compute_units, - NULL) == CL_SUCCESS) + if(clGetDeviceInfo(device_id, + CL_DEVICE_MAX_COMPUTE_UNITS, + sizeof(max_compute_units), + &max_compute_units, + NULL) == CL_SUCCESS) { name += " " + to_string(max_compute_units); } diff --git a/intern/cycles/graph/node.cpp b/intern/cycles/graph/node.cpp index 5960d9aa7d5..19fe0a168ea 100644 --- a/intern/cycles/graph/node.cpp +++ b/intern/cycles/graph/node.cpp @@ -443,7 +443,7 @@ template<typename T> void array_hash(const Node *node, const SocketType& socket, MD5Hash& md5) { const array<T>& a = *(const array<T>*)(((char*)node) + socket.struct_offset); - for (size_t i = 0; i < a.size(); i++) { + for(size_t i = 0; i < a.size(); i++) { md5.append((uint8_t*)&a[i], sizeof(T)); } } @@ -452,7 +452,7 @@ void float3_array_hash(const Node *node, const SocketType& socket, MD5Hash& md5) { /* Don't compare 4th element used for padding. */ const array<float3>& a = *(const array<float3>*)(((char*)node) + socket.struct_offset); - for (size_t i = 0; i < a.size(); i++) { + for(size_t i = 0; i < a.size(); i++) { md5.append((uint8_t*)&a[i], sizeof(float) * 3); } } diff --git a/intern/cycles/kernel/closure/bsdf_hair_principled.h b/intern/cycles/kernel/closure/bsdf_hair_principled.h index 4ee58089384..b3b56be39ff 100644 --- a/intern/cycles/kernel/closure/bsdf_hair_principled.h +++ b/intern/cycles/kernel/closure/bsdf_hair_principled.h @@ -120,7 +120,7 @@ ccl_device_inline float bessel_I0(float x) /* Logarithm of the Bessel function of the first kind. */ ccl_device_inline float log_bessel_I0(float x) { - if (x > 12.0f) { + if(x > 12.0f) { /* log(1/x) == -log(x) iff x > 0. * This is only used with positive cosines */ return x + 0.5f * (1.f / (8.0f * x) - M_LN_2PI_F - logf(x)); diff --git a/intern/cycles/kernel/kernel_path_state.h b/intern/cycles/kernel/kernel_path_state.h index b13fabde268..e85050df4bb 100644 --- a/intern/cycles/kernel/kernel_path_state.h +++ b/intern/cycles/kernel/kernel_path_state.h @@ -170,7 +170,7 @@ ccl_device_inline bool path_state_volume_next(KernelGlobals *kg, ccl_addr_space /* For volume bounding meshes we pass through without counting transparent * bounces, only sanity check in case self intersection gets us stuck. */ state->volume_bounds_bounce++; - if (state->volume_bounds_bounce > VOLUME_BOUNDS_MAX) { + if(state->volume_bounds_bounce > VOLUME_BOUNDS_MAX) { return false; } diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h index b83460b3ffb..e834b701f96 100644 --- a/intern/cycles/kernel/kernel_shader.h +++ b/intern/cycles/kernel/kernel_shader.h @@ -1010,7 +1010,7 @@ ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd, DiffuseBsdf *bsdf = (DiffuseBsdf*)bsdf_alloc(sd, sizeof(DiffuseBsdf), make_float3(0.8f, 0.8f, 0.8f)); - if (bsdf != NULL) { + if(bsdf != NULL) { bsdf->N = sd->N; sd->flag |= bsdf_diffuse_setup(bsdf); } diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index 85548484873..d169915cff9 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -454,6 +454,7 @@ typedef enum DenoisingPassOffsets { DENOISING_PASS_SHADOW_B = 17, DENOISING_PASS_COLOR = 20, DENOISING_PASS_COLOR_VAR = 23, + DENOISING_PASS_CLEAN = 26, DENOISING_PASS_SIZE_BASE = 26, DENOISING_PASS_SIZE_CLEAN = 3, diff --git a/intern/cycles/kernel/osl/osl_bssrdf.cpp b/intern/cycles/kernel/osl/osl_bssrdf.cpp index 2ebfd5f0c2f..66ec8a996ca 100644 --- a/intern/cycles/kernel/osl/osl_bssrdf.cpp +++ b/intern/cycles/kernel/osl/osl_bssrdf.cpp @@ -69,22 +69,22 @@ public: void setup(ShaderData *sd, int path_flag, float3 weight) { - if (method == u_cubic) { + if(method == u_cubic) { alloc(sd, path_flag, weight, CLOSURE_BSSRDF_CUBIC_ID); } - else if (method == u_gaussian) { + else if(method == u_gaussian) { alloc(sd, path_flag, weight, CLOSURE_BSSRDF_GAUSSIAN_ID); } - else if (method == u_burley) { + else if(method == u_burley) { alloc(sd, path_flag, weight, CLOSURE_BSSRDF_BURLEY_ID); } - else if (method == u_principled) { + else if(method == u_principled) { alloc(sd, path_flag, weight, CLOSURE_BSSRDF_PRINCIPLED_ID); } - else if (method == u_random_walk) { + else if(method == u_random_walk) { alloc(sd, path_flag, weight, CLOSURE_BSSRDF_RANDOM_WALK_ID); } - else if (method == u_principled_random_walk) { + else if(method == u_principled_random_walk) { alloc(sd, path_flag, weight, CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID); } } diff --git a/intern/cycles/kernel/osl/osl_closures.cpp b/intern/cycles/kernel/osl/osl_closures.cpp index 8c7ae30725c..169351d5ad9 100644 --- a/intern/cycles/kernel/osl/osl_closures.cpp +++ b/intern/cycles/kernel/osl/osl_closures.cpp @@ -200,11 +200,9 @@ public: void setup(ShaderData *sd, int path_flag, float3 weight) { - if(!skip(sd, path_flag, LABEL_GLOSSY)) - { + if(!skip(sd, path_flag, LABEL_GLOSSY)) { PrincipledHairBSDF *bsdf = (PrincipledHairBSDF*)alloc(sd, path_flag, weight); - if (!bsdf) - { + if(!bsdf) { return; } diff --git a/intern/cycles/kernel/svm/svm_ao.h b/intern/cycles/kernel/svm/svm_ao.h index d2d36cefd50..0337c88a543 100644 --- a/intern/cycles/kernel/svm/svm_ao.h +++ b/intern/cycles/kernel/svm/svm_ao.h @@ -98,11 +98,11 @@ ccl_device void svm_node_ao(KernelGlobals *kg, float3 normal = stack_valid(normal_offset)? stack_load_float3(stack, normal_offset): sd->N; float ao = svm_ao(kg, sd, normal, state, dist, samples, flags); - if (stack_valid(out_ao_offset)) { + if(stack_valid(out_ao_offset)) { stack_store_float(stack, out_ao_offset, ao); } - if (stack_valid(out_color_offset)) { + if(stack_valid(out_color_offset)) { float3 color = stack_load_float3(stack, color_offset); stack_store_float3(stack, out_color_offset, ao * color); } diff --git a/intern/cycles/kernel/svm/svm_bevel.h b/intern/cycles/kernel/svm/svm_bevel.h index 19cdb71c20f..79d0fb6ddbe 100644 --- a/intern/cycles/kernel/svm/svm_bevel.h +++ b/intern/cycles/kernel/svm/svm_bevel.h @@ -148,11 +148,11 @@ ccl_device_noinline float3 svm_bevel( int prim = kernel_tex_fetch(__prim_index, isect.hits[hit].prim); int shader = kernel_tex_fetch(__tri_shader, prim); - if (shader & SHADER_SMOOTH_NORMAL) { + if(shader & SHADER_SMOOTH_NORMAL) { float u = isect.hits[hit].u; float v = isect.hits[hit].v; - if (sd->type & PRIMITIVE_TRIANGLE) { + if(sd->type & PRIMITIVE_TRIANGLE) { N = triangle_smooth_normal(kg, N, prim, u, v); } #ifdef __OBJECT_MOTION__ diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h index aa253223c93..cce0e0eefe0 100644 --- a/intern/cycles/kernel/svm/svm_closure.h +++ b/intern/cycles/kernel/svm/svm_closure.h @@ -757,7 +757,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * const AttributeDescriptor attr_descr_random = find_attribute(kg, sd, data_node4.y); float random = 0.0f; - if (attr_descr_random.offset != ATTR_STD_NOT_FOUND) { + if(attr_descr_random.offset != ATTR_STD_NOT_FOUND) { random = primitive_attribute_float(kg, sd, attr_descr_random, NULL, NULL); } else { @@ -769,7 +769,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * if(bsdf) { PrincipledHairExtra *extra = (PrincipledHairExtra*)closure_alloc_extra(sd, sizeof(PrincipledHairExtra)); - if (!extra) + if(!extra) break; /* Random factors range: [-randomization/2, +randomization/2]. */ diff --git a/intern/cycles/kernel/svm/svm_math_util.h b/intern/cycles/kernel/svm/svm_math_util.h index d3490ab284f..669b174e4a3 100644 --- a/intern/cycles/kernel/svm/svm_math_util.h +++ b/intern/cycles/kernel/svm/svm_math_util.h @@ -94,13 +94,13 @@ ccl_device float svm_math(NodeMath type, float Fac1, float Fac2) Fac = fabsf(Fac1); else if(type == NODE_MATH_ARCTAN2) Fac = atan2f(Fac1, Fac2); - else if (type == NODE_MATH_FLOOR) + else if(type == NODE_MATH_FLOOR) Fac = floorf(Fac1); - else if (type == NODE_MATH_CEIL) + else if(type == NODE_MATH_CEIL) Fac = ceilf(Fac1); - else if (type == NODE_MATH_FRACT) + else if(type == NODE_MATH_FRACT) Fac = Fac1 - floorf(Fac1); - else if (type == NODE_MATH_SQRT) + else if(type == NODE_MATH_SQRT) Fac = safe_sqrtf(Fac1); else if(type == NODE_MATH_CLAMP) Fac = saturate(Fac1); diff --git a/intern/cycles/kernel/svm/svm_voronoi.h b/intern/cycles/kernel/svm/svm_voronoi.h index 75af910d940..d661df54ead 100644 --- a/intern/cycles/kernel/svm/svm_voronoi.h +++ b/intern/cycles/kernel/svm/svm_voronoi.h @@ -32,6 +32,11 @@ ccl_device void voronoi_neighbors(float3 p, NodeVoronoiDistanceMetric distance, da[2] = 1e10f; da[3] = 1e10f; + pa[0] = make_float3(0.0f, 0.0f, 0.0f); + pa[1] = make_float3(0.0f, 0.0f, 0.0f); + pa[2] = make_float3(0.0f, 0.0f, 0.0f); + pa[3] = make_float3(0.0f, 0.0f, 0.0f); + int3 xyzi = quick_floor_to_int3(p); for(int xx = -1; xx <= 1; xx++) { @@ -65,7 +70,7 @@ ccl_device void voronoi_neighbors(float3 p, NodeVoronoiDistanceMetric distance, } /* To keep the shortest four distances and associated points we have to keep them in sorted order. */ - if (d < da[0]) { + if(d < da[0]) { da[3] = da[2]; da[2] = da[1]; da[1] = da[0]; @@ -76,7 +81,7 @@ ccl_device void voronoi_neighbors(float3 p, NodeVoronoiDistanceMetric distance, pa[1] = pa[0]; pa[0] = vp; } - else if (d < da[1]) { + else if(d < da[1]) { da[3] = da[2]; da[2] = da[1]; da[1] = d; @@ -85,14 +90,14 @@ ccl_device void voronoi_neighbors(float3 p, NodeVoronoiDistanceMetric distance, pa[2] = pa[1]; pa[1] = vp; } - else if (d < da[2]) { + else if(d < da[2]) { da[3] = da[2]; da[2] = d; pa[3] = pa[2]; pa[2] = vp; } - else if (d < da[3]) { + else if(d < da[3]) { da[3] = d; pa[3] = vp; } diff --git a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp index a258b465fa2..de2b38340e9 100644 --- a/intern/cycles/render/buffers.cpp +++ b/intern/cycles/render/buffers.cpp @@ -160,7 +160,7 @@ bool RenderBuffers::get_denoising_pass_rect(int offset, float exposure, int samp (offset == DENOISING_PASS_DEPTH_VAR) || (offset == DENOISING_PASS_COLOR_VAR); - if(offset == DENOISING_PASS_COLOR) { + if(offset == DENOISING_PASS_COLOR || offset == DENOISING_PASS_CLEAN) { scale *= exposure; } else if(offset == DENOISING_PASS_COLOR_VAR) { diff --git a/intern/cycles/render/graph.cpp b/intern/cycles/render/graph.cpp index 3a9e2981418..eea1bed58dc 100644 --- a/intern/cycles/render/graph.cpp +++ b/intern/cycles/render/graph.cpp @@ -245,7 +245,7 @@ void ShaderGraph::connect(ShaderOutput *from, ShaderInput *to) ShaderNode *convert; ShaderInput *convert_in; - if (to->type() == SocketType::CLOSURE) { + if(to->type() == SocketType::CLOSURE) { EmissionNode *emission = new EmissionNode(); emission->color = make_float3(1.0f, 1.0f, 1.0f); emission->strength = 1.0f; diff --git a/intern/cycles/render/light.cpp b/intern/cycles/render/light.cpp index 49cfae4888b..a5854f022cd 100644 --- a/intern/cycles/render/light.cpp +++ b/intern/cycles/render/light.cpp @@ -549,7 +549,7 @@ void LightManager::device_update_background(Device *device, /* get the resolution from the light's size (we stuff it in there) */ int2 res = make_int2(background_light->map_resolution, background_light->map_resolution/2); /* If the resolution isn't set manually, try to find an environment texture. */ - if (res.x == 0) { + if(res.x == 0) { Shader *shader = (scene->background->shader) ? scene->background->shader : scene->default_background; foreach(ShaderNode *node, shader->graph->nodes) { if(node->type == EnvironmentTextureNode::node_type) { @@ -561,12 +561,12 @@ void LightManager::device_update_background(Device *device, } } } - if (res.x > 0 && res.y > 0) { + if(res.x > 0 && res.y > 0) { VLOG(2) << "Automatically set World MIS resolution to " << res.x << " by " << res.y << "\n"; } } /* If it's still unknown, just use the default. */ - if (res.x == 0 || res.y == 0) { + if(res.x == 0 || res.y == 0) { res = make_int2(1024, 512); VLOG(2) << "Setting World MIS resolution to default\n"; } diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp index cce851612db..06a2baa8a38 100644 --- a/intern/cycles/render/nodes.cpp +++ b/intern/cycles/render/nodes.cpp @@ -951,9 +951,9 @@ void VoronoiTextureNode::compile(SVMCompiler& compiler) ShaderOutput *color_out = output("Color"); ShaderOutput *fac_out = output("Fac"); - if (vector_in->link) compiler.stack_assign(vector_in); - if (scale_in->link) compiler.stack_assign(scale_in); - if (exponent_in->link) compiler.stack_assign(exponent_in); + if(vector_in->link) compiler.stack_assign(vector_in); + if(scale_in->link) compiler.stack_assign(scale_in); + if(exponent_in->link) compiler.stack_assign(exponent_in); int vector_offset = tex_mapping.compile_begin(compiler, vector_in); @@ -2853,7 +2853,7 @@ void AmbientOcclusionNode::compile(SVMCompiler& compiler) int flags = (inside? NODE_AO_INSIDE : 0) | (only_local? NODE_AO_ONLY_LOCAL : 0); - if (!distance_in->link && distance == 0.0f) { + if(!distance_in->link && distance == 0.0f) { flags |= NODE_AO_GLOBAL_RADIUS; } diff --git a/intern/cycles/render/osl.cpp b/intern/cycles/render/osl.cpp index fdd872f62b9..2bf69c869b9 100644 --- a/intern/cycles/render/osl.cpp +++ b/intern/cycles/render/osl.cpp @@ -28,6 +28,7 @@ #include "kernel/osl/osl_globals.h" #include "kernel/osl/osl_services.h" #include "kernel/osl/osl_shader.h" +#include "OSL/llvm_util.h" #include "util/util_foreach.h" #include "util/util_logging.h" @@ -66,6 +67,14 @@ OSLShaderManager::~OSLShaderManager() { shading_system_free(); texture_system_free(); +#ifdef OSL_HAS_BLENDER_CLEANUP_FIX + /* There is a problem with llvm+osl: The order global destructors across + * different compilation units run cannot be guaranteed, on windows this means + * that the llvm destructors run before the osl destructors, causing a crash + * when the process exits. the OSL in svn has a special cleanup hack to + * sidestep this behavior */ + OSL::pvt::LLVM_Util::Cleanup(); +#endif } void OSLShaderManager::reset(Scene * /*scene*/) diff --git a/intern/cycles/util/util_math_intersect.h b/intern/cycles/util/util_math_intersect.h index 5f15487df57..bc3dd1500a9 100644 --- a/intern/cycles/util/util_math_intersect.h +++ b/intern/cycles/util/util_math_intersect.h @@ -136,10 +136,10 @@ ccl_device_forceinline bool ray_triangle_intersect( #if defined(__KERNEL_SSE2__) && defined (__KERNEL_SSE__) int uvw_sign = movemask(UVWW) & 0x7; - if (uvw_sign != 0) - { - if (uvw_sign != 0x7) + if(uvw_sign != 0) { + if(uvw_sign != 0x7) { return false; + } } #else const float minUVW = min(U, min(V, W)); diff --git a/intern/cycles/util/util_system.cpp b/intern/cycles/util/util_system.cpp index 8ae5b63b2f9..2428b0b2989 100644 --- a/intern/cycles/util/util_system.cpp +++ b/intern/cycles/util/util_system.cpp @@ -300,7 +300,7 @@ size_t system_physical_ram() #elif defined(__APPLE__) uint64_t ram = 0; size_t len = sizeof(ram); - if (sysctlbyname("hw.memsize", &ram, &len, NULL, 0) == 0) { + if(sysctlbyname("hw.memsize", &ram, &len, NULL, 0) == 0) { return ram; } return 0; diff --git a/intern/ghost/intern/GHOST_SystemWin32.cpp b/intern/ghost/intern/GHOST_SystemWin32.cpp index 924173a6c68..99fa5026c03 100644 --- a/intern/ghost/intern/GHOST_SystemWin32.cpp +++ b/intern/ghost/intern/GHOST_SystemWin32.cpp @@ -1362,7 +1362,7 @@ LRESULT WINAPI GHOST_SystemWin32::s_wndProc(HWND hwnd, UINT msg, WPARAM wParam, * will not be dispatched to OUR active window if we minimize one of OUR windows. */ if (LOWORD(wParam) == WA_INACTIVE) window->lostMouseCapture(); - + window->processWin32TabletActivateEvent(GET_WM_ACTIVATE_STATE(wParam, lParam)); lResult = ::DefWindowProc(hwnd, msg, wParam, lParam); break; } diff --git a/intern/ghost/intern/GHOST_WindowWin32.cpp b/intern/ghost/intern/GHOST_WindowWin32.cpp index 676a29f28d4..70edc66d570 100644 --- a/intern/ghost/intern/GHOST_WindowWin32.cpp +++ b/intern/ghost/intern/GHOST_WindowWin32.cpp @@ -265,23 +265,22 @@ GHOST_WindowWin32::GHOST_WindowWin32(GHOST_SystemWin32 *system, GHOST_WIN32_WTInfo fpWTInfo = (GHOST_WIN32_WTInfo) ::GetProcAddress(m_wintab, "WTInfoA"); GHOST_WIN32_WTOpen fpWTOpen = (GHOST_WIN32_WTOpen) ::GetProcAddress(m_wintab, "WTOpenA"); - // let's see if we can initialize tablet here - /* check if WinTab available. */ - if (fpWTInfo && fpWTInfo(0, 0, NULL)) { + // Let's see if we can initialize tablet here. + // Check if WinTab available by getting system context info. + LOGCONTEXT lc = { 0 }; + lc.lcOptions |= CXO_SYSTEM; + if (fpWTInfo && fpWTInfo(WTI_DEFSYSCTX, 0, &lc)) { // Now init the tablet - LOGCONTEXT lc; /* The maximum tablet size, pressure and orientation (tilt) */ AXIS TabletX, TabletY, Pressure, Orientation[3]; // Open a Wintab context - // Get default context information - fpWTInfo(WTI_DEFCONTEXT, 0, &lc); - // Open the context lc.lcPktData = PACKETDATA; lc.lcPktMode = PACKETMODE; - lc.lcOptions |= CXO_MESSAGES | CXO_SYSTEM; + lc.lcOptions |= CXO_MESSAGES; + lc.lcMoveMask = PACKETDATA; /* Set the entire tablet as active */ fpWTInfo(WTI_DEVICES, DVC_X, &TabletX); @@ -309,11 +308,17 @@ GHOST_WindowWin32::GHOST_WindowWin32(GHOST_SystemWin32 *system, } if (fpWTOpen) { - m_tablet = fpWTOpen(m_hWnd, &lc, TRUE); + // The Wintab spec says we must open the context disabled if we are using cursor masks. + m_tablet = fpWTOpen(m_hWnd, &lc, FALSE); if (m_tablet) { m_tabletData = new GHOST_TabletData(); m_tabletData->Active = GHOST_kTabletModeNone; } + + GHOST_WIN32_WTEnable fpWTEnable = (GHOST_WIN32_WTEnable) ::GetProcAddress(m_wintab, "WTEnable"); + if (fpWTEnable) { + fpWTEnable(m_tablet, TRUE); + } } } } @@ -835,6 +840,23 @@ GHOST_TSuccess GHOST_WindowWin32::setWindowCursorShape(GHOST_TStandardCursor cur return GHOST_kSuccess; } +void GHOST_WindowWin32::processWin32TabletActivateEvent(WORD state) +{ + if (!m_tablet) { + return; + } + + GHOST_WIN32_WTEnable fpWTEnable = (GHOST_WIN32_WTEnable) ::GetProcAddress(m_wintab, "WTEnable"); + GHOST_WIN32_WTOverlap fpWTOverlap = (GHOST_WIN32_WTOverlap) ::GetProcAddress(m_wintab, "WTOverlap"); + + if (fpWTEnable) { + fpWTEnable(m_tablet, state); + if (fpWTOverlap && state) { + fpWTOverlap(m_tablet, TRUE); + } + } +} + void GHOST_WindowWin32::processWin32TabletInitEvent() { if (m_wintab && m_tabletData) { diff --git a/intern/ghost/intern/GHOST_WindowWin32.h b/intern/ghost/intern/GHOST_WindowWin32.h index d998e86c9b1..c72669ed898 100644 --- a/intern/ghost/intern/GHOST_WindowWin32.h +++ b/intern/ghost/intern/GHOST_WindowWin32.h @@ -56,6 +56,7 @@ typedef UINT (API * GHOST_WIN32_WTInfo)(UINT, UINT, LPVOID); typedef HCTX (API * GHOST_WIN32_WTOpen)(HWND, LPLOGCONTEXTA, BOOL); typedef BOOL (API * GHOST_WIN32_WTClose)(HCTX); typedef BOOL (API * GHOST_WIN32_WTPacket)(HCTX, UINT, LPVOID); +typedef BOOL (API * GHOST_WIN32_WTEnable)(HCTX, BOOL); typedef BOOL (API * GHOST_WIN32_WTOverlap)(HCTX, BOOL); // typedefs for user32 functions to allow dynamic loading of Windows 10 DPI scaling functions @@ -249,6 +250,7 @@ public: return m_tabletData; } + void processWin32TabletActivateEvent(WORD state); void processWin32TabletInitEvent(); void processWin32TabletEvent(WPARAM wParam, LPARAM lParam); void bringTabletContextToFront(); diff --git a/intern/opencolorio/CMakeLists.txt b/intern/opencolorio/CMakeLists.txt index 10898e4239b..2fd54363db5 100644 --- a/intern/opencolorio/CMakeLists.txt +++ b/intern/opencolorio/CMakeLists.txt @@ -64,6 +64,9 @@ if(WITH_OPENCOLORIO) list(APPEND INC_SYS ${BOOST_INCLUDE_DIR} ) + add_definitions( + -DOpenColorIO_STATIC + ) endif() data_to_c_simple(gpu_shader_display_transform.glsl SRC) diff --git a/intern/openvdb/CMakeLists.txt b/intern/openvdb/CMakeLists.txt index f666dc78e75..9ac0817903b 100644 --- a/intern/openvdb/CMakeLists.txt +++ b/intern/openvdb/CMakeLists.txt @@ -38,6 +38,7 @@ set(SRC if(WITH_OPENVDB) add_definitions( -DWITH_OPENVDB + ${OPENVDB_DEFINITIONS} ) if(WITH_OPENVDB_3_ABI_COMPATIBLE) |