From 8138eb0dfefa3b6d05d197a9a701e964b8719328 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Tue, 19 Feb 2019 13:48:29 +0100 Subject: Fix Cycles OpenCL multithreaded compilation not working on Windows. --- intern/cycles/device/opencl/opencl_util.cpp | 36 ++++++++++++++++++++++------- intern/cycles/util/util_system.cpp | 5 ++-- 2 files changed, 31 insertions(+), 10 deletions(-) (limited to 'intern') diff --git a/intern/cycles/device/opencl/opencl_util.cpp b/intern/cycles/device/opencl/opencl_util.cpp index fe5ba4886a9..a6a80b0c2de 100644 --- a/intern/cycles/device/opencl/opencl_util.cpp +++ b/intern/cycles/device/opencl/opencl_util.cpp @@ -383,6 +383,12 @@ bool OpenCLDeviceBase::OpenCLProgram::compile_kernel(const string *debug_src) return true; } +static void escape_python_string(string& str) +{ + /* Escape string to be passed as a Python raw string with '' quotes'. */ + string_replace(str, "'", "\'"); +} + bool OpenCLDeviceBase::OpenCLProgram::compile_separate(const string& clbin) { vector args; @@ -390,16 +396,30 @@ bool OpenCLDeviceBase::OpenCLProgram::compile_separate(const string& clbin) args.push_back("--factory-startup"); args.push_back("--python-expr"); + const char *force_all_platforms = (DebugFlags().opencl.kernel_type != DebugFlags::OpenCL::KERNEL_DEFAULT)? "true" : "false"; + int device_platform_id = device->device_num; + string device_name = device->device_name; + string platform_name = device->platform_name; + string build_options = device->kernel_build_options(NULL) + kernel_build_options; + string kernel_file_escaped = kernel_file; + string clbin_escaped = clbin; + + escape_python_string(device_name); + escape_python_string(platform_name); + escape_python_string(build_options); + escape_python_string(kernel_file_escaped); + escape_python_string(clbin_escaped); + args.push_back( string_printf( - "import _cycles; _cycles.opencl_compile('%s', '%d', '%s', '%s', '%s', '%s', '%s')", - (DebugFlags().opencl.kernel_type != DebugFlags::OpenCL::KERNEL_DEFAULT)? "true" : "false", - device->device_num, - device->device_name.c_str(), - device->platform_name.c_str(), - (device->kernel_build_options(NULL) + kernel_build_options).c_str(), - kernel_file.c_str(), - clbin.c_str())); + "import _cycles; _cycles.opencl_compile(r'%s', r'%d', r'%s', r'%s', r'%s', r'%s', r'%s')", + force_all_platforms, + device_platform_id, + device_name.c_str(), + platform_name.c_str(), + build_options.c_str(), + kernel_file_escaped.c_str(), + clbin_escaped.c_str())); double starttime = time_dt(); add_log(string("Cycles: compiling OpenCL program ") + program_name + "...", false); diff --git a/intern/cycles/util/util_system.cpp b/intern/cycles/util/util_system.cpp index a79829a3dd9..2a5c4a8f012 100644 --- a/intern/cycles/util/util_system.cpp +++ b/intern/cycles/util/util_system.cpp @@ -341,10 +341,11 @@ bool system_call_self(const vector& args) cmd += " \"" + args[i] + "\""; } - /* Quiet output. */ #ifdef _WIN32 - cmd += " > nul"; + /* Use cmd /S to avoid issues with spaces in arguments. */ + cmd = "cmd /S /C \"" + cmd + " > nul \""; #else + /* Quiet output. */ cmd += " > /dev/null"; #endif -- cgit v1.2.3 From e6f5632eb11b37a2398f80841a77674656243dcf Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Tue, 19 Feb 2019 15:41:22 +0100 Subject: T61513: Refactored Cycles Attribute Retrieval There is a generic function to retrieve float and float3 attributes `primitive_attribute_float` and primitive_attribute_float3`. Inside these functions an prioritised if-else construction checked where the attribute is stored and then retrieved from that location. Actually the calling function most of the time already knows where the data is stored. So we could simplify this by splitting these functions and remove the check logic. This patch splits the `primitive_attribute_float?` functions into `primitive_surface_attribute_float?` and `primitive_volume_attribute_float?`. What leads to less branching and more optimum kernels. The original function is still being used by OSL and `svm_node_attr`. This will reduce the compilation time and render time for kernels. Especially in production scenes there is a lot of benefit. Impact in compilation times job | scene_name | previous | new | percentage -------+-----------------+----------+-------+------------ t61513 | empty | 10.63 | 10.66 | 0% t61513 | bmw | 17.91 | 17.65 | 1% t61513 | fishycat | 19.57 | 17.68 | 10% t61513 | barbershop | 54.10 | 24.41 | 55% t61513 | classroom | 17.55 | 16.29 | 7% t61513 | koro | 18.92 | 18.05 | 5% t61513 | pavillion | 17.43 | 16.52 | 5% t61513 | splash279 | 16.48 | 14.91 | 10% t61513 | volume_emission | 36.22 | 21.60 | 40% Impact in render times job | scene_name | previous | new | percentage -------+-----------------+----------+--------+------------ 61513 | empty | 21.06 | 20.35 | 3% 61513 | bmw | 198.44 | 190.05 | 4% 61513 | fishycat | 394.20 | 401.25 | -2% 61513 | barbershop | 1188.16 | 912.39 | 23% 61513 | classroom | 341.08 | 340.38 | 0% 61513 | koro | 472.43 | 471.80 | 0% 61513 | pavillion | 905.77 | 899.80 | 1% 61513 | splash279 | 55.26 | 54.86 | 1% 61513 | volume_emission | 62.59 | 61.70 | 1% There is also a possitive impact when using CPU and CUDA, but they are small. I didn't split the hair logic from the surface logic due to: * Hair and surface use same attribute types. It was not clear if it could be splitted when looking at the code only. * Hair and surface are quick to compile and to read. So the benefit is quite small. Differential Revision: https://developer.blender.org/D4375 --- intern/cycles/kernel/geom/geom_primitive.h | 95 +++++++++++++++++++++++++++--- intern/cycles/kernel/geom/geom_volume.h | 11 +--- intern/cycles/kernel/osl/osl_services.cpp | 4 +- intern/cycles/kernel/svm/svm_attribute.h | 66 +++++++++------------ intern/cycles/kernel/svm/svm_bump.h | 2 +- intern/cycles/kernel/svm/svm_closure.h | 8 +-- intern/cycles/kernel/svm/svm_displace.h | 4 +- intern/cycles/kernel/svm/svm_tex_coord.h | 19 +++--- 8 files changed, 136 insertions(+), 73 deletions(-) (limited to 'intern') diff --git a/intern/cycles/kernel/geom/geom_primitive.h b/intern/cycles/kernel/geom/geom_primitive.h index 6db8475d196..e3e2648e9ec 100644 --- a/intern/cycles/kernel/geom/geom_primitive.h +++ b/intern/cycles/kernel/geom/geom_primitive.h @@ -22,7 +22,6 @@ CCL_NAMESPACE_BEGIN /* Generic primitive attribute reading functions */ - ccl_device_inline float primitive_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, @@ -41,7 +40,9 @@ ccl_device_inline float primitive_attribute_float(KernelGlobals *kg, #endif #ifdef __VOLUME__ else if(sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) { - return volume_attribute_float(kg, sd, desc, dx, dy); + if(dx) *dx = 0.0f; + if(dy) *dy = 0.0f; + return volume_attribute_float(kg, sd, desc); } #endif else { @@ -51,6 +52,43 @@ ccl_device_inline float primitive_attribute_float(KernelGlobals *kg, } } +ccl_device_inline float primitive_surface_attribute_float(KernelGlobals *kg, + const ShaderData *sd, + const AttributeDescriptor desc, + float *dx, float *dy) +{ + if(sd->type & PRIMITIVE_ALL_TRIANGLE) { + if(subd_triangle_patch(kg, sd) == ~0) + return triangle_attribute_float(kg, sd, desc, dx, dy); + else + return subd_triangle_attribute_float(kg, sd, desc, dx, dy); + } +#ifdef __HAIR__ + else if(sd->type & PRIMITIVE_ALL_CURVE) { + return curve_attribute_float(kg, sd, desc, dx, dy); + } +#endif + else { + if(dx) *dx = 0.0f; + if(dy) *dy = 0.0f; + return 0.0f; + } +} + +#ifdef __VOLUME__ +ccl_device_inline float primitive_volume_attribute_float(KernelGlobals *kg, + const ShaderData *sd, + const AttributeDescriptor desc) +{ + if(sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) { + return volume_attribute_float(kg, sd, desc); + } + else { + return 0.0f; + } +} +#endif + ccl_device_inline float3 primitive_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, @@ -69,7 +107,32 @@ ccl_device_inline float3 primitive_attribute_float3(KernelGlobals *kg, #endif #ifdef __VOLUME__ else if(sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) { - return volume_attribute_float3(kg, sd, desc, dx, dy); + if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); + if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); + return volume_attribute_float3(kg, sd, desc); + } +#endif + else { + if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); + if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); + return make_float3(0.0f, 0.0f, 0.0f); + } +} + +ccl_device_inline float3 primitive_surface_attribute_float3(KernelGlobals *kg, + const ShaderData *sd, + const AttributeDescriptor desc, + float3 *dx, float3 *dy) +{ + if(sd->type & PRIMITIVE_ALL_TRIANGLE) { + if(subd_triangle_patch(kg, sd) == ~0) + return triangle_attribute_float3(kg, sd, desc, dx, dy); + else + return subd_triangle_attribute_float3(kg, sd, desc, dx, dy); + } +#ifdef __HAIR__ + else if(sd->type & PRIMITIVE_ALL_CURVE) { + return curve_attribute_float3(kg, sd, desc, dx, dy); } #endif else { @@ -79,6 +142,20 @@ ccl_device_inline float3 primitive_attribute_float3(KernelGlobals *kg, } } +#ifdef __VOLUME__ +ccl_device_inline float3 primitive_volume_attribute_float3(KernelGlobals *kg, + const ShaderData *sd, + const AttributeDescriptor desc) +{ + if(sd->object != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) { + return volume_attribute_float3(kg, sd, desc); + } + else { + return make_float3(0.0f, 0.0f, 0.0f); + } +} +#endif + /* Default UV coordinate */ ccl_device_inline float3 primitive_uv(KernelGlobals *kg, ShaderData *sd) @@ -88,7 +165,7 @@ ccl_device_inline float3 primitive_uv(KernelGlobals *kg, ShaderData *sd) if(desc.offset == ATTR_STD_NOT_FOUND) return make_float3(0.0f, 0.0f, 0.0f); - float3 uv = primitive_attribute_float3(kg, sd, desc, NULL, NULL); + float3 uv = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL); uv.z = 1.0f; return uv; } @@ -104,8 +181,8 @@ ccl_device bool primitive_ptex(KernelGlobals *kg, ShaderData *sd, float2 *uv, in if(desc_face_id.offset == ATTR_STD_NOT_FOUND || desc_uv.offset == ATTR_STD_NOT_FOUND) return false; - float3 uv3 = primitive_attribute_float3(kg, sd, desc_uv, NULL, NULL); - float face_id_f = primitive_attribute_float(kg, sd, desc_face_id, NULL, NULL); + float3 uv3 = primitive_surface_attribute_float3(kg, sd, desc_uv, NULL, NULL); + float face_id_f = primitive_surface_attribute_float(kg, sd, desc_face_id, NULL, NULL); *uv = make_float2(uv3.x, uv3.y); *face_id = (int)face_id_f; @@ -130,7 +207,7 @@ ccl_device float3 primitive_tangent(KernelGlobals *kg, ShaderData *sd) const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_GENERATED); if(desc.offset != ATTR_STD_NOT_FOUND) { - float3 data = primitive_attribute_float3(kg, sd, desc, NULL, NULL); + float3 data = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL); data = make_float3(-(data.y - 0.5f), (data.x - 0.5f), 0.0f); object_normal_transform(kg, sd, &data); return cross(sd->N, normalize(cross(data, sd->N))); @@ -176,10 +253,10 @@ ccl_device_inline float4 primitive_motion_vector(KernelGlobals *kg, ShaderData * object_motion_info(kg, sd->object, NULL, &numverts, &numkeys); /* lookup attributes */ - motion_pre = primitive_attribute_float3(kg, sd, desc, NULL, NULL); + motion_pre = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL); desc.offset += (sd->type & PRIMITIVE_ALL_TRIANGLE)? numverts: numkeys; - motion_post = primitive_attribute_float3(kg, sd, desc, NULL, NULL); + motion_post = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL); #ifdef __HAIR__ if(is_curve_primitive && (sd->object_flag & SD_OBJECT_HAS_VERTEX_MOTION) == 0) { diff --git a/intern/cycles/kernel/geom/geom_volume.h b/intern/cycles/kernel/geom/geom_volume.h index 688413b74a1..1977d263ece 100644 --- a/intern/cycles/kernel/geom/geom_volume.h +++ b/intern/cycles/kernel/geom/geom_volume.h @@ -47,27 +47,20 @@ ccl_device_inline float3 volume_normalized_position(KernelGlobals *kg, return P; } -ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy) +ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc) { float3 P = volume_normalized_position(kg, sd, sd->P); InterpolationType interp = (sd->flag & SD_VOLUME_CUBIC)? INTERPOLATION_CUBIC: INTERPOLATION_NONE; float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z, interp); - - if(dx) *dx = 0.0f; - if(dy) *dy = 0.0f; - return average(float4_to_float3(r)); } -ccl_device float3 volume_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float3 *dx, float3 *dy) +ccl_device float3 volume_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc) { float3 P = volume_normalized_position(kg, sd, sd->P); InterpolationType interp = (sd->flag & SD_VOLUME_CUBIC)? INTERPOLATION_CUBIC: INTERPOLATION_NONE; float4 r = kernel_tex_image_interp_3d(kg, desc.offset, P.x, P.y, P.z, interp); - if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); - if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); - if(r.w > 1e-6f && r.w != 1.0f) { /* For RGBA colors, unpremultiply after interpolation. */ return float4_to_float3(r) / r.w; diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp index 5b4bddbb6de..5436a66c9d4 100644 --- a/intern/cycles/kernel/osl/osl_services.cpp +++ b/intern/cycles/kernel/osl/osl_services.cpp @@ -561,7 +561,7 @@ static bool set_attribute_matrix(const Transform& tfm, TypeDesc type, void *val) return false; } -static bool get_mesh_element_attribute(KernelGlobals *kg, const ShaderData *sd, const OSLGlobals::Attribute& attr, +static bool get_primitive_attribute(KernelGlobals *kg, const ShaderData *sd, const OSLGlobals::Attribute& attr, const TypeDesc& type, bool derivatives, void *val) { if(attr.type == TypeDesc::TypePoint || attr.type == TypeDesc::TypeVector || @@ -849,7 +849,7 @@ bool OSLRenderServices::get_attribute(ShaderData *sd, bool derivatives, ustring if(attr.desc.element != ATTR_ELEMENT_OBJECT) { /* triangle and vertex attributes */ - if(get_mesh_element_attribute(kg, sd, attr, type, derivatives, val)) + if(get_primitive_attribute(kg, sd, attr, type, derivatives, val)) return true; else return get_mesh_attribute(kg, sd, attr, type, derivatives, val); diff --git a/intern/cycles/kernel/svm/svm_attribute.h b/intern/cycles/kernel/svm/svm_attribute.h index d98f538d089..ef6f7d7cbb5 100644 --- a/intern/cycles/kernel/svm/svm_attribute.h +++ b/intern/cycles/kernel/svm/svm_attribute.h @@ -52,24 +52,22 @@ ccl_device void svm_node_attr(KernelGlobals *kg, ShaderData *sd, float *stack, u AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset); /* fetch and store attribute */ - if(type == NODE_ATTR_FLOAT) { - if(desc.type == NODE_ATTR_FLOAT) { - float f = primitive_attribute_float(kg, sd, desc, NULL, NULL); + if (desc.type == NODE_ATTR_FLOAT) { + float f = primitive_attribute_float(kg, sd, desc, NULL, NULL); + if (type == NODE_ATTR_FLOAT) { stack_store_float(stack, out_offset, f); } else { - float3 f = primitive_attribute_float3(kg, sd, desc, NULL, NULL); - stack_store_float(stack, out_offset, average(f)); + stack_store_float3(stack, out_offset, make_float3(f, f, f)); } } else { - if(desc.type == NODE_ATTR_FLOAT3) { - float3 f = primitive_attribute_float3(kg, sd, desc, NULL, NULL); - stack_store_float3(stack, out_offset, f); + float3 f = primitive_attribute_float3(kg, sd, desc, NULL, NULL); + if (type == NODE_ATTR_FLOAT) { + stack_store_float(stack, out_offset, average(f)); } else { - float f = primitive_attribute_float(kg, sd, desc, NULL, NULL); - stack_store_float3(stack, out_offset, make_float3(f, f, f)); + stack_store_float3(stack, out_offset, f); } } } @@ -86,28 +84,24 @@ void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset); /* fetch and store attribute */ - if(type == NODE_ATTR_FLOAT) { - if(desc.type == NODE_ATTR_FLOAT) { - float dx; - float f = primitive_attribute_float(kg, sd, desc, &dx, NULL); + if (desc.type == NODE_ATTR_FLOAT) { + float dx; + float f = primitive_surface_attribute_float(kg, sd, desc, &dx, NULL); + if (type == NODE_ATTR_FLOAT) { stack_store_float(stack, out_offset, f+dx); } else { - float3 dx; - float3 f = primitive_attribute_float3(kg, sd, desc, &dx, NULL); - stack_store_float(stack, out_offset, average(f+dx)); + stack_store_float3(stack, out_offset, make_float3(f+dx, f+dx, f+dx)); } } else { - if(desc.type == NODE_ATTR_FLOAT3) { - float3 dx; - float3 f = primitive_attribute_float3(kg, sd, desc, &dx, NULL); - stack_store_float3(stack, out_offset, f+dx); + float3 dx; + float3 f = primitive_surface_attribute_float3(kg, sd, desc, &dx, NULL); + if (type == NODE_ATTR_FLOAT) { + stack_store_float(stack, out_offset, average(f+dx)); } else { - float dx; - float f = primitive_attribute_float(kg, sd, desc, &dx, NULL); - stack_store_float3(stack, out_offset, make_float3(f+dx, f+dx, f+dx)); + stack_store_float3(stack, out_offset, f+dx); } } } @@ -127,28 +121,24 @@ void svm_node_attr_bump_dy(KernelGlobals *kg, AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset); /* fetch and store attribute */ - if(type == NODE_ATTR_FLOAT) { - if(desc.type == NODE_ATTR_FLOAT) { - float dy; - float f = primitive_attribute_float(kg, sd, desc, NULL, &dy); + if (desc.type == NODE_ATTR_FLOAT) { + float dy; + float f = primitive_surface_attribute_float(kg, sd, desc, NULL, &dy); + if (type == NODE_ATTR_FLOAT) { stack_store_float(stack, out_offset, f+dy); } else { - float3 dy; - float3 f = primitive_attribute_float3(kg, sd, desc, NULL, &dy); - stack_store_float(stack, out_offset, average(f+dy)); + stack_store_float3(stack, out_offset, make_float3(f+dy, f+dy, f+dy)); } } else { - if(desc.type == NODE_ATTR_FLOAT3) { - float3 dy; - float3 f = primitive_attribute_float3(kg, sd, desc, NULL, &dy); - stack_store_float3(stack, out_offset, f+dy); + float3 dy; + float3 f = primitive_surface_attribute_float3(kg, sd, desc, NULL, &dy); + if (type == NODE_ATTR_FLOAT) { + stack_store_float(stack, out_offset, average(f+dy)); } else { - float dy; - float f = primitive_attribute_float(kg, sd, desc, NULL, &dy); - stack_store_float3(stack, out_offset, make_float3(f+dy, f+dy, f+dy)); + stack_store_float3(stack, out_offset, f+dy); } } } diff --git a/intern/cycles/kernel/svm/svm_bump.h b/intern/cycles/kernel/svm/svm_bump.h index 1c1fe155538..35aac174409 100644 --- a/intern/cycles/kernel/svm/svm_bump.h +++ b/intern/cycles/kernel/svm/svm_bump.h @@ -30,7 +30,7 @@ ccl_device void svm_node_enter_bump_eval(KernelGlobals *kg, ShaderData *sd, floa if(desc.offset != ATTR_STD_NOT_FOUND) { float3 P, dPdx, dPdy; - P = primitive_attribute_float3(kg, sd, desc, &dPdx, &dPdy); + P = primitive_surface_attribute_float3(kg, sd, desc, &dPdx, &dPdy); object_position_transform(kg, sd, &P); object_dir_transform(kg, sd, &dPdx); diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h index 3cf33f4d431..a7e87715ed4 100644 --- a/intern/cycles/kernel/svm/svm_closure.h +++ b/intern/cycles/kernel/svm/svm_closure.h @@ -744,7 +744,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * const AttributeDescriptor attr_descr_random = find_attribute(kg, sd, data_node4.y); float random = 0.0f; if(attr_descr_random.offset != ATTR_STD_NOT_FOUND) { - random = primitive_attribute_float(kg, sd, attr_descr_random, NULL, NULL); + random = primitive_surface_attribute_float(kg, sd, attr_descr_random, NULL, NULL); } else { random = stack_load_float_default(stack, random_ofs, data_node3.y); @@ -974,7 +974,7 @@ ccl_device void svm_node_principled_volume(KernelGlobals *kg, ShaderData *sd, fl /* Density and color attribute lookup if available. */ const AttributeDescriptor attr_density = find_attribute(kg, sd, attr_node.x); if(attr_density.offset != ATTR_STD_NOT_FOUND) { - primitive_density = primitive_attribute_float(kg, sd, attr_density, NULL, NULL); + primitive_density = primitive_volume_attribute_float(kg, sd, attr_density); density = fmaxf(density * primitive_density, 0.0f); } } @@ -985,7 +985,7 @@ ccl_device void svm_node_principled_volume(KernelGlobals *kg, ShaderData *sd, fl const AttributeDescriptor attr_color = find_attribute(kg, sd, attr_node.y); if(attr_color.offset != ATTR_STD_NOT_FOUND) { - color *= primitive_attribute_float3(kg, sd, attr_color, NULL, NULL); + color *= primitive_volume_attribute_float3(kg, sd, attr_color); } /* Add closure for volume scattering. */ @@ -1026,7 +1026,7 @@ ccl_device void svm_node_principled_volume(KernelGlobals *kg, ShaderData *sd, fl /* Add flame temperature from attribute if available. */ const AttributeDescriptor attr_temperature = find_attribute(kg, sd, attr_node.z); if(attr_temperature.offset != ATTR_STD_NOT_FOUND) { - float temperature = primitive_attribute_float(kg, sd, attr_temperature, NULL, NULL); + float temperature = primitive_volume_attribute_float(kg, sd, attr_temperature); T *= fmaxf(temperature, 0.0f); } diff --git a/intern/cycles/kernel/svm/svm_displace.h b/intern/cycles/kernel/svm/svm_displace.h index 0f5b3abef87..a69c9fe81f9 100644 --- a/intern/cycles/kernel/svm/svm_displace.h +++ b/intern/cycles/kernel/svm/svm_displace.h @@ -137,7 +137,7 @@ ccl_device void svm_node_vector_displacement(KernelGlobals *kg, ShaderData *sd, const AttributeDescriptor attr = find_attribute(kg, sd, node.z); float3 tangent; if(attr.offset != ATTR_STD_NOT_FOUND) { - tangent = primitive_attribute_float3(kg, sd, attr, NULL, NULL); + tangent = primitive_surface_attribute_float3(kg, sd, attr, NULL, NULL); } else { tangent = normalize(sd->dPdu); @@ -146,7 +146,7 @@ ccl_device void svm_node_vector_displacement(KernelGlobals *kg, ShaderData *sd, float3 bitangent = normalize(cross(normal, tangent)); const AttributeDescriptor attr_sign = find_attribute(kg, sd, node.w); if(attr_sign.offset != ATTR_STD_NOT_FOUND) { - float sign = primitive_attribute_float(kg, sd, attr_sign, NULL, NULL); + float sign = primitive_surface_attribute_float(kg, sd, attr_sign, NULL, NULL); bitangent *= sign; } diff --git a/intern/cycles/kernel/svm/svm_tex_coord.h b/intern/cycles/kernel/svm/svm_tex_coord.h index 45c38d64763..72871254f0d 100644 --- a/intern/cycles/kernel/svm/svm_tex_coord.h +++ b/intern/cycles/kernel/svm/svm_tex_coord.h @@ -292,12 +292,12 @@ ccl_device void svm_node_normal_map(KernelGlobals *kg, ShaderData *sd, float *st } /* get _unnormalized_ interpolated normal and tangent */ - float3 tangent = primitive_attribute_float3(kg, sd, attr, NULL, NULL); - float sign = primitive_attribute_float(kg, sd, attr_sign, NULL, NULL); + float3 tangent = primitive_surface_attribute_float3(kg, sd, attr, NULL, NULL); + float sign = primitive_surface_attribute_float(kg, sd, attr_sign, NULL, NULL); float3 normal; if(sd->shader & SHADER_SMOOTH_NORMAL) { - normal = primitive_attribute_float3(kg, sd, attr_normal, NULL, NULL); + normal = primitive_surface_attribute_float3(kg, sd, attr_normal, NULL, NULL); } else { normal = sd->Ng; @@ -360,25 +360,28 @@ ccl_device void svm_node_tangent(KernelGlobals *kg, ShaderData *sd, float *stack decode_node_uchar4(node.y, &tangent_offset, &direction_type, &axis, NULL); float3 tangent; + float3 attribute_value; + const AttributeDescriptor desc = find_attribute(kg, sd, node.z); + if (desc.offset != ATTR_STD_NOT_FOUND) { + attribute_value = primitive_surface_attribute_float3(kg, sd, desc, NULL, NULL); + } + if(direction_type == NODE_TANGENT_UVMAP) { /* UV map */ - const AttributeDescriptor desc = find_attribute(kg, sd, node.z); - if(desc.offset == ATTR_STD_NOT_FOUND) tangent = make_float3(0.0f, 0.0f, 0.0f); else - tangent = primitive_attribute_float3(kg, sd, desc, NULL, NULL); + tangent = attribute_value; } else { /* radial */ - const AttributeDescriptor desc = find_attribute(kg, sd, node.z); float3 generated; if(desc.offset == ATTR_STD_NOT_FOUND) generated = sd->P; else - generated = primitive_attribute_float3(kg, sd, desc, NULL, NULL); + generated = attribute_value; if(axis == NODE_TANGENT_AXIS_X) tangent = make_float3(0.0f, -(generated.z - 0.5f), (generated.y - 0.5f)); -- cgit v1.2.3 From 667033e89e7fe5241592e72e088a19723ca906b5 Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Tue, 19 Feb 2019 16:31:31 +0100 Subject: T61463: Separate Baking kernels Cycles OpenCL: Split baking kernels in own program Fix T61463. Before this patch baking was part of the base kernels. There are 3 baking kernels that and all 3 uses shader evaluation. Only for one of these kernels the functionality was wrapped in the __NO_BAKING__ compile directive. When you start baking this leads to long compile times. By separating in individual programs will reduce the compile times. Also wrapped all baking kernels with __NO_BAKING__ to reduce the compilation times. Impact on compilation time job | scene_name | previous | new | percentage --------+-----------------+----------+-------+------------ T61463 | empty | 10.63 | 7.27 | 32% T61463 | bmw | 17.91 | 14.24 | 20% T61463 | fishycat | 19.57 | 15.08 | 23% T61463 | barbershop | 54.10 | 48.18 | 11% T61463 | classroom | 17.55 | 14.42 | 18% T61463 | koro | 18.92 | 17.15 | 9% T61463 | pavillion | 17.43 | 14.23 | 18% T61463 | splash279 | 16.48 | 15.33 | 7% T61463 | volume_emission | 36.22 | 34.19 | 6% Impact on render time job | scene_name | previous | new | percentage --------+-----------------+----------+---------+------------ T61463 | empty | 21.06 | 20.54 | 2% T61463 | bmw | 198.44 | 189.59 | 4% T61463 | fishycat | 394.20 | 388.50 | 1% T61463 | barbershop | 1188.16 | 1185.49 | 0% T61463 | classroom | 341.08 | 339.27 | 1% T61463 | koro | 472.43 | 360.70 | 24% T61463 | pavillion | 905.77 | 902.14 | 0% T61463 | splash279 | 55.26 | 54.92 | 1% T61463 | volume_emission | 62.59 | 39.09 | 38% I don't have a grounded explanation why koro and volume_emission is this much faster; I have done several tests though... Maniphest Tasks: T61463 Differential Revision: https://developer.blender.org/D4376 --- intern/cycles/device/opencl/opencl.h | 8 ++- intern/cycles/device/opencl/opencl_base.cpp | 32 +++++++--- intern/cycles/device/opencl/opencl_split.cpp | 2 +- intern/cycles/kernel/CMakeLists.txt | 3 + intern/cycles/kernel/kernels/opencl/kernel.cl | 72 ---------------------- .../kernel/kernels/opencl/kernel_background.cl | 39 ++++++++++++ intern/cycles/kernel/kernels/opencl/kernel_bake.cl | 38 ++++++++++++ .../kernel/kernels/opencl/kernel_displace.cl | 40 ++++++++++++ 8 files changed, 149 insertions(+), 85 deletions(-) create mode 100644 intern/cycles/kernel/kernels/opencl/kernel_background.cl create mode 100644 intern/cycles/kernel/kernels/opencl/kernel_bake.cl create mode 100644 intern/cycles/kernel/kernels/opencl/kernel_displace.cl (limited to 'intern') diff --git a/intern/cycles/device/opencl/opencl.h b/intern/cycles/device/opencl/opencl.h index a2c0e53b3e7..766b9e4bf1a 100644 --- a/intern/cycles/device/opencl/opencl.h +++ b/intern/cycles/device/opencl/opencl.h @@ -325,7 +325,11 @@ public: map kernels; }; - OpenCLProgram base_program, denoising_program; + OpenCLProgram base_program; + OpenCLProgram bake_program; + OpenCLProgram displace_program; + OpenCLProgram background_program; + OpenCLProgram denoising_program; typedef map*> ConstMemMap; typedef map MemMap; @@ -571,7 +575,7 @@ protected: ustring key, thread_scoped_lock& cache_locker); - virtual string build_options_for_base_program( + virtual string build_options_for_bake_program( const DeviceRequestedFeatures& /*requested_features*/); private: diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp index d8f9a242ac8..6a47a60e915 100644 --- a/intern/cycles/device/opencl/opencl_base.cpp +++ b/intern/cycles/device/opencl/opencl_base.cpp @@ -162,6 +162,9 @@ OpenCLDeviceBase::~OpenCLDeviceBase() } base_program.release(); + bake_program.release(); + displace_program.release(); + background_program.release(); if(cqCommandQueue) clReleaseCommandQueue(cqCommandQueue); if(cxContext) @@ -225,14 +228,20 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea if(!opencl_version_check()) return false; - base_program = OpenCLProgram(this, "base", "kernel.cl", build_options_for_base_program(requested_features)); + base_program = OpenCLProgram(this, "base", "kernel.cl", ""); base_program.add_kernel(ustring("convert_to_byte")); base_program.add_kernel(ustring("convert_to_half_float")); - base_program.add_kernel(ustring("displace")); - base_program.add_kernel(ustring("background")); - base_program.add_kernel(ustring("bake")); base_program.add_kernel(ustring("zero_buffer")); + bake_program = OpenCLProgram(this, "bake", "kernel_bake.cl", build_options_for_bake_program(requested_features)); + bake_program.add_kernel(ustring("bake")); + + displace_program = OpenCLProgram(this, "displace", "kernel_displace.cl", build_options_for_bake_program(requested_features)); + displace_program.add_kernel(ustring("displace")); + + background_program = OpenCLProgram(this, "background", "kernel_background.cl", build_options_for_bake_program(requested_features)); + background_program.add_kernel(ustring("background")); + denoising_program = OpenCLProgram(this, "denoising", "filter.cl", ""); denoising_program.add_kernel(ustring("filter_divide_shadow")); denoising_program.add_kernel(ustring("filter_get_feature")); @@ -248,12 +257,15 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea denoising_program.add_kernel(ustring("filter_finalize")); vector programs; - programs.push_back(&base_program); - programs.push_back(&denoising_program); + programs.push_back(&bake_program); + programs.push_back(&displace_program); + programs.push_back(&background_program); /* Call actual class to fill the vector with its programs. */ if(!add_kernel_programs(requested_features, programs)) { return false; } + programs.push_back(&base_program); + programs.push_back(&denoising_program); /* Parallel compilation of Cycles kernels, this launches multiple * processes to workaround OpenCL frameworks serializing the calls @@ -1152,13 +1164,13 @@ void OpenCLDeviceBase::shader(DeviceTask& task) cl_kernel kernel; if(task.shader_eval_type >= SHADER_EVAL_BAKE) { - kernel = base_program(ustring("bake")); + kernel = bake_program(ustring("bake")); } else if(task.shader_eval_type == SHADER_EVAL_DISPLACE) { - kernel = base_program(ustring("displace")); + kernel = displace_program(ustring("displace")); } else { - kernel = base_program(ustring("background")); + kernel = background_program(ustring("background")); } cl_uint start_arg_index = @@ -1385,7 +1397,7 @@ void OpenCLDeviceBase::store_cached_kernel( cache_locker); } -string OpenCLDeviceBase::build_options_for_base_program( +string OpenCLDeviceBase::build_options_for_bake_program( const DeviceRequestedFeatures& requested_features) { /* TODO(sergey): By default we compile all features, meaning diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp index b759f69d3ab..c9d3eb2eb8c 100644 --- a/intern/cycles/device/opencl/opencl_split.cpp +++ b/intern/cycles/device/opencl/opencl_split.cpp @@ -327,7 +327,7 @@ public: protected: /* ** Those guys are for workign around some compiler-specific bugs ** */ - string build_options_for_base_program( + string build_options_for_bake_program( const DeviceRequestedFeatures& requested_features) { return requested_features.get_build_options(); diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index f7041ee2783..0a2acd3f669 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -37,6 +37,9 @@ set(SRC_CUDA_KERNELS set(SRC_OPENCL_KERNELS kernels/opencl/kernel.cl + kernels/opencl/kernel_bake.cl + kernels/opencl/kernel_displace.cl + kernels/opencl/kernel_background.cl kernels/opencl/kernel_state_buffer_size.cl kernels/opencl/kernel_split.cl kernels/opencl/kernel_split_bundle.cl diff --git a/intern/cycles/kernel/kernels/opencl/kernel.cl b/intern/cycles/kernel/kernels/opencl/kernel.cl index de1f5088629..aa837e2ae87 100644 --- a/intern/cycles/kernel/kernels/opencl/kernel.cl +++ b/intern/cycles/kernel/kernels/opencl/kernel.cl @@ -81,78 +81,6 @@ __kernel void kernel_ocl_path_trace( #else /* __COMPILE_ONLY_MEGAKERNEL__ */ -__kernel void kernel_ocl_displace( - ccl_constant KernelData *data, - ccl_global uint4 *input, - ccl_global float4 *output, - - KERNEL_BUFFER_PARAMS, - - int type, int sx, int sw, int offset, int sample) -{ - KernelGlobals kglobals, *kg = &kglobals; - - kg->data = data; - - kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS); - kernel_set_buffer_info(kg); - - int x = sx + ccl_global_id(0); - - if(x < sx + sw) { - kernel_displace_evaluate(kg, input, output, x); - } -} -__kernel void kernel_ocl_background( - ccl_constant KernelData *data, - ccl_global uint4 *input, - ccl_global float4 *output, - - KERNEL_BUFFER_PARAMS, - - int type, int sx, int sw, int offset, int sample) -{ - KernelGlobals kglobals, *kg = &kglobals; - - kg->data = data; - - kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS); - kernel_set_buffer_info(kg); - - int x = sx + ccl_global_id(0); - - if(x < sx + sw) { - kernel_background_evaluate(kg, input, output, x); - } -} - -__kernel void kernel_ocl_bake( - ccl_constant KernelData *data, - ccl_global uint4 *input, - ccl_global float4 *output, - - KERNEL_BUFFER_PARAMS, - - int type, int filter, int sx, int sw, int offset, int sample) -{ - KernelGlobals kglobals, *kg = &kglobals; - - kg->data = data; - - kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS); - kernel_set_buffer_info(kg); - - int x = sx + ccl_global_id(0); - - if(x < sx + sw) { -#ifdef __NO_BAKING__ - output[x] = make_float4(0.0f, 0.0f, 0.0f, 0.0f); -#else - kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, filter, x, offset, sample); -#endif - } -} - __kernel void kernel_ocl_convert_to_byte( ccl_constant KernelData *data, ccl_global uchar4 *rgba, diff --git a/intern/cycles/kernel/kernels/opencl/kernel_background.cl b/intern/cycles/kernel/kernels/opencl/kernel_background.cl new file mode 100644 index 00000000000..c7c709c0ad7 --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_background.cl @@ -0,0 +1,39 @@ + +#include "kernel/kernel_compat_opencl.h" +#include "kernel/kernel_math.h" +#include "kernel/kernel_types.h" +#include "kernel/kernel_globals.h" +#include "kernel/kernel_color.h" +#include "kernel/kernels/opencl/kernel_opencl_image.h" + +#include "kernel/kernel_path.h" +#include "kernel/kernel_path_branched.h" + +#include "kernel/kernel_bake.h" + +__kernel void kernel_ocl_background( + ccl_constant KernelData *data, + ccl_global uint4 *input, + ccl_global float4 *output, + + KERNEL_BUFFER_PARAMS, + + int type, int sx, int sw, int offset, int sample) +{ + KernelGlobals kglobals, *kg = &kglobals; + + kg->data = data; + + kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS); + kernel_set_buffer_info(kg); + + int x = sx + ccl_global_id(0); + + if(x < sx + sw) { +#ifdef __NO_BAKING__ + output[x] = make_float4(0.0f, 0.0f, 0.0f, 0.0f); +#else + kernel_background_evaluate(kg, input, output, x); +#endif + } +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_bake.cl b/intern/cycles/kernel/kernels/opencl/kernel_bake.cl new file mode 100644 index 00000000000..041312b53cb --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_bake.cl @@ -0,0 +1,38 @@ +#include "kernel/kernel_compat_opencl.h" +#include "kernel/kernel_math.h" +#include "kernel/kernel_types.h" +#include "kernel/kernel_globals.h" +#include "kernel/kernel_color.h" +#include "kernel/kernels/opencl/kernel_opencl_image.h" + +#include "kernel/kernel_path.h" +#include "kernel/kernel_path_branched.h" + +#include "kernel/kernel_bake.h" + +__kernel void kernel_ocl_bake( + ccl_constant KernelData *data, + ccl_global uint4 *input, + ccl_global float4 *output, + + KERNEL_BUFFER_PARAMS, + + int type, int filter, int sx, int sw, int offset, int sample) +{ + KernelGlobals kglobals, *kg = &kglobals; + + kg->data = data; + + kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS); + kernel_set_buffer_info(kg); + + int x = sx + ccl_global_id(0); + + if(x < sx + sw) { +#ifdef __NO_BAKING__ + output[x] = make_float4(0.0f, 0.0f, 0.0f, 0.0f); +#else + kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, filter, x, offset, sample); +#endif + } +} diff --git a/intern/cycles/kernel/kernels/opencl/kernel_displace.cl b/intern/cycles/kernel/kernels/opencl/kernel_displace.cl new file mode 100644 index 00000000000..288bfd5eadc --- /dev/null +++ b/intern/cycles/kernel/kernels/opencl/kernel_displace.cl @@ -0,0 +1,40 @@ + +#include "kernel/kernel_compat_opencl.h" +#include "kernel/kernel_math.h" +#include "kernel/kernel_types.h" +#include "kernel/kernel_globals.h" +#include "kernel/kernel_color.h" +#include "kernel/kernels/opencl/kernel_opencl_image.h" + +#include "kernel/kernel_path.h" +#include "kernel/kernel_path_branched.h" + +#include "kernel/kernel_bake.h + +__kernel void kernel_ocl_displace( + ccl_constant KernelData *data, + ccl_global uint4 *input, + ccl_global float4 *output, + + KERNEL_BUFFER_PARAMS, + + int type, int sx, int sw, int offset, int sample) +{ + KernelGlobals kglobals, *kg = &kglobals; + + kg->data = data; + + kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS); + kernel_set_buffer_info(kg); + + int x = sx + ccl_global_id(0); + + if(x < sx + sw) { +#ifdef __NO_BAKING__ + output[x] = make_float4(0.0f, 0.0f, 0.0f, 0.0f); +#else + kernel_displace_evaluate(kg, input, output, x); +#endif + } +} + -- cgit v1.2.3