diff options
author | Pascal Schoen <pascal_schoen@gmx.net> | 2016-08-16 16:22:32 +0300 |
---|---|---|
committer | Pascal Schoen <pascal_schoen@gmx.net> | 2016-08-16 16:22:32 +0300 |
commit | 9eed34c7d980e1b998df457c4f76021162c80f78 (patch) | |
tree | 0c47e10e97c2088d59a52c3802c35f7e9eb7901f /intern/cycles | |
parent | ef29aaee1af8074e0228c480d962700e97ea5b36 (diff) | |
parent | ae475e355488db27c4b9fcc33385080578403833 (diff) |
Merge branch 'master' into cycles_disney_brdf
Diffstat (limited to 'intern/cycles')
72 files changed, 2297 insertions, 574 deletions
diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt index 3b410b2a1e1..97854a88e84 100644 --- a/intern/cycles/CMakeLists.txt +++ b/intern/cycles/CMakeLists.txt @@ -146,6 +146,14 @@ if(WITH_CYCLES_OSL) ) endif() +if(WITH_CYCLES_OPENSUBDIV) + add_definitions(-DWITH_OPENSUBDIV) + include_directories( + SYSTEM + ${OPENSUBDIV_INCLUDE_DIR} + ) +endif() + set(WITH_CYCLES_DEVICE_OPENCL TRUE) set(WITH_CYCLES_DEVICE_CUDA TRUE) set(WITH_CYCLES_DEVICE_MULTI TRUE) diff --git a/intern/cycles/app/CMakeLists.txt b/intern/cycles/app/CMakeLists.txt index 73dbf16a3d3..8cd499b7ca6 100644 --- a/intern/cycles/app/CMakeLists.txt +++ b/intern/cycles/app/CMakeLists.txt @@ -88,6 +88,9 @@ macro(cycles_target_link_libraries target) if(WITH_CYCLES_OSL) target_link_libraries(${target} ${OSL_LIBRARIES} ${LLVM_LIBRARIES}) endif() + if(WITH_CYCLES_OPENSUBDIV) + target_link_libraries(${target} ${OPENSUBDIV_LIBRARIES}) + endif() target_link_libraries( ${target} ${OPENIMAGEIO_LIBRARIES} diff --git a/intern/cycles/app/cycles_standalone.cpp b/intern/cycles/app/cycles_standalone.cpp index 726e9a51744..e8168bc15ff 100644 --- a/intern/cycles/app/cycles_standalone.cpp +++ b/intern/cycles/app/cycles_standalone.cpp @@ -375,6 +375,8 @@ static void options_parse(int argc, const char **argv) "--threads %d", &options.session_params.threads, "CPU Rendering Threads", "--width %d", &options.width, "Window width in pixel", "--height %d", &options.height, "Window height in pixel", + "--tile-width %d", &options.session_params.tile_size.x, "Tile width in pixels", + "--tile-height %d", &options.session_params.tile_size.y, "Tile height in pixels", "--list-devices", &list, "List information about all available devices", #ifdef WITH_CYCLES_LOGGING "--debug", &debug, "Enable debug logging", diff --git a/intern/cycles/app/cycles_xml.cpp b/intern/cycles/app/cycles_xml.cpp index 3d3aca33881..a54022268bb 100644 --- a/intern/cycles/app/cycles_xml.cpp +++ b/intern/cycles/app/cycles_xml.cpp @@ -57,14 +57,12 @@ struct XMLReadState : public XMLReader { Shader *shader; /* current shader */ string base; /* base path to current file*/ float dicing_rate; /* current dicing rate */ - Mesh::DisplacementMethod displacement_method; XMLReadState() : scene(NULL), smooth(false), shader(NULL), - dicing_rate(0.0f), - displacement_method(Mesh::DISPLACE_BUMP) + dicing_rate(0.0f) { tfm = transform_identity(); } @@ -405,8 +403,6 @@ static void xml_read_mesh(const XMLReadState& state, pugi::xml_node node) int shader = 0; bool smooth = state.smooth; - mesh->displacement_method = state.displacement_method; - /* read vertices and polygons, RIB style */ vector<float3> P; vector<float> UV; @@ -653,14 +649,6 @@ static void xml_read_state(XMLReadState& state, pugi::xml_node node) state.smooth = true; else if(xml_equal_string(node, "interpolation", "flat")) state.smooth = false; - - /* read displacement method */ - if(xml_equal_string(node, "displacement_method", "true")) - state.displacement_method = Mesh::DISPLACE_TRUE; - else if(xml_equal_string(node, "displacement_method", "bump")) - state.displacement_method = Mesh::DISPLACE_BUMP; - else if(xml_equal_string(node, "displacement_method", "both")) - state.displacement_method = Mesh::DISPLACE_BOTH; } /* Scene */ diff --git a/intern/cycles/blender/addon/osl.py b/intern/cycles/blender/addon/osl.py index f4aaaab5eab..19f2ecc9d1a 100644 --- a/intern/cycles/blender/addon/osl.py +++ b/intern/cycles/blender/addon/osl.py @@ -41,6 +41,8 @@ def update_script_node(node, report): import shutil import tempfile + oso_file_remove = False + if node.mode == 'EXTERNAL': # compile external script file script_path = bpy.path.abspath(node.filepath, library=node.id_data.library) @@ -49,7 +51,6 @@ def update_script_node(node, report): if script_ext == ".oso": # it's a .oso file, no need to compile ok, oso_path = True, script_path - oso_file_remove = False elif script_ext == ".osl": # compile .osl file ok, oso_path = osl_compile(script_path, report) @@ -65,7 +66,6 @@ def update_script_node(node, report): elif os.path.dirname(node.filepath) == "": # module in search path oso_path = node.filepath - oso_file_remove = False ok = True else: # unknown @@ -88,12 +88,10 @@ def update_script_node(node, report): osl_file.close() ok, oso_path = osl_compile(osl_file.name, report) - oso_file_remove = False os.remove(osl_file.name) else: # compile text datablock from disk directly ok, oso_path = osl_compile(osl_path, report) - oso_file_remove = False if ok: # read bytecode diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py index 81204eb8ae0..8e82eac2b59 100644 --- a/intern/cycles/blender/addon/properties.py +++ b/intern/cycles/blender/addon/properties.py @@ -775,6 +775,13 @@ class CyclesMaterialSettings(bpy.types.PropertyGroup): default='LINEAR', ) + cls.displacement_method = EnumProperty( + name="Displacement Method", + description="Method to use for the displacement", + items=enum_displacement_methods, + default='BUMP', + ) + @classmethod def unregister(cls): del bpy.types.Material.cycles @@ -952,13 +959,6 @@ class CyclesMeshSettings(bpy.types.PropertyGroup): type=cls, ) - cls.displacement_method = EnumProperty( - name="Displacement Method", - description="Method to use for the displacement", - items=enum_displacement_methods, - default='BUMP', - ) - @classmethod def unregister(cls): del bpy.types.Mesh.cycles diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py index 42f7970769a..52872d2b83f 100644 --- a/intern/cycles/blender/addon/ui.py +++ b/intern/cycles/blender/addon/ui.py @@ -674,40 +674,6 @@ class Cycles_PT_context_material(CyclesButtonsPanel, Panel): split.separator() -class Cycles_PT_mesh_displacement(CyclesButtonsPanel, Panel): - bl_label = "Displacement" - bl_context = "data" - - @classmethod - def poll(cls, context): - if CyclesButtonsPanel.poll(context): - if context.mesh or context.curve or context.meta_ball: - if context.scene.cycles.feature_set == 'EXPERIMENTAL': - return True - - return False - - def draw(self, context): - layout = self.layout - - mesh = context.mesh - curve = context.curve - mball = context.meta_ball - - if mesh: - cdata = mesh.cycles - elif curve: - cdata = curve.cycles - elif mball: - cdata = mball.cycles - - split = layout.split() - - col = split.column() - sub = col.column(align=True) - sub.label(text="Displacement:") - sub.prop(cdata, "displacement_method", text="") - class CyclesObject_PT_motion_blur(CyclesButtonsPanel, Panel): bl_label = "Motion Blur" bl_context = "object" @@ -1219,6 +1185,11 @@ class CyclesMaterial_PT_settings(CyclesButtonsPanel, Panel): col.prop(cmat, "sample_as_light", text="Multiple Importance") col.prop(cmat, "use_transparent_shadow") + if context.scene.cycles.feature_set == 'EXPERIMENTAL': + col.separator() + col.label(text="Displacement:") + col.prop(cmat, "displacement_method", text="") + col = split.column() col.label(text="Volume:") sub = col.column() diff --git a/intern/cycles/blender/blender_mesh.cpp b/intern/cycles/blender/blender_mesh.cpp index 74fd4cb44a0..c33bc4c263f 100644 --- a/intern/cycles/blender/blender_mesh.cpp +++ b/intern/cycles/blender/blender_mesh.cpp @@ -409,7 +409,8 @@ static void attr_create_uv_map(Scene *scene, BL::Mesh& b_mesh, const vector<int>& nverts, const vector<int>& face_flags, - bool subdivision) + bool subdivision, + bool subdivide_uvs) { if(subdivision) { BL::Mesh::uv_layers_iterator l; @@ -429,6 +430,10 @@ static void attr_create_uv_map(Scene *scene, else attr = mesh->subd_attributes.add(name, TypeDesc::TypePoint, ATTR_ELEMENT_CORNER); + if(subdivide_uvs) { + attr->flags |= ATTR_SUBDIVIDED; + } + BL::Mesh::polygons_iterator p; float3 *fdata = attr->data_float3(); @@ -592,7 +597,8 @@ static void create_mesh(Scene *scene, Mesh *mesh, BL::Mesh& b_mesh, const vector<Shader*>& used_shaders, - bool subdivision=false) + bool subdivision=false, + bool subdivide_uvs=true) { /* count vertices and faces */ int numverts = b_mesh.vertices.length(); @@ -638,6 +644,7 @@ static void create_mesh(Scene *scene, /* create generated coordinates from undeformed coordinates */ if(mesh->need_attribute(scene, ATTR_STD_GENERATED)) { Attribute *attr = attributes.add(ATTR_STD_GENERATED); + attr->flags |= ATTR_SUBDIVIDED; float3 loc, size; mesh_texture_space(b_mesh, loc, size); @@ -746,7 +753,7 @@ static void create_mesh(Scene *scene, * The calculate functions will check whether they're needed or not. */ attr_create_vertex_color(scene, mesh, b_mesh, nverts, face_flags, subdivision); - attr_create_uv_map(scene, mesh, b_mesh, nverts, face_flags, subdivision); + attr_create_uv_map(scene, mesh, b_mesh, nverts, face_flags, subdivision, subdivide_uvs); /* for volume objects, create a matrix to transform from object space to * mesh texture space. this does not work with deformations but that can @@ -770,9 +777,39 @@ static void create_subd_mesh(Scene *scene, float dicing_rate, int max_subdivisions) { - create_mesh(scene, mesh, b_mesh, used_shaders, true); + BL::SubsurfModifier subsurf_mod(b_ob.modifiers[b_ob.modifiers.length()-1]); + bool subdivide_uvs = subsurf_mod.use_subsurf_uv(); + + create_mesh(scene, mesh, b_mesh, used_shaders, true, subdivide_uvs); + + /* export creases */ + size_t num_creases = 0; + BL::Mesh::edges_iterator e; + + for(b_mesh.edges.begin(e); e != b_mesh.edges.end(); ++e) { + if(e->crease() != 0.0f) { + num_creases++; + } + } + + mesh->subd_creases.resize(num_creases); + + Mesh::SubdEdgeCrease* crease = mesh->subd_creases.data(); + for(b_mesh.edges.begin(e); e != b_mesh.edges.end(); ++e) { + if(e->crease() != 0.0f) { + crease->v[0] = e->vertices()[0]; + crease->v[1] = e->vertices()[1]; + crease->crease = e->crease(); - SubdParams sdparams(mesh); + crease++; + } + } + + /* set subd params */ + if(!mesh->subd_params) { + mesh->subd_params = new SubdParams(mesh); + } + SubdParams& sdparams = *mesh->subd_params; PointerRNA cobj = RNA_pointer_get(&b_ob.ptr, "cycles"); @@ -782,10 +819,6 @@ static void create_subd_mesh(Scene *scene, scene->camera->update(); sdparams.camera = scene->camera; sdparams.objecttoworld = get_transform(b_ob.matrix_world()); - - /* tesselate */ - DiagSplit dsplit(sdparams); - mesh->tessellate(&dsplit); } /* Sync */ @@ -903,8 +936,6 @@ Mesh *BlenderSync::sync_mesh(BL::Object& b_ob, mesh_synced.insert(mesh); /* create derived mesh */ - PointerRNA cmesh = RNA_pointer_get(&b_ob_data.ptr, "cycles"); - array<int> oldtriangle = mesh->triangles; /* compares curve_keys rather than strands in order to handle quick hair @@ -936,7 +967,7 @@ Mesh *BlenderSync::sync_mesh(BL::Object& b_ob, BL::Modifier mod = b_ob.modifiers[b_ob.modifiers.length()-1]; bool enabled = preview ? mod.show_viewport() : mod.show_render(); - if(enabled && mod.type() == BL::Modifier::type_SUBSURF && RNA_int_get(&cobj, "use_adaptive_subdivision")) { + if(enabled && mod.type() == BL::Modifier::type_SUBSURF && RNA_boolean_get(&cobj, "use_adaptive_subdivision")) { BL::SubsurfModifier subsurf(mod); if(subsurf.subdivision_type() == BL::SubsurfModifier::subdivision_type_CATMULL_CLARK) { @@ -974,21 +1005,6 @@ Mesh *BlenderSync::sync_mesh(BL::Object& b_ob, } mesh->geometry_flags = requested_geometry_flags; - /* displacement method */ - if(cmesh.data) { - const int method = get_enum(cmesh, - "displacement_method", - Mesh::DISPLACE_NUM_METHODS, - Mesh::DISPLACE_BUMP); - - if(method == 0 || !experimental) - mesh->displacement_method = Mesh::DISPLACE_BUMP; - else if(method == 1) - mesh->displacement_method = Mesh::DISPLACE_TRUE; - else - mesh->displacement_method = Mesh::DISPLACE_BOTH; - } - /* fluid motion */ sync_mesh_fluid_motion(b_ob, scene, mesh); diff --git a/intern/cycles/blender/blender_object.cpp b/intern/cycles/blender/blender_object.cpp index 4886735a18f..f305e8e17cc 100644 --- a/intern/cycles/blender/blender_object.cpp +++ b/intern/cycles/blender/blender_object.cpp @@ -329,16 +329,18 @@ Object *BlenderSync::sync_object(BL::Object& b_parent, /* object transformation */ if(tfm != object->tfm) { VLOG(1) << "Object " << b_ob.name() << " motion detected."; - if(motion_time == -1.0f) { - object->motion.pre = tfm; - object->use_motion = true; - } - else if(motion_time == 1.0f) { - object->motion.post = tfm; + if(motion_time == -1.0f || motion_time == 1.0f) { object->use_motion = true; } } + if(motion_time == -1.0f) { + object->motion.pre = tfm; + } + else if(motion_time == 1.0f) { + object->motion.post = tfm; + } + /* mesh deformation */ if(object->mesh) sync_mesh_motion(b_ob, object, motion_time); @@ -395,8 +397,8 @@ Object *BlenderSync::sync_object(BL::Object& b_parent, object->name = b_ob.name().c_str(); object->pass_id = b_ob.pass_index(); object->tfm = tfm; - object->motion.pre = tfm; - object->motion.post = tfm; + object->motion.pre = transform_empty(); + object->motion.post = transform_empty(); object->use_motion = false; /* motion blur */ diff --git a/intern/cycles/blender/blender_shader.cpp b/intern/cycles/blender/blender_shader.cpp index 7b8317a50a7..171b8241280 100644 --- a/intern/cycles/blender/blender_shader.cpp +++ b/intern/cycles/blender/blender_shader.cpp @@ -64,6 +64,14 @@ static VolumeInterpolation get_volume_interpolation(PointerRNA& ptr) VOLUME_INTERPOLATION_LINEAR); } +static DisplacementMethod get_displacement_method(PointerRNA& ptr) +{ + return (DisplacementMethod)get_enum(ptr, + "displacement_method", + DISPLACE_NUM_METHODS, + DISPLACE_BUMP); +} + static int validate_enum_value(int value, int num_values, int default_value) { if(value >= num_values) { @@ -840,8 +848,10 @@ static ShaderNode *add_node(Scene *scene, } } - if(node) + if(node) { + node->name = b_node.name(); graph->add(node); + } return node; } @@ -1183,6 +1193,7 @@ void BlenderSync::sync_materials(bool update_all) shader->heterogeneous_volume = !get_boolean(cmat, "homogeneous_volume"); shader->volume_sampling_method = get_volume_sampling(cmat); shader->volume_interpolation_method = get_volume_interpolation(cmat); + shader->displacement_method = (experimental) ? get_displacement_method(cmat) : DISPLACE_BUMP; shader->set_graph(graph); shader->tag_update(scene); diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp index df01215c91a..85e736ad635 100644 --- a/intern/cycles/device/device.cpp +++ b/intern/cycles/device/device.cpp @@ -56,8 +56,14 @@ std::ostream& operator <<(std::ostream &os, << string_from_bool(requested_features.use_camera_motion) << std::endl; os << "Use Baking: " << string_from_bool(requested_features.use_baking) << std::endl; + os << "Use Subsurface: " + << string_from_bool(requested_features.use_subsurface) << std::endl; os << "Use Volume: " << string_from_bool(requested_features.use_volume) << std::endl; + os << "Use Branched Integrator: " + << string_from_bool(requested_features.use_integrator_branched) << std::endl; + os << "Use Patch Evaluation: " + << string_from_bool(requested_features.use_patch_evaluation) << std::endl; return os; } diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index e11bb7f76af..77dc1fa9713 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -109,6 +109,9 @@ public: /* Use branched integrator. */ bool use_integrator_branched; + /* Use OpenSubdiv patch evaluation */ + bool use_patch_evaluation; + DeviceRequestedFeatures() { /* TODO(sergey): Find more meaningful defaults. */ @@ -123,6 +126,7 @@ public: use_subsurface = false; use_volume = false; use_integrator_branched = false; + use_patch_evaluation = false; } bool modified(const DeviceRequestedFeatures& requested_features) @@ -137,7 +141,8 @@ public: use_baking == requested_features.use_baking && use_subsurface == requested_features.use_subsurface && use_volume == requested_features.use_volume && - use_integrator_branched == requested_features.use_integrator_branched); + use_integrator_branched == requested_features.use_integrator_branched && + use_patch_evaluation == requested_features.use_patch_evaluation); } /* Convert the requested features structure to a build options, @@ -175,6 +180,9 @@ public: if(!use_integrator_branched) { build_options += " -D__NO_BRANCHED_PATH__"; } + if(!use_patch_evaluation) { + build_options += " -D__NO_PATCH_EVAL__"; + } return build_options; } }; diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 6a511ea7316..76e52498b42 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -297,7 +297,7 @@ public: cuda_error_message("CUDA nvcc compiler version could not be parsed."); return false; } - if(cuda_version < 60) { + if(cuda_version < 75) { printf("Unsupported CUDA version %d.%d detected, " "you need CUDA 7.5 or newer.\n", major, minor); @@ -576,6 +576,7 @@ public: case TYPE_UINT: format = CU_AD_FORMAT_UNSIGNED_INT32; break; case TYPE_INT: format = CU_AD_FORMAT_SIGNED_INT32; break; case TYPE_FLOAT: format = CU_AD_FORMAT_FLOAT; break; + case TYPE_HALF: format = CU_AD_FORMAT_HALF; break; default: assert(0); return; } @@ -747,8 +748,12 @@ public: } /* Resize once */ - if(flat_slot >= bindless_mapping.size()) - bindless_mapping.resize(4096); /*TODO(dingto): Make this a variable */ + if(flat_slot >= bindless_mapping.size()) { + /* Allocate some slots in advance, to reduce amount + * of re-allocations. + */ + bindless_mapping.resize(flat_slot + 128); + } /* Set Mapping and tag that we need to (re-)upload to device */ bindless_mapping.get_data()[flat_slot] = (uint)tex; diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp index 50490f3a20e..5c05aeb5569 100644 --- a/intern/cycles/device/device_opencl.cpp +++ b/intern/cycles/device/device_opencl.cpp @@ -875,6 +875,7 @@ public: if(ciErr != CL_SUCCESS) { opencl_error("OpenCL build failed: errors in console"); + fprintf(stderr, "Build error: %s\n", clewErrorString(ciErr)); return false; } diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index f4d154ca19e..1bb93c7f922 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -166,6 +166,7 @@ set(SRC_GEOM_HEADERS geom/geom_motion_curve.h geom/geom_motion_triangle.h geom/geom_object.h + geom/geom_patch.h geom/geom_primitive.h geom/geom_subd_triangle.h geom/geom_triangle.h @@ -179,6 +180,7 @@ set(SRC_UTIL_HEADERS ../util/util_half.h ../util/util_math.h ../util/util_math_fast.h + ../util/util_static_assert.h ../util/util_transform.h ../util/util_texture.h ../util/util_types.h diff --git a/intern/cycles/kernel/closure/bsdf.h b/intern/cycles/kernel/closure/bsdf.h index 633a16ca8e5..ac4f52818c9 100644 --- a/intern/cycles/kernel/closure/bsdf.h +++ b/intern/cycles/kernel/closure/bsdf.h @@ -166,7 +166,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg, return label; } -#ifndef __KERNEL_CUDS__ +#ifndef __KERNEL_CUDA__ ccl_device #else ccl_device_inline diff --git a/intern/cycles/kernel/geom/geom.h b/intern/cycles/kernel/geom/geom.h index 493afdc4f62..3605394f182 100644 --- a/intern/cycles/kernel/geom/geom.h +++ b/intern/cycles/kernel/geom/geom.h @@ -17,6 +17,9 @@ #include "geom_attribute.h" #include "geom_object.h" +#ifdef __PATCH_EVAL__ +# include "geom_patch.h" +#endif #include "geom_triangle.h" #include "geom_subd_triangle.h" #include "geom_triangle_intersect.h" diff --git a/intern/cycles/kernel/geom/geom_attribute.h b/intern/cycles/kernel/geom/geom_attribute.h index 5d78cf8f9fc..8604d30ad34 100644 --- a/intern/cycles/kernel/geom/geom_attribute.h +++ b/intern/cycles/kernel/geom/geom_attribute.h @@ -43,12 +43,19 @@ ccl_device_inline uint attribute_primitive_type(KernelGlobals *kg, const ShaderD } } +ccl_device_inline AttributeDescriptor attribute_not_found() +{ + const AttributeDescriptor desc = {ATTR_ELEMENT_NONE, (NodeAttributeType)0, 0, ATTR_STD_NOT_FOUND}; + return desc; +} + /* Find attribute based on ID */ -ccl_device_inline int find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id, AttributeElement *elem) +ccl_device_inline AttributeDescriptor find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id) { - if(ccl_fetch(sd, object) == PRIM_NONE) - return (int)ATTR_STD_NOT_FOUND; + if(ccl_fetch(sd, object) == PRIM_NONE) { + return attribute_not_found(); + } /* for SVM, find attribute by unique id */ uint attr_offset = ccl_fetch(sd, object)*kernel_data.bvh.attributes_map_stride; @@ -57,31 +64,37 @@ ccl_device_inline int find_attribute(KernelGlobals *kg, const ShaderData *sd, ui while(attr_map.x != id) { if(UNLIKELY(attr_map.x == ATTR_STD_NONE)) { - return ATTR_STD_NOT_FOUND; + return attribute_not_found(); } attr_offset += ATTR_PRIM_TYPES; attr_map = kernel_tex_fetch(__attributes_map, attr_offset); } - *elem = (AttributeElement)attr_map.y; + AttributeDescriptor desc; + desc.element = (AttributeElement)attr_map.y; - if(ccl_fetch(sd, prim) == PRIM_NONE && (AttributeElement)attr_map.y != ATTR_ELEMENT_MESH) - return ATTR_STD_NOT_FOUND; + if(ccl_fetch(sd, prim) == PRIM_NONE && desc.element != ATTR_ELEMENT_MESH) { + return attribute_not_found(); + } /* return result */ - return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z; + desc.offset = (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z; + desc.type = (NodeAttributeType)(attr_map.w & 0xff); + desc.flags = (AttributeFlag)(attr_map.w >> 8); + + return desc; } /* Transform matrix attribute on meshes */ -ccl_device Transform primitive_attribute_matrix(KernelGlobals *kg, const ShaderData *sd, int offset) +ccl_device Transform primitive_attribute_matrix(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc) { Transform tfm; - tfm.x = kernel_tex_fetch(__attributes_float3, offset + 0); - tfm.y = kernel_tex_fetch(__attributes_float3, offset + 1); - tfm.z = kernel_tex_fetch(__attributes_float3, offset + 2); - tfm.w = kernel_tex_fetch(__attributes_float3, offset + 3); + tfm.x = kernel_tex_fetch(__attributes_float3, desc.offset + 0); + tfm.y = kernel_tex_fetch(__attributes_float3, desc.offset + 1); + tfm.z = kernel_tex_fetch(__attributes_float3, desc.offset + 2); + tfm.w = kernel_tex_fetch(__attributes_float3, desc.offset + 3); return tfm; } diff --git a/intern/cycles/kernel/geom/geom_curve.h b/intern/cycles/kernel/geom/geom_curve.h index 292e1bfca0e..aa9cd295452 100644 --- a/intern/cycles/kernel/geom/geom_curve.h +++ b/intern/cycles/kernel/geom/geom_curve.h @@ -24,23 +24,23 @@ CCL_NAMESPACE_BEGIN /* Reading attributes on various curve elements */ -ccl_device float curve_attribute_float(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float *dx, float *dy) +ccl_device float curve_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy) { - if(elem == ATTR_ELEMENT_CURVE) { + if(desc.element == ATTR_ELEMENT_CURVE) { #ifdef __RAY_DIFFERENTIALS__ if(dx) *dx = 0.0f; if(dy) *dy = 0.0f; #endif - return kernel_tex_fetch(__attributes_float, offset + ccl_fetch(sd, prim)); + return kernel_tex_fetch(__attributes_float, desc.offset + ccl_fetch(sd, prim)); } - else if(elem == ATTR_ELEMENT_CURVE_KEY || elem == ATTR_ELEMENT_CURVE_KEY_MOTION) { + else if(desc.element == ATTR_ELEMENT_CURVE_KEY || desc.element == ATTR_ELEMENT_CURVE_KEY_MOTION) { float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim)); int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type)); int k1 = k0 + 1; - float f0 = kernel_tex_fetch(__attributes_float, offset + k0); - float f1 = kernel_tex_fetch(__attributes_float, offset + k1); + float f0 = kernel_tex_fetch(__attributes_float, desc.offset + k0); + float f1 = kernel_tex_fetch(__attributes_float, desc.offset + k1); #ifdef __RAY_DIFFERENTIALS__ if(dx) *dx = ccl_fetch(sd, du).dx*(f1 - f0); @@ -59,9 +59,9 @@ ccl_device float curve_attribute_float(KernelGlobals *kg, const ShaderData *sd, } } -ccl_device float3 curve_attribute_float3(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float3 *dx, float3 *dy) +ccl_device float3 curve_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float3 *dx, float3 *dy) { - if(elem == ATTR_ELEMENT_CURVE) { + if(desc.element == ATTR_ELEMENT_CURVE) { /* idea: we can't derive any useful differentials here, but for tiled * mipmap image caching it would be useful to avoid reading the highest * detail level always. maybe a derivative based on the hair density @@ -71,15 +71,15 @@ ccl_device float3 curve_attribute_float3(KernelGlobals *kg, const ShaderData *sd if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); #endif - return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + ccl_fetch(sd, prim))); + return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + ccl_fetch(sd, prim))); } - else if(elem == ATTR_ELEMENT_CURVE_KEY || elem == ATTR_ELEMENT_CURVE_KEY_MOTION) { + else if(desc.element == ATTR_ELEMENT_CURVE_KEY || desc.element == ATTR_ELEMENT_CURVE_KEY_MOTION) { float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim)); int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type)); int k1 = k0 + 1; - float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + k0)); - float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + k1)); + float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k0)); + float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k1)); #ifdef __RAY_DIFFERENTIALS__ if(dx) *dx = ccl_fetch(sd, du).dx*(f1 - f0); diff --git a/intern/cycles/kernel/geom/geom_object.h b/intern/cycles/kernel/geom/geom_object.h index c0d15a95954..883c5dc100d 100644 --- a/intern/cycles/kernel/geom/geom_object.h +++ b/intern/cycles/kernel/geom/geom_object.h @@ -292,6 +292,18 @@ ccl_device_inline void object_motion_info(KernelGlobals *kg, int object, int *nu *numverts = __float_as_int(f.w); } +/* Offset to an objects patch map */ + +ccl_device_inline uint object_patch_map_offset(KernelGlobals *kg, int object) +{ + if(object == OBJECT_NONE) + return 0; + + int offset = object*OBJECT_SIZE + 11; + float4 f = kernel_tex_fetch(__objects, offset); + return __float_as_uint(f.x); +} + /* Pass ID for shader */ ccl_device int shader_pass_id(KernelGlobals *kg, const ShaderData *sd) diff --git a/intern/cycles/kernel/geom/geom_patch.h b/intern/cycles/kernel/geom/geom_patch.h new file mode 100644 index 00000000000..6a0ff5a4a04 --- /dev/null +++ b/intern/cycles/kernel/geom/geom_patch.h @@ -0,0 +1,343 @@ +/* + * Based on code from OpenSubdiv released under this license: + * + * Copyright 2013 Pixar + * + * Licensed under the Apache License, Version 2.0 (the "Apache License") + * with the following modification; you may not use this file except in + * compliance with the Apache License and the following modification to it: + * Section 6. Trademarks. is deleted and replaced with: + * + * 6. Trademarks. This License does not grant permission to use the trade + * names, trademarks, service marks, or product names of the Licensor + * and its affiliates, except as required to comply with Section 4(c) of + * the License and to reproduce the content of the NOTICE file. + * + * You may obtain a copy of the Apache License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the Apache License with the above modification is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the Apache License for the specific + * language governing permissions and limitations under the Apache License. + * + */ + +CCL_NAMESPACE_BEGIN + +typedef struct PatchHandle { + int array_index, patch_index, vert_index; +} PatchHandle; + +ccl_device_inline int patch_map_resolve_quadrant(float median, float *u, float *v) +{ + int quadrant = -1; + + if(*u < median) { + if(*v < median) { + quadrant = 0; + } + else { + quadrant = 1; + *v -= median; + } + } + else { + if(*v < median) { + quadrant = 3; + } + else { + quadrant = 2; + *v -= median; + } + *u -= median; + } + + return quadrant; +} + +/* retrieve PatchHandle from patch coords */ + +ccl_device_inline PatchHandle patch_map_find_patch(KernelGlobals *kg, int object, int patch, float u, float v) +{ + PatchHandle handle; + + kernel_assert((u >= 0.0f) && (u <= 1.0f) && (v >= 0.0f) && (v <= 1.0f)); + + int node = (object_patch_map_offset(kg, object) + patch)/2; + float median = 0.5f; + + for(int depth = 0; depth < 0xff; depth++) { + float delta = median * 0.5f; + + int quadrant = patch_map_resolve_quadrant(median, &u, &v); + kernel_assert(quadrant >= 0); + + uint child = kernel_tex_fetch(__patches, node + quadrant); + + /* is the quadrant a hole? */ + if(!(child & PATCH_MAP_NODE_IS_SET)) { + handle.array_index = -1; + return handle; + } + + uint index = child & PATCH_MAP_NODE_INDEX_MASK; + + if(child & PATCH_MAP_NODE_IS_LEAF) { + handle.array_index = kernel_tex_fetch(__patches, index + 0); + handle.patch_index = kernel_tex_fetch(__patches, index + 1); + handle.vert_index = kernel_tex_fetch(__patches, index + 2); + + return handle; + } else { + node = index; + } + + median = delta; + } + + /* no leaf found */ + kernel_assert(0); + + handle.array_index = -1; + return handle; +} + +ccl_device_inline void patch_eval_bspline_weights(float t, float *point, float *deriv) +{ + /* The four uniform cubic B-Spline basis functions evaluated at t */ + float inv_6 = 1.0f / 6.0f; + + float t2 = t * t; + float t3 = t * t2; + + point[0] = inv_6 * (1.0f - 3.0f*(t - t2) - t3); + point[1] = inv_6 * (4.0f - 6.0f*t2 + 3.0f*t3); + point[2] = inv_6 * (1.0f + 3.0f*(t + t2 - t3)); + point[3] = inv_6 * t3; + + /* Derivatives of the above four basis functions at t */ + deriv[0] = -0.5f*t2 + t - 0.5f; + deriv[1] = 1.5f*t2 - 2.0f*t; + deriv[2] = -1.5f*t2 + t + 0.5f; + deriv[3] = 0.5f*t2; +} + +ccl_device_inline void patch_eval_adjust_boundary_weights(uint bits, float *s, float *t) +{ + int boundary = ((bits >> 8) & 0xf); + + if(boundary & 1) { + t[2] -= t[0]; + t[1] += 2*t[0]; + t[0] = 0; + } + + if(boundary & 2) { + s[1] -= s[3]; + s[2] += 2*s[3]; + s[3] = 0; + } + + if(boundary & 4) { + t[1] -= t[3]; + t[2] += 2*t[3]; + t[3] = 0; + } + + if(boundary & 8) { + s[2] -= s[0]; + s[1] += 2*s[0]; + s[0] = 0; + } +} + +ccl_device_inline int patch_eval_depth(uint patch_bits) +{ + return (patch_bits & 0xf); +} + +ccl_device_inline float patch_eval_param_fraction(uint patch_bits) +{ + bool non_quad_root = (patch_bits >> 4) & 0x1; + int depth = patch_eval_depth(patch_bits); + + if(non_quad_root) { + return 1.0f / (float)(1 << (depth-1)); + } + else { + return 1.0f / (float)(1 << depth); + } +} + +ccl_device_inline void patch_eval_normalize_coords(uint patch_bits, float *u, float *v) +{ + float frac = patch_eval_param_fraction(patch_bits); + + int iu = (patch_bits >> 22) & 0x3ff; + int iv = (patch_bits >> 12) & 0x3ff; + + /* top left corner */ + float pu = (float)iu*frac; + float pv = (float)iv*frac; + + /* normalize uv coordinates */ + *u = (*u - pu) / frac; + *v = (*v - pv) / frac; +} + +/* retrieve patch control indices */ + +ccl_device_inline int patch_eval_indices(KernelGlobals *kg, const PatchHandle *handle, int channel, + int indices[PATCH_MAX_CONTROL_VERTS]) +{ + int index_base = kernel_tex_fetch(__patches, handle->array_index + 2) + handle->vert_index; + + /* XXX: regular patches only */ + for(int i = 0; i < 16; i++) { + indices[i] = kernel_tex_fetch(__patches, index_base + i); + } + + return 16; +} + +/* evaluate patch basis functions */ + +ccl_device_inline void patch_eval_basis(KernelGlobals *kg, const PatchHandle *handle, float u, float v, + float weights[PATCH_MAX_CONTROL_VERTS], + float weights_du[PATCH_MAX_CONTROL_VERTS], + float weights_dv[PATCH_MAX_CONTROL_VERTS]) +{ + uint patch_bits = kernel_tex_fetch(__patches, handle->patch_index + 1); /* read patch param */ + float d_scale = 1 << patch_eval_depth(patch_bits); + + bool non_quad_root = (patch_bits >> 4) & 0x1; + if(non_quad_root) { + d_scale *= 0.5f; + } + + patch_eval_normalize_coords(patch_bits, &u, &v); + + /* XXX: regular patches only for now. */ + + float s[4], t[4], ds[4], dt[4]; + + patch_eval_bspline_weights(u, s, ds); + patch_eval_bspline_weights(v, t, dt); + + patch_eval_adjust_boundary_weights(patch_bits, s, t); + patch_eval_adjust_boundary_weights(patch_bits, ds, dt); + + for(int k = 0; k < 4; k++) { + for(int l = 0; l < 4; l++) { + weights[4*k+l] = s[l] * t[k]; + weights_du[4*k+l] = ds[l] * t[k] * d_scale; + weights_dv[4*k+l] = s[l] * dt[k] * d_scale; + } + } +} + +/* generic function for evaluating indices and weights from patch coords */ + +ccl_device_inline int patch_eval_control_verts(KernelGlobals *kg, int object, int patch, float u, float v, int channel, + int indices[PATCH_MAX_CONTROL_VERTS], + float weights[PATCH_MAX_CONTROL_VERTS], + float weights_du[PATCH_MAX_CONTROL_VERTS], + float weights_dv[PATCH_MAX_CONTROL_VERTS]) +{ + PatchHandle handle = patch_map_find_patch(kg, object, patch, u, v); + kernel_assert(handle.array_index >= 0); + + int num_control = patch_eval_indices(kg, &handle, channel, indices); + patch_eval_basis(kg, &handle, u, v, weights, weights_du, weights_dv); + + return num_control; +} + +/* functions for evaluating attributes on patches */ + +ccl_device float patch_eval_float(KernelGlobals *kg, const ShaderData *sd, int offset, + int patch, float u, float v, int channel, + float *du, float* dv) +{ + int indices[PATCH_MAX_CONTROL_VERTS]; + float weights[PATCH_MAX_CONTROL_VERTS]; + float weights_du[PATCH_MAX_CONTROL_VERTS]; + float weights_dv[PATCH_MAX_CONTROL_VERTS]; + + int num_control = patch_eval_control_verts(kg, ccl_fetch(sd, object), patch, u, v, channel, + indices, weights, weights_du, weights_dv); + + float val = 0.0f; + if(du) *du = 0.0f; + if(dv) *dv = 0.0f; + + for(int i = 0; i < num_control; i++) { + float v = kernel_tex_fetch(__attributes_float, offset + indices[i]); + + val += v * weights[i]; + if(du) *du += v * weights_du[i]; + if(dv) *dv += v * weights_dv[i]; + } + + return val; +} + +ccl_device float3 patch_eval_float3(KernelGlobals *kg, const ShaderData *sd, int offset, + int patch, float u, float v, int channel, + float3 *du, float3 *dv) +{ + int indices[PATCH_MAX_CONTROL_VERTS]; + float weights[PATCH_MAX_CONTROL_VERTS]; + float weights_du[PATCH_MAX_CONTROL_VERTS]; + float weights_dv[PATCH_MAX_CONTROL_VERTS]; + + int num_control = patch_eval_control_verts(kg, ccl_fetch(sd, object), patch, u, v, channel, + indices, weights, weights_du, weights_dv); + + float3 val = make_float3(0.0f, 0.0f, 0.0f); + if(du) *du = make_float3(0.0f, 0.0f, 0.0f); + if(dv) *dv = make_float3(0.0f, 0.0f, 0.0f); + + for(int i = 0; i < num_control; i++) { + float3 v = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + indices[i])); + + val += v * weights[i]; + if(du) *du += v * weights_du[i]; + if(dv) *dv += v * weights_dv[i]; + } + + return val; +} + +ccl_device float3 patch_eval_uchar4(KernelGlobals *kg, const ShaderData *sd, int offset, + int patch, float u, float v, int channel, + float3 *du, float3 *dv) +{ + int indices[PATCH_MAX_CONTROL_VERTS]; + float weights[PATCH_MAX_CONTROL_VERTS]; + float weights_du[PATCH_MAX_CONTROL_VERTS]; + float weights_dv[PATCH_MAX_CONTROL_VERTS]; + + int num_control = patch_eval_control_verts(kg, ccl_fetch(sd, object), patch, u, v, channel, + indices, weights, weights_du, weights_dv); + + float3 val = make_float3(0.0f, 0.0f, 0.0f); + if(du) *du = make_float3(0.0f, 0.0f, 0.0f); + if(dv) *dv = make_float3(0.0f, 0.0f, 0.0f); + + for(int i = 0; i < num_control; i++) { + float3 v = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, offset + indices[i])); + + val += v * weights[i]; + if(du) *du += v * weights_du[i]; + if(dv) *dv += v * weights_dv[i]; + } + + return val; +} + +CCL_NAMESPACE_END + diff --git a/intern/cycles/kernel/geom/geom_primitive.h b/intern/cycles/kernel/geom/geom_primitive.h index b16f0c9a99b..4384c2093e9 100644 --- a/intern/cycles/kernel/geom/geom_primitive.h +++ b/intern/cycles/kernel/geom/geom_primitive.h @@ -25,24 +25,23 @@ CCL_NAMESPACE_BEGIN ccl_device_inline float primitive_attribute_float(KernelGlobals *kg, const ShaderData *sd, - AttributeElement elem, - int offset, + const AttributeDescriptor desc, float *dx, float *dy) { if(ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE) { if(subd_triangle_patch(kg, sd) == ~0) - return triangle_attribute_float(kg, sd, elem, offset, dx, dy); + return triangle_attribute_float(kg, sd, desc, dx, dy); else - return subd_triangle_attribute_float(kg, sd, elem, offset, dx, dy); + return subd_triangle_attribute_float(kg, sd, desc, dx, dy); } #ifdef __HAIR__ else if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) { - return curve_attribute_float(kg, sd, elem, offset, dx, dy); + return curve_attribute_float(kg, sd, desc, dx, dy); } #endif #ifdef __VOLUME__ - else if(ccl_fetch(sd, object) != OBJECT_NONE && elem == ATTR_ELEMENT_VOXEL) { - return volume_attribute_float(kg, sd, elem, offset, dx, dy); + else if(ccl_fetch(sd, object) != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) { + return volume_attribute_float(kg, sd, desc, dx, dy); } #endif else { @@ -54,25 +53,23 @@ ccl_device_inline float primitive_attribute_float(KernelGlobals *kg, ccl_device_inline float3 primitive_attribute_float3(KernelGlobals *kg, const ShaderData *sd, - AttributeElement elem, - int offset, - float3 *dx, - float3 *dy) + const AttributeDescriptor desc, + float3 *dx, float3 *dy) { if(ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE) { if(subd_triangle_patch(kg, sd) == ~0) - return triangle_attribute_float3(kg, sd, elem, offset, dx, dy); + return triangle_attribute_float3(kg, sd, desc, dx, dy); else - return subd_triangle_attribute_float3(kg, sd, elem, offset, dx, dy); + return subd_triangle_attribute_float3(kg, sd, desc, dx, dy); } #ifdef __HAIR__ else if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) { - return curve_attribute_float3(kg, sd, elem, offset, dx, dy); + return curve_attribute_float3(kg, sd, desc, dx, dy); } #endif #ifdef __VOLUME__ - else if(ccl_fetch(sd, object) != OBJECT_NONE && elem == ATTR_ELEMENT_VOXEL) { - return volume_attribute_float3(kg, sd, elem, offset, dx, dy); + else if(ccl_fetch(sd, object) != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) { + return volume_attribute_float3(kg, sd, desc, dx, dy); } #endif else { @@ -86,13 +83,12 @@ ccl_device_inline float3 primitive_attribute_float3(KernelGlobals *kg, ccl_device_inline float3 primitive_uv(KernelGlobals *kg, ShaderData *sd) { - AttributeElement elem_uv; - int offset_uv = find_attribute(kg, sd, ATTR_STD_UV, &elem_uv); + const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_UV); - if(offset_uv == ATTR_STD_NOT_FOUND) + if(desc.offset == ATTR_STD_NOT_FOUND) return make_float3(0.0f, 0.0f, 0.0f); - float3 uv = primitive_attribute_float3(kg, sd, elem_uv, offset_uv, NULL, NULL); + float3 uv = primitive_attribute_float3(kg, sd, desc, NULL, NULL); uv.z = 1.0f; return uv; } @@ -102,15 +98,14 @@ ccl_device_inline float3 primitive_uv(KernelGlobals *kg, ShaderData *sd) ccl_device bool primitive_ptex(KernelGlobals *kg, ShaderData *sd, float2 *uv, int *face_id) { /* storing ptex data as attributes is not memory efficient but simple for tests */ - AttributeElement elem_face_id, elem_uv; - int offset_face_id = find_attribute(kg, sd, ATTR_STD_PTEX_FACE_ID, &elem_face_id); - int offset_uv = find_attribute(kg, sd, ATTR_STD_PTEX_UV, &elem_uv); + const AttributeDescriptor desc_face_id = find_attribute(kg, sd, ATTR_STD_PTEX_FACE_ID); + const AttributeDescriptor desc_uv = find_attribute(kg, sd, ATTR_STD_PTEX_UV); - if(offset_face_id == ATTR_STD_NOT_FOUND || offset_uv == ATTR_STD_NOT_FOUND) + if(desc_face_id.offset == ATTR_STD_NOT_FOUND || desc_uv.offset == ATTR_STD_NOT_FOUND) return false; - float3 uv3 = primitive_attribute_float3(kg, sd, elem_uv, offset_uv, NULL, NULL); - float face_id_f = primitive_attribute_float(kg, sd, elem_face_id, offset_face_id, NULL, NULL); + float3 uv3 = primitive_attribute_float3(kg, sd, desc_uv, NULL, NULL); + float face_id_f = primitive_attribute_float(kg, sd, desc_face_id, NULL, NULL); *uv = make_float2(uv3.x, uv3.y); *face_id = (int)face_id_f; @@ -132,11 +127,10 @@ ccl_device float3 primitive_tangent(KernelGlobals *kg, ShaderData *sd) #endif /* try to create spherical tangent from generated coordinates */ - AttributeElement attr_elem; - int attr_offset = find_attribute(kg, sd, ATTR_STD_GENERATED, &attr_elem); + const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_GENERATED); - if(attr_offset != ATTR_STD_NOT_FOUND) { - float3 data = primitive_attribute_float3(kg, sd, attr_elem, attr_offset, NULL, NULL); + if(desc.offset != ATTR_STD_NOT_FOUND) { + float3 data = primitive_attribute_float3(kg, sd, desc, NULL, NULL); data = make_float3(-(data.y - 0.5f), (data.x - 0.5f), 0.0f); object_normal_transform(kg, sd, &data); return cross(ccl_fetch(sd, N), normalize(cross(data, ccl_fetch(sd, N)))); @@ -173,19 +167,18 @@ ccl_device_inline float4 primitive_motion_vector(KernelGlobals *kg, ShaderData * float3 motion_pre = center, motion_post = center; /* deformation motion */ - AttributeElement elem; - int offset = find_attribute(kg, sd, ATTR_STD_MOTION_VERTEX_POSITION, &elem); + AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_MOTION_VERTEX_POSITION); - if(offset != ATTR_STD_NOT_FOUND) { + if(desc.offset != ATTR_STD_NOT_FOUND) { /* get motion info */ int numverts, numkeys; object_motion_info(kg, ccl_fetch(sd, object), NULL, &numverts, &numkeys); /* lookup attributes */ - int offset_next = (ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE)? offset + numverts: offset + numkeys; + motion_pre = primitive_attribute_float3(kg, sd, desc, NULL, NULL); - motion_pre = primitive_attribute_float3(kg, sd, elem, offset, NULL, NULL); - motion_post = primitive_attribute_float3(kg, sd, elem, offset_next, NULL, NULL); + desc.offset += (ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE)? numverts: numkeys; + motion_post = primitive_attribute_float3(kg, sd, desc, NULL, NULL); #ifdef __HAIR__ if(is_curve_primitive && (ccl_fetch(sd, flag) & SD_OBJECT_HAS_VERTEX_MOTION) == 0) { diff --git a/intern/cycles/kernel/geom/geom_subd_triangle.h b/intern/cycles/kernel/geom/geom_subd_triangle.h index bf9be182345..647840dc696 100644 --- a/intern/cycles/kernel/geom/geom_subd_triangle.h +++ b/intern/cycles/kernel/geom/geom_subd_triangle.h @@ -97,36 +97,81 @@ ccl_device_inline void subd_triangle_patch_corners(KernelGlobals *kg, int patch, /* Reading attributes on various subdivision triangle elements */ -ccl_device float subd_triangle_attribute_float(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float *dx, float *dy) +ccl_device_noinline float subd_triangle_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy) { int patch = subd_triangle_patch(kg, sd); - if(elem == ATTR_ELEMENT_FACE) { +#ifdef __PATCH_EVAL__ + if(desc.flags & ATTR_SUBDIVIDED) { + float2 uv[3]; + subd_triangle_patch_uv(kg, sd, uv); + + float2 dpdu = uv[0] - uv[2]; + float2 dpdv = uv[1] - uv[2]; + + /* p is [s, t] */ + float2 p = dpdu * ccl_fetch(sd, u) + dpdv * ccl_fetch(sd, v) + uv[2]; + + float a, dads, dadt; + a = patch_eval_float(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt); + +#ifdef __RAY_DIFFERENTIALS__ + if(dx || dy) { + float dsdu = dpdu.x; + float dtdu = dpdu.y; + float dsdv = dpdv.x; + float dtdv = dpdv.y; + + if(dx) { + float dudx = ccl_fetch(sd, du).dx; + float dvdx = ccl_fetch(sd, dv).dx; + + float dsdx = dsdu*dudx + dsdv*dvdx; + float dtdx = dtdu*dudx + dtdv*dvdx; + + *dx = dads*dsdx + dadt*dtdx; + } + if(dy) { + float dudy = ccl_fetch(sd, du).dy; + float dvdy = ccl_fetch(sd, dv).dy; + + float dsdy = dsdu*dudy + dsdv*dvdy; + float dtdy = dtdu*dudy + dtdv*dvdy; + + *dy = dads*dsdy + dadt*dtdy; + } + } +#endif + + return a; + } + else +#endif /* __PATCH_EVAL__ */ + if(desc.element == ATTR_ELEMENT_FACE) { if(dx) *dx = 0.0f; if(dy) *dy = 0.0f; - return kernel_tex_fetch(__attributes_float, offset + subd_triangle_patch_face(kg, patch)); + return kernel_tex_fetch(__attributes_float, desc.offset + subd_triangle_patch_face(kg, patch)); } - else if(elem == ATTR_ELEMENT_VERTEX || elem == ATTR_ELEMENT_VERTEX_MOTION) { + else if(desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { float2 uv[3]; subd_triangle_patch_uv(kg, sd, uv); - uint4 v = subd_triangle_patch_indices(kg, patch); - float a, b, c; + uint4 v = subd_triangle_patch_indices(kg, patch); - float f0 = kernel_tex_fetch(__attributes_float, offset + v.x); - float f1 = kernel_tex_fetch(__attributes_float, offset + v.y); - float f2 = kernel_tex_fetch(__attributes_float, offset + v.z); - float f3 = kernel_tex_fetch(__attributes_float, offset + v.w); + float f0 = kernel_tex_fetch(__attributes_float, desc.offset + v.x); + float f1 = kernel_tex_fetch(__attributes_float, desc.offset + v.y); + float f2 = kernel_tex_fetch(__attributes_float, desc.offset + v.z); + float f3 = kernel_tex_fetch(__attributes_float, desc.offset + v.w); if(subd_triangle_patch_num_corners(kg, patch) != 4) { f1 = (f1+f0)*0.5f; f3 = (f3+f0)*0.5f; } - a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); - b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); - c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); + float a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); + float b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); + float c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); #ifdef __RAY_DIFFERENTIALS__ if(dx) *dx = ccl_fetch(sd, du).dx*a + ccl_fetch(sd, dv).dx*b - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*c; @@ -135,28 +180,26 @@ ccl_device float subd_triangle_attribute_float(KernelGlobals *kg, const ShaderDa return ccl_fetch(sd, u)*a + ccl_fetch(sd, v)*b + (1.0f - ccl_fetch(sd, u) - ccl_fetch(sd, v))*c; } - else if(elem == ATTR_ELEMENT_CORNER) { - int corners[4]; - subd_triangle_patch_corners(kg, patch, corners); - + else if(desc.element == ATTR_ELEMENT_CORNER) { float2 uv[3]; subd_triangle_patch_uv(kg, sd, uv); - float a, b, c; + int corners[4]; + subd_triangle_patch_corners(kg, patch, corners); - float f0 = kernel_tex_fetch(__attributes_float, corners[0] + offset); - float f1 = kernel_tex_fetch(__attributes_float, corners[1] + offset); - float f2 = kernel_tex_fetch(__attributes_float, corners[2] + offset); - float f3 = kernel_tex_fetch(__attributes_float, corners[3] + offset); + float f0 = kernel_tex_fetch(__attributes_float, corners[0] + desc.offset); + float f1 = kernel_tex_fetch(__attributes_float, corners[1] + desc.offset); + float f2 = kernel_tex_fetch(__attributes_float, corners[2] + desc.offset); + float f3 = kernel_tex_fetch(__attributes_float, corners[3] + desc.offset); if(subd_triangle_patch_num_corners(kg, patch) != 4) { f1 = (f1+f0)*0.5f; f3 = (f3+f0)*0.5f; } - a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); - b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); - c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); + float a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); + float b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); + float c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); #ifdef __RAY_DIFFERENTIALS__ if(dx) *dx = ccl_fetch(sd, du).dx*a + ccl_fetch(sd, dv).dx*b - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*c; @@ -173,36 +216,87 @@ ccl_device float subd_triangle_attribute_float(KernelGlobals *kg, const ShaderDa } } -ccl_device float3 subd_triangle_attribute_float3(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float3 *dx, float3 *dy) +ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float3 *dx, float3 *dy) { int patch = subd_triangle_patch(kg, sd); - if(elem == ATTR_ELEMENT_FACE) { +#ifdef __PATCH_EVAL__ + if(desc.flags & ATTR_SUBDIVIDED) { + float2 uv[3]; + subd_triangle_patch_uv(kg, sd, uv); + + float2 dpdu = uv[0] - uv[2]; + float2 dpdv = uv[1] - uv[2]; + + /* p is [s, t] */ + float2 p = dpdu * ccl_fetch(sd, u) + dpdv * ccl_fetch(sd, v) + uv[2]; + + float3 a, dads, dadt; + + if(desc.element == ATTR_ELEMENT_CORNER_BYTE) { + a = patch_eval_uchar4(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt); + } + else { + a = patch_eval_float3(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt); + } + +#ifdef __RAY_DIFFERENTIALS__ + if(dx || dy) { + float dsdu = dpdu.x; + float dtdu = dpdu.y; + float dsdv = dpdv.x; + float dtdv = dpdv.y; + + if(dx) { + float dudx = ccl_fetch(sd, du).dx; + float dvdx = ccl_fetch(sd, dv).dx; + + float dsdx = dsdu*dudx + dsdv*dvdx; + float dtdx = dtdu*dudx + dtdv*dvdx; + + *dx = dads*dsdx + dadt*dtdx; + } + if(dy) { + float dudy = ccl_fetch(sd, du).dy; + float dvdy = ccl_fetch(sd, dv).dy; + + float dsdy = dsdu*dudy + dsdv*dvdy; + float dtdy = dtdu*dudy + dtdv*dvdy; + + *dy = dads*dsdy + dadt*dtdy; + } + } +#endif + + return a; + } + else +#endif /* __PATCH_EVAL__ */ + if(desc.element == ATTR_ELEMENT_FACE) { if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); - return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + subd_triangle_patch_face(kg, patch))); + return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + subd_triangle_patch_face(kg, patch))); } - else if(elem == ATTR_ELEMENT_VERTEX || elem == ATTR_ELEMENT_VERTEX_MOTION) { + else if(desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { float2 uv[3]; subd_triangle_patch_uv(kg, sd, uv); - uint4 v = subd_triangle_patch_indices(kg, patch); - float3 a, b, c; + uint4 v = subd_triangle_patch_indices(kg, patch); - float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + v.x)); - float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + v.y)); - float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + v.z)); - float3 f3 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + v.w)); + float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.x)); + float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.y)); + float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.z)); + float3 f3 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.w)); if(subd_triangle_patch_num_corners(kg, patch) != 4) { f1 = (f1+f0)*0.5f; f3 = (f3+f0)*0.5f; } - a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); - b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); - c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); + float3 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); + float3 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); + float3 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); #ifdef __RAY_DIFFERENTIALS__ if(dx) *dx = ccl_fetch(sd, du).dx*a + ccl_fetch(sd, dv).dx*b - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*c; @@ -211,27 +305,26 @@ ccl_device float3 subd_triangle_attribute_float3(KernelGlobals *kg, const Shader return ccl_fetch(sd, u)*a + ccl_fetch(sd, v)*b + (1.0f - ccl_fetch(sd, u) - ccl_fetch(sd, v))*c; } - else if(elem == ATTR_ELEMENT_CORNER || elem == ATTR_ELEMENT_CORNER_BYTE) { - int corners[4]; - subd_triangle_patch_corners(kg, patch, corners); - + else if(desc.element == ATTR_ELEMENT_CORNER || desc.element == ATTR_ELEMENT_CORNER_BYTE) { float2 uv[3]; subd_triangle_patch_uv(kg, sd, uv); - float3 a, b, c; + int corners[4]; + subd_triangle_patch_corners(kg, patch, corners); + float3 f0, f1, f2, f3; - if(elem == ATTR_ELEMENT_CORNER) { - f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[0] + offset)); - f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[1] + offset)); - f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[2] + offset)); - f3 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[3] + offset)); + if(desc.element == ATTR_ELEMENT_CORNER) { + f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[0] + desc.offset)); + f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[1] + desc.offset)); + f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[2] + desc.offset)); + f3 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[3] + desc.offset)); } else { - f0 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[0] + offset)); - f1 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[1] + offset)); - f2 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[2] + offset)); - f3 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[3] + offset)); + f0 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[0] + desc.offset)); + f1 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[1] + desc.offset)); + f2 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[2] + desc.offset)); + f3 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[3] + desc.offset)); } if(subd_triangle_patch_num_corners(kg, patch) != 4) { @@ -239,9 +332,9 @@ ccl_device float3 subd_triangle_attribute_float3(KernelGlobals *kg, const Shader f3 = (f3+f0)*0.5f; } - a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); - b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); - c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); + float3 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y); + float3 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y); + float3 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y); #ifdef __RAY_DIFFERENTIALS__ if(dx) *dx = ccl_fetch(sd, du).dx*a + ccl_fetch(sd, dv).dx*b - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*c; diff --git a/intern/cycles/kernel/geom/geom_triangle.h b/intern/cycles/kernel/geom/geom_triangle.h index 0c2351e1d1b..d3289d6572c 100644 --- a/intern/cycles/kernel/geom/geom_triangle.h +++ b/intern/cycles/kernel/geom/geom_triangle.h @@ -105,20 +105,20 @@ ccl_device_inline void triangle_dPdudv(KernelGlobals *kg, int prim, ccl_addr_spa /* Reading attributes on various triangle elements */ -ccl_device float triangle_attribute_float(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float *dx, float *dy) +ccl_device float triangle_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy) { - if(elem == ATTR_ELEMENT_FACE) { + if(desc.element == ATTR_ELEMENT_FACE) { if(dx) *dx = 0.0f; if(dy) *dy = 0.0f; - return kernel_tex_fetch(__attributes_float, offset + ccl_fetch(sd, prim)); + return kernel_tex_fetch(__attributes_float, desc.offset + ccl_fetch(sd, prim)); } - else if(elem == ATTR_ELEMENT_VERTEX || elem == ATTR_ELEMENT_VERTEX_MOTION) { + else if(desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim)); - float f0 = kernel_tex_fetch(__attributes_float, offset + tri_vindex.x); - float f1 = kernel_tex_fetch(__attributes_float, offset + tri_vindex.y); - float f2 = kernel_tex_fetch(__attributes_float, offset + tri_vindex.z); + float f0 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.x); + float f1 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.y); + float f2 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.z); #ifdef __RAY_DIFFERENTIALS__ if(dx) *dx = ccl_fetch(sd, du).dx*f0 + ccl_fetch(sd, dv).dx*f1 - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*f2; @@ -127,8 +127,8 @@ ccl_device float triangle_attribute_float(KernelGlobals *kg, const ShaderData *s return ccl_fetch(sd, u)*f0 + ccl_fetch(sd, v)*f1 + (1.0f - ccl_fetch(sd, u) - ccl_fetch(sd, v))*f2; } - else if(elem == ATTR_ELEMENT_CORNER) { - int tri = offset + ccl_fetch(sd, prim)*3; + else if(desc.element == ATTR_ELEMENT_CORNER) { + int tri = desc.offset + ccl_fetch(sd, prim)*3; float f0 = kernel_tex_fetch(__attributes_float, tri + 0); float f1 = kernel_tex_fetch(__attributes_float, tri + 1); float f2 = kernel_tex_fetch(__attributes_float, tri + 2); @@ -148,20 +148,20 @@ ccl_device float triangle_attribute_float(KernelGlobals *kg, const ShaderData *s } } -ccl_device float3 triangle_attribute_float3(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float3 *dx, float3 *dy) +ccl_device float3 triangle_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float3 *dx, float3 *dy) { - if(elem == ATTR_ELEMENT_FACE) { + if(desc.element == ATTR_ELEMENT_FACE) { if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); - return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + ccl_fetch(sd, prim))); + return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + ccl_fetch(sd, prim))); } - else if(elem == ATTR_ELEMENT_VERTEX || elem == ATTR_ELEMENT_VERTEX_MOTION) { + else if(desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) { uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim)); - float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.x)); - float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.y)); - float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.z)); + float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.x)); + float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.y)); + float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.z)); #ifdef __RAY_DIFFERENTIALS__ if(dx) *dx = ccl_fetch(sd, du).dx*f0 + ccl_fetch(sd, dv).dx*f1 - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*f2; @@ -170,11 +170,11 @@ ccl_device float3 triangle_attribute_float3(KernelGlobals *kg, const ShaderData return ccl_fetch(sd, u)*f0 + ccl_fetch(sd, v)*f1 + (1.0f - ccl_fetch(sd, u) - ccl_fetch(sd, v))*f2; } - else if(elem == ATTR_ELEMENT_CORNER || elem == ATTR_ELEMENT_CORNER_BYTE) { - int tri = offset + ccl_fetch(sd, prim)*3; + else if(desc.element == ATTR_ELEMENT_CORNER || desc.element == ATTR_ELEMENT_CORNER_BYTE) { + int tri = desc.offset + ccl_fetch(sd, prim)*3; float3 f0, f1, f2; - if(elem == ATTR_ELEMENT_CORNER) { + if(desc.element == ATTR_ELEMENT_CORNER) { f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 0)); f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 1)); f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 2)); diff --git a/intern/cycles/kernel/geom/geom_volume.h b/intern/cycles/kernel/geom/geom_volume.h index 7c8182bc430..efe540a8518 100644 --- a/intern/cycles/kernel/geom/geom_volume.h +++ b/intern/cycles/kernel/geom/geom_volume.h @@ -50,36 +50,35 @@ ccl_device_inline float3 volume_normalized_position(KernelGlobals *kg, { /* todo: optimize this so it's just a single matrix multiplication when * possible (not motion blur), or perhaps even just translation + scale */ - AttributeElement attr_elem; - int attr_offset = find_attribute(kg, sd, ATTR_STD_GENERATED_TRANSFORM, &attr_elem); + const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_GENERATED_TRANSFORM); object_inverse_position_transform(kg, sd, &P); - if(attr_offset != ATTR_STD_NOT_FOUND) { - Transform tfm = primitive_attribute_matrix(kg, sd, attr_offset); + if(desc.offset != ATTR_STD_NOT_FOUND) { + Transform tfm = primitive_attribute_matrix(kg, sd, desc); P = transform_point(&tfm, P); } return P; } -ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int id, float *dx, float *dy) +ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy) { float3 P = volume_normalized_position(kg, sd, sd->P); #ifdef __KERNEL_GPU__ # if __CUDA_ARCH__ >= 300 - CUtexObject tex = kernel_tex_fetch(__bindless_mapping, id); + CUtexObject tex = kernel_tex_fetch(__bindless_mapping, desc.offset); float f = kernel_tex_image_interp_3d_float(tex, P.x, P.y, P.z); float4 r = make_float4(f, f, f, 1.0); # else - float4 r = volume_image_texture_3d(id, P.x, P.y, P.z); + float4 r = volume_image_texture_3d(desc.offset, P.x, P.y, P.z); # endif #else float4 r; if(sd->flag & SD_VOLUME_CUBIC) - r = kernel_tex_image_interp_3d_ex(id, P.x, P.y, P.z, INTERPOLATION_CUBIC); + r = kernel_tex_image_interp_3d_ex(desc.offset, P.x, P.y, P.z, INTERPOLATION_CUBIC); else - r = kernel_tex_image_interp_3d(id, P.x, P.y, P.z); + r = kernel_tex_image_interp_3d(desc.offset, P.x, P.y, P.z); #endif if(dx) *dx = 0.0f; @@ -88,22 +87,22 @@ ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd, return average(float4_to_float3(r)); } -ccl_device float3 volume_attribute_float3(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int id, float3 *dx, float3 *dy) +ccl_device float3 volume_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float3 *dx, float3 *dy) { float3 P = volume_normalized_position(kg, sd, sd->P); #ifdef __KERNEL_GPU__ # if __CUDA_ARCH__ >= 300 - CUtexObject tex = kernel_tex_fetch(__bindless_mapping, id); + CUtexObject tex = kernel_tex_fetch(__bindless_mapping, desc.offset); float4 r = kernel_tex_image_interp_3d_float4(tex, P.x, P.y, P.z); # else - float4 r = volume_image_texture_3d(id, P.x, P.y, P.z); + float4 r = volume_image_texture_3d(desc.offset, P.x, P.y, P.z); # endif #else float4 r; if(sd->flag & SD_VOLUME_CUBIC) - r = kernel_tex_image_interp_3d_ex(id, P.x, P.y, P.z, INTERPOLATION_CUBIC); + r = kernel_tex_image_interp_3d_ex(desc.offset, P.x, P.y, P.z, INTERPOLATION_CUBIC); else - r = kernel_tex_image_interp_3d(id, P.x, P.y, P.z); + r = kernel_tex_image_interp_3d(desc.offset, P.x, P.y, P.z); #endif if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h index c882b477c35..3775934f293 100644 --- a/intern/cycles/kernel/kernel_compat_cpu.h +++ b/intern/cycles/kernel/kernel_compat_cpu.h @@ -495,6 +495,7 @@ typedef texture<uint> texture_uint; typedef texture<int> texture_int; typedef texture<uint4> texture_uint4; typedef texture<uchar4> texture_uchar4; +typedef texture<uchar> texture_uchar; typedef texture_image<float> texture_image_float; typedef texture_image<uchar> texture_image_uchar; typedef texture_image<half> texture_image_half; diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h index a039b414006..9a96cb9f438 100644 --- a/intern/cycles/kernel/kernel_compat_cuda.h +++ b/intern/cycles/kernel/kernel_compat_cuda.h @@ -31,6 +31,7 @@ #endif #include <cuda.h> +#include <cuda_fp16.h> #include <float.h> /* Qualifier wrappers for different names on different devices */ @@ -47,6 +48,7 @@ #define ccl_may_alias #define ccl_addr_space #define ccl_restrict __restrict__ +#define ccl_align(n) __align__(n) /* No assert supported for CUDA */ @@ -65,6 +67,7 @@ typedef texture<float, 1> texture_float; typedef texture<uint, 1> texture_uint; typedef texture<int, 1> texture_int; typedef texture<uint4, 1> texture_uint4; +typedef texture<uchar, 1> texture_uchar; typedef texture<uchar4, 1> texture_uchar4; typedef texture<float4, 2> texture_image_float4; typedef texture<float4, 3> texture_image3d_float4; diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h index 8505cb85576..2ae89dde7c4 100644 --- a/intern/cycles/kernel/kernel_compat_opencl.h +++ b/intern/cycles/kernel/kernel_compat_opencl.h @@ -40,6 +40,7 @@ #define ccl_local __local #define ccl_private __private #define ccl_restrict restrict +#define ccl_align(n) __attribute__((aligned(n))) #ifdef __SPLIT_KERNEL__ # define ccl_addr_space __global diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index 1f08f3459e6..903be4f09a0 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -435,8 +435,12 @@ ccl_device_noinline void kernel_path_ao(KernelGlobals *kg, } #ifdef __SUBSURFACE__ - -ccl_device_inline bool kernel_path_subsurface_scatter( +# ifndef __KERNEL_CUDA__ +ccl_device +# else +ccl_device_inline +# endif +bool kernel_path_subsurface_scatter( KernelGlobals *kg, ShaderData *sd, ShaderData *emission_sd, diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h index 98d321c9c16..079bea30bdd 100644 --- a/intern/cycles/kernel/kernel_shader.h +++ b/intern/cycles/kernel/kernel_shader.h @@ -149,7 +149,7 @@ ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg, /* ShaderData setup from BSSRDF scatter */ #ifdef __SUBSURFACE__ -# ifndef __KERNEL_CUDS__ +# ifndef __KERNEL_CUDA__ ccl_device # else ccl_device_inline @@ -539,7 +539,7 @@ ccl_device_inline void _shader_bsdf_multi_eval_branched(KernelGlobals *kg, #endif -#ifndef __KERNEL_CUDS__ +#ifndef __KERNEL_CUDA__ ccl_device #else ccl_device_inline diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h index f404666177a..e83bfc3f08a 100644 --- a/intern/cycles/kernel/kernel_subsurface.h +++ b/intern/cycles/kernel/kernel_subsurface.h @@ -85,11 +85,16 @@ ccl_device ShaderClosure *subsurface_scatter_pick_closure(KernelGlobals *kg, Sha return NULL; } -ccl_device_inline float3 subsurface_scatter_eval(ShaderData *sd, - ShaderClosure *sc, - float disk_r, - float r, - bool all) +#ifndef __KERNEL_GPU__ +ccl_device_noinline +#else +ccl_device_inline +#endif +float3 subsurface_scatter_eval(ShaderData *sd, + ShaderClosure *sc, + float disk_r, + float r, + bool all) { #ifdef BSSRDF_MULTI_EVAL /* this is the veach one-sample model with balance heuristic, some pdf @@ -235,7 +240,12 @@ ccl_device void subsurface_color_bump_blur(KernelGlobals *kg, /* Subsurface scattering step, from a point on the surface to other * nearby points on the same object. */ -ccl_device_inline int subsurface_scatter_multi_intersect( +#ifndef __KERNEL_CUDA__ +ccl_device +#else +ccl_device_inline +#endif +int subsurface_scatter_multi_intersect( KernelGlobals *kg, SubsurfaceIntersection* ss_isect, ShaderData *sd, diff --git a/intern/cycles/kernel/kernel_textures.h b/intern/cycles/kernel/kernel_textures.h index 7d6fec02331..8d5bb75a428 100644 --- a/intern/cycles/kernel/kernel_textures.h +++ b/intern/cycles/kernel/kernel_textures.h @@ -188,6 +188,8 @@ KERNEL_TEX(uint, texture_uint, __bindless_mapping) /* packed image (opencl) */ KERNEL_TEX(uchar4, texture_uchar4, __tex_image_byte4_packed) KERNEL_TEX(float4, texture_float4, __tex_image_float4_packed) +KERNEL_TEX(uchar, texture_uchar, __tex_image_byte_packed) +KERNEL_TEX(float, texture_float, __tex_image_float_packed) KERNEL_TEX(uint4, texture_uint4, __tex_image_packed_info) #undef KERNEL_TEX diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index 18b5c35c768..e29940672ca 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -19,6 +19,7 @@ #include "kernel_math.h" #include "svm/svm_types.h" +#include "util_static_assert.h" #ifndef __KERNEL_GPU__ # define __KERNEL_CPU__ @@ -34,7 +35,7 @@ CCL_NAMESPACE_BEGIN /* constants */ -#define OBJECT_SIZE 11 +#define OBJECT_SIZE 12 #define OBJECT_VECTOR_SIZE 6 #define LIGHT_SIZE 5 #define FILTER_TABLE_SIZE 1024 @@ -147,6 +148,7 @@ CCL_NAMESPACE_BEGIN #define __CAMERA_CLIPPING__ #define __INTERSECTION_REFINE__ #define __CLAMP_SAMPLE__ +#define __PATCH_EVAL__ #ifdef __KERNEL_SHADING__ # define __SVM__ @@ -196,6 +198,9 @@ CCL_NAMESPACE_BEGIN #ifdef __NO_BRANCHED_PATH__ # undef __BRANCHED_PATH__ #endif +#ifdef __NO_PATCH_EVAL__ +# undef __PATCH_EVAL__ +#endif /* Random Numbers */ @@ -624,6 +629,18 @@ typedef enum AttributeStandard { ATTR_STD_NOT_FOUND = ~0 } AttributeStandard; +typedef enum AttributeFlag { + ATTR_FINAL_SIZE = (1 << 0), + ATTR_SUBDIVIDED = (1 << 1), +} AttributeFlag; + +typedef struct AttributeDescriptor { + AttributeElement element; + NodeAttributeType type; + uint flags; /* see enum AttributeFlag */ + int offset; +} AttributeDescriptor; + /* Closure data */ #ifdef __MULTI_CLOSURE__ @@ -644,23 +661,18 @@ typedef enum AttributeStandard { * ShaderClosure has a fixed size, and any extra space must be allocated * with closure_alloc_extra(). * - * float3 is 12 bytes on CUDA and 16 bytes on CPU/OpenCL, we set the data - * size to ensure ShaderClosure is 80 bytes total everywhere. */ + * We pad the struct to 80 bytes and ensure it is aligned to 16 bytes, which + * we assume to be the maximum required alignment for any struct. */ #define SHADER_CLOSURE_BASE \ float3 weight; \ ClosureType type; \ float sample_weight \ -typedef ccl_addr_space struct ShaderClosure { +typedef ccl_addr_space struct ccl_align(16) ShaderClosure { SHADER_CLOSURE_BASE; - /* pad to 80 bytes, data types are aligned to own size */ -#ifdef __KERNEL_CUDA__ - float data[15]; -#else - float data[14]; -#endif + float data[14]; /* pad to 80 bytes */ } ShaderClosure; /* Shader Context @@ -735,7 +747,7 @@ enum ShaderDataFlag { # define SD_THREAD (get_global_id(1) * get_global_size(0) + get_global_id(0)) # if defined(__SPLIT_KERNEL_AOS__) /* ShaderData is stored as an Array-of-Structures */ -# define ccl_soa_member(type, name) type soa_##name; +# define ccl_soa_member(type, name) type soa_##name # define ccl_fetch(s, t) (s[SD_THREAD].soa_##t) # define ccl_fetch_array(s, t, index) (&s[SD_THREAD].soa_##t[index]) # else @@ -743,7 +755,7 @@ enum ShaderDataFlag { # define SD_GLOBAL_SIZE (get_global_size(0) * get_global_size(1)) # define SD_FIELD_SIZE(t) sizeof(((struct ShaderData*)0)->t) # define SD_OFFSETOF(t) ((char*)(&((struct ShaderData*)0)->t) - (char*)0) -# define ccl_soa_member(type, name) type soa_##name; +# define ccl_soa_member(type, name) type soa_##name # define ccl_fetch(s, t) (((ShaderData*)((ccl_addr_space char*)s + SD_GLOBAL_SIZE * SD_OFFSETOF(soa_##t) + SD_FIELD_SIZE(soa_##t) * SD_THREAD - SD_OFFSETOF(soa_##t)))->soa_##t) # define ccl_fetch_array(s, t, index) (&ccl_fetch(s, t)[index]) # endif @@ -979,6 +991,7 @@ typedef struct KernelCamera { int pad; } KernelCamera; +static_assert_align(KernelCamera, 16); typedef struct KernelFilm { float exposure; @@ -1033,6 +1046,7 @@ typedef struct KernelFilm { int pass_pad3; #endif } KernelFilm; +static_assert_align(KernelFilm, 16); typedef struct KernelBackground { /* only shader index */ @@ -1046,6 +1060,7 @@ typedef struct KernelBackground { float ao_distance; float ao_pad1, ao_pad2; } KernelBackground; +static_assert_align(KernelBackground, 16); typedef struct KernelIntegrator { /* emission */ @@ -1113,8 +1128,10 @@ typedef struct KernelIntegrator { float volume_step_size; int volume_samples; - int pad; + int pad1; + int pad2; } KernelIntegrator; +static_assert_align(KernelIntegrator, 16); typedef struct KernelBVH { /* root node */ @@ -1126,6 +1143,7 @@ typedef struct KernelBVH { int use_qbvh; int pad1, pad2; } KernelBVH; +static_assert_align(KernelBVH, 16); typedef enum CurveFlag { /* runtime flags */ @@ -1145,11 +1163,13 @@ typedef struct KernelCurves { float minimum_width; float maximum_width; } KernelCurves; +static_assert_align(KernelCurves, 16); typedef struct KernelTables { int beckmann_offset; int pad1, pad2, pad3; } KernelTables; +static_assert_align(KernelTables, 16); typedef struct KernelData { KernelCamera cam; @@ -1160,8 +1180,12 @@ typedef struct KernelData { KernelCurves curve; KernelTables tables; } KernelData; +static_assert_align(KernelData, 16); #ifdef __KERNEL_DEBUG__ +/* NOTE: This is a runtime-only struct, alignment is not + * really important here. + */ typedef ccl_addr_space struct DebugData { // Total number of BVH node traversal steps and primitives intersections // for the camera rays. @@ -1239,6 +1263,16 @@ enum RayState { #define REMOVE_RAY_FLAG(ray_state, ray_index, flag) (ray_state[ray_index] = (ray_state[ray_index] & (~flag))) #define IS_FLAG(ray_state, ray_index, flag) (ray_state[ray_index] & flag) +/* Patches */ + +#define PATCH_MAX_CONTROL_VERTS 16 + +/* Patch map node flags */ + +#define PATCH_MAP_NODE_IS_SET (1 << 30) +#define PATCH_MAP_NODE_IS_LEAF (1u << 31) +#define PATCH_MAP_NODE_INDEX_MASK (~(PATCH_MAP_NODE_IS_SET | PATCH_MAP_NODE_IS_LEAF)) + CCL_NAMESPACE_END #endif /* __KERNEL_TYPES_H__ */ diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h index 47383140170..af68907a5c2 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h @@ -25,12 +25,12 @@ ccl_device float4 kernel_tex_image_interp_impl(KernelGlobals *kg, int tex, float { if(tex >= TEX_START_HALF_CPU) return kg->texture_half_images[tex - TEX_START_HALF_CPU].interp(x, y); - else if(tex >= TEX_START_HALF4_CPU) - return kg->texture_half4_images[tex - TEX_START_HALF4_CPU].interp(x, y); else if(tex >= TEX_START_BYTE_CPU) return kg->texture_byte_images[tex - TEX_START_BYTE_CPU].interp(x, y); else if(tex >= TEX_START_FLOAT_CPU) return kg->texture_float_images[tex - TEX_START_FLOAT_CPU].interp(x, y); + else if(tex >= TEX_START_HALF4_CPU) + return kg->texture_half4_images[tex - TEX_START_HALF4_CPU].interp(x, y); else if(tex >= TEX_START_BYTE4_CPU) return kg->texture_byte4_images[tex - TEX_START_BYTE4_CPU].interp(x, y); else @@ -41,12 +41,12 @@ ccl_device float4 kernel_tex_image_interp_3d_impl(KernelGlobals *kg, int tex, fl { if(tex >= TEX_START_HALF_CPU) return kg->texture_half_images[tex - TEX_START_HALF_CPU].interp_3d(x, y, z); - else if(tex >= TEX_START_HALF4_CPU) - return kg->texture_half4_images[tex - TEX_START_HALF4_CPU].interp_3d(x, y, z); else if(tex >= TEX_START_BYTE_CPU) return kg->texture_byte_images[tex - TEX_START_BYTE_CPU].interp_3d(x, y, z); else if(tex >= TEX_START_FLOAT_CPU) return kg->texture_float_images[tex - TEX_START_FLOAT_CPU].interp_3d(x, y, z); + else if(tex >= TEX_START_HALF4_CPU) + return kg->texture_half4_images[tex - TEX_START_HALF4_CPU].interp_3d(x, y, z); else if(tex >= TEX_START_BYTE4_CPU) return kg->texture_byte4_images[tex - TEX_START_BYTE4_CPU].interp_3d(x, y, z); else @@ -57,13 +57,13 @@ ccl_device float4 kernel_tex_image_interp_3d_impl(KernelGlobals *kg, int tex, fl ccl_device float4 kernel_tex_image_interp_3d_ex_impl(KernelGlobals *kg, int tex, float x, float y, float z, int interpolation) { if(tex >= TEX_START_HALF_CPU) - return kg->texture_half4_images[tex - TEX_START_HALF_CPU].interp_3d_ex(x, y, z, interpolation); - else if(tex >= TEX_START_HALF4_CPU) - return kg->texture_half_images[tex - TEX_START_HALF4_CPU].interp_3d_ex(x, y, z, interpolation); + return kg->texture_half_images[tex - TEX_START_HALF_CPU].interp_3d_ex(x, y, z, interpolation); else if(tex >= TEX_START_BYTE_CPU) return kg->texture_byte_images[tex - TEX_START_BYTE_CPU].interp_3d_ex(x, y, z, interpolation); else if(tex >= TEX_START_FLOAT_CPU) return kg->texture_float_images[tex - TEX_START_FLOAT_CPU].interp_3d_ex(x, y, z, interpolation); + else if(tex >= TEX_START_HALF4_CPU) + return kg->texture_half4_images[tex - TEX_START_HALF4_CPU].interp_3d_ex(x, y, z, interpolation); else if(tex >= TEX_START_BYTE4_CPU) return kg->texture_byte4_images[tex - TEX_START_BYTE4_CPU].interp_3d_ex(x, y, z, interpolation); else diff --git a/intern/cycles/kernel/osl/osl_globals.h b/intern/cycles/kernel/osl/osl_globals.h index 916542ec628..8353c4e434b 100644 --- a/intern/cycles/kernel/osl/osl_globals.h +++ b/intern/cycles/kernel/osl/osl_globals.h @@ -59,8 +59,7 @@ struct OSLGlobals { /* attributes */ struct Attribute { TypeDesc type; - AttributeElement elem; - int offset; + AttributeDescriptor desc; ParamValue value; }; diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp index caae24405f1..153ebad6cd2 100644 --- a/intern/cycles/kernel/osl/osl_services.cpp +++ b/intern/cycles/kernel/osl/osl_services.cpp @@ -554,13 +554,13 @@ static bool get_mesh_element_attribute(KernelGlobals *kg, const ShaderData *sd, attr.type == TypeDesc::TypeNormal || attr.type == TypeDesc::TypeColor) { float3 fval[3]; - fval[0] = primitive_attribute_float3(kg, sd, attr.elem, attr.offset, + fval[0] = primitive_attribute_float3(kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL); return set_attribute_float3(fval, type, derivatives, val); } else if(attr.type == TypeDesc::TypeFloat) { float fval[3]; - fval[0] = primitive_attribute_float(kg, sd, attr.elem, attr.offset, + fval[0] = primitive_attribute_float(kg, sd, attr.desc, (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL); return set_attribute_float(fval, type, derivatives, val); } @@ -573,7 +573,7 @@ static bool get_mesh_attribute(KernelGlobals *kg, const ShaderData *sd, const OS const TypeDesc& type, bool derivatives, void *val) { if(attr.type == TypeDesc::TypeMatrix) { - Transform tfm = primitive_attribute_matrix(kg, sd, attr.offset); + Transform tfm = primitive_attribute_matrix(kg, sd, attr.desc); return set_attribute_matrix(tfm, type, val); } else { @@ -815,7 +815,7 @@ bool OSLRenderServices::get_attribute(ShaderData *sd, bool derivatives, ustring if(it != attribute_map.end()) { const OSLGlobals::Attribute& attr = it->second; - if(attr.elem != ATTR_ELEMENT_OBJECT) { + if(attr.desc.element != ATTR_ELEMENT_OBJECT) { /* triangle and vertex attributes */ if(get_mesh_element_attribute(kg, sd, attr, type, derivatives, val)) return true; diff --git a/intern/cycles/kernel/osl/osl_shader.cpp b/intern/cycles/kernel/osl/osl_shader.cpp index 784e468635c..43a9e2f13aa 100644 --- a/intern/cycles/kernel/osl/osl_shader.cpp +++ b/intern/cycles/kernel/osl/osl_shader.cpp @@ -340,7 +340,7 @@ void OSLShader::eval_displacement(KernelGlobals *kg, ShaderData *sd, ShaderConte /* Attributes */ -int OSLShader::find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id, AttributeElement *elem) +int OSLShader::find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id, AttributeDescriptor *desc) { /* for OSL, a hash map is used to lookup the attribute by name. */ int object = sd->object*ATTR_PRIM_TYPES; @@ -354,16 +354,23 @@ int OSLShader::find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id, if(it != attr_map.end()) { const OSLGlobals::Attribute &osl_attr = it->second; - *elem = osl_attr.elem; + *desc = osl_attr.desc; - if(sd->prim == PRIM_NONE && (AttributeElement)osl_attr.elem != ATTR_ELEMENT_MESH) + if(sd->prim == PRIM_NONE && (AttributeElement)osl_attr.desc.element != ATTR_ELEMENT_MESH) { + desc->offset = ATTR_STD_NOT_FOUND; return ATTR_STD_NOT_FOUND; + } /* return result */ - return (osl_attr.elem == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : osl_attr.offset; + if(osl_attr.desc.element == ATTR_ELEMENT_NONE) { + desc->offset = ATTR_STD_NOT_FOUND; + } + return desc->offset; } - else + else { + desc->offset = ATTR_STD_NOT_FOUND; return (int)ATTR_STD_NOT_FOUND; + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/osl/osl_shader.h b/intern/cycles/kernel/osl/osl_shader.h index a185b8b8c05..ad06dd6929d 100644 --- a/intern/cycles/kernel/osl/osl_shader.h +++ b/intern/cycles/kernel/osl/osl_shader.h @@ -59,7 +59,7 @@ public: static void eval_displacement(KernelGlobals *kg, ShaderData *sd, ShaderContext ctx); /* attributes */ - static int find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id, AttributeElement *elem); + static int find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id, AttributeDescriptor *desc); }; CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/shaders/node_rgb_curves.osl b/intern/cycles/kernel/shaders/node_rgb_curves.osl index c8e7e4f175b..984b7d47e8f 100644 --- a/intern/cycles/kernel/shaders/node_rgb_curves.osl +++ b/intern/cycles/kernel/shaders/node_rgb_curves.osl @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "stdosl.h" #include "node_ramp_util.h" shader node_rgb_curves( diff --git a/intern/cycles/kernel/shaders/node_rgb_ramp.osl b/intern/cycles/kernel/shaders/node_rgb_ramp.osl index 24b8728b999..4e7d8fdcf65 100644 --- a/intern/cycles/kernel/shaders/node_rgb_ramp.osl +++ b/intern/cycles/kernel/shaders/node_rgb_ramp.osl @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "stdosl.h" #include "node_ramp_util.h" shader node_rgb_ramp( diff --git a/intern/cycles/kernel/shaders/node_vector_curves.osl b/intern/cycles/kernel/shaders/node_vector_curves.osl index d92fa11d439..ff284c48e0a 100644 --- a/intern/cycles/kernel/shaders/node_vector_curves.osl +++ b/intern/cycles/kernel/shaders/node_vector_curves.osl @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "stdosl.h" #include "node_ramp_util.h" shader node_vector_curves( diff --git a/intern/cycles/kernel/svm/svm_attribute.h b/intern/cycles/kernel/svm/svm_attribute.h index bd6013e9205..de978a423b4 100644 --- a/intern/cycles/kernel/svm/svm_attribute.h +++ b/intern/cycles/kernel/svm/svm_attribute.h @@ -18,117 +18,101 @@ CCL_NAMESPACE_BEGIN /* Attribute Node */ -ccl_device void svm_node_attr_init(KernelGlobals *kg, ShaderData *sd, +ccl_device AttributeDescriptor svm_node_attr_init(KernelGlobals *kg, ShaderData *sd, uint4 node, NodeAttributeType *type, - NodeAttributeType *mesh_type, AttributeElement *elem, int *offset, uint *out_offset) + uint *out_offset) { *out_offset = node.z; *type = (NodeAttributeType)node.w; + + AttributeDescriptor desc; + if(ccl_fetch(sd, object) != OBJECT_NONE) { - /* find attribute by unique id */ - uint id = node.y; - uint attr_offset = ccl_fetch(sd, object)*kernel_data.bvh.attributes_map_stride; - attr_offset += attribute_primitive_type(kg, sd); - uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset); - - while(attr_map.x != id) { - if(UNLIKELY(attr_map.x == ATTR_STD_NONE)) { - *elem = ATTR_ELEMENT_NONE; - *offset = 0; - *mesh_type = (NodeAttributeType)node.w; - return; - } - attr_offset += ATTR_PRIM_TYPES; - attr_map = kernel_tex_fetch(__attributes_map, attr_offset); + desc = find_attribute(kg, sd, node.y); + if(desc.offset == ATTR_STD_NOT_FOUND) { + desc.element = ATTR_ELEMENT_NONE; + desc.offset = 0; + desc.type = (NodeAttributeType)node.w; } - - /* return result */ - *elem = (AttributeElement)attr_map.y; - *offset = as_int(attr_map.z); - *mesh_type = (NodeAttributeType)attr_map.w; } else { /* background */ - *elem = ATTR_ELEMENT_NONE; - *offset = 0; - *mesh_type = (NodeAttributeType)node.w; + desc.element = ATTR_ELEMENT_NONE; + desc.offset = 0; + desc.type = (NodeAttributeType)node.w; } + + return desc; } ccl_device void svm_node_attr(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) { - NodeAttributeType type, mesh_type; - AttributeElement elem; + NodeAttributeType type; uint out_offset; - int offset; - - svm_node_attr_init(kg, sd, node, &type, &mesh_type, &elem, &offset, &out_offset); + AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset); /* fetch and store attribute */ if(type == NODE_ATTR_FLOAT) { - if(mesh_type == NODE_ATTR_FLOAT) { - float f = primitive_attribute_float(kg, sd, elem, offset, NULL, NULL); + if(desc.type == NODE_ATTR_FLOAT) { + float f = primitive_attribute_float(kg, sd, desc, NULL, NULL); stack_store_float(stack, out_offset, f); } else { - float3 f = primitive_attribute_float3(kg, sd, elem, offset, NULL, NULL); + float3 f = primitive_attribute_float3(kg, sd, desc, NULL, NULL); stack_store_float(stack, out_offset, average(f)); } } else { - if(mesh_type == NODE_ATTR_FLOAT3) { - float3 f = primitive_attribute_float3(kg, sd, elem, offset, NULL, NULL); + if(desc.type == NODE_ATTR_FLOAT3) { + float3 f = primitive_attribute_float3(kg, sd, desc, NULL, NULL); stack_store_float3(stack, out_offset, f); } else { - float f = primitive_attribute_float(kg, sd, elem, offset, NULL, NULL); + float f = primitive_attribute_float(kg, sd, desc, NULL, NULL); stack_store_float3(stack, out_offset, make_float3(f, f, f)); } } } -#ifndef __KERNEL_CUDS__ +#ifndef __KERNEL_CUDA__ ccl_device #else ccl_device_noinline #endif void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node) { - NodeAttributeType type, mesh_type; - AttributeElement elem; + NodeAttributeType type; uint out_offset; - int offset; - - svm_node_attr_init(kg, sd, node, &type, &mesh_type, &elem, &offset, &out_offset); + AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset); /* fetch and store attribute */ if(type == NODE_ATTR_FLOAT) { - if(mesh_type == NODE_ATTR_FLOAT) { + if(desc.type == NODE_ATTR_FLOAT) { float dx; - float f = primitive_attribute_float(kg, sd, elem, offset, &dx, NULL); + float f = primitive_attribute_float(kg, sd, desc, &dx, NULL); stack_store_float(stack, out_offset, f+dx); } else { float3 dx; - float3 f = primitive_attribute_float3(kg, sd, elem, offset, &dx, NULL); + float3 f = primitive_attribute_float3(kg, sd, desc, &dx, NULL); stack_store_float(stack, out_offset, average(f+dx)); } } else { - if(mesh_type == NODE_ATTR_FLOAT3) { + if(desc.type == NODE_ATTR_FLOAT3) { float3 dx; - float3 f = primitive_attribute_float3(kg, sd, elem, offset, &dx, NULL); + float3 f = primitive_attribute_float3(kg, sd, desc, &dx, NULL); stack_store_float3(stack, out_offset, f+dx); } else { float dx; - float f = primitive_attribute_float(kg, sd, elem, offset, &dx, NULL); + float f = primitive_attribute_float(kg, sd, desc, &dx, NULL); stack_store_float3(stack, out_offset, make_float3(f+dx, f+dx, f+dx)); } } } -#ifndef __KERNEL_CUDS__ +#ifndef __KERNEL_CUDA__ ccl_device #else ccl_device_noinline @@ -138,35 +122,32 @@ void svm_node_attr_bump_dy(KernelGlobals *kg, float *stack, uint4 node) { - NodeAttributeType type, mesh_type; - AttributeElement elem; + NodeAttributeType type; uint out_offset; - int offset; - - svm_node_attr_init(kg, sd, node, &type, &mesh_type, &elem, &offset, &out_offset); + AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset); /* fetch and store attribute */ if(type == NODE_ATTR_FLOAT) { - if(mesh_type == NODE_ATTR_FLOAT) { + if(desc.type == NODE_ATTR_FLOAT) { float dy; - float f = primitive_attribute_float(kg, sd, elem, offset, NULL, &dy); + float f = primitive_attribute_float(kg, sd, desc, NULL, &dy); stack_store_float(stack, out_offset, f+dy); } else { float3 dy; - float3 f = primitive_attribute_float3(kg, sd, elem, offset, NULL, &dy); + float3 f = primitive_attribute_float3(kg, sd, desc, NULL, &dy); stack_store_float(stack, out_offset, average(f+dy)); } } else { - if(mesh_type == NODE_ATTR_FLOAT3) { + if(desc.type == NODE_ATTR_FLOAT3) { float3 dy; - float3 f = primitive_attribute_float3(kg, sd, elem, offset, NULL, &dy); + float3 f = primitive_attribute_float3(kg, sd, desc, NULL, &dy); stack_store_float3(stack, out_offset, f+dy); } else { float dy; - float f = primitive_attribute_float(kg, sd, elem, offset, NULL, &dy); + float f = primitive_attribute_float(kg, sd, desc, NULL, &dy); stack_store_float3(stack, out_offset, make_float3(f+dy, f+dy, f+dy)); } } diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h index b6b90dfff81..5d02be1fa2f 100644 --- a/intern/cycles/kernel/svm/svm_image.h +++ b/intern/cycles/kernel/svm/svm_image.h @@ -18,7 +18,7 @@ CCL_NAMESPACE_BEGIN /* Float4 textures on various devices. */ #if defined(__KERNEL_CPU__) -# define TEX_NUM_FLOAT4_IMAGES TEX_NUM_FLOAT4_CPU +# define TEX_NUM_FLOAT4_IMAGES TEX_NUM_FLOAT4_CPU #elif defined(__KERNEL_CUDA__) # if __CUDA_ARCH__ < 300 # define TEX_NUM_FLOAT4_IMAGES TEX_NUM_FLOAT4_CUDA @@ -36,13 +36,26 @@ CCL_NAMESPACE_BEGIN ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, int id, int offset) { - if(id >= TEX_NUM_FLOAT4_IMAGES) { + /* Float4 */ + if(id < TEX_START_BYTE4_OPENCL) { + return kernel_tex_fetch(__tex_image_float4_packed, offset); + } + /* Byte4 */ + else if(id < TEX_START_FLOAT_OPENCL) { uchar4 r = kernel_tex_fetch(__tex_image_byte4_packed, offset); float f = 1.0f/255.0f; return make_float4(r.x*f, r.y*f, r.z*f, r.w*f); } + /* Float */ + else if(id < TEX_START_BYTE_OPENCL) { + float f = kernel_tex_fetch(__tex_image_float_packed, offset); + return make_float4(f, f, f, 1.0f); + } + /* Byte */ else { - return kernel_tex_fetch(__tex_image_float4_packed, offset); + uchar r = kernel_tex_fetch(__tex_image_byte_packed, offset); + float f = r * (1.0f/255.0f); + return make_float4(f, f, f, 1.0f); } } @@ -277,8 +290,10 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, } # else CUtexObject tex = kernel_tex_fetch(__bindless_mapping, id); - if(id < 2048) /* TODO(dingto): Make this a variable */ + /* float4, byte4 and half4 */ + if(id < TEX_START_FLOAT_CUDA_KEPLER) r = kernel_tex_image_interp_float4(tex, x, y); + /* float, byte and half */ else { float f = kernel_tex_image_interp_float(tex, x, y); r = make_float4(f, f, f, 1.0); diff --git a/intern/cycles/kernel/svm/svm_math_util.h b/intern/cycles/kernel/svm/svm_math_util.h index 3f7d18a02fe..6d13a0d8e02 100644 --- a/intern/cycles/kernel/svm/svm_math_util.h +++ b/intern/cycles/kernel/svm/svm_math_util.h @@ -32,21 +32,17 @@ ccl_device void svm_vector_math(float *Fac, float3 *Vector, NodeVectorMath type, *Fac = average_fac(*Vector); } else if(type == NODE_VECTOR_MATH_AVERAGE) { - *Fac = len(Vector1 + Vector2); - *Vector = normalize(Vector1 + Vector2); + *Vector = safe_normalize_len(Vector1 + Vector2, Fac); } else if(type == NODE_VECTOR_MATH_DOT_PRODUCT) { *Fac = dot(Vector1, Vector2); *Vector = make_float3(0.0f, 0.0f, 0.0f); } else if(type == NODE_VECTOR_MATH_CROSS_PRODUCT) { - float3 c = cross(Vector1, Vector2); - *Fac = len(c); - *Vector = normalize(c); + *Vector = safe_normalize_len(cross(Vector1, Vector2), Fac); } else if(type == NODE_VECTOR_MATH_NORMALIZE) { - *Fac = len(Vector1); - *Vector = normalize(Vector1); + *Vector = safe_normalize_len(Vector1, Fac); } else { *Fac = 0.0f; diff --git a/intern/cycles/kernel/svm/svm_tex_coord.h b/intern/cycles/kernel/svm/svm_tex_coord.h index b39d6a3e009..01dede3fff5 100644 --- a/intern/cycles/kernel/svm/svm_tex_coord.h +++ b/intern/cycles/kernel/svm/svm_tex_coord.h @@ -287,23 +287,22 @@ ccl_device void svm_node_normal_map(KernelGlobals *kg, ShaderData *sd, float *st } /* first try to get tangent attribute */ - AttributeElement attr_elem, attr_sign_elem, attr_normal_elem; - int attr_offset = find_attribute(kg, sd, node.z, &attr_elem); - int attr_sign_offset = find_attribute(kg, sd, node.w, &attr_sign_elem); - int attr_normal_offset = find_attribute(kg, sd, ATTR_STD_VERTEX_NORMAL, &attr_normal_elem); + const AttributeDescriptor attr = find_attribute(kg, sd, node.z); + const AttributeDescriptor attr_sign = find_attribute(kg, sd, node.w); + const AttributeDescriptor attr_normal = find_attribute(kg, sd, ATTR_STD_VERTEX_NORMAL); - if(attr_offset == ATTR_STD_NOT_FOUND || attr_sign_offset == ATTR_STD_NOT_FOUND || attr_normal_offset == ATTR_STD_NOT_FOUND) { + if(attr.offset == ATTR_STD_NOT_FOUND || attr_sign.offset == ATTR_STD_NOT_FOUND || attr_normal.offset == ATTR_STD_NOT_FOUND) { stack_store_float3(stack, normal_offset, make_float3(0.0f, 0.0f, 0.0f)); return; } /* get _unnormalized_ interpolated normal and tangent */ - float3 tangent = primitive_attribute_float3(kg, sd, attr_elem, attr_offset, NULL, NULL); - float sign = primitive_attribute_float(kg, sd, attr_sign_elem, attr_sign_offset, NULL, NULL); + float3 tangent = primitive_attribute_float3(kg, sd, attr, NULL, NULL); + float sign = primitive_attribute_float(kg, sd, attr_sign, NULL, NULL); float3 normal; if(ccl_fetch(sd, shader) & SHADER_SMOOTH_NORMAL) { - normal = primitive_attribute_float3(kg, sd, attr_normal_elem, attr_normal_offset, NULL, NULL); + normal = primitive_attribute_float3(kg, sd, attr_normal, NULL, NULL); } else { normal = ccl_fetch(sd, Ng); @@ -356,24 +355,22 @@ ccl_device void svm_node_tangent(KernelGlobals *kg, ShaderData *sd, float *stack if(direction_type == NODE_TANGENT_UVMAP) { /* UV map */ - AttributeElement attr_elem; - int attr_offset = find_attribute(kg, sd, node.z, &attr_elem); + const AttributeDescriptor desc = find_attribute(kg, sd, node.z); - if(attr_offset == ATTR_STD_NOT_FOUND) + if(desc.offset == ATTR_STD_NOT_FOUND) tangent = make_float3(0.0f, 0.0f, 0.0f); else - tangent = primitive_attribute_float3(kg, sd, attr_elem, attr_offset, NULL, NULL); + tangent = primitive_attribute_float3(kg, sd, desc, NULL, NULL); } else { /* radial */ - AttributeElement attr_elem; - int attr_offset = find_attribute(kg, sd, node.z, &attr_elem); + const AttributeDescriptor desc = find_attribute(kg, sd, node.z); float3 generated; - if(attr_offset == ATTR_STD_NOT_FOUND) + if(desc.offset == ATTR_STD_NOT_FOUND) generated = ccl_fetch(sd, P); else - generated = primitive_attribute_float3(kg, sd, attr_elem, attr_offset, NULL, NULL); + generated = primitive_attribute_float3(kg, sd, desc, NULL, NULL); if(axis == NODE_TANGENT_AXIS_X) tangent = make_float3(0.0f, -(generated.z - 0.5f), (generated.y - 0.5f)); diff --git a/intern/cycles/render/attribute.cpp b/intern/cycles/render/attribute.cpp index e8ff81fe08e..c0d429a583c 100644 --- a/intern/cycles/render/attribute.cpp +++ b/intern/cycles/render/attribute.cpp @@ -44,6 +44,7 @@ void Attribute::set(ustring name_, TypeDesc type_, AttributeElement element_) type = type_; element = element_; std = ATTR_STD_NONE; + flags = 0; /* string and matrix not supported! */ assert(type == TypeDesc::TypeFloat || type == TypeDesc::TypeColor || @@ -61,6 +62,11 @@ void Attribute::resize(Mesh *mesh, AttributePrimitive prim, bool reserve_only) } } +void Attribute::resize(size_t num_elements) +{ + buffer.resize(num_elements * data_sizeof(), 0); +} + void Attribute::add(const float& f) { char *data = (char*)&f; @@ -130,6 +136,10 @@ size_t Attribute::data_sizeof() const size_t Attribute::element_size(Mesh *mesh, AttributePrimitive prim) const { + if(flags & ATTR_FINAL_SIZE) { + return buffer.size() / data_sizeof(); + } + size_t size; switch(element) { @@ -517,16 +527,19 @@ AttributeRequest::AttributeRequest(ustring name_) std = ATTR_STD_NONE; triangle_type = TypeDesc::TypeFloat; - triangle_element = ATTR_ELEMENT_NONE; - triangle_offset = 0; + triangle_desc.element = ATTR_ELEMENT_NONE; + triangle_desc.offset = 0; + triangle_desc.type = NODE_ATTR_FLOAT; curve_type = TypeDesc::TypeFloat; - curve_element = ATTR_ELEMENT_NONE; - curve_offset = 0; + curve_desc.element = ATTR_ELEMENT_NONE; + curve_desc.offset = 0; + curve_desc.type = NODE_ATTR_FLOAT; subd_type = TypeDesc::TypeFloat; - subd_element = ATTR_ELEMENT_NONE; - subd_offset = 0; + subd_desc.element = ATTR_ELEMENT_NONE; + subd_desc.offset = 0; + subd_desc.type = NODE_ATTR_FLOAT; } AttributeRequest::AttributeRequest(AttributeStandard std_) @@ -535,16 +548,19 @@ AttributeRequest::AttributeRequest(AttributeStandard std_) std = std_; triangle_type = TypeDesc::TypeFloat; - triangle_element = ATTR_ELEMENT_NONE; - triangle_offset = 0; + triangle_desc.element = ATTR_ELEMENT_NONE; + triangle_desc.offset = 0; + triangle_desc.type = NODE_ATTR_FLOAT; curve_type = TypeDesc::TypeFloat; - curve_element = ATTR_ELEMENT_NONE; - curve_offset = 0; + curve_desc.element = ATTR_ELEMENT_NONE; + curve_desc.offset = 0; + curve_desc.type = NODE_ATTR_FLOAT; subd_type = TypeDesc::TypeFloat; - subd_element = ATTR_ELEMENT_NONE; - subd_offset = 0; + subd_desc.element = ATTR_ELEMENT_NONE; + subd_desc.offset = 0; + subd_desc.type = NODE_ATTR_FLOAT; } /* AttributeRequestSet */ diff --git a/intern/cycles/render/attribute.h b/intern/cycles/render/attribute.h index e51bdf28d66..f4538c76369 100644 --- a/intern/cycles/render/attribute.h +++ b/intern/cycles/render/attribute.h @@ -54,11 +54,13 @@ public: TypeDesc type; vector<char> buffer; AttributeElement element; + uint flags; /* enum AttributeFlag */ Attribute() {} ~Attribute(); void set(ustring name, TypeDesc type, AttributeElement element); void resize(Mesh *mesh, AttributePrimitive prim, bool reserve_only); + void resize(size_t num_elements); size_t data_sizeof() const; size_t element_size(Mesh *mesh, AttributePrimitive prim) const; @@ -135,8 +137,7 @@ public: /* temporary variables used by MeshManager */ TypeDesc triangle_type, curve_type, subd_type; - AttributeElement triangle_element, curve_element, subd_element; - int triangle_offset, curve_offset, subd_offset; + AttributeDescriptor triangle_desc, curve_desc, subd_desc; explicit AttributeRequest(ustring name_); explicit AttributeRequest(AttributeStandard std); diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp index 614620c14af..24543601ef9 100644 --- a/intern/cycles/render/image.cpp +++ b/intern/cycles/render/image.cpp @@ -52,15 +52,15 @@ ImageManager::ImageManager(const DeviceInfo& info) { \ tex_num_images[IMAGE_DATA_TYPE_FLOAT4] = TEX_NUM_FLOAT4_ ## ARCH; \ tex_num_images[IMAGE_DATA_TYPE_BYTE4] = TEX_NUM_BYTE4_ ## ARCH; \ + tex_num_images[IMAGE_DATA_TYPE_HALF4] = TEX_NUM_HALF4_ ## ARCH; \ tex_num_images[IMAGE_DATA_TYPE_FLOAT] = TEX_NUM_FLOAT_ ## ARCH; \ tex_num_images[IMAGE_DATA_TYPE_BYTE] = TEX_NUM_BYTE_ ## ARCH; \ - tex_num_images[IMAGE_DATA_TYPE_HALF4] = TEX_NUM_HALF4_ ## ARCH; \ tex_num_images[IMAGE_DATA_TYPE_HALF] = TEX_NUM_HALF_ ## ARCH; \ tex_start_images[IMAGE_DATA_TYPE_FLOAT4] = TEX_START_FLOAT4_ ## ARCH; \ tex_start_images[IMAGE_DATA_TYPE_BYTE4] = TEX_START_BYTE4_ ## ARCH; \ + tex_start_images[IMAGE_DATA_TYPE_HALF4] = TEX_START_HALF4_ ## ARCH; \ tex_start_images[IMAGE_DATA_TYPE_FLOAT] = TEX_START_FLOAT_ ## ARCH; \ tex_start_images[IMAGE_DATA_TYPE_BYTE] = TEX_START_BYTE_ ## ARCH; \ - tex_start_images[IMAGE_DATA_TYPE_HALF4] = TEX_START_HALF4_ ## ARCH; \ tex_start_images[IMAGE_DATA_TYPE_HALF] = TEX_START_HALF_ ## ARCH; \ } @@ -82,15 +82,15 @@ ImageManager::ImageManager(const DeviceInfo& info) /* Should not happen. */ tex_num_images[IMAGE_DATA_TYPE_FLOAT4] = 0; tex_num_images[IMAGE_DATA_TYPE_BYTE4] = 0; + tex_num_images[IMAGE_DATA_TYPE_HALF4] = 0; tex_num_images[IMAGE_DATA_TYPE_FLOAT] = 0; tex_num_images[IMAGE_DATA_TYPE_BYTE] = 0; - tex_num_images[IMAGE_DATA_TYPE_HALF4] = 0; tex_num_images[IMAGE_DATA_TYPE_HALF] = 0; tex_start_images[IMAGE_DATA_TYPE_FLOAT4] = 0; tex_start_images[IMAGE_DATA_TYPE_BYTE4] = 0; + tex_start_images[IMAGE_DATA_TYPE_HALF4] = 0; tex_start_images[IMAGE_DATA_TYPE_FLOAT] = 0; tex_start_images[IMAGE_DATA_TYPE_BYTE] = 0; - tex_start_images[IMAGE_DATA_TYPE_HALF4] = 0; tex_start_images[IMAGE_DATA_TYPE_HALF] = 0; assert(0); } @@ -216,7 +216,7 @@ ImageManager::ImageDataType ImageManager::get_image_metadata(const string& filen } /* We use a consecutive slot counting scheme on the devices, in order - * float4, byte4, float, byte. + * float4, byte4, half4, float, byte, half. * These functions convert the slot ids from ImageManager "images" ones * to device ones and vice versa. */ int ImageManager::type_index_to_flattened_slot(int slot, ImageDataType type) @@ -284,7 +284,7 @@ int ImageManager::add_image(const string& filename, if(type == IMAGE_DATA_TYPE_FLOAT || type == IMAGE_DATA_TYPE_FLOAT4) is_float = true; - /* No single channel and half textures on CUDA (Fermi) and OpenCL, use available slots */ + /* No single channel and half textures on CUDA (Fermi) and no half on OpenCL, use available slots */ if((type == IMAGE_DATA_TYPE_FLOAT || type == IMAGE_DATA_TYPE_HALF4 || type == IMAGE_DATA_TYPE_HALF) && @@ -1105,10 +1105,11 @@ void ImageManager::device_pack_images(Device *device, size_t size = 0, offset = 0; ImageDataType type; - int info_size = tex_num_images[IMAGE_DATA_TYPE_FLOAT4] + tex_num_images[IMAGE_DATA_TYPE_BYTE4]; + int info_size = tex_num_images[IMAGE_DATA_TYPE_FLOAT4] + tex_num_images[IMAGE_DATA_TYPE_BYTE4] + + tex_num_images[IMAGE_DATA_TYPE_FLOAT] + tex_num_images[IMAGE_DATA_TYPE_BYTE]; uint4 *info = dscene->tex_image_packed_info.resize(info_size); - /* Byte Textures*/ + /* Byte4 Textures*/ type = IMAGE_DATA_TYPE_BYTE4; for(size_t slot = 0; slot < images[type].size(); slot++) { @@ -1119,7 +1120,7 @@ void ImageManager::device_pack_images(Device *device, size += tex_img.size(); } - uchar4 *pixels_byte = dscene->tex_image_byte4_packed.resize(size); + uchar4 *pixels_byte4 = dscene->tex_image_byte4_packed.resize(size); for(size_t slot = 0; slot < images[type].size(); slot++) { if(!images[type][slot]) @@ -1131,11 +1132,11 @@ void ImageManager::device_pack_images(Device *device, info[type_index_to_flattened_slot(slot, type)] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options); - memcpy(pixels_byte+offset, (void*)tex_img.data_pointer, tex_img.memory_size()); + memcpy(pixels_byte4+offset, (void*)tex_img.data_pointer, tex_img.memory_size()); offset += tex_img.size(); } - /* Float Textures*/ + /* Float4 Textures*/ type = IMAGE_DATA_TYPE_FLOAT4; size = 0, offset = 0; @@ -1147,7 +1148,7 @@ void ImageManager::device_pack_images(Device *device, size += tex_img.size(); } - float4 *pixels_float = dscene->tex_image_float4_packed.resize(size); + float4 *pixels_float4 = dscene->tex_image_float4_packed.resize(size); for(size_t slot = 0; slot < images[type].size(); slot++) { if(!images[type][slot]) @@ -1160,6 +1161,63 @@ void ImageManager::device_pack_images(Device *device, uint8_t options = pack_image_options(type, slot); info[type_index_to_flattened_slot(slot, type)] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options); + memcpy(pixels_float4+offset, (void*)tex_img.data_pointer, tex_img.memory_size()); + offset += tex_img.size(); + } + + /* Byte Textures*/ + type = IMAGE_DATA_TYPE_BYTE; + size = 0, offset = 0; + + for(size_t slot = 0; slot < images[type].size(); slot++) { + if(!images[type][slot]) + continue; + + device_vector<uchar>& tex_img = dscene->tex_byte_image[slot]; + size += tex_img.size(); + } + + uchar *pixels_byte = dscene->tex_image_byte_packed.resize(size); + + for(size_t slot = 0; slot < images[type].size(); slot++) { + if(!images[type][slot]) + continue; + + device_vector<uchar>& tex_img = dscene->tex_byte_image[slot]; + + uint8_t options = pack_image_options(type, slot); + + info[type_index_to_flattened_slot(slot, type)] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options); + + memcpy(pixels_byte+offset, (void*)tex_img.data_pointer, tex_img.memory_size()); + offset += tex_img.size(); + } + + /* Float Textures*/ + type = IMAGE_DATA_TYPE_FLOAT; + size = 0, offset = 0; + + for(size_t slot = 0; slot < images[type].size(); slot++) { + if(!images[type][slot]) + continue; + + device_vector<float>& tex_img = dscene->tex_float_image[slot]; + size += tex_img.size(); + } + + float *pixels_float = dscene->tex_image_float_packed.resize(size); + + for(size_t slot = 0; slot < images[type].size(); slot++) { + if(!images[type][slot]) + continue; + + device_vector<float>& tex_img = dscene->tex_float_image[slot]; + + /* todo: support 3D textures, only CPU for now */ + + uint8_t options = pack_image_options(type, slot); + info[type_index_to_flattened_slot(slot, type)] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options); + memcpy(pixels_float+offset, (void*)tex_img.data_pointer, tex_img.memory_size()); offset += tex_img.size(); } @@ -1178,6 +1236,20 @@ void ImageManager::device_pack_images(Device *device, } device->tex_alloc("__tex_image_float4_packed", dscene->tex_image_float4_packed); } + if(dscene->tex_image_byte_packed.size()) { + if(dscene->tex_image_byte_packed.device_pointer) { + thread_scoped_lock device_lock(device_mutex); + device->tex_free(dscene->tex_image_byte_packed); + } + device->tex_alloc("__tex_image_byte_packed", dscene->tex_image_byte_packed); + } + if(dscene->tex_image_float_packed.size()) { + if(dscene->tex_image_float_packed.device_pointer) { + thread_scoped_lock device_lock(device_mutex); + device->tex_free(dscene->tex_image_float_packed); + } + device->tex_alloc("__tex_image_float_packed", dscene->tex_image_float_packed); + } if(dscene->tex_image_packed_info.size()) { if(dscene->tex_image_packed_info.device_pointer) { thread_scoped_lock device_lock(device_mutex); @@ -1208,10 +1280,14 @@ void ImageManager::device_free(Device *device, DeviceScene *dscene) device->tex_free(dscene->tex_image_byte4_packed); device->tex_free(dscene->tex_image_float4_packed); + device->tex_free(dscene->tex_image_byte_packed); + device->tex_free(dscene->tex_image_float_packed); device->tex_free(dscene->tex_image_packed_info); dscene->tex_image_byte4_packed.clear(); dscene->tex_image_float4_packed.clear(); + dscene->tex_image_byte_packed.clear(); + dscene->tex_image_float_packed.clear(); dscene->tex_image_packed_info.clear(); } diff --git a/intern/cycles/render/image.h b/intern/cycles/render/image.h index 07998684b23..cca71a6bb93 100644 --- a/intern/cycles/render/image.h +++ b/intern/cycles/render/image.h @@ -39,9 +39,9 @@ public: enum ImageDataType { IMAGE_DATA_TYPE_FLOAT4 = 0, IMAGE_DATA_TYPE_BYTE4 = 1, - IMAGE_DATA_TYPE_FLOAT = 2, - IMAGE_DATA_TYPE_BYTE = 3, - IMAGE_DATA_TYPE_HALF4 = 4, + IMAGE_DATA_TYPE_HALF4 = 2, + IMAGE_DATA_TYPE_FLOAT = 3, + IMAGE_DATA_TYPE_BYTE = 4, IMAGE_DATA_TYPE_HALF = 5, IMAGE_DATA_NUM_TYPES diff --git a/intern/cycles/render/mesh.cpp b/intern/cycles/render/mesh.cpp index 4cf0a785897..fcf4e69984d 100644 --- a/intern/cycles/render/mesh.cpp +++ b/intern/cycles/render/mesh.cpp @@ -30,6 +30,9 @@ #include "osl_globals.h" +#include "subd_split.h" +#include "subd_patch_table.h" + #include "util_foreach.h" #include "util_logging.h" #include "util_progress.h" @@ -112,19 +115,12 @@ float3 Mesh::SubdFace::normal(const Mesh *mesh) const return safe_normalize(cross(v1 - v0, v2 - v0)); } - /* Mesh */ NODE_DEFINE(Mesh) { NodeType* type = NodeType::add("mesh", create); - static NodeEnum displacement_method_enum; - displacement_method_enum.insert("bump", DISPLACE_BUMP); - displacement_method_enum.insert("true", DISPLACE_TRUE); - displacement_method_enum.insert("both", DISPLACE_BOTH); - SOCKET_ENUM(displacement_method, "Displacement Method", displacement_method_enum, DISPLACE_BUMP); - SOCKET_UINT(motion_steps, "Motion Steps", 3); SOCKET_BOOLEAN(use_motion_blur, "Use Motion Blur", false); @@ -177,11 +173,16 @@ Mesh::Mesh() num_ngons = 0; subdivision_type = SUBDIVISION_NONE; + subd_params = NULL; + + patch_table = NULL; } Mesh::~Mesh() { delete bvh; + delete patch_table; + delete subd_params; } void Mesh::resize_mesh(int numverts, int numtris) @@ -274,6 +275,8 @@ void Mesh::clear() num_subd_verts = 0; + subd_creases.clear(); + attributes.clear(); curve_attributes.clear(); subd_attributes.clear(); @@ -283,6 +286,9 @@ void Mesh::clear() transform_negative_scaled = false; transform_normal = transform_identity(); geometry_flags = GEOMETRY_NONE; + + delete patch_table; + patch_table = NULL; } int Mesh::split_vertex(int vertex) @@ -705,7 +711,6 @@ void Mesh::pack_patches(uint *patch_data, uint vert_offset, uint face_offset, ui } } - void Mesh::compute_bvh(DeviceScene *dscene, SceneParams *params, Progress *progress, @@ -779,6 +784,17 @@ bool Mesh::has_motion_blur() const curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION))); } +bool Mesh::has_true_displacement() const +{ + foreach(Shader *shader, used_shaders) { + if(shader->has_displacement && shader->displacement_method != DISPLACE_BUMP) { + return true; + } + } + + return false; +} + bool Mesh::need_build_bvh() const { return !transform_applied || has_surface_bssrdf; @@ -831,9 +847,10 @@ void MeshManager::update_osl_attributes(Device *device, Scene *scene, vector<Att OSLGlobals::Attribute osl_attr; osl_attr.type = attr.type(); - osl_attr.elem = ATTR_ELEMENT_OBJECT; + osl_attr.desc.element = ATTR_ELEMENT_OBJECT; osl_attr.value = attr; - osl_attr.offset = 0; + osl_attr.desc.offset = 0; + osl_attr.desc.flags = 0; og->attribute_map[i*ATTR_PRIM_TYPES + ATTR_PRIM_TRIANGLE][attr.name()] = osl_attr; og->attribute_map[i*ATTR_PRIM_TYPES + ATTR_PRIM_CURVE][attr.name()] = osl_attr; @@ -853,9 +870,8 @@ void MeshManager::update_osl_attributes(Device *device, Scene *scene, vector<Att foreach(AttributeRequest& req, attributes.requests) { OSLGlobals::Attribute osl_attr; - if(req.triangle_element != ATTR_ELEMENT_NONE) { - osl_attr.elem = req.triangle_element; - osl_attr.offset = req.triangle_offset; + if(req.triangle_desc.element != ATTR_ELEMENT_NONE) { + osl_attr.desc = req.triangle_desc; if(req.triangle_type == TypeDesc::TypeFloat) osl_attr.type = TypeDesc::TypeFloat; @@ -875,9 +891,8 @@ void MeshManager::update_osl_attributes(Device *device, Scene *scene, vector<Att } } - if(req.curve_element != ATTR_ELEMENT_NONE) { - osl_attr.elem = req.curve_element; - osl_attr.offset = req.curve_offset; + if(req.curve_desc.element != ATTR_ELEMENT_NONE) { + osl_attr.desc = req.curve_desc; if(req.curve_type == TypeDesc::TypeFloat) osl_attr.type = TypeDesc::TypeFloat; @@ -897,9 +912,8 @@ void MeshManager::update_osl_attributes(Device *device, Scene *scene, vector<Att } } - if(req.subd_element != ATTR_ELEMENT_NONE) { - osl_attr.elem = req.subd_element; - osl_attr.offset = req.subd_offset; + if(req.subd_desc.element != ATTR_ELEMENT_NONE) { + osl_attr.desc = req.subd_desc; if(req.subd_type == TypeDesc::TypeFloat) osl_attr.type = TypeDesc::TypeFloat; @@ -971,8 +985,8 @@ void MeshManager::update_svm_attributes(Device *device, DeviceScene *dscene, Sce if(mesh->num_triangles()) { attr_map[index].x = id; - attr_map[index].y = req.triangle_element; - attr_map[index].z = as_uint(req.triangle_offset); + attr_map[index].y = req.triangle_desc.element; + attr_map[index].z = as_uint(req.triangle_desc.offset); if(req.triangle_type == TypeDesc::TypeFloat) attr_map[index].w = NODE_ATTR_FLOAT; @@ -980,14 +994,16 @@ void MeshManager::update_svm_attributes(Device *device, DeviceScene *dscene, Sce attr_map[index].w = NODE_ATTR_MATRIX; else attr_map[index].w = NODE_ATTR_FLOAT3; + + attr_map[index].w |= req.triangle_desc.flags << 8; } index++; if(mesh->num_curves()) { attr_map[index].x = id; - attr_map[index].y = req.curve_element; - attr_map[index].z = as_uint(req.curve_offset); + attr_map[index].y = req.curve_desc.element; + attr_map[index].z = as_uint(req.curve_desc.offset); if(req.curve_type == TypeDesc::TypeFloat) attr_map[index].w = NODE_ATTR_FLOAT; @@ -995,14 +1011,16 @@ void MeshManager::update_svm_attributes(Device *device, DeviceScene *dscene, Sce attr_map[index].w = NODE_ATTR_MATRIX; else attr_map[index].w = NODE_ATTR_FLOAT3; + + attr_map[index].w |= req.curve_desc.flags << 8; } index++; if(mesh->subd_faces.size()) { attr_map[index].x = id; - attr_map[index].y = req.subd_element; - attr_map[index].z = as_uint(req.subd_offset); + attr_map[index].y = req.subd_desc.element; + attr_map[index].z = as_uint(req.subd_desc.offset); if(req.subd_type == TypeDesc::TypeFloat) attr_map[index].w = NODE_ATTR_FLOAT; @@ -1010,6 +1028,8 @@ void MeshManager::update_svm_attributes(Device *device, DeviceScene *dscene, Sce attr_map[index].w = NODE_ATTR_MATRIX; else attr_map[index].w = NODE_ATTR_FLOAT3; + + attr_map[index].w |= req.subd_desc.flags << 8; } index++; @@ -1069,17 +1089,20 @@ static void update_attribute_element_offset(Mesh *mesh, Attribute *mattr, AttributePrimitive prim, TypeDesc& type, - int& offset, - AttributeElement& element) + AttributeDescriptor& desc) { if(mattr) { /* store element and type */ - element = mattr->element; + desc.element = mattr->element; + desc.flags = mattr->flags; type = mattr->type; /* store attribute data in arrays */ size_t size = mattr->element_size(mesh, prim); + AttributeElement& element = desc.element; + int& offset = desc.offset; + if(mattr->element == ATTR_ELEMENT_VOXEL) { /* store slot in offset value */ VoxelAttribute *voxel_data = mattr->data_voxel(); @@ -1128,7 +1151,11 @@ static void update_attribute_element_offset(Mesh *mesh, /* mesh vertex/curve index is global, not per object, so we sneak * a correction for that in here */ - if(element == ATTR_ELEMENT_VERTEX) + if(mesh->subdivision_type == Mesh::SUBDIVISION_CATMULL_CLARK && desc.flags & ATTR_SUBDIVIDED) { + /* indices for subdivided attributes are retrieved + * from patch table so no need for correction here*/ + } + else if(element == ATTR_ELEMENT_VERTEX) offset -= mesh->vert_offset; else if(element == ATTR_ELEMENT_VERTEX_MOTION) offset -= mesh->vert_offset; @@ -1153,8 +1180,8 @@ static void update_attribute_element_offset(Mesh *mesh, } else { /* attribute not found */ - element = ATTR_ELEMENT_NONE; - offset = 0; + desc.element = ATTR_ELEMENT_NONE; + desc.offset = 0; } } @@ -1243,8 +1270,7 @@ void MeshManager::device_update_attributes(Device *device, DeviceScene *dscene, triangle_mattr, ATTR_PRIM_TRIANGLE, req.triangle_type, - req.triangle_offset, - req.triangle_element); + req.triangle_desc); update_attribute_element_offset(mesh, attr_float, attr_float_offset, @@ -1253,8 +1279,7 @@ void MeshManager::device_update_attributes(Device *device, DeviceScene *dscene, curve_mattr, ATTR_PRIM_CURVE, req.curve_type, - req.curve_offset, - req.curve_element); + req.curve_desc); update_attribute_element_offset(mesh, attr_float, attr_float_offset, @@ -1263,8 +1288,7 @@ void MeshManager::device_update_attributes(Device *device, DeviceScene *dscene, subd_mattr, ATTR_PRIM_SUBD, req.subd_type, - req.subd_offset, - req.subd_element); + req.subd_desc); if(progress.get_cancel()) return; } @@ -1327,6 +1351,12 @@ void MeshManager::mesh_calc_offset(Scene *scene) if(mesh->subd_faces.size()) { Mesh::SubdFace& last = mesh->subd_faces[mesh->subd_faces.size()-1]; patch_size += (last.ptex_offset + last.num_ptex_faces()) * 8; + + /* patch tables are stored in same array so include them in patch_size */ + if(mesh->patch_table) { + mesh->patch_table_offset = patch_size; + patch_size += mesh->patch_table->total_size(); + } } face_size += mesh->subd_faces.size(); corner_size += mesh->subd_face_corners.size(); @@ -1358,6 +1388,12 @@ void MeshManager::device_update_mesh(Device *device, if(mesh->subd_faces.size()) { Mesh::SubdFace& last = mesh->subd_faces[mesh->subd_faces.size()-1]; patch_size += (last.ptex_offset + last.num_ptex_faces()) * 8; + + /* patch tables are stored in same array so include them in patch_size */ + if(mesh->patch_table) { + mesh->patch_table_offset = patch_size; + patch_size += mesh->patch_table->total_size(); + } } } @@ -1440,6 +1476,11 @@ void MeshManager::device_update_mesh(Device *device, foreach(Mesh *mesh, scene->meshes) { mesh->pack_patches(&patch_data[mesh->patch_offset], mesh->vert_offset, mesh->face_offset, mesh->corner_offset); + + if(mesh->patch_table) { + mesh->patch_table->copy_adjusting_offsets(&patch_data[mesh->patch_table_offset], mesh->patch_table_offset); + } + if(progress.get_cancel()) return; } @@ -1621,12 +1662,48 @@ void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scen } } + /* Tessellate meshes that are using subdivision */ + size_t total_tess_needed = 0; + foreach(Mesh *mesh, scene->meshes) { + if(mesh->need_update && + mesh->subdivision_type != Mesh::SUBDIVISION_NONE && + mesh->num_subd_verts == 0 && + mesh->subd_params) + { + total_tess_needed++; + } + } + + size_t i = 0; + foreach(Mesh *mesh, scene->meshes) { + if(mesh->need_update && + mesh->subdivision_type != Mesh::SUBDIVISION_NONE && + mesh->num_subd_verts == 0 && + mesh->subd_params) + { + string msg = "Tessellating "; + if(mesh->name == "") + msg += string_printf("%u/%u", (uint)(i+1), (uint)total_tess_needed); + else + msg += string_printf("%s %u/%u", mesh->name.c_str(), (uint)(i+1), (uint)total_tess_needed); + + progress.set_status("Updating Mesh", msg); + + DiagSplit dsplit(*mesh->subd_params); + mesh->tessellate(&dsplit); + + i++; + + if(progress.get_cancel()) return; + } + } + /* Update images needed for true displacement. */ bool true_displacement_used = false; bool old_need_object_flags_update = false; foreach(Mesh *mesh, scene->meshes) { if(mesh->need_update && - mesh->displacement_method != Mesh::DISPLACE_BUMP) + mesh->has_true_displacement()) { true_displacement_used = true; break; @@ -1652,6 +1729,10 @@ void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scen } if(progress.get_cancel()) return; + /* after mesh data has been copied to device memory we need to update + * offsets for patch tables as this can't be known before hand */ + scene->object_manager->device_update_patch_map_offsets(device, dscene, scene); + device_update_attributes(device, dscene, scene, progress); if(progress.get_cancel()) return; @@ -1677,7 +1758,7 @@ void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scen } /* Update bvh. */ - size_t i = 0, num_bvh = 0; + size_t num_bvh = 0; foreach(Mesh *mesh, scene->meshes) { if(mesh->need_update && mesh->need_build_bvh()) { num_bvh++; @@ -1686,6 +1767,7 @@ void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scen TaskPool pool; + i = 0; foreach(Mesh *mesh, scene->meshes) { if(mesh->need_update) { pool.push(function_bind(&Mesh::compute_bvh, diff --git a/intern/cycles/render/mesh.h b/intern/cycles/render/mesh.h index c9ae9aab888..a77e296ea4a 100644 --- a/intern/cycles/render/mesh.h +++ b/intern/cycles/render/mesh.h @@ -39,7 +39,9 @@ class Progress; class Scene; class SceneParams; class AttributeRequest; +struct SubdParams; class DiagSplit; +struct PackedPatchTable; /* Mesh */ @@ -110,13 +112,9 @@ public: int num_ptex_faces() const { return num_corners == 4 ? 1 : num_corners; } }; - /* Displacement */ - enum DisplacementMethod { - DISPLACE_BUMP = 0, - DISPLACE_TRUE = 1, - DISPLACE_BOTH = 2, - - DISPLACE_NUM_METHODS, + struct SubdEdgeCrease { + int v[2]; + float crease; }; enum SubdivisionType { @@ -157,6 +155,10 @@ public: array<int> subd_face_corners; int num_ngons; + array<SubdEdgeCrease> subd_creases; + + SubdParams *subd_params; + vector<Shader*> used_shaders; AttributeSet attributes; AttributeSet curve_attributes; @@ -166,7 +168,8 @@ public: bool transform_applied; bool transform_negative_scaled; Transform transform_normal; - DisplacementMethod displacement_method; + + PackedPatchTable *patch_table; uint motion_steps; bool use_motion_blur; @@ -184,6 +187,7 @@ public: size_t curvekey_offset; size_t patch_offset; + size_t patch_table_offset; size_t face_offset; size_t corner_offset; @@ -234,6 +238,7 @@ public: void tag_update(Scene *scene, bool rebuild); bool has_motion_blur() const; + bool has_true_displacement() const; /* Check whether the mesh should have own BVH built separately. Briefly, * own BVH is needed for mesh, if: diff --git a/intern/cycles/render/mesh_displace.cpp b/intern/cycles/render/mesh_displace.cpp index 95f46ff02a2..ef9cfedd412 100644 --- a/intern/cycles/render/mesh_displace.cpp +++ b/intern/cycles/render/mesh_displace.cpp @@ -26,19 +26,27 @@ CCL_NAMESPACE_BEGIN +static float3 compute_face_normal(const Mesh::Triangle& t, float3 *verts) +{ + float3 v0 = verts[t.v[0]]; + float3 v1 = verts[t.v[1]]; + float3 v2 = verts[t.v[2]]; + + float3 norm = cross(v1 - v0, v2 - v0); + float normlen = len(norm); + + if(normlen == 0.0f) + return make_float3(1.0f, 0.0f, 0.0f); + + return norm / normlen; +} + bool MeshManager::displace(Device *device, DeviceScene *dscene, Scene *scene, Mesh *mesh, Progress& progress) { /* verify if we have a displacement shader */ - bool has_displacement = false; - - if(mesh->displacement_method != Mesh::DISPLACE_BUMP) { - foreach(Shader *shader, mesh->used_shaders) - if(shader->has_displacement) - has_displacement = true; - } - - if(!has_displacement) + if(!mesh->has_true_displacement()) { return false; + } string msg = string_printf("Computing Displacement %s", mesh->name.c_str()); progress.set_status("Updating Mesh", msg); @@ -67,8 +75,9 @@ bool MeshManager::displace(Device *device, DeviceScene *dscene, Scene *scene, Me Shader *shader = (shader_index < mesh->used_shaders.size()) ? mesh->used_shaders[shader_index] : scene->default_surface; - if(!shader->has_displacement) + if(!shader->has_displacement || shader->displacement_method == DISPLACE_BUMP) { continue; + } for(int j = 0; j < 3; j++) { if(done[t.v[j]]) @@ -153,8 +162,9 @@ bool MeshManager::displace(Device *device, DeviceScene *dscene, Scene *scene, Me Shader *shader = (shader_index < mesh->used_shaders.size()) ? mesh->used_shaders[shader_index] : scene->default_surface; - if(!shader->has_displacement) + if(!shader->has_displacement || shader->displacement_method == DISPLACE_BUMP) { continue; + } for(int j = 0; j < 3; j++) { if(!done[t.v[j]]) { @@ -178,9 +188,131 @@ bool MeshManager::displace(Device *device, DeviceScene *dscene, Scene *scene, Me mesh->attributes.remove(ATTR_STD_FACE_NORMAL); mesh->add_face_normals(); - if(mesh->displacement_method == Mesh::DISPLACE_TRUE) { - mesh->attributes.remove(ATTR_STD_VERTEX_NORMAL); - mesh->add_vertex_normals(); + bool need_recompute_vertex_normals = false; + + foreach(Shader *shader, mesh->used_shaders) { + if(shader->has_displacement && shader->displacement_method == DISPLACE_TRUE) { + need_recompute_vertex_normals = true; + break; + } + } + + if(need_recompute_vertex_normals) { + bool flip = mesh->transform_negative_scaled; + vector<bool> tri_has_true_disp(num_triangles, false); + + for(size_t i = 0; i < num_triangles; i++) { + int shader_index = mesh->shader[i]; + Shader *shader = (shader_index < mesh->used_shaders.size()) ? + mesh->used_shaders[shader_index] : scene->default_surface; + + tri_has_true_disp[i] = shader->has_displacement && shader->displacement_method == DISPLACE_TRUE; + } + + /* static vertex normals */ + + /* get attributes */ + Attribute *attr_fN = mesh->attributes.find(ATTR_STD_FACE_NORMAL); + Attribute *attr_vN = mesh->attributes.find(ATTR_STD_VERTEX_NORMAL); + + float3 *fN = attr_fN->data_float3(); + float3 *vN = attr_vN->data_float3(); + + /* compute vertex normals */ + + /* zero vertex normals on triangles with true displacement */ + for(size_t i = 0; i < num_triangles; i++) { + if(tri_has_true_disp[i]) { + for(size_t j = 0; j < 3; j++) { + vN[mesh->get_triangle(i).v[j]] = make_float3(0.0f, 0.0f, 0.0f); + } + } + } + + /* add face normals to vertex normals */ + for(size_t i = 0; i < num_triangles; i++) { + if(tri_has_true_disp[i]) { + for(size_t j = 0; j < 3; j++) { + vN[mesh->get_triangle(i).v[j]] += fN[i]; + } + } + } + + /* normalize vertex normals */ + done.clear(); + done.resize(num_verts, false); + + for(size_t i = 0; i < num_triangles; i++) { + if(tri_has_true_disp[i]) { + for(size_t j = 0; j < 3; j++) { + int vert = mesh->get_triangle(i).v[j]; + + if(done[vert]) { + continue; + } + + vN[vert] = normalize(vN[vert]); + if(flip) + vN[vert] = -vN[vert]; + + done[vert] = true; + } + } + } + + /* motion vertex normals */ + Attribute *attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + Attribute *attr_mN = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_NORMAL); + + if(mesh->has_motion_blur() && attr_mP && attr_mN) { + for(int step = 0; step < mesh->motion_steps - 1; step++) { + float3 *mP = attr_mP->data_float3() + step*mesh->verts.size(); + float3 *mN = attr_mN->data_float3() + step*mesh->verts.size(); + + /* compute */ + + /* zero vertex normals on triangles with true displacement */ + for(size_t i = 0; i < num_triangles; i++) { + if(tri_has_true_disp[i]) { + for(size_t j = 0; j < 3; j++) { + mN[mesh->get_triangle(i).v[j]] = make_float3(0.0f, 0.0f, 0.0f); + } + } + } + + /* add face normals to vertex normals */ + for(size_t i = 0; i < num_triangles; i++) { + if(tri_has_true_disp[i]) { + for(size_t j = 0; j < 3; j++) { + float3 fN = compute_face_normal(mesh->get_triangle(i), mP); + mN[mesh->get_triangle(i).v[j]] += fN; + } + } + } + + /* normalize vertex normals */ + done.clear(); + done.resize(num_verts, false); + + for(size_t i = 0; i < num_triangles; i++) { + if(tri_has_true_disp[i]) { + for(size_t j = 0; j < 3; j++) { + int vert = mesh->get_triangle(i).v[j]; + + if(done[vert]) { + continue; + } + + mN[vert] = normalize(mN[vert]); + if(flip) + mN[vert] = -mN[vert]; + + done[vert] = true; + } + } + } + } + } } return true; diff --git a/intern/cycles/render/mesh_subdivision.cpp b/intern/cycles/render/mesh_subdivision.cpp index fe8e41e8d35..efb40efbb79 100644 --- a/intern/cycles/render/mesh_subdivision.cpp +++ b/intern/cycles/render/mesh_subdivision.cpp @@ -19,13 +19,302 @@ #include "subd_split.h" #include "subd_patch.h" +#include "subd_patch_table.h" #include "util_foreach.h" CCL_NAMESPACE_BEGIN +#ifdef WITH_OPENSUBDIV + +CCL_NAMESPACE_END + +#include <opensubdiv/far/topologyRefinerFactory.h> +#include <opensubdiv/far/primvarRefiner.h> +#include <opensubdiv/far/patchTableFactory.h> +#include <opensubdiv/far/patchMap.h> + +/* specializations of TopologyRefinerFactory for ccl::Mesh */ + +namespace OpenSubdiv { +namespace OPENSUBDIV_VERSION { +namespace Far { + template<> + bool TopologyRefinerFactory<ccl::Mesh>::resizeComponentTopology(TopologyRefiner& refiner, ccl::Mesh const& mesh) + { + setNumBaseVertices(refiner, mesh.verts.size()); + setNumBaseFaces(refiner, mesh.subd_faces.size()); + + ccl::Mesh::SubdFace* face = &mesh.subd_faces[0]; + + for(int i = 0; i < mesh.subd_faces.size(); i++, face++) { + setNumBaseFaceVertices(refiner, i, face->num_corners); + } + + return true; + } + + template<> + bool TopologyRefinerFactory<ccl::Mesh>::assignComponentTopology(TopologyRefiner& refiner, ccl::Mesh const& mesh) + { + ccl::Mesh::SubdFace* face = &mesh.subd_faces[0]; + + for(int i = 0; i < mesh.subd_faces.size(); i++, face++) { + IndexArray face_verts = getBaseFaceVertices(refiner, i); + + int* corner = &mesh.subd_face_corners[face->start_corner]; + + for(int j = 0; j < face->num_corners; j++, corner++) { + face_verts[j] = *corner; + } + } + + return true; + } + + template<> + bool TopologyRefinerFactory<ccl::Mesh>::assignComponentTags(TopologyRefiner& refiner, ccl::Mesh const& mesh) + { + const ccl::Mesh::SubdEdgeCrease* crease = mesh.subd_creases.data(); + + for(int i = 0; i < mesh.subd_creases.size(); i++, crease++) { + Index edge = findBaseEdge(refiner, crease->v[0], crease->v[1]); + + if(edge != INDEX_INVALID) { + setBaseEdgeSharpness(refiner, edge, crease->crease * 10.0f); + } + } + + for(int i = 0; i < mesh.verts.size(); i++) { + ConstIndexArray vert_edges = getBaseVertexEdges(refiner, i); + + if(vert_edges.size() == 2) { + float sharpness = refiner.getLevel(0).getEdgeSharpness(vert_edges[0]); + sharpness = std::min(sharpness, refiner.getLevel(0).getEdgeSharpness(vert_edges[1])); + + setBaseVertexSharpness(refiner, i, sharpness); + } + } + + return true; + } + + template<> + bool TopologyRefinerFactory<ccl::Mesh>::assignFaceVaryingTopology(TopologyRefiner& /*refiner*/, ccl::Mesh const& /*mesh*/) + { + return true; + } + + template<> + void TopologyRefinerFactory<ccl::Mesh>::reportInvalidTopology(TopologyError /*err_code*/, + char const */*msg*/, ccl::Mesh const& /*mesh*/) + { + } +} /* namespace Far */ +} /* namespace OPENSUBDIV_VERSION */ +} /* namespace OpenSubdiv */ + +CCL_NAMESPACE_BEGIN + +using namespace OpenSubdiv; + +/* struct that implements OpenSubdiv's vertex interface */ + +template<typename T> +struct OsdValue { + T value; + + OsdValue() {} + + void Clear(void* = 0) { + memset(&value, 0, sizeof(T)); + } + + void AddWithWeight(OsdValue<T> const& src, float weight) { + value += src.value * weight; + } +}; + +template<> +void OsdValue<uchar4>::AddWithWeight(OsdValue<uchar4> const& src, float weight) +{ + for(int i = 0; i < 4; i++) { + value[i] += (uchar)(src.value[i] * weight); + } +} + +/* class for holding OpenSubdiv data used during tessellation */ + +class OsdData { + Mesh* mesh; + vector<OsdValue<float3> > verts; + Far::TopologyRefiner* refiner; + Far::PatchTable* patch_table; + Far::PatchMap* patch_map; + +public: + OsdData() : mesh(NULL), refiner(NULL), patch_table(NULL), patch_map(NULL) {} + + ~OsdData() + { + delete refiner; + delete patch_table; + delete patch_map; + } + + void build_from_mesh(Mesh* mesh_) + { + mesh = mesh_; + + /* type and options */ + Sdc::SchemeType type = Sdc::SCHEME_CATMARK; + + Sdc::Options options; + options.SetVtxBoundaryInterpolation(Sdc::Options::VTX_BOUNDARY_EDGE_ONLY); + + /* create refiner */ + refiner = Far::TopologyRefinerFactory<Mesh>::Create(*mesh, + Far::TopologyRefinerFactory<Mesh>::Options(type, options)); + + /* adaptive refinement */ + int max_isolation = 10; + refiner->RefineAdaptive(Far::TopologyRefiner::AdaptiveOptions(max_isolation)); + + /* create patch table */ + Far::PatchTableFactory::Options patch_options; + patch_options.endCapType = Far::PatchTableFactory::Options::ENDCAP_GREGORY_BASIS; + + patch_table = Far::PatchTableFactory::Create(*refiner, patch_options); + + /* interpolate verts */ + int num_refiner_verts = refiner->GetNumVerticesTotal(); + int num_local_points = patch_table->GetNumLocalPoints(); + + verts.resize(num_refiner_verts + num_local_points); + for(int i = 0; i < mesh->verts.size(); i++) { + verts[i].value = mesh->verts[i]; + } + + OsdValue<float3>* src = &verts[0]; + for(int i = 0; i < refiner->GetMaxLevel(); i++) { + OsdValue<float3>* dest = src + refiner->GetLevel(i).GetNumVertices(); + Far::PrimvarRefiner(*refiner).Interpolate(i+1, src, dest); + src = dest; + } + + patch_table->ComputeLocalPointValues(&verts[0], &verts[num_refiner_verts]); + + /* create patch map */ + patch_map = new Far::PatchMap(*patch_table); + } + + void subdivide_attribute(Attribute& attr) + { + Far::PrimvarRefiner primvar_refiner(*refiner); + + if(attr.element == ATTR_ELEMENT_VERTEX) { + int num_refiner_verts = refiner->GetNumVerticesTotal(); + int num_local_points = patch_table->GetNumLocalPoints(); + + attr.resize(num_refiner_verts + num_local_points); + attr.flags |= ATTR_FINAL_SIZE; + + char* src = &attr.buffer[0]; + + for(int i = 0; i < refiner->GetMaxLevel(); i++) { + char* dest = src + refiner->GetLevel(i).GetNumVertices() * attr.data_sizeof(); + + if(attr.same_storage(attr.type, TypeDesc::TypeFloat)) { + primvar_refiner.Interpolate(i+1, (OsdValue<float>*)src, (OsdValue<float>*&)dest); + } + else { + primvar_refiner.Interpolate(i+1, (OsdValue<float4>*)src, (OsdValue<float4>*&)dest); + } + + src = dest; + } + + if(attr.same_storage(attr.type, TypeDesc::TypeFloat)) { + patch_table->ComputeLocalPointValues((OsdValue<float>*)&attr.buffer[0], + (OsdValue<float>*)&attr.buffer[num_refiner_verts * attr.data_sizeof()]); + } + else { + patch_table->ComputeLocalPointValues((OsdValue<float4>*)&attr.buffer[0], + (OsdValue<float4>*)&attr.buffer[num_refiner_verts * attr.data_sizeof()]); + } + } + else if(attr.element == ATTR_ELEMENT_CORNER || attr.element == ATTR_ELEMENT_CORNER_BYTE) { + // TODO(mai): fvar interpolation + } + } + + friend struct OsdPatch; + friend class Mesh; +}; + +/* ccl::Patch implementation that uses OpenSubdiv for eval */ + +struct OsdPatch : Patch { + OsdData* osd_data; + + OsdPatch(OsdData* data) : osd_data(data) {} + + void eval(float3 *P, float3 *dPdu, float3 *dPdv, float3 *N, float u, float v) + { + const Far::PatchTable::PatchHandle* handle = osd_data->patch_map->FindPatch(patch_index, u, v); + assert(handle); + + float p_weights[20], du_weights[20], dv_weights[20]; + osd_data->patch_table->EvaluateBasis(*handle, u, v, p_weights, du_weights, dv_weights); + + Far::ConstIndexArray cv = osd_data->patch_table->GetPatchVertices(*handle); + + float3 du, dv; + if(P) *P = make_float3(0.0f, 0.0f, 0.0f); + du = make_float3(0.0f, 0.0f, 0.0f); + dv = make_float3(0.0f, 0.0f, 0.0f); + + for(int i = 0; i < cv.size(); i++) { + float3 p = osd_data->verts[cv[i]].value; + + if(P) *P += p * p_weights[i]; + du += p * du_weights[i]; + dv += p * dv_weights[i]; + } + + if(dPdu) *dPdu = du; + if(dPdv) *dPdv = dv; + if(N) *N = normalize(cross(du, dv)); + } + + BoundBox bound() { return BoundBox::empty; } +}; + +#endif + void Mesh::tessellate(DiagSplit *split) { +#ifdef WITH_OPENSUBDIV + OsdData osd_data; + bool need_packed_patch_table = false; + + if(subdivision_type == SUBDIVISION_CATMULL_CLARK) { + osd_data.build_from_mesh(this); + } + else +#endif + { + /* force linear subdivision if OpenSubdiv is unavailable to avoid + * falling into catmull-clark code paths by accident + */ + subdivision_type = SUBDIVISION_LINEAR; + + /* force disable attribute subdivision for same reason as above */ + foreach(Attribute& attr, subd_attributes.attributes) { + attr.flags &= ~ATTR_SUBDIVIDED; + } + } + int num_faces = subd_faces.size(); Attribute *attr_vN = subd_attributes.find(ATTR_STD_VERTEX_NORMAL); @@ -36,113 +325,158 @@ void Mesh::tessellate(DiagSplit *split) if(face.is_quad()) { /* quad */ - LinearQuadPatch patch; - float3 *hull = patch.hull; - float3 *normals = patch.normals; + QuadDice::SubPatch subpatch; - patch.patch_index = face.ptex_offset; - patch.shader = face.shader; + LinearQuadPatch quad_patch; +#ifdef WITH_OPENSUBDIV + OsdPatch osd_patch(&osd_data); - for(int i = 0; i < 4; i++) { - hull[i] = verts[subd_face_corners[face.start_corner+i]]; + if(subdivision_type == SUBDIVISION_CATMULL_CLARK) { + osd_patch.patch_index = face.ptex_offset; + + subpatch.patch = &osd_patch; } + else +#endif + { + float3 *hull = quad_patch.hull; + float3 *normals = quad_patch.normals; + + quad_patch.patch_index = face.ptex_offset; - if(face.smooth) { for(int i = 0; i < 4; i++) { - normals[i] = vN[subd_face_corners[face.start_corner+i]]; + hull[i] = verts[subd_face_corners[face.start_corner+i]]; } - } - else { - float3 N = face.normal(this); - for(int i = 0; i < 4; i++) { - normals[i] = N; + + if(face.smooth) { + for(int i = 0; i < 4; i++) { + normals[i] = vN[subd_face_corners[face.start_corner+i]]; + } + } + else { + float3 N = face.normal(this); + for(int i = 0; i < 4; i++) { + normals[i] = N; + } } + + swap(hull[2], hull[3]); + swap(normals[2], normals[3]); + + subpatch.patch = &quad_patch; } - swap(hull[2], hull[3]); - swap(normals[2], normals[3]); + subpatch.patch->shader = face.shader; /* Quad faces need to be split at least once to line up with split ngons, we do this * here in this manner because if we do it later edge factors may end up slightly off. */ - QuadDice::SubPatch subpatch; - subpatch.patch = &patch; - subpatch.P00 = make_float2(0.0f, 0.0f); subpatch.P10 = make_float2(0.5f, 0.0f); subpatch.P01 = make_float2(0.0f, 0.5f); subpatch.P11 = make_float2(0.5f, 0.5f); - split->split_quad(&patch, &subpatch); + split->split_quad(subpatch.patch, &subpatch); subpatch.P00 = make_float2(0.5f, 0.0f); subpatch.P10 = make_float2(1.0f, 0.0f); subpatch.P01 = make_float2(0.5f, 0.5f); subpatch.P11 = make_float2(1.0f, 0.5f); - split->split_quad(&patch, &subpatch); + split->split_quad(subpatch.patch, &subpatch); subpatch.P00 = make_float2(0.0f, 0.5f); subpatch.P10 = make_float2(0.5f, 0.5f); subpatch.P01 = make_float2(0.0f, 1.0f); subpatch.P11 = make_float2(0.5f, 1.0f); - split->split_quad(&patch, &subpatch); + split->split_quad(subpatch.patch, &subpatch); subpatch.P00 = make_float2(0.5f, 0.5f); subpatch.P10 = make_float2(1.0f, 0.5f); subpatch.P01 = make_float2(0.5f, 1.0f); subpatch.P11 = make_float2(1.0f, 1.0f); - split->split_quad(&patch, &subpatch); + split->split_quad(subpatch.patch, &subpatch); } else { /* ngon */ - float3 center_vert = make_float3(0.0f, 0.0f, 0.0f); - float3 center_normal = make_float3(0.0f, 0.0f, 0.0f); +#ifdef WITH_OPENSUBDIV + if(subdivision_type == SUBDIVISION_CATMULL_CLARK) { + OsdPatch patch(&osd_data); + + patch.shader = face.shader; - float inv_num_corners = 1.0f/float(face.num_corners); - for(int corner = 0; corner < face.num_corners; corner++) { - center_vert += verts[subd_face_corners[face.start_corner + corner]] * inv_num_corners; - center_normal += vN[subd_face_corners[face.start_corner + corner]] * inv_num_corners; + for(int corner = 0; corner < face.num_corners; corner++) { + patch.patch_index = face.ptex_offset + corner; + + split->split_quad(&patch); + } } + else +#endif + { + float3 center_vert = make_float3(0.0f, 0.0f, 0.0f); + float3 center_normal = make_float3(0.0f, 0.0f, 0.0f); + + float inv_num_corners = 1.0f/float(face.num_corners); + for(int corner = 0; corner < face.num_corners; corner++) { + center_vert += verts[subd_face_corners[face.start_corner + corner]] * inv_num_corners; + center_normal += vN[subd_face_corners[face.start_corner + corner]] * inv_num_corners; + } - for(int corner = 0; corner < face.num_corners; corner++) { - LinearQuadPatch patch; - float3 *hull = patch.hull; - float3 *normals = patch.normals; + for(int corner = 0; corner < face.num_corners; corner++) { + LinearQuadPatch patch; + float3 *hull = patch.hull; + float3 *normals = patch.normals; - patch.patch_index = face.ptex_offset + corner; + patch.patch_index = face.ptex_offset + corner; - patch.shader = face.shader; + patch.shader = face.shader; - hull[0] = verts[subd_face_corners[face.start_corner + mod(corner + 0, face.num_corners)]]; - hull[1] = verts[subd_face_corners[face.start_corner + mod(corner + 1, face.num_corners)]]; - hull[2] = verts[subd_face_corners[face.start_corner + mod(corner - 1, face.num_corners)]]; - hull[3] = center_vert; + hull[0] = verts[subd_face_corners[face.start_corner + mod(corner + 0, face.num_corners)]]; + hull[1] = verts[subd_face_corners[face.start_corner + mod(corner + 1, face.num_corners)]]; + hull[2] = verts[subd_face_corners[face.start_corner + mod(corner - 1, face.num_corners)]]; + hull[3] = center_vert; - hull[1] = (hull[1] + hull[0]) * 0.5; - hull[2] = (hull[2] + hull[0]) * 0.5; + hull[1] = (hull[1] + hull[0]) * 0.5; + hull[2] = (hull[2] + hull[0]) * 0.5; - if(face.smooth) { - normals[0] = vN[subd_face_corners[face.start_corner + mod(corner + 0, face.num_corners)]]; - normals[1] = vN[subd_face_corners[face.start_corner + mod(corner + 1, face.num_corners)]]; - normals[2] = vN[subd_face_corners[face.start_corner + mod(corner - 1, face.num_corners)]]; - normals[3] = center_normal; + if(face.smooth) { + normals[0] = vN[subd_face_corners[face.start_corner + mod(corner + 0, face.num_corners)]]; + normals[1] = vN[subd_face_corners[face.start_corner + mod(corner + 1, face.num_corners)]]; + normals[2] = vN[subd_face_corners[face.start_corner + mod(corner - 1, face.num_corners)]]; + normals[3] = center_normal; - normals[1] = (normals[1] + normals[0]) * 0.5; - normals[2] = (normals[2] + normals[0]) * 0.5; - } - else { - float3 N = face.normal(this); - for(int i = 0; i < 4; i++) { - normals[i] = N; + normals[1] = (normals[1] + normals[0]) * 0.5; + normals[2] = (normals[2] + normals[0]) * 0.5; + } + else { + float3 N = face.normal(this); + for(int i = 0; i < 4; i++) { + normals[i] = N; + } } - } - split->split_quad(&patch); + split->split_quad(&patch); + } } } } /* interpolate center points for attributes */ foreach(Attribute& attr, subd_attributes.attributes) { +#ifdef WITH_OPENSUBDIV + if(subdivision_type == SUBDIVISION_CATMULL_CLARK && attr.flags & ATTR_SUBDIVIDED) { + if(attr.element == ATTR_ELEMENT_CORNER || attr.element == ATTR_ELEMENT_CORNER_BYTE) { + /* keep subdivision for corner attributes disabled for now */ + attr.flags &= ~ATTR_SUBDIVIDED; + } + else { + osd_data.subdivide_attribute(attr); + + need_packed_patch_table = true; + continue; + } + } +#endif + char* data = attr.data(); size_t stride = attr.data_sizeof(); int ngons = 0; @@ -218,6 +552,15 @@ void Mesh::tessellate(DiagSplit *split) default: break; } } + +#ifdef WITH_OPENSUBDIV + /* pack patch tables */ + if(need_packed_patch_table) { + delete patch_table; + patch_table = new PackedPatchTable; + patch_table->pack(osd_data.patch_table); + } +#endif } CCL_NAMESPACE_END diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp index 662d87e8b6b..62076f3a865 100644 --- a/intern/cycles/render/object.cpp +++ b/intern/cycles/render/object.cpp @@ -29,6 +29,8 @@ #include "util_progress.h" #include "util_vector.h" +#include "subd_patch_table.h" + CCL_NAMESPACE_BEGIN /* Object */ @@ -55,9 +57,9 @@ Object::Object() particle_system = NULL; particle_index = 0; bounds = BoundBox::empty; - motion.pre = transform_identity(); - motion.mid = transform_identity(); - motion.post = transform_identity(); + motion.pre = transform_empty(); + motion.mid = transform_empty(); + motion.post = transform_empty(); use_motion = false; } @@ -70,19 +72,28 @@ void Object::compute_bounds(bool motion_blur) BoundBox mbounds = mesh->bounds; if(motion_blur && use_motion) { - DecompMotionTransform decomp; - transform_motion_decompose(&decomp, &motion, &tfm); + if(motion.pre == transform_empty() || + motion.post == transform_empty()) { + /* Hide objects that have no valid previous or next transform, for + * example particle that stop existing. TODO: add support for this + * case in the kernel so we don't get render artifacts. */ + bounds = BoundBox::empty; + } + else { + DecompMotionTransform decomp; + transform_motion_decompose(&decomp, &motion, &tfm); - bounds = BoundBox::empty; + bounds = BoundBox::empty; - /* todo: this is really terrible. according to pbrt there is a better - * way to find this iteratively, but did not find implementation yet - * or try to implement myself */ - for(float t = 0.0f; t < 1.0f; t += (1.0f/128.0f)) { - Transform ttfm; + /* todo: this is really terrible. according to pbrt there is a better + * way to find this iteratively, but did not find implementation yet + * or try to implement myself */ + for(float t = 0.0f; t < 1.0f; t += (1.0f/128.0f)) { + Transform ttfm; - transform_motion_interpolate(&ttfm, &decomp, t); - bounds.grow(mbounds.transformed(&ttfm)); + transform_motion_interpolate(&ttfm, &decomp, t); + bounds.grow(mbounds.transformed(&ttfm)); + } } } else { @@ -228,7 +239,7 @@ vector<float> Object::motion_times() bool Object::is_traceable() { /* Mesh itself can be empty,can skip all such objects. */ - if (bounds.size() == make_float3(0.0f, 0.0f, 0.0f)) { + if (!bounds.valid() || bounds.size() == make_float3(0.0f, 0.0f, 0.0f)) { return false; } /* TODO(sergey): Check for mesh vertices/curves. visibility flags. */ @@ -337,6 +348,15 @@ void ObjectManager::device_update_object_transform(UpdateObejctTransformState *s Transform mtfm_pre = ob->motion.pre; Transform mtfm_post = ob->motion.post; + /* In case of missing motion information for previous/next frame, + * assume there is no motion. */ + if(!ob->use_motion || mtfm_pre == transform_empty()) { + mtfm_pre = ob->tfm; + } + if(!ob->use_motion || mtfm_post == transform_empty()) { + mtfm_post = ob->tfm; + } + if(!mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION)) { mtfm_pre = mtfm_pre * itfm; mtfm_post = mtfm_post * itfm; @@ -589,6 +609,40 @@ void ObjectManager::device_update_flags(Device *device, device->tex_alloc("__object_flag", dscene->object_flag); } +void ObjectManager::device_update_patch_map_offsets(Device *device, DeviceScene *dscene, Scene *scene) +{ + if (scene->objects.size() == 0) + return; + + uint4* objects = (uint4*)dscene->objects.get_data(); + + bool update = false; + + int object_index = 0; + foreach(Object *object, scene->objects) { + int offset = object_index*OBJECT_SIZE + 11; + + Mesh* mesh = object->mesh; + + if(mesh->patch_table) { + uint patch_map_offset = 2*(mesh->patch_table_offset + mesh->patch_table->total_size() - + mesh->patch_table->num_nodes * PATCH_NODE_SIZE) - mesh->patch_offset; + + if(objects[offset].x != patch_map_offset) { + objects[offset].x = patch_map_offset; + update = true; + } + } + + object_index++; + } + + if(update) { + device->tex_free(dscene->objects); + device->tex_alloc("__objects", dscene->objects); + } +} + void ObjectManager::device_free(Device *device, DeviceScene *dscene) { device->tex_free(dscene->objects); @@ -638,7 +692,7 @@ void ObjectManager::apply_static_transforms(DeviceScene *dscene, Scene *scene, u * Could be solved by moving reference counter to Mesh. */ if((mesh_users[object->mesh] == 1 && !object->mesh->has_surface_bssrdf) && - object->mesh->displacement_method == Mesh::DISPLACE_BUMP) + !object->mesh->has_true_displacement()) { if(!(motion_blur && object->use_motion)) { if(!object->mesh->transform_applied) { diff --git a/intern/cycles/render/object.h b/intern/cycles/render/object.h index 7ab73f3c91a..2e5837f672f 100644 --- a/intern/cycles/render/object.h +++ b/intern/cycles/render/object.h @@ -97,6 +97,8 @@ public: Scene *scene, Progress& progress, bool bounds_valid = true); + void device_update_patch_map_offsets(Device *device, DeviceScene *dscene, Scene *scene); + void device_free(Device *device, DeviceScene *dscene); void tag_update(Scene *scene); diff --git a/intern/cycles/render/osl.cpp b/intern/cycles/render/osl.cpp index 676afad997e..1a6ae5f9277 100644 --- a/intern/cycles/render/osl.cpp +++ b/intern/cycles/render/osl.cpp @@ -549,7 +549,7 @@ string OSLCompiler::id(ShaderNode *node) { /* assign layer unique name based on pointer address + bump mode */ stringstream stream; - stream << "node_" << node->name << "_" << node; + stream << "node_" << node->type->name << "_" << node; return stream.str(); } diff --git a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h index 9e72f197cce..8fec171b6fb 100644 --- a/intern/cycles/render/scene.h +++ b/intern/cycles/render/scene.h @@ -123,6 +123,8 @@ public: /* opencl images */ device_vector<uchar4> tex_image_byte4_packed; device_vector<float4> tex_image_float4_packed; + device_vector<uchar> tex_image_byte_packed; + device_vector<float> tex_image_float_packed; device_vector<uint4> tex_image_packed_info; KernelData data; diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp index 1cd76ff2b39..9d8c9fed7af 100644 --- a/intern/cycles/render/session.cpp +++ b/intern/cycles/render/session.cpp @@ -635,6 +635,11 @@ DeviceRequestedFeatures Session::get_requested_device_features() } requested_features.use_object_motion |= object->use_motion | mesh->use_motion_blur; requested_features.use_camera_motion |= mesh->use_motion_blur; +#ifdef WITH_OPENSUBDIV + if(mesh->subdivision_type != Mesh::SUBDIVISION_NONE) { + requested_features.use_patch_evaluation = true; + } +#endif } BakeManager *bake_manager = scene->bake_manager; diff --git a/intern/cycles/render/shader.cpp b/intern/cycles/render/shader.cpp index 4cdb878df45..d000cca5a45 100644 --- a/intern/cycles/render/shader.cpp +++ b/intern/cycles/render/shader.cpp @@ -150,6 +150,12 @@ NODE_DEFINE(Shader) volume_interpolation_method_enum.insert("cubic", VOLUME_INTERPOLATION_CUBIC); SOCKET_ENUM(volume_interpolation_method, "Volume Interpolation Method", volume_interpolation_method_enum, VOLUME_INTERPOLATION_LINEAR); + static NodeEnum displacement_method_enum; + displacement_method_enum.insert("bump", DISPLACE_BUMP); + displacement_method_enum.insert("true", DISPLACE_TRUE); + displacement_method_enum.insert("both", DISPLACE_BOTH); + SOCKET_ENUM(displacement_method, "Displacement Method", displacement_method_enum, DISPLACE_BUMP); + return type; } @@ -173,6 +179,8 @@ Shader::Shader() has_object_dependency = false; has_integrator_dependency = false; + displacement_method = DISPLACE_BUMP; + id = -1; used = false; @@ -310,7 +318,7 @@ int ShaderManager::get_shader_id(Shader *shader, Mesh *mesh, bool smooth) int id = shader->id*2; /* index depends bump since this setting is not in the shader */ - if(mesh && mesh->displacement_method != Mesh::DISPLACE_TRUE) + if(mesh && shader->displacement_method != DISPLACE_TRUE) id += 1; /* smooth flag */ if(smooth) diff --git a/intern/cycles/render/shader.h b/intern/cycles/render/shader.h index dc57ed4e4eb..060ad7056bc 100644 --- a/intern/cycles/render/shader.h +++ b/intern/cycles/render/shader.h @@ -66,6 +66,14 @@ enum VolumeInterpolation { VOLUME_NUM_INTERPOLATION, }; +enum DisplacementMethod { + DISPLACE_BUMP = 0, + DISPLACE_TRUE = 1, + DISPLACE_BOTH = 2, + + DISPLACE_NUM_METHODS, +}; + /* Shader describing the appearance of a Mesh, Light or Background. * * While there is only a single shader graph, it has three outputs: surface, @@ -110,6 +118,9 @@ public: bool has_object_dependency; bool has_integrator_dependency; + /* displacement */ + DisplacementMethod displacement_method; + /* requested mesh attributes */ AttributeRequestSet attributes; diff --git a/intern/cycles/subd/CMakeLists.txt b/intern/cycles/subd/CMakeLists.txt index db497013693..dafb807bdf3 100644 --- a/intern/cycles/subd/CMakeLists.txt +++ b/intern/cycles/subd/CMakeLists.txt @@ -16,18 +16,16 @@ set(SRC subd_dice.cpp subd_patch.cpp subd_split.cpp + subd_patch_table.cpp ) set(SRC_HEADERS subd_dice.h subd_patch.h + subd_patch_table.h subd_split.h ) -if(WITH_CYCLES_OPENSUBDIV) - add_definitions(-DWITH_OPENSUBDIV) -endif() - include_directories(${INC}) include_directories(SYSTEM ${INC_SYS}) diff --git a/intern/cycles/subd/subd_patch_table.cpp b/intern/cycles/subd/subd_patch_table.cpp new file mode 100644 index 00000000000..68ec1b2c6a6 --- /dev/null +++ b/intern/cycles/subd/subd_patch_table.cpp @@ -0,0 +1,297 @@ +/* + * Based on code from OpenSubdiv released under this license: + * + * Copyright 2014 DreamWorks Animation LLC. + * + * Licensed under the Apache License, Version 2.0 (the "Apache License") + * with the following modification; you may not use this file except in + * compliance with the Apache License and the following modification to it: + * Section 6. Trademarks. is deleted and replaced with: + * + * 6. Trademarks. This License does not grant permission to use the trade + * names, trademarks, service marks, or product names of the Licensor + * and its affiliates, except as required to comply with Section 4(c) of + * the License and to reproduce the content of the NOTICE file. + * + * You may obtain a copy of the Apache License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the Apache License with the above modification is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the Apache License for the specific + * language governing permissions and limitations under the Apache License. + * + */ + +#include "subd_patch_table.h" +#include "kernel_types.h" + +#include "util_math.h" + +#ifdef WITH_OPENSUBDIV +#include <opensubdiv/far/patchTable.h> +#endif + +CCL_NAMESPACE_BEGIN + +#ifdef WITH_OPENSUBDIV + +using namespace OpenSubdiv; + +/* functions for building patch maps */ + +struct PatchMapQuadNode { + /* sets all the children to point to the patch of index */ + void set_child(int index) + { + for (int i = 0; i < 4; i++) { + children[i] = index | PATCH_MAP_NODE_IS_SET | PATCH_MAP_NODE_IS_LEAF; + } + } + + /* sets the child in quadrant to point to the node or patch of the given index */ + void set_child(unsigned char quadrant, int index, bool is_leaf=true) + { + assert(quadrant < 4); + children[quadrant] = index | PATCH_MAP_NODE_IS_SET | (is_leaf ? PATCH_MAP_NODE_IS_LEAF : 0); + } + + uint children[4]; +}; + +template<class T> +static int resolve_quadrant(T& median, T& u, T& v) +{ + int quadrant = -1; + + if(u < median) { + if(v < median) { + quadrant = 0; + } + else { + quadrant = 1; + v -= median; + } + } + else { + if(v < median) { + quadrant = 3; + } + else { + quadrant = 2; + v -= median; + } + u -= median; + } + + return quadrant; +} + +static void build_patch_map(PackedPatchTable& table, OpenSubdiv::Far::PatchTable* patch_table, int offset) +{ + int num_faces = 0; + + for(int array = 0; array < table.num_arrays; array++) { + Far::ConstPatchParamArray params = patch_table->GetPatchParams(array); + + for(int j = 0; j < patch_table->GetNumPatches(array); j++) { + num_faces = max(num_faces, (int)params[j].GetFaceId()); + } + } + num_faces++; + + vector<PatchMapQuadNode> quadtree; + quadtree.reserve(num_faces + table.num_patches); + quadtree.resize(num_faces); + + /* adjust offsets to make indices relative to the table */ + int handle_index = -(table.num_patches * PATCH_HANDLE_SIZE); + offset += table.total_size(); + + /* populate the quadtree from the FarPatchArrays sub-patches */ + for(int array = 0; array < table.num_arrays; array++) { + Far::ConstPatchParamArray params = patch_table->GetPatchParams(array); + + for(int i = 0; i < patch_table->GetNumPatches(array); i++, handle_index += PATCH_HANDLE_SIZE) { + const Far::PatchParam& param = params[i]; + unsigned short depth = param.GetDepth(); + + PatchMapQuadNode* node = &quadtree[params[i].GetFaceId()]; + + if(depth == (param.NonQuadRoot() ? 1 : 0)) { + /* special case : regular BSpline face w/ no sub-patches */ + node->set_child(handle_index + offset); + continue; + } + + int u = param.GetU(); + int v = param.GetV(); + int pdepth = param.NonQuadRoot() ? depth-2 : depth-1; + int half = 1 << pdepth; + + for(int j = 0; j < depth; j++) { + int delta = half >> 1; + + int quadrant = resolve_quadrant(half, u, v); + assert(quadrant >= 0); + + half = delta; + + if(j == pdepth) { + /* we have reached the depth of the sub-patch : add a leaf */ + assert(!(node->children[quadrant] & PATCH_MAP_NODE_IS_SET)); + node->set_child(quadrant, handle_index + offset, true); + break; + } + else { + /* travel down the child node of the corresponding quadrant */ + if(!(node->children[quadrant] & PATCH_MAP_NODE_IS_SET)) { + /* create a new branch in the quadrant */ + quadtree.push_back(PatchMapQuadNode()); + + int idx = (int)quadtree.size() - 1; + node->set_child(quadrant, idx*4 + offset, false); + + node = &quadtree[idx]; + } + else { + /* travel down an existing branch */ + uint idx = node->children[quadrant] & PATCH_MAP_NODE_INDEX_MASK; + node = &(quadtree[(idx - offset)/4]); + } + } + } + } + } + + /* copy into table */ + assert(table.table.size() == table.total_size()); + uint map_offset = table.total_size(); + + table.num_nodes = quadtree.size() * 4; + table.table.resize(table.total_size()); + + uint* data = &table.table[map_offset]; + + for(int i = 0; i < quadtree.size(); i++) { + for(int j = 0; j < 4; j++) { + assert(quadtree[i].children[j] & PATCH_MAP_NODE_IS_SET); + *(data++) = quadtree[i].children[j]; + } + } +} + +#endif + +/* packed patch table functions */ + +size_t PackedPatchTable::total_size() +{ + return num_arrays * PATCH_ARRAY_SIZE + + num_indices + + num_patches * (PATCH_PARAM_SIZE + PATCH_HANDLE_SIZE) + + num_nodes * PATCH_NODE_SIZE; +} + +void PackedPatchTable::pack(Far::PatchTable* patch_table, int offset) +{ + num_arrays = 0; + num_patches = 0; + num_indices = 0; + num_nodes = 0; + +#ifdef WITH_OPENSUBDIV + num_arrays = patch_table->GetNumPatchArrays(); + + for(int i = 0; i < num_arrays; i++) { + int patches = patch_table->GetNumPatches(i); + int num_control = patch_table->GetPatchArrayDescriptor(i).GetNumControlVertices(); + + num_patches += patches; + num_indices += patches * num_control; + } + + table.resize(total_size()); + uint* data = &table[0]; + + uint* array = data; + uint* index = array + num_arrays * PATCH_ARRAY_SIZE; + uint* param = index + num_indices; + uint* handle = param + num_patches * PATCH_PARAM_SIZE; + + uint current_param = 0; + + for(int i = 0; i < num_arrays; i++) { + *(array++) = patch_table->GetPatchArrayDescriptor(i).GetType(); + *(array++) = patch_table->GetNumPatches(i); + *(array++) = (index - data) + offset; + *(array++) = (param - data) + offset; + + Far::ConstIndexArray indices = patch_table->GetPatchArrayVertices(i); + + for(int j = 0; j < indices.size(); j++) { + *(index++) = indices[j]; + } + + const Far::PatchParamTable& param_table = patch_table->GetPatchParamTable(); + + int num_control = patch_table->GetPatchArrayDescriptor(i).GetNumControlVertices(); + int patches = patch_table->GetNumPatches(i); + + for(int j = 0; j < patches; j++, current_param++) { + *(param++) = param_table[current_param].field0; + *(param++) = param_table[current_param].field1; + + *(handle++) = (array - data) - PATCH_ARRAY_SIZE + offset; + *(handle++) = (param - data) - PATCH_PARAM_SIZE + offset; + *(handle++) = j * num_control; + } + } + + build_patch_map(*this, patch_table, offset); +#else + (void)patch_table; + (void)offset; +#endif +} + +void PackedPatchTable::copy_adjusting_offsets(uint* dest, int doffset) +{ + uint* src = &table[0]; + + /* arrays */ + for(int i = 0; i < num_arrays; i++) { + *(dest++) = *(src++); + *(dest++) = *(src++); + *(dest++) = *(src++) + doffset; + *(dest++) = *(src++) + doffset; + } + + /* indices */ + for(int i = 0; i < num_indices; i++) { + *(dest++) = *(src++); + } + + /* params */ + for(int i = 0; i < num_patches; i++) { + *(dest++) = *(src++); + *(dest++) = *(src++); + } + + /* handles */ + for(int i = 0; i < num_patches; i++) { + *(dest++) = *(src++) + doffset; + *(dest++) = *(src++) + doffset; + *(dest++) = *(src++); + } + + /* nodes */ + for(int i = 0; i < num_nodes; i++) { + *(dest++) = *(src++) + doffset; + } +} + +CCL_NAMESPACE_END + diff --git a/intern/cycles/subd/subd_patch_table.h b/intern/cycles/subd/subd_patch_table.h new file mode 100644 index 00000000000..c8c7ecf9e47 --- /dev/null +++ b/intern/cycles/subd/subd_patch_table.h @@ -0,0 +1,63 @@ +/* + * Copyright 2011-2016 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __SUBD_PATCH_TABLE_H__ +#define __SUBD_PATCH_TABLE_H__ + +#include "util_types.h" +#include "util_vector.h" + +#ifdef WITH_OPENSUBDIV +#ifdef _MSC_VER +# include "iso646.h" +#endif + +#include <opensubdiv/far/patchTable.h> +#endif + +CCL_NAMESPACE_BEGIN + +#ifdef WITH_OPENSUBDIV +using namespace OpenSubdiv; +#else +/* forward declare for when OpenSubdiv is unavailable */ +namespace Far { struct PatchTable; } +#endif + +#define PATCH_ARRAY_SIZE 4 +#define PATCH_PARAM_SIZE 2 +#define PATCH_HANDLE_SIZE 3 +#define PATCH_NODE_SIZE 1 + +struct PackedPatchTable { + vector<uint> table; + + size_t num_arrays; + size_t num_indices; + size_t num_patches; + size_t num_nodes; + + /* calculated size from num_* members */ + size_t total_size(); + + void pack(Far::PatchTable* patch_table, int offset = 0); + void copy_adjusting_offsets(uint* dest, int doffset); +}; + +CCL_NAMESPACE_END + +#endif /* __SUBD_PATCH_TABLE_H__ */ + diff --git a/intern/cycles/test/CMakeLists.txt b/intern/cycles/test/CMakeLists.txt index 80fe893826a..9af777fb9dd 100644 --- a/intern/cycles/test/CMakeLists.txt +++ b/intern/cycles/test/CMakeLists.txt @@ -26,6 +26,7 @@ set(ALL_CYCLES_LIBRARIES cycles_device cycles_bvh cycles_graph + cycles_subd cycles_util ${OPENIMAGEIO_LIBRARIES} ) @@ -41,6 +42,16 @@ if(WITH_IMAGE_OPENJPEG AND NOT WITH_SYSTEM_OPENJPEG) extern_openjpeg ) endif() +if(WITH_CYCLES_OPENSUBDIV) + add_definitions(-DWITH_OPENSUBDIV) + include_directories( + SYSTEM + ${OPENSUBDIV_INCLUDE_DIR} + ) + list(APPEND ALL_CYCLES_LIBRARIES + ${OPENSUBDIV_LIBRARIES} + ) +endif() list(APPEND ALL_CYCLES_LIBRARIES ${BOOST_LIBRARIES} ) diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt index e6140b3ed09..f5674bdc15c 100644 --- a/intern/cycles/util/CMakeLists.txt +++ b/intern/cycles/util/CMakeLists.txt @@ -25,10 +25,6 @@ set(SRC util_windows.cpp ) -if(NOT CYCLES_STANDALONE_REPOSITORY) - add_definitions(-DWITH_GLEW_MX) -endif() - if(WITH_CYCLES_STANDALONE AND WITH_CYCLES_STANDALONE_GUI) list(APPEND SRC util_view.cpp @@ -71,6 +67,7 @@ set(SRC_HEADERS util_ssef.h util_ssei.h util_stack_allocator.h + util_static_assert.h util_stats.h util_string.h util_system.h diff --git a/intern/cycles/util/util_debug.h b/intern/cycles/util/util_debug.h index 1787ff648ee..73fd228b5d9 100644 --- a/intern/cycles/util/util_debug.h +++ b/intern/cycles/util/util_debug.h @@ -20,6 +20,8 @@ #include <cassert> #include <iostream> +#include "util_static_assert.h" + CCL_NAMESPACE_BEGIN /* Global storage for all sort of flags used to fine-tune behavior of particular diff --git a/intern/cycles/util/util_half.h b/intern/cycles/util/util_half.h index ae85ab3a915..5db3384cda4 100644 --- a/intern/cycles/util/util_half.h +++ b/intern/cycles/util/util_half.h @@ -33,17 +33,21 @@ CCL_NAMESPACE_BEGIN #else +/* CUDA has its own half data type, no need to define then */ +#ifndef __KERNEL_CUDA__ typedef unsigned short half; +#endif + struct half4 { half x, y, z, w; }; #ifdef __KERNEL_CUDA__ ccl_device_inline void float4_store_half(half *h, float4 f, float scale) { - h[0] = __float2half_rn(f.x * scale); - h[1] = __float2half_rn(f.y * scale); - h[2] = __float2half_rn(f.z * scale); - h[3] = __float2half_rn(f.w * scale); + h[0] = __float2half(f.x * scale); + h[1] = __float2half(f.y * scale); + h[2] = __float2half(f.z * scale); + h[3] = __float2half(f.w * scale); } #else diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h index 13aba0646d2..89a882d9b9d 100644 --- a/intern/cycles/util/util_math.h +++ b/intern/cycles/util/util_math.h @@ -572,6 +572,12 @@ ccl_device_inline float3 safe_normalize(const float3 a) return (t != 0.0f)? a/t: a; } +ccl_device_inline float3 safe_normalize_len(const float3 a, float *t) +{ + *t = len(a); + return (*t != 0.0f)? a/(*t): a; +} + #ifndef __KERNEL_OPENCL__ ccl_device_inline bool operator==(const float3 a, const float3 b) diff --git a/intern/cycles/util/util_static_assert.h b/intern/cycles/util/util_static_assert.h new file mode 100644 index 00000000000..1b945705145 --- /dev/null +++ b/intern/cycles/util/util_static_assert.h @@ -0,0 +1,64 @@ +/* + * Copyright 2011-2016 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_STATIC_ASSERT_H__ +#define __UTIL_STATIC_ASSERT_H__ + +CCL_NAMESPACE_BEGIN + +/* TODO(sergey): In theory CUDA might work with own static assert + * implementation since it's just pure C++. + */ +#ifndef __KERNEL_GPU__ +# if (__cplusplus > 199711L) || (defined(_MSC_VER) && _MSC_VER >= 1800) +/* C++11 has built-in static_assert() */ +# else /* C++11 or MSVC2015 */ +template <bool Test> class StaticAssertFailure; +template <> class StaticAssertFailure<true> {}; +# define _static_assert_private_glue_impl(A, B) A ## B +# define _static_assert_glue(A, B) _static_assert_private_glue_impl(A, B) +# ifdef __COUNTER__ +# define static_assert(condition, message) \ + enum {_static_assert_glue(q_static_assert_result, __COUNTER__) = sizeof(StaticAssertFailure<!!(condition)>)} // NOLINT +# else /* __COUNTER__ */ +# define static_assert(condition, message) \ + enum {_static_assert_glue(q_static_assert_result, __LINE__) = sizeof(StaticAssertFailure<!!(condition)>)} // NOLINT +# endif /* __COUNTER__ */ +# endif /* C++11 or MSVC2015 */ +#else /* __KERNEL_GPU__ */ +# define static_assert(statement, message) +#endif /* __KERNEL_GPU__ */ + +/* TODO(sergey): For until C++11 is a bare minimum for us, + * we do a bit of a trickery to show meaningful message so + * it's more or less clear what's wrong when building without + * C++11. + * + * The thing here is: our non-C++11 implementation doesn't + * have a way to print any message after preprocessor + * substitution so we rely on the message which is passed to + * static_assert() since that's the only message visible when + * compilation fails. + * + * After C++11 bump it should be possible to glue structure + * name to the error message, + */ +# define static_assert_align(st, align) \ + static_assert((sizeof(st) % (align) == 0), "Structure must be strictly aligned") // NOLINT + +CCL_NAMESPACE_END + +#endif /* __UTIL_STATIC_ASSERT_H__ */ diff --git a/intern/cycles/util/util_texture.h b/intern/cycles/util/util_texture.h index 2ef47283029..aff928ea2ee 100644 --- a/intern/cycles/util/util_texture.h +++ b/intern/cycles/util/util_texture.h @@ -24,58 +24,58 @@ CCL_NAMESPACE_BEGIN /* CPU */ #define TEX_NUM_FLOAT4_CPU 1024 #define TEX_NUM_BYTE4_CPU 1024 +#define TEX_NUM_HALF4_CPU 1024 #define TEX_NUM_FLOAT_CPU 1024 #define TEX_NUM_BYTE_CPU 1024 -#define TEX_NUM_HALF4_CPU 1024 #define TEX_NUM_HALF_CPU 1024 #define TEX_START_FLOAT4_CPU 0 #define TEX_START_BYTE4_CPU TEX_NUM_FLOAT4_CPU -#define TEX_START_FLOAT_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU) -#define TEX_START_BYTE_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_FLOAT_CPU) -#define TEX_START_HALF4_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_FLOAT_CPU + TEX_NUM_BYTE_CPU) -#define TEX_START_HALF_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_FLOAT_CPU + TEX_NUM_BYTE_CPU + TEX_NUM_HALF4_CPU) +#define TEX_START_HALF4_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU) +#define TEX_START_FLOAT_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_HALF4_CPU) +#define TEX_START_BYTE_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_HALF4_CPU + TEX_NUM_FLOAT_CPU) +#define TEX_START_HALF_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_HALF4_CPU + TEX_NUM_FLOAT_CPU + TEX_NUM_BYTE_CPU) /* CUDA (Geforce 4xx and 5xx) */ #define TEX_NUM_FLOAT4_CUDA 5 -#define TEX_NUM_BYTE4_CUDA 88 +#define TEX_NUM_BYTE4_CUDA 85 +#define TEX_NUM_HALF4_CUDA 0 #define TEX_NUM_FLOAT_CUDA 0 #define TEX_NUM_BYTE_CUDA 0 -#define TEX_NUM_HALF4_CUDA 0 #define TEX_NUM_HALF_CUDA 0 #define TEX_START_FLOAT4_CUDA 0 #define TEX_START_BYTE4_CUDA TEX_NUM_FLOAT4_CUDA -#define TEX_START_FLOAT_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA) -#define TEX_START_BYTE_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_FLOAT_CUDA) -#define TEX_START_HALF4_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_FLOAT_CUDA + TEX_NUM_BYTE_CUDA) -#define TEX_START_HALF_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_FLOAT_CUDA + TEX_NUM_BYTE_CUDA + TEX_NUM_HALF4_CUDA) +#define TEX_START_HALF4_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA) +#define TEX_START_FLOAT_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_HALF4_CUDA) +#define TEX_START_BYTE_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_HALF4_CUDA + TEX_NUM_FLOAT_CUDA) +#define TEX_START_HALF_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_HALF4_CUDA + TEX_NUM_FLOAT_CUDA + TEX_NUM_BYTE_CUDA) /* CUDA (Kepler, Geforce 6xx and above) */ #define TEX_NUM_FLOAT4_CUDA_KEPLER 1024 #define TEX_NUM_BYTE4_CUDA_KEPLER 1024 +#define TEX_NUM_HALF4_CUDA_KEPLER 1024 #define TEX_NUM_FLOAT_CUDA_KEPLER 1024 #define TEX_NUM_BYTE_CUDA_KEPLER 1024 -#define TEX_NUM_HALF4_CUDA_KEPLER 0 -#define TEX_NUM_HALF_CUDA_KEPLER 0 +#define TEX_NUM_HALF_CUDA_KEPLER 1024 #define TEX_START_FLOAT4_CUDA_KEPLER 0 #define TEX_START_BYTE4_CUDA_KEPLER TEX_NUM_FLOAT4_CUDA_KEPLER -#define TEX_START_FLOAT_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER) -#define TEX_START_BYTE_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_FLOAT_CUDA_KEPLER) -#define TEX_START_HALF4_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_FLOAT_CUDA_KEPLER + TEX_NUM_BYTE_CUDA_KEPLER) -#define TEX_START_HALF_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_FLOAT_CUDA_KEPLER + TEX_NUM_BYTE_CUDA_KEPLER + TEX_NUM_HALF4_CUDA_KEPLER) +#define TEX_START_HALF4_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER) +#define TEX_START_FLOAT_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_HALF4_CUDA_KEPLER) +#define TEX_START_BYTE_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_HALF4_CUDA_KEPLER + TEX_NUM_FLOAT_CUDA_KEPLER) +#define TEX_START_HALF_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_HALF4_CUDA_KEPLER + TEX_NUM_FLOAT_CUDA_KEPLER + TEX_NUM_BYTE_CUDA_KEPLER) /* OpenCL */ #define TEX_NUM_FLOAT4_OPENCL 1024 #define TEX_NUM_BYTE4_OPENCL 1024 -#define TEX_NUM_FLOAT_OPENCL 0 -#define TEX_NUM_BYTE_OPENCL 0 #define TEX_NUM_HALF4_OPENCL 0 +#define TEX_NUM_FLOAT_OPENCL 1024 +#define TEX_NUM_BYTE_OPENCL 1024 #define TEX_NUM_HALF_OPENCL 0 #define TEX_START_FLOAT4_OPENCL 0 #define TEX_START_BYTE4_OPENCL TEX_NUM_FLOAT4_OPENCL -#define TEX_START_FLOAT_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL) -#define TEX_START_BYTE_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_FLOAT_OPENCL) -#define TEX_START_HALF4_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_FLOAT_OPENCL + TEX_NUM_BYTE_OPENCL) -#define TEX_START_HALF_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_FLOAT_OPENCL + TEX_NUM_BYTE_OPENCL + TEX_NUM_HALF4_OPENCL) +#define TEX_START_HALF4_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL) +#define TEX_START_FLOAT_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_HALF4_OPENCL) +#define TEX_START_BYTE_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_HALF4_OPENCL + TEX_NUM_FLOAT_OPENCL) +#define TEX_START_HALF_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_HALF4_OPENCL + TEX_NUM_FLOAT_OPENCL + TEX_NUM_BYTE_OPENCL) /* Color to use when textures are not found. */ diff --git a/intern/cycles/util/util_transform.h b/intern/cycles/util/util_transform.h index 6fed18a3db8..bfc8f55feed 100644 --- a/intern/cycles/util/util_transform.h +++ b/intern/cycles/util/util_transform.h @@ -323,6 +323,15 @@ ccl_device_inline Transform transform_clear_scale(const Transform& tfm) return ntfm; } +ccl_device_inline Transform transform_empty() +{ + return make_transform( + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0); +} + #endif /* Motion Transform */ diff --git a/intern/cycles/util/util_vector.h b/intern/cycles/util/util_vector.h index 6f8c3f6f3de..546b17570bb 100644 --- a/intern/cycles/util/util_vector.h +++ b/intern/cycles/util/util_vector.h @@ -222,6 +222,11 @@ public: return datasize_; } + T* data() + { + return data_; + } + const T* data() const { return data_; |