Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPascal Schoen <pascal_schoen@gmx.net>2016-08-16 16:22:32 +0300
committerPascal Schoen <pascal_schoen@gmx.net>2016-08-16 16:22:32 +0300
commit9eed34c7d980e1b998df457c4f76021162c80f78 (patch)
tree0c47e10e97c2088d59a52c3802c35f7e9eb7901f /intern/cycles
parentef29aaee1af8074e0228c480d962700e97ea5b36 (diff)
parentae475e355488db27c4b9fcc33385080578403833 (diff)
Merge branch 'master' into cycles_disney_brdf
Diffstat (limited to 'intern/cycles')
-rw-r--r--intern/cycles/CMakeLists.txt8
-rw-r--r--intern/cycles/app/CMakeLists.txt3
-rw-r--r--intern/cycles/app/cycles_standalone.cpp2
-rw-r--r--intern/cycles/app/cycles_xml.cpp14
-rw-r--r--intern/cycles/blender/addon/osl.py6
-rw-r--r--intern/cycles/blender/addon/properties.py14
-rw-r--r--intern/cycles/blender/addon/ui.py39
-rw-r--r--intern/cycles/blender/blender_mesh.cpp70
-rw-r--r--intern/cycles/blender/blender_object.cpp18
-rw-r--r--intern/cycles/blender/blender_shader.cpp13
-rw-r--r--intern/cycles/device/device.cpp6
-rw-r--r--intern/cycles/device/device.h10
-rw-r--r--intern/cycles/device/device_cuda.cpp11
-rw-r--r--intern/cycles/device/device_opencl.cpp1
-rw-r--r--intern/cycles/kernel/CMakeLists.txt2
-rw-r--r--intern/cycles/kernel/closure/bsdf.h2
-rw-r--r--intern/cycles/kernel/geom/geom.h3
-rw-r--r--intern/cycles/kernel/geom/geom_attribute.h39
-rw-r--r--intern/cycles/kernel/geom/geom_curve.h24
-rw-r--r--intern/cycles/kernel/geom/geom_object.h12
-rw-r--r--intern/cycles/kernel/geom/geom_patch.h343
-rw-r--r--intern/cycles/kernel/geom/geom_primitive.h65
-rw-r--r--intern/cycles/kernel/geom/geom_subd_triangle.h203
-rw-r--r--intern/cycles/kernel/geom/geom_triangle.h38
-rw-r--r--intern/cycles/kernel/geom/geom_volume.h27
-rw-r--r--intern/cycles/kernel/kernel_compat_cpu.h1
-rw-r--r--intern/cycles/kernel/kernel_compat_cuda.h3
-rw-r--r--intern/cycles/kernel/kernel_compat_opencl.h1
-rw-r--r--intern/cycles/kernel/kernel_path.h8
-rw-r--r--intern/cycles/kernel/kernel_shader.h4
-rw-r--r--intern/cycles/kernel/kernel_subsurface.h22
-rw-r--r--intern/cycles/kernel/kernel_textures.h2
-rw-r--r--intern/cycles/kernel/kernel_types.h60
-rw-r--r--intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h14
-rw-r--r--intern/cycles/kernel/osl/osl_globals.h3
-rw-r--r--intern/cycles/kernel/osl/osl_services.cpp8
-rw-r--r--intern/cycles/kernel/osl/osl_shader.cpp17
-rw-r--r--intern/cycles/kernel/osl/osl_shader.h2
-rw-r--r--intern/cycles/kernel/shaders/node_rgb_curves.osl1
-rw-r--r--intern/cycles/kernel/shaders/node_rgb_ramp.osl1
-rw-r--r--intern/cycles/kernel/shaders/node_vector_curves.osl1
-rw-r--r--intern/cycles/kernel/svm/svm_attribute.h101
-rw-r--r--intern/cycles/kernel/svm/svm_image.h23
-rw-r--r--intern/cycles/kernel/svm/svm_math_util.h10
-rw-r--r--intern/cycles/kernel/svm/svm_tex_coord.h29
-rw-r--r--intern/cycles/render/attribute.cpp40
-rw-r--r--intern/cycles/render/attribute.h5
-rw-r--r--intern/cycles/render/image.cpp100
-rw-r--r--intern/cycles/render/image.h6
-rw-r--r--intern/cycles/render/mesh.cpp160
-rw-r--r--intern/cycles/render/mesh.h21
-rw-r--r--intern/cycles/render/mesh_displace.cpp160
-rw-r--r--intern/cycles/render/mesh_subdivision.cpp453
-rw-r--r--intern/cycles/render/object.cpp84
-rw-r--r--intern/cycles/render/object.h2
-rw-r--r--intern/cycles/render/osl.cpp2
-rw-r--r--intern/cycles/render/scene.h2
-rw-r--r--intern/cycles/render/session.cpp5
-rw-r--r--intern/cycles/render/shader.cpp10
-rw-r--r--intern/cycles/render/shader.h11
-rw-r--r--intern/cycles/subd/CMakeLists.txt6
-rw-r--r--intern/cycles/subd/subd_patch_table.cpp297
-rw-r--r--intern/cycles/subd/subd_patch_table.h63
-rw-r--r--intern/cycles/test/CMakeLists.txt11
-rw-r--r--intern/cycles/util/CMakeLists.txt5
-rw-r--r--intern/cycles/util/util_debug.h2
-rw-r--r--intern/cycles/util/util_half.h12
-rw-r--r--intern/cycles/util/util_math.h6
-rw-r--r--intern/cycles/util/util_static_assert.h64
-rw-r--r--intern/cycles/util/util_texture.h46
-rw-r--r--intern/cycles/util/util_transform.h9
-rw-r--r--intern/cycles/util/util_vector.h5
72 files changed, 2297 insertions, 574 deletions
diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt
index 3b410b2a1e1..97854a88e84 100644
--- a/intern/cycles/CMakeLists.txt
+++ b/intern/cycles/CMakeLists.txt
@@ -146,6 +146,14 @@ if(WITH_CYCLES_OSL)
)
endif()
+if(WITH_CYCLES_OPENSUBDIV)
+ add_definitions(-DWITH_OPENSUBDIV)
+ include_directories(
+ SYSTEM
+ ${OPENSUBDIV_INCLUDE_DIR}
+ )
+endif()
+
set(WITH_CYCLES_DEVICE_OPENCL TRUE)
set(WITH_CYCLES_DEVICE_CUDA TRUE)
set(WITH_CYCLES_DEVICE_MULTI TRUE)
diff --git a/intern/cycles/app/CMakeLists.txt b/intern/cycles/app/CMakeLists.txt
index 73dbf16a3d3..8cd499b7ca6 100644
--- a/intern/cycles/app/CMakeLists.txt
+++ b/intern/cycles/app/CMakeLists.txt
@@ -88,6 +88,9 @@ macro(cycles_target_link_libraries target)
if(WITH_CYCLES_OSL)
target_link_libraries(${target} ${OSL_LIBRARIES} ${LLVM_LIBRARIES})
endif()
+ if(WITH_CYCLES_OPENSUBDIV)
+ target_link_libraries(${target} ${OPENSUBDIV_LIBRARIES})
+ endif()
target_link_libraries(
${target}
${OPENIMAGEIO_LIBRARIES}
diff --git a/intern/cycles/app/cycles_standalone.cpp b/intern/cycles/app/cycles_standalone.cpp
index 726e9a51744..e8168bc15ff 100644
--- a/intern/cycles/app/cycles_standalone.cpp
+++ b/intern/cycles/app/cycles_standalone.cpp
@@ -375,6 +375,8 @@ static void options_parse(int argc, const char **argv)
"--threads %d", &options.session_params.threads, "CPU Rendering Threads",
"--width %d", &options.width, "Window width in pixel",
"--height %d", &options.height, "Window height in pixel",
+ "--tile-width %d", &options.session_params.tile_size.x, "Tile width in pixels",
+ "--tile-height %d", &options.session_params.tile_size.y, "Tile height in pixels",
"--list-devices", &list, "List information about all available devices",
#ifdef WITH_CYCLES_LOGGING
"--debug", &debug, "Enable debug logging",
diff --git a/intern/cycles/app/cycles_xml.cpp b/intern/cycles/app/cycles_xml.cpp
index 3d3aca33881..a54022268bb 100644
--- a/intern/cycles/app/cycles_xml.cpp
+++ b/intern/cycles/app/cycles_xml.cpp
@@ -57,14 +57,12 @@ struct XMLReadState : public XMLReader {
Shader *shader; /* current shader */
string base; /* base path to current file*/
float dicing_rate; /* current dicing rate */
- Mesh::DisplacementMethod displacement_method;
XMLReadState()
: scene(NULL),
smooth(false),
shader(NULL),
- dicing_rate(0.0f),
- displacement_method(Mesh::DISPLACE_BUMP)
+ dicing_rate(0.0f)
{
tfm = transform_identity();
}
@@ -405,8 +403,6 @@ static void xml_read_mesh(const XMLReadState& state, pugi::xml_node node)
int shader = 0;
bool smooth = state.smooth;
- mesh->displacement_method = state.displacement_method;
-
/* read vertices and polygons, RIB style */
vector<float3> P;
vector<float> UV;
@@ -653,14 +649,6 @@ static void xml_read_state(XMLReadState& state, pugi::xml_node node)
state.smooth = true;
else if(xml_equal_string(node, "interpolation", "flat"))
state.smooth = false;
-
- /* read displacement method */
- if(xml_equal_string(node, "displacement_method", "true"))
- state.displacement_method = Mesh::DISPLACE_TRUE;
- else if(xml_equal_string(node, "displacement_method", "bump"))
- state.displacement_method = Mesh::DISPLACE_BUMP;
- else if(xml_equal_string(node, "displacement_method", "both"))
- state.displacement_method = Mesh::DISPLACE_BOTH;
}
/* Scene */
diff --git a/intern/cycles/blender/addon/osl.py b/intern/cycles/blender/addon/osl.py
index f4aaaab5eab..19f2ecc9d1a 100644
--- a/intern/cycles/blender/addon/osl.py
+++ b/intern/cycles/blender/addon/osl.py
@@ -41,6 +41,8 @@ def update_script_node(node, report):
import shutil
import tempfile
+ oso_file_remove = False
+
if node.mode == 'EXTERNAL':
# compile external script file
script_path = bpy.path.abspath(node.filepath, library=node.id_data.library)
@@ -49,7 +51,6 @@ def update_script_node(node, report):
if script_ext == ".oso":
# it's a .oso file, no need to compile
ok, oso_path = True, script_path
- oso_file_remove = False
elif script_ext == ".osl":
# compile .osl file
ok, oso_path = osl_compile(script_path, report)
@@ -65,7 +66,6 @@ def update_script_node(node, report):
elif os.path.dirname(node.filepath) == "":
# module in search path
oso_path = node.filepath
- oso_file_remove = False
ok = True
else:
# unknown
@@ -88,12 +88,10 @@ def update_script_node(node, report):
osl_file.close()
ok, oso_path = osl_compile(osl_file.name, report)
- oso_file_remove = False
os.remove(osl_file.name)
else:
# compile text datablock from disk directly
ok, oso_path = osl_compile(osl_path, report)
- oso_file_remove = False
if ok:
# read bytecode
diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py
index 81204eb8ae0..8e82eac2b59 100644
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -775,6 +775,13 @@ class CyclesMaterialSettings(bpy.types.PropertyGroup):
default='LINEAR',
)
+ cls.displacement_method = EnumProperty(
+ name="Displacement Method",
+ description="Method to use for the displacement",
+ items=enum_displacement_methods,
+ default='BUMP',
+ )
+
@classmethod
def unregister(cls):
del bpy.types.Material.cycles
@@ -952,13 +959,6 @@ class CyclesMeshSettings(bpy.types.PropertyGroup):
type=cls,
)
- cls.displacement_method = EnumProperty(
- name="Displacement Method",
- description="Method to use for the displacement",
- items=enum_displacement_methods,
- default='BUMP',
- )
-
@classmethod
def unregister(cls):
del bpy.types.Mesh.cycles
diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py
index 42f7970769a..52872d2b83f 100644
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -674,40 +674,6 @@ class Cycles_PT_context_material(CyclesButtonsPanel, Panel):
split.separator()
-class Cycles_PT_mesh_displacement(CyclesButtonsPanel, Panel):
- bl_label = "Displacement"
- bl_context = "data"
-
- @classmethod
- def poll(cls, context):
- if CyclesButtonsPanel.poll(context):
- if context.mesh or context.curve or context.meta_ball:
- if context.scene.cycles.feature_set == 'EXPERIMENTAL':
- return True
-
- return False
-
- def draw(self, context):
- layout = self.layout
-
- mesh = context.mesh
- curve = context.curve
- mball = context.meta_ball
-
- if mesh:
- cdata = mesh.cycles
- elif curve:
- cdata = curve.cycles
- elif mball:
- cdata = mball.cycles
-
- split = layout.split()
-
- col = split.column()
- sub = col.column(align=True)
- sub.label(text="Displacement:")
- sub.prop(cdata, "displacement_method", text="")
-
class CyclesObject_PT_motion_blur(CyclesButtonsPanel, Panel):
bl_label = "Motion Blur"
bl_context = "object"
@@ -1219,6 +1185,11 @@ class CyclesMaterial_PT_settings(CyclesButtonsPanel, Panel):
col.prop(cmat, "sample_as_light", text="Multiple Importance")
col.prop(cmat, "use_transparent_shadow")
+ if context.scene.cycles.feature_set == 'EXPERIMENTAL':
+ col.separator()
+ col.label(text="Displacement:")
+ col.prop(cmat, "displacement_method", text="")
+
col = split.column()
col.label(text="Volume:")
sub = col.column()
diff --git a/intern/cycles/blender/blender_mesh.cpp b/intern/cycles/blender/blender_mesh.cpp
index 74fd4cb44a0..c33bc4c263f 100644
--- a/intern/cycles/blender/blender_mesh.cpp
+++ b/intern/cycles/blender/blender_mesh.cpp
@@ -409,7 +409,8 @@ static void attr_create_uv_map(Scene *scene,
BL::Mesh& b_mesh,
const vector<int>& nverts,
const vector<int>& face_flags,
- bool subdivision)
+ bool subdivision,
+ bool subdivide_uvs)
{
if(subdivision) {
BL::Mesh::uv_layers_iterator l;
@@ -429,6 +430,10 @@ static void attr_create_uv_map(Scene *scene,
else
attr = mesh->subd_attributes.add(name, TypeDesc::TypePoint, ATTR_ELEMENT_CORNER);
+ if(subdivide_uvs) {
+ attr->flags |= ATTR_SUBDIVIDED;
+ }
+
BL::Mesh::polygons_iterator p;
float3 *fdata = attr->data_float3();
@@ -592,7 +597,8 @@ static void create_mesh(Scene *scene,
Mesh *mesh,
BL::Mesh& b_mesh,
const vector<Shader*>& used_shaders,
- bool subdivision=false)
+ bool subdivision=false,
+ bool subdivide_uvs=true)
{
/* count vertices and faces */
int numverts = b_mesh.vertices.length();
@@ -638,6 +644,7 @@ static void create_mesh(Scene *scene,
/* create generated coordinates from undeformed coordinates */
if(mesh->need_attribute(scene, ATTR_STD_GENERATED)) {
Attribute *attr = attributes.add(ATTR_STD_GENERATED);
+ attr->flags |= ATTR_SUBDIVIDED;
float3 loc, size;
mesh_texture_space(b_mesh, loc, size);
@@ -746,7 +753,7 @@ static void create_mesh(Scene *scene,
* The calculate functions will check whether they're needed or not.
*/
attr_create_vertex_color(scene, mesh, b_mesh, nverts, face_flags, subdivision);
- attr_create_uv_map(scene, mesh, b_mesh, nverts, face_flags, subdivision);
+ attr_create_uv_map(scene, mesh, b_mesh, nverts, face_flags, subdivision, subdivide_uvs);
/* for volume objects, create a matrix to transform from object space to
* mesh texture space. this does not work with deformations but that can
@@ -770,9 +777,39 @@ static void create_subd_mesh(Scene *scene,
float dicing_rate,
int max_subdivisions)
{
- create_mesh(scene, mesh, b_mesh, used_shaders, true);
+ BL::SubsurfModifier subsurf_mod(b_ob.modifiers[b_ob.modifiers.length()-1]);
+ bool subdivide_uvs = subsurf_mod.use_subsurf_uv();
+
+ create_mesh(scene, mesh, b_mesh, used_shaders, true, subdivide_uvs);
+
+ /* export creases */
+ size_t num_creases = 0;
+ BL::Mesh::edges_iterator e;
+
+ for(b_mesh.edges.begin(e); e != b_mesh.edges.end(); ++e) {
+ if(e->crease() != 0.0f) {
+ num_creases++;
+ }
+ }
+
+ mesh->subd_creases.resize(num_creases);
+
+ Mesh::SubdEdgeCrease* crease = mesh->subd_creases.data();
+ for(b_mesh.edges.begin(e); e != b_mesh.edges.end(); ++e) {
+ if(e->crease() != 0.0f) {
+ crease->v[0] = e->vertices()[0];
+ crease->v[1] = e->vertices()[1];
+ crease->crease = e->crease();
- SubdParams sdparams(mesh);
+ crease++;
+ }
+ }
+
+ /* set subd params */
+ if(!mesh->subd_params) {
+ mesh->subd_params = new SubdParams(mesh);
+ }
+ SubdParams& sdparams = *mesh->subd_params;
PointerRNA cobj = RNA_pointer_get(&b_ob.ptr, "cycles");
@@ -782,10 +819,6 @@ static void create_subd_mesh(Scene *scene,
scene->camera->update();
sdparams.camera = scene->camera;
sdparams.objecttoworld = get_transform(b_ob.matrix_world());
-
- /* tesselate */
- DiagSplit dsplit(sdparams);
- mesh->tessellate(&dsplit);
}
/* Sync */
@@ -903,8 +936,6 @@ Mesh *BlenderSync::sync_mesh(BL::Object& b_ob,
mesh_synced.insert(mesh);
/* create derived mesh */
- PointerRNA cmesh = RNA_pointer_get(&b_ob_data.ptr, "cycles");
-
array<int> oldtriangle = mesh->triangles;
/* compares curve_keys rather than strands in order to handle quick hair
@@ -936,7 +967,7 @@ Mesh *BlenderSync::sync_mesh(BL::Object& b_ob,
BL::Modifier mod = b_ob.modifiers[b_ob.modifiers.length()-1];
bool enabled = preview ? mod.show_viewport() : mod.show_render();
- if(enabled && mod.type() == BL::Modifier::type_SUBSURF && RNA_int_get(&cobj, "use_adaptive_subdivision")) {
+ if(enabled && mod.type() == BL::Modifier::type_SUBSURF && RNA_boolean_get(&cobj, "use_adaptive_subdivision")) {
BL::SubsurfModifier subsurf(mod);
if(subsurf.subdivision_type() == BL::SubsurfModifier::subdivision_type_CATMULL_CLARK) {
@@ -974,21 +1005,6 @@ Mesh *BlenderSync::sync_mesh(BL::Object& b_ob,
}
mesh->geometry_flags = requested_geometry_flags;
- /* displacement method */
- if(cmesh.data) {
- const int method = get_enum(cmesh,
- "displacement_method",
- Mesh::DISPLACE_NUM_METHODS,
- Mesh::DISPLACE_BUMP);
-
- if(method == 0 || !experimental)
- mesh->displacement_method = Mesh::DISPLACE_BUMP;
- else if(method == 1)
- mesh->displacement_method = Mesh::DISPLACE_TRUE;
- else
- mesh->displacement_method = Mesh::DISPLACE_BOTH;
- }
-
/* fluid motion */
sync_mesh_fluid_motion(b_ob, scene, mesh);
diff --git a/intern/cycles/blender/blender_object.cpp b/intern/cycles/blender/blender_object.cpp
index 4886735a18f..f305e8e17cc 100644
--- a/intern/cycles/blender/blender_object.cpp
+++ b/intern/cycles/blender/blender_object.cpp
@@ -329,16 +329,18 @@ Object *BlenderSync::sync_object(BL::Object& b_parent,
/* object transformation */
if(tfm != object->tfm) {
VLOG(1) << "Object " << b_ob.name() << " motion detected.";
- if(motion_time == -1.0f) {
- object->motion.pre = tfm;
- object->use_motion = true;
- }
- else if(motion_time == 1.0f) {
- object->motion.post = tfm;
+ if(motion_time == -1.0f || motion_time == 1.0f) {
object->use_motion = true;
}
}
+ if(motion_time == -1.0f) {
+ object->motion.pre = tfm;
+ }
+ else if(motion_time == 1.0f) {
+ object->motion.post = tfm;
+ }
+
/* mesh deformation */
if(object->mesh)
sync_mesh_motion(b_ob, object, motion_time);
@@ -395,8 +397,8 @@ Object *BlenderSync::sync_object(BL::Object& b_parent,
object->name = b_ob.name().c_str();
object->pass_id = b_ob.pass_index();
object->tfm = tfm;
- object->motion.pre = tfm;
- object->motion.post = tfm;
+ object->motion.pre = transform_empty();
+ object->motion.post = transform_empty();
object->use_motion = false;
/* motion blur */
diff --git a/intern/cycles/blender/blender_shader.cpp b/intern/cycles/blender/blender_shader.cpp
index 7b8317a50a7..171b8241280 100644
--- a/intern/cycles/blender/blender_shader.cpp
+++ b/intern/cycles/blender/blender_shader.cpp
@@ -64,6 +64,14 @@ static VolumeInterpolation get_volume_interpolation(PointerRNA& ptr)
VOLUME_INTERPOLATION_LINEAR);
}
+static DisplacementMethod get_displacement_method(PointerRNA& ptr)
+{
+ return (DisplacementMethod)get_enum(ptr,
+ "displacement_method",
+ DISPLACE_NUM_METHODS,
+ DISPLACE_BUMP);
+}
+
static int validate_enum_value(int value, int num_values, int default_value)
{
if(value >= num_values) {
@@ -840,8 +848,10 @@ static ShaderNode *add_node(Scene *scene,
}
}
- if(node)
+ if(node) {
+ node->name = b_node.name();
graph->add(node);
+ }
return node;
}
@@ -1183,6 +1193,7 @@ void BlenderSync::sync_materials(bool update_all)
shader->heterogeneous_volume = !get_boolean(cmat, "homogeneous_volume");
shader->volume_sampling_method = get_volume_sampling(cmat);
shader->volume_interpolation_method = get_volume_interpolation(cmat);
+ shader->displacement_method = (experimental) ? get_displacement_method(cmat) : DISPLACE_BUMP;
shader->set_graph(graph);
shader->tag_update(scene);
diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp
index df01215c91a..85e736ad635 100644
--- a/intern/cycles/device/device.cpp
+++ b/intern/cycles/device/device.cpp
@@ -56,8 +56,14 @@ std::ostream& operator <<(std::ostream &os,
<< string_from_bool(requested_features.use_camera_motion) << std::endl;
os << "Use Baking: "
<< string_from_bool(requested_features.use_baking) << std::endl;
+ os << "Use Subsurface: "
+ << string_from_bool(requested_features.use_subsurface) << std::endl;
os << "Use Volume: "
<< string_from_bool(requested_features.use_volume) << std::endl;
+ os << "Use Branched Integrator: "
+ << string_from_bool(requested_features.use_integrator_branched) << std::endl;
+ os << "Use Patch Evaluation: "
+ << string_from_bool(requested_features.use_patch_evaluation) << std::endl;
return os;
}
diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index e11bb7f76af..77dc1fa9713 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -109,6 +109,9 @@ public:
/* Use branched integrator. */
bool use_integrator_branched;
+ /* Use OpenSubdiv patch evaluation */
+ bool use_patch_evaluation;
+
DeviceRequestedFeatures()
{
/* TODO(sergey): Find more meaningful defaults. */
@@ -123,6 +126,7 @@ public:
use_subsurface = false;
use_volume = false;
use_integrator_branched = false;
+ use_patch_evaluation = false;
}
bool modified(const DeviceRequestedFeatures& requested_features)
@@ -137,7 +141,8 @@ public:
use_baking == requested_features.use_baking &&
use_subsurface == requested_features.use_subsurface &&
use_volume == requested_features.use_volume &&
- use_integrator_branched == requested_features.use_integrator_branched);
+ use_integrator_branched == requested_features.use_integrator_branched &&
+ use_patch_evaluation == requested_features.use_patch_evaluation);
}
/* Convert the requested features structure to a build options,
@@ -175,6 +180,9 @@ public:
if(!use_integrator_branched) {
build_options += " -D__NO_BRANCHED_PATH__";
}
+ if(!use_patch_evaluation) {
+ build_options += " -D__NO_PATCH_EVAL__";
+ }
return build_options;
}
};
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 6a511ea7316..76e52498b42 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -297,7 +297,7 @@ public:
cuda_error_message("CUDA nvcc compiler version could not be parsed.");
return false;
}
- if(cuda_version < 60) {
+ if(cuda_version < 75) {
printf("Unsupported CUDA version %d.%d detected, "
"you need CUDA 7.5 or newer.\n",
major, minor);
@@ -576,6 +576,7 @@ public:
case TYPE_UINT: format = CU_AD_FORMAT_UNSIGNED_INT32; break;
case TYPE_INT: format = CU_AD_FORMAT_SIGNED_INT32; break;
case TYPE_FLOAT: format = CU_AD_FORMAT_FLOAT; break;
+ case TYPE_HALF: format = CU_AD_FORMAT_HALF; break;
default: assert(0); return;
}
@@ -747,8 +748,12 @@ public:
}
/* Resize once */
- if(flat_slot >= bindless_mapping.size())
- bindless_mapping.resize(4096); /*TODO(dingto): Make this a variable */
+ if(flat_slot >= bindless_mapping.size()) {
+ /* Allocate some slots in advance, to reduce amount
+ * of re-allocations.
+ */
+ bindless_mapping.resize(flat_slot + 128);
+ }
/* Set Mapping and tag that we need to (re-)upload to device */
bindless_mapping.get_data()[flat_slot] = (uint)tex;
diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index 50490f3a20e..5c05aeb5569 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -875,6 +875,7 @@ public:
if(ciErr != CL_SUCCESS) {
opencl_error("OpenCL build failed: errors in console");
+ fprintf(stderr, "Build error: %s\n", clewErrorString(ciErr));
return false;
}
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index f4d154ca19e..1bb93c7f922 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -166,6 +166,7 @@ set(SRC_GEOM_HEADERS
geom/geom_motion_curve.h
geom/geom_motion_triangle.h
geom/geom_object.h
+ geom/geom_patch.h
geom/geom_primitive.h
geom/geom_subd_triangle.h
geom/geom_triangle.h
@@ -179,6 +180,7 @@ set(SRC_UTIL_HEADERS
../util/util_half.h
../util/util_math.h
../util/util_math_fast.h
+ ../util/util_static_assert.h
../util/util_transform.h
../util/util_texture.h
../util/util_types.h
diff --git a/intern/cycles/kernel/closure/bsdf.h b/intern/cycles/kernel/closure/bsdf.h
index 633a16ca8e5..ac4f52818c9 100644
--- a/intern/cycles/kernel/closure/bsdf.h
+++ b/intern/cycles/kernel/closure/bsdf.h
@@ -166,7 +166,7 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
return label;
}
-#ifndef __KERNEL_CUDS__
+#ifndef __KERNEL_CUDA__
ccl_device
#else
ccl_device_inline
diff --git a/intern/cycles/kernel/geom/geom.h b/intern/cycles/kernel/geom/geom.h
index 493afdc4f62..3605394f182 100644
--- a/intern/cycles/kernel/geom/geom.h
+++ b/intern/cycles/kernel/geom/geom.h
@@ -17,6 +17,9 @@
#include "geom_attribute.h"
#include "geom_object.h"
+#ifdef __PATCH_EVAL__
+# include "geom_patch.h"
+#endif
#include "geom_triangle.h"
#include "geom_subd_triangle.h"
#include "geom_triangle_intersect.h"
diff --git a/intern/cycles/kernel/geom/geom_attribute.h b/intern/cycles/kernel/geom/geom_attribute.h
index 5d78cf8f9fc..8604d30ad34 100644
--- a/intern/cycles/kernel/geom/geom_attribute.h
+++ b/intern/cycles/kernel/geom/geom_attribute.h
@@ -43,12 +43,19 @@ ccl_device_inline uint attribute_primitive_type(KernelGlobals *kg, const ShaderD
}
}
+ccl_device_inline AttributeDescriptor attribute_not_found()
+{
+ const AttributeDescriptor desc = {ATTR_ELEMENT_NONE, (NodeAttributeType)0, 0, ATTR_STD_NOT_FOUND};
+ return desc;
+}
+
/* Find attribute based on ID */
-ccl_device_inline int find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id, AttributeElement *elem)
+ccl_device_inline AttributeDescriptor find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id)
{
- if(ccl_fetch(sd, object) == PRIM_NONE)
- return (int)ATTR_STD_NOT_FOUND;
+ if(ccl_fetch(sd, object) == PRIM_NONE) {
+ return attribute_not_found();
+ }
/* for SVM, find attribute by unique id */
uint attr_offset = ccl_fetch(sd, object)*kernel_data.bvh.attributes_map_stride;
@@ -57,31 +64,37 @@ ccl_device_inline int find_attribute(KernelGlobals *kg, const ShaderData *sd, ui
while(attr_map.x != id) {
if(UNLIKELY(attr_map.x == ATTR_STD_NONE)) {
- return ATTR_STD_NOT_FOUND;
+ return attribute_not_found();
}
attr_offset += ATTR_PRIM_TYPES;
attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
}
- *elem = (AttributeElement)attr_map.y;
+ AttributeDescriptor desc;
+ desc.element = (AttributeElement)attr_map.y;
- if(ccl_fetch(sd, prim) == PRIM_NONE && (AttributeElement)attr_map.y != ATTR_ELEMENT_MESH)
- return ATTR_STD_NOT_FOUND;
+ if(ccl_fetch(sd, prim) == PRIM_NONE && desc.element != ATTR_ELEMENT_MESH) {
+ return attribute_not_found();
+ }
/* return result */
- return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z;
+ desc.offset = (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z;
+ desc.type = (NodeAttributeType)(attr_map.w & 0xff);
+ desc.flags = (AttributeFlag)(attr_map.w >> 8);
+
+ return desc;
}
/* Transform matrix attribute on meshes */
-ccl_device Transform primitive_attribute_matrix(KernelGlobals *kg, const ShaderData *sd, int offset)
+ccl_device Transform primitive_attribute_matrix(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc)
{
Transform tfm;
- tfm.x = kernel_tex_fetch(__attributes_float3, offset + 0);
- tfm.y = kernel_tex_fetch(__attributes_float3, offset + 1);
- tfm.z = kernel_tex_fetch(__attributes_float3, offset + 2);
- tfm.w = kernel_tex_fetch(__attributes_float3, offset + 3);
+ tfm.x = kernel_tex_fetch(__attributes_float3, desc.offset + 0);
+ tfm.y = kernel_tex_fetch(__attributes_float3, desc.offset + 1);
+ tfm.z = kernel_tex_fetch(__attributes_float3, desc.offset + 2);
+ tfm.w = kernel_tex_fetch(__attributes_float3, desc.offset + 3);
return tfm;
}
diff --git a/intern/cycles/kernel/geom/geom_curve.h b/intern/cycles/kernel/geom/geom_curve.h
index 292e1bfca0e..aa9cd295452 100644
--- a/intern/cycles/kernel/geom/geom_curve.h
+++ b/intern/cycles/kernel/geom/geom_curve.h
@@ -24,23 +24,23 @@ CCL_NAMESPACE_BEGIN
/* Reading attributes on various curve elements */
-ccl_device float curve_attribute_float(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float *dx, float *dy)
+ccl_device float curve_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy)
{
- if(elem == ATTR_ELEMENT_CURVE) {
+ if(desc.element == ATTR_ELEMENT_CURVE) {
#ifdef __RAY_DIFFERENTIALS__
if(dx) *dx = 0.0f;
if(dy) *dy = 0.0f;
#endif
- return kernel_tex_fetch(__attributes_float, offset + ccl_fetch(sd, prim));
+ return kernel_tex_fetch(__attributes_float, desc.offset + ccl_fetch(sd, prim));
}
- else if(elem == ATTR_ELEMENT_CURVE_KEY || elem == ATTR_ELEMENT_CURVE_KEY_MOTION) {
+ else if(desc.element == ATTR_ELEMENT_CURVE_KEY || desc.element == ATTR_ELEMENT_CURVE_KEY_MOTION) {
float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim));
int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type));
int k1 = k0 + 1;
- float f0 = kernel_tex_fetch(__attributes_float, offset + k0);
- float f1 = kernel_tex_fetch(__attributes_float, offset + k1);
+ float f0 = kernel_tex_fetch(__attributes_float, desc.offset + k0);
+ float f1 = kernel_tex_fetch(__attributes_float, desc.offset + k1);
#ifdef __RAY_DIFFERENTIALS__
if(dx) *dx = ccl_fetch(sd, du).dx*(f1 - f0);
@@ -59,9 +59,9 @@ ccl_device float curve_attribute_float(KernelGlobals *kg, const ShaderData *sd,
}
}
-ccl_device float3 curve_attribute_float3(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float3 *dx, float3 *dy)
+ccl_device float3 curve_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float3 *dx, float3 *dy)
{
- if(elem == ATTR_ELEMENT_CURVE) {
+ if(desc.element == ATTR_ELEMENT_CURVE) {
/* idea: we can't derive any useful differentials here, but for tiled
* mipmap image caching it would be useful to avoid reading the highest
* detail level always. maybe a derivative based on the hair density
@@ -71,15 +71,15 @@ ccl_device float3 curve_attribute_float3(KernelGlobals *kg, const ShaderData *sd
if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
#endif
- return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + ccl_fetch(sd, prim)));
+ return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + ccl_fetch(sd, prim)));
}
- else if(elem == ATTR_ELEMENT_CURVE_KEY || elem == ATTR_ELEMENT_CURVE_KEY_MOTION) {
+ else if(desc.element == ATTR_ELEMENT_CURVE_KEY || desc.element == ATTR_ELEMENT_CURVE_KEY_MOTION) {
float4 curvedata = kernel_tex_fetch(__curves, ccl_fetch(sd, prim));
int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(ccl_fetch(sd, type));
int k1 = k0 + 1;
- float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + k0));
- float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + k1));
+ float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k0));
+ float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + k1));
#ifdef __RAY_DIFFERENTIALS__
if(dx) *dx = ccl_fetch(sd, du).dx*(f1 - f0);
diff --git a/intern/cycles/kernel/geom/geom_object.h b/intern/cycles/kernel/geom/geom_object.h
index c0d15a95954..883c5dc100d 100644
--- a/intern/cycles/kernel/geom/geom_object.h
+++ b/intern/cycles/kernel/geom/geom_object.h
@@ -292,6 +292,18 @@ ccl_device_inline void object_motion_info(KernelGlobals *kg, int object, int *nu
*numverts = __float_as_int(f.w);
}
+/* Offset to an objects patch map */
+
+ccl_device_inline uint object_patch_map_offset(KernelGlobals *kg, int object)
+{
+ if(object == OBJECT_NONE)
+ return 0;
+
+ int offset = object*OBJECT_SIZE + 11;
+ float4 f = kernel_tex_fetch(__objects, offset);
+ return __float_as_uint(f.x);
+}
+
/* Pass ID for shader */
ccl_device int shader_pass_id(KernelGlobals *kg, const ShaderData *sd)
diff --git a/intern/cycles/kernel/geom/geom_patch.h b/intern/cycles/kernel/geom/geom_patch.h
new file mode 100644
index 00000000000..6a0ff5a4a04
--- /dev/null
+++ b/intern/cycles/kernel/geom/geom_patch.h
@@ -0,0 +1,343 @@
+/*
+ * Based on code from OpenSubdiv released under this license:
+ *
+ * Copyright 2013 Pixar
+ *
+ * Licensed under the Apache License, Version 2.0 (the "Apache License")
+ * with the following modification; you may not use this file except in
+ * compliance with the Apache License and the following modification to it:
+ * Section 6. Trademarks. is deleted and replaced with:
+ *
+ * 6. Trademarks. This License does not grant permission to use the trade
+ * names, trademarks, service marks, or product names of the Licensor
+ * and its affiliates, except as required to comply with Section 4(c) of
+ * the License and to reproduce the content of the NOTICE file.
+ *
+ * You may obtain a copy of the Apache License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Apache License with the above modification is
+ * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the Apache License for the specific
+ * language governing permissions and limitations under the Apache License.
+ *
+ */
+
+CCL_NAMESPACE_BEGIN
+
+typedef struct PatchHandle {
+ int array_index, patch_index, vert_index;
+} PatchHandle;
+
+ccl_device_inline int patch_map_resolve_quadrant(float median, float *u, float *v)
+{
+ int quadrant = -1;
+
+ if(*u < median) {
+ if(*v < median) {
+ quadrant = 0;
+ }
+ else {
+ quadrant = 1;
+ *v -= median;
+ }
+ }
+ else {
+ if(*v < median) {
+ quadrant = 3;
+ }
+ else {
+ quadrant = 2;
+ *v -= median;
+ }
+ *u -= median;
+ }
+
+ return quadrant;
+}
+
+/* retrieve PatchHandle from patch coords */
+
+ccl_device_inline PatchHandle patch_map_find_patch(KernelGlobals *kg, int object, int patch, float u, float v)
+{
+ PatchHandle handle;
+
+ kernel_assert((u >= 0.0f) && (u <= 1.0f) && (v >= 0.0f) && (v <= 1.0f));
+
+ int node = (object_patch_map_offset(kg, object) + patch)/2;
+ float median = 0.5f;
+
+ for(int depth = 0; depth < 0xff; depth++) {
+ float delta = median * 0.5f;
+
+ int quadrant = patch_map_resolve_quadrant(median, &u, &v);
+ kernel_assert(quadrant >= 0);
+
+ uint child = kernel_tex_fetch(__patches, node + quadrant);
+
+ /* is the quadrant a hole? */
+ if(!(child & PATCH_MAP_NODE_IS_SET)) {
+ handle.array_index = -1;
+ return handle;
+ }
+
+ uint index = child & PATCH_MAP_NODE_INDEX_MASK;
+
+ if(child & PATCH_MAP_NODE_IS_LEAF) {
+ handle.array_index = kernel_tex_fetch(__patches, index + 0);
+ handle.patch_index = kernel_tex_fetch(__patches, index + 1);
+ handle.vert_index = kernel_tex_fetch(__patches, index + 2);
+
+ return handle;
+ } else {
+ node = index;
+ }
+
+ median = delta;
+ }
+
+ /* no leaf found */
+ kernel_assert(0);
+
+ handle.array_index = -1;
+ return handle;
+}
+
+ccl_device_inline void patch_eval_bspline_weights(float t, float *point, float *deriv)
+{
+ /* The four uniform cubic B-Spline basis functions evaluated at t */
+ float inv_6 = 1.0f / 6.0f;
+
+ float t2 = t * t;
+ float t3 = t * t2;
+
+ point[0] = inv_6 * (1.0f - 3.0f*(t - t2) - t3);
+ point[1] = inv_6 * (4.0f - 6.0f*t2 + 3.0f*t3);
+ point[2] = inv_6 * (1.0f + 3.0f*(t + t2 - t3));
+ point[3] = inv_6 * t3;
+
+ /* Derivatives of the above four basis functions at t */
+ deriv[0] = -0.5f*t2 + t - 0.5f;
+ deriv[1] = 1.5f*t2 - 2.0f*t;
+ deriv[2] = -1.5f*t2 + t + 0.5f;
+ deriv[3] = 0.5f*t2;
+}
+
+ccl_device_inline void patch_eval_adjust_boundary_weights(uint bits, float *s, float *t)
+{
+ int boundary = ((bits >> 8) & 0xf);
+
+ if(boundary & 1) {
+ t[2] -= t[0];
+ t[1] += 2*t[0];
+ t[0] = 0;
+ }
+
+ if(boundary & 2) {
+ s[1] -= s[3];
+ s[2] += 2*s[3];
+ s[3] = 0;
+ }
+
+ if(boundary & 4) {
+ t[1] -= t[3];
+ t[2] += 2*t[3];
+ t[3] = 0;
+ }
+
+ if(boundary & 8) {
+ s[2] -= s[0];
+ s[1] += 2*s[0];
+ s[0] = 0;
+ }
+}
+
+ccl_device_inline int patch_eval_depth(uint patch_bits)
+{
+ return (patch_bits & 0xf);
+}
+
+ccl_device_inline float patch_eval_param_fraction(uint patch_bits)
+{
+ bool non_quad_root = (patch_bits >> 4) & 0x1;
+ int depth = patch_eval_depth(patch_bits);
+
+ if(non_quad_root) {
+ return 1.0f / (float)(1 << (depth-1));
+ }
+ else {
+ return 1.0f / (float)(1 << depth);
+ }
+}
+
+ccl_device_inline void patch_eval_normalize_coords(uint patch_bits, float *u, float *v)
+{
+ float frac = patch_eval_param_fraction(patch_bits);
+
+ int iu = (patch_bits >> 22) & 0x3ff;
+ int iv = (patch_bits >> 12) & 0x3ff;
+
+ /* top left corner */
+ float pu = (float)iu*frac;
+ float pv = (float)iv*frac;
+
+ /* normalize uv coordinates */
+ *u = (*u - pu) / frac;
+ *v = (*v - pv) / frac;
+}
+
+/* retrieve patch control indices */
+
+ccl_device_inline int patch_eval_indices(KernelGlobals *kg, const PatchHandle *handle, int channel,
+ int indices[PATCH_MAX_CONTROL_VERTS])
+{
+ int index_base = kernel_tex_fetch(__patches, handle->array_index + 2) + handle->vert_index;
+
+ /* XXX: regular patches only */
+ for(int i = 0; i < 16; i++) {
+ indices[i] = kernel_tex_fetch(__patches, index_base + i);
+ }
+
+ return 16;
+}
+
+/* evaluate patch basis functions */
+
+ccl_device_inline void patch_eval_basis(KernelGlobals *kg, const PatchHandle *handle, float u, float v,
+ float weights[PATCH_MAX_CONTROL_VERTS],
+ float weights_du[PATCH_MAX_CONTROL_VERTS],
+ float weights_dv[PATCH_MAX_CONTROL_VERTS])
+{
+ uint patch_bits = kernel_tex_fetch(__patches, handle->patch_index + 1); /* read patch param */
+ float d_scale = 1 << patch_eval_depth(patch_bits);
+
+ bool non_quad_root = (patch_bits >> 4) & 0x1;
+ if(non_quad_root) {
+ d_scale *= 0.5f;
+ }
+
+ patch_eval_normalize_coords(patch_bits, &u, &v);
+
+ /* XXX: regular patches only for now. */
+
+ float s[4], t[4], ds[4], dt[4];
+
+ patch_eval_bspline_weights(u, s, ds);
+ patch_eval_bspline_weights(v, t, dt);
+
+ patch_eval_adjust_boundary_weights(patch_bits, s, t);
+ patch_eval_adjust_boundary_weights(patch_bits, ds, dt);
+
+ for(int k = 0; k < 4; k++) {
+ for(int l = 0; l < 4; l++) {
+ weights[4*k+l] = s[l] * t[k];
+ weights_du[4*k+l] = ds[l] * t[k] * d_scale;
+ weights_dv[4*k+l] = s[l] * dt[k] * d_scale;
+ }
+ }
+}
+
+/* generic function for evaluating indices and weights from patch coords */
+
+ccl_device_inline int patch_eval_control_verts(KernelGlobals *kg, int object, int patch, float u, float v, int channel,
+ int indices[PATCH_MAX_CONTROL_VERTS],
+ float weights[PATCH_MAX_CONTROL_VERTS],
+ float weights_du[PATCH_MAX_CONTROL_VERTS],
+ float weights_dv[PATCH_MAX_CONTROL_VERTS])
+{
+ PatchHandle handle = patch_map_find_patch(kg, object, patch, u, v);
+ kernel_assert(handle.array_index >= 0);
+
+ int num_control = patch_eval_indices(kg, &handle, channel, indices);
+ patch_eval_basis(kg, &handle, u, v, weights, weights_du, weights_dv);
+
+ return num_control;
+}
+
+/* functions for evaluating attributes on patches */
+
+ccl_device float patch_eval_float(KernelGlobals *kg, const ShaderData *sd, int offset,
+ int patch, float u, float v, int channel,
+ float *du, float* dv)
+{
+ int indices[PATCH_MAX_CONTROL_VERTS];
+ float weights[PATCH_MAX_CONTROL_VERTS];
+ float weights_du[PATCH_MAX_CONTROL_VERTS];
+ float weights_dv[PATCH_MAX_CONTROL_VERTS];
+
+ int num_control = patch_eval_control_verts(kg, ccl_fetch(sd, object), patch, u, v, channel,
+ indices, weights, weights_du, weights_dv);
+
+ float val = 0.0f;
+ if(du) *du = 0.0f;
+ if(dv) *dv = 0.0f;
+
+ for(int i = 0; i < num_control; i++) {
+ float v = kernel_tex_fetch(__attributes_float, offset + indices[i]);
+
+ val += v * weights[i];
+ if(du) *du += v * weights_du[i];
+ if(dv) *dv += v * weights_dv[i];
+ }
+
+ return val;
+}
+
+ccl_device float3 patch_eval_float3(KernelGlobals *kg, const ShaderData *sd, int offset,
+ int patch, float u, float v, int channel,
+ float3 *du, float3 *dv)
+{
+ int indices[PATCH_MAX_CONTROL_VERTS];
+ float weights[PATCH_MAX_CONTROL_VERTS];
+ float weights_du[PATCH_MAX_CONTROL_VERTS];
+ float weights_dv[PATCH_MAX_CONTROL_VERTS];
+
+ int num_control = patch_eval_control_verts(kg, ccl_fetch(sd, object), patch, u, v, channel,
+ indices, weights, weights_du, weights_dv);
+
+ float3 val = make_float3(0.0f, 0.0f, 0.0f);
+ if(du) *du = make_float3(0.0f, 0.0f, 0.0f);
+ if(dv) *dv = make_float3(0.0f, 0.0f, 0.0f);
+
+ for(int i = 0; i < num_control; i++) {
+ float3 v = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + indices[i]));
+
+ val += v * weights[i];
+ if(du) *du += v * weights_du[i];
+ if(dv) *dv += v * weights_dv[i];
+ }
+
+ return val;
+}
+
+ccl_device float3 patch_eval_uchar4(KernelGlobals *kg, const ShaderData *sd, int offset,
+ int patch, float u, float v, int channel,
+ float3 *du, float3 *dv)
+{
+ int indices[PATCH_MAX_CONTROL_VERTS];
+ float weights[PATCH_MAX_CONTROL_VERTS];
+ float weights_du[PATCH_MAX_CONTROL_VERTS];
+ float weights_dv[PATCH_MAX_CONTROL_VERTS];
+
+ int num_control = patch_eval_control_verts(kg, ccl_fetch(sd, object), patch, u, v, channel,
+ indices, weights, weights_du, weights_dv);
+
+ float3 val = make_float3(0.0f, 0.0f, 0.0f);
+ if(du) *du = make_float3(0.0f, 0.0f, 0.0f);
+ if(dv) *dv = make_float3(0.0f, 0.0f, 0.0f);
+
+ for(int i = 0; i < num_control; i++) {
+ float3 v = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, offset + indices[i]));
+
+ val += v * weights[i];
+ if(du) *du += v * weights_du[i];
+ if(dv) *dv += v * weights_dv[i];
+ }
+
+ return val;
+}
+
+CCL_NAMESPACE_END
+
diff --git a/intern/cycles/kernel/geom/geom_primitive.h b/intern/cycles/kernel/geom/geom_primitive.h
index b16f0c9a99b..4384c2093e9 100644
--- a/intern/cycles/kernel/geom/geom_primitive.h
+++ b/intern/cycles/kernel/geom/geom_primitive.h
@@ -25,24 +25,23 @@ CCL_NAMESPACE_BEGIN
ccl_device_inline float primitive_attribute_float(KernelGlobals *kg,
const ShaderData *sd,
- AttributeElement elem,
- int offset,
+ const AttributeDescriptor desc,
float *dx, float *dy)
{
if(ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE) {
if(subd_triangle_patch(kg, sd) == ~0)
- return triangle_attribute_float(kg, sd, elem, offset, dx, dy);
+ return triangle_attribute_float(kg, sd, desc, dx, dy);
else
- return subd_triangle_attribute_float(kg, sd, elem, offset, dx, dy);
+ return subd_triangle_attribute_float(kg, sd, desc, dx, dy);
}
#ifdef __HAIR__
else if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) {
- return curve_attribute_float(kg, sd, elem, offset, dx, dy);
+ return curve_attribute_float(kg, sd, desc, dx, dy);
}
#endif
#ifdef __VOLUME__
- else if(ccl_fetch(sd, object) != OBJECT_NONE && elem == ATTR_ELEMENT_VOXEL) {
- return volume_attribute_float(kg, sd, elem, offset, dx, dy);
+ else if(ccl_fetch(sd, object) != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) {
+ return volume_attribute_float(kg, sd, desc, dx, dy);
}
#endif
else {
@@ -54,25 +53,23 @@ ccl_device_inline float primitive_attribute_float(KernelGlobals *kg,
ccl_device_inline float3 primitive_attribute_float3(KernelGlobals *kg,
const ShaderData *sd,
- AttributeElement elem,
- int offset,
- float3 *dx,
- float3 *dy)
+ const AttributeDescriptor desc,
+ float3 *dx, float3 *dy)
{
if(ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE) {
if(subd_triangle_patch(kg, sd) == ~0)
- return triangle_attribute_float3(kg, sd, elem, offset, dx, dy);
+ return triangle_attribute_float3(kg, sd, desc, dx, dy);
else
- return subd_triangle_attribute_float3(kg, sd, elem, offset, dx, dy);
+ return subd_triangle_attribute_float3(kg, sd, desc, dx, dy);
}
#ifdef __HAIR__
else if(ccl_fetch(sd, type) & PRIMITIVE_ALL_CURVE) {
- return curve_attribute_float3(kg, sd, elem, offset, dx, dy);
+ return curve_attribute_float3(kg, sd, desc, dx, dy);
}
#endif
#ifdef __VOLUME__
- else if(ccl_fetch(sd, object) != OBJECT_NONE && elem == ATTR_ELEMENT_VOXEL) {
- return volume_attribute_float3(kg, sd, elem, offset, dx, dy);
+ else if(ccl_fetch(sd, object) != OBJECT_NONE && desc.element == ATTR_ELEMENT_VOXEL) {
+ return volume_attribute_float3(kg, sd, desc, dx, dy);
}
#endif
else {
@@ -86,13 +83,12 @@ ccl_device_inline float3 primitive_attribute_float3(KernelGlobals *kg,
ccl_device_inline float3 primitive_uv(KernelGlobals *kg, ShaderData *sd)
{
- AttributeElement elem_uv;
- int offset_uv = find_attribute(kg, sd, ATTR_STD_UV, &elem_uv);
+ const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_UV);
- if(offset_uv == ATTR_STD_NOT_FOUND)
+ if(desc.offset == ATTR_STD_NOT_FOUND)
return make_float3(0.0f, 0.0f, 0.0f);
- float3 uv = primitive_attribute_float3(kg, sd, elem_uv, offset_uv, NULL, NULL);
+ float3 uv = primitive_attribute_float3(kg, sd, desc, NULL, NULL);
uv.z = 1.0f;
return uv;
}
@@ -102,15 +98,14 @@ ccl_device_inline float3 primitive_uv(KernelGlobals *kg, ShaderData *sd)
ccl_device bool primitive_ptex(KernelGlobals *kg, ShaderData *sd, float2 *uv, int *face_id)
{
/* storing ptex data as attributes is not memory efficient but simple for tests */
- AttributeElement elem_face_id, elem_uv;
- int offset_face_id = find_attribute(kg, sd, ATTR_STD_PTEX_FACE_ID, &elem_face_id);
- int offset_uv = find_attribute(kg, sd, ATTR_STD_PTEX_UV, &elem_uv);
+ const AttributeDescriptor desc_face_id = find_attribute(kg, sd, ATTR_STD_PTEX_FACE_ID);
+ const AttributeDescriptor desc_uv = find_attribute(kg, sd, ATTR_STD_PTEX_UV);
- if(offset_face_id == ATTR_STD_NOT_FOUND || offset_uv == ATTR_STD_NOT_FOUND)
+ if(desc_face_id.offset == ATTR_STD_NOT_FOUND || desc_uv.offset == ATTR_STD_NOT_FOUND)
return false;
- float3 uv3 = primitive_attribute_float3(kg, sd, elem_uv, offset_uv, NULL, NULL);
- float face_id_f = primitive_attribute_float(kg, sd, elem_face_id, offset_face_id, NULL, NULL);
+ float3 uv3 = primitive_attribute_float3(kg, sd, desc_uv, NULL, NULL);
+ float face_id_f = primitive_attribute_float(kg, sd, desc_face_id, NULL, NULL);
*uv = make_float2(uv3.x, uv3.y);
*face_id = (int)face_id_f;
@@ -132,11 +127,10 @@ ccl_device float3 primitive_tangent(KernelGlobals *kg, ShaderData *sd)
#endif
/* try to create spherical tangent from generated coordinates */
- AttributeElement attr_elem;
- int attr_offset = find_attribute(kg, sd, ATTR_STD_GENERATED, &attr_elem);
+ const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_GENERATED);
- if(attr_offset != ATTR_STD_NOT_FOUND) {
- float3 data = primitive_attribute_float3(kg, sd, attr_elem, attr_offset, NULL, NULL);
+ if(desc.offset != ATTR_STD_NOT_FOUND) {
+ float3 data = primitive_attribute_float3(kg, sd, desc, NULL, NULL);
data = make_float3(-(data.y - 0.5f), (data.x - 0.5f), 0.0f);
object_normal_transform(kg, sd, &data);
return cross(ccl_fetch(sd, N), normalize(cross(data, ccl_fetch(sd, N))));
@@ -173,19 +167,18 @@ ccl_device_inline float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *
float3 motion_pre = center, motion_post = center;
/* deformation motion */
- AttributeElement elem;
- int offset = find_attribute(kg, sd, ATTR_STD_MOTION_VERTEX_POSITION, &elem);
+ AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_MOTION_VERTEX_POSITION);
- if(offset != ATTR_STD_NOT_FOUND) {
+ if(desc.offset != ATTR_STD_NOT_FOUND) {
/* get motion info */
int numverts, numkeys;
object_motion_info(kg, ccl_fetch(sd, object), NULL, &numverts, &numkeys);
/* lookup attributes */
- int offset_next = (ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE)? offset + numverts: offset + numkeys;
+ motion_pre = primitive_attribute_float3(kg, sd, desc, NULL, NULL);
- motion_pre = primitive_attribute_float3(kg, sd, elem, offset, NULL, NULL);
- motion_post = primitive_attribute_float3(kg, sd, elem, offset_next, NULL, NULL);
+ desc.offset += (ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE)? numverts: numkeys;
+ motion_post = primitive_attribute_float3(kg, sd, desc, NULL, NULL);
#ifdef __HAIR__
if(is_curve_primitive && (ccl_fetch(sd, flag) & SD_OBJECT_HAS_VERTEX_MOTION) == 0) {
diff --git a/intern/cycles/kernel/geom/geom_subd_triangle.h b/intern/cycles/kernel/geom/geom_subd_triangle.h
index bf9be182345..647840dc696 100644
--- a/intern/cycles/kernel/geom/geom_subd_triangle.h
+++ b/intern/cycles/kernel/geom/geom_subd_triangle.h
@@ -97,36 +97,81 @@ ccl_device_inline void subd_triangle_patch_corners(KernelGlobals *kg, int patch,
/* Reading attributes on various subdivision triangle elements */
-ccl_device float subd_triangle_attribute_float(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float *dx, float *dy)
+ccl_device_noinline float subd_triangle_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy)
{
int patch = subd_triangle_patch(kg, sd);
- if(elem == ATTR_ELEMENT_FACE) {
+#ifdef __PATCH_EVAL__
+ if(desc.flags & ATTR_SUBDIVIDED) {
+ float2 uv[3];
+ subd_triangle_patch_uv(kg, sd, uv);
+
+ float2 dpdu = uv[0] - uv[2];
+ float2 dpdv = uv[1] - uv[2];
+
+ /* p is [s, t] */
+ float2 p = dpdu * ccl_fetch(sd, u) + dpdv * ccl_fetch(sd, v) + uv[2];
+
+ float a, dads, dadt;
+ a = patch_eval_float(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt);
+
+#ifdef __RAY_DIFFERENTIALS__
+ if(dx || dy) {
+ float dsdu = dpdu.x;
+ float dtdu = dpdu.y;
+ float dsdv = dpdv.x;
+ float dtdv = dpdv.y;
+
+ if(dx) {
+ float dudx = ccl_fetch(sd, du).dx;
+ float dvdx = ccl_fetch(sd, dv).dx;
+
+ float dsdx = dsdu*dudx + dsdv*dvdx;
+ float dtdx = dtdu*dudx + dtdv*dvdx;
+
+ *dx = dads*dsdx + dadt*dtdx;
+ }
+ if(dy) {
+ float dudy = ccl_fetch(sd, du).dy;
+ float dvdy = ccl_fetch(sd, dv).dy;
+
+ float dsdy = dsdu*dudy + dsdv*dvdy;
+ float dtdy = dtdu*dudy + dtdv*dvdy;
+
+ *dy = dads*dsdy + dadt*dtdy;
+ }
+ }
+#endif
+
+ return a;
+ }
+ else
+#endif /* __PATCH_EVAL__ */
+ if(desc.element == ATTR_ELEMENT_FACE) {
if(dx) *dx = 0.0f;
if(dy) *dy = 0.0f;
- return kernel_tex_fetch(__attributes_float, offset + subd_triangle_patch_face(kg, patch));
+ return kernel_tex_fetch(__attributes_float, desc.offset + subd_triangle_patch_face(kg, patch));
}
- else if(elem == ATTR_ELEMENT_VERTEX || elem == ATTR_ELEMENT_VERTEX_MOTION) {
+ else if(desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) {
float2 uv[3];
subd_triangle_patch_uv(kg, sd, uv);
- uint4 v = subd_triangle_patch_indices(kg, patch);
- float a, b, c;
+ uint4 v = subd_triangle_patch_indices(kg, patch);
- float f0 = kernel_tex_fetch(__attributes_float, offset + v.x);
- float f1 = kernel_tex_fetch(__attributes_float, offset + v.y);
- float f2 = kernel_tex_fetch(__attributes_float, offset + v.z);
- float f3 = kernel_tex_fetch(__attributes_float, offset + v.w);
+ float f0 = kernel_tex_fetch(__attributes_float, desc.offset + v.x);
+ float f1 = kernel_tex_fetch(__attributes_float, desc.offset + v.y);
+ float f2 = kernel_tex_fetch(__attributes_float, desc.offset + v.z);
+ float f3 = kernel_tex_fetch(__attributes_float, desc.offset + v.w);
if(subd_triangle_patch_num_corners(kg, patch) != 4) {
f1 = (f1+f0)*0.5f;
f3 = (f3+f0)*0.5f;
}
- a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y);
- b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y);
- c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y);
+ float a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y);
+ float b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y);
+ float c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y);
#ifdef __RAY_DIFFERENTIALS__
if(dx) *dx = ccl_fetch(sd, du).dx*a + ccl_fetch(sd, dv).dx*b - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*c;
@@ -135,28 +180,26 @@ ccl_device float subd_triangle_attribute_float(KernelGlobals *kg, const ShaderDa
return ccl_fetch(sd, u)*a + ccl_fetch(sd, v)*b + (1.0f - ccl_fetch(sd, u) - ccl_fetch(sd, v))*c;
}
- else if(elem == ATTR_ELEMENT_CORNER) {
- int corners[4];
- subd_triangle_patch_corners(kg, patch, corners);
-
+ else if(desc.element == ATTR_ELEMENT_CORNER) {
float2 uv[3];
subd_triangle_patch_uv(kg, sd, uv);
- float a, b, c;
+ int corners[4];
+ subd_triangle_patch_corners(kg, patch, corners);
- float f0 = kernel_tex_fetch(__attributes_float, corners[0] + offset);
- float f1 = kernel_tex_fetch(__attributes_float, corners[1] + offset);
- float f2 = kernel_tex_fetch(__attributes_float, corners[2] + offset);
- float f3 = kernel_tex_fetch(__attributes_float, corners[3] + offset);
+ float f0 = kernel_tex_fetch(__attributes_float, corners[0] + desc.offset);
+ float f1 = kernel_tex_fetch(__attributes_float, corners[1] + desc.offset);
+ float f2 = kernel_tex_fetch(__attributes_float, corners[2] + desc.offset);
+ float f3 = kernel_tex_fetch(__attributes_float, corners[3] + desc.offset);
if(subd_triangle_patch_num_corners(kg, patch) != 4) {
f1 = (f1+f0)*0.5f;
f3 = (f3+f0)*0.5f;
}
- a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y);
- b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y);
- c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y);
+ float a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y);
+ float b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y);
+ float c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y);
#ifdef __RAY_DIFFERENTIALS__
if(dx) *dx = ccl_fetch(sd, du).dx*a + ccl_fetch(sd, dv).dx*b - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*c;
@@ -173,36 +216,87 @@ ccl_device float subd_triangle_attribute_float(KernelGlobals *kg, const ShaderDa
}
}
-ccl_device float3 subd_triangle_attribute_float3(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float3 *dx, float3 *dy)
+ccl_device_noinline float3 subd_triangle_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float3 *dx, float3 *dy)
{
int patch = subd_triangle_patch(kg, sd);
- if(elem == ATTR_ELEMENT_FACE) {
+#ifdef __PATCH_EVAL__
+ if(desc.flags & ATTR_SUBDIVIDED) {
+ float2 uv[3];
+ subd_triangle_patch_uv(kg, sd, uv);
+
+ float2 dpdu = uv[0] - uv[2];
+ float2 dpdv = uv[1] - uv[2];
+
+ /* p is [s, t] */
+ float2 p = dpdu * ccl_fetch(sd, u) + dpdv * ccl_fetch(sd, v) + uv[2];
+
+ float3 a, dads, dadt;
+
+ if(desc.element == ATTR_ELEMENT_CORNER_BYTE) {
+ a = patch_eval_uchar4(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt);
+ }
+ else {
+ a = patch_eval_float3(kg, sd, desc.offset, patch, p.x, p.y, 0, &dads, &dadt);
+ }
+
+#ifdef __RAY_DIFFERENTIALS__
+ if(dx || dy) {
+ float dsdu = dpdu.x;
+ float dtdu = dpdu.y;
+ float dsdv = dpdv.x;
+ float dtdv = dpdv.y;
+
+ if(dx) {
+ float dudx = ccl_fetch(sd, du).dx;
+ float dvdx = ccl_fetch(sd, dv).dx;
+
+ float dsdx = dsdu*dudx + dsdv*dvdx;
+ float dtdx = dtdu*dudx + dtdv*dvdx;
+
+ *dx = dads*dsdx + dadt*dtdx;
+ }
+ if(dy) {
+ float dudy = ccl_fetch(sd, du).dy;
+ float dvdy = ccl_fetch(sd, dv).dy;
+
+ float dsdy = dsdu*dudy + dsdv*dvdy;
+ float dtdy = dtdu*dudy + dtdv*dvdy;
+
+ *dy = dads*dsdy + dadt*dtdy;
+ }
+ }
+#endif
+
+ return a;
+ }
+ else
+#endif /* __PATCH_EVAL__ */
+ if(desc.element == ATTR_ELEMENT_FACE) {
if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
- return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + subd_triangle_patch_face(kg, patch)));
+ return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + subd_triangle_patch_face(kg, patch)));
}
- else if(elem == ATTR_ELEMENT_VERTEX || elem == ATTR_ELEMENT_VERTEX_MOTION) {
+ else if(desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) {
float2 uv[3];
subd_triangle_patch_uv(kg, sd, uv);
- uint4 v = subd_triangle_patch_indices(kg, patch);
- float3 a, b, c;
+ uint4 v = subd_triangle_patch_indices(kg, patch);
- float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + v.x));
- float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + v.y));
- float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + v.z));
- float3 f3 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + v.w));
+ float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.x));
+ float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.y));
+ float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.z));
+ float3 f3 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + v.w));
if(subd_triangle_patch_num_corners(kg, patch) != 4) {
f1 = (f1+f0)*0.5f;
f3 = (f3+f0)*0.5f;
}
- a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y);
- b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y);
- c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y);
+ float3 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y);
+ float3 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y);
+ float3 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y);
#ifdef __RAY_DIFFERENTIALS__
if(dx) *dx = ccl_fetch(sd, du).dx*a + ccl_fetch(sd, dv).dx*b - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*c;
@@ -211,27 +305,26 @@ ccl_device float3 subd_triangle_attribute_float3(KernelGlobals *kg, const Shader
return ccl_fetch(sd, u)*a + ccl_fetch(sd, v)*b + (1.0f - ccl_fetch(sd, u) - ccl_fetch(sd, v))*c;
}
- else if(elem == ATTR_ELEMENT_CORNER || elem == ATTR_ELEMENT_CORNER_BYTE) {
- int corners[4];
- subd_triangle_patch_corners(kg, patch, corners);
-
+ else if(desc.element == ATTR_ELEMENT_CORNER || desc.element == ATTR_ELEMENT_CORNER_BYTE) {
float2 uv[3];
subd_triangle_patch_uv(kg, sd, uv);
- float3 a, b, c;
+ int corners[4];
+ subd_triangle_patch_corners(kg, patch, corners);
+
float3 f0, f1, f2, f3;
- if(elem == ATTR_ELEMENT_CORNER) {
- f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[0] + offset));
- f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[1] + offset));
- f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[2] + offset));
- f3 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[3] + offset));
+ if(desc.element == ATTR_ELEMENT_CORNER) {
+ f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[0] + desc.offset));
+ f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[1] + desc.offset));
+ f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[2] + desc.offset));
+ f3 = float4_to_float3(kernel_tex_fetch(__attributes_float3, corners[3] + desc.offset));
}
else {
- f0 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[0] + offset));
- f1 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[1] + offset));
- f2 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[2] + offset));
- f3 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[3] + offset));
+ f0 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[0] + desc.offset));
+ f1 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[1] + desc.offset));
+ f2 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[2] + desc.offset));
+ f3 = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, corners[3] + desc.offset));
}
if(subd_triangle_patch_num_corners(kg, patch) != 4) {
@@ -239,9 +332,9 @@ ccl_device float3 subd_triangle_attribute_float3(KernelGlobals *kg, const Shader
f3 = (f3+f0)*0.5f;
}
- a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y);
- b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y);
- c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y);
+ float3 a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y);
+ float3 b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y);
+ float3 c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y);
#ifdef __RAY_DIFFERENTIALS__
if(dx) *dx = ccl_fetch(sd, du).dx*a + ccl_fetch(sd, dv).dx*b - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*c;
diff --git a/intern/cycles/kernel/geom/geom_triangle.h b/intern/cycles/kernel/geom/geom_triangle.h
index 0c2351e1d1b..d3289d6572c 100644
--- a/intern/cycles/kernel/geom/geom_triangle.h
+++ b/intern/cycles/kernel/geom/geom_triangle.h
@@ -105,20 +105,20 @@ ccl_device_inline void triangle_dPdudv(KernelGlobals *kg, int prim, ccl_addr_spa
/* Reading attributes on various triangle elements */
-ccl_device float triangle_attribute_float(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float *dx, float *dy)
+ccl_device float triangle_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy)
{
- if(elem == ATTR_ELEMENT_FACE) {
+ if(desc.element == ATTR_ELEMENT_FACE) {
if(dx) *dx = 0.0f;
if(dy) *dy = 0.0f;
- return kernel_tex_fetch(__attributes_float, offset + ccl_fetch(sd, prim));
+ return kernel_tex_fetch(__attributes_float, desc.offset + ccl_fetch(sd, prim));
}
- else if(elem == ATTR_ELEMENT_VERTEX || elem == ATTR_ELEMENT_VERTEX_MOTION) {
+ else if(desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) {
uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim));
- float f0 = kernel_tex_fetch(__attributes_float, offset + tri_vindex.x);
- float f1 = kernel_tex_fetch(__attributes_float, offset + tri_vindex.y);
- float f2 = kernel_tex_fetch(__attributes_float, offset + tri_vindex.z);
+ float f0 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.x);
+ float f1 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.y);
+ float f2 = kernel_tex_fetch(__attributes_float, desc.offset + tri_vindex.z);
#ifdef __RAY_DIFFERENTIALS__
if(dx) *dx = ccl_fetch(sd, du).dx*f0 + ccl_fetch(sd, dv).dx*f1 - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*f2;
@@ -127,8 +127,8 @@ ccl_device float triangle_attribute_float(KernelGlobals *kg, const ShaderData *s
return ccl_fetch(sd, u)*f0 + ccl_fetch(sd, v)*f1 + (1.0f - ccl_fetch(sd, u) - ccl_fetch(sd, v))*f2;
}
- else if(elem == ATTR_ELEMENT_CORNER) {
- int tri = offset + ccl_fetch(sd, prim)*3;
+ else if(desc.element == ATTR_ELEMENT_CORNER) {
+ int tri = desc.offset + ccl_fetch(sd, prim)*3;
float f0 = kernel_tex_fetch(__attributes_float, tri + 0);
float f1 = kernel_tex_fetch(__attributes_float, tri + 1);
float f2 = kernel_tex_fetch(__attributes_float, tri + 2);
@@ -148,20 +148,20 @@ ccl_device float triangle_attribute_float(KernelGlobals *kg, const ShaderData *s
}
}
-ccl_device float3 triangle_attribute_float3(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float3 *dx, float3 *dy)
+ccl_device float3 triangle_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float3 *dx, float3 *dy)
{
- if(elem == ATTR_ELEMENT_FACE) {
+ if(desc.element == ATTR_ELEMENT_FACE) {
if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
- return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + ccl_fetch(sd, prim)));
+ return float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + ccl_fetch(sd, prim)));
}
- else if(elem == ATTR_ELEMENT_VERTEX || elem == ATTR_ELEMENT_VERTEX_MOTION) {
+ else if(desc.element == ATTR_ELEMENT_VERTEX || desc.element == ATTR_ELEMENT_VERTEX_MOTION) {
uint4 tri_vindex = kernel_tex_fetch(__tri_vindex, ccl_fetch(sd, prim));
- float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.x));
- float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.y));
- float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + tri_vindex.z));
+ float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.x));
+ float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.y));
+ float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc.offset + tri_vindex.z));
#ifdef __RAY_DIFFERENTIALS__
if(dx) *dx = ccl_fetch(sd, du).dx*f0 + ccl_fetch(sd, dv).dx*f1 - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*f2;
@@ -170,11 +170,11 @@ ccl_device float3 triangle_attribute_float3(KernelGlobals *kg, const ShaderData
return ccl_fetch(sd, u)*f0 + ccl_fetch(sd, v)*f1 + (1.0f - ccl_fetch(sd, u) - ccl_fetch(sd, v))*f2;
}
- else if(elem == ATTR_ELEMENT_CORNER || elem == ATTR_ELEMENT_CORNER_BYTE) {
- int tri = offset + ccl_fetch(sd, prim)*3;
+ else if(desc.element == ATTR_ELEMENT_CORNER || desc.element == ATTR_ELEMENT_CORNER_BYTE) {
+ int tri = desc.offset + ccl_fetch(sd, prim)*3;
float3 f0, f1, f2;
- if(elem == ATTR_ELEMENT_CORNER) {
+ if(desc.element == ATTR_ELEMENT_CORNER) {
f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 0));
f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 1));
f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 2));
diff --git a/intern/cycles/kernel/geom/geom_volume.h b/intern/cycles/kernel/geom/geom_volume.h
index 7c8182bc430..efe540a8518 100644
--- a/intern/cycles/kernel/geom/geom_volume.h
+++ b/intern/cycles/kernel/geom/geom_volume.h
@@ -50,36 +50,35 @@ ccl_device_inline float3 volume_normalized_position(KernelGlobals *kg,
{
/* todo: optimize this so it's just a single matrix multiplication when
* possible (not motion blur), or perhaps even just translation + scale */
- AttributeElement attr_elem;
- int attr_offset = find_attribute(kg, sd, ATTR_STD_GENERATED_TRANSFORM, &attr_elem);
+ const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_GENERATED_TRANSFORM);
object_inverse_position_transform(kg, sd, &P);
- if(attr_offset != ATTR_STD_NOT_FOUND) {
- Transform tfm = primitive_attribute_matrix(kg, sd, attr_offset);
+ if(desc.offset != ATTR_STD_NOT_FOUND) {
+ Transform tfm = primitive_attribute_matrix(kg, sd, desc);
P = transform_point(&tfm, P);
}
return P;
}
-ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int id, float *dx, float *dy)
+ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float *dx, float *dy)
{
float3 P = volume_normalized_position(kg, sd, sd->P);
#ifdef __KERNEL_GPU__
# if __CUDA_ARCH__ >= 300
- CUtexObject tex = kernel_tex_fetch(__bindless_mapping, id);
+ CUtexObject tex = kernel_tex_fetch(__bindless_mapping, desc.offset);
float f = kernel_tex_image_interp_3d_float(tex, P.x, P.y, P.z);
float4 r = make_float4(f, f, f, 1.0);
# else
- float4 r = volume_image_texture_3d(id, P.x, P.y, P.z);
+ float4 r = volume_image_texture_3d(desc.offset, P.x, P.y, P.z);
# endif
#else
float4 r;
if(sd->flag & SD_VOLUME_CUBIC)
- r = kernel_tex_image_interp_3d_ex(id, P.x, P.y, P.z, INTERPOLATION_CUBIC);
+ r = kernel_tex_image_interp_3d_ex(desc.offset, P.x, P.y, P.z, INTERPOLATION_CUBIC);
else
- r = kernel_tex_image_interp_3d(id, P.x, P.y, P.z);
+ r = kernel_tex_image_interp_3d(desc.offset, P.x, P.y, P.z);
#endif
if(dx) *dx = 0.0f;
@@ -88,22 +87,22 @@ ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd,
return average(float4_to_float3(r));
}
-ccl_device float3 volume_attribute_float3(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int id, float3 *dx, float3 *dy)
+ccl_device float3 volume_attribute_float3(KernelGlobals *kg, const ShaderData *sd, const AttributeDescriptor desc, float3 *dx, float3 *dy)
{
float3 P = volume_normalized_position(kg, sd, sd->P);
#ifdef __KERNEL_GPU__
# if __CUDA_ARCH__ >= 300
- CUtexObject tex = kernel_tex_fetch(__bindless_mapping, id);
+ CUtexObject tex = kernel_tex_fetch(__bindless_mapping, desc.offset);
float4 r = kernel_tex_image_interp_3d_float4(tex, P.x, P.y, P.z);
# else
- float4 r = volume_image_texture_3d(id, P.x, P.y, P.z);
+ float4 r = volume_image_texture_3d(desc.offset, P.x, P.y, P.z);
# endif
#else
float4 r;
if(sd->flag & SD_VOLUME_CUBIC)
- r = kernel_tex_image_interp_3d_ex(id, P.x, P.y, P.z, INTERPOLATION_CUBIC);
+ r = kernel_tex_image_interp_3d_ex(desc.offset, P.x, P.y, P.z, INTERPOLATION_CUBIC);
else
- r = kernel_tex_image_interp_3d(id, P.x, P.y, P.z);
+ r = kernel_tex_image_interp_3d(desc.offset, P.x, P.y, P.z);
#endif
if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h
index c882b477c35..3775934f293 100644
--- a/intern/cycles/kernel/kernel_compat_cpu.h
+++ b/intern/cycles/kernel/kernel_compat_cpu.h
@@ -495,6 +495,7 @@ typedef texture<uint> texture_uint;
typedef texture<int> texture_int;
typedef texture<uint4> texture_uint4;
typedef texture<uchar4> texture_uchar4;
+typedef texture<uchar> texture_uchar;
typedef texture_image<float> texture_image_float;
typedef texture_image<uchar> texture_image_uchar;
typedef texture_image<half> texture_image_half;
diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h
index a039b414006..9a96cb9f438 100644
--- a/intern/cycles/kernel/kernel_compat_cuda.h
+++ b/intern/cycles/kernel/kernel_compat_cuda.h
@@ -31,6 +31,7 @@
#endif
#include <cuda.h>
+#include <cuda_fp16.h>
#include <float.h>
/* Qualifier wrappers for different names on different devices */
@@ -47,6 +48,7 @@
#define ccl_may_alias
#define ccl_addr_space
#define ccl_restrict __restrict__
+#define ccl_align(n) __align__(n)
/* No assert supported for CUDA */
@@ -65,6 +67,7 @@ typedef texture<float, 1> texture_float;
typedef texture<uint, 1> texture_uint;
typedef texture<int, 1> texture_int;
typedef texture<uint4, 1> texture_uint4;
+typedef texture<uchar, 1> texture_uchar;
typedef texture<uchar4, 1> texture_uchar4;
typedef texture<float4, 2> texture_image_float4;
typedef texture<float4, 3> texture_image3d_float4;
diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h
index 8505cb85576..2ae89dde7c4 100644
--- a/intern/cycles/kernel/kernel_compat_opencl.h
+++ b/intern/cycles/kernel/kernel_compat_opencl.h
@@ -40,6 +40,7 @@
#define ccl_local __local
#define ccl_private __private
#define ccl_restrict restrict
+#define ccl_align(n) __attribute__((aligned(n)))
#ifdef __SPLIT_KERNEL__
# define ccl_addr_space __global
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index 1f08f3459e6..903be4f09a0 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -435,8 +435,12 @@ ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
}
#ifdef __SUBSURFACE__
-
-ccl_device_inline bool kernel_path_subsurface_scatter(
+# ifndef __KERNEL_CUDA__
+ccl_device
+# else
+ccl_device_inline
+# endif
+bool kernel_path_subsurface_scatter(
KernelGlobals *kg,
ShaderData *sd,
ShaderData *emission_sd,
diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h
index 98d321c9c16..079bea30bdd 100644
--- a/intern/cycles/kernel/kernel_shader.h
+++ b/intern/cycles/kernel/kernel_shader.h
@@ -149,7 +149,7 @@ ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg,
/* ShaderData setup from BSSRDF scatter */
#ifdef __SUBSURFACE__
-# ifndef __KERNEL_CUDS__
+# ifndef __KERNEL_CUDA__
ccl_device
# else
ccl_device_inline
@@ -539,7 +539,7 @@ ccl_device_inline void _shader_bsdf_multi_eval_branched(KernelGlobals *kg,
#endif
-#ifndef __KERNEL_CUDS__
+#ifndef __KERNEL_CUDA__
ccl_device
#else
ccl_device_inline
diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h
index f404666177a..e83bfc3f08a 100644
--- a/intern/cycles/kernel/kernel_subsurface.h
+++ b/intern/cycles/kernel/kernel_subsurface.h
@@ -85,11 +85,16 @@ ccl_device ShaderClosure *subsurface_scatter_pick_closure(KernelGlobals *kg, Sha
return NULL;
}
-ccl_device_inline float3 subsurface_scatter_eval(ShaderData *sd,
- ShaderClosure *sc,
- float disk_r,
- float r,
- bool all)
+#ifndef __KERNEL_GPU__
+ccl_device_noinline
+#else
+ccl_device_inline
+#endif
+float3 subsurface_scatter_eval(ShaderData *sd,
+ ShaderClosure *sc,
+ float disk_r,
+ float r,
+ bool all)
{
#ifdef BSSRDF_MULTI_EVAL
/* this is the veach one-sample model with balance heuristic, some pdf
@@ -235,7 +240,12 @@ ccl_device void subsurface_color_bump_blur(KernelGlobals *kg,
/* Subsurface scattering step, from a point on the surface to other
* nearby points on the same object.
*/
-ccl_device_inline int subsurface_scatter_multi_intersect(
+#ifndef __KERNEL_CUDA__
+ccl_device
+#else
+ccl_device_inline
+#endif
+int subsurface_scatter_multi_intersect(
KernelGlobals *kg,
SubsurfaceIntersection* ss_isect,
ShaderData *sd,
diff --git a/intern/cycles/kernel/kernel_textures.h b/intern/cycles/kernel/kernel_textures.h
index 7d6fec02331..8d5bb75a428 100644
--- a/intern/cycles/kernel/kernel_textures.h
+++ b/intern/cycles/kernel/kernel_textures.h
@@ -188,6 +188,8 @@ KERNEL_TEX(uint, texture_uint, __bindless_mapping)
/* packed image (opencl) */
KERNEL_TEX(uchar4, texture_uchar4, __tex_image_byte4_packed)
KERNEL_TEX(float4, texture_float4, __tex_image_float4_packed)
+KERNEL_TEX(uchar, texture_uchar, __tex_image_byte_packed)
+KERNEL_TEX(float, texture_float, __tex_image_float_packed)
KERNEL_TEX(uint4, texture_uint4, __tex_image_packed_info)
#undef KERNEL_TEX
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 18b5c35c768..e29940672ca 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -19,6 +19,7 @@
#include "kernel_math.h"
#include "svm/svm_types.h"
+#include "util_static_assert.h"
#ifndef __KERNEL_GPU__
# define __KERNEL_CPU__
@@ -34,7 +35,7 @@
CCL_NAMESPACE_BEGIN
/* constants */
-#define OBJECT_SIZE 11
+#define OBJECT_SIZE 12
#define OBJECT_VECTOR_SIZE 6
#define LIGHT_SIZE 5
#define FILTER_TABLE_SIZE 1024
@@ -147,6 +148,7 @@ CCL_NAMESPACE_BEGIN
#define __CAMERA_CLIPPING__
#define __INTERSECTION_REFINE__
#define __CLAMP_SAMPLE__
+#define __PATCH_EVAL__
#ifdef __KERNEL_SHADING__
# define __SVM__
@@ -196,6 +198,9 @@ CCL_NAMESPACE_BEGIN
#ifdef __NO_BRANCHED_PATH__
# undef __BRANCHED_PATH__
#endif
+#ifdef __NO_PATCH_EVAL__
+# undef __PATCH_EVAL__
+#endif
/* Random Numbers */
@@ -624,6 +629,18 @@ typedef enum AttributeStandard {
ATTR_STD_NOT_FOUND = ~0
} AttributeStandard;
+typedef enum AttributeFlag {
+ ATTR_FINAL_SIZE = (1 << 0),
+ ATTR_SUBDIVIDED = (1 << 1),
+} AttributeFlag;
+
+typedef struct AttributeDescriptor {
+ AttributeElement element;
+ NodeAttributeType type;
+ uint flags; /* see enum AttributeFlag */
+ int offset;
+} AttributeDescriptor;
+
/* Closure data */
#ifdef __MULTI_CLOSURE__
@@ -644,23 +661,18 @@ typedef enum AttributeStandard {
* ShaderClosure has a fixed size, and any extra space must be allocated
* with closure_alloc_extra().
*
- * float3 is 12 bytes on CUDA and 16 bytes on CPU/OpenCL, we set the data
- * size to ensure ShaderClosure is 80 bytes total everywhere. */
+ * We pad the struct to 80 bytes and ensure it is aligned to 16 bytes, which
+ * we assume to be the maximum required alignment for any struct. */
#define SHADER_CLOSURE_BASE \
float3 weight; \
ClosureType type; \
float sample_weight \
-typedef ccl_addr_space struct ShaderClosure {
+typedef ccl_addr_space struct ccl_align(16) ShaderClosure {
SHADER_CLOSURE_BASE;
- /* pad to 80 bytes, data types are aligned to own size */
-#ifdef __KERNEL_CUDA__
- float data[15];
-#else
- float data[14];
-#endif
+ float data[14]; /* pad to 80 bytes */
} ShaderClosure;
/* Shader Context
@@ -735,7 +747,7 @@ enum ShaderDataFlag {
# define SD_THREAD (get_global_id(1) * get_global_size(0) + get_global_id(0))
# if defined(__SPLIT_KERNEL_AOS__)
/* ShaderData is stored as an Array-of-Structures */
-# define ccl_soa_member(type, name) type soa_##name;
+# define ccl_soa_member(type, name) type soa_##name
# define ccl_fetch(s, t) (s[SD_THREAD].soa_##t)
# define ccl_fetch_array(s, t, index) (&s[SD_THREAD].soa_##t[index])
# else
@@ -743,7 +755,7 @@ enum ShaderDataFlag {
# define SD_GLOBAL_SIZE (get_global_size(0) * get_global_size(1))
# define SD_FIELD_SIZE(t) sizeof(((struct ShaderData*)0)->t)
# define SD_OFFSETOF(t) ((char*)(&((struct ShaderData*)0)->t) - (char*)0)
-# define ccl_soa_member(type, name) type soa_##name;
+# define ccl_soa_member(type, name) type soa_##name
# define ccl_fetch(s, t) (((ShaderData*)((ccl_addr_space char*)s + SD_GLOBAL_SIZE * SD_OFFSETOF(soa_##t) + SD_FIELD_SIZE(soa_##t) * SD_THREAD - SD_OFFSETOF(soa_##t)))->soa_##t)
# define ccl_fetch_array(s, t, index) (&ccl_fetch(s, t)[index])
# endif
@@ -979,6 +991,7 @@ typedef struct KernelCamera {
int pad;
} KernelCamera;
+static_assert_align(KernelCamera, 16);
typedef struct KernelFilm {
float exposure;
@@ -1033,6 +1046,7 @@ typedef struct KernelFilm {
int pass_pad3;
#endif
} KernelFilm;
+static_assert_align(KernelFilm, 16);
typedef struct KernelBackground {
/* only shader index */
@@ -1046,6 +1060,7 @@ typedef struct KernelBackground {
float ao_distance;
float ao_pad1, ao_pad2;
} KernelBackground;
+static_assert_align(KernelBackground, 16);
typedef struct KernelIntegrator {
/* emission */
@@ -1113,8 +1128,10 @@ typedef struct KernelIntegrator {
float volume_step_size;
int volume_samples;
- int pad;
+ int pad1;
+ int pad2;
} KernelIntegrator;
+static_assert_align(KernelIntegrator, 16);
typedef struct KernelBVH {
/* root node */
@@ -1126,6 +1143,7 @@ typedef struct KernelBVH {
int use_qbvh;
int pad1, pad2;
} KernelBVH;
+static_assert_align(KernelBVH, 16);
typedef enum CurveFlag {
/* runtime flags */
@@ -1145,11 +1163,13 @@ typedef struct KernelCurves {
float minimum_width;
float maximum_width;
} KernelCurves;
+static_assert_align(KernelCurves, 16);
typedef struct KernelTables {
int beckmann_offset;
int pad1, pad2, pad3;
} KernelTables;
+static_assert_align(KernelTables, 16);
typedef struct KernelData {
KernelCamera cam;
@@ -1160,8 +1180,12 @@ typedef struct KernelData {
KernelCurves curve;
KernelTables tables;
} KernelData;
+static_assert_align(KernelData, 16);
#ifdef __KERNEL_DEBUG__
+/* NOTE: This is a runtime-only struct, alignment is not
+ * really important here.
+ */
typedef ccl_addr_space struct DebugData {
// Total number of BVH node traversal steps and primitives intersections
// for the camera rays.
@@ -1239,6 +1263,16 @@ enum RayState {
#define REMOVE_RAY_FLAG(ray_state, ray_index, flag) (ray_state[ray_index] = (ray_state[ray_index] & (~flag)))
#define IS_FLAG(ray_state, ray_index, flag) (ray_state[ray_index] & flag)
+/* Patches */
+
+#define PATCH_MAX_CONTROL_VERTS 16
+
+/* Patch map node flags */
+
+#define PATCH_MAP_NODE_IS_SET (1 << 30)
+#define PATCH_MAP_NODE_IS_LEAF (1u << 31)
+#define PATCH_MAP_NODE_INDEX_MASK (~(PATCH_MAP_NODE_IS_SET | PATCH_MAP_NODE_IS_LEAF))
+
CCL_NAMESPACE_END
#endif /* __KERNEL_TYPES_H__ */
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
index 47383140170..af68907a5c2 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
@@ -25,12 +25,12 @@ ccl_device float4 kernel_tex_image_interp_impl(KernelGlobals *kg, int tex, float
{
if(tex >= TEX_START_HALF_CPU)
return kg->texture_half_images[tex - TEX_START_HALF_CPU].interp(x, y);
- else if(tex >= TEX_START_HALF4_CPU)
- return kg->texture_half4_images[tex - TEX_START_HALF4_CPU].interp(x, y);
else if(tex >= TEX_START_BYTE_CPU)
return kg->texture_byte_images[tex - TEX_START_BYTE_CPU].interp(x, y);
else if(tex >= TEX_START_FLOAT_CPU)
return kg->texture_float_images[tex - TEX_START_FLOAT_CPU].interp(x, y);
+ else if(tex >= TEX_START_HALF4_CPU)
+ return kg->texture_half4_images[tex - TEX_START_HALF4_CPU].interp(x, y);
else if(tex >= TEX_START_BYTE4_CPU)
return kg->texture_byte4_images[tex - TEX_START_BYTE4_CPU].interp(x, y);
else
@@ -41,12 +41,12 @@ ccl_device float4 kernel_tex_image_interp_3d_impl(KernelGlobals *kg, int tex, fl
{
if(tex >= TEX_START_HALF_CPU)
return kg->texture_half_images[tex - TEX_START_HALF_CPU].interp_3d(x, y, z);
- else if(tex >= TEX_START_HALF4_CPU)
- return kg->texture_half4_images[tex - TEX_START_HALF4_CPU].interp_3d(x, y, z);
else if(tex >= TEX_START_BYTE_CPU)
return kg->texture_byte_images[tex - TEX_START_BYTE_CPU].interp_3d(x, y, z);
else if(tex >= TEX_START_FLOAT_CPU)
return kg->texture_float_images[tex - TEX_START_FLOAT_CPU].interp_3d(x, y, z);
+ else if(tex >= TEX_START_HALF4_CPU)
+ return kg->texture_half4_images[tex - TEX_START_HALF4_CPU].interp_3d(x, y, z);
else if(tex >= TEX_START_BYTE4_CPU)
return kg->texture_byte4_images[tex - TEX_START_BYTE4_CPU].interp_3d(x, y, z);
else
@@ -57,13 +57,13 @@ ccl_device float4 kernel_tex_image_interp_3d_impl(KernelGlobals *kg, int tex, fl
ccl_device float4 kernel_tex_image_interp_3d_ex_impl(KernelGlobals *kg, int tex, float x, float y, float z, int interpolation)
{
if(tex >= TEX_START_HALF_CPU)
- return kg->texture_half4_images[tex - TEX_START_HALF_CPU].interp_3d_ex(x, y, z, interpolation);
- else if(tex >= TEX_START_HALF4_CPU)
- return kg->texture_half_images[tex - TEX_START_HALF4_CPU].interp_3d_ex(x, y, z, interpolation);
+ return kg->texture_half_images[tex - TEX_START_HALF_CPU].interp_3d_ex(x, y, z, interpolation);
else if(tex >= TEX_START_BYTE_CPU)
return kg->texture_byte_images[tex - TEX_START_BYTE_CPU].interp_3d_ex(x, y, z, interpolation);
else if(tex >= TEX_START_FLOAT_CPU)
return kg->texture_float_images[tex - TEX_START_FLOAT_CPU].interp_3d_ex(x, y, z, interpolation);
+ else if(tex >= TEX_START_HALF4_CPU)
+ return kg->texture_half4_images[tex - TEX_START_HALF4_CPU].interp_3d_ex(x, y, z, interpolation);
else if(tex >= TEX_START_BYTE4_CPU)
return kg->texture_byte4_images[tex - TEX_START_BYTE4_CPU].interp_3d_ex(x, y, z, interpolation);
else
diff --git a/intern/cycles/kernel/osl/osl_globals.h b/intern/cycles/kernel/osl/osl_globals.h
index 916542ec628..8353c4e434b 100644
--- a/intern/cycles/kernel/osl/osl_globals.h
+++ b/intern/cycles/kernel/osl/osl_globals.h
@@ -59,8 +59,7 @@ struct OSLGlobals {
/* attributes */
struct Attribute {
TypeDesc type;
- AttributeElement elem;
- int offset;
+ AttributeDescriptor desc;
ParamValue value;
};
diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp
index caae24405f1..153ebad6cd2 100644
--- a/intern/cycles/kernel/osl/osl_services.cpp
+++ b/intern/cycles/kernel/osl/osl_services.cpp
@@ -554,13 +554,13 @@ static bool get_mesh_element_attribute(KernelGlobals *kg, const ShaderData *sd,
attr.type == TypeDesc::TypeNormal || attr.type == TypeDesc::TypeColor)
{
float3 fval[3];
- fval[0] = primitive_attribute_float3(kg, sd, attr.elem, attr.offset,
+ fval[0] = primitive_attribute_float3(kg, sd, attr.desc,
(derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL);
return set_attribute_float3(fval, type, derivatives, val);
}
else if(attr.type == TypeDesc::TypeFloat) {
float fval[3];
- fval[0] = primitive_attribute_float(kg, sd, attr.elem, attr.offset,
+ fval[0] = primitive_attribute_float(kg, sd, attr.desc,
(derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL);
return set_attribute_float(fval, type, derivatives, val);
}
@@ -573,7 +573,7 @@ static bool get_mesh_attribute(KernelGlobals *kg, const ShaderData *sd, const OS
const TypeDesc& type, bool derivatives, void *val)
{
if(attr.type == TypeDesc::TypeMatrix) {
- Transform tfm = primitive_attribute_matrix(kg, sd, attr.offset);
+ Transform tfm = primitive_attribute_matrix(kg, sd, attr.desc);
return set_attribute_matrix(tfm, type, val);
}
else {
@@ -815,7 +815,7 @@ bool OSLRenderServices::get_attribute(ShaderData *sd, bool derivatives, ustring
if(it != attribute_map.end()) {
const OSLGlobals::Attribute& attr = it->second;
- if(attr.elem != ATTR_ELEMENT_OBJECT) {
+ if(attr.desc.element != ATTR_ELEMENT_OBJECT) {
/* triangle and vertex attributes */
if(get_mesh_element_attribute(kg, sd, attr, type, derivatives, val))
return true;
diff --git a/intern/cycles/kernel/osl/osl_shader.cpp b/intern/cycles/kernel/osl/osl_shader.cpp
index 784e468635c..43a9e2f13aa 100644
--- a/intern/cycles/kernel/osl/osl_shader.cpp
+++ b/intern/cycles/kernel/osl/osl_shader.cpp
@@ -340,7 +340,7 @@ void OSLShader::eval_displacement(KernelGlobals *kg, ShaderData *sd, ShaderConte
/* Attributes */
-int OSLShader::find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id, AttributeElement *elem)
+int OSLShader::find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id, AttributeDescriptor *desc)
{
/* for OSL, a hash map is used to lookup the attribute by name. */
int object = sd->object*ATTR_PRIM_TYPES;
@@ -354,16 +354,23 @@ int OSLShader::find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id,
if(it != attr_map.end()) {
const OSLGlobals::Attribute &osl_attr = it->second;
- *elem = osl_attr.elem;
+ *desc = osl_attr.desc;
- if(sd->prim == PRIM_NONE && (AttributeElement)osl_attr.elem != ATTR_ELEMENT_MESH)
+ if(sd->prim == PRIM_NONE && (AttributeElement)osl_attr.desc.element != ATTR_ELEMENT_MESH) {
+ desc->offset = ATTR_STD_NOT_FOUND;
return ATTR_STD_NOT_FOUND;
+ }
/* return result */
- return (osl_attr.elem == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : osl_attr.offset;
+ if(osl_attr.desc.element == ATTR_ELEMENT_NONE) {
+ desc->offset = ATTR_STD_NOT_FOUND;
+ }
+ return desc->offset;
}
- else
+ else {
+ desc->offset = ATTR_STD_NOT_FOUND;
return (int)ATTR_STD_NOT_FOUND;
+ }
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/osl/osl_shader.h b/intern/cycles/kernel/osl/osl_shader.h
index a185b8b8c05..ad06dd6929d 100644
--- a/intern/cycles/kernel/osl/osl_shader.h
+++ b/intern/cycles/kernel/osl/osl_shader.h
@@ -59,7 +59,7 @@ public:
static void eval_displacement(KernelGlobals *kg, ShaderData *sd, ShaderContext ctx);
/* attributes */
- static int find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id, AttributeElement *elem);
+ static int find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id, AttributeDescriptor *desc);
};
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/shaders/node_rgb_curves.osl b/intern/cycles/kernel/shaders/node_rgb_curves.osl
index c8e7e4f175b..984b7d47e8f 100644
--- a/intern/cycles/kernel/shaders/node_rgb_curves.osl
+++ b/intern/cycles/kernel/shaders/node_rgb_curves.osl
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include "stdosl.h"
#include "node_ramp_util.h"
shader node_rgb_curves(
diff --git a/intern/cycles/kernel/shaders/node_rgb_ramp.osl b/intern/cycles/kernel/shaders/node_rgb_ramp.osl
index 24b8728b999..4e7d8fdcf65 100644
--- a/intern/cycles/kernel/shaders/node_rgb_ramp.osl
+++ b/intern/cycles/kernel/shaders/node_rgb_ramp.osl
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include "stdosl.h"
#include "node_ramp_util.h"
shader node_rgb_ramp(
diff --git a/intern/cycles/kernel/shaders/node_vector_curves.osl b/intern/cycles/kernel/shaders/node_vector_curves.osl
index d92fa11d439..ff284c48e0a 100644
--- a/intern/cycles/kernel/shaders/node_vector_curves.osl
+++ b/intern/cycles/kernel/shaders/node_vector_curves.osl
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include "stdosl.h"
#include "node_ramp_util.h"
shader node_vector_curves(
diff --git a/intern/cycles/kernel/svm/svm_attribute.h b/intern/cycles/kernel/svm/svm_attribute.h
index bd6013e9205..de978a423b4 100644
--- a/intern/cycles/kernel/svm/svm_attribute.h
+++ b/intern/cycles/kernel/svm/svm_attribute.h
@@ -18,117 +18,101 @@ CCL_NAMESPACE_BEGIN
/* Attribute Node */
-ccl_device void svm_node_attr_init(KernelGlobals *kg, ShaderData *sd,
+ccl_device AttributeDescriptor svm_node_attr_init(KernelGlobals *kg, ShaderData *sd,
uint4 node, NodeAttributeType *type,
- NodeAttributeType *mesh_type, AttributeElement *elem, int *offset, uint *out_offset)
+ uint *out_offset)
{
*out_offset = node.z;
*type = (NodeAttributeType)node.w;
+
+ AttributeDescriptor desc;
+
if(ccl_fetch(sd, object) != OBJECT_NONE) {
- /* find attribute by unique id */
- uint id = node.y;
- uint attr_offset = ccl_fetch(sd, object)*kernel_data.bvh.attributes_map_stride;
- attr_offset += attribute_primitive_type(kg, sd);
- uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
-
- while(attr_map.x != id) {
- if(UNLIKELY(attr_map.x == ATTR_STD_NONE)) {
- *elem = ATTR_ELEMENT_NONE;
- *offset = 0;
- *mesh_type = (NodeAttributeType)node.w;
- return;
- }
- attr_offset += ATTR_PRIM_TYPES;
- attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
+ desc = find_attribute(kg, sd, node.y);
+ if(desc.offset == ATTR_STD_NOT_FOUND) {
+ desc.element = ATTR_ELEMENT_NONE;
+ desc.offset = 0;
+ desc.type = (NodeAttributeType)node.w;
}
-
- /* return result */
- *elem = (AttributeElement)attr_map.y;
- *offset = as_int(attr_map.z);
- *mesh_type = (NodeAttributeType)attr_map.w;
}
else {
/* background */
- *elem = ATTR_ELEMENT_NONE;
- *offset = 0;
- *mesh_type = (NodeAttributeType)node.w;
+ desc.element = ATTR_ELEMENT_NONE;
+ desc.offset = 0;
+ desc.type = (NodeAttributeType)node.w;
}
+
+ return desc;
}
ccl_device void svm_node_attr(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
{
- NodeAttributeType type, mesh_type;
- AttributeElement elem;
+ NodeAttributeType type;
uint out_offset;
- int offset;
-
- svm_node_attr_init(kg, sd, node, &type, &mesh_type, &elem, &offset, &out_offset);
+ AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset);
/* fetch and store attribute */
if(type == NODE_ATTR_FLOAT) {
- if(mesh_type == NODE_ATTR_FLOAT) {
- float f = primitive_attribute_float(kg, sd, elem, offset, NULL, NULL);
+ if(desc.type == NODE_ATTR_FLOAT) {
+ float f = primitive_attribute_float(kg, sd, desc, NULL, NULL);
stack_store_float(stack, out_offset, f);
}
else {
- float3 f = primitive_attribute_float3(kg, sd, elem, offset, NULL, NULL);
+ float3 f = primitive_attribute_float3(kg, sd, desc, NULL, NULL);
stack_store_float(stack, out_offset, average(f));
}
}
else {
- if(mesh_type == NODE_ATTR_FLOAT3) {
- float3 f = primitive_attribute_float3(kg, sd, elem, offset, NULL, NULL);
+ if(desc.type == NODE_ATTR_FLOAT3) {
+ float3 f = primitive_attribute_float3(kg, sd, desc, NULL, NULL);
stack_store_float3(stack, out_offset, f);
}
else {
- float f = primitive_attribute_float(kg, sd, elem, offset, NULL, NULL);
+ float f = primitive_attribute_float(kg, sd, desc, NULL, NULL);
stack_store_float3(stack, out_offset, make_float3(f, f, f));
}
}
}
-#ifndef __KERNEL_CUDS__
+#ifndef __KERNEL_CUDA__
ccl_device
#else
ccl_device_noinline
#endif
void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
{
- NodeAttributeType type, mesh_type;
- AttributeElement elem;
+ NodeAttributeType type;
uint out_offset;
- int offset;
-
- svm_node_attr_init(kg, sd, node, &type, &mesh_type, &elem, &offset, &out_offset);
+ AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset);
/* fetch and store attribute */
if(type == NODE_ATTR_FLOAT) {
- if(mesh_type == NODE_ATTR_FLOAT) {
+ if(desc.type == NODE_ATTR_FLOAT) {
float dx;
- float f = primitive_attribute_float(kg, sd, elem, offset, &dx, NULL);
+ float f = primitive_attribute_float(kg, sd, desc, &dx, NULL);
stack_store_float(stack, out_offset, f+dx);
}
else {
float3 dx;
- float3 f = primitive_attribute_float3(kg, sd, elem, offset, &dx, NULL);
+ float3 f = primitive_attribute_float3(kg, sd, desc, &dx, NULL);
stack_store_float(stack, out_offset, average(f+dx));
}
}
else {
- if(mesh_type == NODE_ATTR_FLOAT3) {
+ if(desc.type == NODE_ATTR_FLOAT3) {
float3 dx;
- float3 f = primitive_attribute_float3(kg, sd, elem, offset, &dx, NULL);
+ float3 f = primitive_attribute_float3(kg, sd, desc, &dx, NULL);
stack_store_float3(stack, out_offset, f+dx);
}
else {
float dx;
- float f = primitive_attribute_float(kg, sd, elem, offset, &dx, NULL);
+ float f = primitive_attribute_float(kg, sd, desc, &dx, NULL);
stack_store_float3(stack, out_offset, make_float3(f+dx, f+dx, f+dx));
}
}
}
-#ifndef __KERNEL_CUDS__
+#ifndef __KERNEL_CUDA__
ccl_device
#else
ccl_device_noinline
@@ -138,35 +122,32 @@ void svm_node_attr_bump_dy(KernelGlobals *kg,
float *stack,
uint4 node)
{
- NodeAttributeType type, mesh_type;
- AttributeElement elem;
+ NodeAttributeType type;
uint out_offset;
- int offset;
-
- svm_node_attr_init(kg, sd, node, &type, &mesh_type, &elem, &offset, &out_offset);
+ AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset);
/* fetch and store attribute */
if(type == NODE_ATTR_FLOAT) {
- if(mesh_type == NODE_ATTR_FLOAT) {
+ if(desc.type == NODE_ATTR_FLOAT) {
float dy;
- float f = primitive_attribute_float(kg, sd, elem, offset, NULL, &dy);
+ float f = primitive_attribute_float(kg, sd, desc, NULL, &dy);
stack_store_float(stack, out_offset, f+dy);
}
else {
float3 dy;
- float3 f = primitive_attribute_float3(kg, sd, elem, offset, NULL, &dy);
+ float3 f = primitive_attribute_float3(kg, sd, desc, NULL, &dy);
stack_store_float(stack, out_offset, average(f+dy));
}
}
else {
- if(mesh_type == NODE_ATTR_FLOAT3) {
+ if(desc.type == NODE_ATTR_FLOAT3) {
float3 dy;
- float3 f = primitive_attribute_float3(kg, sd, elem, offset, NULL, &dy);
+ float3 f = primitive_attribute_float3(kg, sd, desc, NULL, &dy);
stack_store_float3(stack, out_offset, f+dy);
}
else {
float dy;
- float f = primitive_attribute_float(kg, sd, elem, offset, NULL, &dy);
+ float f = primitive_attribute_float(kg, sd, desc, NULL, &dy);
stack_store_float3(stack, out_offset, make_float3(f+dy, f+dy, f+dy));
}
}
diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h
index b6b90dfff81..5d02be1fa2f 100644
--- a/intern/cycles/kernel/svm/svm_image.h
+++ b/intern/cycles/kernel/svm/svm_image.h
@@ -18,7 +18,7 @@ CCL_NAMESPACE_BEGIN
/* Float4 textures on various devices. */
#if defined(__KERNEL_CPU__)
-# define TEX_NUM_FLOAT4_IMAGES TEX_NUM_FLOAT4_CPU
+# define TEX_NUM_FLOAT4_IMAGES TEX_NUM_FLOAT4_CPU
#elif defined(__KERNEL_CUDA__)
# if __CUDA_ARCH__ < 300
# define TEX_NUM_FLOAT4_IMAGES TEX_NUM_FLOAT4_CUDA
@@ -36,13 +36,26 @@ CCL_NAMESPACE_BEGIN
ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, int id, int offset)
{
- if(id >= TEX_NUM_FLOAT4_IMAGES) {
+ /* Float4 */
+ if(id < TEX_START_BYTE4_OPENCL) {
+ return kernel_tex_fetch(__tex_image_float4_packed, offset);
+ }
+ /* Byte4 */
+ else if(id < TEX_START_FLOAT_OPENCL) {
uchar4 r = kernel_tex_fetch(__tex_image_byte4_packed, offset);
float f = 1.0f/255.0f;
return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
}
+ /* Float */
+ else if(id < TEX_START_BYTE_OPENCL) {
+ float f = kernel_tex_fetch(__tex_image_float_packed, offset);
+ return make_float4(f, f, f, 1.0f);
+ }
+ /* Byte */
else {
- return kernel_tex_fetch(__tex_image_float4_packed, offset);
+ uchar r = kernel_tex_fetch(__tex_image_byte_packed, offset);
+ float f = r * (1.0f/255.0f);
+ return make_float4(f, f, f, 1.0f);
}
}
@@ -277,8 +290,10 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
}
# else
CUtexObject tex = kernel_tex_fetch(__bindless_mapping, id);
- if(id < 2048) /* TODO(dingto): Make this a variable */
+ /* float4, byte4 and half4 */
+ if(id < TEX_START_FLOAT_CUDA_KEPLER)
r = kernel_tex_image_interp_float4(tex, x, y);
+ /* float, byte and half */
else {
float f = kernel_tex_image_interp_float(tex, x, y);
r = make_float4(f, f, f, 1.0);
diff --git a/intern/cycles/kernel/svm/svm_math_util.h b/intern/cycles/kernel/svm/svm_math_util.h
index 3f7d18a02fe..6d13a0d8e02 100644
--- a/intern/cycles/kernel/svm/svm_math_util.h
+++ b/intern/cycles/kernel/svm/svm_math_util.h
@@ -32,21 +32,17 @@ ccl_device void svm_vector_math(float *Fac, float3 *Vector, NodeVectorMath type,
*Fac = average_fac(*Vector);
}
else if(type == NODE_VECTOR_MATH_AVERAGE) {
- *Fac = len(Vector1 + Vector2);
- *Vector = normalize(Vector1 + Vector2);
+ *Vector = safe_normalize_len(Vector1 + Vector2, Fac);
}
else if(type == NODE_VECTOR_MATH_DOT_PRODUCT) {
*Fac = dot(Vector1, Vector2);
*Vector = make_float3(0.0f, 0.0f, 0.0f);
}
else if(type == NODE_VECTOR_MATH_CROSS_PRODUCT) {
- float3 c = cross(Vector1, Vector2);
- *Fac = len(c);
- *Vector = normalize(c);
+ *Vector = safe_normalize_len(cross(Vector1, Vector2), Fac);
}
else if(type == NODE_VECTOR_MATH_NORMALIZE) {
- *Fac = len(Vector1);
- *Vector = normalize(Vector1);
+ *Vector = safe_normalize_len(Vector1, Fac);
}
else {
*Fac = 0.0f;
diff --git a/intern/cycles/kernel/svm/svm_tex_coord.h b/intern/cycles/kernel/svm/svm_tex_coord.h
index b39d6a3e009..01dede3fff5 100644
--- a/intern/cycles/kernel/svm/svm_tex_coord.h
+++ b/intern/cycles/kernel/svm/svm_tex_coord.h
@@ -287,23 +287,22 @@ ccl_device void svm_node_normal_map(KernelGlobals *kg, ShaderData *sd, float *st
}
/* first try to get tangent attribute */
- AttributeElement attr_elem, attr_sign_elem, attr_normal_elem;
- int attr_offset = find_attribute(kg, sd, node.z, &attr_elem);
- int attr_sign_offset = find_attribute(kg, sd, node.w, &attr_sign_elem);
- int attr_normal_offset = find_attribute(kg, sd, ATTR_STD_VERTEX_NORMAL, &attr_normal_elem);
+ const AttributeDescriptor attr = find_attribute(kg, sd, node.z);
+ const AttributeDescriptor attr_sign = find_attribute(kg, sd, node.w);
+ const AttributeDescriptor attr_normal = find_attribute(kg, sd, ATTR_STD_VERTEX_NORMAL);
- if(attr_offset == ATTR_STD_NOT_FOUND || attr_sign_offset == ATTR_STD_NOT_FOUND || attr_normal_offset == ATTR_STD_NOT_FOUND) {
+ if(attr.offset == ATTR_STD_NOT_FOUND || attr_sign.offset == ATTR_STD_NOT_FOUND || attr_normal.offset == ATTR_STD_NOT_FOUND) {
stack_store_float3(stack, normal_offset, make_float3(0.0f, 0.0f, 0.0f));
return;
}
/* get _unnormalized_ interpolated normal and tangent */
- float3 tangent = primitive_attribute_float3(kg, sd, attr_elem, attr_offset, NULL, NULL);
- float sign = primitive_attribute_float(kg, sd, attr_sign_elem, attr_sign_offset, NULL, NULL);
+ float3 tangent = primitive_attribute_float3(kg, sd, attr, NULL, NULL);
+ float sign = primitive_attribute_float(kg, sd, attr_sign, NULL, NULL);
float3 normal;
if(ccl_fetch(sd, shader) & SHADER_SMOOTH_NORMAL) {
- normal = primitive_attribute_float3(kg, sd, attr_normal_elem, attr_normal_offset, NULL, NULL);
+ normal = primitive_attribute_float3(kg, sd, attr_normal, NULL, NULL);
}
else {
normal = ccl_fetch(sd, Ng);
@@ -356,24 +355,22 @@ ccl_device void svm_node_tangent(KernelGlobals *kg, ShaderData *sd, float *stack
if(direction_type == NODE_TANGENT_UVMAP) {
/* UV map */
- AttributeElement attr_elem;
- int attr_offset = find_attribute(kg, sd, node.z, &attr_elem);
+ const AttributeDescriptor desc = find_attribute(kg, sd, node.z);
- if(attr_offset == ATTR_STD_NOT_FOUND)
+ if(desc.offset == ATTR_STD_NOT_FOUND)
tangent = make_float3(0.0f, 0.0f, 0.0f);
else
- tangent = primitive_attribute_float3(kg, sd, attr_elem, attr_offset, NULL, NULL);
+ tangent = primitive_attribute_float3(kg, sd, desc, NULL, NULL);
}
else {
/* radial */
- AttributeElement attr_elem;
- int attr_offset = find_attribute(kg, sd, node.z, &attr_elem);
+ const AttributeDescriptor desc = find_attribute(kg, sd, node.z);
float3 generated;
- if(attr_offset == ATTR_STD_NOT_FOUND)
+ if(desc.offset == ATTR_STD_NOT_FOUND)
generated = ccl_fetch(sd, P);
else
- generated = primitive_attribute_float3(kg, sd, attr_elem, attr_offset, NULL, NULL);
+ generated = primitive_attribute_float3(kg, sd, desc, NULL, NULL);
if(axis == NODE_TANGENT_AXIS_X)
tangent = make_float3(0.0f, -(generated.z - 0.5f), (generated.y - 0.5f));
diff --git a/intern/cycles/render/attribute.cpp b/intern/cycles/render/attribute.cpp
index e8ff81fe08e..c0d429a583c 100644
--- a/intern/cycles/render/attribute.cpp
+++ b/intern/cycles/render/attribute.cpp
@@ -44,6 +44,7 @@ void Attribute::set(ustring name_, TypeDesc type_, AttributeElement element_)
type = type_;
element = element_;
std = ATTR_STD_NONE;
+ flags = 0;
/* string and matrix not supported! */
assert(type == TypeDesc::TypeFloat || type == TypeDesc::TypeColor ||
@@ -61,6 +62,11 @@ void Attribute::resize(Mesh *mesh, AttributePrimitive prim, bool reserve_only)
}
}
+void Attribute::resize(size_t num_elements)
+{
+ buffer.resize(num_elements * data_sizeof(), 0);
+}
+
void Attribute::add(const float& f)
{
char *data = (char*)&f;
@@ -130,6 +136,10 @@ size_t Attribute::data_sizeof() const
size_t Attribute::element_size(Mesh *mesh, AttributePrimitive prim) const
{
+ if(flags & ATTR_FINAL_SIZE) {
+ return buffer.size() / data_sizeof();
+ }
+
size_t size;
switch(element) {
@@ -517,16 +527,19 @@ AttributeRequest::AttributeRequest(ustring name_)
std = ATTR_STD_NONE;
triangle_type = TypeDesc::TypeFloat;
- triangle_element = ATTR_ELEMENT_NONE;
- triangle_offset = 0;
+ triangle_desc.element = ATTR_ELEMENT_NONE;
+ triangle_desc.offset = 0;
+ triangle_desc.type = NODE_ATTR_FLOAT;
curve_type = TypeDesc::TypeFloat;
- curve_element = ATTR_ELEMENT_NONE;
- curve_offset = 0;
+ curve_desc.element = ATTR_ELEMENT_NONE;
+ curve_desc.offset = 0;
+ curve_desc.type = NODE_ATTR_FLOAT;
subd_type = TypeDesc::TypeFloat;
- subd_element = ATTR_ELEMENT_NONE;
- subd_offset = 0;
+ subd_desc.element = ATTR_ELEMENT_NONE;
+ subd_desc.offset = 0;
+ subd_desc.type = NODE_ATTR_FLOAT;
}
AttributeRequest::AttributeRequest(AttributeStandard std_)
@@ -535,16 +548,19 @@ AttributeRequest::AttributeRequest(AttributeStandard std_)
std = std_;
triangle_type = TypeDesc::TypeFloat;
- triangle_element = ATTR_ELEMENT_NONE;
- triangle_offset = 0;
+ triangle_desc.element = ATTR_ELEMENT_NONE;
+ triangle_desc.offset = 0;
+ triangle_desc.type = NODE_ATTR_FLOAT;
curve_type = TypeDesc::TypeFloat;
- curve_element = ATTR_ELEMENT_NONE;
- curve_offset = 0;
+ curve_desc.element = ATTR_ELEMENT_NONE;
+ curve_desc.offset = 0;
+ curve_desc.type = NODE_ATTR_FLOAT;
subd_type = TypeDesc::TypeFloat;
- subd_element = ATTR_ELEMENT_NONE;
- subd_offset = 0;
+ subd_desc.element = ATTR_ELEMENT_NONE;
+ subd_desc.offset = 0;
+ subd_desc.type = NODE_ATTR_FLOAT;
}
/* AttributeRequestSet */
diff --git a/intern/cycles/render/attribute.h b/intern/cycles/render/attribute.h
index e51bdf28d66..f4538c76369 100644
--- a/intern/cycles/render/attribute.h
+++ b/intern/cycles/render/attribute.h
@@ -54,11 +54,13 @@ public:
TypeDesc type;
vector<char> buffer;
AttributeElement element;
+ uint flags; /* enum AttributeFlag */
Attribute() {}
~Attribute();
void set(ustring name, TypeDesc type, AttributeElement element);
void resize(Mesh *mesh, AttributePrimitive prim, bool reserve_only);
+ void resize(size_t num_elements);
size_t data_sizeof() const;
size_t element_size(Mesh *mesh, AttributePrimitive prim) const;
@@ -135,8 +137,7 @@ public:
/* temporary variables used by MeshManager */
TypeDesc triangle_type, curve_type, subd_type;
- AttributeElement triangle_element, curve_element, subd_element;
- int triangle_offset, curve_offset, subd_offset;
+ AttributeDescriptor triangle_desc, curve_desc, subd_desc;
explicit AttributeRequest(ustring name_);
explicit AttributeRequest(AttributeStandard std);
diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp
index 614620c14af..24543601ef9 100644
--- a/intern/cycles/render/image.cpp
+++ b/intern/cycles/render/image.cpp
@@ -52,15 +52,15 @@ ImageManager::ImageManager(const DeviceInfo& info)
{ \
tex_num_images[IMAGE_DATA_TYPE_FLOAT4] = TEX_NUM_FLOAT4_ ## ARCH; \
tex_num_images[IMAGE_DATA_TYPE_BYTE4] = TEX_NUM_BYTE4_ ## ARCH; \
+ tex_num_images[IMAGE_DATA_TYPE_HALF4] = TEX_NUM_HALF4_ ## ARCH; \
tex_num_images[IMAGE_DATA_TYPE_FLOAT] = TEX_NUM_FLOAT_ ## ARCH; \
tex_num_images[IMAGE_DATA_TYPE_BYTE] = TEX_NUM_BYTE_ ## ARCH; \
- tex_num_images[IMAGE_DATA_TYPE_HALF4] = TEX_NUM_HALF4_ ## ARCH; \
tex_num_images[IMAGE_DATA_TYPE_HALF] = TEX_NUM_HALF_ ## ARCH; \
tex_start_images[IMAGE_DATA_TYPE_FLOAT4] = TEX_START_FLOAT4_ ## ARCH; \
tex_start_images[IMAGE_DATA_TYPE_BYTE4] = TEX_START_BYTE4_ ## ARCH; \
+ tex_start_images[IMAGE_DATA_TYPE_HALF4] = TEX_START_HALF4_ ## ARCH; \
tex_start_images[IMAGE_DATA_TYPE_FLOAT] = TEX_START_FLOAT_ ## ARCH; \
tex_start_images[IMAGE_DATA_TYPE_BYTE] = TEX_START_BYTE_ ## ARCH; \
- tex_start_images[IMAGE_DATA_TYPE_HALF4] = TEX_START_HALF4_ ## ARCH; \
tex_start_images[IMAGE_DATA_TYPE_HALF] = TEX_START_HALF_ ## ARCH; \
}
@@ -82,15 +82,15 @@ ImageManager::ImageManager(const DeviceInfo& info)
/* Should not happen. */
tex_num_images[IMAGE_DATA_TYPE_FLOAT4] = 0;
tex_num_images[IMAGE_DATA_TYPE_BYTE4] = 0;
+ tex_num_images[IMAGE_DATA_TYPE_HALF4] = 0;
tex_num_images[IMAGE_DATA_TYPE_FLOAT] = 0;
tex_num_images[IMAGE_DATA_TYPE_BYTE] = 0;
- tex_num_images[IMAGE_DATA_TYPE_HALF4] = 0;
tex_num_images[IMAGE_DATA_TYPE_HALF] = 0;
tex_start_images[IMAGE_DATA_TYPE_FLOAT4] = 0;
tex_start_images[IMAGE_DATA_TYPE_BYTE4] = 0;
+ tex_start_images[IMAGE_DATA_TYPE_HALF4] = 0;
tex_start_images[IMAGE_DATA_TYPE_FLOAT] = 0;
tex_start_images[IMAGE_DATA_TYPE_BYTE] = 0;
- tex_start_images[IMAGE_DATA_TYPE_HALF4] = 0;
tex_start_images[IMAGE_DATA_TYPE_HALF] = 0;
assert(0);
}
@@ -216,7 +216,7 @@ ImageManager::ImageDataType ImageManager::get_image_metadata(const string& filen
}
/* We use a consecutive slot counting scheme on the devices, in order
- * float4, byte4, float, byte.
+ * float4, byte4, half4, float, byte, half.
* These functions convert the slot ids from ImageManager "images" ones
* to device ones and vice versa. */
int ImageManager::type_index_to_flattened_slot(int slot, ImageDataType type)
@@ -284,7 +284,7 @@ int ImageManager::add_image(const string& filename,
if(type == IMAGE_DATA_TYPE_FLOAT || type == IMAGE_DATA_TYPE_FLOAT4)
is_float = true;
- /* No single channel and half textures on CUDA (Fermi) and OpenCL, use available slots */
+ /* No single channel and half textures on CUDA (Fermi) and no half on OpenCL, use available slots */
if((type == IMAGE_DATA_TYPE_FLOAT ||
type == IMAGE_DATA_TYPE_HALF4 ||
type == IMAGE_DATA_TYPE_HALF) &&
@@ -1105,10 +1105,11 @@ void ImageManager::device_pack_images(Device *device,
size_t size = 0, offset = 0;
ImageDataType type;
- int info_size = tex_num_images[IMAGE_DATA_TYPE_FLOAT4] + tex_num_images[IMAGE_DATA_TYPE_BYTE4];
+ int info_size = tex_num_images[IMAGE_DATA_TYPE_FLOAT4] + tex_num_images[IMAGE_DATA_TYPE_BYTE4]
+ + tex_num_images[IMAGE_DATA_TYPE_FLOAT] + tex_num_images[IMAGE_DATA_TYPE_BYTE];
uint4 *info = dscene->tex_image_packed_info.resize(info_size);
- /* Byte Textures*/
+ /* Byte4 Textures*/
type = IMAGE_DATA_TYPE_BYTE4;
for(size_t slot = 0; slot < images[type].size(); slot++) {
@@ -1119,7 +1120,7 @@ void ImageManager::device_pack_images(Device *device,
size += tex_img.size();
}
- uchar4 *pixels_byte = dscene->tex_image_byte4_packed.resize(size);
+ uchar4 *pixels_byte4 = dscene->tex_image_byte4_packed.resize(size);
for(size_t slot = 0; slot < images[type].size(); slot++) {
if(!images[type][slot])
@@ -1131,11 +1132,11 @@ void ImageManager::device_pack_images(Device *device,
info[type_index_to_flattened_slot(slot, type)] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options);
- memcpy(pixels_byte+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
+ memcpy(pixels_byte4+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
offset += tex_img.size();
}
- /* Float Textures*/
+ /* Float4 Textures*/
type = IMAGE_DATA_TYPE_FLOAT4;
size = 0, offset = 0;
@@ -1147,7 +1148,7 @@ void ImageManager::device_pack_images(Device *device,
size += tex_img.size();
}
- float4 *pixels_float = dscene->tex_image_float4_packed.resize(size);
+ float4 *pixels_float4 = dscene->tex_image_float4_packed.resize(size);
for(size_t slot = 0; slot < images[type].size(); slot++) {
if(!images[type][slot])
@@ -1160,6 +1161,63 @@ void ImageManager::device_pack_images(Device *device,
uint8_t options = pack_image_options(type, slot);
info[type_index_to_flattened_slot(slot, type)] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options);
+ memcpy(pixels_float4+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
+ offset += tex_img.size();
+ }
+
+ /* Byte Textures*/
+ type = IMAGE_DATA_TYPE_BYTE;
+ size = 0, offset = 0;
+
+ for(size_t slot = 0; slot < images[type].size(); slot++) {
+ if(!images[type][slot])
+ continue;
+
+ device_vector<uchar>& tex_img = dscene->tex_byte_image[slot];
+ size += tex_img.size();
+ }
+
+ uchar *pixels_byte = dscene->tex_image_byte_packed.resize(size);
+
+ for(size_t slot = 0; slot < images[type].size(); slot++) {
+ if(!images[type][slot])
+ continue;
+
+ device_vector<uchar>& tex_img = dscene->tex_byte_image[slot];
+
+ uint8_t options = pack_image_options(type, slot);
+
+ info[type_index_to_flattened_slot(slot, type)] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options);
+
+ memcpy(pixels_byte+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
+ offset += tex_img.size();
+ }
+
+ /* Float Textures*/
+ type = IMAGE_DATA_TYPE_FLOAT;
+ size = 0, offset = 0;
+
+ for(size_t slot = 0; slot < images[type].size(); slot++) {
+ if(!images[type][slot])
+ continue;
+
+ device_vector<float>& tex_img = dscene->tex_float_image[slot];
+ size += tex_img.size();
+ }
+
+ float *pixels_float = dscene->tex_image_float_packed.resize(size);
+
+ for(size_t slot = 0; slot < images[type].size(); slot++) {
+ if(!images[type][slot])
+ continue;
+
+ device_vector<float>& tex_img = dscene->tex_float_image[slot];
+
+ /* todo: support 3D textures, only CPU for now */
+
+ uint8_t options = pack_image_options(type, slot);
+ info[type_index_to_flattened_slot(slot, type)] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options);
+
memcpy(pixels_float+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
offset += tex_img.size();
}
@@ -1178,6 +1236,20 @@ void ImageManager::device_pack_images(Device *device,
}
device->tex_alloc("__tex_image_float4_packed", dscene->tex_image_float4_packed);
}
+ if(dscene->tex_image_byte_packed.size()) {
+ if(dscene->tex_image_byte_packed.device_pointer) {
+ thread_scoped_lock device_lock(device_mutex);
+ device->tex_free(dscene->tex_image_byte_packed);
+ }
+ device->tex_alloc("__tex_image_byte_packed", dscene->tex_image_byte_packed);
+ }
+ if(dscene->tex_image_float_packed.size()) {
+ if(dscene->tex_image_float_packed.device_pointer) {
+ thread_scoped_lock device_lock(device_mutex);
+ device->tex_free(dscene->tex_image_float_packed);
+ }
+ device->tex_alloc("__tex_image_float_packed", dscene->tex_image_float_packed);
+ }
if(dscene->tex_image_packed_info.size()) {
if(dscene->tex_image_packed_info.device_pointer) {
thread_scoped_lock device_lock(device_mutex);
@@ -1208,10 +1280,14 @@ void ImageManager::device_free(Device *device, DeviceScene *dscene)
device->tex_free(dscene->tex_image_byte4_packed);
device->tex_free(dscene->tex_image_float4_packed);
+ device->tex_free(dscene->tex_image_byte_packed);
+ device->tex_free(dscene->tex_image_float_packed);
device->tex_free(dscene->tex_image_packed_info);
dscene->tex_image_byte4_packed.clear();
dscene->tex_image_float4_packed.clear();
+ dscene->tex_image_byte_packed.clear();
+ dscene->tex_image_float_packed.clear();
dscene->tex_image_packed_info.clear();
}
diff --git a/intern/cycles/render/image.h b/intern/cycles/render/image.h
index 07998684b23..cca71a6bb93 100644
--- a/intern/cycles/render/image.h
+++ b/intern/cycles/render/image.h
@@ -39,9 +39,9 @@ public:
enum ImageDataType {
IMAGE_DATA_TYPE_FLOAT4 = 0,
IMAGE_DATA_TYPE_BYTE4 = 1,
- IMAGE_DATA_TYPE_FLOAT = 2,
- IMAGE_DATA_TYPE_BYTE = 3,
- IMAGE_DATA_TYPE_HALF4 = 4,
+ IMAGE_DATA_TYPE_HALF4 = 2,
+ IMAGE_DATA_TYPE_FLOAT = 3,
+ IMAGE_DATA_TYPE_BYTE = 4,
IMAGE_DATA_TYPE_HALF = 5,
IMAGE_DATA_NUM_TYPES
diff --git a/intern/cycles/render/mesh.cpp b/intern/cycles/render/mesh.cpp
index 4cf0a785897..fcf4e69984d 100644
--- a/intern/cycles/render/mesh.cpp
+++ b/intern/cycles/render/mesh.cpp
@@ -30,6 +30,9 @@
#include "osl_globals.h"
+#include "subd_split.h"
+#include "subd_patch_table.h"
+
#include "util_foreach.h"
#include "util_logging.h"
#include "util_progress.h"
@@ -112,19 +115,12 @@ float3 Mesh::SubdFace::normal(const Mesh *mesh) const
return safe_normalize(cross(v1 - v0, v2 - v0));
}
-
/* Mesh */
NODE_DEFINE(Mesh)
{
NodeType* type = NodeType::add("mesh", create);
- static NodeEnum displacement_method_enum;
- displacement_method_enum.insert("bump", DISPLACE_BUMP);
- displacement_method_enum.insert("true", DISPLACE_TRUE);
- displacement_method_enum.insert("both", DISPLACE_BOTH);
- SOCKET_ENUM(displacement_method, "Displacement Method", displacement_method_enum, DISPLACE_BUMP);
-
SOCKET_UINT(motion_steps, "Motion Steps", 3);
SOCKET_BOOLEAN(use_motion_blur, "Use Motion Blur", false);
@@ -177,11 +173,16 @@ Mesh::Mesh()
num_ngons = 0;
subdivision_type = SUBDIVISION_NONE;
+ subd_params = NULL;
+
+ patch_table = NULL;
}
Mesh::~Mesh()
{
delete bvh;
+ delete patch_table;
+ delete subd_params;
}
void Mesh::resize_mesh(int numverts, int numtris)
@@ -274,6 +275,8 @@ void Mesh::clear()
num_subd_verts = 0;
+ subd_creases.clear();
+
attributes.clear();
curve_attributes.clear();
subd_attributes.clear();
@@ -283,6 +286,9 @@ void Mesh::clear()
transform_negative_scaled = false;
transform_normal = transform_identity();
geometry_flags = GEOMETRY_NONE;
+
+ delete patch_table;
+ patch_table = NULL;
}
int Mesh::split_vertex(int vertex)
@@ -705,7 +711,6 @@ void Mesh::pack_patches(uint *patch_data, uint vert_offset, uint face_offset, ui
}
}
-
void Mesh::compute_bvh(DeviceScene *dscene,
SceneParams *params,
Progress *progress,
@@ -779,6 +784,17 @@ bool Mesh::has_motion_blur() const
curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION)));
}
+bool Mesh::has_true_displacement() const
+{
+ foreach(Shader *shader, used_shaders) {
+ if(shader->has_displacement && shader->displacement_method != DISPLACE_BUMP) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
bool Mesh::need_build_bvh() const
{
return !transform_applied || has_surface_bssrdf;
@@ -831,9 +847,10 @@ void MeshManager::update_osl_attributes(Device *device, Scene *scene, vector<Att
OSLGlobals::Attribute osl_attr;
osl_attr.type = attr.type();
- osl_attr.elem = ATTR_ELEMENT_OBJECT;
+ osl_attr.desc.element = ATTR_ELEMENT_OBJECT;
osl_attr.value = attr;
- osl_attr.offset = 0;
+ osl_attr.desc.offset = 0;
+ osl_attr.desc.flags = 0;
og->attribute_map[i*ATTR_PRIM_TYPES + ATTR_PRIM_TRIANGLE][attr.name()] = osl_attr;
og->attribute_map[i*ATTR_PRIM_TYPES + ATTR_PRIM_CURVE][attr.name()] = osl_attr;
@@ -853,9 +870,8 @@ void MeshManager::update_osl_attributes(Device *device, Scene *scene, vector<Att
foreach(AttributeRequest& req, attributes.requests) {
OSLGlobals::Attribute osl_attr;
- if(req.triangle_element != ATTR_ELEMENT_NONE) {
- osl_attr.elem = req.triangle_element;
- osl_attr.offset = req.triangle_offset;
+ if(req.triangle_desc.element != ATTR_ELEMENT_NONE) {
+ osl_attr.desc = req.triangle_desc;
if(req.triangle_type == TypeDesc::TypeFloat)
osl_attr.type = TypeDesc::TypeFloat;
@@ -875,9 +891,8 @@ void MeshManager::update_osl_attributes(Device *device, Scene *scene, vector<Att
}
}
- if(req.curve_element != ATTR_ELEMENT_NONE) {
- osl_attr.elem = req.curve_element;
- osl_attr.offset = req.curve_offset;
+ if(req.curve_desc.element != ATTR_ELEMENT_NONE) {
+ osl_attr.desc = req.curve_desc;
if(req.curve_type == TypeDesc::TypeFloat)
osl_attr.type = TypeDesc::TypeFloat;
@@ -897,9 +912,8 @@ void MeshManager::update_osl_attributes(Device *device, Scene *scene, vector<Att
}
}
- if(req.subd_element != ATTR_ELEMENT_NONE) {
- osl_attr.elem = req.subd_element;
- osl_attr.offset = req.subd_offset;
+ if(req.subd_desc.element != ATTR_ELEMENT_NONE) {
+ osl_attr.desc = req.subd_desc;
if(req.subd_type == TypeDesc::TypeFloat)
osl_attr.type = TypeDesc::TypeFloat;
@@ -971,8 +985,8 @@ void MeshManager::update_svm_attributes(Device *device, DeviceScene *dscene, Sce
if(mesh->num_triangles()) {
attr_map[index].x = id;
- attr_map[index].y = req.triangle_element;
- attr_map[index].z = as_uint(req.triangle_offset);
+ attr_map[index].y = req.triangle_desc.element;
+ attr_map[index].z = as_uint(req.triangle_desc.offset);
if(req.triangle_type == TypeDesc::TypeFloat)
attr_map[index].w = NODE_ATTR_FLOAT;
@@ -980,14 +994,16 @@ void MeshManager::update_svm_attributes(Device *device, DeviceScene *dscene, Sce
attr_map[index].w = NODE_ATTR_MATRIX;
else
attr_map[index].w = NODE_ATTR_FLOAT3;
+
+ attr_map[index].w |= req.triangle_desc.flags << 8;
}
index++;
if(mesh->num_curves()) {
attr_map[index].x = id;
- attr_map[index].y = req.curve_element;
- attr_map[index].z = as_uint(req.curve_offset);
+ attr_map[index].y = req.curve_desc.element;
+ attr_map[index].z = as_uint(req.curve_desc.offset);
if(req.curve_type == TypeDesc::TypeFloat)
attr_map[index].w = NODE_ATTR_FLOAT;
@@ -995,14 +1011,16 @@ void MeshManager::update_svm_attributes(Device *device, DeviceScene *dscene, Sce
attr_map[index].w = NODE_ATTR_MATRIX;
else
attr_map[index].w = NODE_ATTR_FLOAT3;
+
+ attr_map[index].w |= req.curve_desc.flags << 8;
}
index++;
if(mesh->subd_faces.size()) {
attr_map[index].x = id;
- attr_map[index].y = req.subd_element;
- attr_map[index].z = as_uint(req.subd_offset);
+ attr_map[index].y = req.subd_desc.element;
+ attr_map[index].z = as_uint(req.subd_desc.offset);
if(req.subd_type == TypeDesc::TypeFloat)
attr_map[index].w = NODE_ATTR_FLOAT;
@@ -1010,6 +1028,8 @@ void MeshManager::update_svm_attributes(Device *device, DeviceScene *dscene, Sce
attr_map[index].w = NODE_ATTR_MATRIX;
else
attr_map[index].w = NODE_ATTR_FLOAT3;
+
+ attr_map[index].w |= req.subd_desc.flags << 8;
}
index++;
@@ -1069,17 +1089,20 @@ static void update_attribute_element_offset(Mesh *mesh,
Attribute *mattr,
AttributePrimitive prim,
TypeDesc& type,
- int& offset,
- AttributeElement& element)
+ AttributeDescriptor& desc)
{
if(mattr) {
/* store element and type */
- element = mattr->element;
+ desc.element = mattr->element;
+ desc.flags = mattr->flags;
type = mattr->type;
/* store attribute data in arrays */
size_t size = mattr->element_size(mesh, prim);
+ AttributeElement& element = desc.element;
+ int& offset = desc.offset;
+
if(mattr->element == ATTR_ELEMENT_VOXEL) {
/* store slot in offset value */
VoxelAttribute *voxel_data = mattr->data_voxel();
@@ -1128,7 +1151,11 @@ static void update_attribute_element_offset(Mesh *mesh,
/* mesh vertex/curve index is global, not per object, so we sneak
* a correction for that in here */
- if(element == ATTR_ELEMENT_VERTEX)
+ if(mesh->subdivision_type == Mesh::SUBDIVISION_CATMULL_CLARK && desc.flags & ATTR_SUBDIVIDED) {
+ /* indices for subdivided attributes are retrieved
+ * from patch table so no need for correction here*/
+ }
+ else if(element == ATTR_ELEMENT_VERTEX)
offset -= mesh->vert_offset;
else if(element == ATTR_ELEMENT_VERTEX_MOTION)
offset -= mesh->vert_offset;
@@ -1153,8 +1180,8 @@ static void update_attribute_element_offset(Mesh *mesh,
}
else {
/* attribute not found */
- element = ATTR_ELEMENT_NONE;
- offset = 0;
+ desc.element = ATTR_ELEMENT_NONE;
+ desc.offset = 0;
}
}
@@ -1243,8 +1270,7 @@ void MeshManager::device_update_attributes(Device *device, DeviceScene *dscene,
triangle_mattr,
ATTR_PRIM_TRIANGLE,
req.triangle_type,
- req.triangle_offset,
- req.triangle_element);
+ req.triangle_desc);
update_attribute_element_offset(mesh,
attr_float, attr_float_offset,
@@ -1253,8 +1279,7 @@ void MeshManager::device_update_attributes(Device *device, DeviceScene *dscene,
curve_mattr,
ATTR_PRIM_CURVE,
req.curve_type,
- req.curve_offset,
- req.curve_element);
+ req.curve_desc);
update_attribute_element_offset(mesh,
attr_float, attr_float_offset,
@@ -1263,8 +1288,7 @@ void MeshManager::device_update_attributes(Device *device, DeviceScene *dscene,
subd_mattr,
ATTR_PRIM_SUBD,
req.subd_type,
- req.subd_offset,
- req.subd_element);
+ req.subd_desc);
if(progress.get_cancel()) return;
}
@@ -1327,6 +1351,12 @@ void MeshManager::mesh_calc_offset(Scene *scene)
if(mesh->subd_faces.size()) {
Mesh::SubdFace& last = mesh->subd_faces[mesh->subd_faces.size()-1];
patch_size += (last.ptex_offset + last.num_ptex_faces()) * 8;
+
+ /* patch tables are stored in same array so include them in patch_size */
+ if(mesh->patch_table) {
+ mesh->patch_table_offset = patch_size;
+ patch_size += mesh->patch_table->total_size();
+ }
}
face_size += mesh->subd_faces.size();
corner_size += mesh->subd_face_corners.size();
@@ -1358,6 +1388,12 @@ void MeshManager::device_update_mesh(Device *device,
if(mesh->subd_faces.size()) {
Mesh::SubdFace& last = mesh->subd_faces[mesh->subd_faces.size()-1];
patch_size += (last.ptex_offset + last.num_ptex_faces()) * 8;
+
+ /* patch tables are stored in same array so include them in patch_size */
+ if(mesh->patch_table) {
+ mesh->patch_table_offset = patch_size;
+ patch_size += mesh->patch_table->total_size();
+ }
}
}
@@ -1440,6 +1476,11 @@ void MeshManager::device_update_mesh(Device *device,
foreach(Mesh *mesh, scene->meshes) {
mesh->pack_patches(&patch_data[mesh->patch_offset], mesh->vert_offset, mesh->face_offset, mesh->corner_offset);
+
+ if(mesh->patch_table) {
+ mesh->patch_table->copy_adjusting_offsets(&patch_data[mesh->patch_table_offset], mesh->patch_table_offset);
+ }
+
if(progress.get_cancel()) return;
}
@@ -1621,12 +1662,48 @@ void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scen
}
}
+ /* Tessellate meshes that are using subdivision */
+ size_t total_tess_needed = 0;
+ foreach(Mesh *mesh, scene->meshes) {
+ if(mesh->need_update &&
+ mesh->subdivision_type != Mesh::SUBDIVISION_NONE &&
+ mesh->num_subd_verts == 0 &&
+ mesh->subd_params)
+ {
+ total_tess_needed++;
+ }
+ }
+
+ size_t i = 0;
+ foreach(Mesh *mesh, scene->meshes) {
+ if(mesh->need_update &&
+ mesh->subdivision_type != Mesh::SUBDIVISION_NONE &&
+ mesh->num_subd_verts == 0 &&
+ mesh->subd_params)
+ {
+ string msg = "Tessellating ";
+ if(mesh->name == "")
+ msg += string_printf("%u/%u", (uint)(i+1), (uint)total_tess_needed);
+ else
+ msg += string_printf("%s %u/%u", mesh->name.c_str(), (uint)(i+1), (uint)total_tess_needed);
+
+ progress.set_status("Updating Mesh", msg);
+
+ DiagSplit dsplit(*mesh->subd_params);
+ mesh->tessellate(&dsplit);
+
+ i++;
+
+ if(progress.get_cancel()) return;
+ }
+ }
+
/* Update images needed for true displacement. */
bool true_displacement_used = false;
bool old_need_object_flags_update = false;
foreach(Mesh *mesh, scene->meshes) {
if(mesh->need_update &&
- mesh->displacement_method != Mesh::DISPLACE_BUMP)
+ mesh->has_true_displacement())
{
true_displacement_used = true;
break;
@@ -1652,6 +1729,10 @@ void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scen
}
if(progress.get_cancel()) return;
+ /* after mesh data has been copied to device memory we need to update
+ * offsets for patch tables as this can't be known before hand */
+ scene->object_manager->device_update_patch_map_offsets(device, dscene, scene);
+
device_update_attributes(device, dscene, scene, progress);
if(progress.get_cancel()) return;
@@ -1677,7 +1758,7 @@ void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scen
}
/* Update bvh. */
- size_t i = 0, num_bvh = 0;
+ size_t num_bvh = 0;
foreach(Mesh *mesh, scene->meshes) {
if(mesh->need_update && mesh->need_build_bvh()) {
num_bvh++;
@@ -1686,6 +1767,7 @@ void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scen
TaskPool pool;
+ i = 0;
foreach(Mesh *mesh, scene->meshes) {
if(mesh->need_update) {
pool.push(function_bind(&Mesh::compute_bvh,
diff --git a/intern/cycles/render/mesh.h b/intern/cycles/render/mesh.h
index c9ae9aab888..a77e296ea4a 100644
--- a/intern/cycles/render/mesh.h
+++ b/intern/cycles/render/mesh.h
@@ -39,7 +39,9 @@ class Progress;
class Scene;
class SceneParams;
class AttributeRequest;
+struct SubdParams;
class DiagSplit;
+struct PackedPatchTable;
/* Mesh */
@@ -110,13 +112,9 @@ public:
int num_ptex_faces() const { return num_corners == 4 ? 1 : num_corners; }
};
- /* Displacement */
- enum DisplacementMethod {
- DISPLACE_BUMP = 0,
- DISPLACE_TRUE = 1,
- DISPLACE_BOTH = 2,
-
- DISPLACE_NUM_METHODS,
+ struct SubdEdgeCrease {
+ int v[2];
+ float crease;
};
enum SubdivisionType {
@@ -157,6 +155,10 @@ public:
array<int> subd_face_corners;
int num_ngons;
+ array<SubdEdgeCrease> subd_creases;
+
+ SubdParams *subd_params;
+
vector<Shader*> used_shaders;
AttributeSet attributes;
AttributeSet curve_attributes;
@@ -166,7 +168,8 @@ public:
bool transform_applied;
bool transform_negative_scaled;
Transform transform_normal;
- DisplacementMethod displacement_method;
+
+ PackedPatchTable *patch_table;
uint motion_steps;
bool use_motion_blur;
@@ -184,6 +187,7 @@ public:
size_t curvekey_offset;
size_t patch_offset;
+ size_t patch_table_offset;
size_t face_offset;
size_t corner_offset;
@@ -234,6 +238,7 @@ public:
void tag_update(Scene *scene, bool rebuild);
bool has_motion_blur() const;
+ bool has_true_displacement() const;
/* Check whether the mesh should have own BVH built separately. Briefly,
* own BVH is needed for mesh, if:
diff --git a/intern/cycles/render/mesh_displace.cpp b/intern/cycles/render/mesh_displace.cpp
index 95f46ff02a2..ef9cfedd412 100644
--- a/intern/cycles/render/mesh_displace.cpp
+++ b/intern/cycles/render/mesh_displace.cpp
@@ -26,19 +26,27 @@
CCL_NAMESPACE_BEGIN
+static float3 compute_face_normal(const Mesh::Triangle& t, float3 *verts)
+{
+ float3 v0 = verts[t.v[0]];
+ float3 v1 = verts[t.v[1]];
+ float3 v2 = verts[t.v[2]];
+
+ float3 norm = cross(v1 - v0, v2 - v0);
+ float normlen = len(norm);
+
+ if(normlen == 0.0f)
+ return make_float3(1.0f, 0.0f, 0.0f);
+
+ return norm / normlen;
+}
+
bool MeshManager::displace(Device *device, DeviceScene *dscene, Scene *scene, Mesh *mesh, Progress& progress)
{
/* verify if we have a displacement shader */
- bool has_displacement = false;
-
- if(mesh->displacement_method != Mesh::DISPLACE_BUMP) {
- foreach(Shader *shader, mesh->used_shaders)
- if(shader->has_displacement)
- has_displacement = true;
- }
-
- if(!has_displacement)
+ if(!mesh->has_true_displacement()) {
return false;
+ }
string msg = string_printf("Computing Displacement %s", mesh->name.c_str());
progress.set_status("Updating Mesh", msg);
@@ -67,8 +75,9 @@ bool MeshManager::displace(Device *device, DeviceScene *dscene, Scene *scene, Me
Shader *shader = (shader_index < mesh->used_shaders.size()) ?
mesh->used_shaders[shader_index] : scene->default_surface;
- if(!shader->has_displacement)
+ if(!shader->has_displacement || shader->displacement_method == DISPLACE_BUMP) {
continue;
+ }
for(int j = 0; j < 3; j++) {
if(done[t.v[j]])
@@ -153,8 +162,9 @@ bool MeshManager::displace(Device *device, DeviceScene *dscene, Scene *scene, Me
Shader *shader = (shader_index < mesh->used_shaders.size()) ?
mesh->used_shaders[shader_index] : scene->default_surface;
- if(!shader->has_displacement)
+ if(!shader->has_displacement || shader->displacement_method == DISPLACE_BUMP) {
continue;
+ }
for(int j = 0; j < 3; j++) {
if(!done[t.v[j]]) {
@@ -178,9 +188,131 @@ bool MeshManager::displace(Device *device, DeviceScene *dscene, Scene *scene, Me
mesh->attributes.remove(ATTR_STD_FACE_NORMAL);
mesh->add_face_normals();
- if(mesh->displacement_method == Mesh::DISPLACE_TRUE) {
- mesh->attributes.remove(ATTR_STD_VERTEX_NORMAL);
- mesh->add_vertex_normals();
+ bool need_recompute_vertex_normals = false;
+
+ foreach(Shader *shader, mesh->used_shaders) {
+ if(shader->has_displacement && shader->displacement_method == DISPLACE_TRUE) {
+ need_recompute_vertex_normals = true;
+ break;
+ }
+ }
+
+ if(need_recompute_vertex_normals) {
+ bool flip = mesh->transform_negative_scaled;
+ vector<bool> tri_has_true_disp(num_triangles, false);
+
+ for(size_t i = 0; i < num_triangles; i++) {
+ int shader_index = mesh->shader[i];
+ Shader *shader = (shader_index < mesh->used_shaders.size()) ?
+ mesh->used_shaders[shader_index] : scene->default_surface;
+
+ tri_has_true_disp[i] = shader->has_displacement && shader->displacement_method == DISPLACE_TRUE;
+ }
+
+ /* static vertex normals */
+
+ /* get attributes */
+ Attribute *attr_fN = mesh->attributes.find(ATTR_STD_FACE_NORMAL);
+ Attribute *attr_vN = mesh->attributes.find(ATTR_STD_VERTEX_NORMAL);
+
+ float3 *fN = attr_fN->data_float3();
+ float3 *vN = attr_vN->data_float3();
+
+ /* compute vertex normals */
+
+ /* zero vertex normals on triangles with true displacement */
+ for(size_t i = 0; i < num_triangles; i++) {
+ if(tri_has_true_disp[i]) {
+ for(size_t j = 0; j < 3; j++) {
+ vN[mesh->get_triangle(i).v[j]] = make_float3(0.0f, 0.0f, 0.0f);
+ }
+ }
+ }
+
+ /* add face normals to vertex normals */
+ for(size_t i = 0; i < num_triangles; i++) {
+ if(tri_has_true_disp[i]) {
+ for(size_t j = 0; j < 3; j++) {
+ vN[mesh->get_triangle(i).v[j]] += fN[i];
+ }
+ }
+ }
+
+ /* normalize vertex normals */
+ done.clear();
+ done.resize(num_verts, false);
+
+ for(size_t i = 0; i < num_triangles; i++) {
+ if(tri_has_true_disp[i]) {
+ for(size_t j = 0; j < 3; j++) {
+ int vert = mesh->get_triangle(i).v[j];
+
+ if(done[vert]) {
+ continue;
+ }
+
+ vN[vert] = normalize(vN[vert]);
+ if(flip)
+ vN[vert] = -vN[vert];
+
+ done[vert] = true;
+ }
+ }
+ }
+
+ /* motion vertex normals */
+ Attribute *attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+ Attribute *attr_mN = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_NORMAL);
+
+ if(mesh->has_motion_blur() && attr_mP && attr_mN) {
+ for(int step = 0; step < mesh->motion_steps - 1; step++) {
+ float3 *mP = attr_mP->data_float3() + step*mesh->verts.size();
+ float3 *mN = attr_mN->data_float3() + step*mesh->verts.size();
+
+ /* compute */
+
+ /* zero vertex normals on triangles with true displacement */
+ for(size_t i = 0; i < num_triangles; i++) {
+ if(tri_has_true_disp[i]) {
+ for(size_t j = 0; j < 3; j++) {
+ mN[mesh->get_triangle(i).v[j]] = make_float3(0.0f, 0.0f, 0.0f);
+ }
+ }
+ }
+
+ /* add face normals to vertex normals */
+ for(size_t i = 0; i < num_triangles; i++) {
+ if(tri_has_true_disp[i]) {
+ for(size_t j = 0; j < 3; j++) {
+ float3 fN = compute_face_normal(mesh->get_triangle(i), mP);
+ mN[mesh->get_triangle(i).v[j]] += fN;
+ }
+ }
+ }
+
+ /* normalize vertex normals */
+ done.clear();
+ done.resize(num_verts, false);
+
+ for(size_t i = 0; i < num_triangles; i++) {
+ if(tri_has_true_disp[i]) {
+ for(size_t j = 0; j < 3; j++) {
+ int vert = mesh->get_triangle(i).v[j];
+
+ if(done[vert]) {
+ continue;
+ }
+
+ mN[vert] = normalize(mN[vert]);
+ if(flip)
+ mN[vert] = -mN[vert];
+
+ done[vert] = true;
+ }
+ }
+ }
+ }
+ }
}
return true;
diff --git a/intern/cycles/render/mesh_subdivision.cpp b/intern/cycles/render/mesh_subdivision.cpp
index fe8e41e8d35..efb40efbb79 100644
--- a/intern/cycles/render/mesh_subdivision.cpp
+++ b/intern/cycles/render/mesh_subdivision.cpp
@@ -19,13 +19,302 @@
#include "subd_split.h"
#include "subd_patch.h"
+#include "subd_patch_table.h"
#include "util_foreach.h"
CCL_NAMESPACE_BEGIN
+#ifdef WITH_OPENSUBDIV
+
+CCL_NAMESPACE_END
+
+#include <opensubdiv/far/topologyRefinerFactory.h>
+#include <opensubdiv/far/primvarRefiner.h>
+#include <opensubdiv/far/patchTableFactory.h>
+#include <opensubdiv/far/patchMap.h>
+
+/* specializations of TopologyRefinerFactory for ccl::Mesh */
+
+namespace OpenSubdiv {
+namespace OPENSUBDIV_VERSION {
+namespace Far {
+ template<>
+ bool TopologyRefinerFactory<ccl::Mesh>::resizeComponentTopology(TopologyRefiner& refiner, ccl::Mesh const& mesh)
+ {
+ setNumBaseVertices(refiner, mesh.verts.size());
+ setNumBaseFaces(refiner, mesh.subd_faces.size());
+
+ ccl::Mesh::SubdFace* face = &mesh.subd_faces[0];
+
+ for(int i = 0; i < mesh.subd_faces.size(); i++, face++) {
+ setNumBaseFaceVertices(refiner, i, face->num_corners);
+ }
+
+ return true;
+ }
+
+ template<>
+ bool TopologyRefinerFactory<ccl::Mesh>::assignComponentTopology(TopologyRefiner& refiner, ccl::Mesh const& mesh)
+ {
+ ccl::Mesh::SubdFace* face = &mesh.subd_faces[0];
+
+ for(int i = 0; i < mesh.subd_faces.size(); i++, face++) {
+ IndexArray face_verts = getBaseFaceVertices(refiner, i);
+
+ int* corner = &mesh.subd_face_corners[face->start_corner];
+
+ for(int j = 0; j < face->num_corners; j++, corner++) {
+ face_verts[j] = *corner;
+ }
+ }
+
+ return true;
+ }
+
+ template<>
+ bool TopologyRefinerFactory<ccl::Mesh>::assignComponentTags(TopologyRefiner& refiner, ccl::Mesh const& mesh)
+ {
+ const ccl::Mesh::SubdEdgeCrease* crease = mesh.subd_creases.data();
+
+ for(int i = 0; i < mesh.subd_creases.size(); i++, crease++) {
+ Index edge = findBaseEdge(refiner, crease->v[0], crease->v[1]);
+
+ if(edge != INDEX_INVALID) {
+ setBaseEdgeSharpness(refiner, edge, crease->crease * 10.0f);
+ }
+ }
+
+ for(int i = 0; i < mesh.verts.size(); i++) {
+ ConstIndexArray vert_edges = getBaseVertexEdges(refiner, i);
+
+ if(vert_edges.size() == 2) {
+ float sharpness = refiner.getLevel(0).getEdgeSharpness(vert_edges[0]);
+ sharpness = std::min(sharpness, refiner.getLevel(0).getEdgeSharpness(vert_edges[1]));
+
+ setBaseVertexSharpness(refiner, i, sharpness);
+ }
+ }
+
+ return true;
+ }
+
+ template<>
+ bool TopologyRefinerFactory<ccl::Mesh>::assignFaceVaryingTopology(TopologyRefiner& /*refiner*/, ccl::Mesh const& /*mesh*/)
+ {
+ return true;
+ }
+
+ template<>
+ void TopologyRefinerFactory<ccl::Mesh>::reportInvalidTopology(TopologyError /*err_code*/,
+ char const */*msg*/, ccl::Mesh const& /*mesh*/)
+ {
+ }
+} /* namespace Far */
+} /* namespace OPENSUBDIV_VERSION */
+} /* namespace OpenSubdiv */
+
+CCL_NAMESPACE_BEGIN
+
+using namespace OpenSubdiv;
+
+/* struct that implements OpenSubdiv's vertex interface */
+
+template<typename T>
+struct OsdValue {
+ T value;
+
+ OsdValue() {}
+
+ void Clear(void* = 0) {
+ memset(&value, 0, sizeof(T));
+ }
+
+ void AddWithWeight(OsdValue<T> const& src, float weight) {
+ value += src.value * weight;
+ }
+};
+
+template<>
+void OsdValue<uchar4>::AddWithWeight(OsdValue<uchar4> const& src, float weight)
+{
+ for(int i = 0; i < 4; i++) {
+ value[i] += (uchar)(src.value[i] * weight);
+ }
+}
+
+/* class for holding OpenSubdiv data used during tessellation */
+
+class OsdData {
+ Mesh* mesh;
+ vector<OsdValue<float3> > verts;
+ Far::TopologyRefiner* refiner;
+ Far::PatchTable* patch_table;
+ Far::PatchMap* patch_map;
+
+public:
+ OsdData() : mesh(NULL), refiner(NULL), patch_table(NULL), patch_map(NULL) {}
+
+ ~OsdData()
+ {
+ delete refiner;
+ delete patch_table;
+ delete patch_map;
+ }
+
+ void build_from_mesh(Mesh* mesh_)
+ {
+ mesh = mesh_;
+
+ /* type and options */
+ Sdc::SchemeType type = Sdc::SCHEME_CATMARK;
+
+ Sdc::Options options;
+ options.SetVtxBoundaryInterpolation(Sdc::Options::VTX_BOUNDARY_EDGE_ONLY);
+
+ /* create refiner */
+ refiner = Far::TopologyRefinerFactory<Mesh>::Create(*mesh,
+ Far::TopologyRefinerFactory<Mesh>::Options(type, options));
+
+ /* adaptive refinement */
+ int max_isolation = 10;
+ refiner->RefineAdaptive(Far::TopologyRefiner::AdaptiveOptions(max_isolation));
+
+ /* create patch table */
+ Far::PatchTableFactory::Options patch_options;
+ patch_options.endCapType = Far::PatchTableFactory::Options::ENDCAP_GREGORY_BASIS;
+
+ patch_table = Far::PatchTableFactory::Create(*refiner, patch_options);
+
+ /* interpolate verts */
+ int num_refiner_verts = refiner->GetNumVerticesTotal();
+ int num_local_points = patch_table->GetNumLocalPoints();
+
+ verts.resize(num_refiner_verts + num_local_points);
+ for(int i = 0; i < mesh->verts.size(); i++) {
+ verts[i].value = mesh->verts[i];
+ }
+
+ OsdValue<float3>* src = &verts[0];
+ for(int i = 0; i < refiner->GetMaxLevel(); i++) {
+ OsdValue<float3>* dest = src + refiner->GetLevel(i).GetNumVertices();
+ Far::PrimvarRefiner(*refiner).Interpolate(i+1, src, dest);
+ src = dest;
+ }
+
+ patch_table->ComputeLocalPointValues(&verts[0], &verts[num_refiner_verts]);
+
+ /* create patch map */
+ patch_map = new Far::PatchMap(*patch_table);
+ }
+
+ void subdivide_attribute(Attribute& attr)
+ {
+ Far::PrimvarRefiner primvar_refiner(*refiner);
+
+ if(attr.element == ATTR_ELEMENT_VERTEX) {
+ int num_refiner_verts = refiner->GetNumVerticesTotal();
+ int num_local_points = patch_table->GetNumLocalPoints();
+
+ attr.resize(num_refiner_verts + num_local_points);
+ attr.flags |= ATTR_FINAL_SIZE;
+
+ char* src = &attr.buffer[0];
+
+ for(int i = 0; i < refiner->GetMaxLevel(); i++) {
+ char* dest = src + refiner->GetLevel(i).GetNumVertices() * attr.data_sizeof();
+
+ if(attr.same_storage(attr.type, TypeDesc::TypeFloat)) {
+ primvar_refiner.Interpolate(i+1, (OsdValue<float>*)src, (OsdValue<float>*&)dest);
+ }
+ else {
+ primvar_refiner.Interpolate(i+1, (OsdValue<float4>*)src, (OsdValue<float4>*&)dest);
+ }
+
+ src = dest;
+ }
+
+ if(attr.same_storage(attr.type, TypeDesc::TypeFloat)) {
+ patch_table->ComputeLocalPointValues((OsdValue<float>*)&attr.buffer[0],
+ (OsdValue<float>*)&attr.buffer[num_refiner_verts * attr.data_sizeof()]);
+ }
+ else {
+ patch_table->ComputeLocalPointValues((OsdValue<float4>*)&attr.buffer[0],
+ (OsdValue<float4>*)&attr.buffer[num_refiner_verts * attr.data_sizeof()]);
+ }
+ }
+ else if(attr.element == ATTR_ELEMENT_CORNER || attr.element == ATTR_ELEMENT_CORNER_BYTE) {
+ // TODO(mai): fvar interpolation
+ }
+ }
+
+ friend struct OsdPatch;
+ friend class Mesh;
+};
+
+/* ccl::Patch implementation that uses OpenSubdiv for eval */
+
+struct OsdPatch : Patch {
+ OsdData* osd_data;
+
+ OsdPatch(OsdData* data) : osd_data(data) {}
+
+ void eval(float3 *P, float3 *dPdu, float3 *dPdv, float3 *N, float u, float v)
+ {
+ const Far::PatchTable::PatchHandle* handle = osd_data->patch_map->FindPatch(patch_index, u, v);
+ assert(handle);
+
+ float p_weights[20], du_weights[20], dv_weights[20];
+ osd_data->patch_table->EvaluateBasis(*handle, u, v, p_weights, du_weights, dv_weights);
+
+ Far::ConstIndexArray cv = osd_data->patch_table->GetPatchVertices(*handle);
+
+ float3 du, dv;
+ if(P) *P = make_float3(0.0f, 0.0f, 0.0f);
+ du = make_float3(0.0f, 0.0f, 0.0f);
+ dv = make_float3(0.0f, 0.0f, 0.0f);
+
+ for(int i = 0; i < cv.size(); i++) {
+ float3 p = osd_data->verts[cv[i]].value;
+
+ if(P) *P += p * p_weights[i];
+ du += p * du_weights[i];
+ dv += p * dv_weights[i];
+ }
+
+ if(dPdu) *dPdu = du;
+ if(dPdv) *dPdv = dv;
+ if(N) *N = normalize(cross(du, dv));
+ }
+
+ BoundBox bound() { return BoundBox::empty; }
+};
+
+#endif
+
void Mesh::tessellate(DiagSplit *split)
{
+#ifdef WITH_OPENSUBDIV
+ OsdData osd_data;
+ bool need_packed_patch_table = false;
+
+ if(subdivision_type == SUBDIVISION_CATMULL_CLARK) {
+ osd_data.build_from_mesh(this);
+ }
+ else
+#endif
+ {
+ /* force linear subdivision if OpenSubdiv is unavailable to avoid
+ * falling into catmull-clark code paths by accident
+ */
+ subdivision_type = SUBDIVISION_LINEAR;
+
+ /* force disable attribute subdivision for same reason as above */
+ foreach(Attribute& attr, subd_attributes.attributes) {
+ attr.flags &= ~ATTR_SUBDIVIDED;
+ }
+ }
+
int num_faces = subd_faces.size();
Attribute *attr_vN = subd_attributes.find(ATTR_STD_VERTEX_NORMAL);
@@ -36,113 +325,158 @@ void Mesh::tessellate(DiagSplit *split)
if(face.is_quad()) {
/* quad */
- LinearQuadPatch patch;
- float3 *hull = patch.hull;
- float3 *normals = patch.normals;
+ QuadDice::SubPatch subpatch;
- patch.patch_index = face.ptex_offset;
- patch.shader = face.shader;
+ LinearQuadPatch quad_patch;
+#ifdef WITH_OPENSUBDIV
+ OsdPatch osd_patch(&osd_data);
- for(int i = 0; i < 4; i++) {
- hull[i] = verts[subd_face_corners[face.start_corner+i]];
+ if(subdivision_type == SUBDIVISION_CATMULL_CLARK) {
+ osd_patch.patch_index = face.ptex_offset;
+
+ subpatch.patch = &osd_patch;
}
+ else
+#endif
+ {
+ float3 *hull = quad_patch.hull;
+ float3 *normals = quad_patch.normals;
+
+ quad_patch.patch_index = face.ptex_offset;
- if(face.smooth) {
for(int i = 0; i < 4; i++) {
- normals[i] = vN[subd_face_corners[face.start_corner+i]];
+ hull[i] = verts[subd_face_corners[face.start_corner+i]];
}
- }
- else {
- float3 N = face.normal(this);
- for(int i = 0; i < 4; i++) {
- normals[i] = N;
+
+ if(face.smooth) {
+ for(int i = 0; i < 4; i++) {
+ normals[i] = vN[subd_face_corners[face.start_corner+i]];
+ }
+ }
+ else {
+ float3 N = face.normal(this);
+ for(int i = 0; i < 4; i++) {
+ normals[i] = N;
+ }
}
+
+ swap(hull[2], hull[3]);
+ swap(normals[2], normals[3]);
+
+ subpatch.patch = &quad_patch;
}
- swap(hull[2], hull[3]);
- swap(normals[2], normals[3]);
+ subpatch.patch->shader = face.shader;
/* Quad faces need to be split at least once to line up with split ngons, we do this
* here in this manner because if we do it later edge factors may end up slightly off.
*/
- QuadDice::SubPatch subpatch;
- subpatch.patch = &patch;
-
subpatch.P00 = make_float2(0.0f, 0.0f);
subpatch.P10 = make_float2(0.5f, 0.0f);
subpatch.P01 = make_float2(0.0f, 0.5f);
subpatch.P11 = make_float2(0.5f, 0.5f);
- split->split_quad(&patch, &subpatch);
+ split->split_quad(subpatch.patch, &subpatch);
subpatch.P00 = make_float2(0.5f, 0.0f);
subpatch.P10 = make_float2(1.0f, 0.0f);
subpatch.P01 = make_float2(0.5f, 0.5f);
subpatch.P11 = make_float2(1.0f, 0.5f);
- split->split_quad(&patch, &subpatch);
+ split->split_quad(subpatch.patch, &subpatch);
subpatch.P00 = make_float2(0.0f, 0.5f);
subpatch.P10 = make_float2(0.5f, 0.5f);
subpatch.P01 = make_float2(0.0f, 1.0f);
subpatch.P11 = make_float2(0.5f, 1.0f);
- split->split_quad(&patch, &subpatch);
+ split->split_quad(subpatch.patch, &subpatch);
subpatch.P00 = make_float2(0.5f, 0.5f);
subpatch.P10 = make_float2(1.0f, 0.5f);
subpatch.P01 = make_float2(0.5f, 1.0f);
subpatch.P11 = make_float2(1.0f, 1.0f);
- split->split_quad(&patch, &subpatch);
+ split->split_quad(subpatch.patch, &subpatch);
}
else {
/* ngon */
- float3 center_vert = make_float3(0.0f, 0.0f, 0.0f);
- float3 center_normal = make_float3(0.0f, 0.0f, 0.0f);
+#ifdef WITH_OPENSUBDIV
+ if(subdivision_type == SUBDIVISION_CATMULL_CLARK) {
+ OsdPatch patch(&osd_data);
+
+ patch.shader = face.shader;
- float inv_num_corners = 1.0f/float(face.num_corners);
- for(int corner = 0; corner < face.num_corners; corner++) {
- center_vert += verts[subd_face_corners[face.start_corner + corner]] * inv_num_corners;
- center_normal += vN[subd_face_corners[face.start_corner + corner]] * inv_num_corners;
+ for(int corner = 0; corner < face.num_corners; corner++) {
+ patch.patch_index = face.ptex_offset + corner;
+
+ split->split_quad(&patch);
+ }
}
+ else
+#endif
+ {
+ float3 center_vert = make_float3(0.0f, 0.0f, 0.0f);
+ float3 center_normal = make_float3(0.0f, 0.0f, 0.0f);
+
+ float inv_num_corners = 1.0f/float(face.num_corners);
+ for(int corner = 0; corner < face.num_corners; corner++) {
+ center_vert += verts[subd_face_corners[face.start_corner + corner]] * inv_num_corners;
+ center_normal += vN[subd_face_corners[face.start_corner + corner]] * inv_num_corners;
+ }
- for(int corner = 0; corner < face.num_corners; corner++) {
- LinearQuadPatch patch;
- float3 *hull = patch.hull;
- float3 *normals = patch.normals;
+ for(int corner = 0; corner < face.num_corners; corner++) {
+ LinearQuadPatch patch;
+ float3 *hull = patch.hull;
+ float3 *normals = patch.normals;
- patch.patch_index = face.ptex_offset + corner;
+ patch.patch_index = face.ptex_offset + corner;
- patch.shader = face.shader;
+ patch.shader = face.shader;
- hull[0] = verts[subd_face_corners[face.start_corner + mod(corner + 0, face.num_corners)]];
- hull[1] = verts[subd_face_corners[face.start_corner + mod(corner + 1, face.num_corners)]];
- hull[2] = verts[subd_face_corners[face.start_corner + mod(corner - 1, face.num_corners)]];
- hull[3] = center_vert;
+ hull[0] = verts[subd_face_corners[face.start_corner + mod(corner + 0, face.num_corners)]];
+ hull[1] = verts[subd_face_corners[face.start_corner + mod(corner + 1, face.num_corners)]];
+ hull[2] = verts[subd_face_corners[face.start_corner + mod(corner - 1, face.num_corners)]];
+ hull[3] = center_vert;
- hull[1] = (hull[1] + hull[0]) * 0.5;
- hull[2] = (hull[2] + hull[0]) * 0.5;
+ hull[1] = (hull[1] + hull[0]) * 0.5;
+ hull[2] = (hull[2] + hull[0]) * 0.5;
- if(face.smooth) {
- normals[0] = vN[subd_face_corners[face.start_corner + mod(corner + 0, face.num_corners)]];
- normals[1] = vN[subd_face_corners[face.start_corner + mod(corner + 1, face.num_corners)]];
- normals[2] = vN[subd_face_corners[face.start_corner + mod(corner - 1, face.num_corners)]];
- normals[3] = center_normal;
+ if(face.smooth) {
+ normals[0] = vN[subd_face_corners[face.start_corner + mod(corner + 0, face.num_corners)]];
+ normals[1] = vN[subd_face_corners[face.start_corner + mod(corner + 1, face.num_corners)]];
+ normals[2] = vN[subd_face_corners[face.start_corner + mod(corner - 1, face.num_corners)]];
+ normals[3] = center_normal;
- normals[1] = (normals[1] + normals[0]) * 0.5;
- normals[2] = (normals[2] + normals[0]) * 0.5;
- }
- else {
- float3 N = face.normal(this);
- for(int i = 0; i < 4; i++) {
- normals[i] = N;
+ normals[1] = (normals[1] + normals[0]) * 0.5;
+ normals[2] = (normals[2] + normals[0]) * 0.5;
+ }
+ else {
+ float3 N = face.normal(this);
+ for(int i = 0; i < 4; i++) {
+ normals[i] = N;
+ }
}
- }
- split->split_quad(&patch);
+ split->split_quad(&patch);
+ }
}
}
}
/* interpolate center points for attributes */
foreach(Attribute& attr, subd_attributes.attributes) {
+#ifdef WITH_OPENSUBDIV
+ if(subdivision_type == SUBDIVISION_CATMULL_CLARK && attr.flags & ATTR_SUBDIVIDED) {
+ if(attr.element == ATTR_ELEMENT_CORNER || attr.element == ATTR_ELEMENT_CORNER_BYTE) {
+ /* keep subdivision for corner attributes disabled for now */
+ attr.flags &= ~ATTR_SUBDIVIDED;
+ }
+ else {
+ osd_data.subdivide_attribute(attr);
+
+ need_packed_patch_table = true;
+ continue;
+ }
+ }
+#endif
+
char* data = attr.data();
size_t stride = attr.data_sizeof();
int ngons = 0;
@@ -218,6 +552,15 @@ void Mesh::tessellate(DiagSplit *split)
default: break;
}
}
+
+#ifdef WITH_OPENSUBDIV
+ /* pack patch tables */
+ if(need_packed_patch_table) {
+ delete patch_table;
+ patch_table = new PackedPatchTable;
+ patch_table->pack(osd_data.patch_table);
+ }
+#endif
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp
index 662d87e8b6b..62076f3a865 100644
--- a/intern/cycles/render/object.cpp
+++ b/intern/cycles/render/object.cpp
@@ -29,6 +29,8 @@
#include "util_progress.h"
#include "util_vector.h"
+#include "subd_patch_table.h"
+
CCL_NAMESPACE_BEGIN
/* Object */
@@ -55,9 +57,9 @@ Object::Object()
particle_system = NULL;
particle_index = 0;
bounds = BoundBox::empty;
- motion.pre = transform_identity();
- motion.mid = transform_identity();
- motion.post = transform_identity();
+ motion.pre = transform_empty();
+ motion.mid = transform_empty();
+ motion.post = transform_empty();
use_motion = false;
}
@@ -70,19 +72,28 @@ void Object::compute_bounds(bool motion_blur)
BoundBox mbounds = mesh->bounds;
if(motion_blur && use_motion) {
- DecompMotionTransform decomp;
- transform_motion_decompose(&decomp, &motion, &tfm);
+ if(motion.pre == transform_empty() ||
+ motion.post == transform_empty()) {
+ /* Hide objects that have no valid previous or next transform, for
+ * example particle that stop existing. TODO: add support for this
+ * case in the kernel so we don't get render artifacts. */
+ bounds = BoundBox::empty;
+ }
+ else {
+ DecompMotionTransform decomp;
+ transform_motion_decompose(&decomp, &motion, &tfm);
- bounds = BoundBox::empty;
+ bounds = BoundBox::empty;
- /* todo: this is really terrible. according to pbrt there is a better
- * way to find this iteratively, but did not find implementation yet
- * or try to implement myself */
- for(float t = 0.0f; t < 1.0f; t += (1.0f/128.0f)) {
- Transform ttfm;
+ /* todo: this is really terrible. according to pbrt there is a better
+ * way to find this iteratively, but did not find implementation yet
+ * or try to implement myself */
+ for(float t = 0.0f; t < 1.0f; t += (1.0f/128.0f)) {
+ Transform ttfm;
- transform_motion_interpolate(&ttfm, &decomp, t);
- bounds.grow(mbounds.transformed(&ttfm));
+ transform_motion_interpolate(&ttfm, &decomp, t);
+ bounds.grow(mbounds.transformed(&ttfm));
+ }
}
}
else {
@@ -228,7 +239,7 @@ vector<float> Object::motion_times()
bool Object::is_traceable()
{
/* Mesh itself can be empty,can skip all such objects. */
- if (bounds.size() == make_float3(0.0f, 0.0f, 0.0f)) {
+ if (!bounds.valid() || bounds.size() == make_float3(0.0f, 0.0f, 0.0f)) {
return false;
}
/* TODO(sergey): Check for mesh vertices/curves. visibility flags. */
@@ -337,6 +348,15 @@ void ObjectManager::device_update_object_transform(UpdateObejctTransformState *s
Transform mtfm_pre = ob->motion.pre;
Transform mtfm_post = ob->motion.post;
+ /* In case of missing motion information for previous/next frame,
+ * assume there is no motion. */
+ if(!ob->use_motion || mtfm_pre == transform_empty()) {
+ mtfm_pre = ob->tfm;
+ }
+ if(!ob->use_motion || mtfm_post == transform_empty()) {
+ mtfm_post = ob->tfm;
+ }
+
if(!mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION)) {
mtfm_pre = mtfm_pre * itfm;
mtfm_post = mtfm_post * itfm;
@@ -589,6 +609,40 @@ void ObjectManager::device_update_flags(Device *device,
device->tex_alloc("__object_flag", dscene->object_flag);
}
+void ObjectManager::device_update_patch_map_offsets(Device *device, DeviceScene *dscene, Scene *scene)
+{
+ if (scene->objects.size() == 0)
+ return;
+
+ uint4* objects = (uint4*)dscene->objects.get_data();
+
+ bool update = false;
+
+ int object_index = 0;
+ foreach(Object *object, scene->objects) {
+ int offset = object_index*OBJECT_SIZE + 11;
+
+ Mesh* mesh = object->mesh;
+
+ if(mesh->patch_table) {
+ uint patch_map_offset = 2*(mesh->patch_table_offset + mesh->patch_table->total_size() -
+ mesh->patch_table->num_nodes * PATCH_NODE_SIZE) - mesh->patch_offset;
+
+ if(objects[offset].x != patch_map_offset) {
+ objects[offset].x = patch_map_offset;
+ update = true;
+ }
+ }
+
+ object_index++;
+ }
+
+ if(update) {
+ device->tex_free(dscene->objects);
+ device->tex_alloc("__objects", dscene->objects);
+ }
+}
+
void ObjectManager::device_free(Device *device, DeviceScene *dscene)
{
device->tex_free(dscene->objects);
@@ -638,7 +692,7 @@ void ObjectManager::apply_static_transforms(DeviceScene *dscene, Scene *scene, u
* Could be solved by moving reference counter to Mesh.
*/
if((mesh_users[object->mesh] == 1 && !object->mesh->has_surface_bssrdf) &&
- object->mesh->displacement_method == Mesh::DISPLACE_BUMP)
+ !object->mesh->has_true_displacement())
{
if(!(motion_blur && object->use_motion)) {
if(!object->mesh->transform_applied) {
diff --git a/intern/cycles/render/object.h b/intern/cycles/render/object.h
index 7ab73f3c91a..2e5837f672f 100644
--- a/intern/cycles/render/object.h
+++ b/intern/cycles/render/object.h
@@ -97,6 +97,8 @@ public:
Scene *scene,
Progress& progress,
bool bounds_valid = true);
+ void device_update_patch_map_offsets(Device *device, DeviceScene *dscene, Scene *scene);
+
void device_free(Device *device, DeviceScene *dscene);
void tag_update(Scene *scene);
diff --git a/intern/cycles/render/osl.cpp b/intern/cycles/render/osl.cpp
index 676afad997e..1a6ae5f9277 100644
--- a/intern/cycles/render/osl.cpp
+++ b/intern/cycles/render/osl.cpp
@@ -549,7 +549,7 @@ string OSLCompiler::id(ShaderNode *node)
{
/* assign layer unique name based on pointer address + bump mode */
stringstream stream;
- stream << "node_" << node->name << "_" << node;
+ stream << "node_" << node->type->name << "_" << node;
return stream.str();
}
diff --git a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h
index 9e72f197cce..8fec171b6fb 100644
--- a/intern/cycles/render/scene.h
+++ b/intern/cycles/render/scene.h
@@ -123,6 +123,8 @@ public:
/* opencl images */
device_vector<uchar4> tex_image_byte4_packed;
device_vector<float4> tex_image_float4_packed;
+ device_vector<uchar> tex_image_byte_packed;
+ device_vector<float> tex_image_float_packed;
device_vector<uint4> tex_image_packed_info;
KernelData data;
diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp
index 1cd76ff2b39..9d8c9fed7af 100644
--- a/intern/cycles/render/session.cpp
+++ b/intern/cycles/render/session.cpp
@@ -635,6 +635,11 @@ DeviceRequestedFeatures Session::get_requested_device_features()
}
requested_features.use_object_motion |= object->use_motion | mesh->use_motion_blur;
requested_features.use_camera_motion |= mesh->use_motion_blur;
+#ifdef WITH_OPENSUBDIV
+ if(mesh->subdivision_type != Mesh::SUBDIVISION_NONE) {
+ requested_features.use_patch_evaluation = true;
+ }
+#endif
}
BakeManager *bake_manager = scene->bake_manager;
diff --git a/intern/cycles/render/shader.cpp b/intern/cycles/render/shader.cpp
index 4cdb878df45..d000cca5a45 100644
--- a/intern/cycles/render/shader.cpp
+++ b/intern/cycles/render/shader.cpp
@@ -150,6 +150,12 @@ NODE_DEFINE(Shader)
volume_interpolation_method_enum.insert("cubic", VOLUME_INTERPOLATION_CUBIC);
SOCKET_ENUM(volume_interpolation_method, "Volume Interpolation Method", volume_interpolation_method_enum, VOLUME_INTERPOLATION_LINEAR);
+ static NodeEnum displacement_method_enum;
+ displacement_method_enum.insert("bump", DISPLACE_BUMP);
+ displacement_method_enum.insert("true", DISPLACE_TRUE);
+ displacement_method_enum.insert("both", DISPLACE_BOTH);
+ SOCKET_ENUM(displacement_method, "Displacement Method", displacement_method_enum, DISPLACE_BUMP);
+
return type;
}
@@ -173,6 +179,8 @@ Shader::Shader()
has_object_dependency = false;
has_integrator_dependency = false;
+ displacement_method = DISPLACE_BUMP;
+
id = -1;
used = false;
@@ -310,7 +318,7 @@ int ShaderManager::get_shader_id(Shader *shader, Mesh *mesh, bool smooth)
int id = shader->id*2;
/* index depends bump since this setting is not in the shader */
- if(mesh && mesh->displacement_method != Mesh::DISPLACE_TRUE)
+ if(mesh && shader->displacement_method != DISPLACE_TRUE)
id += 1;
/* smooth flag */
if(smooth)
diff --git a/intern/cycles/render/shader.h b/intern/cycles/render/shader.h
index dc57ed4e4eb..060ad7056bc 100644
--- a/intern/cycles/render/shader.h
+++ b/intern/cycles/render/shader.h
@@ -66,6 +66,14 @@ enum VolumeInterpolation {
VOLUME_NUM_INTERPOLATION,
};
+enum DisplacementMethod {
+ DISPLACE_BUMP = 0,
+ DISPLACE_TRUE = 1,
+ DISPLACE_BOTH = 2,
+
+ DISPLACE_NUM_METHODS,
+};
+
/* Shader describing the appearance of a Mesh, Light or Background.
*
* While there is only a single shader graph, it has three outputs: surface,
@@ -110,6 +118,9 @@ public:
bool has_object_dependency;
bool has_integrator_dependency;
+ /* displacement */
+ DisplacementMethod displacement_method;
+
/* requested mesh attributes */
AttributeRequestSet attributes;
diff --git a/intern/cycles/subd/CMakeLists.txt b/intern/cycles/subd/CMakeLists.txt
index db497013693..dafb807bdf3 100644
--- a/intern/cycles/subd/CMakeLists.txt
+++ b/intern/cycles/subd/CMakeLists.txt
@@ -16,18 +16,16 @@ set(SRC
subd_dice.cpp
subd_patch.cpp
subd_split.cpp
+ subd_patch_table.cpp
)
set(SRC_HEADERS
subd_dice.h
subd_patch.h
+ subd_patch_table.h
subd_split.h
)
-if(WITH_CYCLES_OPENSUBDIV)
- add_definitions(-DWITH_OPENSUBDIV)
-endif()
-
include_directories(${INC})
include_directories(SYSTEM ${INC_SYS})
diff --git a/intern/cycles/subd/subd_patch_table.cpp b/intern/cycles/subd/subd_patch_table.cpp
new file mode 100644
index 00000000000..68ec1b2c6a6
--- /dev/null
+++ b/intern/cycles/subd/subd_patch_table.cpp
@@ -0,0 +1,297 @@
+/*
+ * Based on code from OpenSubdiv released under this license:
+ *
+ * Copyright 2014 DreamWorks Animation LLC.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "Apache License")
+ * with the following modification; you may not use this file except in
+ * compliance with the Apache License and the following modification to it:
+ * Section 6. Trademarks. is deleted and replaced with:
+ *
+ * 6. Trademarks. This License does not grant permission to use the trade
+ * names, trademarks, service marks, or product names of the Licensor
+ * and its affiliates, except as required to comply with Section 4(c) of
+ * the License and to reproduce the content of the NOTICE file.
+ *
+ * You may obtain a copy of the Apache License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Apache License with the above modification is
+ * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the Apache License for the specific
+ * language governing permissions and limitations under the Apache License.
+ *
+ */
+
+#include "subd_patch_table.h"
+#include "kernel_types.h"
+
+#include "util_math.h"
+
+#ifdef WITH_OPENSUBDIV
+#include <opensubdiv/far/patchTable.h>
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+#ifdef WITH_OPENSUBDIV
+
+using namespace OpenSubdiv;
+
+/* functions for building patch maps */
+
+struct PatchMapQuadNode {
+ /* sets all the children to point to the patch of index */
+ void set_child(int index)
+ {
+ for (int i = 0; i < 4; i++) {
+ children[i] = index | PATCH_MAP_NODE_IS_SET | PATCH_MAP_NODE_IS_LEAF;
+ }
+ }
+
+ /* sets the child in quadrant to point to the node or patch of the given index */
+ void set_child(unsigned char quadrant, int index, bool is_leaf=true)
+ {
+ assert(quadrant < 4);
+ children[quadrant] = index | PATCH_MAP_NODE_IS_SET | (is_leaf ? PATCH_MAP_NODE_IS_LEAF : 0);
+ }
+
+ uint children[4];
+};
+
+template<class T>
+static int resolve_quadrant(T& median, T& u, T& v)
+{
+ int quadrant = -1;
+
+ if(u < median) {
+ if(v < median) {
+ quadrant = 0;
+ }
+ else {
+ quadrant = 1;
+ v -= median;
+ }
+ }
+ else {
+ if(v < median) {
+ quadrant = 3;
+ }
+ else {
+ quadrant = 2;
+ v -= median;
+ }
+ u -= median;
+ }
+
+ return quadrant;
+}
+
+static void build_patch_map(PackedPatchTable& table, OpenSubdiv::Far::PatchTable* patch_table, int offset)
+{
+ int num_faces = 0;
+
+ for(int array = 0; array < table.num_arrays; array++) {
+ Far::ConstPatchParamArray params = patch_table->GetPatchParams(array);
+
+ for(int j = 0; j < patch_table->GetNumPatches(array); j++) {
+ num_faces = max(num_faces, (int)params[j].GetFaceId());
+ }
+ }
+ num_faces++;
+
+ vector<PatchMapQuadNode> quadtree;
+ quadtree.reserve(num_faces + table.num_patches);
+ quadtree.resize(num_faces);
+
+ /* adjust offsets to make indices relative to the table */
+ int handle_index = -(table.num_patches * PATCH_HANDLE_SIZE);
+ offset += table.total_size();
+
+ /* populate the quadtree from the FarPatchArrays sub-patches */
+ for(int array = 0; array < table.num_arrays; array++) {
+ Far::ConstPatchParamArray params = patch_table->GetPatchParams(array);
+
+ for(int i = 0; i < patch_table->GetNumPatches(array); i++, handle_index += PATCH_HANDLE_SIZE) {
+ const Far::PatchParam& param = params[i];
+ unsigned short depth = param.GetDepth();
+
+ PatchMapQuadNode* node = &quadtree[params[i].GetFaceId()];
+
+ if(depth == (param.NonQuadRoot() ? 1 : 0)) {
+ /* special case : regular BSpline face w/ no sub-patches */
+ node->set_child(handle_index + offset);
+ continue;
+ }
+
+ int u = param.GetU();
+ int v = param.GetV();
+ int pdepth = param.NonQuadRoot() ? depth-2 : depth-1;
+ int half = 1 << pdepth;
+
+ for(int j = 0; j < depth; j++) {
+ int delta = half >> 1;
+
+ int quadrant = resolve_quadrant(half, u, v);
+ assert(quadrant >= 0);
+
+ half = delta;
+
+ if(j == pdepth) {
+ /* we have reached the depth of the sub-patch : add a leaf */
+ assert(!(node->children[quadrant] & PATCH_MAP_NODE_IS_SET));
+ node->set_child(quadrant, handle_index + offset, true);
+ break;
+ }
+ else {
+ /* travel down the child node of the corresponding quadrant */
+ if(!(node->children[quadrant] & PATCH_MAP_NODE_IS_SET)) {
+ /* create a new branch in the quadrant */
+ quadtree.push_back(PatchMapQuadNode());
+
+ int idx = (int)quadtree.size() - 1;
+ node->set_child(quadrant, idx*4 + offset, false);
+
+ node = &quadtree[idx];
+ }
+ else {
+ /* travel down an existing branch */
+ uint idx = node->children[quadrant] & PATCH_MAP_NODE_INDEX_MASK;
+ node = &(quadtree[(idx - offset)/4]);
+ }
+ }
+ }
+ }
+ }
+
+ /* copy into table */
+ assert(table.table.size() == table.total_size());
+ uint map_offset = table.total_size();
+
+ table.num_nodes = quadtree.size() * 4;
+ table.table.resize(table.total_size());
+
+ uint* data = &table.table[map_offset];
+
+ for(int i = 0; i < quadtree.size(); i++) {
+ for(int j = 0; j < 4; j++) {
+ assert(quadtree[i].children[j] & PATCH_MAP_NODE_IS_SET);
+ *(data++) = quadtree[i].children[j];
+ }
+ }
+}
+
+#endif
+
+/* packed patch table functions */
+
+size_t PackedPatchTable::total_size()
+{
+ return num_arrays * PATCH_ARRAY_SIZE +
+ num_indices +
+ num_patches * (PATCH_PARAM_SIZE + PATCH_HANDLE_SIZE) +
+ num_nodes * PATCH_NODE_SIZE;
+}
+
+void PackedPatchTable::pack(Far::PatchTable* patch_table, int offset)
+{
+ num_arrays = 0;
+ num_patches = 0;
+ num_indices = 0;
+ num_nodes = 0;
+
+#ifdef WITH_OPENSUBDIV
+ num_arrays = patch_table->GetNumPatchArrays();
+
+ for(int i = 0; i < num_arrays; i++) {
+ int patches = patch_table->GetNumPatches(i);
+ int num_control = patch_table->GetPatchArrayDescriptor(i).GetNumControlVertices();
+
+ num_patches += patches;
+ num_indices += patches * num_control;
+ }
+
+ table.resize(total_size());
+ uint* data = &table[0];
+
+ uint* array = data;
+ uint* index = array + num_arrays * PATCH_ARRAY_SIZE;
+ uint* param = index + num_indices;
+ uint* handle = param + num_patches * PATCH_PARAM_SIZE;
+
+ uint current_param = 0;
+
+ for(int i = 0; i < num_arrays; i++) {
+ *(array++) = patch_table->GetPatchArrayDescriptor(i).GetType();
+ *(array++) = patch_table->GetNumPatches(i);
+ *(array++) = (index - data) + offset;
+ *(array++) = (param - data) + offset;
+
+ Far::ConstIndexArray indices = patch_table->GetPatchArrayVertices(i);
+
+ for(int j = 0; j < indices.size(); j++) {
+ *(index++) = indices[j];
+ }
+
+ const Far::PatchParamTable& param_table = patch_table->GetPatchParamTable();
+
+ int num_control = patch_table->GetPatchArrayDescriptor(i).GetNumControlVertices();
+ int patches = patch_table->GetNumPatches(i);
+
+ for(int j = 0; j < patches; j++, current_param++) {
+ *(param++) = param_table[current_param].field0;
+ *(param++) = param_table[current_param].field1;
+
+ *(handle++) = (array - data) - PATCH_ARRAY_SIZE + offset;
+ *(handle++) = (param - data) - PATCH_PARAM_SIZE + offset;
+ *(handle++) = j * num_control;
+ }
+ }
+
+ build_patch_map(*this, patch_table, offset);
+#else
+ (void)patch_table;
+ (void)offset;
+#endif
+}
+
+void PackedPatchTable::copy_adjusting_offsets(uint* dest, int doffset)
+{
+ uint* src = &table[0];
+
+ /* arrays */
+ for(int i = 0; i < num_arrays; i++) {
+ *(dest++) = *(src++);
+ *(dest++) = *(src++);
+ *(dest++) = *(src++) + doffset;
+ *(dest++) = *(src++) + doffset;
+ }
+
+ /* indices */
+ for(int i = 0; i < num_indices; i++) {
+ *(dest++) = *(src++);
+ }
+
+ /* params */
+ for(int i = 0; i < num_patches; i++) {
+ *(dest++) = *(src++);
+ *(dest++) = *(src++);
+ }
+
+ /* handles */
+ for(int i = 0; i < num_patches; i++) {
+ *(dest++) = *(src++) + doffset;
+ *(dest++) = *(src++) + doffset;
+ *(dest++) = *(src++);
+ }
+
+ /* nodes */
+ for(int i = 0; i < num_nodes; i++) {
+ *(dest++) = *(src++) + doffset;
+ }
+}
+
+CCL_NAMESPACE_END
+
diff --git a/intern/cycles/subd/subd_patch_table.h b/intern/cycles/subd/subd_patch_table.h
new file mode 100644
index 00000000000..c8c7ecf9e47
--- /dev/null
+++ b/intern/cycles/subd/subd_patch_table.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright 2011-2016 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __SUBD_PATCH_TABLE_H__
+#define __SUBD_PATCH_TABLE_H__
+
+#include "util_types.h"
+#include "util_vector.h"
+
+#ifdef WITH_OPENSUBDIV
+#ifdef _MSC_VER
+# include "iso646.h"
+#endif
+
+#include <opensubdiv/far/patchTable.h>
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+#ifdef WITH_OPENSUBDIV
+using namespace OpenSubdiv;
+#else
+/* forward declare for when OpenSubdiv is unavailable */
+namespace Far { struct PatchTable; }
+#endif
+
+#define PATCH_ARRAY_SIZE 4
+#define PATCH_PARAM_SIZE 2
+#define PATCH_HANDLE_SIZE 3
+#define PATCH_NODE_SIZE 1
+
+struct PackedPatchTable {
+ vector<uint> table;
+
+ size_t num_arrays;
+ size_t num_indices;
+ size_t num_patches;
+ size_t num_nodes;
+
+ /* calculated size from num_* members */
+ size_t total_size();
+
+ void pack(Far::PatchTable* patch_table, int offset = 0);
+ void copy_adjusting_offsets(uint* dest, int doffset);
+};
+
+CCL_NAMESPACE_END
+
+#endif /* __SUBD_PATCH_TABLE_H__ */
+
diff --git a/intern/cycles/test/CMakeLists.txt b/intern/cycles/test/CMakeLists.txt
index 80fe893826a..9af777fb9dd 100644
--- a/intern/cycles/test/CMakeLists.txt
+++ b/intern/cycles/test/CMakeLists.txt
@@ -26,6 +26,7 @@ set(ALL_CYCLES_LIBRARIES
cycles_device
cycles_bvh
cycles_graph
+ cycles_subd
cycles_util
${OPENIMAGEIO_LIBRARIES}
)
@@ -41,6 +42,16 @@ if(WITH_IMAGE_OPENJPEG AND NOT WITH_SYSTEM_OPENJPEG)
extern_openjpeg
)
endif()
+if(WITH_CYCLES_OPENSUBDIV)
+ add_definitions(-DWITH_OPENSUBDIV)
+ include_directories(
+ SYSTEM
+ ${OPENSUBDIV_INCLUDE_DIR}
+ )
+ list(APPEND ALL_CYCLES_LIBRARIES
+ ${OPENSUBDIV_LIBRARIES}
+ )
+endif()
list(APPEND ALL_CYCLES_LIBRARIES
${BOOST_LIBRARIES}
)
diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt
index e6140b3ed09..f5674bdc15c 100644
--- a/intern/cycles/util/CMakeLists.txt
+++ b/intern/cycles/util/CMakeLists.txt
@@ -25,10 +25,6 @@ set(SRC
util_windows.cpp
)
-if(NOT CYCLES_STANDALONE_REPOSITORY)
- add_definitions(-DWITH_GLEW_MX)
-endif()
-
if(WITH_CYCLES_STANDALONE AND WITH_CYCLES_STANDALONE_GUI)
list(APPEND SRC
util_view.cpp
@@ -71,6 +67,7 @@ set(SRC_HEADERS
util_ssef.h
util_ssei.h
util_stack_allocator.h
+ util_static_assert.h
util_stats.h
util_string.h
util_system.h
diff --git a/intern/cycles/util/util_debug.h b/intern/cycles/util/util_debug.h
index 1787ff648ee..73fd228b5d9 100644
--- a/intern/cycles/util/util_debug.h
+++ b/intern/cycles/util/util_debug.h
@@ -20,6 +20,8 @@
#include <cassert>
#include <iostream>
+#include "util_static_assert.h"
+
CCL_NAMESPACE_BEGIN
/* Global storage for all sort of flags used to fine-tune behavior of particular
diff --git a/intern/cycles/util/util_half.h b/intern/cycles/util/util_half.h
index ae85ab3a915..5db3384cda4 100644
--- a/intern/cycles/util/util_half.h
+++ b/intern/cycles/util/util_half.h
@@ -33,17 +33,21 @@ CCL_NAMESPACE_BEGIN
#else
+/* CUDA has its own half data type, no need to define then */
+#ifndef __KERNEL_CUDA__
typedef unsigned short half;
+#endif
+
struct half4 { half x, y, z, w; };
#ifdef __KERNEL_CUDA__
ccl_device_inline void float4_store_half(half *h, float4 f, float scale)
{
- h[0] = __float2half_rn(f.x * scale);
- h[1] = __float2half_rn(f.y * scale);
- h[2] = __float2half_rn(f.z * scale);
- h[3] = __float2half_rn(f.w * scale);
+ h[0] = __float2half(f.x * scale);
+ h[1] = __float2half(f.y * scale);
+ h[2] = __float2half(f.z * scale);
+ h[3] = __float2half(f.w * scale);
}
#else
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h
index 13aba0646d2..89a882d9b9d 100644
--- a/intern/cycles/util/util_math.h
+++ b/intern/cycles/util/util_math.h
@@ -572,6 +572,12 @@ ccl_device_inline float3 safe_normalize(const float3 a)
return (t != 0.0f)? a/t: a;
}
+ccl_device_inline float3 safe_normalize_len(const float3 a, float *t)
+{
+ *t = len(a);
+ return (*t != 0.0f)? a/(*t): a;
+}
+
#ifndef __KERNEL_OPENCL__
ccl_device_inline bool operator==(const float3 a, const float3 b)
diff --git a/intern/cycles/util/util_static_assert.h b/intern/cycles/util/util_static_assert.h
new file mode 100644
index 00000000000..1b945705145
--- /dev/null
+++ b/intern/cycles/util/util_static_assert.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2011-2016 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_STATIC_ASSERT_H__
+#define __UTIL_STATIC_ASSERT_H__
+
+CCL_NAMESPACE_BEGIN
+
+/* TODO(sergey): In theory CUDA might work with own static assert
+ * implementation since it's just pure C++.
+ */
+#ifndef __KERNEL_GPU__
+# if (__cplusplus > 199711L) || (defined(_MSC_VER) && _MSC_VER >= 1800)
+/* C++11 has built-in static_assert() */
+# else /* C++11 or MSVC2015 */
+template <bool Test> class StaticAssertFailure;
+template <> class StaticAssertFailure<true> {};
+# define _static_assert_private_glue_impl(A, B) A ## B
+# define _static_assert_glue(A, B) _static_assert_private_glue_impl(A, B)
+# ifdef __COUNTER__
+# define static_assert(condition, message) \
+ enum {_static_assert_glue(q_static_assert_result, __COUNTER__) = sizeof(StaticAssertFailure<!!(condition)>)} // NOLINT
+# else /* __COUNTER__ */
+# define static_assert(condition, message) \
+ enum {_static_assert_glue(q_static_assert_result, __LINE__) = sizeof(StaticAssertFailure<!!(condition)>)} // NOLINT
+# endif /* __COUNTER__ */
+# endif /* C++11 or MSVC2015 */
+#else /* __KERNEL_GPU__ */
+# define static_assert(statement, message)
+#endif /* __KERNEL_GPU__ */
+
+/* TODO(sergey): For until C++11 is a bare minimum for us,
+ * we do a bit of a trickery to show meaningful message so
+ * it's more or less clear what's wrong when building without
+ * C++11.
+ *
+ * The thing here is: our non-C++11 implementation doesn't
+ * have a way to print any message after preprocessor
+ * substitution so we rely on the message which is passed to
+ * static_assert() since that's the only message visible when
+ * compilation fails.
+ *
+ * After C++11 bump it should be possible to glue structure
+ * name to the error message,
+ */
+# define static_assert_align(st, align) \
+ static_assert((sizeof(st) % (align) == 0), "Structure must be strictly aligned") // NOLINT
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_STATIC_ASSERT_H__ */
diff --git a/intern/cycles/util/util_texture.h b/intern/cycles/util/util_texture.h
index 2ef47283029..aff928ea2ee 100644
--- a/intern/cycles/util/util_texture.h
+++ b/intern/cycles/util/util_texture.h
@@ -24,58 +24,58 @@ CCL_NAMESPACE_BEGIN
/* CPU */
#define TEX_NUM_FLOAT4_CPU 1024
#define TEX_NUM_BYTE4_CPU 1024
+#define TEX_NUM_HALF4_CPU 1024
#define TEX_NUM_FLOAT_CPU 1024
#define TEX_NUM_BYTE_CPU 1024
-#define TEX_NUM_HALF4_CPU 1024
#define TEX_NUM_HALF_CPU 1024
#define TEX_START_FLOAT4_CPU 0
#define TEX_START_BYTE4_CPU TEX_NUM_FLOAT4_CPU
-#define TEX_START_FLOAT_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU)
-#define TEX_START_BYTE_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_FLOAT_CPU)
-#define TEX_START_HALF4_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_FLOAT_CPU + TEX_NUM_BYTE_CPU)
-#define TEX_START_HALF_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_FLOAT_CPU + TEX_NUM_BYTE_CPU + TEX_NUM_HALF4_CPU)
+#define TEX_START_HALF4_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU)
+#define TEX_START_FLOAT_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_HALF4_CPU)
+#define TEX_START_BYTE_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_HALF4_CPU + TEX_NUM_FLOAT_CPU)
+#define TEX_START_HALF_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_HALF4_CPU + TEX_NUM_FLOAT_CPU + TEX_NUM_BYTE_CPU)
/* CUDA (Geforce 4xx and 5xx) */
#define TEX_NUM_FLOAT4_CUDA 5
-#define TEX_NUM_BYTE4_CUDA 88
+#define TEX_NUM_BYTE4_CUDA 85
+#define TEX_NUM_HALF4_CUDA 0
#define TEX_NUM_FLOAT_CUDA 0
#define TEX_NUM_BYTE_CUDA 0
-#define TEX_NUM_HALF4_CUDA 0
#define TEX_NUM_HALF_CUDA 0
#define TEX_START_FLOAT4_CUDA 0
#define TEX_START_BYTE4_CUDA TEX_NUM_FLOAT4_CUDA
-#define TEX_START_FLOAT_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA)
-#define TEX_START_BYTE_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_FLOAT_CUDA)
-#define TEX_START_HALF4_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_FLOAT_CUDA + TEX_NUM_BYTE_CUDA)
-#define TEX_START_HALF_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_FLOAT_CUDA + TEX_NUM_BYTE_CUDA + TEX_NUM_HALF4_CUDA)
+#define TEX_START_HALF4_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA)
+#define TEX_START_FLOAT_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_HALF4_CUDA)
+#define TEX_START_BYTE_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_HALF4_CUDA + TEX_NUM_FLOAT_CUDA)
+#define TEX_START_HALF_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_HALF4_CUDA + TEX_NUM_FLOAT_CUDA + TEX_NUM_BYTE_CUDA)
/* CUDA (Kepler, Geforce 6xx and above) */
#define TEX_NUM_FLOAT4_CUDA_KEPLER 1024
#define TEX_NUM_BYTE4_CUDA_KEPLER 1024
+#define TEX_NUM_HALF4_CUDA_KEPLER 1024
#define TEX_NUM_FLOAT_CUDA_KEPLER 1024
#define TEX_NUM_BYTE_CUDA_KEPLER 1024
-#define TEX_NUM_HALF4_CUDA_KEPLER 0
-#define TEX_NUM_HALF_CUDA_KEPLER 0
+#define TEX_NUM_HALF_CUDA_KEPLER 1024
#define TEX_START_FLOAT4_CUDA_KEPLER 0
#define TEX_START_BYTE4_CUDA_KEPLER TEX_NUM_FLOAT4_CUDA_KEPLER
-#define TEX_START_FLOAT_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER)
-#define TEX_START_BYTE_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_FLOAT_CUDA_KEPLER)
-#define TEX_START_HALF4_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_FLOAT_CUDA_KEPLER + TEX_NUM_BYTE_CUDA_KEPLER)
-#define TEX_START_HALF_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_FLOAT_CUDA_KEPLER + TEX_NUM_BYTE_CUDA_KEPLER + TEX_NUM_HALF4_CUDA_KEPLER)
+#define TEX_START_HALF4_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER)
+#define TEX_START_FLOAT_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_HALF4_CUDA_KEPLER)
+#define TEX_START_BYTE_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_HALF4_CUDA_KEPLER + TEX_NUM_FLOAT_CUDA_KEPLER)
+#define TEX_START_HALF_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_HALF4_CUDA_KEPLER + TEX_NUM_FLOAT_CUDA_KEPLER + TEX_NUM_BYTE_CUDA_KEPLER)
/* OpenCL */
#define TEX_NUM_FLOAT4_OPENCL 1024
#define TEX_NUM_BYTE4_OPENCL 1024
-#define TEX_NUM_FLOAT_OPENCL 0
-#define TEX_NUM_BYTE_OPENCL 0
#define TEX_NUM_HALF4_OPENCL 0
+#define TEX_NUM_FLOAT_OPENCL 1024
+#define TEX_NUM_BYTE_OPENCL 1024
#define TEX_NUM_HALF_OPENCL 0
#define TEX_START_FLOAT4_OPENCL 0
#define TEX_START_BYTE4_OPENCL TEX_NUM_FLOAT4_OPENCL
-#define TEX_START_FLOAT_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL)
-#define TEX_START_BYTE_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_FLOAT_OPENCL)
-#define TEX_START_HALF4_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_FLOAT_OPENCL + TEX_NUM_BYTE_OPENCL)
-#define TEX_START_HALF_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_FLOAT_OPENCL + TEX_NUM_BYTE_OPENCL + TEX_NUM_HALF4_OPENCL)
+#define TEX_START_HALF4_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL)
+#define TEX_START_FLOAT_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_HALF4_OPENCL)
+#define TEX_START_BYTE_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_HALF4_OPENCL + TEX_NUM_FLOAT_OPENCL)
+#define TEX_START_HALF_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_HALF4_OPENCL + TEX_NUM_FLOAT_OPENCL + TEX_NUM_BYTE_OPENCL)
/* Color to use when textures are not found. */
diff --git a/intern/cycles/util/util_transform.h b/intern/cycles/util/util_transform.h
index 6fed18a3db8..bfc8f55feed 100644
--- a/intern/cycles/util/util_transform.h
+++ b/intern/cycles/util/util_transform.h
@@ -323,6 +323,15 @@ ccl_device_inline Transform transform_clear_scale(const Transform& tfm)
return ntfm;
}
+ccl_device_inline Transform transform_empty()
+{
+ return make_transform(
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0);
+}
+
#endif
/* Motion Transform */
diff --git a/intern/cycles/util/util_vector.h b/intern/cycles/util/util_vector.h
index 6f8c3f6f3de..546b17570bb 100644
--- a/intern/cycles/util/util_vector.h
+++ b/intern/cycles/util/util_vector.h
@@ -222,6 +222,11 @@ public:
return datasize_;
}
+ T* data()
+ {
+ return data_;
+ }
+
const T* data() const
{
return data_;