diff options
Diffstat (limited to 'intern/cycles')
78 files changed, 1129 insertions, 864 deletions
diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt index fb747c1313e..c8c71fe6856 100644 --- a/intern/cycles/CMakeLists.txt +++ b/intern/cycles/CMakeLists.txt @@ -14,7 +14,9 @@ include(cmake/external_libs.cmake) # todo: refactor this code to match scons # note: CXX_HAS_SSE is needed in case passing SSE flags fails altogether (gcc-arm) -if(WIN32 AND MSVC) +if(NOT WITH_CPU_SSE) + set(CXX_HAS_SSE FALSE) +elseif(WIN32 AND MSVC) set(CXX_HAS_SSE TRUE) # /arch:AVX for VC2012 and above @@ -161,6 +163,10 @@ include_directories( ${OPENEXR_INCLUDE_DIRS} ) +# TODO(sergey): Adjust so standalone repository is also happy. +include_directories( + ../atomic +) # Warnings if(CMAKE_COMPILER_IS_GNUCXX) diff --git a/intern/cycles/SConscript b/intern/cycles/SConscript index b399844534d..15a02881ec2 100644 --- a/intern/cycles/SConscript +++ b/intern/cycles/SConscript @@ -62,12 +62,23 @@ if env['WITH_BF_CYCLES_OSL']: if env['WITH_BF_CYCLES_DEBUG']: defs.append('WITH_CYCLES_DEBUG') +if env['WITH_BF_CYCLES_LOGGING']: + defs.append('WITH_CYCLES_LOGGING') + defs.append('GOOGLE_GLOG_DLL_DECL=') + if env['OURPLATFORM'] in ('win32-vc', 'win32-mingw', 'linuxcross', 'win64-vc', 'win64-mingw'): + incs.append('#extern/libmv/third_party/glog/src/windows') + incs.append('#extern/libmv/third_party/gflags') + else: + incs.append('#extern/libmv/third_party/glog/src') + incs.append('#extern/libmv/third_party/gflags') + incs.extend('. bvh render device kernel kernel/osl kernel/svm util subd'.split()) incs.extend('#intern/guardedalloc #source/blender/makesrna #source/blender/makesdna #source/blender/blenlib'.split()) incs.extend('#source/blender/blenloader ../../source/blender/makesrna/intern'.split()) incs.append(env['BF_GLEW_INC']) incs.append('#/intern/glew-mx') +incs.append('#/intern/atomic') incs.append('#intern/mikktspace') incs.extend('#extern/glew/include #extern/clew/include #extern/cuew/include #intern/mikktspace'.split()) diff --git a/intern/cycles/app/cycles_xml.cpp b/intern/cycles/app/cycles_xml.cpp index 431796e106b..528b3016b80 100644 --- a/intern/cycles/app/cycles_xml.cpp +++ b/intern/cycles/app/cycles_xml.cpp @@ -299,7 +299,6 @@ static void xml_read_integrator(const XMLReadState& state, pugi::xml_node node) xml_read_bool(&integrator->transparent_shadows, node, "transparent_shadows"); /* Volume */ - xml_read_int(&integrator->volume_homogeneous_sampling, node, "volume_homogeneous_sampling"); xml_read_float(&integrator->volume_step_size, node, "volume_step_size"); xml_read_int(&integrator->volume_max_steps, node, "volume_max_steps"); @@ -803,7 +802,17 @@ static void xml_read_shader(const XMLReadState& state, pugi::xml_node node) xml_read_string(&shader->name, node, "name"); xml_read_bool(&shader->use_mis, node, "use_mis"); xml_read_bool(&shader->use_transparent_shadow, node, "use_transparent_shadow"); + + /* Volume */ xml_read_bool(&shader->heterogeneous_volume, node, "heterogeneous_volume"); + xml_read_int(&shader->volume_interpolation_method, node, "volume_interpolation_method"); + + if(xml_equal_string(node, "volume_sampling_method", "distance")) + shader->volume_sampling_method = VOLUME_SAMPLING_DISTANCE; + else if(xml_equal_string(node, "volume_sampling_method", "equiangular")) + shader->volume_sampling_method = VOLUME_SAMPLING_EQUIANGULAR; + else if(xml_equal_string(node, "volume_sampling_method", "multiple_importance")) + shader->volume_sampling_method = VOLUME_SAMPLING_MULTIPLE_IMPORTANCE; xml_read_shader_graph(state, shader, node); state.scene->shaders.push_back(shader); @@ -816,6 +825,14 @@ static void xml_read_background(const XMLReadState& state, pugi::xml_node node) Shader *shader = state.scene->shaders[state.scene->default_background]; xml_read_bool(&shader->heterogeneous_volume, node, "heterogeneous_volume"); + xml_read_int(&shader->volume_interpolation_method, node, "volume_interpolation_method"); + + if(xml_equal_string(node, "volume_sampling_method", "distance")) + shader->volume_sampling_method = VOLUME_SAMPLING_DISTANCE; + else if(xml_equal_string(node, "volume_sampling_method", "equiangular")) + shader->volume_sampling_method = VOLUME_SAMPLING_EQUIANGULAR; + else if(xml_equal_string(node, "volume_sampling_method", "multiple_importance")) + shader->volume_sampling_method = VOLUME_SAMPLING_MULTIPLE_IMPORTANCE; xml_read_shader_graph(state, shader, node); } diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py index 597ac1a9ce0..9459b750bd1 100644 --- a/intern/cycles/blender/addon/properties.py +++ b/intern/cycles/blender/addon/properties.py @@ -114,6 +114,11 @@ enum_volume_sampling = ( ('MULTIPLE_IMPORTANCE', "Multiple Importance", "Combine distance and equi-angular sampling for volumes where neither method is ideal"), ) +enum_volume_interpolation = ( + ('LINEAR', "Linear", "Good smoothness and speed"), + ('CUBIC', 'Cubic', 'Smoothed high quality interpolation, but slower') + ) + class CyclesRenderSettings(bpy.types.PropertyGroup): @classmethod @@ -345,7 +350,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): description="Distance between volume shader samples when rendering the volume " "(lower values give more accurate and detailed results, but also increased render time)", default=0.1, - min=0.0000001, max=100000.0 + min=0.0000001, max=100000.0, soft_min=0.01, soft_max=1.0 ) cls.volume_max_steps = IntProperty( @@ -617,6 +622,13 @@ class CyclesMaterialSettings(bpy.types.PropertyGroup): default='DISTANCE', ) + cls.volume_interpolation = EnumProperty( + name="Volume Interpolation", + description="Interpolation method to use for volumes", + items=enum_volume_interpolation, + default='LINEAR', + ) + @classmethod def unregister(cls): del bpy.types.Material.cycles @@ -641,6 +653,12 @@ class CyclesLampSettings(bpy.types.PropertyGroup): min=1, max=10000, default=1, ) + cls.max_bounces = IntProperty( + name="Max Bounces", + description="Maximum number of bounces the light will contribute to the render", + min=0, max=1024, + default=1024, + ) cls.use_multiple_importance_sampling = BoolProperty( name="Multiple Importance Sample", description="Use multiple importance sampling for the lamp, " @@ -693,6 +711,13 @@ class CyclesWorldSettings(bpy.types.PropertyGroup): default='EQUIANGULAR', ) + cls.volume_interpolation = EnumProperty( + name="Volume Interpolation", + description="Interpolation method to use for volumes", + items=enum_volume_interpolation, + default='LINEAR', + ) + @classmethod def unregister(cls): del bpy.types.World.cycles diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py index 9632b12c414..9b1e20d3c8f 100644 --- a/intern/cycles/blender/addon/ui.py +++ b/intern/cycles/blender/addon/ui.py @@ -729,11 +729,11 @@ class CyclesLamp_PT_lamp(CyclesButtonsPanel, Panel): if cscene.progressive == 'BRANCHED_PATH': col.prop(clamp, "samples") + col.prop(clamp, "max_bounces") col = split.column() col.prop(clamp, "cast_shadow") - - layout.prop(clamp, "use_multiple_importance_sampling") + col.prop(clamp, "use_multiple_importance_sampling", text="Multiple Importance") if lamp.type == 'HEMI': layout.label(text="Not supported, interpreted as sun lamp") @@ -936,6 +936,7 @@ class CyclesWorld_PT_settings(CyclesButtonsPanel, Panel): sub = col.column() sub.active = use_cpu(context) sub.prop(cworld, "volume_sampling", text="") + sub.prop(cworld, "volume_interpolation", text="") col.prop(cworld, "homogeneous_volume", text="Homogeneous") @@ -1019,17 +1020,6 @@ class CyclesMaterial_PT_settings(CyclesButtonsPanel, Panel): cmat = mat.cycles split = layout.split() - - col = split.column(align=True) - col.prop(mat, "diffuse_color", text="Viewport Color") - col.prop(mat, "alpha") - - col = split.column(align=True) - col.label() - col.prop(mat, "pass_index") - - split = layout.split() - col = split.column() col.label(text="Surface:") col.prop(cmat, "sample_as_light", text="Multiple Importance") @@ -1040,8 +1030,25 @@ class CyclesMaterial_PT_settings(CyclesButtonsPanel, Panel): sub = col.column() sub.active = use_cpu(context) sub.prop(cmat, "volume_sampling", text="") + col.prop(cmat, "volume_interpolation", text="") col.prop(cmat, "homogeneous_volume", text="Homogeneous") + layout.separator() + split = layout.split() + + col = split.column(align=True) + col.label("Viewport Color:") + col.prop(mat, "diffuse_color", text="") + col.prop(mat, "alpha") + + col.separator() + col.prop(mat, "pass_index") + + col = split.column(align=True) + col.label("Viewport Specular:") + col.prop(mat, "specular_color", text="") + col.prop(mat, "specular_hardness", text="Hardness") + class CyclesTexture_PT_context(CyclesButtonsPanel, Panel): bl_label = "" @@ -1381,7 +1388,11 @@ def get_panels(): "RENDER_PT_encoding", "RENDER_PT_dimensions", "RENDER_PT_stamp", + "RENDER_PT_freestyle", "RENDERLAYER_PT_layers", + "RENDERLAYER_PT_freestyle", + "RENDERLAYER_PT_freestyle_lineset", + "RENDERLAYER_PT_freestyle_linestyle", "SCENE_PT_scene", "SCENE_PT_color_management", "SCENE_PT_custom_props", @@ -1419,6 +1430,7 @@ def get_panels(): "DATA_PT_custom_props_curve", "DATA_PT_custom_props_lattice", "DATA_PT_custom_props_metaball", + "TEXTURE_PT_preview", "TEXTURE_PT_custom_props", "TEXTURE_PT_clouds", "TEXTURE_PT_wood", @@ -1436,6 +1448,7 @@ def get_panels(): "TEXTURE_PT_pointdensity", "TEXTURE_PT_pointdensity_turbulence", "TEXTURE_PT_mapping", + "TEXTURE_PT_ocean", "TEXTURE_PT_influence", "TEXTURE_PT_colors", "PARTICLE_PT_context_particles", @@ -1457,6 +1470,7 @@ def get_panels(): "PARTICLE_PT_force_fields", "PARTICLE_PT_vertexgroups", "MATERIAL_PT_custom_props", + "MATERIAL_PT_freestyle_line", "BONE_PT_custom_props", "OBJECT_PT_custom_props", ] diff --git a/intern/cycles/blender/blender_camera.cpp b/intern/cycles/blender/blender_camera.cpp index ce8c64c4819..416348f3b91 100644 --- a/intern/cycles/blender/blender_camera.cpp +++ b/intern/cycles/blender/blender_camera.cpp @@ -20,6 +20,8 @@ #include "blender_sync.h" #include "blender_util.h" +#include "util_logging.h" + CCL_NAMESPACE_BEGIN /* Blender Camera Intermediate: we first convert both the offline and 3d view @@ -400,6 +402,7 @@ void BlenderSync::sync_camera_motion(BL::Object b_ob, float motion_time) tfm = blender_camera_matrix(tfm, cam->type); if(tfm != cam->matrix) { + VLOG(1) << "Camera " << b_ob.name() << " motion detected."; if(motion_time == -1.0f) { cam->motion.pre = tfm; cam->use_motion = true; diff --git a/intern/cycles/blender/blender_curves.cpp b/intern/cycles/blender/blender_curves.cpp index 8cfaea59a06..7c8e7d40119 100644 --- a/intern/cycles/blender/blender_curves.cpp +++ b/intern/cycles/blender/blender_curves.cpp @@ -25,6 +25,7 @@ #include "blender_util.h" #include "util_foreach.h" +#include "util_logging.h" CCL_NAMESPACE_BEGIN @@ -577,6 +578,10 @@ void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CData) } } + if (num_curves > 0) { + VLOG(1) << "Exporting curve segments for mesh " << mesh->name; + } + mesh->curve_keys.reserve(mesh->curve_keys.size() + num_keys); mesh->curves.reserve(mesh->curves.size() + num_curves); @@ -612,9 +617,9 @@ void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CData) } } - /* check allocation*/ + /* check allocation */ if((mesh->curve_keys.size() != num_keys) || (mesh->curves.size() != num_curves)) { - /* allocation failed -> clear data */ + VLOG(1) << "Allocation failed, clearing data"; mesh->curve_keys.clear(); mesh->curves.clear(); mesh->curve_attributes.clear(); @@ -623,12 +628,16 @@ void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CData) static void ExportCurveSegmentsMotion(Scene *scene, Mesh *mesh, ParticleCurveData *CData, int time_index) { + VLOG(1) << "Exporting curve motion segments for mesh " << mesh->name + << ", time index " << time_index; + /* find attribute */ Attribute *attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); bool new_attribute = false; /* add new attribute if it doesn't exist already */ if(!attr_mP) { + VLOG(1) << "Creating new motion vertex position attribute"; attr_mP = mesh->curve_attributes.add(ATTR_STD_MOTION_VERTEX_POSITION); new_attribute = true; } @@ -675,9 +684,12 @@ static void ExportCurveSegmentsMotion(Scene *scene, Mesh *mesh, ParticleCurveDat if(new_attribute) { if(i != numkeys || !have_motion) { /* no motion, remove attributes again */ + VLOG(1) << "No motion, removing attribute"; mesh->curve_attributes.remove(ATTR_STD_MOTION_VERTEX_POSITION); } else if(time_index > 0) { + VLOG(1) << "Filling in new motion vertex position for time_index" + << time_index; /* motion, fill up previous steps that we might have skipped because * they had no motion, but we need them anyway now */ for(int step = 0; step < time_index; step++) { diff --git a/intern/cycles/blender/blender_mesh.cpp b/intern/cycles/blender/blender_mesh.cpp index a5e4b7bd2ae..e8da8a87c1d 100644 --- a/intern/cycles/blender/blender_mesh.cpp +++ b/intern/cycles/blender/blender_mesh.cpp @@ -27,6 +27,7 @@ #include "subd_split.h" #include "util_foreach.h" +#include "util_logging.h" #include "mikktspace.h" @@ -761,11 +762,13 @@ void BlenderSync::sync_mesh_motion(BL::Object b_ob, Object *object, float motion if(new_attribute) { if(i != numverts || memcmp(mP, &mesh->verts[0], sizeof(float3)*numverts) == 0) { /* no motion, remove attributes again */ + VLOG(1) << "No actual motion for mesh " << b_mesh.name(); mesh->attributes.remove(ATTR_STD_MOTION_VERTEX_POSITION); if(attr_mN) mesh->attributes.remove(ATTR_STD_MOTION_VERTEX_NORMAL); } else if(time_index > 0) { + VLOG(1) << "Filling motion for mesh " << b_mesh.name(); /* motion, fill up previous steps that we might have skipped because * they had no motion, but we need them anyway now */ float3 *P = &mesh->verts[0]; diff --git a/intern/cycles/blender/blender_object.cpp b/intern/cycles/blender/blender_object.cpp index 1e07c5f9c96..88bfbf6db74 100644 --- a/intern/cycles/blender/blender_object.cpp +++ b/intern/cycles/blender/blender_object.cpp @@ -30,6 +30,7 @@ #include "util_foreach.h" #include "util_hash.h" +#include "util_logging.h" CCL_NAMESPACE_BEGIN @@ -168,6 +169,8 @@ void BlenderSync::sync_light(BL::Object b_parent, int persistent_id[OBJECT_PERSI else light->samples = samples; + light->max_bounces = get_int(clamp, "max_bounces"); + /* visibility */ uint visibility = object_ray_visibility(b_ob); light->use_diffuse = (visibility & PATH_RAY_DIFFUSE) != 0; @@ -249,6 +252,7 @@ Object *BlenderSync::sync_object(BL::Object b_parent, int persistent_id[OBJECT_P if(object && (scene->need_motion() == Scene::MOTION_PASS || object_use_motion(b_ob))) { /* object transformation */ if(tfm != object->tfm) { + VLOG(1) << "Object " << b_ob.name() << " motion detected."; if(motion_time == -1.0f) { object->motion.pre = tfm; object->use_motion = true; @@ -458,10 +462,10 @@ void BlenderSync::sync_objects(BL::SpaceView3D b_v3d, float motion_time) BL::Scene::object_bases_iterator b_base; BL::Scene b_sce = b_scene; /* modifier result type (not exposed as enum in C++ API) - * 1 : DAG_EVAL_PREVIEW - * 2 : DAG_EVAL_RENDER - */ - int dupli_settings = preview ? 1 : 2; + * 1 : DAG_EVAL_PREVIEW + * 2 : DAG_EVAL_RENDER + */ + int dupli_settings = preview ? 1 : 2; bool cancel = false; diff --git a/intern/cycles/blender/blender_python.cpp b/intern/cycles/blender/blender_python.cpp index b756d6acdb2..78a96319163 100644 --- a/intern/cycles/blender/blender_python.cpp +++ b/intern/cycles/blender/blender_python.cpp @@ -53,14 +53,36 @@ void python_thread_state_restore(void **python_thread_state) *python_thread_state = NULL; } +static const char *PyC_UnicodeAsByte(PyObject *py_str, PyObject **coerce) +{ +#ifdef WIN32 + /* bug [#31856] oddly enough, Python3.2 --> 3.3 on Windows will throw an + * exception here this needs to be fixed in python: + * see: bugs.python.org/issue15859 */ + if(!PyUnicode_Check(py_str)) { + PyErr_BadArgument(); + return ""; + } +#endif + if((*coerce = PyUnicode_EncodeFSDefault(py_str))) { + return PyBytes_AS_STRING(*coerce); + } + return ""; +} + static PyObject *init_func(PyObject *self, PyObject *args) { - const char *path, *user_path; + PyObject *path, *user_path; - if(!PyArg_ParseTuple(args, "ss", &path, &user_path)) + if(!PyArg_ParseTuple(args, "OO", &path, &user_path)) { return NULL; - - path_init(path, user_path); + } + + PyObject *path_coerce = NULL, *user_path_coerce = NULL; + path_init(PyC_UnicodeAsByte(path, &path_coerce), + PyC_UnicodeAsByte(user_path, &user_path_coerce)); + Py_XDECREF(path_coerce); + Py_XDECREF(user_path_coerce); Py_RETURN_NONE; } @@ -83,7 +105,7 @@ static PyObject *create_func(PyObject *self, PyObject *args) BL::UserPreferences userpref(userprefptr); PointerRNA dataptr; - RNA_id_pointer_create((ID*)PyLong_AsVoidPtr(pydata), &dataptr); + RNA_main_pointer_create((Main*)PyLong_AsVoidPtr(pydata), &dataptr); BL::BlendData data(dataptr); PointerRNA sceneptr; @@ -91,15 +113,15 @@ static PyObject *create_func(PyObject *self, PyObject *args) BL::Scene scene(sceneptr); PointerRNA regionptr; - RNA_id_pointer_create((ID*)pylong_as_voidptr_typesafe(pyregion), ®ionptr); + RNA_pointer_create(NULL, &RNA_Region, pylong_as_voidptr_typesafe(pyregion), ®ionptr); BL::Region region(regionptr); PointerRNA v3dptr; - RNA_id_pointer_create((ID*)pylong_as_voidptr_typesafe(pyv3d), &v3dptr); + RNA_pointer_create(NULL, &RNA_SpaceView3D, pylong_as_voidptr_typesafe(pyv3d), &v3dptr); BL::SpaceView3D v3d(v3dptr); PointerRNA rv3dptr; - RNA_id_pointer_create((ID*)pylong_as_voidptr_typesafe(pyrv3d), &rv3dptr); + RNA_pointer_create(NULL, &RNA_RegionView3D, pylong_as_voidptr_typesafe(pyrv3d), &rv3dptr); BL::RegionView3D rv3d(rv3dptr); /* create session */ @@ -174,7 +196,7 @@ static PyObject *bake_func(PyObject *self, PyObject *args) void *b_result = PyLong_AsVoidPtr(pyresult); PointerRNA bakepixelptr; - RNA_id_pointer_create((ID*)PyLong_AsVoidPtr(pypixel_array), &bakepixelptr); + RNA_pointer_create(NULL, &RNA_BakePixel, PyLong_AsVoidPtr(pypixel_array), &bakepixelptr); BL::BakePixel b_bake_pixel(bakepixelptr); python_thread_state_save(&session->python_thread_state); @@ -216,7 +238,7 @@ static PyObject *reset_func(PyObject *self, PyObject *args) BlenderSession *session = (BlenderSession*)PyLong_AsVoidPtr(pysession); PointerRNA dataptr; - RNA_id_pointer_create((ID*)PyLong_AsVoidPtr(pydata), &dataptr); + RNA_main_pointer_create((Main*)PyLong_AsVoidPtr(pydata), &dataptr); BL::BlendData b_data(dataptr); PointerRNA sceneptr; @@ -363,13 +385,7 @@ static PyObject *osl_update_node_func(PyObject *self, PyObject *args) /* find socket socket */ BL::NodeSocket b_sock(PointerRNA_NULL); if (param->isoutput) { -#if OSL_LIBRARY_VERSION_CODE < 10500 - b_sock = b_node.outputs[param->name]; -#else b_sock = b_node.outputs[param->name.string()]; -#endif - - /* remove if type no longer matches */ if(b_sock && b_sock.bl_idname() != socket_type) { b_node.outputs.remove(b_sock); @@ -377,12 +393,7 @@ static PyObject *osl_update_node_func(PyObject *self, PyObject *args) } } else { -#if OSL_LIBRARY_VERSION_CODE < 10500 - b_sock = b_node.inputs[param->name]; -#else b_sock = b_node.inputs[param->name.string()]; -#endif - /* remove if type no longer matches */ if(b_sock && b_sock.bl_idname() != socket_type) { b_node.inputs.remove(b_sock); diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp index 57ffea4b1a9..79ab25483e2 100644 --- a/intern/cycles/blender/blender_session.cpp +++ b/intern/cycles/blender/blender_session.cpp @@ -92,6 +92,7 @@ void BlenderSession::create_session() /* reset status/progress */ last_status = ""; + last_error = ""; last_progress = -1.0f; start_resize_time = 0.0; @@ -826,10 +827,8 @@ void BlenderSession::update_status_progress() get_status(status, substatus); get_progress(progress, total_time); - - if(background) { - if(progress>0) + if(progress > 0) remaining_time = (1.0 - (double)progress) * (total_time / (double)progress); scene += " | " + b_scene.name(); @@ -843,12 +842,12 @@ void BlenderSession::update_status_progress() if(samples > 0 && total_samples != USHRT_MAX) remaining_time = (total_samples - samples) * (total_time / samples); } - - if(remaining_time>0) { + + if(remaining_time > 0) { BLI_timestr(remaining_time, time_str, sizeof(time_str)); timestatus += "Remaining:" + string(time_str) + " | "; } - + timestatus += string_printf("Mem:%.2fM, Peak:%.2fM", (double)mem_used, (double)mem_peak); if(status.size() > 0) @@ -865,6 +864,21 @@ void BlenderSession::update_status_progress() b_engine.update_progress(progress); last_progress = progress; } + + if (session->progress.get_error()) { + string error = session->progress.get_error_message(); + if(error != last_error) { + /* TODO(sergey): Currently C++ RNA API doesn't let us to + * use mnemonic name for the variable. Would be nice to + * have this figured out. + * + * For until then, 1 << 5 means RPT_ERROR. + */ + b_engine.report(1 << 5, error.c_str()); + b_engine.error_set(error.c_str()); + last_error = error; + } + } } void BlenderSession::tag_update() diff --git a/intern/cycles/blender/blender_session.h b/intern/cycles/blender/blender_session.h index ac685118b3d..143a23af5c6 100644 --- a/intern/cycles/blender/blender_session.h +++ b/intern/cycles/blender/blender_session.h @@ -91,6 +91,7 @@ public: string b_rlay_name; string last_status; + string last_error; float last_progress; int width, height; diff --git a/intern/cycles/blender/blender_shader.cpp b/intern/cycles/blender/blender_shader.cpp index 33c7bf5f859..27c2e9e9ae8 100644 --- a/intern/cycles/blender/blender_shader.cpp +++ b/intern/cycles/blender/blender_shader.cpp @@ -1014,7 +1014,8 @@ void BlenderSync::sync_materials(bool update_all) shader->use_mis = get_boolean(cmat, "sample_as_light"); shader->use_transparent_shadow = get_boolean(cmat, "use_transparent_shadow"); shader->heterogeneous_volume = !get_boolean(cmat, "homogeneous_volume"); - shader->volume_sampling_method = RNA_enum_get(&cmat, "volume_sampling"); + shader->volume_sampling_method = (VolumeSampling)RNA_enum_get(&cmat, "volume_sampling"); + shader->volume_interpolation_method = (VolumeInterpolation)RNA_enum_get(&cmat, "volume_interpolation"); shader->set_graph(graph); shader->tag_update(scene); @@ -1044,7 +1045,8 @@ void BlenderSync::sync_world(bool update_all) /* volume */ PointerRNA cworld = RNA_pointer_get(&b_world.ptr, "cycles"); shader->heterogeneous_volume = !get_boolean(cworld, "homogeneous_volume"); - shader->volume_sampling_method = RNA_enum_get(&cworld, "volume_sampling"); + shader->volume_sampling_method = (VolumeSampling)RNA_enum_get(&cworld, "volume_sampling"); + shader->volume_interpolation_method = (VolumeInterpolation)RNA_enum_get(&cworld, "volume_interpolation"); } else if(b_world) { ShaderNode *closure, *out; diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp index 2ac90b34fd7..ee492dd00e4 100644 --- a/intern/cycles/blender/blender_sync.cpp +++ b/intern/cycles/blender/blender_sync.cpp @@ -515,7 +515,17 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine b_engine, BL::Use params.shadingsystem = SHADINGSYSTEM_OSL; /* color managagement */ - params.display_buffer_linear = GLEW_ARB_half_float_pixel && b_engine.support_display_space_shader(b_scene); +#ifdef GLEW_MX + /* When using GLEW MX we need to check whether we've got an OpenGL + * context for current window. This is because command line rendering + * doesn't have OpenGL context actually. + */ + if(glewGetContext() != NULL) +#endif + { + params.display_buffer_linear = GLEW_ARB_half_float_pixel && + b_engine.support_display_space_shader(b_scene); + } return params; } diff --git a/intern/cycles/bvh/bvh_build.cpp b/intern/cycles/bvh/bvh_build.cpp index eb4cca92b6b..5547229a910 100644 --- a/intern/cycles/bvh/bvh_build.cpp +++ b/intern/cycles/bvh/bvh_build.cpp @@ -28,6 +28,7 @@ #include "util_debug.h" #include "util_foreach.h" +#include "util_logging.h" #include "util_progress.h" #include "util_time.h" @@ -223,7 +224,8 @@ BVHNode* BVHBuild::run() spatial_right_bounds.resize(max(root.size(), (int)BVHParams::NUM_SPATIAL_BINS) - 1); /* init progress updates */ - progress_start_time = time_dt(); + double build_start_time; + build_start_time = progress_start_time = time_dt(); progress_count = 0; progress_total = references.size(); progress_original_total = progress_total; @@ -258,6 +260,10 @@ BVHNode* BVHBuild::run() } } + VLOG(1) << "BVH built in " + << time_dt() - build_start_time + << " seconds."; + return rootnode; } @@ -394,7 +400,7 @@ BVHNode* BVHBuild::build_node(const BVHRange& range, int level) progress_total += left.size() + right.size() - range.size(); size_t total = progress_total; - /* leaft node */ + /* left node */ BVHNode *leftnode = build_node(left, level + 1); /* right node (modify start for splits) */ diff --git a/intern/cycles/bvh/bvh_params.h b/intern/cycles/bvh/bvh_params.h index e073b69472e..43c2d9b2683 100644 --- a/intern/cycles/bvh/bvh_params.h +++ b/intern/cycles/bvh/bvh_params.h @@ -115,6 +115,11 @@ public: __forceinline int prim_object() const { return __float_as_int(rbounds.max.w); } __forceinline int prim_type() const { return type; } + BVHReference& operator=(const BVHReference &arg) { + memcpy(this, &arg, sizeof(BVHReference)); + return *this; + } + protected: BoundBox rbounds; uint type; diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index c9b8a5b726b..e5242e7ee47 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -170,124 +170,42 @@ public: #endif RenderTile tile; - - while(task.acquire_tile(this, tile)) { - float *render_buffer = (float*)tile.buffer; - uint *rng_state = (uint*)tile.rng_state; - int start_sample = tile.start_sample; - int end_sample = tile.start_sample + tile.num_samples; -#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 - if(system_cpu_support_avx2()) { - for(int sample = start_sample; sample < end_sample; sample++) { - if (task.get_cancel() || task_pool.canceled()) { - if(task.need_finish_queue == false) - break; - } - - for(int y = tile.y; y < tile.y + tile.h; y++) { - for(int x = tile.x; x < tile.x + tile.w; x++) { - kernel_cpu_avx2_path_trace(&kg, render_buffer, rng_state, - sample, x, y, tile.offset, tile.stride); - } - } - - tile.sample = sample + 1; + void(*path_trace_kernel)(KernelGlobals*, float*, unsigned int*, int, int, int, int, int); - task.update_progress(&tile); - } - } - else +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 + if(system_cpu_support_avx2()) + path_trace_kernel = kernel_cpu_avx2_path_trace; + else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX - if(system_cpu_support_avx()) { - for(int sample = start_sample; sample < end_sample; sample++) { - if (task.get_cancel() || task_pool.canceled()) { - if(task.need_finish_queue == false) - break; - } - - for(int y = tile.y; y < tile.y + tile.h; y++) { - for(int x = tile.x; x < tile.x + tile.w; x++) { - kernel_cpu_avx_path_trace(&kg, render_buffer, rng_state, - sample, x, y, tile.offset, tile.stride); - } - } - - tile.sample = sample + 1; - - task.update_progress(&tile); - } - } - else + if(system_cpu_support_avx()) + path_trace_kernel = kernel_cpu_avx_path_trace; + else #endif -#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 - if(system_cpu_support_sse41()) { - for(int sample = start_sample; sample < end_sample; sample++) { - if (task.get_cancel() || task_pool.canceled()) { - if(task.need_finish_queue == false) - break; - } - - for(int y = tile.y; y < tile.y + tile.h; y++) { - for(int x = tile.x; x < tile.x + tile.w; x++) { - kernel_cpu_sse41_path_trace(&kg, render_buffer, rng_state, - sample, x, y, tile.offset, tile.stride); - } - } - - tile.sample = sample + 1; - - task.update_progress(&tile); - } - } - else +#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 + if(system_cpu_support_sse41()) + path_trace_kernel = kernel_cpu_sse41_path_trace; + else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 - if(system_cpu_support_sse3()) { - for(int sample = start_sample; sample < end_sample; sample++) { - if (task.get_cancel() || task_pool.canceled()) { - if(task.need_finish_queue == false) - break; - } - - for(int y = tile.y; y < tile.y + tile.h; y++) { - for(int x = tile.x; x < tile.x + tile.w; x++) { - kernel_cpu_sse3_path_trace(&kg, render_buffer, rng_state, - sample, x, y, tile.offset, tile.stride); - } - } - - tile.sample = sample + 1; - - task.update_progress(&tile); - } - } - else + if(system_cpu_support_sse3()) + path_trace_kernel = kernel_cpu_sse3_path_trace; + else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 - if(system_cpu_support_sse2()) { - for(int sample = start_sample; sample < end_sample; sample++) { - if (task.get_cancel() || task_pool.canceled()) { - if(task.need_finish_queue == false) - break; - } - - for(int y = tile.y; y < tile.y + tile.h; y++) { - for(int x = tile.x; x < tile.x + tile.w; x++) { - kernel_cpu_sse2_path_trace(&kg, render_buffer, rng_state, - sample, x, y, tile.offset, tile.stride); - } - } - - tile.sample = sample + 1; - - task.update_progress(&tile); - } - } - else + if(system_cpu_support_sse2()) + path_trace_kernel = kernel_cpu_sse2_path_trace; + else #endif - { + path_trace_kernel = kernel_cpu_path_trace; + + while(task.acquire_tile(this, tile)) { + float *render_buffer = (float*)tile.buffer; + uint *rng_state = (uint*)tile.rng_state; + int start_sample = tile.start_sample; + int end_sample = tile.start_sample + tile.num_samples; + for(int sample = start_sample; sample < end_sample; sample++) { if (task.get_cancel() || task_pool.canceled()) { if(task.need_finish_queue == false) @@ -296,7 +214,7 @@ public: for(int y = tile.y; y < tile.y + tile.h; y++) { for(int x = tile.x; x < tile.x + tile.w; x++) { - kernel_cpu_path_trace(&kg, render_buffer, rng_state, + path_trace_kernel(&kg, render_buffer, rng_state, sample, x, y, tile.offset, tile.stride); } } @@ -305,7 +223,7 @@ public: task.update_progress(&tile); } - } + task.release_tile(tile); @@ -325,110 +243,74 @@ public: float sample_scale = 1.0f/(task.sample + 1); if(task.rgba_half) { + void(*convert_to_half_float_kernel)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int); #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 - if(system_cpu_support_avx2()) { - for(int y = task.y; y < task.y + task.h; y++) - for(int x = task.x; x < task.x + task.w; x++) - kernel_cpu_avx2_convert_to_half_float(&kernel_globals, (uchar4*)task.rgba_half, (float*)task.buffer, - sample_scale, x, y, task.offset, task.stride); - } + if(system_cpu_support_avx2()) + convert_to_half_float_kernel = kernel_cpu_avx2_convert_to_half_float; else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX - if(system_cpu_support_avx()) { + if(system_cpu_support_avx()) for(int y = task.y; y < task.y + task.h; y++) - for(int x = task.x; x < task.x + task.w; x++) - kernel_cpu_avx_convert_to_half_float(&kernel_globals, (uchar4*)task.rgba_half, (float*)task.buffer, - sample_scale, x, y, task.offset, task.stride); - } + convert_to_half_float_kernel = kernel_cpu_avx_convert_to_half_float; else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 - if(system_cpu_support_sse41()) { - for(int y = task.y; y < task.y + task.h; y++) - for(int x = task.x; x < task.x + task.w; x++) - kernel_cpu_sse41_convert_to_half_float(&kernel_globals, (uchar4*)task.rgba_half, (float*)task.buffer, - sample_scale, x, y, task.offset, task.stride); - } + if(system_cpu_support_sse41()) + convert_to_half_float_kernel = kernel_cpu_sse41_convert_to_half_float; else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 - if(system_cpu_support_sse3()) { - for(int y = task.y; y < task.y + task.h; y++) - for(int x = task.x; x < task.x + task.w; x++) - kernel_cpu_sse3_convert_to_half_float(&kernel_globals, (uchar4*)task.rgba_half, (float*)task.buffer, - sample_scale, x, y, task.offset, task.stride); - } + if(system_cpu_support_sse3()) + convert_to_half_float_kernel = kernel_cpu_sse3_convert_to_half_float; else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 - if(system_cpu_support_sse2()) { - for(int y = task.y; y < task.y + task.h; y++) - for(int x = task.x; x < task.x + task.w; x++) - kernel_cpu_sse2_convert_to_half_float(&kernel_globals, (uchar4*)task.rgba_half, (float*)task.buffer, - sample_scale, x, y, task.offset, task.stride); - } + if(system_cpu_support_sse2()) + convert_to_half_float_kernel = kernel_cpu_sse2_convert_to_half_float; else #endif - { - for(int y = task.y; y < task.y + task.h; y++) - for(int x = task.x; x < task.x + task.w; x++) - kernel_cpu_convert_to_half_float(&kernel_globals, (uchar4*)task.rgba_half, (float*)task.buffer, - sample_scale, x, y, task.offset, task.stride); - } + convert_to_half_float_kernel = kernel_cpu_convert_to_half_float; + + for(int y = task.y; y < task.y + task.h; y++) + for(int x = task.x; x < task.x + task.w; x++) + convert_to_half_float_kernel(&kernel_globals, (uchar4*)task.rgba_half, (float*)task.buffer, + sample_scale, x, y, task.offset, task.stride); } else { + void(*convert_to_byte_kernel)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int); #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 - if(system_cpu_support_avx2()) { - for(int y = task.y; y < task.y + task.h; y++) - for(int x = task.x; x < task.x + task.w; x++) - kernel_cpu_avx2_convert_to_byte(&kernel_globals, (uchar4*)task.rgba_byte, (float*)task.buffer, - sample_scale, x, y, task.offset, task.stride); - } + if(system_cpu_support_avx2()) + convert_to_byte_kernel = kernel_cpu_avx2_convert_to_byte; else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX - if(system_cpu_support_avx()) { - for(int y = task.y; y < task.y + task.h; y++) - for(int x = task.x; x < task.x + task.w; x++) - kernel_cpu_avx_convert_to_byte(&kernel_globals, (uchar4*)task.rgba_byte, (float*)task.buffer, - sample_scale, x, y, task.offset, task.stride); - } + if(system_cpu_support_avx()) + convert_to_byte_kernel = kernel_cpu_avx_convert_to_byte; else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 - if(system_cpu_support_sse41()) { - for(int y = task.y; y < task.y + task.h; y++) - for(int x = task.x; x < task.x + task.w; x++) - kernel_cpu_sse41_convert_to_byte(&kernel_globals, (uchar4*)task.rgba_byte, (float*)task.buffer, - sample_scale, x, y, task.offset, task.stride); - } + if(system_cpu_support_sse41()) + convert_to_byte_kernel = kernel_cpu_sse41_convert_to_byte; else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 - if(system_cpu_support_sse3()) { - for(int y = task.y; y < task.y + task.h; y++) - for(int x = task.x; x < task.x + task.w; x++) - kernel_cpu_sse3_convert_to_byte(&kernel_globals, (uchar4*)task.rgba_byte, (float*)task.buffer, - sample_scale, x, y, task.offset, task.stride); - } + if(system_cpu_support_sse3()) + convert_to_byte_kernel = kernel_cpu_sse3_convert_to_byte; else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 - if(system_cpu_support_sse2()) { - for(int y = task.y; y < task.y + task.h; y++) - for(int x = task.x; x < task.x + task.w; x++) - kernel_cpu_sse2_convert_to_byte(&kernel_globals, (uchar4*)task.rgba_byte, (float*)task.buffer, - sample_scale, x, y, task.offset, task.stride); - } + if(system_cpu_support_sse2()) + convert_to_byte_kernel = kernel_cpu_sse2_convert_to_byte; else #endif - { - for(int y = task.y; y < task.y + task.h; y++) - for(int x = task.x; x < task.x + task.w; x++) - kernel_cpu_convert_to_byte(&kernel_globals, (uchar4*)task.rgba_byte, (float*)task.buffer, - sample_scale, x, y, task.offset, task.stride); - } + convert_to_byte_kernel = kernel_cpu_convert_to_byte; + + for(int y = task.y; y < task.y + task.h; y++) + for(int x = task.x; x < task.x + task.w; x++) + convert_to_byte_kernel(&kernel_globals, (uchar4*)task.rgba_byte, (float*)task.buffer, + sample_scale, x, y, task.offset, task.stride); + } } @@ -439,93 +321,45 @@ public: #ifdef WITH_OSL OSLShader::thread_init(&kg, &kernel_globals, &osl_globals); #endif + void(*shader_kernel)(KernelGlobals*, uint4*, float4*, int, int, int, int); #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 - if(system_cpu_support_avx2()) { - for(int sample = 0; sample < task.num_samples; sample++) { - for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) - kernel_cpu_avx2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, - task.shader_eval_type, x, task.offset, sample); - - if(task.get_cancel() || task_pool.canceled()) - break; - - task.update_progress(NULL); - } - } + if(system_cpu_support_avx2()) + shader_kernel = kernel_cpu_avx2_shader; else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX - if(system_cpu_support_avx()) { - for(int sample = 0; sample < task.num_samples; sample++) { - for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) - kernel_cpu_avx_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, - task.shader_eval_type, x, task.offset, sample); - - if(task.get_cancel() || task_pool.canceled()) - break; - - task.update_progress(NULL); - } - } + if(system_cpu_support_avx()) + shader_kernel = kernel_cpu_avx_shader; else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 - if(system_cpu_support_sse41()) { - for(int sample = 0; sample < task.num_samples; sample++) { - for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) - kernel_cpu_sse41_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, - task.shader_eval_type, x, task.offset, sample); - - if(task.get_cancel() || task_pool.canceled()) - break; - - task.update_progress(NULL); - } - } + if(system_cpu_support_sse41()) + shader_kernel = kernel_cpu_sse41_shader; else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 - if(system_cpu_support_sse3()) { - for(int sample = 0; sample < task.num_samples; sample++) { - for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) - kernel_cpu_sse3_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, - task.shader_eval_type, x, task.offset, sample); - - if(task.get_cancel() || task_pool.canceled()) - break; - - task.update_progress(NULL); - } - } + if(system_cpu_support_sse3()) + shader_kernel = kernel_cpu_sse3_shader; else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 - if(system_cpu_support_sse2()) { - for(int sample = 0; sample < task.num_samples; sample++) { - for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) - kernel_cpu_sse2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, - task.shader_eval_type, x, task.offset, sample); - - if(task.get_cancel() || task_pool.canceled()) - break; - - task.update_progress(NULL); - } - } + if(system_cpu_support_sse2()) + shader_kernel = kernel_cpu_sse2_shader; else #endif - { - for(int sample = 0; sample < task.num_samples; sample++) { - for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) - kernel_cpu_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, - task.shader_eval_type, x, task.offset, sample); + shader_kernel = kernel_cpu_shader; - if(task.get_cancel() || task_pool.canceled()) - break; + for(int sample = 0; sample < task.num_samples; sample++) { + for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) + shader_kernel(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, + task.shader_eval_type, x, task.offset, sample); + + if(task.get_cancel() || task_pool.canceled()) + break; + + task.update_progress(NULL); - task.update_progress(NULL); - } } #ifdef WITH_OSL diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 44be7779891..7e622e03cdd 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -76,7 +76,7 @@ public: { if(first_error) { fprintf(stderr, "\nRefer to the Cycles GPU rendering documentation for possible solutions:\n"); - fprintf(stderr, "http://wiki.blender.org/index.php/Doc:2.6/Manual/Render/Cycles/GPU_Rendering\n\n"); + fprintf(stderr, "http://www.blender.org/manual/render/cycles/gpu_rendering.html\n\n"); first_error = false; } } @@ -202,13 +202,9 @@ public: /* compute cubin name */ int major, minor; cuDeviceComputeCapability(&major, &minor, cuDevId); - - /* workaround to make sm_52 cards work, until we bundle kernel */ - if(major == 5 && minor == 2) - minor = 0; + string cubin; /* attempt to use kernel provided with blender */ - string cubin; if(experimental) cubin = path_get(string_printf("lib/kernel_experimental_sm_%d%d.cubin", major, minor)); else @@ -363,7 +359,7 @@ public: cuda_push_context(); if(mem.device_pointer) { cuda_assert(cuMemcpyDtoH((uchar*)mem.data_pointer + offset, - (CUdeviceptr)((uchar*)mem.device_pointer + offset), size)); + (CUdeviceptr)(mem.device_pointer + offset), size)); } else { memset((char*)mem.data_pointer + offset, 0, size); diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index c521e1383a4..0ff227938ae 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -91,6 +91,7 @@ set(SRC_SVM_HEADERS svm/svm_magic.h svm/svm_mapping.h svm/svm_math.h + svm/svm_math_util.h svm/svm_mix.h svm/svm_musgrave.h svm/svm_noise.h diff --git a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h index ad7864cb8ea..b94bdeeb23f 100644 --- a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h +++ b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h @@ -1,5 +1,5 @@ /* - * Copyright 2011-2013 Blender Foundation + * Copyright 2011-2014 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -33,24 +33,20 @@ CCL_NAMESPACE_BEGIN ccl_device int bsdf_ashikhmin_shirley_setup(ShaderClosure *sc) { - /* store roughness. could already convert to exponent to save some cycles - * in eval, but this is more consistent with other bsdfs and shader_blur. */ sc->data0 = clamp(sc->data0, 1e-4f, 1.0f); sc->data1 = sc->data0; sc->type = CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ID; - return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_GLOSSY; + return SD_BSDF|SD_BSDF_HAS_EVAL; } ccl_device int bsdf_ashikhmin_shirley_aniso_setup(ShaderClosure *sc) { - /* store roughness. could already convert to exponent to save some cycles - * in eval, but this is more consistent with other bsdfs and shader_blur. */ sc->data0 = clamp(sc->data0, 1e-4f, 1.0f); sc->data1 = clamp(sc->data1, 1e-4f, 1.0f); sc->type = CLOSURE_BSDF_ASHIKHMIN_SHIRLEY_ANISO_ID; - return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_GLOSSY; + return SD_BSDF|SD_BSDF_HAS_EVAL; } ccl_device void bsdf_ashikhmin_shirley_blur(ShaderClosure *sc, float roughness) @@ -73,7 +69,7 @@ ccl_device float3 bsdf_ashikhmin_shirley_eval_reflect(const ShaderClosure *sc, c float out = 0.0f; - if (NdotI > 0.0f && NdotO > 0.0f) { + if(NdotI > 0.0f && NdotO > 0.0f) { NdotI = fmaxf(NdotI, 1e-6f); NdotO = fmaxf(NdotO, 1e-6f); float3 H = normalize(omega_in + I); @@ -86,7 +82,8 @@ ccl_device float3 bsdf_ashikhmin_shirley_eval_reflect(const ShaderClosure *sc, c float n_x = bsdf_ashikhmin_shirley_roughness_to_exponent(sc->data0); float n_y = bsdf_ashikhmin_shirley_roughness_to_exponent(sc->data1); - if (n_x == n_y) { /* => isotropic case */ + if(n_x == n_y) { + /* isotropic */ float e = n_x; float lobe = powf(HdotN, e); float norm = (n_x + 1.0f) / (8.0f * M_PI_F); @@ -94,7 +91,8 @@ ccl_device float3 bsdf_ashikhmin_shirley_eval_reflect(const ShaderClosure *sc, c out = NdotO * norm * lobe * pump; *pdf = norm * lobe / HdotI; /* this is p_h / 4(H.I) (conversion from 'wh measure' to 'wi measure', eq. 8 in paper) */ } - else { /* => ANisotropic case */ + else { + /* anisotropic */ float3 X, Y; make_orthonormals_tangent(N, sc->T, &X, &Y); @@ -130,7 +128,7 @@ ccl_device int bsdf_ashikhmin_shirley_sample(const ShaderClosure *sc, float3 Ng, float3 N = sc->N; float NdotI = dot(N, I); - if (NdotI > 0.0f) { + if(NdotI > 0.0f) { float n_x = bsdf_ashikhmin_shirley_roughness_to_exponent(sc->data0); float n_y = bsdf_ashikhmin_shirley_roughness_to_exponent(sc->data1); @@ -146,21 +144,23 @@ ccl_device int bsdf_ashikhmin_shirley_sample(const ShaderClosure *sc, float3 Ng, /* sample spherical coords for h in tangent space */ float phi; float cos_theta; - if (n_x == n_y) { /* => simple isotropic sampling */ + if(n_x == n_y) { + /* isotropic sampling */ phi = M_2PI_F * randu; cos_theta = powf(randv, 1.0f / (n_x + 1.0f)); } - else { /* => more complex anisotropic sampling */ - if (randu < 0.25f) { /* first quadrant */ + else { + /* anisotropic sampling */ + if(randu < 0.25f) { /* first quadrant */ float remapped_randu = 4.0f * randu; bsdf_ashikhmin_shirley_sample_first_quadrant(n_x, n_y, remapped_randu, randv, &phi, &cos_theta); } - else if (randu < 0.5f) { /* second quadrant */ + else if(randu < 0.5f) { /* second quadrant */ float remapped_randu = 4.0f * (.5f - randu); bsdf_ashikhmin_shirley_sample_first_quadrant(n_x, n_y, remapped_randu, randv, &phi, &cos_theta); phi = M_PI_F - phi; } - else if (randu < 0.75f) { /* third quadrant */ + else if(randu < 0.75f) { /* third quadrant */ float remapped_randu = 4.0f * (randu - 0.5f); bsdf_ashikhmin_shirley_sample_first_quadrant(n_x, n_y, remapped_randu, randv, &phi, &cos_theta); phi = M_PI_F + phi; @@ -185,13 +185,12 @@ ccl_device int bsdf_ashikhmin_shirley_sample(const ShaderClosure *sc, float3 Ng, /* half vector to world space */ float3 H = h.x*X + h.y*Y + h.z*N; float HdotI = dot(H, I); - if (HdotI < 0.0f) H = -H; + if(HdotI < 0.0f) H = -H; /* reflect I on H to get omega_in */ *omega_in = -I + (2.0f * HdotI) * H; /* leave the rest to eval_reflect */ - /* (could maybe optimize a few things by manual inlining, but I doubt it would make much difference) */ *eval = bsdf_ashikhmin_shirley_eval_reflect(sc, I, *omega_in, pdf); #ifdef __RAY_DIFFERENTIALS__ @@ -201,7 +200,7 @@ ccl_device int bsdf_ashikhmin_shirley_sample(const ShaderClosure *sc, float3 Ng, #endif } - return LABEL_REFLECT | LABEL_GLOSSY; + return LABEL_REFLECT|LABEL_GLOSSY; } diff --git a/intern/cycles/kernel/closure/bsdf_diffuse.h b/intern/cycles/kernel/closure/bsdf_diffuse.h index 949fe869549..371f467000c 100644 --- a/intern/cycles/kernel/closure/bsdf_diffuse.h +++ b/intern/cycles/kernel/closure/bsdf_diffuse.h @@ -108,11 +108,6 @@ ccl_device float3 bsdf_translucent_eval_transmit(const ShaderClosure *sc, const return make_float3 (cos_pi, cos_pi, cos_pi); } -ccl_device float bsdf_translucent_albedo(const ShaderClosure *sc, const float3 I) -{ - return 1.0f; -} - ccl_device int bsdf_translucent_sample(const ShaderClosure *sc, float3 Ng, float3 I, float3 dIdx, float3 dIdy, float randu, float randv, float3 *eval, float3 *omega_in, float3 *domega_in_dx, float3 *domega_in_dy, float *pdf) { float3 N = sc->N; diff --git a/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h b/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h index b856774375f..cdaf84f1750 100644 --- a/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h +++ b/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h @@ -41,9 +41,9 @@ ccl_device float3 bsdf_diffuse_ramp_get_color(const ShaderClosure *sc, const flo float npos = pos * (float)(MAXCOLORS - 1); int ipos = float_to_int(npos); - if (ipos < 0) + if(ipos < 0) return colors[0]; - if (ipos >= (MAXCOLORS - 1)) + if(ipos >= (MAXCOLORS - 1)) return colors[MAXCOLORS - 1]; float offset = npos - (float)ipos; return colors[ipos] * (1.0f - offset) + colors[ipos+1] * offset; @@ -52,7 +52,7 @@ ccl_device float3 bsdf_diffuse_ramp_get_color(const ShaderClosure *sc, const flo ccl_device int bsdf_diffuse_ramp_setup(ShaderClosure *sc) { sc->type = CLOSURE_BSDF_DIFFUSE_RAMP_ID; - return SD_BSDF | SD_BSDF_HAS_EVAL; + return SD_BSDF|SD_BSDF_HAS_EVAL; } ccl_device void bsdf_diffuse_ramp_blur(ShaderClosure *sc, float roughness) diff --git a/intern/cycles/kernel/closure/bsdf_hair.h b/intern/cycles/kernel/closure/bsdf_hair.h index e0b5454592b..4f4fd5d26b8 100644 --- a/intern/cycles/kernel/closure/bsdf_hair.h +++ b/intern/cycles/kernel/closure/bsdf_hair.h @@ -49,7 +49,7 @@ ccl_device int bsdf_hair_reflection_setup(ShaderClosure *sc) sc->type = CLOSURE_BSDF_HAIR_REFLECTION_ID; sc->data0 = clamp(sc->data0, 0.001f, 1.0f); sc->data1 = clamp(sc->data1, 0.001f, 1.0f); - return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_GLOSSY; + return SD_BSDF|SD_BSDF_HAS_EVAL; } ccl_device int bsdf_hair_transmission_setup(ShaderClosure *sc) @@ -57,7 +57,7 @@ ccl_device int bsdf_hair_transmission_setup(ShaderClosure *sc) sc->type = CLOSURE_BSDF_HAIR_TRANSMISSION_ID; sc->data0 = clamp(sc->data0, 0.001f, 1.0f); sc->data1 = clamp(sc->data1, 0.001f, 1.0f); - return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_GLOSSY; + return SD_BSDF|SD_BSDF_HAS_EVAL; } ccl_device float3 bsdf_hair_reflection_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) diff --git a/intern/cycles/kernel/closure/bsdf_microfacet.h b/intern/cycles/kernel/closure/bsdf_microfacet.h index 8737b0e2d94..9561885525f 100644 --- a/intern/cycles/kernel/closure/bsdf_microfacet.h +++ b/intern/cycles/kernel/closure/bsdf_microfacet.h @@ -305,7 +305,7 @@ ccl_device int bsdf_microfacet_ggx_setup(ShaderClosure *sc) sc->type = CLOSURE_BSDF_MICROFACET_GGX_ID; - return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_GLOSSY; + return SD_BSDF|SD_BSDF_HAS_EVAL; } ccl_device int bsdf_microfacet_ggx_aniso_setup(ShaderClosure *sc) @@ -315,7 +315,7 @@ ccl_device int bsdf_microfacet_ggx_aniso_setup(ShaderClosure *sc) sc->type = CLOSURE_BSDF_MICROFACET_GGX_ANISO_ID; - return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_GLOSSY; + return SD_BSDF|SD_BSDF_HAS_EVAL; } ccl_device int bsdf_microfacet_ggx_refraction_setup(ShaderClosure *sc) @@ -325,7 +325,7 @@ ccl_device int bsdf_microfacet_ggx_refraction_setup(ShaderClosure *sc) sc->type = CLOSURE_BSDF_MICROFACET_GGX_REFRACTION_ID; - return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_GLOSSY; + return SD_BSDF|SD_BSDF_HAS_EVAL; } ccl_device void bsdf_microfacet_ggx_blur(ShaderClosure *sc, float roughness) @@ -657,7 +657,7 @@ ccl_device int bsdf_microfacet_beckmann_setup(ShaderClosure *sc) sc->data1 = sc->data0; /* alpha_y */ sc->type = CLOSURE_BSDF_MICROFACET_BECKMANN_ID; - return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_GLOSSY; + return SD_BSDF|SD_BSDF_HAS_EVAL; } ccl_device int bsdf_microfacet_beckmann_aniso_setup(ShaderClosure *sc) @@ -666,7 +666,7 @@ ccl_device int bsdf_microfacet_beckmann_aniso_setup(ShaderClosure *sc) sc->data1 = clamp(sc->data1, 0.0f, 1.0f); /* alpha_y */ sc->type = CLOSURE_BSDF_MICROFACET_BECKMANN_ANISO_ID; - return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_GLOSSY; + return SD_BSDF|SD_BSDF_HAS_EVAL; } ccl_device int bsdf_microfacet_beckmann_refraction_setup(ShaderClosure *sc) @@ -675,7 +675,7 @@ ccl_device int bsdf_microfacet_beckmann_refraction_setup(ShaderClosure *sc) sc->data1 = sc->data0; /* alpha_y */ sc->type = CLOSURE_BSDF_MICROFACET_BECKMANN_REFRACTION_ID; - return SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_GLOSSY; + return SD_BSDF|SD_BSDF_HAS_EVAL; } ccl_device void bsdf_microfacet_beckmann_blur(ShaderClosure *sc, float roughness) diff --git a/intern/cycles/kernel/closure/bsdf_oren_nayar.h b/intern/cycles/kernel/closure/bsdf_oren_nayar.h index 6f685d5eeea..6d3b915c24a 100644 --- a/intern/cycles/kernel/closure/bsdf_oren_nayar.h +++ b/intern/cycles/kernel/closure/bsdf_oren_nayar.h @@ -25,7 +25,7 @@ ccl_device float3 bsdf_oren_nayar_get_intensity(const ShaderClosure *sc, float3 float nv = max(dot(n, v), 0.0f); float t = dot(l, v) - nl * nv; - if (t > 0.0f) + if(t > 0.0f) t /= max(nl, nv) + FLT_MIN; float is = nl * (sc->data0 + sc->data1 * t); return make_float3(is, is, is); @@ -44,7 +44,7 @@ ccl_device int bsdf_oren_nayar_setup(ShaderClosure *sc) sc->data0 = 1.0f * div; sc->data1 = sigma * div; - return SD_BSDF | SD_BSDF_HAS_EVAL; + return SD_BSDF|SD_BSDF_HAS_EVAL; } ccl_device void bsdf_oren_nayar_blur(ShaderClosure *sc, float roughness) @@ -53,7 +53,7 @@ ccl_device void bsdf_oren_nayar_blur(ShaderClosure *sc, float roughness) ccl_device float3 bsdf_oren_nayar_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf) { - if (dot(sc->N, omega_in) > 0.0f) { + if(dot(sc->N, omega_in) > 0.0f) { *pdf = 0.5f * M_1_PI_F; return bsdf_oren_nayar_get_intensity(sc, sc->N, I, omega_in); } @@ -72,7 +72,7 @@ ccl_device int bsdf_oren_nayar_sample(const ShaderClosure *sc, float3 Ng, float3 { sample_uniform_hemisphere(sc->N, randu, randv, omega_in, pdf); - if (dot(Ng, *omega_in) > 0.0f) { + if(dot(Ng, *omega_in) > 0.0f) { *eval = bsdf_oren_nayar_get_intensity(sc, sc->N, I, *omega_in); #ifdef __RAY_DIFFERENTIALS__ @@ -86,7 +86,7 @@ ccl_device int bsdf_oren_nayar_sample(const ShaderClosure *sc, float3 Ng, float3 *eval = make_float3(0.0f, 0.0f, 0.0f); } - return LABEL_REFLECT | LABEL_DIFFUSE; + return LABEL_REFLECT|LABEL_DIFFUSE; } diff --git a/intern/cycles/kernel/closure/bsdf_phong_ramp.h b/intern/cycles/kernel/closure/bsdf_phong_ramp.h index 2b4e1c68640..f9f263719e9 100644 --- a/intern/cycles/kernel/closure/bsdf_phong_ramp.h +++ b/intern/cycles/kernel/closure/bsdf_phong_ramp.h @@ -41,9 +41,9 @@ ccl_device float3 bsdf_phong_ramp_get_color(const ShaderClosure *sc, const float float npos = pos * (float)(MAXCOLORS - 1); int ipos = float_to_int(npos); - if (ipos < 0) + if(ipos < 0) return colors[0]; - if (ipos >= (MAXCOLORS - 1)) + if(ipos >= (MAXCOLORS - 1)) return colors[MAXCOLORS - 1]; float offset = npos - (float)ipos; return colors[ipos] * (1.0f - offset) + colors[ipos+1] * offset; @@ -54,7 +54,7 @@ ccl_device int bsdf_phong_ramp_setup(ShaderClosure *sc) sc->data0 = max(sc->data0, 0.0f); sc->type = CLOSURE_BSDF_PHONG_RAMP_ID; - return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_GLOSSY; + return SD_BSDF|SD_BSDF_HAS_EVAL; } ccl_device void bsdf_phong_ramp_blur(ShaderClosure *sc, float roughness) @@ -67,11 +67,11 @@ ccl_device float3 bsdf_phong_ramp_eval_reflect(const ShaderClosure *sc, const fl float cosNI = dot(sc->N, omega_in); float cosNO = dot(sc->N, I); - if (cosNI > 0 && cosNO > 0) { + if(cosNI > 0 && cosNO > 0) { // reflect the view vector float3 R = (2 * cosNO) * sc->N - I; float cosRI = dot(R, omega_in); - if (cosRI > 0) { + if(cosRI > 0) { float cosp = powf(cosRI, m_exponent); float common = 0.5f * M_1_PI_F * cosp; float out = cosNI * (m_exponent + 2) * common; @@ -93,7 +93,7 @@ ccl_device int bsdf_phong_ramp_sample(const ShaderClosure *sc, const float3 colo float cosNO = dot(sc->N, I); float m_exponent = sc->data0; - if (cosNO > 0) { + if(cosNO > 0) { // reflect the view vector float3 R = (2 * cosNO) * sc->N - I; @@ -111,12 +111,12 @@ ccl_device int bsdf_phong_ramp_sample(const ShaderClosure *sc, const float3 colo *omega_in = (cosf(phi) * sinTheta) * T + (sinf(phi) * sinTheta) * B + ( cosTheta) * R; - if (dot(Ng, *omega_in) > 0.0f) + if(dot(Ng, *omega_in) > 0.0f) { // common terms for pdf and eval float cosNI = dot(sc->N, *omega_in); // make sure the direction we chose is still in the right hemisphere - if (cosNI > 0) + if(cosNI > 0) { float cosp = powf(cosTheta, m_exponent); float common = 0.5f * M_1_PI_F * cosp; diff --git a/intern/cycles/kernel/geom/geom_bvh_volume.h b/intern/cycles/kernel/geom/geom_bvh_volume.h index 9dd8d226f5b..16c16beee39 100644 --- a/intern/cycles/kernel/geom/geom_bvh_volume.h +++ b/intern/cycles/kernel/geom/geom_bvh_volume.h @@ -277,6 +277,7 @@ ccl_device bool BVH_FUNCTION_NAME(KernelGlobals *kg, } else { /* pop */ + object = OBJECT_NONE; nodeAddr = traversalStack[stackPtr]; --stackPtr; } diff --git a/intern/cycles/kernel/geom/geom_primitive.h b/intern/cycles/kernel/geom/geom_primitive.h index 5df6c75df86..207d5066fb2 100644 --- a/intern/cycles/kernel/geom/geom_primitive.h +++ b/intern/cycles/kernel/geom/geom_primitive.h @@ -144,7 +144,8 @@ ccl_device float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *sd) float3 center; #ifdef __HAIR__ - if(sd->type & PRIMITIVE_ALL_CURVE) { + bool is_curve_primitive = sd->type & PRIMITIVE_ALL_CURVE; + if(is_curve_primitive) { center = curve_motion_center_location(kg, sd); if(!(sd->flag & SD_TRANSFORM_APPLIED)) @@ -170,6 +171,13 @@ ccl_device float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *sd) motion_pre = primitive_attribute_float3(kg, sd, elem, offset, NULL, NULL); motion_post = primitive_attribute_float3(kg, sd, elem, offset_next, NULL, NULL); + +#ifdef __HAIR__ + if(is_curve_primitive && (sd->flag & SD_OBJECT_HAS_VERTEX_MOTION) == 0) { + object_position_transform(kg, sd, &motion_pre); + object_position_transform(kg, sd, &motion_post); + } +#endif } /* object motion. note that depending on the mesh having motion vectors, this diff --git a/intern/cycles/kernel/geom/geom_volume.h b/intern/cycles/kernel/geom/geom_volume.h index 33a20494966..3cb6d168f80 100644 --- a/intern/cycles/kernel/geom/geom_volume.h +++ b/intern/cycles/kernel/geom/geom_volume.h @@ -52,7 +52,11 @@ ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd, #ifdef __KERNEL_GPU__ float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f); #else - float4 r = kernel_tex_image_interp_3d(id, P.x, P.y, P.z); + float4 r; + if(sd->flag & SD_VOLUME_CUBIC) + r = kernel_tex_image_interp_3d_ex(id, P.x, P.y, P.z, INTERPOLATION_CUBIC); + else + r = kernel_tex_image_interp_3d(id, P.x, P.y, P.z); #endif if(dx) *dx = 0.0f; @@ -68,7 +72,11 @@ ccl_device float3 volume_attribute_float3(KernelGlobals *kg, const ShaderData *s #ifdef __KERNEL_GPU__ float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f); #else - float4 r = kernel_tex_image_interp_3d(id, P.x, P.y, P.z); + float4 r; + if(sd->flag & SD_VOLUME_CUBIC) + r = kernel_tex_image_interp_3d_ex(id, P.x, P.y, P.z, INTERPOLATION_CUBIC); + else + r = kernel_tex_image_interp_3d(id, P.x, P.y, P.z); #endif if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h index a1ec080e3d3..e80bfb32e89 100644 --- a/intern/cycles/kernel/kernel_bake.h +++ b/intern/cycles/kernel/kernel_bake.h @@ -198,10 +198,10 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, int num_samples = kernel_data.integrator.aa_samples; /* random number generator */ - RNG rng = cmj_hash(offset + i, 0); + RNG rng = cmj_hash(offset + i, kernel_data.integrator.seed); #if 0 - uint rng_state = cmj_hash(i, 0); + uint rng_state = cmj_hash(i, kernel_data.integrator.seed); float filter_x, filter_y; path_rng_init(kg, &rng_state, sample, num_samples, &rng, 0, 0, &filter_x, &filter_y); @@ -253,6 +253,10 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, /* data passes */ case SHADER_EVAL_NORMAL: { + if ((sd.flag & SD_HAS_BUMP)) { + shader_eval_surface(kg, &sd, 0.f, 0, SHADER_CONTEXT_MAIN); + } + /* compression: normal = (2 * color) - 1 */ out = sd.N * 0.5f + make_float3(0.5f, 0.5f, 0.5f); break; diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h index 403fd0a67f7..08c8bdd369d 100644 --- a/intern/cycles/kernel/kernel_compat_cpu.h +++ b/intern/cycles/kernel/kernel_compat_cpu.h @@ -25,10 +25,12 @@ #include "util_half.h" #include "util_types.h" -/* On 64bit linux single precision exponent is really slow comparing to the - * double precision version, even with float<->double conversion involved. +/* On x86_64, versions of glibc < 2.16 have an issue where expf is + * much slower than the double version. This was fixed in glibc 2.16. */ -#if !defined(__KERNEL_GPU__) && defined(__linux__) && defined(__x86_64__) +#if !defined(__KERNEL_GPU__) && defined(__x86_64__) && defined(__x86_64__) && \ + defined(__GNU_LIBRARY__) && defined(__GLIBC__ ) && defined(__GLIBC_MINOR__) && \ + (__GLIBC__ <= 2 && __GLIBC_MINOR__ < 16) # define expf(x) ((float)exp((double)(x))) #endif @@ -151,6 +153,13 @@ template<typename T> struct texture_image { ccl_always_inline float4 interp_3d(float x, float y, float z, bool periodic = false) { + return interp_3d_ex(x, y, z, interpolation, periodic); + } + + ccl_always_inline float4 interp_3d_ex(float x, float y, float z, + int interpolation = INTERPOLATION_LINEAR, + bool periodic = false) + { if(UNLIKELY(!data)) return make_float4(0.0f, 0.0f, 0.0f, 0.0f); @@ -174,7 +183,7 @@ template<typename T> struct texture_image { return read(data[ix + iy*width + iz*width*height]); } - else { + else if(interpolation == INTERPOLATION_LINEAR) { float tx = frac(x*(float)width - 0.5f, &ix); float ty = frac(y*(float)height - 0.5f, &iy); float tz = frac(z*(float)depth - 0.5f, &iz); @@ -212,6 +221,93 @@ template<typename T> struct texture_image { return r; } + else { + /* Tricubic b-spline interpolation. */ + const float tx = frac(x*(float)width - 0.5f, &ix); + const float ty = frac(y*(float)height - 0.5f, &iy); + const float tz = frac(z*(float)depth - 0.5f, &iz); + int pix, piy, piz, nnix, nniy, nniz; + + if(periodic) { + ix = wrap_periodic(ix, width); + iy = wrap_periodic(iy, height); + iz = wrap_periodic(iz, depth); + + pix = wrap_periodic(ix-1, width); + piy = wrap_periodic(iy-1, height); + piz = wrap_periodic(iz-1, depth); + + nix = wrap_periodic(ix+1, width); + niy = wrap_periodic(iy+1, height); + niz = wrap_periodic(iz+1, depth); + + nnix = wrap_periodic(ix+2, width); + nniy = wrap_periodic(iy+2, height); + nniz = wrap_periodic(iz+2, depth); + } + else { + ix = wrap_clamp(ix, width); + iy = wrap_clamp(iy, height); + iz = wrap_clamp(iz, depth); + + pix = wrap_clamp(ix-1, width); + piy = wrap_clamp(iy-1, height); + piz = wrap_clamp(iz-1, depth); + + nix = wrap_clamp(ix+1, width); + niy = wrap_clamp(iy+1, height); + niz = wrap_clamp(iz+1, depth); + + nnix = wrap_clamp(ix+2, width); + nniy = wrap_clamp(iy+2, height); + nniz = wrap_clamp(iz+2, depth); + } + + const int xc[4] = {pix, ix, nix, nnix}; + const int yc[4] = {width * piy, + width * iy, + width * niy, + width * nniy}; + const int zc[4] = {width * height * piz, + width * height * iz, + width * height * niz, + width * height * nniz}; + float u[4], v[4], w[4]; + + /* Some helper macro to keep code reasonable size, + * let compiler to inline all the matrix multiplications. + */ +#define SET_SPLINE_WEIGHTS(u, t) \ + { \ + u[0] = (((-1.0f/6.0f)* t + 0.5f) * t - 0.5f) * t + (1.0f/6.0f); \ + u[1] = (( 0.5f * t - 1.0f) * t ) * t + (2.0f/3.0f); \ + u[2] = (( -0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f/6.0f); \ + u[3] = (1.0f / 6.0f) * t * t * t; \ + } (void)0 +#define DATA(x, y, z) (read(data[xc[x] + yc[y] + zc[z]])) +#define COL_TERM(col, row) \ + (v[col] * (u[0] * DATA(0, col, row) + \ + u[1] * DATA(1, col, row) + \ + u[2] * DATA(2, col, row) + \ + u[3] * DATA(3, col, row))) +#define ROW_TERM(row) \ + (w[row] * (COL_TERM(0, row) + \ + COL_TERM(1, row) + \ + COL_TERM(2, row) + \ + COL_TERM(3, row))) + + SET_SPLINE_WEIGHTS(u, tx); + SET_SPLINE_WEIGHTS(v, ty); + SET_SPLINE_WEIGHTS(w, tz); + + /* Actual interpolation. */ + return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3); + +#undef COL_TERM +#undef ROW_TERM +#undef DATA +#undef SET_SPLINE_WEIGHTS + } } ccl_always_inline void dimensions_set(int width_, int height_, int depth_) @@ -244,6 +340,7 @@ typedef texture_image<uchar4> texture_image_uchar4; #define kernel_tex_lookup(tex, t, offset, size) (kg->tex.lookup(t, offset, size)) #define kernel_tex_image_interp(tex, x, y) ((tex < MAX_FLOAT_IMAGES) ? kg->texture_float_images[tex].interp(x, y) : kg->texture_byte_images[tex - MAX_FLOAT_IMAGES].interp(x, y)) #define kernel_tex_image_interp_3d(tex, x, y, z) ((tex < MAX_FLOAT_IMAGES) ? kg->texture_float_images[tex].interp_3d(x, y, z) : kg->texture_byte_images[tex - MAX_FLOAT_IMAGES].interp_3d(x, y, z)) +#define kernel_tex_image_interp_3d_ex(tex, x, y, z, interpolation) ((tex < MAX_FLOAT_IMAGES) ? kg->texture_float_images[tex].interp_3d_ex(x, y, z, interpolation) : kg->texture_byte_images[tex - MAX_FLOAT_IMAGES].interp_3d_ex(x, y, z, interpolation)) #define kernel_data (kg->__data) diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h index e7f62f230f8..9dfbfd91881 100644 --- a/intern/cycles/kernel/kernel_light.h +++ b/intern/cycles/kernel/kernel_light.h @@ -167,12 +167,143 @@ ccl_device float3 sphere_light_sample(float3 P, float3 center, float radius, flo return disk_light_sample(normalize(P - center), randu, randv)*radius; } -ccl_device float3 area_light_sample(float3 axisu, float3 axisv, float randu, float randv) +/* Uses the following paper: + * + * Carlos Urena et al. + * An Area-Preserving Parametrization for Spherical Rectangles. + * + * https://www.solidangle.com/research/egsr2013_spherical_rectangle.pdf + */ +ccl_device float3 area_light_sample(float3 P, + float3 light_p, + float3 axisu, float3 axisv, + float randu, float randv, + float *pdf) { - randu = randu - 0.5f; - randv = randv - 0.5f; + /* In our name system we're using P for the center, + * which is o in the paper. + */ + + float3 corner = light_p - axisu * 0.5f - axisv * 0.5f; + float axisu_len, axisv_len; + /* Compute local reference system R. */ + float3 x = normalize_len(axisu, &axisu_len); + float3 y = normalize_len(axisv, &axisv_len); + float3 z = cross(x, y); + /* Compute rectangle coords in local reference system. */ + float3 dir = corner - P; + float z0 = dot(dir, z); + /* Flip 'z' to make it point against Q. */ + if(z0 > 0.0f) { + z *= -1.0f; + z0 *= -1.0f; + } + float z0sq = z0 * z0; + float x0 = dot(dir, x); + float y0 = dot(dir, y); + float x1 = x0 + axisu_len; + float y1 = y0 + axisv_len; + float y0sq = y0 * y0; + float y1sq = y1 * y1; + /* Create vectors to four vertices. */ + float3 v00 = make_float3(x0, y0, z0); + float3 v01 = make_float3(x0, y1, z0); + float3 v10 = make_float3(x1, y0, z0); + float3 v11 = make_float3(x1, y1, z0); + /* Compute normals to edges. */ + float3 n0 = normalize(cross(v00, v10)); + float3 n1 = normalize(cross(v10, v11)); + float3 n2 = normalize(cross(v11, v01)); + float3 n3 = normalize(cross(v01, v00)); + /* Compute internal angles (gamma_i). */ + float g0 = safe_acosf(-dot(n0, n1)); + float g1 = safe_acosf(-dot(n1, n2)); + float g2 = safe_acosf(-dot(n2, n3)); + float g3 = safe_acosf(-dot(n3, n0)); + /* Compute predefined constants. */ + float b0 = n0.z; + float b1 = n2.z; + float b0sq = b0 * b0; + float k = M_2PI_F - g2 - g3; + /* Compute solid angle from internal angles. */ + float S = g0 + g1 - k; + + /* Compute cu. */ + float au = randu * S + k; + float fu = (cosf(au) * b0 - b1) / sinf(au); + float cu = 1.0f / sqrtf(fu * fu + b0sq) * (fu > 0.0f ? 1.0f : -1.0f); + cu = clamp(cu, -1.0f, 1.0f); + /* Compute xu. */ + float xu = -(cu * z0) / sqrtf(1.0f - cu * cu); + xu = clamp(xu, x0, x1); + /* Compute yv. */ + float d = sqrtf(xu * xu + z0sq); + float h0 = y0 / sqrtf(d * d + y0sq); + float h1 = y1 / sqrtf(d * d + y1sq); + float hv = h0 + randv * (h1 - h0), hv2 = hv * hv; + float yv = (hv2 < 1.0f - 1e-6f) ? (hv * d) / sqrtf(1.0f - hv2) : y1; + + if(S != 0.0f) + *pdf = 1.0f / S; + else + *pdf = 0.0f; + + /* Transform (xu, yv, z0) to world coords. */ + return P + xu * x + yv * y + z0 * z; +} - return axisu*randu + axisv*randv; +/* TODO(sergey): This is actually a duplicated code from above, but how to avoid + * this without having some nasty function with loads of parameters? + */ +ccl_device float area_light_pdf(float3 P, + float3 light_p, + float3 axisu, float3 axisv) +{ + /* In our name system we're using P for the center, + * which is o in the paper. + */ + + float3 corner = light_p - axisu * 0.5f - axisv * 0.5f; + float axisu_len, axisv_len; + /* Compute local reference system R. */ + float3 x = normalize_len(axisu, &axisu_len); + float3 y = normalize_len(axisv, &axisv_len); + float3 z = cross(x, y); + /* Compute rectangle coords in local reference system. */ + float3 dir = corner - P; + float z0 = dot(dir, z); + /* Flip 'z' to make it point against Q. */ + if(z0 > 0.0f) { + z *= -1.0f; + z0 *= -1.0f; + } + float x0 = dot(dir, x); + float y0 = dot(dir, y); + float x1 = x0 + axisu_len; + float y1 = y0 + axisv_len; + /* Create vectors to four vertices. */ + float3 v00 = make_float3(x0, y0, z0); + float3 v01 = make_float3(x0, y1, z0); + float3 v10 = make_float3(x1, y0, z0); + float3 v11 = make_float3(x1, y1, z0); + /* Compute normals to edges. */ + float3 n0 = normalize(cross(v00, v10)); + float3 n1 = normalize(cross(v10, v11)); + float3 n2 = normalize(cross(v11, v01)); + float3 n3 = normalize(cross(v01, v00)); + /* Compute internal angles (gamma_i). */ + float g0 = safe_acosf(-dot(n0, n1)); + float g1 = safe_acosf(-dot(n1, n2)); + float g2 = safe_acosf(-dot(n2, n3)); + float g3 = safe_acosf(-dot(n3, n0)); + /* Compute predefined constants. */ + float k = M_2PI_F - g2 - g3; + /* Compute solid angle from internal angles. */ + float S = g0 + g1 - k; + if(S != 0.0f) + return 1.0f / S; + else + return 0.0f; } ccl_device float spot_light_attenuation(float4 data1, float4 data2, LightSample *ls) @@ -276,6 +407,7 @@ ccl_device void lamp_light_sample(KernelGlobals *kg, int lamp, float4 data2 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 2); ls->eval_fac *= spot_light_attenuation(data1, data2, ls); } + ls->pdf *= lamp_light_pdf(kg, ls->Ng, -ls->D, ls->t); } else { /* area light */ @@ -286,18 +418,22 @@ ccl_device void lamp_light_sample(KernelGlobals *kg, int lamp, float3 axisv = make_float3(data2.y, data2.z, data2.w); float3 D = make_float3(data3.y, data3.z, data3.w); - ls->P += area_light_sample(axisu, axisv, randu, randv); + ls->P = area_light_sample(P, ls->P, + axisu, axisv, + randu, randv, + &ls->pdf); + ls->Ng = D; ls->D = normalize_len(ls->P - P, &ls->t); float invarea = data2.x; - ls->eval_fac = 0.25f*invarea; - ls->pdf = invarea; + + if(dot(ls->D, D) > 0.0f) + ls->pdf = 0.0f; } ls->eval_fac *= kernel_data.integrator.inv_pdf_lights; - ls->pdf *= lamp_light_pdf(kg, ls->Ng, -ls->D, ls->t); } } @@ -355,6 +491,7 @@ ccl_device bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D, ls->D = D; ls->t = FLT_MAX; + /* compute pdf */ float invarea = data1.w; ls->pdf = invarea/(costheta*costheta*costheta); ls->eval_fac = ls->pdf; @@ -386,6 +523,10 @@ ccl_device bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D, if(ls->eval_fac == 0.0f) return false; } + + /* compute pdf */ + if(ls->t != FLT_MAX) + ls->pdf *= lamp_light_pdf(kg, ls->Ng, -ls->D, ls->t); } else if(type == LIGHT_AREA) { /* area light */ @@ -412,16 +553,12 @@ ccl_device bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D, ls->D = D; ls->Ng = Ng; - ls->pdf = invarea; - ls->eval_fac = 0.25f*ls->pdf; + ls->pdf = area_light_pdf(P, ls->P, axisu, axisv); + ls->eval_fac = 0.25f*invarea; } else return false; - /* compute pdf */ - if(ls->t != FLT_MAX) - ls->pdf *= lamp_light_pdf(kg, ls->Ng, -ls->D, ls->t); - return true; } @@ -514,7 +651,13 @@ ccl_device int light_distribution_sample(KernelGlobals *kg, float randt) /* Generic Light */ -ccl_device void light_sample(KernelGlobals *kg, float randt, float randu, float randv, float time, float3 P, LightSample *ls) +ccl_device bool light_select_reached_max_bounces(KernelGlobals *kg, int index, int bounce) +{ + float4 data4 = kernel_tex_fetch(__light_data, index*LIGHT_SIZE + 4); + return (bounce > __float_as_int(data4.x)); +} + +ccl_device void light_sample(KernelGlobals *kg, float randt, float randu, float randv, float time, float3 P, int bounce, LightSample *ls) { /* sample index */ int index = light_distribution_sample(kg, randt); @@ -536,6 +679,12 @@ ccl_device void light_sample(KernelGlobals *kg, float randt, float randu, float } else { int lamp = -prim-1; + + if(UNLIKELY(light_select_reached_max_bounces(kg, lamp, bounce))) { + ls->pdf = 0.0f; + return; + } + lamp_light_sample(kg, lamp, randu, randv, P, ls); } } @@ -546,22 +695,5 @@ ccl_device int light_select_num_samples(KernelGlobals *kg, int index) return __float_as_int(data3.x); } -ccl_device int lamp_light_eval_sample(KernelGlobals *kg, float randt) -{ - /* sample index */ - int index = light_distribution_sample(kg, randt); - - /* fetch light data */ - float4 l = kernel_tex_fetch(__light_distribution, index); - int prim = __float_as_int(l.y); - - if(prim < 0) { - int lamp = -prim-1; - return lamp; - } - else - return LAMP_NONE; -} - CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_path_surface.h b/intern/cycles/kernel/kernel_path_surface.h index 9553c2da0df..e5ba1f41c47 100644 --- a/intern/cycles/kernel/kernel_path_surface.h +++ b/intern/cycles/kernel/kernel_path_surface.h @@ -38,6 +38,9 @@ ccl_device void kernel_branched_path_surface_connect_light(KernelGlobals *kg, RN if(sample_all_lights) { /* lamp sampling */ for(int i = 0; i < kernel_data.integrator.num_all_lights; i++) { + if(UNLIKELY(light_select_reached_max_bounces(kg, i, state->bounce))) + continue; + int num_samples = ceil_to_int(num_samples_adjust*light_select_num_samples(kg, i)); float num_samples_inv = num_samples_adjust/(num_samples*kernel_data.integrator.num_all_lights); RNG lamp_rng = cmj_hash(*rng, i); @@ -82,7 +85,7 @@ ccl_device void kernel_branched_path_surface_connect_light(KernelGlobals *kg, RN light_t = 0.5f*light_t; LightSample ls; - light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls); + light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, state->bounce, &ls); if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) { /* trace shadow ray */ @@ -103,7 +106,7 @@ ccl_device void kernel_branched_path_surface_connect_light(KernelGlobals *kg, RN path_state_rng_2D(kg, rng, state, PRNG_LIGHT_U, &light_u, &light_v); LightSample ls; - light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls); + light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, state->bounce, &ls); /* sample random light */ if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) { @@ -200,7 +203,7 @@ ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, RNG #endif LightSample ls; - light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls); + light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, state->bounce, &ls); if(direct_emission(kg, sd, &ls, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) { /* trace shadow ray */ diff --git a/intern/cycles/kernel/kernel_path_volume.h b/intern/cycles/kernel/kernel_path_volume.h index da2d5e6eca8..11d3d94657b 100644 --- a/intern/cycles/kernel/kernel_path_volume.h +++ b/intern/cycles/kernel/kernel_path_volume.h @@ -40,7 +40,7 @@ ccl_device void kernel_path_volume_connect_light(KernelGlobals *kg, RNG *rng, light_ray.time = sd->time; #endif - light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls); + light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, state->bounce, &ls); if(ls.pdf == 0.0f) return; @@ -56,7 +56,12 @@ ccl_device void kernel_path_volume_connect_light(KernelGlobals *kg, RNG *rng, #endif } -ccl_device bool kernel_path_volume_bounce(KernelGlobals *kg, RNG *rng, +#ifdef __KERNEL_GPU__ +ccl_device_noinline +#else +ccl_device +#endif +bool kernel_path_volume_bounce(KernelGlobals *kg, RNG *rng, ShaderData *sd, float3 *throughput, PathState *state, PathRadiance *L, Ray *ray) { /* sample phase function */ @@ -119,6 +124,9 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG if(sample_all_lights) { /* lamp sampling */ for(int i = 0; i < kernel_data.integrator.num_all_lights; i++) { + if(UNLIKELY(light_select_reached_max_bounces(kg, i, state->bounce))) + continue; + int num_samples = ceil_to_int(num_samples_adjust*light_select_num_samples(kg, i)); float num_samples_inv = num_samples_adjust/(num_samples*kernel_data.integrator.num_all_lights); RNG lamp_rng = cmj_hash(*rng, i); @@ -183,7 +191,7 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG light_t = 0.5f*light_t; LightSample ls; - light_sample(kg, light_t, light_u, light_v, sd->time, ray->P, &ls); + light_sample(kg, light_t, light_u, light_v, sd->time, ray->P, state->bounce, &ls); float3 tp = throughput; @@ -198,7 +206,7 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG kernel_assert(result == VOLUME_PATH_SCATTERED); /* todo: split up light_sample so we don't have to call it again with new position */ - light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls); + light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, state->bounce, &ls); if(ls.pdf == 0.0f) continue; @@ -222,7 +230,7 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG path_state_rng_2D(kg, rng, state, PRNG_LIGHT_U, &light_u, &light_v); LightSample ls; - light_sample(kg, light_t, light_u, light_v, sd->time, ray->P, &ls); + light_sample(kg, light_t, light_u, light_v, sd->time, ray->P, state->bounce, &ls); float3 tp = throughput; @@ -237,7 +245,7 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG kernel_assert(result == VOLUME_PATH_SCATTERED); /* todo: split up light_sample so we don't have to call it again with new position */ - light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, &ls); + light_sample(kg, light_t, light_u, light_v, sd->time, sd->P, state->bounce, &ls); if(ls.pdf == 0.0f) return; diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h index db08c328d7e..65089740ef9 100644 --- a/intern/cycles/kernel/kernel_shader.h +++ b/intern/cycles/kernel/kernel_shader.h @@ -681,7 +681,7 @@ ccl_device float3 shader_bsdf_subsurface(KernelGlobals *kg, ShaderData *sd) for(int i = 0; i< sd->num_closure; i++) { ShaderClosure *sc = &sd->closure[i]; - if(CLOSURE_IS_BSSRDF(sc->type)) + if(CLOSURE_IS_BSSRDF(sc->type) || CLOSURE_IS_BSDF_BSSRDF(sc->type)) eval += sc->weight; } diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index ca1210f2d80..460ca7b68eb 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -29,7 +29,7 @@ CCL_NAMESPACE_BEGIN /* constants */ #define OBJECT_SIZE 11 #define OBJECT_VECTOR_SIZE 6 -#define LIGHT_SIZE 4 +#define LIGHT_SIZE 5 #define FILTER_TABLE_SIZE 256 #define RAMP_TABLE_SIZE 256 #define PARTICLE_SIZE 5 @@ -291,34 +291,34 @@ typedef enum ClosureLabel { typedef enum PassType { PASS_NONE = 0, - PASS_COMBINED = 1, - PASS_DEPTH = 2, - PASS_NORMAL = 4, - PASS_UV = 8, - PASS_OBJECT_ID = 16, - PASS_MATERIAL_ID = 32, - PASS_DIFFUSE_COLOR = 64, - PASS_GLOSSY_COLOR = 128, - PASS_TRANSMISSION_COLOR = 256, - PASS_DIFFUSE_INDIRECT = 512, - PASS_GLOSSY_INDIRECT = 1024, - PASS_TRANSMISSION_INDIRECT = 2048, - PASS_DIFFUSE_DIRECT = 4096, - PASS_GLOSSY_DIRECT = 8192, - PASS_TRANSMISSION_DIRECT = 16384, - PASS_EMISSION = 32768, - PASS_BACKGROUND = 65536, - PASS_AO = 131072, - PASS_SHADOW = 262144, - PASS_MOTION = 524288, - PASS_MOTION_WEIGHT = 1048576, - PASS_MIST = 2097152, - PASS_SUBSURFACE_DIRECT = 4194304, - PASS_SUBSURFACE_INDIRECT = 8388608, - PASS_SUBSURFACE_COLOR = 16777216, - PASS_LIGHT = 33554432, /* no real pass, used to force use_light_pass */ + PASS_COMBINED = (1 << 0), + PASS_DEPTH = (1 << 1), + PASS_NORMAL = (1 << 2), + PASS_UV = (1 << 3), + PASS_OBJECT_ID = (1 << 4), + PASS_MATERIAL_ID = (1 << 5), + PASS_DIFFUSE_COLOR = (1 << 6), + PASS_GLOSSY_COLOR = (1 << 7), + PASS_TRANSMISSION_COLOR = (1 << 8), + PASS_DIFFUSE_INDIRECT = (1 << 9), + PASS_GLOSSY_INDIRECT = (1 << 10), + PASS_TRANSMISSION_INDIRECT = (1 << 11), + PASS_DIFFUSE_DIRECT = (1 << 12), + PASS_GLOSSY_DIRECT = (1 << 13), + PASS_TRANSMISSION_DIRECT = (1 << 14), + PASS_EMISSION = (1 << 15), + PASS_BACKGROUND = (1 << 16), + PASS_AO = (1 << 17), + PASS_SHADOW = (1 << 18), + PASS_MOTION = (1 << 19), + PASS_MOTION_WEIGHT = (1 << 20), + PASS_MIST = (1 << 21), + PASS_SUBSURFACE_DIRECT = (1 << 22), + PASS_SUBSURFACE_INDIRECT = (1 << 23), + PASS_SUBSURFACE_COLOR = (1 << 24), + PASS_LIGHT = (1 << 25), /* no real pass, used to force use_light_pass */ #ifdef __KERNEL_DEBUG__ - PASS_BVH_TRAVERSAL_STEPS = 67108864, + PASS_BVH_TRAVERSAL_STEPS = (1 << 26), #endif } PassType; @@ -539,34 +539,25 @@ typedef enum AttributeStandard { #define MAX_CLOSURE 1 #endif -/* TODO(sergey): This is rather nasty bug happening in here, which - * could be simply a compilers bug for which we can't find a generic - * platform independent workaround. Also even if it's a compiler - * issue, it's not so simple to upgrade the compiler in the release - * environment for linux and doing it so closer to the release is - * rather a risky business. - * - * For this release it's probably safer to stick with such a rather - * dirty solution, and look for a cleaner fix during the next release - * cycle. +/* This struct is to be 16 bytes aligned, we also keep some extra precautions: + * - All the float3 members are in the beginning of the struct, so compiler + * does not put own padding trying to align this members. + * - We make sure OSL pointer is also 16 bytes aligned. */ typedef struct ShaderClosure { - ClosureType type; float3 weight; -#ifndef __APPLE__ + float3 N; + float3 T; + + ClosureType type; float sample_weight; -#endif float data0; float data1; float data2; + int pad1, pad2, pad3; - float3 N; - float3 T; -#ifdef __APPLE__ - float sample_weight; -#endif #ifdef __OSL__ - void *prim; + void *prim, *pad4; #endif } ShaderClosure; @@ -591,43 +582,46 @@ typedef enum ShaderContext { enum ShaderDataFlag { /* runtime flags */ - SD_BACKFACING = 1, /* backside of surface? */ - SD_EMISSION = 2, /* have emissive closure? */ - SD_BSDF = 4, /* have bsdf closure? */ - SD_BSDF_HAS_EVAL = 8, /* have non-singular bsdf closure? */ - SD_PHASE_HAS_EVAL = 8, /* have non-singular phase closure? */ - SD_BSDF_GLOSSY = 16, /* have glossy bsdf */ - SD_BSSRDF = 32, /* have bssrdf */ - SD_HOLDOUT = 64, /* have holdout closure? */ - SD_ABSORPTION = 128, /* have volume absorption closure? */ - SD_SCATTER = 256, /* have volume phase closure? */ - SD_AO = 512, /* have ao closure? */ - SD_TRANSPARENT = 1024, /* have transparent closure? */ - - SD_CLOSURE_FLAGS = (SD_EMISSION|SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSDF_GLOSSY| - SD_BSSRDF|SD_HOLDOUT|SD_ABSORPTION|SD_SCATTER|SD_AO), + SD_BACKFACING = (1 << 0), /* backside of surface? */ + SD_EMISSION = (1 << 1), /* have emissive closure? */ + SD_BSDF = (1 << 2), /* have bsdf closure? */ + SD_BSDF_HAS_EVAL = (1 << 3), /* have non-singular bsdf closure? */ + SD_PHASE_HAS_EVAL = (1 << 3), /* have non-singular phase closure? */ + SD_BSSRDF = (1 << 4), /* have bssrdf */ + SD_HOLDOUT = (1 << 5), /* have holdout closure? */ + SD_ABSORPTION = (1 << 6), /* have volume absorption closure? */ + SD_SCATTER = (1 << 7), /* have volume phase closure? */ + SD_AO = (1 << 8), /* have ao closure? */ + SD_TRANSPARENT = (1 << 9), /* have transparent closure? */ + + SD_CLOSURE_FLAGS = (SD_EMISSION|SD_BSDF|SD_BSDF_HAS_EVAL|SD_BSSRDF| + SD_HOLDOUT|SD_ABSORPTION|SD_SCATTER|SD_AO), /* shader flags */ - SD_USE_MIS = 2048, /* direct light sample */ - SD_HAS_TRANSPARENT_SHADOW = 4096, /* has transparent shadow */ - SD_HAS_VOLUME = 8192, /* has volume shader */ - SD_HAS_ONLY_VOLUME = 16384, /* has only volume shader, no surface */ - SD_HETEROGENEOUS_VOLUME = 32768, /* has heterogeneous volume */ - SD_HAS_BSSRDF_BUMP = 65536, /* bssrdf normal uses bump */ - SD_VOLUME_EQUIANGULAR = 131072, /* use equiangular sampling */ - SD_VOLUME_MIS = 262144, /* use multiple importance sampling */ + SD_USE_MIS = (1 << 10), /* direct light sample */ + SD_HAS_TRANSPARENT_SHADOW = (1 << 11), /* has transparent shadow */ + SD_HAS_VOLUME = (1 << 12), /* has volume shader */ + SD_HAS_ONLY_VOLUME = (1 << 13), /* has only volume shader, no surface */ + SD_HETEROGENEOUS_VOLUME = (1 << 14), /* has heterogeneous volume */ + SD_HAS_BSSRDF_BUMP = (1 << 15), /* bssrdf normal uses bump */ + SD_VOLUME_EQUIANGULAR = (1 << 16), /* use equiangular sampling */ + SD_VOLUME_MIS = (1 << 17), /* use multiple importance sampling */ + SD_VOLUME_CUBIC = (1 << 18), /* use cubic interpolation for voxels */ + SD_HAS_BUMP = (1 << 19), /* has data connected to the displacement input */ SD_SHADER_FLAGS = (SD_USE_MIS|SD_HAS_TRANSPARENT_SHADOW|SD_HAS_VOLUME| SD_HAS_ONLY_VOLUME|SD_HETEROGENEOUS_VOLUME| - SD_HAS_BSSRDF_BUMP|SD_VOLUME_EQUIANGULAR|SD_VOLUME_MIS), + SD_HAS_BSSRDF_BUMP|SD_VOLUME_EQUIANGULAR|SD_VOLUME_MIS| + SD_VOLUME_CUBIC|SD_HAS_BUMP), /* object flags */ - SD_HOLDOUT_MASK = 524288, /* holdout for camera rays */ - SD_OBJECT_MOTION = 1048576, /* has object motion blur */ - SD_TRANSFORM_APPLIED = 2097152, /* vertices have transform applied */ - SD_NEGATIVE_SCALE_APPLIED = 4194304, /* vertices have negative scale applied */ - SD_OBJECT_HAS_VOLUME = 8388608, /* object has a volume shader */ - SD_OBJECT_INTERSECTS_VOLUME = 16777216, /* object intersects AABB of an object with volume shader */ + SD_HOLDOUT_MASK = (1 << 20), /* holdout for camera rays */ + SD_OBJECT_MOTION = (1 << 21), /* has object motion blur */ + SD_TRANSFORM_APPLIED = (1 << 22), /* vertices have transform applied */ + SD_NEGATIVE_SCALE_APPLIED = (1 << 23), /* vertices have negative scale applied */ + SD_OBJECT_HAS_VOLUME = (1 << 24), /* object has a volume shader */ + SD_OBJECT_INTERSECTS_VOLUME = (1 << 25), /* object intersects AABB of an object with volume shader */ + SD_OBJECT_HAS_VERTEX_MOTION = (1 << 21), /* has position for motion vertices */ SD_OBJECT_FLAGS = (SD_HOLDOUT_MASK|SD_OBJECT_MOTION|SD_TRANSFORM_APPLIED| SD_NEGATIVE_SCALE_APPLIED|SD_OBJECT_HAS_VOLUME| @@ -994,7 +988,7 @@ typedef struct KernelData { #ifdef __KERNEL_DEBUG__ typedef struct DebugData { - // Total number of BVH node travesal steps and primitives intersections + // Total number of BVH node traversal steps and primitives intersections // for the camera rays. int num_bvh_traversal_steps; } DebugData; diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h index 93cb4c120ea..6a39ba928f0 100644 --- a/intern/cycles/kernel/kernel_volume.h +++ b/intern/cycles/kernel/kernel_volume.h @@ -581,7 +581,8 @@ ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(KernelGlobals * through a volume. This can then latter be used for decoupled sampling as in: * "Importance Sampling Techniques for Path Tracing in Participating Media" * - * On the GPU this is only supported for homogeneous volumes (1 step), due to + * On the GPU this is only supported (but currently not enabled) + * for homogeneous volumes (1 step), due to * no support for malloc/free and too much stack usage with a fix size array. */ typedef struct VolumeStep { @@ -595,6 +596,7 @@ typedef struct VolumeStep { } VolumeStep; typedef struct VolumeSegment { + VolumeStep stack_step; /* stack storage for homogeneous step, to avoid malloc */ VolumeStep *steps; /* recorded steps */ int numsteps; /* number of steps */ int closure_flag; /* accumulated closure flags from all steps */ @@ -627,11 +629,13 @@ ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *sta /* compute exact steps in advance for malloc */ max_steps = max((int)ceilf(ray->t/step_size), 1); + segment->steps = (VolumeStep*)malloc(sizeof(VolumeStep)*max_steps); } else { max_steps = 1; step_size = ray->t; random_jitter_offset = 0.0f; + segment->steps = &segment->stack_step; } /* init accumulation variables */ @@ -640,10 +644,8 @@ ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *sta float3 cdf_distance = make_float3(0.0f, 0.0f, 0.0f); float t = 0.0f; - segment->closure_flag = 0; segment->numsteps = 0; - - segment->steps = (VolumeStep*)malloc(sizeof(VolumeStep)*max_steps); + segment->closure_flag = 0; VolumeStep *step = segment->steps; @@ -729,16 +731,13 @@ ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *sta ccl_device void kernel_volume_decoupled_free(KernelGlobals *kg, VolumeSegment *segment) { - free(segment->steps); + if(segment->steps != &segment->stack_step) + free(segment->steps); } /* scattering for homogeneous and heterogeneous volumes, using decoupled ray - * marching. unlike the non-decoupled functions, these do not do probalistic - * scattering, they always scatter if there is any non-zero scattering - * coefficient. + * marching. this function does not do emission or modify throughput. * - * these also do not do emission or modify throughput. - * * function is expected to return VOLUME_PATH_SCATTERED when probalistic_scatter is false */ ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter( KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd, @@ -958,7 +957,7 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg, Ray *ray, VolumeStack *stack) { - /* NULL ray happens in the baker, does it need proper initializetion of + /* NULL ray happens in the baker, does it need proper initialization of * camera in volume? */ if(!kernel_data.cam.is_inside_volume || ray == NULL) { @@ -992,31 +991,29 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg, ShaderData sd; shader_setup_from_ray(kg, &sd, &isect, &volume_ray, 0, 0); - if(sd.flag & SD_HAS_VOLUME) { - if(sd.flag & SD_BACKFACING) { - /* If ray exited the volume and never entered to that volume - * it means that camera is inside such a volume. - */ - bool is_enclosed = false; - for(int i = 0; i < enclosed_index; ++i) { - if(enclosed_volumes[i] == sd.object) { - is_enclosed = true; - break; - } - } - if(is_enclosed == false) { - stack[stack_index].object = sd.object; - stack[stack_index].shader = sd.shader; - ++stack_index; + if(sd.flag & SD_BACKFACING) { + /* If ray exited the volume and never entered to that volume + * it means that camera is inside such a volume. + */ + bool is_enclosed = false; + for(int i = 0; i < enclosed_index; ++i) { + if(enclosed_volumes[i] == sd.object) { + is_enclosed = true; + break; } } - else { - /* If ray from camera enters the volume, this volume shouldn't - * be added to the stak on exit. - */ - enclosed_volumes[enclosed_index++] = sd.object; + if(is_enclosed == false) { + stack[stack_index].object = sd.object; + stack[stack_index].shader = sd.shader; + ++stack_index; } } + else { + /* If ray from camera enters the volume, this volume shouldn't + * be added to the stack on exit. + */ + enclosed_volumes[enclosed_index++] = sd.object; + } /* Move ray forward. */ volume_ray.P = ray_offset(sd.P, -sd.Ng); diff --git a/intern/cycles/kernel/osl/SConscript b/intern/cycles/kernel/osl/SConscript index d721edbaf6e..0a21d3e6819 100644 --- a/intern/cycles/kernel/osl/SConscript +++ b/intern/cycles/kernel/osl/SConscript @@ -38,6 +38,7 @@ incs.append(env['BF_OIIO_INC']) incs.append(env['BF_BOOST_INC']) incs.append(env['BF_OSL_INC']) incs.append(env['BF_OPENEXR_INC'].split()) +incs.append('#/intern/atomic') defs.append('CCL_NAMESPACE_BEGIN=namespace ccl {') defs.append('CCL_NAMESPACE_END=}') @@ -46,6 +47,16 @@ defs.append('WITH_OSL') if env['WITH_BF_CYCLES_DEBUG']: defs.append('WITH_CYCLES_DEBUG') +if env['WITH_BF_CYCLES_LOGGING']: + defs.append('WITH_CYCLES_LOGGING') + defs.append('GOOGLE_GLOG_DLL_DECL=') + if env['OURPLATFORM'] in ('win32-vc', 'win32-mingw', 'linuxcross', 'win64-vc', 'win64-mingw'): + incs.append('#extern/libmv/third_party/glog/src/windows') + incs.append('#extern/libmv/third_party/gflags') + else: + incs.append('#extern/libmv/third_party/glog/src') + incs.append('#extern/libmv/third_party/gflags') + if env['OURPLATFORM'] in ('win32-vc', 'win64-vc'): cxxflags.append('-DBOOST_NO_RTTI -DBOOST_NO_TYPEID /fp:fast'.split()) incs.append(env['BF_PTHREADS_INC']) diff --git a/intern/cycles/kernel/osl/osl_closures.cpp b/intern/cycles/kernel/osl/osl_closures.cpp index cc9942b024e..1d99f1d2682 100644 --- a/intern/cycles/kernel/osl/osl_closures.cpp +++ b/intern/cycles/kernel/osl/osl_closures.cpp @@ -189,11 +189,7 @@ static void register_closure(OSL::ShadingSystem *ss, const char *name, int id, O /* optimization: it's possible to not use a prepare function at all and * only initialize the actual class when accessing the closure component * data, but then we need to map the id to the class somehow */ -#ifdef CLOSURE_PREPARE - ss->register_closure(name, id, params, prepare, NULL, NULL); -#else - ss->register_closure(name, id, params, prepare, NULL); -#endif + ss->register_closure(name, id, params, prepare, NULL, 16); } void OSLShader::register_closures(OSLShadingSystem *ss_) diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp index a9694651e14..7a93aa05222 100644 --- a/intern/cycles/kernel/osl/osl_services.cpp +++ b/intern/cycles/kernel/osl/osl_services.cpp @@ -834,7 +834,7 @@ bool OSLRenderServices::has_userdata(ustring name, TypeDesc type, OSL::ShaderGlo bool OSLRenderServices::texture(ustring filename, TextureOpt &options, OSL::ShaderGlobals *sg, float s, float t, float dsdx, float dtdx, - float dsdy, float dtdy, float *result) + float dsdy, float dtdy, int nchannels, float *result) { OSL::TextureSystem *ts = osl_ts; ShaderData *sd = (ShaderData *)(sg->renderstate); @@ -869,9 +869,9 @@ bool OSLRenderServices::texture(ustring filename, TextureOpt &options, PtexFilter::Options opts(PtexFilter::f_bicubic, mipmaplerp, sharpness); PtexPtr<PtexFilter> f(PtexFilter::getFilter(r, opts)); - f->eval(result, options.firstchannel, options.nchannels, faceid, u, v, dudx, dvdx, dudy, dvdy); + f->eval(result, options.firstchannel, nchannels, faceid, u, v, dudx, dvdx, dudy, dvdy); - for(int c = r->numChannels(); c < options.nchannels; c++) + for(int c = r->numChannels(); c < nchannels; c++) result[c] = result[0]; return true; @@ -880,15 +880,15 @@ bool OSLRenderServices::texture(ustring filename, TextureOpt &options, bool status; if(filename[0] == '@' && filename.find('.') == -1) { - int slot = atoi(filename.c_str() + 1); + int slot = atoi(filename.c_str() + 1); float4 rgba = kernel_tex_image_interp(slot, s, 1.0f - t); result[0] = rgba[0]; - if(options.nchannels > 1) + if(nchannels > 1) result[1] = rgba[1]; - if(options.nchannels > 2) + if(nchannels > 2) result[2] = rgba[2]; - if(options.nchannels > 3) + if(nchannels > 3) result[3] = rgba[3]; status = true; } @@ -898,17 +898,24 @@ bool OSLRenderServices::texture(ustring filename, TextureOpt &options, OIIO::TextureSystem::TextureHandle *th = ts->get_texture_handle(filename, thread_info); +#if OIIO_VERSION < 10500 status = ts->texture(th, thread_info, - options, s, t, dsdx, dtdx, dsdy, dtdy, result); + options, s, t, dsdx, dtdx, dsdy, dtdy, + result); +#else + status = ts->texture(th, thread_info, + options, s, t, dsdx, dtdx, dsdy, dtdy, + nchannels, result); +#endif } if(!status) { - if(options.nchannels == 3 || options.nchannels == 4) { + if(nchannels == 3 || nchannels == 4) { result[0] = 1.0f; result[1] = 0.0f; result[2] = 1.0f; - if(options.nchannels == 4) + if(nchannels == 4) result[3] = 1.0f; } } @@ -919,7 +926,7 @@ bool OSLRenderServices::texture(ustring filename, TextureOpt &options, bool OSLRenderServices::texture3d(ustring filename, TextureOpt &options, OSL::ShaderGlobals *sg, const OSL::Vec3 &P, const OSL::Vec3 &dPdx, const OSL::Vec3 &dPdy, - const OSL::Vec3 &dPdz, float *result) + const OSL::Vec3 &dPdz, int nchannels, float *result) { OSL::TextureSystem *ts = osl_ts; ShaderData *sd = (ShaderData *)(sg->renderstate); @@ -929,16 +936,22 @@ bool OSLRenderServices::texture3d(ustring filename, TextureOpt &options, OIIO::TextureSystem::TextureHandle *th = ts->get_texture_handle(filename, thread_info); +#if OIIO_VERSION < 10500 bool status = ts->texture3d(th, thread_info, options, P, dPdx, dPdy, dPdz, result); +#else + bool status = ts->texture3d(th, thread_info, + options, P, dPdx, dPdy, dPdz, + nchannels, result); +#endif if(!status) { - if(options.nchannels == 3 || options.nchannels == 4) { + if(nchannels == 3 || nchannels == 4) { result[0] = 1.0f; result[1] = 0.0f; result[2] = 1.0f; - if(options.nchannels == 4) + if(nchannels == 4) result[3] = 1.0f; } @@ -949,7 +962,8 @@ bool OSLRenderServices::texture3d(ustring filename, TextureOpt &options, bool OSLRenderServices::environment(ustring filename, TextureOpt &options, OSL::ShaderGlobals *sg, const OSL::Vec3 &R, - const OSL::Vec3 &dRdx, const OSL::Vec3 &dRdy, float *result) + const OSL::Vec3 &dRdx, const OSL::Vec3 &dRdy, + int nchannels, float *result) { OSL::TextureSystem *ts = osl_ts; ShaderData *sd = (ShaderData *)(sg->renderstate); @@ -958,16 +972,23 @@ bool OSLRenderServices::environment(ustring filename, TextureOpt &options, OIIO::TextureSystem::Perthread *thread_info = tdata->oiio_thread_info; OIIO::TextureSystem::TextureHandle *th = ts->get_texture_handle(filename, thread_info); + +#if OIIO_VERSION < 10500 bool status = ts->environment(th, thread_info, options, R, dRdx, dRdy, result); +#else + bool status = ts->environment(th, thread_info, + options, R, dRdx, dRdy, + nchannels, result); +#endif if(!status) { - if(options.nchannels == 3 || options.nchannels == 4) { + if(nchannels == 3 || nchannels == 4) { result[0] = 1.0f; result[1] = 0.0f; result[2] = 1.0f; - if(options.nchannels == 4) + if(nchannels == 4) result[3] = 1.0f; } } diff --git a/intern/cycles/kernel/osl/osl_services.h b/intern/cycles/kernel/osl/osl_services.h index 6f928a0d103..e9026d95f34 100644 --- a/intern/cycles/kernel/osl/osl_services.h +++ b/intern/cycles/kernel/osl/osl_services.h @@ -97,16 +97,17 @@ public: bool texture(ustring filename, TextureOpt &options, OSL::ShaderGlobals *sg, float s, float t, float dsdx, float dtdx, - float dsdy, float dtdy, float *result); + float dsdy, float dtdy, int nchannels, float *result); bool texture3d(ustring filename, TextureOpt &options, OSL::ShaderGlobals *sg, const OSL::Vec3 &P, const OSL::Vec3 &dPdx, const OSL::Vec3 &dPdy, - const OSL::Vec3 &dPdz, float *result); + const OSL::Vec3 &dPdz, int nchannels, float *result); bool environment(ustring filename, TextureOpt &options, OSL::ShaderGlobals *sg, const OSL::Vec3 &R, - const OSL::Vec3 &dRdx, const OSL::Vec3 &dRdy, float *result); + const OSL::Vec3 &dRdx, const OSL::Vec3 &dRdy, + int nchannels, float *result); bool get_texture_info(OSL::ShaderGlobals *sg, ustring filename, int subimage, ustring dataname, TypeDesc datatype, void *data); @@ -159,70 +160,37 @@ public: static ustring u_v; static ustring u_empty; -#if OSL_LIBRARY_VERSION_CODE < 10500 - bool get_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform, float time) { - return get_matrix(NULL, result, xform, time); - } - - bool get_inverse_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform, float time) { - return get_inverse_matrix(NULL, result, xform, time); - } - - bool get_matrix(OSL::Matrix44 &result, ustring from, float time) { - return get_matrix(NULL, result, from, time); - } - - bool get_inverse_matrix(OSL::Matrix44 &result, ustring to, float time) { - return get_inverse_matrix(NULL, result, to, time); - } - - bool get_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform) { - return get_matrix(NULL, result, xform); - } - - bool get_inverse_matrix(OSL::Matrix44 &result, OSL::TransformationPtr xform) { - return get_inverse_matrix(NULL, result, xform); - } + /* Code to make OSL versions transition smooth. */ - bool get_matrix(OSL::Matrix44 &result, ustring from) { - return get_matrix(NULL, result, from); +#if OSL_LIBRARY_VERSION_CODE < 10600 + inline bool texture(ustring filename, TextureOpt &options, + OSL::ShaderGlobals *sg, + float s, float t, float dsdx, float dtdx, + float dsdy, float dtdy, float *result) + { + return texture(filename, options, sg, s, t, dsdx, dtdx, dsdy, dtdy, + options.nchannels, result); } - bool get_inverse_matrix(OSL::Matrix44 &result, ustring to) { - return get_inverse_matrix(NULL, result, to); + inline bool texture3d(ustring filename, TextureOpt &options, + OSL::ShaderGlobals *sg, const OSL::Vec3 &P, + const OSL::Vec3 &dPdx, const OSL::Vec3 &dPdy, + const OSL::Vec3 &dPdz, float *result) + { + return texture3d(filename, options, sg, P, dPdx, dPdy, dPdz, + options.nchannels, result); } - bool get_array_attribute(void *renderstate, bool derivatives, - ustring object, TypeDesc type, ustring name, - int index, void *val) { - OSL::ShaderGlobals sg; - sg.renderstate = renderstate; - return get_array_attribute(&sg, derivatives, - object, type, name, - index, val); - } - - bool get_attribute(void *renderstate, bool derivatives, ustring object_name, - TypeDesc type, ustring name, void *val) { - OSL::ShaderGlobals sg; - sg.renderstate = renderstate; - return get_attribute(&sg, derivatives, object_name, type, name, val); - } - - bool has_userdata(ustring name, TypeDesc type, void *renderstate) { - return has_userdata(name, type, (OSL::ShaderGlobals *) renderstate); - } - - bool get_userdata(bool derivatives, ustring name, TypeDesc type, - void *renderstate, void *val) { - return get_userdata(derivatives, name, type, (OSL::ShaderGlobals *) renderstate, val); - } - - bool get_texture_info(ustring filename, int subimage, - ustring dataname, TypeDesc datatype, void *data) { - return get_texture_info(NULL, filename, subimage, dataname, datatype, data); + inline bool environment(ustring filename, TextureOpt &options, + OSL::ShaderGlobals *sg, const OSL::Vec3 &R, + const OSL::Vec3 &dRdx, const OSL::Vec3 &dRdy, + float *result) + { + return environment(filename, options, sg, R, dRdx, dRdy, + options.nchannels, result); } #endif + private: KernelGlobals *kernel_globals; OSL::TextureSystem *osl_ts; diff --git a/intern/cycles/kernel/shaders/node_combine_hsv.osl b/intern/cycles/kernel/shaders/node_combine_hsv.osl index 010773acc5c..574bad30b14 100644 --- a/intern/cycles/kernel/shaders/node_combine_hsv.osl +++ b/intern/cycles/kernel/shaders/node_combine_hsv.osl @@ -15,6 +15,7 @@ */ #include "stdosl.h" +#include "node_color.h" shader node_combine_hsv( float H = 0.0, @@ -22,6 +23,6 @@ shader node_combine_hsv( float V = 0.0, output color Color = 0.8) { - Color = color("hsv", H, S, V); + Color = color_srgb_to_scene_linear(color("hsv", H, S, V)); } diff --git a/intern/cycles/kernel/shaders/node_hsv.osl b/intern/cycles/kernel/shaders/node_hsv.osl index 4722bde4cd7..5f4300ee31d 100644 --- a/intern/cycles/kernel/shaders/node_hsv.osl +++ b/intern/cycles/kernel/shaders/node_hsv.osl @@ -35,6 +35,11 @@ shader node_hsv( Color = hsv_to_rgb(Color); + // Clamp color to prevent negative values cauzed by oversaturation. + Color[0] = max(Color[0], 0.0); + Color[1] = max(Color[1], 0.0); + Color[2] = max(Color[2], 0.0); + ColorOut = mix(ColorIn, Color, Fac); } diff --git a/intern/cycles/kernel/shaders/node_normal.osl b/intern/cycles/kernel/shaders/node_normal.osl index 14af044e0c0..002eddb574c 100644 --- a/intern/cycles/kernel/shaders/node_normal.osl +++ b/intern/cycles/kernel/shaders/node_normal.osl @@ -23,6 +23,6 @@ shader node_normal( output float Dot = 1.0) { NormalOut = normalize(Direction); - Dot = dot(NormalOut, NormalIn); + Dot = dot(NormalOut, normalize(NormalIn)); } diff --git a/intern/cycles/kernel/shaders/node_separate_hsv.osl b/intern/cycles/kernel/shaders/node_separate_hsv.osl index 94fc5de9122..8bfb04aea1c 100644 --- a/intern/cycles/kernel/shaders/node_separate_hsv.osl +++ b/intern/cycles/kernel/shaders/node_separate_hsv.osl @@ -23,7 +23,7 @@ shader node_separate_hsv( output float S = 0.0, output float V = 0.0) { - color col = rgb_to_hsv(Color); + color col = rgb_to_hsv(color_scene_linear_to_srgb(Color)); H = col[0]; S = col[1]; diff --git a/intern/cycles/kernel/shaders/stdosl.h b/intern/cycles/kernel/shaders/stdosl.h index 1ff8f363b49..6babe98717c 100644 --- a/intern/cycles/kernel/shaders/stdosl.h +++ b/intern/cycles/kernel/shaders/stdosl.h @@ -505,6 +505,47 @@ closure color hair_transmission(normal N, float roughnessu, float roughnessv, ve closure color henyey_greenstein(float g) BUILTIN; closure color absorption() BUILTIN; +// OSL 1.5 Microfacet functions +closure color microfacet(string distribution, normal N, vector U, float xalpha, float yalpha, float eta, int refract) { + /* GGX */ + if (distribution == "ggx" || distribution == "default") { + if (!refract) { + if (xalpha == yalpha) { + /* Isotropic */ + return microfacet_ggx(N, xalpha); + } + else { + /* Anisotropic */ + return microfacet_ggx_aniso(N, U, xalpha, yalpha); + } + } + else { + return microfacet_ggx_refraction(N, xalpha, eta); + } + } + /* Beckmann */ + else { + if (!refract) { + if (xalpha == yalpha) { + /* Isotropic */ + return microfacet_beckmann(N, xalpha); + } + else { + /* Anisotropic */ + return microfacet_beckmann_aniso(N, U, xalpha, yalpha); + } + } + else { + return microfacet_beckmann_refraction(N, xalpha, eta); + } + } +} + +closure color microfacet (string distribution, normal N, float alpha, float eta, int refract) { + return microfacet(distribution, N, vector(0), alpha, alpha, eta, refract); +} + + // Renderer state int backfacing () BUILTIN; int raytype (string typename) BUILTIN; diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h index c13eae813d6..5acfbbf972b 100644 --- a/intern/cycles/kernel/svm/svm.h +++ b/intern/cycles/kernel/svm/svm.h @@ -164,6 +164,7 @@ CCL_NAMESPACE_END #include "svm_mapping.h" #include "svm_normal.h" #include "svm_wave.h" +#include "svm_math_util.h" #include "svm_math.h" #include "svm_mix.h" #include "svm_ramp.h" diff --git a/intern/cycles/kernel/svm/svm_hsv.h b/intern/cycles/kernel/svm/svm_hsv.h index 11dfc4f096b..a02d853be1a 100644 --- a/intern/cycles/kernel/svm/svm_hsv.h +++ b/intern/cycles/kernel/svm/svm_hsv.h @@ -46,6 +46,11 @@ ccl_device void svm_node_hsv(KernelGlobals *kg, ShaderData *sd, float *stack, ui color.y = fac*color.y + (1.0f - fac)*in_color.y; color.z = fac*color.z + (1.0f - fac)*in_color.z; + /* Clamp color to prevent negative values cauzed by oversaturation. */ + color.x = max(color.x, 0.0f); + color.y = max(color.y, 0.0f); + color.z = max(color.z, 0.0f); + if (stack_valid(out_color_offset)) stack_store_float3(stack, out_color_offset, color); } diff --git a/intern/cycles/kernel/svm/svm_math.h b/intern/cycles/kernel/svm/svm_math.h index 1ce9386e40e..e3d8c1f3242 100644 --- a/intern/cycles/kernel/svm/svm_math.h +++ b/intern/cycles/kernel/svm/svm_math.h @@ -16,56 +16,6 @@ CCL_NAMESPACE_BEGIN -ccl_device float svm_math(NodeMath type, float Fac1, float Fac2) -{ - float Fac; - - if(type == NODE_MATH_ADD) - Fac = Fac1 + Fac2; - else if(type == NODE_MATH_SUBTRACT) - Fac = Fac1 - Fac2; - else if(type == NODE_MATH_MULTIPLY) - Fac = Fac1*Fac2; - else if(type == NODE_MATH_DIVIDE) - Fac = safe_divide(Fac1, Fac2); - else if(type == NODE_MATH_SINE) - Fac = sinf(Fac1); - else if(type == NODE_MATH_COSINE) - Fac = cosf(Fac1); - else if(type == NODE_MATH_TANGENT) - Fac = tanf(Fac1); - else if(type == NODE_MATH_ARCSINE) - Fac = safe_asinf(Fac1); - else if(type == NODE_MATH_ARCCOSINE) - Fac = safe_acosf(Fac1); - else if(type == NODE_MATH_ARCTANGENT) - Fac = atanf(Fac1); - else if(type == NODE_MATH_POWER) - Fac = safe_powf(Fac1, Fac2); - else if(type == NODE_MATH_LOGARITHM) - Fac = safe_logf(Fac1, Fac2); - else if(type == NODE_MATH_MINIMUM) - Fac = fminf(Fac1, Fac2); - else if(type == NODE_MATH_MAXIMUM) - Fac = fmaxf(Fac1, Fac2); - else if(type == NODE_MATH_ROUND) - Fac = floorf(Fac1 + 0.5f); - else if(type == NODE_MATH_LESS_THAN) - Fac = Fac1 < Fac2; - else if(type == NODE_MATH_GREATER_THAN) - Fac = Fac1 > Fac2; - else if(type == NODE_MATH_MODULO) - Fac = safe_modulo(Fac1, Fac2); - else if(type == NODE_MATH_ABSOLUTE) - Fac = fabsf(Fac1); - else if(type == NODE_MATH_CLAMP) - Fac = clamp(Fac1, 0.0f, 1.0f); - else - Fac = 0.0f; - - return Fac; -} - ccl_device float average_fac(float3 v) { return (fabsf(v.x) + fabsf(v.y) + fabsf(v.z))/3.0f; diff --git a/intern/cycles/kernel/svm/svm_math_util.h b/intern/cycles/kernel/svm/svm_math_util.h new file mode 100644 index 00000000000..b813bf531dc --- /dev/null +++ b/intern/cycles/kernel/svm/svm_math_util.h @@ -0,0 +1,70 @@ +/* + * Copyright 2011-2014 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License + */ + +CCL_NAMESPACE_BEGIN + +ccl_device float svm_math(NodeMath type, float Fac1, float Fac2) +{ + float Fac; + + if(type == NODE_MATH_ADD) + Fac = Fac1 + Fac2; + else if(type == NODE_MATH_SUBTRACT) + Fac = Fac1 - Fac2; + else if(type == NODE_MATH_MULTIPLY) + Fac = Fac1*Fac2; + else if(type == NODE_MATH_DIVIDE) + Fac = safe_divide(Fac1, Fac2); + else if(type == NODE_MATH_SINE) + Fac = sinf(Fac1); + else if(type == NODE_MATH_COSINE) + Fac = cosf(Fac1); + else if(type == NODE_MATH_TANGENT) + Fac = tanf(Fac1); + else if(type == NODE_MATH_ARCSINE) + Fac = safe_asinf(Fac1); + else if(type == NODE_MATH_ARCCOSINE) + Fac = safe_acosf(Fac1); + else if(type == NODE_MATH_ARCTANGENT) + Fac = atanf(Fac1); + else if(type == NODE_MATH_POWER) + Fac = safe_powf(Fac1, Fac2); + else if(type == NODE_MATH_LOGARITHM) + Fac = safe_logf(Fac1, Fac2); + else if(type == NODE_MATH_MINIMUM) + Fac = fminf(Fac1, Fac2); + else if(type == NODE_MATH_MAXIMUM) + Fac = fmaxf(Fac1, Fac2); + else if(type == NODE_MATH_ROUND) + Fac = floorf(Fac1 + 0.5f); + else if(type == NODE_MATH_LESS_THAN) + Fac = Fac1 < Fac2; + else if(type == NODE_MATH_GREATER_THAN) + Fac = Fac1 > Fac2; + else if(type == NODE_MATH_MODULO) + Fac = safe_modulo(Fac1, Fac2); + else if(type == NODE_MATH_ABSOLUTE) + Fac = fabsf(Fac1); + else if(type == NODE_MATH_CLAMP) + Fac = clamp(Fac1, 0.0f, 1.0f); + else + Fac = 0.0f; + + return Fac; +} + +CCL_NAMESPACE_END + diff --git a/intern/cycles/kernel/svm/svm_sepcomb_hsv.h b/intern/cycles/kernel/svm/svm_sepcomb_hsv.h index 111d5d47988..abf75b62bd5 100644 --- a/intern/cycles/kernel/svm/svm_sepcomb_hsv.h +++ b/intern/cycles/kernel/svm/svm_sepcomb_hsv.h @@ -26,7 +26,8 @@ ccl_device void svm_node_combine_hsv(KernelGlobals *kg, ShaderData *sd, float *s float value = stack_load_float(stack, value_in); /* Combine, and convert back to RGB */ - float3 color = hsv_to_rgb(make_float3(hue, saturation, value)); + float3 color = color_srgb_to_scene_linear( + hsv_to_rgb(make_float3(hue, saturation, value))); if (stack_valid(color_out)) stack_store_float3(stack, color_out, color); @@ -40,7 +41,7 @@ ccl_device void svm_node_separate_hsv(KernelGlobals *kg, ShaderData *sd, float * float3 color = stack_load_float3(stack, color_in); /* Convert to HSV */ - color = rgb_to_hsv(color); + color = rgb_to_hsv(color_scene_linear_to_srgb(color)); if (stack_valid(hue_out)) stack_store_float(stack, hue_out, color.x); diff --git a/intern/cycles/render/graph.cpp b/intern/cycles/render/graph.cpp index 45b08832fea..e98931b5c7b 100644 --- a/intern/cycles/render/graph.cpp +++ b/intern/cycles/render/graph.cpp @@ -684,7 +684,7 @@ void ShaderGraph::bump_from_displacement() * different shifted coordinates. * * these 3 displacement values are then fed into the bump node, which will - * output the the perturbed normal. */ + * output the perturbed normal. */ ShaderInput *displacement_in = output()->input("Displacement"); @@ -844,7 +844,7 @@ void ShaderGraph::dump_graph(const char *filename) return; } - fprintf(fd, "digraph dependencygraph {\n"); + fprintf(fd, "digraph shader_graph {\n"); fprintf(fd, "ranksep=1.5\n"); fprintf(fd, "splines=false\n"); diff --git a/intern/cycles/render/integrator.cpp b/intern/cycles/render/integrator.cpp index 03a8cd5d2d3..9b279660f9c 100644 --- a/intern/cycles/render/integrator.cpp +++ b/intern/cycles/render/integrator.cpp @@ -39,7 +39,6 @@ Integrator::Integrator() transparent_max_bounce = max_bounce; transparent_shadows = false; - volume_homogeneous_sampling = 0; volume_max_steps = 1024; volume_step_size = 0.1f; @@ -60,6 +59,10 @@ Integrator::Integrator() mesh_light_samples = 1; subsurface_samples = 1; volume_samples = 1; + + sample_all_lights_direct = true; + sample_all_lights_indirect = true; + method = PATH; sampling_pattern = SAMPLING_PATTERN_SOBOL; @@ -189,7 +192,6 @@ bool Integrator::modified(const Integrator& integrator) transparent_min_bounce == integrator.transparent_min_bounce && transparent_max_bounce == integrator.transparent_max_bounce && transparent_shadows == integrator.transparent_shadows && - volume_homogeneous_sampling == integrator.volume_homogeneous_sampling && volume_max_steps == integrator.volume_max_steps && volume_step_size == integrator.volume_step_size && caustics_reflective == integrator.caustics_reflective && diff --git a/intern/cycles/render/integrator.h b/intern/cycles/render/integrator.h index 13c10e8ca94..110c354823b 100644 --- a/intern/cycles/render/integrator.h +++ b/intern/cycles/render/integrator.h @@ -39,7 +39,6 @@ public: int transparent_max_bounce; bool transparent_shadows; - int volume_homogeneous_sampling; int volume_max_steps; float volume_step_size; diff --git a/intern/cycles/render/light.cpp b/intern/cycles/render/light.cpp index 1f006637e67..8d1cec10187 100644 --- a/intern/cycles/render/light.cpp +++ b/intern/cycles/render/light.cpp @@ -125,6 +125,7 @@ Light::Light() shader = 0; samples = 1; + max_bounces = 1024; } void Light::tag_update(Scene *scene) @@ -489,6 +490,7 @@ void LightManager::device_update_points(Device *device, DeviceScene *dscene, Sce float3 co = light->co; int shader_id = scene->shader_manager->get_shader_id(scene->lights[i]->shader); float samples = __int_as_float(light->samples); + float max_bounces = __int_as_float(light->max_bounces); if(!light->cast_shadow) shader_id &= ~SHADER_CAST_SHADOW; @@ -523,6 +525,7 @@ void LightManager::device_update_points(Device *device, DeviceScene *dscene, Sce light_data[i*LIGHT_SIZE + 1] = make_float4(__int_as_float(shader_id), radius, invarea, 0.0f); light_data[i*LIGHT_SIZE + 2] = make_float4(0.0f, 0.0f, 0.0f, 0.0f); light_data[i*LIGHT_SIZE + 3] = make_float4(samples, 0.0f, 0.0f, 0.0f); + light_data[i*LIGHT_SIZE + 4] = make_float4(max_bounces, 0.0f, 0.0f, 0.0f); } else if(light->type == LIGHT_DISTANT) { shader_id &= ~SHADER_AREA_LIGHT; @@ -533,9 +536,8 @@ void LightManager::device_update_points(Device *device, DeviceScene *dscene, Sce float area = M_PI_F*radius*radius; float invarea = (area > 0.0f)? 1.0f/area: 1.0f; float3 dir = light->dir; - - if(len(dir) > 0.0f) - dir = normalize(dir); + + dir = safe_normalize(dir); if(light->use_mis && area > 0.0f) shader_id |= SHADER_USE_MIS; @@ -544,6 +546,7 @@ void LightManager::device_update_points(Device *device, DeviceScene *dscene, Sce light_data[i*LIGHT_SIZE + 1] = make_float4(__int_as_float(shader_id), radius, cosangle, invarea); light_data[i*LIGHT_SIZE + 2] = make_float4(0.0f, 0.0f, 0.0f, 0.0f); light_data[i*LIGHT_SIZE + 3] = make_float4(samples, 0.0f, 0.0f, 0.0f); + light_data[i*LIGHT_SIZE + 4] = make_float4(max_bounces, 0.0f, 0.0f, 0.0f); } else if(light->type == LIGHT_BACKGROUND) { uint visibility = scene->background->visibility; @@ -572,6 +575,7 @@ void LightManager::device_update_points(Device *device, DeviceScene *dscene, Sce light_data[i*LIGHT_SIZE + 1] = make_float4(__int_as_float(shader_id), 0.0f, 0.0f, 0.0f); light_data[i*LIGHT_SIZE + 2] = make_float4(0.0f, 0.0f, 0.0f, 0.0f); light_data[i*LIGHT_SIZE + 3] = make_float4(samples, 0.0f, 0.0f, 0.0f); + light_data[i*LIGHT_SIZE + 4] = make_float4(max_bounces, 0.0f, 0.0f, 0.0f); } else if(light->type == LIGHT_AREA) { float3 axisu = light->axisu*(light->sizeu*light->size); @@ -580,8 +584,7 @@ void LightManager::device_update_points(Device *device, DeviceScene *dscene, Sce float invarea = (area > 0.0f)? 1.0f/area: 1.0f; float3 dir = light->dir; - if(len(dir) > 0.0f) - dir = normalize(dir); + dir = safe_normalize(dir); if(light->use_mis && area > 0.0f) shader_id |= SHADER_USE_MIS; @@ -590,6 +593,7 @@ void LightManager::device_update_points(Device *device, DeviceScene *dscene, Sce light_data[i*LIGHT_SIZE + 1] = make_float4(__int_as_float(shader_id), axisu.x, axisu.y, axisu.z); light_data[i*LIGHT_SIZE + 2] = make_float4(invarea, axisv.x, axisv.y, axisv.z); light_data[i*LIGHT_SIZE + 3] = make_float4(samples, dir.x, dir.y, dir.z); + light_data[i*LIGHT_SIZE + 4] = make_float4(max_bounces, 0.0f, 0.0f, 0.0f); } else if(light->type == LIGHT_SPOT) { shader_id &= ~SHADER_AREA_LIGHT; @@ -600,8 +604,7 @@ void LightManager::device_update_points(Device *device, DeviceScene *dscene, Sce float spot_smooth = (1.0f - spot_angle)*light->spot_smooth; float3 dir = light->dir; - if(len(dir) > 0.0f) - dir = normalize(dir); + dir = safe_normalize(dir); if(light->use_mis && radius > 0.0f) shader_id |= SHADER_USE_MIS; @@ -610,6 +613,7 @@ void LightManager::device_update_points(Device *device, DeviceScene *dscene, Sce light_data[i*LIGHT_SIZE + 1] = make_float4(__int_as_float(shader_id), radius, invarea, spot_angle); light_data[i*LIGHT_SIZE + 2] = make_float4(spot_smooth, dir.x, dir.y, dir.z); light_data[i*LIGHT_SIZE + 3] = make_float4(samples, 0.0f, 0.0f, 0.0f); + light_data[i*LIGHT_SIZE + 4] = make_float4(max_bounces, 0.0f, 0.0f, 0.0f); } } diff --git a/intern/cycles/render/light.h b/intern/cycles/render/light.h index 89091bb5f9e..cf769ac5aed 100644 --- a/intern/cycles/render/light.h +++ b/intern/cycles/render/light.h @@ -58,6 +58,7 @@ public: int shader; int samples; + int max_bounces; void tag_update(Scene *scene); }; diff --git a/intern/cycles/render/mesh.cpp b/intern/cycles/render/mesh.cpp index 42103396b53..6137f7d4fdc 100644 --- a/intern/cycles/render/mesh.cpp +++ b/intern/cycles/render/mesh.cpp @@ -93,6 +93,8 @@ Mesh::Mesh() attributes.triangle_mesh = this; curve_attributes.curve_mesh = this; + + has_volume = false; } Mesh::~Mesh() diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp index e8476bfac4c..62bdf7cd162 100644 --- a/intern/cycles/render/nodes.cpp +++ b/intern/cycles/render/nodes.cpp @@ -17,6 +17,7 @@ #include "image.h" #include "nodes.h" #include "svm.h" +#include "svm_math_util.h" #include "osl.h" #include "sky_model.h" @@ -3669,7 +3670,7 @@ static ShaderEnum math_type_init() enm.insert("Less Than", NODE_MATH_LESS_THAN); enm.insert("Greater Than", NODE_MATH_GREATER_THAN); enm.insert("Modulo", NODE_MATH_MODULO); - enm.insert("Absolute", NODE_MATH_ABSOLUTE); + enm.insert("Absolute", NODE_MATH_ABSOLUTE); return enm; } @@ -3682,9 +3683,24 @@ void MathNode::compile(SVMCompiler& compiler) ShaderInput *value2_in = input("Value2"); ShaderOutput *value_out = output("Value"); + compiler.stack_assign(value_out); + + /* Optimize math node without links to a single value node. */ + if(value1_in->link == NULL && value2_in->link == NULL) { + float optimized_value = svm_math((NodeMath)type_enum[type], + value1_in->value.x, + value2_in->value.x); + if(use_clamp) { + optimized_value = clamp(optimized_value, 0.0f, 1.0f); + } + compiler.add_node(NODE_VALUE_F, + __float_as_int(optimized_value), + value_out->stack_offset); + return; + } + compiler.stack_assign(value1_in); compiler.stack_assign(value2_in); - compiler.stack_assign(value_out); compiler.add_node(NODE_MATH, type_enum[type], value1_in->stack_offset, value2_in->stack_offset); compiler.add_node(NODE_MATH, value_out->stack_offset); diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp index 46ddab235d9..3b2a3ae0b33 100644 --- a/intern/cycles/render/object.cpp +++ b/intern/cycles/render/object.cpp @@ -318,6 +318,9 @@ void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene mtfm_pre = mtfm_pre * itfm; mtfm_post = mtfm_post * itfm; } + else { + flag |= SD_OBJECT_HAS_VERTEX_MOTION; + } memcpy(&objects_vector[i*OBJECT_VECTOR_SIZE+0], &mtfm_pre, sizeof(float4)*3); memcpy(&objects_vector[i*OBJECT_VECTOR_SIZE+3], &mtfm_post, sizeof(float4)*3); diff --git a/intern/cycles/render/osl.cpp b/intern/cycles/render/osl.cpp index f57e16471a1..b9180552ac2 100644 --- a/intern/cycles/render/osl.cpp +++ b/intern/cycles/render/osl.cpp @@ -248,10 +248,6 @@ void OSLShaderManager::shading_system_free() bool OSLShaderManager::osl_compile(const string& inputfile, const string& outputfile) { -#if OSL_LIBRARY_VERSION_CODE < 10500 - typedef string string_view; -#endif - vector<string_view> options; string stdosl_path; string shader_path = path_get("shader"); @@ -748,11 +744,7 @@ OSL::ShadingAttribStateRef OSLCompiler::compile_type(Shader *shader, ShaderGraph current_type = type; -#if OSL_LIBRARY_VERSION_CODE >= 10501 OSL::ShadingAttribStateRef group = ss->ShaderGroupBegin(shader->name.c_str()); -#else - ss->ShaderGroupBegin(shader->name.c_str()); -#endif ShaderNode *output = graph->output(); set<ShaderNode*> dependencies; @@ -780,13 +772,7 @@ OSL::ShadingAttribStateRef OSLCompiler::compile_type(Shader *shader, ShaderGraph ss->ShaderGroupEnd(); -#if OSL_LIBRARY_VERSION_CODE >= 10501 return group; -#else - OSL::ShadingAttribStateRef group = ss->state(); - ss->clear_state(); - return group; -#endif } void OSLCompiler::compile(OSLGlobals *og, Shader *shader) diff --git a/intern/cycles/render/scene.cpp b/intern/cycles/render/scene.cpp index 6c3f98bc9b0..ccb03eaf1e0 100644 --- a/intern/cycles/render/scene.cpp +++ b/intern/cycles/render/scene.cpp @@ -153,81 +153,83 @@ void Scene::device_update(Device *device_, Progress& progress) progress.set_status("Updating Shaders"); shader_manager->device_update(device, &dscene, this, progress); - if(progress.get_cancel()) return; + if(progress.get_cancel() || device->have_error()) return; progress.set_status("Updating Images"); image_manager->device_update(device, &dscene, progress); - if(progress.get_cancel()) return; + if(progress.get_cancel() || device->have_error()) return; progress.set_status("Updating Background"); background->device_update(device, &dscene, this); - if(progress.get_cancel()) return; + if(progress.get_cancel() || device->have_error()) return; progress.set_status("Updating Objects"); object_manager->device_update(device, &dscene, this, progress); - if(progress.get_cancel()) return; + if(progress.get_cancel() || device->have_error()) return; progress.set_status("Updating Meshes"); mesh_manager->device_update(device, &dscene, this, progress); - if(progress.get_cancel()) return; + if(progress.get_cancel() || device->have_error()) return; progress.set_status("Updating Objects Flags"); object_manager->device_update_flags(device, &dscene, this, progress); - if(progress.get_cancel()) return; + if(progress.get_cancel() || device->have_error()) return; progress.set_status("Updating Hair Systems"); curve_system_manager->device_update(device, &dscene, this, progress); - if(progress.get_cancel()) return; + if(progress.get_cancel() || device->have_error()) return; progress.set_status("Updating Lookup Tables"); lookup_tables->device_update(device, &dscene); - if(progress.get_cancel()) return; + if(progress.get_cancel() || device->have_error()) return; /* TODO(sergey): Make sure camera is not needed above. */ progress.set_status("Updating Camera"); camera->device_update(device, &dscene, this); - if(progress.get_cancel()) return; + if(progress.get_cancel() || device->have_error()) return; progress.set_status("Updating Lights"); light_manager->device_update(device, &dscene, this, progress); - if(progress.get_cancel()) return; + if(progress.get_cancel() || device->have_error()) return; progress.set_status("Updating Particle Systems"); particle_system_manager->device_update(device, &dscene, this, progress); - if(progress.get_cancel()) return; + if(progress.get_cancel() || device->have_error()) return; progress.set_status("Updating Film"); film->device_update(device, &dscene, this); - if(progress.get_cancel()) return; + if(progress.get_cancel() || device->have_error()) return; progress.set_status("Updating Integrator"); integrator->device_update(device, &dscene, this); - if(progress.get_cancel()) return; + if(progress.get_cancel() || device->have_error()) return; progress.set_status("Updating Lookup Tables"); lookup_tables->device_update(device, &dscene); - if(progress.get_cancel()) return; + if(progress.get_cancel() || device->have_error()) return; progress.set_status("Updating Baking"); bake_manager->device_update(device, &dscene, this, progress); - if(progress.get_cancel()) return; + if(progress.get_cancel() || device->have_error()) return; - progress.set_status("Updating Device", "Writing constant memory"); - device->const_copy_to("__data", &dscene.data, sizeof(dscene.data)); + if(device->have_error() == false) { + progress.set_status("Updating Device", "Writing constant memory"); + device->const_copy_to("__data", &dscene.data, sizeof(dscene.data)); + } } Scene::MotionType Scene::need_motion(bool advanced_shading) @@ -277,7 +279,8 @@ bool Scene::need_reset() || shader_manager->need_update || particle_system_manager->need_update || curve_system_manager->need_update - || bake_manager->need_update); + || bake_manager->need_update + || film->need_update); } void Scene::reset() diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp index 9fcd9fa85f5..c03a3dd081d 100644 --- a/intern/cycles/render/session.cpp +++ b/intern/cycles/render/session.cpp @@ -251,7 +251,7 @@ void Session::run_gpu() update_scene(); if(!device->error_message().empty()) - progress.set_cancel(device->error_message()); + progress.set_error(device->error_message()); if(progress.get_cancel()) break; @@ -292,7 +292,7 @@ void Session::run_gpu() } if(!device->error_message().empty()) - progress.set_cancel(device->error_message()); + progress.set_error(device->error_message()); tiles_written = update_progressive_refine(progress.get_cancel()); @@ -540,7 +540,7 @@ void Session::run_cpu() update_scene(); if(!device->error_message().empty()) - progress.set_cancel(device->error_message()); + progress.set_error(device->error_message()); if(progress.get_cancel()) break; @@ -558,7 +558,7 @@ void Session::run_cpu() need_tonemap = true; if(!device->error_message().empty()) - progress.set_cancel(device->error_message()); + progress.set_error(device->error_message()); } device->task_wait(); @@ -580,7 +580,7 @@ void Session::run_cpu() } if(!device->error_message().empty()) - progress.set_cancel(device->error_message()); + progress.set_error(device->error_message()); tiles_written = update_progressive_refine(progress.get_cancel()); } @@ -604,7 +604,7 @@ void Session::load_kernels() if(message.empty()) message = "Failed loading render kernel, see console for errors"; - progress.set_cancel(message); + progress.set_error(message); progress.set_status("Error", message); progress.set_update(); return; diff --git a/intern/cycles/render/shader.cpp b/intern/cycles/render/shader.cpp index d8925852c21..5c30d191d34 100644 --- a/intern/cycles/render/shader.cpp +++ b/intern/cycles/render/shader.cpp @@ -138,7 +138,8 @@ Shader::Shader() use_mis = true; use_transparent_shadow = true; heterogeneous_volume = true; - volume_sampling_method = 0; + volume_sampling_method = VOLUME_SAMPLING_DISTANCE; + volume_interpolation_method = VOLUME_INTERPOLATION_LINEAR; has_surface = false; has_surface_transparent = false; @@ -352,10 +353,14 @@ void ShaderManager::device_update_common(Device *device, DeviceScene *dscene, Sc flag |= SD_HAS_BSSRDF_BUMP; if(shader->has_converter_blackbody) has_converter_blackbody = true; - if(shader->volume_sampling_method == 1) + if(shader->volume_sampling_method == VOLUME_SAMPLING_EQUIANGULAR) flag |= SD_VOLUME_EQUIANGULAR; - if(shader->volume_sampling_method == 2) + if(shader->volume_sampling_method == VOLUME_SAMPLING_MULTIPLE_IMPORTANCE) flag |= SD_VOLUME_MIS; + if(shader->volume_interpolation_method == VOLUME_INTERPOLATION_CUBIC) + flag |= SD_VOLUME_CUBIC; + if(shader->graph_bump) + flag |= SD_HAS_BUMP; /* regular shader */ shader_flag[i++] = flag; diff --git a/intern/cycles/render/shader.h b/intern/cycles/render/shader.h index 368496fd188..509c9385e6d 100644 --- a/intern/cycles/render/shader.h +++ b/intern/cycles/render/shader.h @@ -18,6 +18,15 @@ #define __SHADER_H__ #ifdef WITH_OSL +# if defined(_MSC_VER) +/* Prevent OSL from polluting the context with weird macros from windows.h. + * TODO(sergey): Ideally it's only enough to have class/struct declarations in + * the header and skip header include here. + */ +# define NOGDI +# define NOMINMAX +# define WIN32_LEAN_AND_MEAN +# endif # include <OSL/oslexec.h> #endif @@ -44,6 +53,18 @@ enum ShadingSystem { SHADINGSYSTEM_SVM }; +/* Keep those in sync with the python-defined enum. */ +enum VolumeSampling { + VOLUME_SAMPLING_DISTANCE = 0, + VOLUME_SAMPLING_EQUIANGULAR = 1, + VOLUME_SAMPLING_MULTIPLE_IMPORTANCE = 2, +}; + +enum VolumeInterpolation { + VOLUME_INTERPOLATION_LINEAR = 0, + VOLUME_INTERPOLATION_CUBIC = 1, +}; + /* Shader describing the appearance of a Mesh, Light or Background. * * While there is only a single shader graph, it has three outputs: surface, @@ -68,7 +89,8 @@ public: bool use_mis; bool use_transparent_shadow; bool heterogeneous_volume; - int volume_sampling_method; + VolumeSampling volume_sampling_method; + int volume_interpolation_method; /* synchronization */ bool need_update; diff --git a/intern/cycles/subd/subd_dice.cpp b/intern/cycles/subd/subd_dice.cpp index 05ff5ca4b65..6bd18d08ba0 100644 --- a/intern/cycles/subd/subd_dice.cpp +++ b/intern/cycles/subd/subd_dice.cpp @@ -117,8 +117,8 @@ void EdgeDice::stitch_triangles(Patch *patch, vector<int>& outer, vector<int>& i } else { /* length of diagonals */ - float len1 = len(mesh_P[inner[i]] - mesh_P[outer[j+1]]); - float len2 = len(mesh_P[outer[j]] - mesh_P[inner[i+1]]); + float len1 = len_squared(mesh_P[inner[i]] - mesh_P[outer[j+1]]); + float len2 = len_squared(mesh_P[outer[j]] - mesh_P[inner[i+1]]); /* use smallest diagonal */ if(len1 < len2) diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt index 842d5efac79..a07deb68b15 100644 --- a/intern/cycles/util/CMakeLists.txt +++ b/intern/cycles/util/CMakeLists.txt @@ -10,7 +10,6 @@ set(INC_SYS set(SRC util_cache.cpp - util_dynlib.cpp util_logging.cpp util_md5.cpp util_path.cpp @@ -31,10 +30,10 @@ endif() set(SRC_HEADERS util_algorithm.h util_args.h + util_atomic.h util_boundbox.h util_cache.h util_debug.h - util_dynlib.h util_foreach.h util_function.h util_half.h diff --git a/intern/cycles/util/util_dynlib.h b/intern/cycles/util/util_atomic.h index b30cf98c1b9..1bbb0a86e23 100644 --- a/intern/cycles/util/util_dynlib.h +++ b/intern/cycles/util/util_atomic.h @@ -1,5 +1,5 @@ /* - * Copyright 2011-2013 Blender Foundation + * Copyright 2014 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,18 +14,20 @@ * limitations under the License */ -#ifndef __UTIL_DYNLIB_H__ -#define __UTIL_DYNLIB_H__ +#ifndef __UTIL_ATOMIC_H__ +#define __UTIL_ATOMIC_H__ -CCL_NAMESPACE_BEGIN +/* Using atomic ops header from Blender. */ +#include "atomic_ops.h" -struct DynamicLibrary; - -DynamicLibrary *dynamic_library_open(const char *name); -void *dynamic_library_find(DynamicLibrary *lib, const char *name); -void dynamic_library_close(DynamicLibrary *lib); - -CCL_NAMESPACE_END - -#endif /* __UTIL_DYNLIB_H__ */ +ATOMIC_INLINE void atomic_update_max_z(size_t *maximum_value, size_t value) +{ + size_t prev_value = *maximum_value; + while (prev_value < value) { + if (atomic_cas_z(maximum_value, prev_value, value) != prev_value) { + break; + } + } +} +#endif /* __UTIL_ATOMIC_H__ */ diff --git a/intern/cycles/util/util_dynlib.cpp b/intern/cycles/util/util_dynlib.cpp deleted file mode 100644 index 587cad607c8..00000000000 --- a/intern/cycles/util/util_dynlib.cpp +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License - */ - -#include <stdlib.h> - -#include "util_dynlib.h" - -#ifdef _WIN32 - -#include <windows.h> - -CCL_NAMESPACE_BEGIN - -struct DynamicLibrary { - HMODULE module; -}; - -DynamicLibrary *dynamic_library_open(const char *name) -{ - HMODULE module = LoadLibrary(name); - - if(!module) - return NULL; - - DynamicLibrary *lib = new DynamicLibrary(); - lib->module = module; - - return lib; -} - -void *dynamic_library_find(DynamicLibrary *lib, const char *name) -{ - return (void*)GetProcAddress(lib->module, name); -} - -void dynamic_library_close(DynamicLibrary *lib) -{ - FreeLibrary(lib->module); - delete lib; -} - -CCL_NAMESPACE_END - -#else - -#include <dlfcn.h> - -CCL_NAMESPACE_BEGIN - -struct DynamicLibrary { - void *module; -}; - -DynamicLibrary *dynamic_library_open(const char *name) -{ - void *module = dlopen(name, RTLD_NOW); - - if(!module) - return NULL; - - DynamicLibrary *lib = new DynamicLibrary(); - lib->module = module; - - return lib; -} - -void *dynamic_library_find(DynamicLibrary *lib, const char *name) -{ - return dlsym(lib->module, name); -} - -void dynamic_library_close(DynamicLibrary *lib) -{ - dlclose(lib->module); - delete lib; -} - -CCL_NAMESPACE_END - -#endif - diff --git a/intern/cycles/util/util_logging.h b/intern/cycles/util/util_logging.h index 991789e7460..2c5455051a4 100644 --- a/intern/cycles/util/util_logging.h +++ b/intern/cycles/util/util_logging.h @@ -43,7 +43,7 @@ public: #endif -class float3; +struct float3; std::ostream& operator <<(std::ostream &os, const float3 &value); diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h index c332e1709db..78005546a01 100644 --- a/intern/cycles/util/util_math.h +++ b/intern/cycles/util/util_math.h @@ -314,6 +314,12 @@ ccl_device_inline float2 normalize_len(const float2 a, float *t) return a/(*t); } +ccl_device_inline float2 safe_normalize(const float2 a) +{ + float t = len(a); + return (t)? a/t: a; +} + ccl_device_inline bool operator==(const float2 a, const float2 b) { return (a.x == b.x && a.y == b.y); @@ -510,6 +516,12 @@ ccl_device_inline float3 normalize_len(const float3 a, float *t) return a/(*t); } +ccl_device_inline float3 safe_normalize(const float3 a) +{ + float t = len(a); + return (t)? a/t: a; +} + #ifndef __KERNEL_OPENCL__ ccl_device_inline bool operator==(const float3 a, const float3 b) @@ -817,6 +829,12 @@ ccl_device_inline float4 normalize(const float4 a) return a/len(a); } +ccl_device_inline float4 safe_normalize(const float4 a) +{ + float t = len(a); + return (t)? a/t: a; +} + ccl_device_inline float4 min(float4 a, float4 b) { #ifdef __KERNEL_SSE__ diff --git a/intern/cycles/util/util_md5.cpp b/intern/cycles/util/util_md5.cpp index add0d18c742..b2a32c45287 100644 --- a/intern/cycles/util/util_md5.cpp +++ b/intern/cycles/util/util_md5.cpp @@ -152,8 +152,8 @@ void MD5Hash::process(const uint8_t *data /*[64]*/) * a = b + ((a + F(b,c,d) + X[k] + T[i]) <<< s). */ #define F(x, y, z) (((x) & (y)) | (~(x) & (z))) #define SET(a, b, c, d, k, s, Ti)\ - t = a + F(b,c,d) + X[k] + Ti;\ - a = ROTATE_LEFT(t, s) + b + t = a + F(b,c,d) + X[k] + Ti;\ + a = ROTATE_LEFT(t, s) + b /* Do the following 16 operations. */ SET(a, b, c, d, 0, 7, T1); SET(d, a, b, c, 1, 12, T2); @@ -178,8 +178,8 @@ void MD5Hash::process(const uint8_t *data /*[64]*/) * a = b + ((a + G(b,c,d) + X[k] + T[i]) <<< s). */ #define G(x, y, z) (((x) & (z)) | ((y) & ~(z))) #define SET(a, b, c, d, k, s, Ti)\ - t = a + G(b,c,d) + X[k] + Ti;\ - a = ROTATE_LEFT(t, s) + b + t = a + G(b,c,d) + X[k] + Ti;\ + a = ROTATE_LEFT(t, s) + b /* Do the following 16 operations. */ SET(a, b, c, d, 1, 5, T17); SET(d, a, b, c, 6, 9, T18); @@ -230,8 +230,8 @@ void MD5Hash::process(const uint8_t *data /*[64]*/) * a = b + ((a + I(b,c,d) + X[k] + T[i]) <<< s). */ #define I(x, y, z) ((y) ^ ((x) | ~(z))) #define SET(a, b, c, d, k, s, Ti)\ - t = a + I(b,c,d) + X[k] + Ti;\ - a = ROTATE_LEFT(t, s) + b + t = a + I(b,c,d) + X[k] + Ti;\ + a = ROTATE_LEFT(t, s) + b /* Do the following 16 operations. */ SET(a, b, c, d, 0, 6, T49); SET(d, a, b, c, 7, 10, T50); diff --git a/intern/cycles/util/util_optimization.h b/intern/cycles/util/util_optimization.h index 2feb3d6ab7e..fba8b1105f3 100644 --- a/intern/cycles/util/util_optimization.h +++ b/intern/cycles/util/util_optimization.h @@ -130,6 +130,9 @@ /* MinGW64 has conflicting declarations for these SSE headers in <windows.h>. * Since we can't avoid including <windows.h>, better only include that */ +#define NOGDI +#define NOMINMAX +#define WIN32_LEAN_AND_MEAN #include <windows.h> #endif diff --git a/intern/cycles/util/util_path.cpp b/intern/cycles/util/util_path.cpp index 85d19b6a325..aa424045ece 100644 --- a/intern/cycles/util/util_path.cpp +++ b/intern/cycles/util/util_path.cpp @@ -41,21 +41,12 @@ static string cached_user_path = ""; static boost::filesystem::path to_boost(const string& path) { -#ifdef _MSC_VER - std::wstring path_utf16 = Strutil::utf8_to_utf16(path.c_str()); - return boost::filesystem::path(path_utf16.c_str()); -#else return boost::filesystem::path(path.c_str()); -#endif } static string from_boost(const boost::filesystem::path& path) { -#ifdef _MSC_VER - return Strutil::utf16_to_utf8(path.wstring().c_str()); -#else return path.string().c_str(); -#endif } void path_init(const string& path, const string& user_path) @@ -259,14 +250,7 @@ string path_source_replace_includes(const string& source_, const string& path) FILE *path_fopen(const string& path, const string& mode) { -#ifdef _WIN32 - std::wstring path_utf16 = Strutil::utf8_to_utf16(path); - std::wstring mode_utf16 = Strutil::utf8_to_utf16(mode); - - return _wfopen(path_utf16.c_str(), mode_utf16.c_str()); -#else return fopen(path.c_str(), mode.c_str()); -#endif } void path_cache_clear_except(const string& name, const set<string>& except) diff --git a/intern/cycles/util/util_progress.h b/intern/cycles/util/util_progress.h index e721a3f5047..238fb976778 100644 --- a/intern/cycles/util/util_progress.h +++ b/intern/cycles/util/util_progress.h @@ -46,6 +46,8 @@ public: update_cb = NULL; cancel = false; cancel_message = ""; + error = false; + error_message = ""; cancel_cb = NULL; } @@ -79,6 +81,8 @@ public: sync_substatus = ""; cancel = false; cancel_message = ""; + error = false; + error_message = ""; } /* cancel */ @@ -108,6 +112,28 @@ public: cancel_cb = function; } + /* error */ + void set_error(const string& error_message_) + { + thread_scoped_lock lock(progress_mutex); + error_message = error_message_; + error = true; + /* If error happens we also stop rendering. */ + cancel_message = error_message_; + cancel = true; + } + + bool get_error() + { + return error; + } + + string get_error_message() + { + thread_scoped_lock lock(progress_mutex); + return error_message; + } + /* tile and timing information */ void set_start_time(double start_time_) @@ -259,6 +285,9 @@ protected: volatile bool cancel; string cancel_message; + + volatile bool error; + string error_message; }; CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_ssef.h b/intern/cycles/util/util_ssef.h index f4236cc616e..5e452ea03b4 100644 --- a/intern/cycles/util/util_ssef.h +++ b/intern/cycles/util/util_ssef.h @@ -151,7 +151,7 @@ __forceinline ssef maxi(const ssef& a, const ssef& b) { /// Ternary Operators //////////////////////////////////////////////////////////////////////////////// -#if defined(__KERNEL_AVX2__) +#if defined(__KERNEL_AVX2__) && !defined(_MSC_VER) // see T41066 __forceinline const ssef madd (const ssef& a, const ssef& b, const ssef& c) { return _mm_fmadd_ps(a,b,c); } __forceinline const ssef msub (const ssef& a, const ssef& b, const ssef& c) { return _mm_fmsub_ps(a,b,c); } __forceinline const ssef nmadd(const ssef& a, const ssef& b, const ssef& c) { return _mm_fnmadd_ps(a,b,c); } diff --git a/intern/cycles/util/util_stats.h b/intern/cycles/util/util_stats.h index 8758b823084..fe6c162366e 100644 --- a/intern/cycles/util/util_stats.h +++ b/intern/cycles/util/util_stats.h @@ -17,6 +17,8 @@ #ifndef __UTIL_STATS_H__ #define __UTIL_STATS_H__ +#include "util_atomic.h" + CCL_NAMESPACE_BEGIN class Stats { @@ -24,14 +26,13 @@ public: Stats() : mem_used(0), mem_peak(0) {} void mem_alloc(size_t size) { - mem_used += size; - if(mem_used > mem_peak) - mem_peak = mem_used; + atomic_add_z(&mem_used, size); + atomic_update_max_z(&mem_peak, mem_used); } void mem_free(size_t size) { assert(mem_used >= size); - mem_used -= size; + atomic_sub_z(&mem_used, size); } size_t mem_used; diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h index 2a199e591bf..ce84200d0b6 100644 --- a/intern/cycles/util/util_types.h +++ b/intern/cycles/util/util_types.h @@ -33,11 +33,7 @@ #ifndef __KERNEL_GPU__ -# ifdef NDEBUG -# define ccl_device static inline -# else -# define ccl_device static -# endif +#define ccl_device static inline #define ccl_device_noinline static #define ccl_global #define ccl_constant @@ -53,11 +49,7 @@ #define ccl_try_align(...) /* not support for function arguments (error C2719) */ #endif #define ccl_may_alias -# ifdef NDEBUG -# define ccl_always_inline __forceinline -# else -# define ccl_always_inline -# endif +#define ccl_always_inline __forceinline #define ccl_maybe_unused #else diff --git a/intern/cycles/util/util_vector.h b/intern/cycles/util/util_vector.h index cc6e8a371ed..3d885691c92 100644 --- a/intern/cycles/util/util_vector.h +++ b/intern/cycles/util/util_vector.h @@ -107,9 +107,6 @@ public: if(datasize > 0) { data = (T*)malloc_aligned(sizeof(T)*datasize, alignment); memcpy(data, &from[0], datasize*sizeof(T)); - free_aligned(data); - data = (T*)malloc_aligned(sizeof(T)*datasize, alignment); - memcpy(data, &from[0], datasize*sizeof(T)); } return *this; |