Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/intern
diff options
context:
space:
mode:
author: Jason Wilkins <Jason.A.Wilkins@gmail.com> 2014-05-22 04:02:02 +0400
committer: Jason Wilkins <Jason.A.Wilkins@gmail.com> 2014-05-22 04:02:02 +0400
commit: 6eff1cbebcf0766d2fe69db9b0fb3f76ede2c06b (patch)
tree: 3af4122e291f53f88b63ec6ded2e0fa7790e04ac /intern
parent: 49de1ada8dcba35862759e0f7da5ca2209b4f588 (diff)
parent: 146a1c77eacb925eb7c86bb49495c0f09adc607c (diff)
Merge branch 'soc-2014-viewport' into soc-2013-viewport_fx
Conflicts: intern/cycles/device/device_cuda.cpp intern/ghost/intern/GHOST_WindowCocoa.mm source/blender/blenfont/intern/blf_font.c source/blender/blenfont/intern/blf_translation.c source/blender/blenkernel/BKE_brush.h source/blender/blenkernel/BKE_pbvh.h source/blender/blenkernel/intern/cdderivedmesh.c source/blender/blenkernel/intern/editderivedmesh.c source/blender/blenkernel/intern/mesh.c source/blender/blenkernel/intern/subsurf_ccg.c source/blender/blenlib/BLI_fileops.h source/blender/blenlib/BLI_math_matrix.h source/blender/blenlib/intern/fileops.c source/blender/blenlib/intern/math_matrix.c source/blender/editors/animation/anim_channels_defines.c source/blender/editors/animation/anim_draw.c source/blender/editors/animation/keyframes_draw.c source/blender/editors/include/ED_armature.h source/blender/editors/interface/interface.c source/blender/editors/interface/interface_draw.c source/blender/editors/interface/interface_icons.c source/blender/editors/interface/interface_panel.c source/blender/editors/interface/interface_widgets.c source/blender/editors/interface/view2d.c source/blender/editors/mask/mask_draw.c source/blender/editors/mesh/editmesh_select.c source/blender/editors/render/render_opengl.c source/blender/editors/screen/area.c source/blender/editors/screen/glutil.c source/blender/editors/sculpt_paint/paint_cursor.c source/blender/editors/sculpt_paint/paint_image.c source/blender/editors/sculpt_paint/paint_image_proj.c source/blender/editors/sculpt_paint/paint_utils.c source/blender/editors/sculpt_paint/sculpt_intern.h source/blender/editors/space_buttons/space_buttons.c source/blender/editors/space_clip/clip_dopesheet_draw.c source/blender/editors/space_clip/clip_draw.c source/blender/editors/space_clip/clip_graph_draw.c source/blender/editors/space_clip/clip_utils.c source/blender/editors/space_console/console_draw.c source/blender/editors/space_file/file_draw.c source/blender/editors/space_file/file_ops.c 
source/blender/editors/space_graph/graph_draw.c source/blender/editors/space_info/info_draw.c source/blender/editors/space_info/textview.c source/blender/editors/space_logic/logic_window.c source/blender/editors/space_nla/nla_draw.c source/blender/editors/space_outliner/outliner_draw.c source/blender/editors/space_sequencer/sequencer_draw.c source/blender/editors/space_view3d/drawanimviz.c source/blender/editors/space_view3d/drawarmature.c source/blender/editors/space_view3d/drawmesh.c source/blender/editors/space_view3d/drawobject.c source/blender/editors/space_view3d/view3d_draw.c source/blender/editors/space_view3d/view3d_fly.c source/blender/editors/space_view3d/view3d_intern.h source/blender/editors/space_view3d/view3d_walk.c source/blender/editors/transform/transform.c source/blender/editors/transform/transform_manipulator.c source/blender/editors/util/ed_util.c source/blender/editors/uvedit/uvedit_draw.c source/blender/gpu/GPU_buffers.h source/blender/gpu/intern/gpu_buffers.c source/blender/gpu/intern/gpu_codegen.c source/blender/gpu/intern/gpu_codegen.h source/blender/gpu/intern/gpu_draw.c source/blender/render/intern/source/convertblender.c source/blender/windowmanager/intern/wm_operators.c source/blender/windowmanager/intern/wm_subwindow.c source/blender/windowmanager/intern/wm_window.c
Diffstat (limited to 'intern')
-rw-r--r--intern/SConscript3
-rw-r--r--intern/audaspace/OpenAL/AUD_OpenALDevice.cpp2
-rw-r--r--intern/audaspace/ffmpeg/AUD_FFMPEGWriter.cpp65
-rw-r--r--intern/audaspace/ffmpeg/AUD_FFMPEGWriter.h17
-rw-r--r--intern/audaspace/intern/AUD_AnimateableProperty.cpp20
-rw-r--r--intern/audaspace/intern/AUD_AnimateableProperty.h7
-rw-r--r--intern/audaspace/intern/AUD_ConverterFunctions.h5
-rw-r--r--intern/audaspace/intern/AUD_Sequencer.cpp1
-rw-r--r--intern/audaspace/intern/AUD_SequencerEntry.cpp2
-rw-r--r--intern/cycles/CMakeLists.txt83
-rw-r--r--intern/cycles/SConscript10
-rw-r--r--intern/cycles/app/cycles_standalone.cpp109
-rw-r--r--intern/cycles/app/cycles_xml.cpp139
-rw-r--r--intern/cycles/app/cycles_xml.h11
-rw-r--r--intern/cycles/blender/CCL_api.h7
-rw-r--r--intern/cycles/blender/CMakeLists.txt5
-rw-r--r--intern/cycles/blender/addon/__init__.py5
-rw-r--r--intern/cycles/blender/addon/engine.py6
-rw-r--r--intern/cycles/blender/addon/properties.py86
-rw-r--r--intern/cycles/blender/addon/ui.py160
-rw-r--r--intern/cycles/blender/blender_camera.cpp28
-rw-r--r--intern/cycles/blender/blender_curves.cpp62
-rw-r--r--intern/cycles/blender/blender_mesh.cpp261
-rw-r--r--intern/cycles/blender/blender_object.cpp138
-rw-r--r--intern/cycles/blender/blender_particles.cpp5
-rw-r--r--intern/cycles/blender/blender_python.cpp38
-rw-r--r--intern/cycles/blender/blender_session.cpp346
-rw-r--r--intern/cycles/blender/blender_session.h5
-rw-r--r--intern/cycles/blender/blender_shader.cpp11
-rw-r--r--intern/cycles/blender/blender_sync.cpp4
-rw-r--r--intern/cycles/blender/blender_sync.h12
-rw-r--r--intern/cycles/blender/blender_util.h66
-rw-r--r--intern/cycles/bvh/bvh.cpp115
-rw-r--r--intern/cycles/bvh/bvh.h4
-rw-r--r--intern/cycles/bvh/bvh_binning.cpp18
-rw-r--r--intern/cycles/bvh/bvh_build.cpp117
-rw-r--r--intern/cycles/bvh/bvh_build.h6
-rw-r--r--intern/cycles/bvh/bvh_params.h30
-rw-r--r--intern/cycles/bvh/bvh_sort.cpp4
-rw-r--r--intern/cycles/bvh/bvh_split.cpp44
-rw-r--r--intern/cycles/bvh/bvh_split.h4
-rw-r--r--intern/cycles/device/device.cpp11
-rw-r--r--intern/cycles/device/device.h12
-rw-r--r--intern/cycles/device/device_cpu.cpp14
-rw-r--r--intern/cycles/device/device_cuda.cpp340
-rw-r--r--intern/cycles/device/device_memory.h17
-rw-r--r--intern/cycles/device/device_multi.cpp9
-rw-r--r--intern/cycles/device/device_network.cpp8
-rw-r--r--intern/cycles/device/device_network.h10
-rw-r--r--intern/cycles/device/device_opencl.cpp126
-rw-r--r--intern/cycles/kernel/CMakeLists.txt87
-rw-r--r--intern/cycles/kernel/SConscript25
-rw-r--r--intern/cycles/kernel/closure/bsdf_hair.h14
-rw-r--r--intern/cycles/kernel/closure/bsdf_microfacet.h8
-rw-r--r--intern/cycles/kernel/closure/bsdf_phong_ramp.h4
-rw-r--r--intern/cycles/kernel/closure/bsdf_util.h13
-rw-r--r--intern/cycles/kernel/closure/bsdf_westin.h7
-rw-r--r--intern/cycles/kernel/geom/geom.h44
-rw-r--r--intern/cycles/kernel/geom/geom_attribute.h71
-rw-r--r--intern/cycles/kernel/geom/geom_bvh.h318
-rw-r--r--intern/cycles/kernel/geom/geom_bvh_shadow.h375
-rw-r--r--intern/cycles/kernel/geom/geom_bvh_subsurface.h (renamed from intern/cycles/kernel/kernel_bvh_subsurface.h)65
-rw-r--r--intern/cycles/kernel/geom/geom_bvh_traversal.h (renamed from intern/cycles/kernel/kernel_bvh_traversal.h)75
-rw-r--r--intern/cycles/kernel/geom/geom_curve.h1035
-rw-r--r--intern/cycles/kernel/geom/geom_motion_curve.h148
-rw-r--r--intern/cycles/kernel/geom/geom_motion_triangle.h392
-rw-r--r--intern/cycles/kernel/geom/geom_object.h (renamed from intern/cycles/kernel/kernel_object.h)201
-rw-r--r--intern/cycles/kernel/geom/geom_primitive.h (renamed from intern/cycles/kernel/kernel_primitive.h)149
-rw-r--r--intern/cycles/kernel/geom/geom_triangle.h379
-rw-r--r--intern/cycles/kernel/geom/geom_volume.h75
-rw-r--r--intern/cycles/kernel/kernel.cpp10
-rw-r--r--intern/cycles/kernel/kernel.cu96
-rw-r--r--intern/cycles/kernel/kernel.h2
-rw-r--r--intern/cycles/kernel/kernel_accumulate.h25
-rw-r--r--intern/cycles/kernel/kernel_avx.cpp2
-rw-r--r--intern/cycles/kernel/kernel_bvh.h1258
-rw-r--r--intern/cycles/kernel/kernel_camera.h6
-rw-r--r--intern/cycles/kernel/kernel_compat_cpu.h133
-rw-r--r--intern/cycles/kernel/kernel_compat_cuda.h2
-rw-r--r--intern/cycles/kernel/kernel_compat_opencl.h23
-rw-r--r--intern/cycles/kernel/kernel_curve.h137
-rw-r--r--intern/cycles/kernel/kernel_displace.h302
-rw-r--r--intern/cycles/kernel/kernel_emission.h102
-rw-r--r--intern/cycles/kernel/kernel_film.h4
-rw-r--r--intern/cycles/kernel/kernel_light.h75
-rw-r--r--intern/cycles/kernel/kernel_montecarlo.h5
-rw-r--r--intern/cycles/kernel/kernel_passes.h3
-rw-r--r--intern/cycles/kernel/kernel_path.h495
-rw-r--r--intern/cycles/kernel/kernel_path_state.h5
-rw-r--r--intern/cycles/kernel/kernel_projection.h8
-rw-r--r--intern/cycles/kernel/kernel_random.h3
-rw-r--r--intern/cycles/kernel/kernel_shader.h338
-rw-r--r--intern/cycles/kernel/kernel_shadow.h222
-rw-r--r--intern/cycles/kernel/kernel_sse2.cpp2
-rw-r--r--intern/cycles/kernel/kernel_sse3.cpp2
-rw-r--r--intern/cycles/kernel/kernel_sse41.cpp2
-rw-r--r--intern/cycles/kernel/kernel_textures.h57
-rw-r--r--intern/cycles/kernel/kernel_triangle.h180
-rw-r--r--intern/cycles/kernel/kernel_types.h154
-rw-r--r--intern/cycles/kernel/kernel_volume.h689
-rw-r--r--intern/cycles/kernel/osl/osl_services.cpp115
-rw-r--r--intern/cycles/kernel/osl/osl_services.h6
-rw-r--r--intern/cycles/kernel/osl/osl_shader.cpp14
-rw-r--r--intern/cycles/kernel/shaders/CMakeLists.txt1
-rw-r--r--intern/cycles/kernel/shaders/node_absorption_volume.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_fresnel.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_glass_bsdf.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_image_texture.osl13
-rw-r--r--intern/cycles/kernel/shaders/node_light_path.osl7
-rw-r--r--intern/cycles/kernel/shaders/node_math.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_mix.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_refraction_bsdf.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_scatter_volume.osl2
-rw-r--r--intern/cycles/kernel/shaders/node_uv_map.osl45
-rw-r--r--intern/cycles/kernel/svm/svm.h20
-rw-r--r--intern/cycles/kernel/svm/svm_attribute.h4
-rw-r--r--intern/cycles/kernel/svm/svm_closure.h123
-rw-r--r--intern/cycles/kernel/svm/svm_geometry.h22
-rw-r--r--intern/cycles/kernel/svm/svm_image.h119
-rw-r--r--intern/cycles/kernel/svm/svm_light_path.h1
-rw-r--r--intern/cycles/kernel/svm/svm_math.h2
-rw-r--r--intern/cycles/kernel/svm/svm_mix.h2
-rw-r--r--intern/cycles/kernel/svm/svm_noise.h14
-rw-r--r--intern/cycles/kernel/svm/svm_sepcomb_hsv.h12
-rw-r--r--intern/cycles/kernel/svm/svm_sky.h2
-rw-r--r--intern/cycles/kernel/svm/svm_tex_coord.h77
-rw-r--r--intern/cycles/kernel/svm/svm_texture.h93
-rw-r--r--intern/cycles/kernel/svm/svm_types.h13
-rw-r--r--intern/cycles/kernel/svm/svm_vector_transform.h6
-rw-r--r--intern/cycles/kernel/svm/svm_voronoi.h19
-rw-r--r--intern/cycles/kernel/svm/svm_wavelength.h54
-rw-r--r--intern/cycles/kernel/svm/svm_wireframe.h12
-rw-r--r--intern/cycles/render/CMakeLists.txt2
-rw-r--r--intern/cycles/render/attribute.cpp174
-rw-r--r--intern/cycles/render/attribute.h26
-rw-r--r--intern/cycles/render/background.cpp4
-rw-r--r--intern/cycles/render/bake.cpp206
-rw-r--r--intern/cycles/render/bake.h77
-rw-r--r--intern/cycles/render/blackbody.cpp54
-rw-r--r--intern/cycles/render/buffers.cpp4
-rw-r--r--intern/cycles/render/buffers.h3
-rw-r--r--intern/cycles/render/camera.cpp22
-rw-r--r--intern/cycles/render/camera.h2
-rw-r--r--intern/cycles/render/curves.cpp2
-rw-r--r--intern/cycles/render/film.cpp7
-rw-r--r--intern/cycles/render/graph.cpp18
-rw-r--r--intern/cycles/render/graph.h3
-rw-r--r--intern/cycles/render/image.cpp221
-rw-r--r--intern/cycles/render/image.h33
-rw-r--r--intern/cycles/render/integrator.cpp27
-rw-r--r--intern/cycles/render/integrator.h5
-rw-r--r--intern/cycles/render/light.cpp70
-rw-r--r--intern/cycles/render/mesh.cpp252
-rw-r--r--intern/cycles/render/mesh.h15
-rw-r--r--intern/cycles/render/mesh_displace.cpp12
-rw-r--r--intern/cycles/render/nodes.cpp163
-rw-r--r--intern/cycles/render/nodes.h35
-rw-r--r--intern/cycles/render/object.cpp185
-rw-r--r--intern/cycles/render/object.h12
-rw-r--r--intern/cycles/render/osl.cpp40
-rw-r--r--intern/cycles/render/scene.cpp40
-rw-r--r--intern/cycles/render/scene.h9
-rw-r--r--intern/cycles/render/session.cpp48
-rw-r--r--intern/cycles/render/session.h10
-rw-r--r--intern/cycles/render/shader.cpp3
-rw-r--r--intern/cycles/render/shader.h1
-rw-r--r--intern/cycles/render/sky_model.cpp2
-rw-r--r--intern/cycles/render/svm.cpp256
-rw-r--r--intern/cycles/render/svm.h12
-rw-r--r--intern/cycles/render/tables.cpp5
-rw-r--r--intern/cycles/subd/subd_split.cpp2
-rw-r--r--intern/cycles/util/util_color.h34
-rw-r--r--intern/cycles/util/util_cuda.h3
-rw-r--r--intern/cycles/util/util_half.h22
-rw-r--r--intern/cycles/util/util_hash.h4
-rw-r--r--intern/cycles/util/util_math.h86
-rw-r--r--intern/cycles/util/util_md5.cpp2
-rw-r--r--intern/cycles/util/util_opencl.h4
-rw-r--r--intern/cycles/util/util_path.cpp5
-rw-r--r--intern/cycles/util/util_path.h1
-rw-r--r--intern/cycles/util/util_simd.h92
-rw-r--r--intern/cycles/util/util_system.cpp19
-rw-r--r--intern/cycles/util/util_transform.cpp9
-rw-r--r--intern/cycles/util/util_transform.h6
-rw-r--r--intern/cycles/util/util_types.h63
-rw-r--r--intern/cycles/util/util_view.cpp22
-rw-r--r--intern/elbeem/intern/mvmcoords.cpp2
-rw-r--r--intern/ffmpeg/ffmpeg_compat.h19
-rw-r--r--intern/ghost/intern/GHOST_NDOFManager.cpp6
-rw-r--r--intern/ghost/intern/GHOST_NDOFManager.h4
-rw-r--r--intern/ghost/intern/GHOST_NDOFManagerCocoa.mm6
-rw-r--r--intern/ghost/intern/GHOST_NDOFManagerX11.cpp41
-rw-r--r--intern/ghost/intern/GHOST_SystemWin32.cpp10
-rw-r--r--intern/ghost/intern/GHOST_SystemX11.cpp26
-rw-r--r--intern/ghost/intern/GHOST_WindowX11.cpp12
-rw-r--r--intern/ghost/intern/GHOST_WindowX11.h3
-rw-r--r--intern/ghost/test/multitest/MultiTest.c2
-rw-r--r--intern/guardedalloc/intern/mallocn.c9
-rw-r--r--intern/guardedalloc/intern/mallocn_guarded_impl.c10
-rw-r--r--intern/guardedalloc/intern/mallocn_intern.h8
-rw-r--r--intern/guardedalloc/intern/mallocn_lockfree_impl.c15
-rw-r--r--intern/itasc/SConscript5
-rw-r--r--intern/itasc/kdl/frameacc.hpp39
-rw-r--r--intern/itasc/kdl/frames.hpp29
-rw-r--r--intern/itasc/kdl/framevel.hpp37
-rw-r--r--intern/itasc/kdl/jacobian.hpp3
-rw-r--r--intern/itasc/kdl/jntarray.hpp4
-rw-r--r--intern/itasc/kdl/jntarrayacc.hpp5
-rw-r--r--intern/itasc/kdl/jntarrayvel.hpp5
-rw-r--r--intern/locale/CMakeLists.txt15
-rw-r--r--intern/locale/SConscript6
-rw-r--r--intern/locale/boost_locale_wrapper.cpp8
-rw-r--r--intern/rigidbody/rb_bullet_api.cpp4
-rw-r--r--intern/utfconv/utfconv.c2
214 files changed, 9512 insertions, 4502 deletions
diff --git a/intern/SConscript b/intern/SConscript
index 828c1adc20d..20803884a39 100644
--- a/intern/SConscript
+++ b/intern/SConscript
@@ -53,9 +53,6 @@ if env['WITH_BF_FLUID']:
if env['WITH_BF_CYCLES']:
SConscript(['cycles/SConscript'])
-if env['WITH_BF_BOOLEAN']:
- SConscript(['bsp/SConscript'])
-
if env['WITH_BF_INTERNATIONAL']:
SConscript(['locale/SConscript'])
diff --git a/intern/audaspace/OpenAL/AUD_OpenALDevice.cpp b/intern/audaspace/OpenAL/AUD_OpenALDevice.cpp
index c3877c2c9f2..d055c131183 100644
--- a/intern/audaspace/OpenAL/AUD_OpenALDevice.cpp
+++ b/intern/audaspace/OpenAL/AUD_OpenALDevice.cpp
@@ -994,7 +994,7 @@ void AUD_OpenALDevice::updateStreams()
if(info != AL_PLAYING)
{
// if it really stopped
- if(sound->m_eos)
+ if(sound->m_eos && info != AL_INITIAL)
{
if(sound->m_stop)
sound->m_stop(sound->m_stop_data);
diff --git a/intern/audaspace/ffmpeg/AUD_FFMPEGWriter.cpp b/intern/audaspace/ffmpeg/AUD_FFMPEGWriter.cpp
index d8f0d837fec..d30835da4e5 100644
--- a/intern/audaspace/ffmpeg/AUD_FFMPEGWriter.cpp
+++ b/intern/audaspace/ffmpeg/AUD_FFMPEGWriter.cpp
@@ -169,14 +169,29 @@ AUD_FFMPEGWriter::AUD_FFMPEGWriter(std::string filename, AUD_DeviceSpecs specs,
if(!codec)
AUD_THROW(AUD_ERROR_FFMPEG, codec_error);
+ if(codec->sample_fmts) {
+ // Check if the preferred sample format for this codec is supported.
+ const enum AVSampleFormat *p = codec->sample_fmts;
+ for(; *p != -1; p++) {
+ if(*p == m_stream->codec->sample_fmt)
+ break;
+ }
+ if(*p == -1) {
+ // Sample format incompatible with codec. Defaulting to a format known to work.
+ m_stream->codec->sample_fmt = codec->sample_fmts[0];
+ }
+ }
+
if(avcodec_open2(m_codecCtx, codec, NULL))
AUD_THROW(AUD_ERROR_FFMPEG, codec_error);
m_output_buffer.resize(FF_MIN_BUFFER_SIZE);
int samplesize = AUD_MAX(AUD_SAMPLE_SIZE(m_specs), AUD_DEVICE_SAMPLE_SIZE(m_specs));
- if(m_codecCtx->frame_size <= 1)
- m_input_size = 0;
+ if(m_codecCtx->frame_size <= 1) {
+ m_input_size = FF_MIN_BUFFER_SIZE * 8 / m_codecCtx->bits_per_coded_sample / m_codecCtx->channels;
+ m_input_buffer.resize(m_input_size * samplesize);
+ }
else
{
m_input_buffer.resize(m_codecCtx->frame_size * samplesize);
@@ -187,14 +202,21 @@ AUD_FFMPEGWriter::AUD_FFMPEGWriter(std::string filename, AUD_DeviceSpecs specs,
m_frame = av_frame_alloc();
if (!m_frame)
AUD_THROW(AUD_ERROR_FFMPEG, codec_error);
+ avcodec_get_frame_defaults(m_frame);
m_frame->linesize[0] = m_input_size * samplesize;
m_frame->format = m_codecCtx->sample_fmt;
+ m_frame->nb_samples = m_input_size;
# ifdef FFMPEG_HAVE_AVFRAME_SAMPLE_RATE
m_frame->sample_rate = m_codecCtx->sample_rate;
# endif
# ifdef FFMPEG_HAVE_FRAME_CHANNEL_LAYOUT
m_frame->channel_layout = m_codecCtx->channel_layout;
# endif
+ m_sample_size = av_get_bytes_per_sample(m_codecCtx->sample_fmt);
+ m_frame_pts = 0;
+ m_deinterleave = av_sample_fmt_is_planar(m_codecCtx->sample_fmt);
+ if(m_deinterleave)
+ m_deinterleave_buffer.resize(m_input_size * m_codecCtx->channels * m_sample_size);
#endif
try
@@ -272,13 +294,31 @@ void AUD_FFMPEGWriter::encode(sample_t* data)
#ifdef FFMPEG_HAVE_ENCODE_AUDIO2
int got_output, ret;
+ m_frame->pts = m_frame_pts / av_q2d(m_codecCtx->time_base);
+ m_frame_pts++;
+#ifdef FFMPEG_HAVE_FRAME_CHANNEL_LAYOUT
+ m_frame->channel_layout = m_codecCtx->channel_layout;
+#endif
+
+ if(m_deinterleave) {
+ for(int channel = 0; channel < m_codecCtx->channels; channel++) {
+ for(int i = 0; i < m_frame->nb_samples; i++) {
+ memcpy(reinterpret_cast<uint8_t*>(m_deinterleave_buffer.getBuffer()) + (i + channel * m_frame->nb_samples) * m_sample_size,
+ reinterpret_cast<uint8_t*>(data) + (m_codecCtx->channels * i + channel) * m_sample_size, m_sample_size);
+ }
+ }
+
+ data = m_deinterleave_buffer.getBuffer();
+ }
+
+ avcodec_fill_audio_frame(m_frame, m_codecCtx->channels, m_codecCtx->sample_fmt, reinterpret_cast<uint8_t*>(data),
+ m_frame->nb_samples * av_get_bytes_per_sample(m_codecCtx->sample_fmt) * m_codecCtx->channels, 1);
- m_frame->data[0] = reinterpret_cast<uint8_t*>(data);
ret = avcodec_encode_audio2(m_codecCtx, &packet, m_frame, &got_output);
- if (ret < 0)
+ if(ret < 0)
AUD_THROW(AUD_ERROR_FFMPEG, codec_error);
- if (!got_output)
+ if(!got_output)
return;
#else
sample_t* outbuf = m_output_buffer.getBuffer();
@@ -290,10 +330,23 @@ void AUD_FFMPEGWriter::encode(sample_t* data)
packet.data = reinterpret_cast<uint8_t*>(outbuf);
#endif
+ if(packet.pts != AV_NOPTS_VALUE)
+ packet.pts = av_rescale_q(packet.pts, m_codecCtx->time_base, m_stream->time_base);
+ if(packet.dts != AV_NOPTS_VALUE)
+ packet.dts = av_rescale_q(packet.dts, m_codecCtx->time_base, m_stream->time_base);
+ if(packet.duration > 0)
+ packet.duration = av_rescale_q(packet.duration, m_codecCtx->time_base, m_stream->time_base);
+
packet.stream_index = m_stream->index;
- if(av_interleaved_write_frame(m_formatCtx, &packet))
+ packet.flags |= AV_PKT_FLAG_KEY;
+
+ if(av_interleaved_write_frame(m_formatCtx, &packet)) {
+ av_free_packet(&packet);
AUD_THROW(AUD_ERROR_FFMPEG, write_error);
+ }
+
+ av_free_packet(&packet);
}
void AUD_FFMPEGWriter::write(unsigned int length, sample_t* buffer)
diff --git a/intern/audaspace/ffmpeg/AUD_FFMPEGWriter.h b/intern/audaspace/ffmpeg/AUD_FFMPEGWriter.h
index 310f69258ea..492aa35ff12 100644
--- a/intern/audaspace/ffmpeg/AUD_FFMPEGWriter.h
+++ b/intern/audaspace/ffmpeg/AUD_FFMPEGWriter.h
@@ -83,6 +83,23 @@ private:
AVFrame *m_frame;
/**
+ * PTS of next frame to write.
+ */
+ int m_frame_pts;
+
+ /**
+ * Number of bytes per sample.
+ */
+ int m_sample_size;
+
+ /**
+ * Need to de-interleave audio for planar sample formats.
+ */
+ bool m_deinterleave;
+
+ AUD_Buffer m_deinterleave_buffer;
+
+ /**
* The input buffer for the format converted data before encoding.
*/
AUD_Buffer m_input_buffer;
diff --git a/intern/audaspace/intern/AUD_AnimateableProperty.cpp b/intern/audaspace/intern/AUD_AnimateableProperty.cpp
index 61adae4b34b..9f399a0b99f 100644
--- a/intern/audaspace/intern/AUD_AnimateableProperty.cpp
+++ b/intern/audaspace/intern/AUD_AnimateableProperty.cpp
@@ -47,6 +47,23 @@ AUD_AnimateableProperty::AUD_AnimateableProperty(int count) :
pthread_mutexattr_destroy(&attr);
}
+AUD_AnimateableProperty::AUD_AnimateableProperty(int count, float value) :
+ AUD_Buffer(count * sizeof(float)), m_count(count), m_isAnimated(false)
+{
+ sample_t* buf = getBuffer();
+
+ for(int i = 0; i < count; i++)
+ buf[i] = value;
+
+ pthread_mutexattr_t attr;
+ pthread_mutexattr_init(&attr);
+ pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
+
+ pthread_mutex_init(&m_mutex, &attr);
+
+ pthread_mutexattr_destroy(&attr);
+}
+
void AUD_AnimateableProperty::updateUnknownCache(int start, int end)
{
float* buf = getBuffer();
@@ -104,7 +121,8 @@ void AUD_AnimateableProperty::write(const float* data, int position, int count)
if(pos == 0)
{
- memset(buf, 0, position * m_count * sizeof(float));
+ for(int i = 0; i < position; i++)
+ memcpy(buf + i * m_count, data, m_count * sizeof(float));
}
else
updateUnknownCache(pos, position - 1);
diff --git a/intern/audaspace/intern/AUD_AnimateableProperty.h b/intern/audaspace/intern/AUD_AnimateableProperty.h
index 37eb8f84550..f07e5916b25 100644
--- a/intern/audaspace/intern/AUD_AnimateableProperty.h
+++ b/intern/audaspace/intern/AUD_AnimateableProperty.h
@@ -76,6 +76,13 @@ public:
AUD_AnimateableProperty(int count = 1);
/**
+ * Creates a new animateable property.
+ * \param count The count of floats for a single property.
+ * \param value The value that the property should get initialized with. All count floats will be initialized to the same value.
+ */
+ AUD_AnimateableProperty(int count, float value);
+
+ /**
* Destroys the animateable property.
*/
~AUD_AnimateableProperty();
diff --git a/intern/audaspace/intern/AUD_ConverterFunctions.h b/intern/audaspace/intern/AUD_ConverterFunctions.h
index 1ffcf6c4ef0..7817ee88c07 100644
--- a/intern/audaspace/intern/AUD_ConverterFunctions.h
+++ b/intern/audaspace/intern/AUD_ConverterFunctions.h
@@ -34,12 +34,11 @@
#include <cstring>
#ifdef _MSC_VER
-#if (_MSC_VER < 1300)
+#if (_MSC_VER <= 1500)
typedef short int16_t;
typedef int int32_t;
#else
- typedef __int16 int16_t;
- typedef __int32 int32_t;
+# include <stdint.h>
#endif
#else
#include <stdint.h>
diff --git a/intern/audaspace/intern/AUD_Sequencer.cpp b/intern/audaspace/intern/AUD_Sequencer.cpp
index c59c56a4479..6c5e48c73f0 100644
--- a/intern/audaspace/intern/AUD_Sequencer.cpp
+++ b/intern/audaspace/intern/AUD_Sequencer.cpp
@@ -42,6 +42,7 @@ AUD_Sequencer::AUD_Sequencer(AUD_Specs specs, float fps, bool muted) :
m_speed_of_sound(434),
m_doppler_factor(1),
m_distance_model(AUD_DISTANCE_MODEL_INVERSE_CLAMPED),
+ m_volume(1, 1.0f),
m_location(3),
m_orientation(4)
{
diff --git a/intern/audaspace/intern/AUD_SequencerEntry.cpp b/intern/audaspace/intern/AUD_SequencerEntry.cpp
index 005557bbed1..6ef8479cdb8 100644
--- a/intern/audaspace/intern/AUD_SequencerEntry.cpp
+++ b/intern/audaspace/intern/AUD_SequencerEntry.cpp
@@ -53,6 +53,8 @@ AUD_SequencerEntry::AUD_SequencerEntry(boost::shared_ptr<AUD_IFactory> sound, fl
m_cone_angle_outer(360),
m_cone_angle_inner(360),
m_cone_volume_outer(0),
+ m_volume(1, 1.0f),
+ m_pitch(1, 1.0f),
m_location(3),
m_orientation(4)
{
diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt
index 5c8d68b07ee..a1b0030491e 100644
--- a/intern/cycles/CMakeLists.txt
+++ b/intern/cycles/CMakeLists.txt
@@ -1,4 +1,3 @@
-
# Standalone or with Blender
if(NOT WITH_BLENDER AND WITH_CYCLES_STANDALONE)
set(CYCLES_INSTALL_PATH "")
@@ -13,8 +12,11 @@ include(cmake/external_libs.cmake)
# Build Flags
# todo: refactor this code to match scons
+# note: CXX_HAS_SSE is needed in case passing SSE flags fails altogether (gcc-arm)
if(WIN32 AND MSVC)
+ set(CXX_HAS_SSE TRUE)
+
# /arch:AVX for VC2012 and above
if(NOT MSVC_VERSION LESS 1700)
set(CYCLES_AVX_ARCH_FLAGS "/arch:AVX")
@@ -24,36 +26,49 @@ if(WIN32 AND MSVC)
# there is no /arch:SSE3, but intrinsics are available anyway
if(CMAKE_CL_64)
- set(CYCLES_SSE2_KERNEL_FLAGS "/fp:fast -D_CRT_SECURE_NO_WARNINGS /Gs-")
- set(CYCLES_SSE3_KERNEL_FLAGS "/fp:fast -D_CRT_SECURE_NO_WARNINGS /Gs-")
- set(CYCLES_SSE41_KERNEL_FLAGS "/fp:fast -D_CRT_SECURE_NO_WARNINGS /Gs-")
- set(CYCLES_AVX_KERNEL_FLAGS "${CYCLES_AVX_ARCH_FLAGS} /fp:fast -D_CRT_SECURE_NO_WARNINGS /Gs-")
+ set(CYCLES_SSE2_KERNEL_FLAGS "/fp:fast -D_CRT_SECURE_NO_WARNINGS /GS-")
+ set(CYCLES_SSE3_KERNEL_FLAGS "/fp:fast -D_CRT_SECURE_NO_WARNINGS /GS-")
+ set(CYCLES_SSE41_KERNEL_FLAGS "/fp:fast -D_CRT_SECURE_NO_WARNINGS /GS-")
+ set(CYCLES_AVX_KERNEL_FLAGS "${CYCLES_AVX_ARCH_FLAGS} /fp:fast -D_CRT_SECURE_NO_WARNINGS /GS-")
else()
- set(CYCLES_SSE2_KERNEL_FLAGS "/arch:SSE2 /fp:fast -D_CRT_SECURE_NO_WARNINGS /Gs-")
- set(CYCLES_SSE3_KERNEL_FLAGS "/arch:SSE2 /fp:fast -D_CRT_SECURE_NO_WARNINGS /Gs-")
- set(CYCLES_SSE41_KERNEL_FLAGS "/arch:SSE2 /fp:fast -D_CRT_SECURE_NO_WARNINGS /Gs-")
- set(CYCLES_AVX_KERNEL_FLAGS "${CYCLES_AVX_ARCH_FLAGS} /fp:fast -D_CRT_SECURE_NO_WARNINGS /Gs-")
+ set(CYCLES_SSE2_KERNEL_FLAGS "/arch:SSE2 /fp:fast -D_CRT_SECURE_NO_WARNINGS /GS-")
+ set(CYCLES_SSE3_KERNEL_FLAGS "/arch:SSE2 /fp:fast -D_CRT_SECURE_NO_WARNINGS /GS-")
+ set(CYCLES_SSE41_KERNEL_FLAGS "/arch:SSE2 /fp:fast -D_CRT_SECURE_NO_WARNINGS /GS-")
+ set(CYCLES_AVX_KERNEL_FLAGS "${CYCLES_AVX_ARCH_FLAGS} /fp:fast -D_CRT_SECURE_NO_WARNINGS /GS-")
endif()
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /fp:fast -D_CRT_SECURE_NO_WARNINGS /Gs-")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /fp:fast -D_CRT_SECURE_NO_WARNINGS /GS-")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Ox")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} /Ox")
set(CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS_MINSIZEREL} /Ox")
elseif(CMAKE_COMPILER_IS_GNUCC)
- set(CYCLES_SSE2_KERNEL_FLAGS "-ffast-math -msse -msse2 -mfpmath=sse")
- set(CYCLES_SSE3_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -mfpmath=sse")
- set(CYCLES_SSE41_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mfpmath=sse")
- set(CYCLES_AVX_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mavx -mfpmath=sse")
+ check_cxx_compiler_flag(-msse CXX_HAS_SSE)
+ if(CXX_HAS_SSE)
+ set(CYCLES_SSE2_KERNEL_FLAGS "-ffast-math -msse -msse2 -mfpmath=sse")
+ set(CYCLES_SSE3_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -mfpmath=sse")
+ set(CYCLES_SSE41_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mfpmath=sse")
+ set(CYCLES_AVX_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mavx -mfpmath=sse")
+ endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math")
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
- set(CYCLES_SSE2_KERNEL_FLAGS "-ffast-math -msse -msse2")
- set(CYCLES_SSE3_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3")
- set(CYCLES_SSE41_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1")
- set(CYCLES_AVX_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mavx")
+ check_cxx_compiler_flag(-msse CXX_HAS_SSE)
+ if(CXX_HAS_SSE)
+ set(CYCLES_SSE2_KERNEL_FLAGS "-ffast-math -msse -msse2")
+ set(CYCLES_SSE3_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3")
+ set(CYCLES_SSE41_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1")
+ set(CYCLES_AVX_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mavx")
+ endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math")
endif()
-add_definitions(-DWITH_KERNEL_SSE2 -DWITH_KERNEL_SSE3 -DWITH_KERNEL_SSE41 -DWITH_KERNEL_AVX)
+if(CXX_HAS_SSE)
+ add_definitions(
+ -DWITH_KERNEL_SSE2
+ -DWITH_KERNEL_SSE3
+ -DWITH_KERNEL_SSE41
+ -DWITH_KERNEL_AVX
+ )
+endif()
# for OSL
if(WIN32 AND MSVC)
@@ -64,10 +79,15 @@ endif()
# Definitions and Includes
-add_definitions(${BOOST_DEFINITIONS} ${OPENIMAGEIO_DEFINITIONS})
+add_definitions(
+ ${BOOST_DEFINITIONS}
+ ${OPENIMAGEIO_DEFINITIONS}
+)
-add_definitions(-DCCL_NAMESPACE_BEGIN=namespace\ ccl\ {)
-add_definitions(-DCCL_NAMESPACE_END=})
+add_definitions(
+ -DCCL_NAMESPACE_BEGIN=namespace\ ccl\ {
+ -DCCL_NAMESPACE_END=}
+)
if(WITH_CYCLES_NETWORK)
add_definitions(-DWITH_NETWORK)
@@ -91,9 +111,11 @@ if(WITH_CYCLES_OSL)
include_directories(${OSL_INCLUDES})
endif()
-add_definitions(-DWITH_OPENCL)
-add_definitions(-DWITH_CUDA)
-add_definitions(-DWITH_MULTI)
+add_definitions(
+ -DWITH_OPENCL
+ -DWITH_CUDA
+ -DWITH_MULTI
+)
include_directories(
SYSTEM
@@ -101,7 +123,16 @@ include_directories(
${OPENIMAGEIO_INCLUDE_DIRS}
${OPENIMAGEIO_INCLUDE_DIRS}/OpenImageIO
${OPENEXR_INCLUDE_DIR}
- ${OPENEXR_INCLUDE_DIRS})
+ ${OPENEXR_INCLUDE_DIRS}
+)
+
+
+# Warnings
+if(CMAKE_COMPILER_IS_GNUCXX)
+ ADD_CHECK_CXX_COMPILER_FLAG(CMAKE_CXX_FLAGS _has_cxxflag_float_conversion "-Werror=float-conversion")
+ unset(_has_cxxflag_float_conversion)
+endif()
+
# Subdirectories
diff --git a/intern/cycles/SConscript b/intern/cycles/SConscript
index b8c731e3315..532238b9d7e 100644
--- a/intern/cycles/SConscript
+++ b/intern/cycles/SConscript
@@ -72,6 +72,12 @@ if env['OURPLATFORM'] in ('win32-vc', 'win64-vc'):
else:
cxxflags.append('-ffast-math'.split())
+# Warnings
+# XXX Not supported by gcc < 4.9, since we do not have any 'supported flags' test as in cmake,
+# simpler to comment for now.
+#if env['C_COMPILER_ID'] == 'gcc':
+# cxxflags.append(['-Werror=float-conversion'])
+
if env['OURPLATFORM'] in ('win32-vc', 'win32-mingw', 'linuxcross', 'win64-vc', 'win64-mingw'):
incs.append(env['BF_PTHREADS_INC'])
@@ -81,12 +87,12 @@ kernel_flags = {}
if env['OURPLATFORM'] == 'win32-vc':
# there is no /arch:SSE3, but intrinsics are available anyway
- kernel_flags['sse2'] = '/arch:SSE /arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /fp:fast /Ox /Gs-'
+ kernel_flags['sse2'] = '/arch:SSE /arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /fp:fast /Ox /GS-'
kernel_flags['sse3'] = kernel_flags['sse2']
elif env['OURPLATFORM'] == 'win64-vc':
# /arch:AVX only available from visual studio 2012
- kernel_flags['sse2'] = '-D_CRT_SECURE_NO_WARNINGS /fp:fast /Ox /Gs-'
+ kernel_flags['sse2'] = '-D_CRT_SECURE_NO_WARNINGS /fp:fast /Ox /GS-'
kernel_flags['sse3'] = kernel_flags['sse2']
if env['MSVC_VERSION'] in ('11.0', '12.0'):
diff --git a/intern/cycles/app/cycles_standalone.cpp b/intern/cycles/app/cycles_standalone.cpp
index 230833802b0..7ea1ca2d8fb 100644
--- a/intern/cycles/app/cycles_standalone.cpp
+++ b/intern/cycles/app/cycles_standalone.cpp
@@ -46,7 +46,8 @@ struct Options {
int width, height;
SceneParams scene_params;
SessionParams session_params;
- bool quiet, show_help, interactive;
+ bool quiet;
+ bool show_help, interactive, pause;
} options;
static void session_print(const string& str)
@@ -114,15 +115,25 @@ static void session_init()
options.scene = NULL;
}
-static void scene_init(int width, int height)
+static void scene_init()
{
options.scene = new Scene(options.scene_params, options.session_params.device);
+
+ /* Read XML */
xml_read_file(options.scene, options.filepath.c_str());
- if (width == 0 || height == 0) {
+ /* Camera width/height override? */
+ if (!(options.width == 0 || options.height == 0)) {
+ options.scene->camera->width = options.width;
+ options.scene->camera->height = options.height;
+ }
+ else {
options.width = options.scene->camera->width;
options.height = options.scene->camera->height;
}
+
+ /* Calculate Viewplane */
+ options.scene->camera->compute_auto_viewplane();
}
static void session_exit()
@@ -166,8 +177,14 @@ static void display_info(Progress& progress)
interactive = options.interactive? "On":"Off";
- str = string_printf("%s Time: %.2f Latency: %.4f Sample: %d Average: %.4f Interactive: %s",
- status.c_str(), total_time, latency, sample, sample_time, interactive.c_str());
+ str = string_printf(
+ "%s"
+ " Time: %.2f"
+ " Latency: %.4f"
+ " Sample: %d"
+ " Average: %.4f"
+ " Interactive: %s",
+ status.c_str(), total_time, latency, sample, sample_time, interactive.c_str());
view_display_info(str.c_str());
@@ -177,7 +194,9 @@ static void display_info(Progress& progress)
static void display()
{
- options.session->draw(session_buffer_params());
+ static DeviceDrawParams draw_params = DeviceDrawParams();
+
+ options.session->draw(session_buffer_params(), draw_params);
display_info(options.session->progress);
}
@@ -195,11 +214,11 @@ static void motion(int x, int y, int button)
/* Rotate */
else if(button == 2) {
- float4 r1= make_float4(x * 0.1f, 0.0f, 1.0f, 0.0f);
- matrix = matrix * transform_rotate(r1.x * M_PI/180.0f, make_float3(r1.y, r1.z, r1.w));
+ float4 r1 = make_float4((float)x * 0.1f, 0.0f, 1.0f, 0.0f);
+ matrix = matrix * transform_rotate(DEG2RADF(r1.x), make_float3(r1.y, r1.z, r1.w));
- float4 r2 = make_float4(y * 0.1, 1.0f, 0.0f, 0.0f);
- matrix = matrix * transform_rotate(r2.x * M_PI/180.0f, make_float3(r2.y, r2.z, r2.w));
+ float4 r2 = make_float4(y * 0.1f, 1.0f, 0.0f, 0.0f);
+ matrix = matrix * transform_rotate(DEG2RADF(r2.x), make_float3(r2.y, r2.z, r2.w));
}
/* Update and Reset */
@@ -216,20 +235,64 @@ static void resize(int width, int height)
options.width = width;
options.height = height;
- if(options.session)
+ if(options.session) {
+ /* Update camera */
+ options.session->scene->camera->width = width;
+ options.session->scene->camera->height = height;
+ options.session->scene->camera->compute_auto_viewplane();
+ options.session->scene->camera->need_update = true;
+ options.session->scene->camera->need_device_update = true;
+
options.session->reset(session_buffer_params(), options.session_params.samples);
+ }
}
static void keyboard(unsigned char key)
{
- if(key == 'r')
- options.session->reset(session_buffer_params(), options.session_params.samples);
- else if(key == 'h')
+ /* Toggle help */
+ if(key == 'h')
options.show_help = !(options.show_help);
- else if(key == 'i')
- options.interactive = !(options.interactive);
+
+ /* Reset */
+ else if(key == 'r')
+ options.session->reset(session_buffer_params(), options.session_params.samples);
+
+ /* Cancel */
else if(key == 27) // escape
options.session->progress.set_cancel("Canceled");
+
+ /* Pause */
+ else if(key == 'p') {
+ options.pause = !options.pause;
+ options.session->set_pause(options.pause);
+ }
+
+ /* Interactive Mode */
+ else if(key == 'i')
+ options.interactive = !(options.interactive);
+
+ else if(options.interactive && (key == 'w' || key == 'a' || key == 's' || key == 'd')) {
+ Transform matrix = options.session->scene->camera->matrix;
+ float3 translate;
+
+ if(key == 'w')
+ translate = make_float3(0.0f, 0.0f, 0.1f);
+ else if(key == 's')
+ translate = make_float3(0.0f, 0.0f, -0.1f);
+ else if(key == 'a')
+ translate = make_float3(-0.1f, 0.0f, 0.0f);
+ else if(key == 'd')
+ translate = make_float3(0.1f, 0.0f, 0.0f);
+
+ matrix = matrix * transform_translate(translate);
+
+ /* Update and Reset */
+ options.session->scene->camera->matrix = matrix;
+ options.session->scene->camera->need_update = true;
+ options.session->scene->camera->need_device_update = true;
+
+ options.session->reset(session_buffer_params(), options.session_params.samples);
+ }
}
#endif
@@ -314,15 +377,13 @@ static void options_parse(int argc, const char **argv)
else if(ssname == "svm")
options.scene_params.shadingsystem = SceneParams::SVM;
-#ifdef WITH_CYCLES_STANDALONE_GUI
- /* Progressive rendering for GUI */
- if(!options.session_params.background)
- options.session_params.progressive = true;
-#else
- /* When building without GUI, set background */
+#ifndef WITH_CYCLES_STANDALONE_GUI
options.session_params.background = true;
#endif
+ /* Use progressive rendering */
+ options.session_params.progressive = true;
+
/* find matching device */
DeviceType device_type = Device::type_from_string(devicename.c_str());
vector<DeviceInfo>& devices = Device::available_devices();
@@ -360,12 +421,12 @@ static void options_parse(int argc, const char **argv)
fprintf(stderr, "No file path specified\n");
exit(EXIT_FAILURE);
}
-
+
/* For smoother Viewport */
options.session_params.start_resolution = 64;
/* load scene */
- scene_init(options.width, options.height);
+ scene_init();
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/app/cycles_xml.cpp b/intern/cycles/app/cycles_xml.cpp
index 14fe43115d5..d5ef30e5c6f 100644
--- a/intern/cycles/app/cycles_xml.cpp
+++ b/intern/cycles/app/cycles_xml.cpp
@@ -105,7 +105,7 @@ static bool xml_read_float(float *value, pugi::xml_node node, const char *name)
pugi::xml_attribute attr = node.attribute(name);
if(attr) {
- *value = atof(attr.value());
+ *value = (float)atof(attr.value());
return true;
}
@@ -121,7 +121,7 @@ static bool xml_read_float_array(vector<float>& value, pugi::xml_node node, cons
string_split(tokens, attr.value());
foreach(const string& token, tokens)
- value.push_back(atof(token.c_str()));
+ value.push_back((float)atof(token.c_str()));
return true;
}
@@ -219,6 +219,35 @@ static bool xml_read_enum(ustring *str, ShaderEnum& enm, pugi::xml_node node, co
return false;
}
+static ShaderSocketType xml_read_socket_type(pugi::xml_node node, const char *name)
+{
+ pugi::xml_attribute attr = node.attribute(name);
+
+ if(attr) {
+ string value = attr.value();
+ if (string_iequals(value, "float"))
+ return SHADER_SOCKET_FLOAT;
+ else if (string_iequals(value, "int"))
+ return SHADER_SOCKET_INT;
+ else if (string_iequals(value, "color"))
+ return SHADER_SOCKET_COLOR;
+ else if (string_iequals(value, "vector"))
+ return SHADER_SOCKET_VECTOR;
+ else if (string_iequals(value, "point"))
+ return SHADER_SOCKET_POINT;
+ else if (string_iequals(value, "normal"))
+ return SHADER_SOCKET_NORMAL;
+ else if (string_iequals(value, "closure color"))
+ return SHADER_SOCKET_CLOSURE;
+ else if (string_iequals(value, "string"))
+ return SHADER_SOCKET_STRING;
+ else
+ fprintf(stderr, "Unknown shader socket type \"%s\" for attribute \"%s\".\n", value.c_str(), name);
+ }
+
+ return SHADER_SOCKET_UNDEFINED;
+}
+
/* Film */
static void xml_read_film(const XMLReadState& state, pugi::xml_node node)
@@ -251,6 +280,8 @@ static void xml_read_integrator(const XMLReadState& state, pugi::xml_node node)
xml_read_int(&integrator->mesh_light_samples, node, "mesh_light_samples");
xml_read_int(&integrator->subsurface_samples, node, "subsurface_samples");
xml_read_int(&integrator->volume_samples, node, "volume_samples");
+ xml_read_bool(&integrator->sample_all_lights_direct, node, "sample_all_lights_direct");
+ xml_read_bool(&integrator->sample_all_lights_indirect, node, "sample_all_lights_indirect");
}
/* Bounces */
@@ -268,6 +299,7 @@ static void xml_read_integrator(const XMLReadState& state, pugi::xml_node node)
xml_read_bool(&integrator->transparent_shadows, node, "transparent_shadows");
/* Volume */
+ xml_read_int(&integrator->volume_homogeneous_sampling, node, "volume_homogeneous_sampling");
xml_read_float(&integrator->volume_step_size, node, "volume_step_size");
xml_read_int(&integrator->volume_max_steps, node, "volume_max_steps");
@@ -289,23 +321,8 @@ static void xml_read_camera(const XMLReadState& state, pugi::xml_node node)
xml_read_int(&cam->width, node, "width");
xml_read_int(&cam->height, node, "height");
- float aspect = (float)cam->width/(float)cam->height;
-
- if(cam->width >= cam->height) {
- cam->viewplane.left = -aspect;
- cam->viewplane.right = aspect;
- cam->viewplane.bottom = -1.0f;
- cam->viewplane.top = 1.0f;
- }
- else {
- cam->viewplane.left = -1.0f;
- cam->viewplane.right = 1.0f;
- cam->viewplane.bottom = -1.0f/aspect;
- cam->viewplane.top = 1.0f/aspect;
- }
-
if(xml_read_float(&cam->fov, node, "fov"))
- cam->fov *= M_PI/180.0f;
+ cam->fov = DEG2RADF(cam->fov);
xml_read_float(&cam->nearclip, node, "nearclip");
xml_read_float(&cam->farclip, node, "farclip");
@@ -333,7 +350,6 @@ static void xml_read_camera(const XMLReadState& state, pugi::xml_node node)
xml_read_float(&cam->sensorwidth, node, "sensorwidth");
xml_read_float(&cam->sensorheight, node, "sensorheight");
-
cam->matrix = state.tfm;
cam->need_update = true;
@@ -392,24 +408,41 @@ static void xml_read_shader_graph(const XMLReadState& state, Shader *shader, pug
/* Source */
xml_read_string(&osl->filepath, node, "src");
- osl->filepath = path_join(state.base, osl->filepath);
-
- /* Outputs */
- string output = "", output_type = "";
- ShaderSocketType type = SHADER_SOCKET_FLOAT;
+ if(path_is_relative(osl->filepath)) {
+ osl->filepath = path_join(state.base, osl->filepath);
+ }
- xml_read_string(&output, node, "output");
- xml_read_string(&output_type, node, "output_type");
-
- if(output_type == "float")
- type = SHADER_SOCKET_FLOAT;
- else if(output_type == "closure color")
- type = SHADER_SOCKET_CLOSURE;
- else if(output_type == "color")
- type = SHADER_SOCKET_COLOR;
-
- osl->output_names.push_back(ustring(output));
- osl->add_output(osl->output_names.back().c_str(), type);
+ /* Generate inputs/outputs from node sockets
+ *
+ * Note: ShaderInput/ShaderOutput store shallow string copies only!
+ * Socket names must be stored in the extra lists instead. */
+ /* read input values */
+ for(pugi::xml_node param = node.first_child(); param; param = param.next_sibling()) {
+ if (string_iequals(param.name(), "input")) {
+ string name;
+ if (!xml_read_string(&name, param, "name"))
+ continue;
+
+ ShaderSocketType type = xml_read_socket_type(param, "type");
+ if (type == SHADER_SOCKET_UNDEFINED)
+ continue;
+
+ osl->input_names.push_back(ustring(name));
+ osl->add_input(osl->input_names.back().c_str(), type);
+ }
+ else if (string_iequals(param.name(), "output")) {
+ string name;
+ if (!xml_read_string(&name, param, "name"))
+ continue;
+
+ ShaderSocketType type = xml_read_socket_type(param, "type");
+ if (type == SHADER_SOCKET_UNDEFINED)
+ continue;
+
+ osl->output_names.push_back(ustring(name));
+ osl->add_output(osl->output_names.back().c_str(), type);
+ }
+ }
snode = osl;
}
@@ -616,6 +649,11 @@ static void xml_read_shader_graph(const XMLReadState& state, Shader *shader, pug
xml_read_ustring(&attr->attribute, node, "attribute");
snode = attr;
}
+ else if(string_iequals(node.name(), "uv_map")) {
+ UVMapNode *uvm = new UVMapNode();
+ xml_read_ustring(&uvm->attribute, node, "uv_map");
+ snode = uvm;
+ }
else if(string_iequals(node.name(), "camera")) {
snode = new CameraNode();
}
@@ -734,6 +772,9 @@ static void xml_read_shader_graph(const XMLReadState& state, Shader *shader, pug
case SHADER_SOCKET_NORMAL:
xml_read_float3(&in->value, node, attr.name());
break;
+ case SHADER_SOCKET_STRING:
+ xml_read_ustring( &in->value_string, node, attr.name() );
+ break;
default:
break;
}
@@ -765,6 +806,8 @@ static void xml_read_shader(const XMLReadState& state, pugi::xml_node node)
static void xml_read_background(const XMLReadState& state, pugi::xml_node node)
{
Shader *shader = state.scene->shaders[state.scene->default_background];
+
+ xml_read_bool(&shader->heterogeneous_volume, node, "heterogeneous_volume");
xml_read_shader_graph(state, shader, node);
}
@@ -846,7 +889,7 @@ static void xml_read_mesh(const XMLReadState& state, pugi::xml_node node)
SubdParams sdparams(mesh, shader, smooth);
xml_read_float(&sdparams.dicing_rate, node, "dicing_rate");
- DiagSplit dsplit(sdparams);;
+ DiagSplit dsplit(sdparams);
sdmesh.tessellate(&dsplit);
}
else {
@@ -944,6 +987,26 @@ static void xml_read_light(const XMLReadState& state, pugi::xml_node node)
{
Light *light = new Light();
light->shader = state.shader;
+
+ /* Light Type
+ * 0: Point, 1: Sun, 3: Area, 5: Spot */
+ int type = 0;
+ xml_read_int(&type, node, "type");
+ light->type = (LightType)type;
+
+ /* Spot Light */
+ xml_read_float(&light->spot_angle, node, "spot_angle");
+ xml_read_float(&light->spot_smooth, node, "spot_smooth");
+
+ /* Area Light */
+ xml_read_float(&light->sizeu, node, "sizeu");
+ xml_read_float(&light->sizev, node, "sizev");
+ xml_read_float3(&light->axisu, node, "axisu");
+ xml_read_float3(&light->axisv, node, "axisv");
+
+ /* Generic */
+ xml_read_float(&light->size, node, "size");
+ xml_read_float3(&light->dir, node, "dir");
xml_read_float3(&light->co, node, "P");
light->co = transform_point(&state.tfm, light->co);
@@ -969,7 +1032,7 @@ static void xml_read_transform(pugi::xml_node node, Transform& tfm)
if(node.attribute("rotate")) {
float4 rotate = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
xml_read_float4(&rotate, node, "rotate");
- tfm = tfm * transform_rotate(rotate.x*M_PI/180.0f, make_float3(rotate.y, rotate.z, rotate.w));
+ tfm = tfm * transform_rotate(DEG2RADF(rotate.x), make_float3(rotate.y, rotate.z, rotate.w));
}
if(node.attribute("scale")) {
diff --git a/intern/cycles/app/cycles_xml.h b/intern/cycles/app/cycles_xml.h
index 1e3ed411312..96bc79c35d8 100644
--- a/intern/cycles/app/cycles_xml.h
+++ b/intern/cycles/app/cycles_xml.h
@@ -14,8 +14,8 @@
* limitations under the License
*/
-#ifndef __CYCLES_XML__
-#define __CYCLES_XML__
+#ifndef __CYCLES_XML_H__
+#define __CYCLES_XML_H__
CCL_NAMESPACE_BEGIN
@@ -23,7 +23,10 @@ class Scene;
void xml_read_file(Scene *scene, const char *filepath);
-CCL_NAMESPACE_END
+/* macros for importing */
+#define RAD2DEGF(_rad) ((_rad) * (float)(180.0 / M_PI))
+#define DEG2RADF(_deg) ((_deg) * (float)(M_PI / 180.0))
-#endif /* __CYCLES_XML__ */
+CCL_NAMESPACE_END
+#endif /* __CYCLES_XML_H__ */
diff --git a/intern/cycles/blender/CCL_api.h b/intern/cycles/blender/CCL_api.h
index 6532315cf39..2772b9ac8a7 100644
--- a/intern/cycles/blender/CCL_api.h
+++ b/intern/cycles/blender/CCL_api.h
@@ -14,8 +14,8 @@
* limitations under the License
*/
-#ifndef CCL_API_H
-#define CCL_API_H
+#ifndef __CCL_API_H__
+#define __CCL_API_H__
#ifdef __cplusplus
extern "C" {
@@ -40,5 +40,4 @@ void *CCL_python_module_init(void);
}
#endif
-#endif /* CCL_API_H */
-
+#endif /* __CCL_API_H__ */
diff --git a/intern/cycles/blender/CMakeLists.txt b/intern/cycles/blender/CMakeLists.txt
index 25f91a0caea..9a60152841e 100644
--- a/intern/cycles/blender/CMakeLists.txt
+++ b/intern/cycles/blender/CMakeLists.txt
@@ -49,6 +49,11 @@ add_definitions(-DGLEW_STATIC)
blender_add_lib(bf_intern_cycles "${SRC}" "${INC}" "${INC_SYS}")
+# avoid link failure with clang 3.4 debug
+if(CMAKE_C_COMPILER_ID MATCHES "Clang" AND NOT ${CMAKE_C_COMPILER_VERSION} VERSION_LESS '3.4')
+ set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -gline-tables-only")
+endif()
+
add_dependencies(bf_intern_cycles bf_rna)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${ADDON_FILES}" ${CYCLES_INSTALL_PATH})
diff --git a/intern/cycles/blender/addon/__init__.py b/intern/cycles/blender/addon/__init__.py
index afd26945d6c..27d986900c8 100644
--- a/intern/cycles/blender/addon/__init__.py
+++ b/intern/cycles/blender/addon/__init__.py
@@ -19,7 +19,7 @@
bl_info = {
"name": "Cycles Render Engine",
"author": "",
- "blender": (2, 67, 0),
+ "blender": (2, 70, 0),
"location": "Info header, render engine menu",
"description": "Cycles Render Engine integration",
"warning": "",
@@ -67,6 +67,9 @@ class CyclesRender(bpy.types.RenderEngine):
def render(self, scene):
engine.render(self)
+ def bake(self, scene, obj, pass_type, pixel_array, num_pixels, depth, result):
+ engine.bake(self, obj, pass_type, pixel_array, num_pixels, depth, result)
+
# viewport render
def view_update(self, context):
if not self.session:
diff --git a/intern/cycles/blender/addon/engine.py b/intern/cycles/blender/addon/engine.py
index b9ce65588df..25a9e97a99b 100644
--- a/intern/cycles/blender/addon/engine.py
+++ b/intern/cycles/blender/addon/engine.py
@@ -59,6 +59,12 @@ def render(engine):
_cycles.render(engine.session)
+def bake(engine, obj, pass_type, pixel_array, num_pixels, depth, result):
+ import _cycles
+ session = getattr(engine, "session", None)
+ if session is not None:
+ _cycles.bake(engine.session, obj.as_pointer(), pass_type, pixel_array.as_pointer(), num_pixels, depth, result.as_pointer())
+
def reset(engine, data, scene):
import _cycles
data = data.as_pointer()
diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py
index c80e8a3250c..7205a272395 100644
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -108,6 +108,11 @@ enum_integrator = (
('PATH', "Path Tracing", "Pure path tracing integrator"),
)
+enum_volume_homogeneous_sampling = (
+ ('DISTANCE', "Distance", "Use Distance Sampling"),
+ ('EQUI_ANGULAR', "Equi-angular", "Use Equi-angular Sampling"),
+ )
+
class CyclesRenderSettings(bpy.types.PropertyGroup):
@classmethod
@@ -141,6 +146,13 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
default='PATH',
)
+ cls.volume_homogeneous_sampling = EnumProperty(
+ name="Homogeneous Sampling",
+ description="Sampling method to use for homogeneous volumes",
+ items=enum_volume_homogeneous_sampling,
+ default='DISTANCE',
+ )
+
cls.use_square_samples = BoolProperty(
name="Square Samples",
description="Square sampling values for easier artist control",
@@ -241,6 +253,18 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
default='USE',
)
+ cls.sample_all_lights_direct = BoolProperty(
+ name="Sample All Direct Lights",
+ description="Sample all lights (for direct samples), rather than randomly picking one",
+ default=True,
+ )
+
+ cls.sample_all_lights_indirect = BoolProperty(
+ name="Sample All Indirect Lights",
+ description="Sample all lights (for indirect samples), rather than randomly picking one",
+ default=True,
+ )
+
cls.no_caustics = BoolProperty(
name="No Caustics",
description="Leave out caustics, resulting in a darker image with less noise",
@@ -447,6 +471,33 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
default=False,
)
+ cls.bake_type = EnumProperty(
+ name="Bake Type",
+ default='COMBINED',
+ description="Type of pass to bake",
+ items = (
+ ('COMBINED', "Combined", ""),
+ ('AO', "Ambient Occlusion", ""),
+ ('SHADOW', "Shadow", ""),
+ ('NORMAL', "Normal", ""),
+ ('UV', "UV", ""),
+ ('EMIT', "Emit", ""),
+ ('ENVIRONMENT', "Environment", ""),
+ ('DIFFUSE_DIRECT', "Diffuse Direct", ""),
+ ('DIFFUSE_INDIRECT', "Diffuse Indirect", ""),
+ ('DIFFUSE_COLOR', "Diffuse Color", ""),
+ ('GLOSSY_DIRECT', "Glossy Direct", ""),
+ ('GLOSSY_INDIRECT', "Glossy Indirect", ""),
+ ('GLOSSY_COLOR', "Glossy Color", ""),
+ ('TRANSMISSION_DIRECT', "Transmission Direct", ""),
+ ('TRANSMISSION_INDIRECT', "Transmission Indirect", ""),
+ ('TRANSMISSION_COLOR', "Transmission Color", ""),
+ ('SUBSURFACE_DIRECT', "Subsurface Direct", ""),
+ ('SUBSURFACE_INDIRECT', "Subsurface Indirect", ""),
+ ('SUBSURFACE_COLOR', "Subsurface Color", ""),
+ ),
+ )
+
@classmethod
def unregister(cls):
del bpy.types.Scene.cycles
@@ -718,6 +769,41 @@ class CyclesMeshSettings(bpy.types.PropertyGroup):
del bpy.types.MetaBall.cycles
+class CyclesObjectBlurSettings(bpy.types.PropertyGroup):
+
+ @classmethod
+ def register(cls):
+
+ bpy.types.Object.cycles = PointerProperty(
+ name="Cycles Object Settings",
+ description="Cycles object settings",
+ type=cls,
+ )
+
+ cls.use_motion_blur = BoolProperty(
+ name="Use Motion Blur",
+ description="Use motion blur for this object",
+ default=True,
+ )
+
+ cls.use_deform_motion = BoolProperty(
+ name="Use Deformation Motion",
+ description="Use deformation motion blur for this object",
+ default=True,
+ )
+
+ cls.motion_steps = IntProperty(
+ name="Motion Steps",
+ description="Control accuracy of deformation motion blur, more steps gives more memory usage (actual number of steps is 2^(steps - 1))",
+ min=1, soft_max=8,
+ default=1,
+ )
+
+ @classmethod
+ def unregister(cls):
+ del bpy.types.Object.cycles
+
+
class CyclesCurveRenderSettings(bpy.types.PropertyGroup):
@classmethod
def register(cls):
diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py
index c0ce80426c0..5c8115b6612 100644
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -49,6 +49,13 @@ class CyclesButtonsPanel():
return rd.engine in cls.COMPAT_ENGINES
+def use_cpu(context):
+ cscene = context.scene.cycles
+ device_type = context.user_preferences.system.compute_device_type
+
+ return (device_type == 'NONE' or cscene.device == 'CPU')
+
+
def draw_samples_info(layout, cscene):
integrator = cscene.progressive
@@ -103,7 +110,6 @@ class CyclesRender_PT_sampling(CyclesButtonsPanel, Panel):
scene = context.scene
cscene = scene.cycles
- device_type = context.user_preferences.system.compute_device_type
row = layout.row(align=True)
row.menu("CYCLES_MT_sampling_presets", text=bpy.types.CYCLES_MT_sampling_presets.bl_label)
@@ -133,6 +139,9 @@ class CyclesRender_PT_sampling(CyclesButtonsPanel, Panel):
sub.label(text="AA Samples:")
sub.prop(cscene, "aa_samples", text="Render")
sub.prop(cscene, "preview_aa_samples", text="Preview")
+ sub.separator()
+ sub.prop(cscene, "sample_all_lights_direct")
+ sub.prop(cscene, "sample_all_lights_indirect")
col = split.column()
sub = col.column(align=True)
@@ -145,7 +154,7 @@ class CyclesRender_PT_sampling(CyclesButtonsPanel, Panel):
sub.prop(cscene, "subsurface_samples", text="Subsurface")
sub.prop(cscene, "volume_samples", text="Volume")
- if cscene.feature_set == 'EXPERIMENTAL' and (device_type == 'NONE' or cscene.device == 'CPU'):
+ if cscene.feature_set == 'EXPERIMENTAL' and use_cpu(context):
layout.row().prop(cscene, "sampling_pattern", text="Pattern")
for rl in scene.render.layers:
@@ -167,9 +176,16 @@ class CyclesRender_PT_volume_sampling(CyclesButtonsPanel, Panel):
scene = context.scene
cscene = scene.cycles
- split = layout.split()
- split.prop(cscene, "volume_step_size")
- split.prop(cscene, "volume_max_steps")
+ split = layout.split(align=True)
+
+ sub = split.column(align=True)
+ sub.label("Heterogeneous:")
+ sub.prop(cscene, "volume_step_size")
+ sub.prop(cscene, "volume_max_steps")
+
+ sub = split.column(align=True)
+ sub.label("Homogeneous:")
+ sub.prop(cscene, "volume_homogeneous_sampling", text="")
class CyclesRender_PT_light_paths(CyclesButtonsPanel, Panel):
@@ -310,28 +326,6 @@ class CyclesRender_PT_performance(CyclesButtonsPanel, Panel):
col.prop(cscene, "debug_use_spatial_splits")
-class CyclesRender_PT_opengl(CyclesButtonsPanel, Panel):
- bl_label = "OpenGL Render"
- bl_options = {'DEFAULT_CLOSED'}
-
- def draw(self, context):
- layout = self.layout
-
- rd = context.scene.render
-
- split = layout.split()
-
- col = split.column()
- col.prop(rd, "use_antialiasing")
- sub = col.row()
- sub.active = rd.use_antialiasing
- sub.prop(rd, "antialiasing_samples", expand=True)
-
- col = split.column()
- col.label(text="Alpha:")
- col.prop(rd, "alpha_mode", text="")
-
-
class CyclesRender_PT_layer_options(CyclesButtonsPanel, Panel):
bl_label = "Layer"
bl_context = "render_layer"
@@ -562,26 +556,48 @@ class Cycles_PT_mesh_displacement(CyclesButtonsPanel, Panel):
layout.prop(cdata, "dicing_rate")
-class Cycles_PT_mesh_normals(CyclesButtonsPanel, Panel):
- bl_label = "Normals"
- bl_context = "data"
+class CyclesObject_PT_motion_blur(CyclesButtonsPanel, Panel):
+ bl_label = "Motion Blur"
+ bl_context = "object"
+ bl_options = {'DEFAULT_CLOSED'}
@classmethod
def poll(cls, context):
- return CyclesButtonsPanel.poll(context) and context.mesh
+ ob = context.object
+ return CyclesButtonsPanel.poll(context) and ob and ob.type in {'MESH', 'CURVE', 'CURVE', 'SURFACE', 'FONT', 'META'}
+
+ def draw_header(self, context):
+ layout = self.layout
+
+ rd = context.scene.render
+ scene = context.scene
+ # cscene = scene.cycles
+
+ layout.active = rd.use_motion_blur
+
+ ob = context.object
+ cob = ob.cycles
+
+ layout.prop(cob, "use_motion_blur", text="")
def draw(self, context):
layout = self.layout
- mesh = context.mesh
+ rd = context.scene.render
+ scene = context.scene
+ # cscene = scene.cycles
- split = layout.split()
+ ob = context.object
+ cob = ob.cycles
- col = split.column()
- col.prop(mesh, "show_double_sided")
+ layout.active = (rd.use_motion_blur and cob.use_motion_blur)
- col = split.column()
- col.label()
+ row = layout.row()
+ row.prop(cob, "use_deform_motion", text="Deformation")
+
+ sub = row.row()
+ sub.active = cob.use_deform_motion
+ sub.prop(cob, "motion_steps", text="Steps")
class CyclesObject_PT_ray_visibility(CyclesButtonsPanel, Panel):
@@ -593,7 +609,8 @@ class CyclesObject_PT_ray_visibility(CyclesButtonsPanel, Panel):
def poll(cls, context):
ob = context.object
return (CyclesButtonsPanel.poll(context) and
- ob and ob.type in {'MESH', 'CURVE', 'CURVE', 'SURFACE', 'FONT', 'META', 'LAMP'})
+ ob and ob.type in {'MESH', 'CURVE', 'SURFACE', 'FONT', 'META', 'LAMP'} or
+ ob and ob.dupli_type == 'GROUP' and ob.dupli_group)
def draw(self, context):
layout = self.layout
@@ -847,9 +864,10 @@ class CyclesWorld_PT_mist(CyclesButtonsPanel, Panel):
@classmethod
def poll(cls, context):
if CyclesButtonsPanel.poll(context):
- for rl in context.scene.render.layers:
- if rl.use_pass_mist:
- return True
+ if context.world:
+ for rl in context.scene.render.layers:
+ if rl.use_pass_mist:
+ return True
return False
@@ -997,8 +1015,9 @@ class CyclesMaterial_PT_settings(CyclesButtonsPanel, Panel):
split = layout.split()
- col = split.column()
+ col = split.column(align=True)
col.prop(mat, "diffuse_color", text="Viewport Color")
+ col.prop(mat, "alpha")
col = split.column(align=True)
col.label()
@@ -1108,7 +1127,7 @@ class CyclesTexture_PT_colors(CyclesButtonsPanel, Panel):
def poll(cls, context):
# node = context.texture_node
return False
- #return node and CyclesButtonsPanel.poll(context)
+ # return node and CyclesButtonsPanel.poll(context)
def draw(self, context):
layout = self.layout
@@ -1176,7 +1195,7 @@ class CyclesRender_PT_CurveRendering(CyclesButtonsPanel, Panel):
@classmethod
def poll(cls, context):
scene = context.scene
- cscene = scene.cycles
+ # cscene = scene.cycles
psys = context.particle_system
return CyclesButtonsPanel.poll(context) and psys and psys.settings.type == 'HAIR'
@@ -1208,6 +1227,54 @@ class CyclesRender_PT_CurveRendering(CyclesButtonsPanel, Panel):
row.prop(ccscene, "maximum_width", text="Max Ext.")
+class CyclesRender_PT_bake(CyclesButtonsPanel, Panel):
+ bl_label = "Bake"
+ bl_context = "render"
+ bl_options = {'DEFAULT_CLOSED'}
+ COMPAT_ENGINES = {'CYCLES'}
+
+ def draw(self, context):
+ layout = self.layout
+
+ scene = context.scene
+ cscene = scene.cycles
+
+ cbk = scene.render.bake
+
+ layout.operator("object.bake", icon='RENDER_STILL').type = \
+ cscene.bake_type
+
+ col = layout.column()
+ col.prop(cscene, "bake_type")
+
+ col.separator()
+ split = layout.split()
+
+ sub = split.column()
+ sub.prop(cbk, "use_clear")
+ sub.prop(cbk, "margin")
+
+ sub = split.column()
+ sub.prop(cbk, "use_selected_to_active")
+ sub = sub.column()
+
+ sub.active = cbk.use_selected_to_active
+ sub.prop(cbk, "cage_extrusion", text="Distance")
+ sub.prop_search(cbk, "cage", scene, "objects")
+
+ if cscene.bake_type == 'NORMAL':
+ col.separator()
+ box = col.box()
+ box.label(text="Normal Settings:")
+ box.prop(cbk, "normal_space", text="Space")
+
+ row = box.row(align=True)
+ row.label(text = "Swizzle:")
+ row.prop(cbk, "normal_r", text="")
+ row.prop(cbk, "normal_g", text="")
+ row.prop(cbk, "normal_b", text="")
+
+
class CyclesParticle_PT_CurveSettings(CyclesButtonsPanel, Panel):
bl_label = "Cycles Hair Settings"
bl_context = "particle"
@@ -1215,7 +1282,7 @@ class CyclesParticle_PT_CurveSettings(CyclesButtonsPanel, Panel):
@classmethod
def poll(cls, context):
scene = context.scene
- cscene = scene.cycles
+ # cscene = scene.cycles
ccscene = scene.cycles_curves
psys = context.particle_system
use_curves = ccscene.use_curves and psys
@@ -1275,7 +1342,7 @@ def draw_device(self, context):
if device_type in {'CUDA', 'OPENCL', 'NETWORK'}:
layout.prop(cscene, "device")
- if engine.with_osl() and (cscene.device == 'CPU' or device_type == 'NONE'):
+ if engine.with_osl() and use_cpu(context):
layout.prop(cscene, "shading_system")
@@ -1316,6 +1383,7 @@ def get_panels():
"DATA_PT_context_camera",
"DATA_PT_context_lamp",
"DATA_PT_context_speaker",
+ "DATA_PT_normals",
"DATA_PT_texture_space",
"DATA_PT_curve_texture_space",
"DATA_PT_mball_texture_space",
diff --git a/intern/cycles/blender/blender_camera.cpp b/intern/cycles/blender/blender_camera.cpp
index 4c6b42a9cbc..1a85561c6d5 100644
--- a/intern/cycles/blender/blender_camera.cpp
+++ b/intern/cycles/blender/blender_camera.cpp
@@ -212,8 +212,8 @@ static void blender_camera_viewplane(BlenderCamera *bcam, int width, int height,
BoundBox2D *viewplane, float *aspectratio, float *sensor_size)
{
/* dimensions */
- float xratio = width*bcam->pixelaspect.x;
- float yratio = height*bcam->pixelaspect.y;
+ float xratio = (float)width*bcam->pixelaspect.x;
+ float yratio = (float)height*bcam->pixelaspect.y;
/* compute x/y aspect and ratio */
float xaspect, yaspect;
@@ -288,8 +288,8 @@ static void blender_camera_sync(Camera *cam, BlenderCamera *bcam, int width, int
/* panorama sensor */
if (bcam->type == CAMERA_PANORAMA && bcam->panorama_type == PANORAMA_FISHEYE_EQUISOLID) {
- float fit_xratio = bcam->full_width*bcam->pixelaspect.x;
- float fit_yratio = bcam->full_height*bcam->pixelaspect.y;
+ float fit_xratio = (float)bcam->full_width*bcam->pixelaspect.x;
+ float fit_yratio = (float)bcam->full_height*bcam->pixelaspect.y;
bool horizontal_fit;
float sensor_size;
@@ -386,7 +386,7 @@ void BlenderSync::sync_camera(BL::RenderSettings b_render, BL::Object b_override
blender_camera_sync(cam, &bcam, width, height);
}
-void BlenderSync::sync_camera_motion(BL::Object b_ob, int motion)
+void BlenderSync::sync_camera_motion(BL::Object b_ob, float motion_time)
{
Camera *cam = scene->camera;
@@ -394,12 +394,14 @@ void BlenderSync::sync_camera_motion(BL::Object b_ob, int motion)
tfm = blender_camera_matrix(tfm, cam->type);
if(tfm != cam->matrix) {
- if(motion == -1)
+ if(motion_time == -1.0f) {
cam->motion.pre = tfm;
- else
+ cam->use_motion = true;
+ }
+ else if(motion_time == 1.0f) {
cam->motion.post = tfm;
-
- cam->use_motion = true;
+ cam->use_motion = true;
+ }
}
}
@@ -563,10 +565,10 @@ BufferParams BlenderSync::get_buffer_params(BL::RenderSettings b_render, BL::Sce
if(use_border) {
/* border render */
- params.full_x = cam->border.left*width;
- params.full_y = cam->border.bottom*height;
- params.width = (int)(cam->border.right*width) - params.full_x;
- params.height = (int)(cam->border.top*height) - params.full_y;
+ params.full_x = (int)(cam->border.left * (float)width);
+ params.full_y = (int)(cam->border.bottom * (float)height);
+ params.width = (int)(cam->border.right * (float)width) - params.full_x;
+ params.height = (int)(cam->border.top * (float)height) - params.full_y;
/* survive in case border goes out of view or becomes too small */
params.width = max(params.width, 1);
diff --git a/intern/cycles/blender/blender_curves.cpp b/intern/cycles/blender/blender_curves.cpp
index 92c51b0aad3..22de7b64273 100644
--- a/intern/cycles/blender/blender_curves.cpp
+++ b/intern/cycles/blender/blender_curves.cpp
@@ -588,7 +588,7 @@ void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CData)
float radius = shaperadius(CData->psys_shape[sys], CData->psys_rootradius[sys], CData->psys_tipradius[sys], time);
if(CData->psys_closetip[sys] && (curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1))
- radius =0.0f;
+ radius = 0.0f;
mesh->add_curve_key(ickey_loc, radius);
if(attr_intercept)
@@ -612,16 +612,23 @@ void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CData)
}
}
-static void ExportCurveSegmentsMotion(Scene *scene, Mesh *mesh, ParticleCurveData *CData, int motion)
+static void ExportCurveSegmentsMotion(Scene *scene, Mesh *mesh, ParticleCurveData *CData, int time_index)
{
+ /* find attribute */
+ Attribute *attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+ bool new_attribute = false;
+
+ /* add new attribute if it doesn't exist already */
+ if(!attr_mP) {
+ attr_mP = mesh->curve_attributes.add(ATTR_STD_MOTION_VERTEX_POSITION);
+ new_attribute = true;
+ }
+
/* export motion vectors for curve keys */
- AttributeStandard std = (motion == -1)? ATTR_STD_MOTION_PRE: ATTR_STD_MOTION_POST;
- Attribute *attr_motion = mesh->curve_attributes.add(std);
- float3 *data_motion = attr_motion->data_float3();
- float3 *current_motion = data_motion;
- size_t size = mesh->curve_keys.size();
- size_t i = 0;
+ size_t numkeys = mesh->curve_keys.size();
+ float4 *mP = attr_mP->data_float4() + time_index*numkeys;
bool have_motion = false;
+ int i = 0;
for(int sys = 0; sys < CData->psys_firstcurve.size(); sys++) {
if(CData->psys_curvenum[sys] == 0)
@@ -633,15 +640,21 @@ static void ExportCurveSegmentsMotion(Scene *scene, Mesh *mesh, ParticleCurveDat
for(int curvekey = CData->curve_firstkey[curve]; curvekey < CData->curve_firstkey[curve] + CData->curve_keynum[curve]; curvekey++) {
if(i < mesh->curve_keys.size()) {
- *current_motion = CData->curvekey_co[curvekey];
+ float3 ickey_loc = CData->curvekey_co[curvekey];
+ float time = CData->curvekey_time[curvekey]/CData->curve_length[curve];
+ float radius = shaperadius(CData->psys_shape[sys], CData->psys_rootradius[sys], CData->psys_tipradius[sys], time);
+
+ if(CData->psys_closetip[sys] && (curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1))
+ radius = 0.0f;
+
+ mP[i] = float3_to_float4(ickey_loc);
+ mP[i].w = radius;
/* unlike mesh coordinates, these tend to be slightly different
* between frames due to particle transforms into/out of object
* space, so we use an epsilon to detect actual changes */
- if(len_squared(*current_motion - mesh->curve_keys[i].co) > 1e-5f*1e-5f)
+ if(len_squared(mP[i] - mesh->curve_keys[i]) > 1e-5f*1e-5f)
have_motion = true;
-
- current_motion++;
}
i++;
@@ -649,8 +662,23 @@ static void ExportCurveSegmentsMotion(Scene *scene, Mesh *mesh, ParticleCurveDat
}
}
- if(i != size || !have_motion)
- mesh->curve_attributes.remove(std);
+ /* in case of new attribute, we verify if there really was any motion */
+ if(new_attribute) {
+ if(i != numkeys || !have_motion) {
+ /* no motion, remove attributes again */
+ mesh->curve_attributes.remove(ATTR_STD_MOTION_VERTEX_POSITION);
+ }
+ else if(time_index > 0) {
+ /* motion, fill up previous steps that we might have skipped because
+ * they had no motion, but we need them anyway now */
+ for(int step = 0; step < time_index; step++) {
+ float4 *mP = attr_mP->data_float4() + step*numkeys;
+
+ for(int key = 0; key < numkeys; key++)
+ mP[key] = mesh->curve_keys[key];
+ }
+ }
+ }
}
void ExportCurveTriangleUV(Mesh *mesh, ParticleCurveData *CData, int vert_offset, int resol, float3 *uvdata)
@@ -796,7 +824,7 @@ void BlenderSync::sync_curve_settings()
curve_system_manager->tag_update(scene);
}
-void BlenderSync::sync_curves(Mesh *mesh, BL::Mesh b_mesh, BL::Object b_ob, int motion)
+void BlenderSync::sync_curves(Mesh *mesh, BL::Mesh b_mesh, BL::Object b_ob, bool motion, int time_index)
{
if(!motion) {
/* Clear stored curve data */
@@ -851,7 +879,7 @@ void BlenderSync::sync_curves(Mesh *mesh, BL::Mesh b_mesh, BL::Object b_ob, int
}
else {
if(motion)
- ExportCurveSegmentsMotion(scene, mesh, &CData, motion);
+ ExportCurveSegmentsMotion(scene, mesh, &CData, time_index);
else
ExportCurveSegments(scene, mesh, &CData);
}
@@ -876,7 +904,7 @@ void BlenderSync::sync_curves(Mesh *mesh, BL::Mesh b_mesh, BL::Object b_ob, int
size_t i = 0;
foreach(Mesh::Curve& curve, mesh->curves) {
- float3 co = mesh->curve_keys[curve.first_key].co;
+ float3 co = float4_to_float3(mesh->curve_keys[curve.first_key]);
generated[i++] = co*size - loc;
}
}
diff --git a/intern/cycles/blender/blender_mesh.cpp b/intern/cycles/blender/blender_mesh.cpp
index 61c6ef6af1b..83514879477 100644
--- a/intern/cycles/blender/blender_mesh.cpp
+++ b/intern/cycles/blender/blender_mesh.cpp
@@ -206,6 +206,40 @@ static void mikk_compute_tangents(BL::Mesh b_mesh, BL::MeshTextureFaceLayer b_la
}
}
+/* Create Volume Attribute */
+
+static void create_mesh_volume_attribute(BL::Object b_ob, Mesh *mesh, ImageManager *image_manager, AttributeStandard std)
+{
+ BL::SmokeDomainSettings b_domain = object_smoke_domain_find(b_ob);
+
+ if(!b_domain)
+ return;
+
+ Attribute *attr = mesh->attributes.add(std);
+ VoxelAttribute *volume_data = attr->data_voxel();
+ bool is_float, is_linear;
+ bool animated = false;
+
+ volume_data->manager = image_manager;
+ volume_data->slot = image_manager->add_image(Attribute::standard_name(std),
+ b_ob.ptr.data, animated, is_float, is_linear, INTERPOLATION_LINEAR, true);
+}
+
+static void create_mesh_volume_attributes(Scene *scene, BL::Object b_ob, Mesh *mesh)
+{
+ /* for smoke volume rendering */
+ if(mesh->need_attribute(scene, ATTR_STD_VOLUME_DENSITY))
+ create_mesh_volume_attribute(b_ob, mesh, scene->image_manager, ATTR_STD_VOLUME_DENSITY);
+ if(mesh->need_attribute(scene, ATTR_STD_VOLUME_COLOR))
+ create_mesh_volume_attribute(b_ob, mesh, scene->image_manager, ATTR_STD_VOLUME_COLOR);
+ if(mesh->need_attribute(scene, ATTR_STD_VOLUME_FLAME))
+ create_mesh_volume_attribute(b_ob, mesh, scene->image_manager, ATTR_STD_VOLUME_FLAME);
+ if(mesh->need_attribute(scene, ATTR_STD_VOLUME_HEAT))
+ create_mesh_volume_attribute(b_ob, mesh, scene->image_manager, ATTR_STD_VOLUME_HEAT);
+ if(mesh->need_attribute(scene, ATTR_STD_VOLUME_VELOCITY))
+ create_mesh_volume_attribute(b_ob, mesh, scene->image_manager, ATTR_STD_VOLUME_VELOCITY);
+}
+
/* Create Mesh */
static void create_mesh(Scene *scene, Mesh *mesh, BL::Mesh b_mesh, const vector<uint>& used_shaders)
@@ -214,6 +248,7 @@ static void create_mesh(Scene *scene, Mesh *mesh, BL::Mesh b_mesh, const vector<
int numverts = b_mesh.vertices.length();
int numfaces = b_mesh.tessfaces.length();
int numtris = 0;
+ bool use_loop_normals = b_mesh.use_auto_smooth();
BL::Mesh::vertices_iterator v;
BL::Mesh::tessfaces_iterator f;
@@ -236,6 +271,21 @@ static void create_mesh(Scene *scene, Mesh *mesh, BL::Mesh b_mesh, const vector<
for(b_mesh.vertices.begin(v); v != b_mesh.vertices.end(); ++v, ++N)
*N = get_float3(v->normal());
+ N = attr_N->data_float3();
+
+ /* create generated coordinates from undeformed coordinates */
+ if(mesh->need_attribute(scene, ATTR_STD_GENERATED)) {
+ Attribute *attr = mesh->attributes.add(ATTR_STD_GENERATED);
+
+ float3 loc, size;
+ mesh_texture_space(b_mesh, loc, size);
+
+ float3 *generated = attr->data_float3();
+ size_t i = 0;
+
+ for(b_mesh.vertices.begin(v); v != b_mesh.vertices.end(); ++v)
+ generated[i++] = get_float3(v->undeformed_co())*size - loc;
+ }
/* create faces */
vector<int> nverts(numfaces);
@@ -248,9 +298,32 @@ static void create_mesh(Scene *scene, Mesh *mesh, BL::Mesh b_mesh, const vector<
int shader = used_shaders[mi];
bool smooth = f->use_smooth();
+ /* split vertices if normal is different
+ *
+ * note all vertex attributes must have been set here so we can split
+ * and copy attributes in split_vertex without remapping later */
+ if(use_loop_normals) {
+ BL::Array<float, 12> loop_normals = f->split_normals();
+
+ for(int i = 0; i < n; i++) {
+ float3 loop_N = make_float3(loop_normals[i * 3], loop_normals[i * 3 + 1], loop_normals[i * 3 + 2]);
+
+ if(N[vi[i]] != loop_N) {
+ int new_vi = mesh->split_vertex(vi[i]);
+
+ /* set new normal and vertex index */
+ N = attr_N->data_float3();
+ N[new_vi] = loop_N;
+ vi[i] = new_vi;
+ }
+ }
+ }
+
+ /* create triangles */
if(n == 4) {
if(is_zero(cross(mesh->verts[vi[1]] - mesh->verts[vi[0]], mesh->verts[vi[2]] - mesh->verts[vi[0]])) ||
- is_zero(cross(mesh->verts[vi[2]] - mesh->verts[vi[0]], mesh->verts[vi[3]] - mesh->verts[vi[0]]))) {
+ is_zero(cross(mesh->verts[vi[2]] - mesh->verts[vi[0]], mesh->verts[vi[3]] - mesh->verts[vi[0]])))
+ {
mesh->set_triangle(ti++, vi[0], vi[1], vi[3], shader, smooth);
mesh->set_triangle(ti++, vi[2], vi[3], vi[1], shader, smooth);
}
@@ -348,20 +421,6 @@ static void create_mesh(Scene *scene, Mesh *mesh, BL::Mesh b_mesh, const vector<
}
}
- /* create generated coordinates from undeformed coordinates */
- if(mesh->need_attribute(scene, ATTR_STD_GENERATED)) {
- Attribute *attr = mesh->attributes.add(ATTR_STD_GENERATED);
-
- float3 loc, size;
- mesh_texture_space(b_mesh, loc, size);
-
- float3 *generated = attr->data_float3();
- size_t i = 0;
-
- for(b_mesh.vertices.begin(v); v != b_mesh.vertices.end(); ++v)
- generated[i++] = get_float3(v->undeformed_co())*size - loc;
- }
-
/* for volume objects, create a matrix to transform from object space to
* mesh texture space. this does not work with deformations but that can
* probably only be done well with a volume grid mapping of coordinates */
@@ -414,7 +473,7 @@ static void create_subd_mesh(Scene *scene, Mesh *mesh, BL::Mesh b_mesh, PointerR
//sdparams.camera = scene->camera;
/* tesselate */
- DiagSplit dsplit(sdparams);;
+ DiagSplit dsplit(sdparams);
sdmesh.tessellate(&dsplit);
}
@@ -449,6 +508,7 @@ Mesh *BlenderSync::sync_mesh(BL::Object b_ob, bool object_updated, bool hide_tri
Mesh *mesh;
if(!mesh_map.sync(&mesh, key)) {
+
/* if transform was applied to mesh, need full update */
if(object_updated && mesh->transform_applied);
/* test if shaders changed, these can be object level so mesh
@@ -481,7 +541,7 @@ Mesh *BlenderSync::sync_mesh(BL::Object b_ob, bool object_updated, bool hide_tri
/* compares curve_keys rather than strands in order to handle quick hair
* adjustsments in dynamic BVH - other methods could probably do this better*/
- vector<Mesh::CurveKey> oldcurve_keys = mesh->curve_keys;
+ vector<float4> oldcurve_keys = mesh->curve_keys;
mesh->clear();
mesh->used_shaders = used_shaders;
@@ -500,10 +560,12 @@ Mesh *BlenderSync::sync_mesh(BL::Object b_ob, bool object_updated, bool hide_tri
create_subd_mesh(scene, mesh, b_mesh, &cmesh, used_shaders);
else
create_mesh(scene, mesh, b_mesh, used_shaders);
+
+ create_mesh_volume_attributes(scene, b_ob, mesh);
}
if(render_layer.use_hair)
- sync_curves(mesh, b_mesh, b_ob, 0);
+ sync_curves(mesh, b_mesh, b_ob, false);
/* free derived mesh */
b_data.meshes.remove(b_mesh);
@@ -535,7 +597,7 @@ Mesh *BlenderSync::sync_mesh(BL::Object b_ob, bool object_updated, bool hide_tri
if(oldcurve_keys.size() != mesh->curve_keys.size())
rebuild = true;
else if(oldcurve_keys.size()) {
- if(memcmp(&oldcurve_keys[0], &mesh->curve_keys[0], sizeof(Mesh::CurveKey)*oldcurve_keys.size()) != 0)
+ if(memcmp(&oldcurve_keys[0], &mesh->curve_keys[0], sizeof(float4)*oldcurve_keys.size()) != 0)
rebuild = true;
}
@@ -544,46 +606,153 @@ Mesh *BlenderSync::sync_mesh(BL::Object b_ob, bool object_updated, bool hide_tri
return mesh;
}
-void BlenderSync::sync_mesh_motion(BL::Object b_ob, Mesh *mesh, int motion)
+void BlenderSync::sync_mesh_motion(BL::Object b_ob, Object *object, float motion_time)
{
- /* todo: displacement, subdivision */
- size_t size = mesh->verts.size();
-
- /* skip objects without deforming modifiers. this is not a totally reliable,
- * would need a more extensive check to see which objects are animated */
- if(!size || !ccl::BKE_object_is_deform_modified(b_ob, b_scene, preview))
- return;
-
/* ensure we only sync instanced meshes once */
+ Mesh *mesh = object->mesh;
+
if(mesh_motion_synced.find(mesh) != mesh_motion_synced.end())
return;
mesh_motion_synced.insert(mesh);
- /* get derived mesh */
- BL::Mesh b_mesh = object_to_mesh(b_data, b_ob, b_scene, true, !preview, false);
+ /* for motion pass always compute, for motion blur it can be disabled */
+ int time_index = 0;
+
+ if(scene->need_motion() == Scene::MOTION_BLUR) {
+ if(!mesh->use_motion_blur)
+ return;
+
+ /* see if this mesh needs motion data at this time */
+ vector<float> object_times = object->motion_times();
+ bool found = false;
+
+ foreach(float object_time, object_times) {
+ if(motion_time == object_time) {
+ found = true;
+ break;
+ }
+ else
+ time_index++;
+ }
- if(b_mesh) {
- BL::Mesh::vertices_iterator v;
- AttributeStandard std = (motion == -1)? ATTR_STD_MOTION_PRE: ATTR_STD_MOTION_POST;
- Attribute *attr_M = mesh->attributes.add(std);
- float3 *M = attr_M->data_float3(), *cur_M;
- size_t i = 0;
+ if(!found)
+ return;
+ }
+ else {
+ if(motion_time == -1.0f)
+ time_index = 0;
+ else if(motion_time == 1.0f)
+ time_index = 1;
+ else
+ return;
+ }
+
+ /* skip empty meshes */
+ size_t numverts = mesh->verts.size();
+ size_t numkeys = mesh->curve_keys.size();
+
+ if(!numverts && !numkeys)
+ return;
+
+ /* skip objects without deforming modifiers. this is not totally reliable,
+ * would need a more extensive check to see which objects are animated */
+ BL::Mesh b_mesh(PointerRNA_NULL);
+
+ if(ccl::BKE_object_is_deform_modified(b_ob, b_scene, preview)) {
+ /* get derived mesh */
+ b_mesh = object_to_mesh(b_data, b_ob, b_scene, true, !preview, false);
+ }
- for(b_mesh.vertices.begin(v), cur_M = M; v != b_mesh.vertices.end() && i < size; ++v, cur_M++, i++)
- *cur_M = get_float3(v->co());
+ if(!b_mesh) {
+ /* if we have no motion blur on this frame, but on other frames, copy */
+ if(numverts) {
+ /* triangles */
+ Attribute *attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+
+ if(attr_mP) {
+ Attribute *attr_mN = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_NORMAL);
+ Attribute *attr_N = mesh->attributes.find(ATTR_STD_VERTEX_NORMAL);
+ float3 *P = &mesh->verts[0];
+ float3 *N = (attr_N)? attr_N->data_float3(): NULL;
+
+ memcpy(attr_mP->data_float3() + time_index*numverts, P, sizeof(float3)*numverts);
+ if(attr_mN)
+ memcpy(attr_mN->data_float3() + time_index*numverts, N, sizeof(float3)*numverts);
+ }
+ }
+
+ if(numkeys) {
+ /* curves */
+ Attribute *attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+
+ if(attr_mP) {
+ float4 *keys = &mesh->curve_keys[0];
+ memcpy(attr_mP->data_float4() + time_index*numkeys, keys, sizeof(float4)*numkeys);
+ }
+ }
+
+ return;
+ }
+
+ if(numverts) {
+ /* find attributes */
+ Attribute *attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+ Attribute *attr_mN = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_NORMAL);
+ Attribute *attr_N = mesh->attributes.find(ATTR_STD_VERTEX_NORMAL);
+ bool new_attribute = false;
+
+ /* add new attributes if they don't exist already */
+ if(!attr_mP) {
+ attr_mP = mesh->attributes.add(ATTR_STD_MOTION_VERTEX_POSITION);
+ if(attr_N)
+ attr_mN = mesh->attributes.add(ATTR_STD_MOTION_VERTEX_NORMAL);
- /* if number of vertices changed, or if coordinates stayed the same, drop it */
- if(i != size || memcmp(M, &mesh->verts[0], sizeof(float3)*size) == 0)
- mesh->attributes.remove(std);
+ new_attribute = true;
+ }
+
+ /* load vertex data from mesh */
+ float3 *mP = attr_mP->data_float3() + time_index*numverts;
+ float3 *mN = (attr_mN)? attr_mN->data_float3() + time_index*numverts: NULL;
+
+ BL::Mesh::vertices_iterator v;
+ int i = 0;
- /* hair motion */
- if(render_layer.use_hair)
- sync_curves(mesh, b_mesh, b_ob, motion);
+ for(b_mesh.vertices.begin(v); v != b_mesh.vertices.end() && i < numverts; ++v, ++i) {
+ mP[i] = get_float3(v->co());
+ if(mN)
+ mN[i] = get_float3(v->normal());
+ }
- /* free derived mesh */
- b_data.meshes.remove(b_mesh);
+ /* in case of new attribute, we verify if there really was any motion */
+ if(new_attribute) {
+ if(i != numverts || memcmp(mP, &mesh->verts[0], sizeof(float3)*numverts) == 0) {
+ /* no motion, remove attributes again */
+ mesh->attributes.remove(ATTR_STD_MOTION_VERTEX_POSITION);
+ if(attr_mN)
+ mesh->attributes.remove(ATTR_STD_MOTION_VERTEX_NORMAL);
+ }
+ else if(time_index > 0) {
+ /* motion, fill up previous steps that we might have skipped because
+ * they had no motion, but we need them anyway now */
+ float3 *P = &mesh->verts[0];
+ float3 *N = (attr_N)? attr_N->data_float3(): NULL;
+
+ for(int step = 0; step < time_index; step++) {
+ memcpy(attr_mP->data_float3() + step*numverts, P, sizeof(float3)*numverts);
+ if(attr_mN)
+ memcpy(attr_mN->data_float3() + step*numverts, N, sizeof(float3)*numverts);
+ }
+ }
+ }
}
+
+ /* hair motion */
+ if(numkeys)
+ sync_curves(mesh, b_mesh, b_ob, true, time_index);
+
+ /* free derived mesh */
+ b_data.meshes.remove(b_mesh);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/blender/blender_object.cpp b/intern/cycles/blender/blender_object.cpp
index cc52717fdb6..167647608a5 100644
--- a/intern/cycles/blender/blender_object.cpp
+++ b/intern/cycles/blender/blender_object.cpp
@@ -38,7 +38,11 @@ CCL_NAMESPACE_BEGIN
bool BlenderSync::BKE_object_is_modified(BL::Object b_ob)
{
/* test if we can instance or if the object is modified */
- if(ccl::BKE_object_is_modified(b_ob, b_scene, preview)) {
+ if(b_ob.type() == BL::Object::type_META) {
+ /* multi-user and dupli metaballs are fused, can't instance */
+ return true;
+ }
+ else if(ccl::BKE_object_is_modified(b_ob, b_scene, preview)) {
/* modifiers */
return true;
}
@@ -213,9 +217,11 @@ void BlenderSync::sync_background_light()
/* Object */
-Object *BlenderSync::sync_object(BL::Object b_parent, int persistent_id[OBJECT_PERSISTENT_ID_SIZE], BL::DupliObject b_dupli_ob, Transform& tfm, uint layer_flag, int motion, bool hide_tris)
+Object *BlenderSync::sync_object(BL::Object b_parent, int persistent_id[OBJECT_PERSISTENT_ID_SIZE], BL::DupliObject b_dupli_ob,
+ Transform& tfm, uint layer_flag, float motion_time, bool hide_tris)
{
BL::Object b_ob = (b_dupli_ob ? b_dupli_ob.object() : b_parent);
+ bool motion = motion_time != 0.0f;
/* light is handled separately */
if(object_is_light(b_ob)) {
@@ -238,19 +244,22 @@ Object *BlenderSync::sync_object(BL::Object b_parent, int persistent_id[OBJECT_P
if(motion) {
object = object_map.find(key);
- if(object) {
+ if(object && (scene->need_motion() == Scene::MOTION_PASS || object_use_motion(b_ob))) {
+ /* object transformation */
if(tfm != object->tfm) {
- if(motion == -1)
+ if(motion_time == -1.0f) {
object->motion.pre = tfm;
- else
+ object->use_motion = true;
+ }
+ else if(motion_time == 1.0f) {
object->motion.post = tfm;
-
- object->use_motion = true;
+ object->use_motion = true;
+ }
}
- /* mesh deformation blur not supported yet */
- if(!scene->integrator->motion_blur)
- sync_mesh_motion(b_ob, object->mesh, motion);
+ /* mesh deformation */
+ if(object->mesh)
+ sync_mesh_motion(b_ob, object, motion_time);
}
return object;
@@ -310,6 +319,24 @@ Object *BlenderSync::sync_object(BL::Object b_parent, int persistent_id[OBJECT_P
object->motion.post = tfm;
object->use_motion = false;
+ /* motion blur */
+ if(scene->need_motion() == Scene::MOTION_BLUR && object->mesh) {
+ Mesh *mesh = object->mesh;
+
+ mesh->use_motion_blur = false;
+
+ if(object_use_motion(b_ob)) {
+ if(object_use_deform_motion(b_ob)) {
+ mesh->motion_steps = object_motion_steps(b_ob);
+ mesh->use_motion_blur = true;
+ }
+
+ vector<float> times = object->motion_times();
+ foreach(float time, times)
+ motion_times.insert(time);
+ }
+ }
+
/* random number */
object->random_id = hash_string(object->name.c_str());
@@ -408,10 +435,11 @@ static bool object_render_hide_duplis(BL::Object b_ob)
/* Object Loop */
-void BlenderSync::sync_objects(BL::SpaceView3D b_v3d, int motion)
+void BlenderSync::sync_objects(BL::SpaceView3D b_v3d, float motion_time)
{
/* layer data */
uint scene_layer = render_layer.scene_layer;
+ bool motion = motion_time != 0.0f;
if(!motion) {
/* prepare for sync */
@@ -420,36 +448,40 @@ void BlenderSync::sync_objects(BL::SpaceView3D b_v3d, int motion)
object_map.pre_sync();
mesh_synced.clear();
particle_system_map.pre_sync();
+ motion_times.clear();
}
else {
mesh_motion_synced.clear();
}
/* object loop */
- BL::Scene::objects_iterator b_ob;
+ BL::Scene::object_bases_iterator b_base;
BL::Scene b_sce = b_scene;
-
- /* global particle index counter */
- int particle_id = 1;
+ /* modifier result type (not exposed as enum in C++ API)
+ * 1 : eModifierMode_Realtime
+ * 2 : eModifierMode_Render
+ */
+ int dupli_settings = preview ? 1 : 2;
bool cancel = false;
for(; b_sce && !cancel; b_sce = b_sce.background_set()) {
- for(b_sce.objects.begin(b_ob); b_ob != b_sce.objects.end() && !cancel; ++b_ob) {
- bool hide = (render_layer.use_viewport_visibility)? b_ob->hide(): b_ob->hide_render();
- uint ob_layer = get_layer(b_ob->layers(), b_ob->layers_local_view(), render_layer.use_localview, object_is_light(*b_ob));
+ for(b_sce.object_bases.begin(b_base); b_base != b_sce.object_bases.end() && !cancel; ++b_base) {
+ BL::Object b_ob = b_base->object();
+ bool hide = (render_layer.use_viewport_visibility)? b_ob.hide(): b_ob.hide_render();
+ uint ob_layer = get_layer(b_base->layers(), b_base->layers_local_view(), render_layer.use_localview, object_is_light(b_ob));
hide = hide || !(ob_layer & scene_layer);
if(!hide) {
- progress.set_sync_status("Synchronizing object", (*b_ob).name());
+ progress.set_sync_status("Synchronizing object", b_ob.name());
- if(b_ob->is_duplicator() && !object_render_hide_duplis(*b_ob)) {
+ if(b_ob.is_duplicator() && !object_render_hide_duplis(b_ob)) {
/* dupli objects */
- b_ob->dupli_list_create(b_scene, 2);
+ b_ob.dupli_list_create(b_scene, dupli_settings);
BL::Object::dupli_list_iterator b_dup;
- for(b_ob->dupli_list.begin(b_dup); b_dup != b_ob->dupli_list.end(); ++b_dup) {
+ for(b_ob.dupli_list.begin(b_dup); b_dup != b_ob.dupli_list.end(); ++b_dup) {
Transform tfm = get_transform(b_dup->matrix());
BL::Object b_dup_ob = b_dup->object();
bool dup_hide = (b_v3d)? b_dup_ob.hide(): b_dup_ob.hide_render();
@@ -462,32 +494,27 @@ void BlenderSync::sync_objects(BL::SpaceView3D b_v3d, int motion)
BL::Array<int, OBJECT_PERSISTENT_ID_SIZE> persistent_id = b_dup->persistent_id();
/* sync object and mesh or light data */
- Object *object = sync_object(*b_ob, persistent_id.data, *b_dup, tfm, ob_layer, motion, hide_tris);
+ Object *object = sync_object(b_ob, persistent_id.data, *b_dup, tfm, ob_layer, motion_time, hide_tris);
/* sync possible particle data, note particle_id
* starts counting at 1, first is dummy particle */
- if(!motion && object && sync_dupli_particle(*b_ob, *b_dup, object)) {
- if(particle_id != object->particle_id) {
- object->particle_id = particle_id;
- scene->object_manager->tag_update(scene);
- }
-
- particle_id++;
+ if(!motion && object) {
+ sync_dupli_particle(b_ob, *b_dup, object);
}
}
}
- b_ob->dupli_list_clear();
+ b_ob.dupli_list_clear();
}
/* test if object needs to be hidden */
bool hide_tris;
- if(!object_render_hide(*b_ob, true, true, hide_tris)) {
+ if(!object_render_hide(b_ob, true, true, hide_tris)) {
/* object itself */
- Transform tfm = get_transform(b_ob->matrix_world());
- sync_object(*b_ob, NULL, PointerRNA_NULL, tfm, ob_layer, motion, hide_tris);
+ Transform tfm = get_transform(b_ob.matrix_world());
+ sync_object(b_ob, NULL, PointerRNA_NULL, tfm, ob_layer, motion_time, hide_tris);
}
}
@@ -527,31 +554,46 @@ void BlenderSync::sync_motion(BL::SpaceView3D b_v3d, BL::Object b_override, void
b_cam = b_override;
Camera prevcam = *(scene->camera);
-
- /* go back and forth one frame */
- int frame = b_scene.frame_current();
- for(int motion = -1; motion <= 1; motion += 2) {
- /* we need to set the python thread state again because this
- * function assumes it is being executed from python and will
- * try to save the thread state */
+ int frame_center = b_scene.frame_current();
+
+ /* always sample these times for camera motion */
+ motion_times.insert(-1.0f);
+ motion_times.insert(1.0f);
+
+ /* note iteration over motion_times set happens in sorted order */
+ foreach(float relative_time, motion_times) {
+ /* fixed shutter time to get previous and next frame for motion pass */
+ float shuttertime;
+
+ if(scene->need_motion() == Scene::MOTION_PASS)
+ shuttertime = 2.0f;
+ else
+ shuttertime = scene->camera->shuttertime;
+
+ /* compute frame and subframe time */
+ float time = frame_center + relative_time * shuttertime * 0.5f;
+ int frame = (int)floorf(time);
+ float subframe = time - frame;
+
+ /* change frame */
python_thread_state_restore(python_thread_state);
- b_scene.frame_set(frame + motion, 0.0f);
+ b_scene.frame_set(frame, subframe);
python_thread_state_save(python_thread_state);
- /* camera object */
- if(b_cam)
- sync_camera_motion(b_cam, motion);
+ /* sync camera, only supports two times at the moment */
+ if(relative_time == -1.0f || relative_time == 1.0f)
+ sync_camera_motion(b_cam, relative_time);
- /* mesh objects */
- sync_objects(b_v3d, motion);
+ /* sync object */
+ sync_objects(b_v3d, relative_time);
}
/* we need to set the python thread state again because this
* function assumes it is being executed from python and will
* try to save the thread state */
python_thread_state_restore(python_thread_state);
- b_scene.frame_set(frame, 0.0f);
+ b_scene.frame_set(frame_center, 0.0f);
python_thread_state_save(python_thread_state);
/* tag camera for motion update */
diff --git a/intern/cycles/blender/blender_particles.cpp b/intern/cycles/blender/blender_particles.cpp
index ef832ed39c0..5b2782ec2ac 100644
--- a/intern/cycles/blender/blender_particles.cpp
+++ b/intern/cycles/blender/blender_particles.cpp
@@ -76,6 +76,11 @@ bool BlenderSync::sync_dupli_particle(BL::Object b_ob, BL::DupliObject b_dup, Ob
psys->particles.push_back(pa);
+ if (object->particle_index != psys->particles.size() - 1)
+ scene->object_manager->tag_update(scene);
+ object->particle_system = psys;
+ object->particle_index = psys->particles.size() - 1;
+
/* return that this object has particle data */
return true;
}
diff --git a/intern/cycles/blender/blender_python.cpp b/intern/cycles/blender/blender_python.cpp
index e08b7980e78..872f891cc2a 100644
--- a/intern/cycles/blender/blender_python.cpp
+++ b/intern/cycles/blender/blender_python.cpp
@@ -147,6 +147,38 @@ static PyObject *render_func(PyObject *self, PyObject *value)
Py_RETURN_NONE;
}
+/* pixel_array and result passed as pointers */
+static PyObject *bake_func(PyObject *self, PyObject *args)
+{
+ PyObject *pysession, *pyobject;
+ PyObject *pypixel_array, *pyresult;
+ const char *pass_type;
+ int num_pixels, depth;
+
+ if(!PyArg_ParseTuple(args, "OOsOiiO", &pysession, &pyobject, &pass_type, &pypixel_array, &num_pixels, &depth, &pyresult))
+ return NULL;
+
+ Py_BEGIN_ALLOW_THREADS
+
+ BlenderSession *session = (BlenderSession*)PyLong_AsVoidPtr(pysession);
+
+ PointerRNA objectptr;
+ RNA_id_pointer_create((ID*)PyLong_AsVoidPtr(pyobject), &objectptr);
+ BL::Object b_object(objectptr);
+
+ void *b_result = PyLong_AsVoidPtr(pyresult);
+
+ PointerRNA bakepixelptr;
+ RNA_id_pointer_create((ID*)PyLong_AsVoidPtr(pypixel_array), &bakepixelptr);
+ BL::BakePixel b_bake_pixel(bakepixelptr);
+
+ session->bake(b_object, pass_type, b_bake_pixel, num_pixels, depth, (float *)b_result);
+
+ Py_END_ALLOW_THREADS
+
+ Py_RETURN_NONE;
+}
+
static PyObject *draw_func(PyObject *self, PyObject *args)
{
PyObject *pysession, *pyv3d, *pyrv3d;
@@ -285,7 +317,8 @@ static PyObject *osl_update_node_func(PyObject *self, PyObject *args)
}
else if(param->type.vecsemantics == TypeDesc::POINT ||
param->type.vecsemantics == TypeDesc::VECTOR ||
- param->type.vecsemantics == TypeDesc::NORMAL) {
+ param->type.vecsemantics == TypeDesc::NORMAL)
+ {
socket_type = "NodeSocketVector";
data_type = BL::NodeSocket::type_VECTOR;
@@ -418,6 +451,7 @@ static PyMethodDef methods[] = {
{"create", create_func, METH_VARARGS, ""},
{"free", free_func, METH_O, ""},
{"render", render_func, METH_O, ""},
+ {"bake", bake_func, METH_VARARGS, ""},
{"draw", draw_func, METH_VARARGS, ""},
{"sync", sync_func, METH_O, ""},
{"reset", reset_func, METH_VARARGS, ""},
@@ -493,7 +527,7 @@ void *CCL_python_module_init()
/* TODO(sergey): This gives us library we've been linking against.
* In theory with dynamic OSL library it might not be
* accurate, but there's nothing in OSL API which we
- * might use th get version in runtime.
+ * might use to get version in runtime.
*/
int curversion = OSL_LIBRARY_VERSION_CODE;
PyModule_AddObject(mod, "with_osl", Py_True);
diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp
index ef578493901..01a5acd8982 100644
--- a/intern/cycles/blender/blender_session.cpp
+++ b/intern/cycles/blender/blender_session.cpp
@@ -14,6 +14,8 @@
* limitations under the License
*/
+#include <stdlib.h>
+
#include "background.h"
#include "buffers.h"
#include "camera.h"
@@ -21,6 +23,8 @@
#include "integrator.h"
#include "film.h"
#include "light.h"
+#include "mesh.h"
+#include "object.h"
#include "scene.h"
#include "session.h"
#include "shader.h"
@@ -93,6 +97,11 @@ void BlenderSession::create_session()
/* create scene */
scene = new Scene(scene_params, session_params.device);
+ /* setup callbacks for builtin image support */
+ scene->image_manager->builtin_image_info_cb = function_bind(&BlenderSession::builtin_image_info, this, _1, _2, _3, _4, _5, _6, _7);
+ scene->image_manager->builtin_image_pixels_cb = function_bind(&BlenderSession::builtin_image_pixels, this, _1, _2, _3);
+ scene->image_manager->builtin_image_float_pixels_cb = function_bind(&BlenderSession::builtin_image_float_pixels, this, _1, _2, _3);
+
/* create session */
session = new Session(session_params);
session->scene = scene;
@@ -121,11 +130,6 @@ void BlenderSession::create_session()
session->reset(buffer_params, session_params.samples);
b_engine.use_highlight_tiles(session_params.progressive_refine == false);
-
- /* setup callbacks for builtin image support */
- scene->image_manager->builtin_image_info_cb = function_bind(&BlenderSession::builtin_image_info, this, _1, _2, _3, _4, _5, _6);
- scene->image_manager->builtin_image_pixels_cb = function_bind(&BlenderSession::builtin_image_pixels, this, _1, _2, _3);
- scene->image_manager->builtin_image_float_pixels_cb = function_bind(&BlenderSession::builtin_image_float_pixels, this, _1, _2, _3);
}
void BlenderSession::reset_session(BL::BlendData b_data_, BL::Scene b_scene_)
@@ -259,6 +263,58 @@ static PassType get_pass_type(BL::RenderPass b_pass)
return PASS_NONE;
}
+static ShaderEvalType get_shader_type(const string& pass_type)
+{
+ const char *shader_type = pass_type.c_str();
+
+ /* data passes */
+ if(strcmp(shader_type, "NORMAL")==0)
+ return SHADER_EVAL_NORMAL;
+ else if(strcmp(shader_type, "UV")==0)
+ return SHADER_EVAL_UV;
+ else if(strcmp(shader_type, "DIFFUSE_COLOR")==0)
+ return SHADER_EVAL_DIFFUSE_COLOR;
+ else if(strcmp(shader_type, "GLOSSY_COLOR")==0)
+ return SHADER_EVAL_GLOSSY_COLOR;
+ else if(strcmp(shader_type, "TRANSMISSION_COLOR")==0)
+ return SHADER_EVAL_TRANSMISSION_COLOR;
+ else if(strcmp(shader_type, "SUBSURFACE_COLOR")==0)
+ return SHADER_EVAL_SUBSURFACE_COLOR;
+ else if(strcmp(shader_type, "EMIT")==0)
+ return SHADER_EVAL_EMISSION;
+
+ /* light passes */
+ else if(strcmp(shader_type, "AO")==0)
+ return SHADER_EVAL_AO;
+ else if(strcmp(shader_type, "COMBINED")==0)
+ return SHADER_EVAL_COMBINED;
+ else if(strcmp(shader_type, "SHADOW")==0)
+ return SHADER_EVAL_SHADOW;
+ else if(strcmp(shader_type, "DIFFUSE_DIRECT")==0)
+ return SHADER_EVAL_DIFFUSE_DIRECT;
+ else if(strcmp(shader_type, "GLOSSY_DIRECT")==0)
+ return SHADER_EVAL_GLOSSY_DIRECT;
+ else if(strcmp(shader_type, "TRANSMISSION_DIRECT")==0)
+ return SHADER_EVAL_TRANSMISSION_DIRECT;
+ else if(strcmp(shader_type, "SUBSURFACE_DIRECT")==0)
+ return SHADER_EVAL_SUBSURFACE_DIRECT;
+ else if(strcmp(shader_type, "DIFFUSE_INDIRECT")==0)
+ return SHADER_EVAL_DIFFUSE_INDIRECT;
+ else if(strcmp(shader_type, "GLOSSY_INDIRECT")==0)
+ return SHADER_EVAL_GLOSSY_INDIRECT;
+ else if(strcmp(shader_type, "TRANSMISSION_INDIRECT")==0)
+ return SHADER_EVAL_TRANSMISSION_INDIRECT;
+ else if(strcmp(shader_type, "SUBSURFACE_INDIRECT")==0)
+ return SHADER_EVAL_SUBSURFACE_INDIRECT;
+
+ /* extra */
+ else if(strcmp(shader_type, "ENVIRONMENT")==0)
+ return SHADER_EVAL_ENVIRONMENT;
+
+ else
+ return SHADER_EVAL_BAKE;
+}
+
static BL::RenderResult begin_render_result(BL::RenderEngine b_engine, int x, int y, int w, int h, const char *layername)
{
return b_engine.begin_result(x, y, w, h, layername);
@@ -425,6 +481,105 @@ void BlenderSession::render()
sync = NULL;
}
+/* Fill the first `num_pixels` slots of `data` from the bake pixel list that
+ * starts at `pixel_array`, stepping through the list with next(). */
+static void populate_bake_data(BakeData *data, BL::BakePixel pixel_array, const int num_pixels)
+{
+	BL::BakePixel pixel = pixel_array;
+
+	for(int slot = 0; slot < num_pixels; slot++) {
+		data->set(slot,
+		          pixel.primitive_id(),
+		          pixel.uv(),
+		          pixel.du_dx(), pixel.du_dy(),
+		          pixel.dv_dx(), pixel.dv_dy());
+
+		/* advance to the following list entry */
+		pixel = pixel.next();
+	}
+}
+
+/* Tell whether `type` is one of the lighting passes: AO, combined, shadow, or
+ * any direct/indirect diffuse, glossy, transmission or subsurface pass. */
+static bool is_light_pass(ShaderEvalType type)
+{
+	return type == SHADER_EVAL_AO ||
+	       type == SHADER_EVAL_COMBINED ||
+	       type == SHADER_EVAL_SHADOW ||
+	       type == SHADER_EVAL_DIFFUSE_DIRECT ||
+	       type == SHADER_EVAL_GLOSSY_DIRECT ||
+	       type == SHADER_EVAL_TRANSMISSION_DIRECT ||
+	       type == SHADER_EVAL_SUBSURFACE_DIRECT ||
+	       type == SHADER_EVAL_DIFFUSE_INDIRECT ||
+	       type == SHADER_EVAL_GLOSSY_INDIRECT ||
+	       type == SHADER_EVAL_TRANSMISSION_INDIRECT ||
+	       type == SHADER_EVAL_SUBSURFACE_INDIRECT;
+}
+
+void BlenderSession::bake(BL::Object b_object, const string& pass_type, BL::BakePixel pixel_array, int num_pixels, int depth, float result[])
+{ /* Bake pass `pass_type` for `b_object` over `num_pixels` entries of `pixel_array`, passing `result` to the bake manager; `depth` is not referenced in this body. */
+ ShaderEvalType shader_type = get_shader_type(pass_type);
+ size_t object_index = OBJECT_NONE; /* stays OBJECT_NONE when no scene object matches by name below */
+ int tri_offset = 0; /* stays 0 when no scene object matches by name below */
+
+ if(shader_type == SHADER_EVAL_UV) {
+ /* force UV to be available */
+ Pass::add(PASS_UV, scene->film->passes);
+ }
+
+ if(is_light_pass(shader_type)) {
+ /* force use_light_pass to be true */
+ Pass::add(PASS_LIGHT, scene->film->passes);
+ }
+
+ /* tag film and integrator for update (no device is created in this function) */
+ scene->film->tag_update(scene);
+ scene->integrator->tag_update(scene);
+
+ /* sync camera and scene data from Blender */
+ sync->sync_camera(b_render, b_engine.camera_override(), width, height);
+ sync->sync_data(b_v3d, b_engine.camera_override(), &python_thread_state);
+
+ /* get buffer parameters */
+ SessionParams session_params = BlenderSync::get_session_params(b_engine, b_userpref, b_scene, background);
+ BufferParams buffer_params = BlenderSync::get_buffer_params(b_render, b_scene, b_v3d, b_rv3d, scene->camera, width, height);
+
+ scene->bake_manager->set_baking(true);
+
+ /* set number of samples, then reset the session and update the scene (first time) */
+ session->tile_manager.set_samples(session_params.samples);
+ session->reset(buffer_params, session_params.samples);
+ session->update_scene();
+
+ /* find object index (first object whose name equals b_object's name). todo: is arbitrary - copied from mesh_displace.cpp */
+ for(size_t i = 0; i < scene->objects.size(); i++) {
+ if(strcmp(scene->objects[i]->name.c_str(), b_object.name().c_str()) == 0) {
+ object_index = i;
+ tri_offset = scene->objects[i]->mesh->tri_offset;
+ break;
+ }
+ }
+
+ /* when used, non-instanced convention: object = ~object */
+ int object = ~object_index; /* NOTE(review): size_t is narrowed to int here, and with no name match this is ~OBJECT_NONE - confirm that is intended */
+
+ BakeData *bake_data = scene->bake_manager->init(object, tri_offset, num_pixels);
+
+ populate_bake_data(bake_data, pixel_array, num_pixels);
+
+ /* set number of samples; NOTE(review): same three calls as before the object lookup, presumably repeated because bake data was just initialized - confirm */
+ session->tile_manager.set_samples(session_params.samples);
+ session->reset(buffer_params, session_params.samples);
+ session->update_scene();
+
+ scene->bake_manager->bake(scene->device, &scene->dscene, scene, session->progress, shader_type, bake_data, result);
+
+ /* free all memory used (host and device), so we wouldn't leave render
+ * engine with extra memory allocated
+ */
+
+ session->device_free();
+
+ delete sync; /* sync is destroyed here and is NULL once bake() returns */
+ sync = NULL;
+}
+
void BlenderSession::do_write_update_render_result(BL::RenderResult b_rr, BL::RenderLayer b_rlay, RenderTile& rtile, bool do_update_only)
{
RenderBuffers *buffers = rtile.buffers;
@@ -592,16 +747,14 @@ bool BlenderSession::draw(int w, int h)
/* draw */
BufferParams buffer_params = BlenderSync::get_buffer_params(b_render, b_scene, b_v3d, b_rv3d, scene->camera, width, height);
+ DeviceDrawParams draw_params;
- if(session->params.display_buffer_linear)
- b_engine.bind_display_space_shader(b_scene);
-
- bool draw_ok = !session->draw(buffer_params);
+ if(session->params.display_buffer_linear) {
+ draw_params.bind_display_space_shader_cb = function_bind(&BL::RenderEngine::bind_display_space_shader, &b_engine, b_scene);
+ draw_params.unbind_display_space_shader_cb = function_bind(&BL::RenderEngine::unbind_display_space_shader, &b_engine);
+ }
- if(session->params.display_buffer_linear)
- b_engine.unbind_display_space_shader();
-
- return draw_ok;
+ return !session->draw(buffer_params, draw_params);
}
void BlenderSession::get_status(string& status, string& substatus)
@@ -726,85 +879,123 @@ int BlenderSession::builtin_image_frame(const string &builtin_name)
return atoi(builtin_name.substr(last + 1, builtin_name.size() - last - 1).c_str());
}
-void BlenderSession::builtin_image_info(const string &builtin_name, void *builtin_data, bool &is_float, int &width, int &height, int &channels)
+void BlenderSession::builtin_image_info(const string &builtin_name, void *builtin_data, bool &is_float, int &width, int &height, int &depth, int &channels)
{
+ /* empty image */
+ is_float = false;
+ width = 0;
+ height = 0;
+ depth = 0;
+ channels = 0;
+
+ if(!builtin_data)
+ return;
+
+ /* recover ID pointer */
PointerRNA ptr;
RNA_id_pointer_create((ID*)builtin_data, &ptr);
- BL::Image b_image(ptr);
+ BL::ID b_id(ptr);
+
+ if(b_id.is_a(&RNA_Image)) {
+ /* image data */
+ BL::Image b_image(b_id);
- if(b_image) {
is_float = b_image.is_float();
width = b_image.size()[0];
height = b_image.size()[1];
+ depth = 1;
channels = b_image.channels();
}
- else {
- is_float = false;
- width = 0;
- height = 0;
- channels = 0;
+ else if(b_id.is_a(&RNA_Object)) {
+ /* smoke volume data */
+ BL::Object b_ob(b_id);
+ BL::SmokeDomainSettings b_domain = object_smoke_domain_find(b_ob);
+
+ if(!b_domain)
+ return;
+
+ if(builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_DENSITY) ||
+ builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_FLAME))
+ channels = 1;
+ else if(builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_COLOR))
+ channels = 4;
+ else
+ return;
+
+ int3 resolution = get_int3(b_domain.domain_resolution());
+ int amplify = (b_domain.use_high_resolution())? b_domain.amplify() + 1: 1;
+
+ width = resolution.x * amplify;
+ height = resolution.y * amplify;
+ depth = resolution.z * amplify;
+
+ is_float = true;
}
}
bool BlenderSession::builtin_image_pixels(const string &builtin_name, void *builtin_data, unsigned char *pixels)
{
+ if(!builtin_data)
+ return false;
+
int frame = builtin_image_frame(builtin_name);
PointerRNA ptr;
RNA_id_pointer_create((ID*)builtin_data, &ptr);
BL::Image b_image(ptr);
- if(b_image) {
- int width = b_image.size()[0];
- int height = b_image.size()[1];
- int channels = b_image.channels();
+ int width = b_image.size()[0];
+ int height = b_image.size()[1];
+ int channels = b_image.channels();
- unsigned char *image_pixels;
- image_pixels = image_get_pixels_for_frame(b_image, frame);
+ unsigned char *image_pixels;
+ image_pixels = image_get_pixels_for_frame(b_image, frame);
- if(image_pixels) {
- memcpy(pixels, image_pixels, width * height * channels * sizeof(unsigned char));
- MEM_freeN(image_pixels);
+ if(image_pixels) {
+ memcpy(pixels, image_pixels, width * height * channels * sizeof(unsigned char));
+ MEM_freeN(image_pixels);
+ }
+ else {
+ if(channels == 1) {
+ memset(pixels, 0, width * height * sizeof(unsigned char));
}
else {
- if(channels == 1) {
- memset(pixels, 0, width * height * sizeof(unsigned char));
- }
- else {
- unsigned char *cp = pixels;
- for(int i = 0; i < width * height; i++, cp += channels) {
- cp[0] = 255;
- cp[1] = 0;
- cp[2] = 255;
- if(channels == 4)
- cp[3] = 255;
- }
+ unsigned char *cp = pixels;
+ for(int i = 0; i < width * height; i++, cp += channels) {
+ cp[0] = 255;
+ cp[1] = 0;
+ cp[2] = 255;
+ if(channels == 4)
+ cp[3] = 255;
}
}
+ }
- /* premultiply, byte images are always straight for blender */
- unsigned char *cp = pixels;
- for(int i = 0; i < width * height; i++, cp += channels) {
- cp[0] = (cp[0] * cp[3]) >> 8;
- cp[1] = (cp[1] * cp[3]) >> 8;
- cp[2] = (cp[2] * cp[3]) >> 8;
- }
-
- return true;
+ /* premultiply, byte images are always straight for blender */
+ unsigned char *cp = pixels;
+ for(int i = 0; i < width * height; i++, cp += channels) {
+ cp[0] = (cp[0] * cp[3]) >> 8;
+ cp[1] = (cp[1] * cp[3]) >> 8;
+ cp[2] = (cp[2] * cp[3]) >> 8;
}
- return false;
+ return true;
}
bool BlenderSession::builtin_image_float_pixels(const string &builtin_name, void *builtin_data, float *pixels)
{
- int frame = builtin_image_frame(builtin_name);
+ if(!builtin_data)
+ return false;
PointerRNA ptr;
RNA_id_pointer_create((ID*)builtin_data, &ptr);
- BL::Image b_image(ptr);
+ BL::ID b_id(ptr);
+
+ if(b_id.is_a(&RNA_Image)) {
+ /* image data */
+ BL::Image b_image(b_id);
+ int frame = builtin_image_frame(builtin_name);
- if(b_image) {
int width = b_image.size()[0];
int height = b_image.size()[1];
int channels = b_image.channels();
@@ -834,6 +1025,51 @@ bool BlenderSession::builtin_image_float_pixels(const string &builtin_name, void
return true;
}
+ else if(b_id.is_a(&RNA_Object)) {
+ /* smoke volume data */
+ BL::Object b_ob(b_id);
+ BL::SmokeDomainSettings b_domain = object_smoke_domain_find(b_ob);
+
+ if(!b_domain)
+ return false;
+
+ int3 resolution = get_int3(b_domain.domain_resolution());
+ int length, amplify = (b_domain.use_high_resolution())? b_domain.amplify() + 1: 1;
+
+ int width = resolution.x * amplify;
+ int height = resolution.y * amplify;
+ int depth = resolution.z * amplify;
+
+ if(builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_DENSITY)) {
+ SmokeDomainSettings_density_grid_get_length(&b_domain.ptr, &length);
+
+ if(length == width*height*depth) {
+ SmokeDomainSettings_density_grid_get(&b_domain.ptr, pixels);
+ return true;
+ }
+ }
+ else if(builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_FLAME)) {
+ /* this is in range 0..1, and interpreted by the OpenGL smoke viewer
+ * as 1500..3000 K with the first part faded to zero density */
+ SmokeDomainSettings_flame_grid_get_length(&b_domain.ptr, &length);
+
+ if(length == width*height*depth) {
+ SmokeDomainSettings_flame_grid_get(&b_domain.ptr, pixels);
+ return true;
+ }
+ }
+ else if(builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_COLOR)) {
+ /* the RGB is "premultiplied" by density for better interpolation results */
+ SmokeDomainSettings_color_grid_get_length(&b_domain.ptr, &length);
+
+ if(length == width*height*depth*4) {
+ SmokeDomainSettings_color_grid_get(&b_domain.ptr, pixels);
+ return true;
+ }
+ }
+
+ fprintf(stderr, "Cycles error: unexpected smoke volume resolution, skipping\n");
+ }
return false;
}
diff --git a/intern/cycles/blender/blender_session.h b/intern/cycles/blender/blender_session.h
index 0568fb291d0..0e44493d674 100644
--- a/intern/cycles/blender/blender_session.h
+++ b/intern/cycles/blender/blender_session.h
@@ -20,6 +20,7 @@
#include "device.h"
#include "scene.h"
#include "session.h"
+#include "bake.h"
#include "util_vector.h"
@@ -51,6 +52,8 @@ public:
/* offline render */
void render();
+ void bake(BL::Object b_object, const string& pass_type, BL::BakePixel pixel_array, int num_pixels, int depth, float pixels[]);
+
void write_render_result(BL::RenderResult b_rr, BL::RenderLayer b_rlay, RenderTile& rtile);
void write_render_tile(RenderTile& rtile);
@@ -99,7 +102,7 @@ protected:
void do_write_update_render_tile(RenderTile& rtile, bool do_update_only);
int builtin_image_frame(const string &builtin_name);
- void builtin_image_info(const string &builtin_name, void *builtin_data, bool &is_float, int &width, int &height, int &channels);
+ void builtin_image_info(const string &builtin_name, void *builtin_data, bool &is_float, int &width, int &height, int &depth, int &channels);
bool builtin_image_pixels(const string &builtin_name, void *builtin_data, unsigned char *pixels);
bool builtin_image_float_pixels(const string &builtin_name, void *builtin_data, float *pixels);
};
diff --git a/intern/cycles/blender/blender_shader.cpp b/intern/cycles/blender/blender_shader.cpp
index 6175c8ea399..ddbb40da7db 100644
--- a/intern/cycles/blender/blender_shader.cpp
+++ b/intern/cycles/blender/blender_shader.cpp
@@ -546,9 +546,11 @@ static ShaderNode *add_node(Scene *scene, BL::BlendData b_data, BL::Scene b_scen
}
image->animated = b_image_node.image_user().use_auto_refresh();
+ image->use_alpha = b_image.use_alpha();
}
image->color_space = ImageTextureNode::color_space_enum[(int)b_image_node.color_space()];
image->projection = ImageTextureNode::projection_enum[(int)b_image_node.projection()];
+ image->interpolation = (InterpolationType)b_image_node.interpolation();
image->projection_blend = b_image_node.projection_blend();
get_tex_mapping(&image->tex_mapping, b_image_node.texture_mapping());
node = image;
@@ -573,6 +575,8 @@ static ShaderNode *add_node(Scene *scene, BL::BlendData b_data, BL::Scene b_scen
env->animated = b_env_node.image_user().use_auto_refresh();
env->builtin_data = NULL;
}
+
+ env->use_alpha = b_image.use_alpha();
}
env->color_space = EnvironmentTextureNode::color_space_enum[(int)b_env_node.color_space()];
env->projection = EnvironmentTextureNode::projection_enum[(int)b_env_node.projection()];
@@ -667,6 +671,13 @@ static ShaderNode *add_node(Scene *scene, BL::BlendData b_data, BL::Scene b_scen
tangent->attribute = b_tangent_node.uv_map();
node = tangent;
}
+ else if (b_node.is_a(&RNA_ShaderNodeUVMap)) {
+ BL::ShaderNodeUVMap b_uvmap_node(b_node);
+ UVMapNode *uvm = new UVMapNode();
+ uvm->attribute = b_uvmap_node.uv_map();
+ uvm->from_dupli = b_uvmap_node.from_dupli();
+ node = uvm;
+ }
if(node)
graph->add(node);
diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp
index 8e2197a2aa6..1f5e32a1123 100644
--- a/intern/cycles/blender/blender_sync.cpp
+++ b/intern/cycles/blender/blender_sync.cpp
@@ -172,6 +172,7 @@ void BlenderSync::sync_integrator()
integrator->transparent_min_bounce = get_int(cscene, "transparent_min_bounces");
integrator->transparent_shadows = get_boolean(cscene, "use_transparent_shadows");
+ integrator->volume_homogeneous_sampling = RNA_enum_get(&cscene, "volume_homogeneous_sampling");
integrator->volume_max_steps = get_int(cscene, "volume_max_steps");
integrator->volume_step_size = get_float(cscene, "volume_step_size");
@@ -197,6 +198,9 @@ void BlenderSync::sync_integrator()
integrator->method = (Integrator::Method)get_enum(cscene, "progressive");
+ integrator->sample_all_lights_direct = get_boolean(cscene, "sample_all_lights_direct");
+ integrator->sample_all_lights_indirect = get_boolean(cscene, "sample_all_lights_indirect");
+
int diffuse_samples = get_int(cscene, "diffuse_samples");
int glossy_samples = get_int(cscene, "glossy_samples");
int transmission_samples = get_int(cscene, "transmission_samples");
diff --git a/intern/cycles/blender/blender_sync.h b/intern/cycles/blender/blender_sync.h
index 205761ad302..9c4175ef690 100644
--- a/intern/cycles/blender/blender_sync.h
+++ b/intern/cycles/blender/blender_sync.h
@@ -71,7 +71,7 @@ private:
/* sync */
void sync_lamps(bool update_all);
void sync_materials(bool update_all);
- void sync_objects(BL::SpaceView3D b_v3d, int motion = 0);
+ void sync_objects(BL::SpaceView3D b_v3d, float motion_time = 0.0f);
void sync_motion(BL::SpaceView3D b_v3d, BL::Object b_override, void **python_thread_state);
void sync_film();
void sync_view();
@@ -81,12 +81,13 @@ private:
void sync_nodes(Shader *shader, BL::ShaderNodeTree b_ntree);
Mesh *sync_mesh(BL::Object b_ob, bool object_updated, bool hide_tris);
- void sync_curves(Mesh *mesh, BL::Mesh b_mesh, BL::Object b_ob, int motion);
- Object *sync_object(BL::Object b_parent, int persistent_id[OBJECT_PERSISTENT_ID_SIZE], BL::DupliObject b_dupli_object, Transform& tfm, uint layer_flag, int motion, bool hide_tris);
+ void sync_curves(Mesh *mesh, BL::Mesh b_mesh, BL::Object b_ob, bool motion, int time_index = 0);
+ Object *sync_object(BL::Object b_parent, int persistent_id[OBJECT_PERSISTENT_ID_SIZE], BL::DupliObject b_dupli_ob,
+ Transform& tfm, uint layer_flag, float motion_time, bool hide_tris);
void sync_light(BL::Object b_parent, int persistent_id[OBJECT_PERSISTENT_ID_SIZE], BL::Object b_ob, Transform& tfm);
void sync_background_light();
- void sync_mesh_motion(BL::Object b_ob, Mesh *mesh, int motion);
- void sync_camera_motion(BL::Object b_ob, int motion);
+ void sync_mesh_motion(BL::Object b_ob, Object *object, float motion_time);
+ void sync_camera_motion(BL::Object b_ob, float motion_time);
/* particles */
bool sync_dupli_particle(BL::Object b_ob, BL::DupliObject b_dup, Object *object);
@@ -109,6 +110,7 @@ private:
id_map<ParticleSystemKey, ParticleSystem> particle_system_map;
set<Mesh*> mesh_synced;
set<Mesh*> mesh_motion_synced;
+ std::set<float> motion_times;
void *world_map;
bool world_recalc;
diff --git a/intern/cycles/blender/blender_util.h b/intern/cycles/blender/blender_util.h
index 58e523d7fc2..35e417d8069 100644
--- a/intern/cycles/blender/blender_util.h
+++ b/intern/cycles/blender/blender_util.h
@@ -42,7 +42,14 @@ void python_thread_state_restore(void **python_thread_state);
static inline BL::Mesh object_to_mesh(BL::BlendData data, BL::Object object, BL::Scene scene, bool apply_modifiers, bool render, bool calc_undeformed)
{
- return data.meshes.new_from_object(scene, object, apply_modifiers, (render)? 2: 1, true, calc_undeformed);
+ BL::Mesh me = data.meshes.new_from_object(scene, object, apply_modifiers, (render)? 2: 1, false, calc_undeformed);
+ if ((bool)me) {
+ if (me.use_auto_smooth()) {
+ me.calc_normals_split(me.auto_smooth_angle());
+ }
+ me.calc_tessface();
+ }
+ return me;
}
static inline void colorramp_to_array(BL::ColorRamp ramp, float4 *data, int size)
@@ -50,7 +57,7 @@ static inline void colorramp_to_array(BL::ColorRamp ramp, float4 *data, int size
for(int i = 0; i < size; i++) {
float color[4];
- ramp.evaluate(i/(float)(size-1), color);
+ ramp.evaluate((float)i/(float)(size-1), color);
data[i] = make_float4(color[0], color[1], color[2], color[3]);
}
}
@@ -67,7 +74,7 @@ static inline void curvemapping_color_to_array(BL::CurveMapping cumap, float4 *d
BL::CurveMap mapI = cumap.curves[3];
for(int i = 0; i < size; i++) {
- float t = i/(float)(size-1);
+ float t = (float)i/(float)(size-1);
data[i][0] = mapR.evaluate(mapI.evaluate(t));
data[i][1] = mapG.evaluate(mapI.evaluate(t));
@@ -76,7 +83,7 @@ static inline void curvemapping_color_to_array(BL::CurveMapping cumap, float4 *d
}
else {
for(int i = 0; i < size; i++) {
- float t = i/(float)(size-1);
+ float t = (float)i/(float)(size-1);
data[i][0] = mapR.evaluate(t);
data[i][1] = mapG.evaluate(t);
@@ -168,6 +175,11 @@ static inline float4 get_float4(BL::Array<float, 4> array)
return make_float4(array[0], array[1], array[2], array[3]);
}
+/* Build an int3 from the three elements of a Blender integer array. */
+static inline int3 get_int3(BL::Array<int, 3> array)
+{
+	const int x = array[0];
+	const int y = array[1];
+	const int z = array[2];
+
+	return make_int3(x, y, z);
+}
+
static inline int4 get_int4(BL::Array<int, 4> array)
{
return make_int4(array[0], array[1], array[2], array[3]);
@@ -341,6 +353,52 @@ static inline void mesh_texture_space(BL::Mesh b_mesh, float3& loc, float3& size
loc = loc*size - make_float3(0.5f, 0.5f, 0.5f);
}
+/* value of "use_motion_blur" in the object's "cycles" settings */
+static inline bool object_use_motion(BL::Object b_ob)
+{
+	PointerRNA cycles_settings = RNA_pointer_get(&b_ob.ptr, "cycles");
+
+	return get_boolean(cycles_settings, "use_motion_blur");
+}
+
+/* number of motion steps for the object, computed from the "motion_steps"
+ * value in its "cycles" settings */
+static inline uint object_motion_steps(BL::Object b_ob)
+{
+	PointerRNA cycles_settings = RNA_pointer_get(&b_ob.ptr, "cycles");
+	const uint motion_steps = get_int(cycles_settings, "motion_steps");
+
+	/* use an uneven number of steps so we get one keyframe at the current frame,
+	 * and use 2^(steps - 1) so objects with more/fewer steps still have samples
+	 * at the same times, to avoid sampling at many different times */
+	const uint step_count = (2 << (motion_steps - 1)) + 1;
+
+	return step_count;
+}
+
+/* value of "use_deform_motion" in the object's "cycles" settings */
+static inline bool object_use_deform_motion(BL::Object b_ob)
+{
+	PointerRNA cycles_settings = RNA_pointer_get(&b_ob.ptr, "cycles");
+
+	return get_boolean(cycles_settings, "use_deform_motion");
+}
+
+/* Return the domain settings of the first smoke modifier on `b_ob` whose
+ * smoke type is DOMAIN, or settings wrapping PointerRNA_NULL when the object
+ * has no such modifier. */
+static inline BL::SmokeDomainSettings object_smoke_domain_find(BL::Object b_ob)
+{
+	BL::Object::modifiers_iterator modifier_it;
+
+	for(b_ob.modifiers.begin(modifier_it); modifier_it != b_ob.modifiers.end(); ++modifier_it) {
+		/* only smoke modifiers are of interest */
+		if(!modifier_it->is_a(&RNA_SmokeModifier))
+			continue;
+
+		BL::SmokeModifier smoke_modifier(*modifier_it);
+
+		/* skip smoke modifiers that are not the domain */
+		if(smoke_modifier.smoke_type() != BL::SmokeModifier::smoke_type_DOMAIN)
+			continue;
+
+		return smoke_modifier.domain_settings();
+	}
+
+	return BL::SmokeDomainSettings(PointerRNA_NULL);
+}
+
/* ID Map
*
* Utility class to keep in sync with blender data.
diff --git a/intern/cycles/bvh/bvh.cpp b/intern/cycles/bvh/bvh.cpp
index 6c636ac5c8d..3c0c5c021c8 100644
--- a/intern/cycles/bvh/bvh.cpp
+++ b/intern/cycles/bvh/bvh.cpp
@@ -77,13 +77,25 @@ bool BVH::cache_read(CacheData& key)
key.add(&params, sizeof(params));
foreach(Object *ob, objects) {
- key.add(ob->mesh->verts);
- key.add(ob->mesh->triangles);
- key.add(ob->mesh->curve_keys);
- key.add(ob->mesh->curves);
+ Mesh *mesh = ob->mesh;
+
+ key.add(mesh->verts);
+ key.add(mesh->triangles);
+ key.add(mesh->curve_keys);
+ key.add(mesh->curves);
key.add(&ob->bounds, sizeof(ob->bounds));
key.add(&ob->visibility, sizeof(ob->visibility));
- key.add(&ob->mesh->transform_applied, sizeof(bool));
+ key.add(&mesh->transform_applied, sizeof(bool));
+
+ if(mesh->use_motion_blur) {
+ Attribute *attr = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+ if(attr)
+ key.add(attr->buffer);
+
+ attr = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+ if(attr)
+ key.add(attr->buffer);
+ }
}
CacheData value;
@@ -97,7 +109,7 @@ bool BVH::cache_read(CacheData& key)
value.read(pack.nodes);
value.read(pack.object_node);
value.read(pack.tri_woop);
- value.read(pack.prim_segment);
+ value.read(pack.prim_type);
value.read(pack.prim_visibility);
value.read(pack.prim_index);
value.read(pack.prim_object);
@@ -119,7 +131,7 @@ void BVH::cache_write(CacheData& key)
value.add(pack.nodes);
value.add(pack.object_node);
value.add(pack.tri_woop);
- value.add(pack.prim_segment);
+ value.add(pack.prim_type);
value.add(pack.prim_visibility);
value.add(pack.prim_index);
value.add(pack.prim_object);
@@ -165,11 +177,11 @@ void BVH::build(Progress& progress)
}
/* build nodes */
- vector<int> prim_segment;
+ vector<int> prim_type;
vector<int> prim_index;
vector<int> prim_object;
- BVHBuild bvh_build(objects, prim_segment, prim_index, prim_object, params, progress);
+ BVHBuild bvh_build(objects, prim_type, prim_index, prim_object, params, progress);
BVHNode *root = bvh_build.run();
if(progress.get_cancel()) {
@@ -178,7 +190,7 @@ void BVH::build(Progress& progress)
}
/* todo: get rid of this copy */
- pack.prim_segment = prim_segment;
+ pack.prim_type = prim_type;
pack.prim_index = prim_index;
pack.prim_object = prim_object;
@@ -238,9 +250,12 @@ void BVH::refit(Progress& progress)
void BVH::pack_triangle(int idx, float4 woop[3])
{
- /* create Woop triangle */
int tob = pack.prim_object[idx];
const Mesh *mesh = objects[tob]->mesh;
+
+ if(mesh->has_motion_blur())
+ return;
+
int tidx = pack.prim_index[idx];
const int *vidx = mesh->triangles[tidx].v;
const float3* vpos = &mesh->verts[0];
@@ -280,11 +295,11 @@ void BVH::pack_curve_segment(int idx, float4 woop[3])
int tob = pack.prim_object[idx];
const Mesh *mesh = objects[tob]->mesh;
int tidx = pack.prim_index[idx];
- int segment = pack.prim_segment[idx];
+ int segment = PRIMITIVE_UNPACK_SEGMENT(pack.prim_type[idx]);
int k0 = mesh->curves[tidx].first_key + segment;
int k1 = mesh->curves[tidx].first_key + segment + 1;
- float3 v0 = mesh->curve_keys[k0].co;
- float3 v1 = mesh->curve_keys[k1].co;
+ float3 v0 = float4_to_float3(mesh->curve_keys[k0]);
+ float3 v1 = float4_to_float3(mesh->curve_keys[k1]);
float3 d0 = v1 - v0;
float l = len(d0);
@@ -324,7 +339,7 @@ void BVH::pack_primitives()
if(pack.prim_index[i] != -1) {
float4 woop[3];
- if(pack.prim_segment[i] != ~0)
+ if(pack.prim_type[i] & PRIMITIVE_ALL_CURVE)
pack_curve_segment(i, woop);
else
pack_triangle(i, woop);
@@ -335,7 +350,7 @@ void BVH::pack_primitives()
Object *ob = objects[tob];
pack.prim_visibility[i] = ob->visibility;
- if(pack.prim_segment[i] != ~0)
+ if(pack.prim_type[i] & PRIMITIVE_ALL_CURVE)
pack.prim_visibility[i] |= PATH_RAY_CURVE;
}
else {
@@ -359,7 +374,7 @@ void BVH::pack_instances(size_t nodes_size)
* meshes with transform applied and already in the top level BVH */
for(size_t i = 0; i < pack.prim_index.size(); i++)
if(pack.prim_index[i] != -1) {
- if(pack.prim_segment[i] != ~0)
+ if(pack.prim_type[i] & PRIMITIVE_ALL_CURVE)
pack.prim_index[i] += objects[pack.prim_object[i]]->mesh->curve_offset;
else
pack.prim_index[i] += objects[pack.prim_object[i]]->mesh->tri_offset;
@@ -401,7 +416,7 @@ void BVH::pack_instances(size_t nodes_size)
mesh_map.clear();
pack.prim_index.resize(prim_index_size);
- pack.prim_segment.resize(prim_index_size);
+ pack.prim_type.resize(prim_index_size);
pack.prim_object.resize(prim_index_size);
pack.prim_visibility.resize(prim_index_size);
pack.tri_woop.resize(tri_woop_size);
@@ -409,7 +424,7 @@ void BVH::pack_instances(size_t nodes_size)
pack.object_node.resize(objects.size());
int *pack_prim_index = (pack.prim_index.size())? &pack.prim_index[0]: NULL;
- int *pack_prim_segment = (pack.prim_segment.size())? &pack.prim_segment[0]: NULL;
+ int *pack_prim_type = (pack.prim_type.size())? &pack.prim_type[0]: NULL;
int *pack_prim_object = (pack.prim_object.size())? &pack.prim_object[0]: NULL;
uint *pack_prim_visibility = (pack.prim_visibility.size())? &pack.prim_visibility[0]: NULL;
float4 *pack_tri_woop = (pack.tri_woop.size())? &pack.tri_woop[0]: NULL;
@@ -454,16 +469,16 @@ void BVH::pack_instances(size_t nodes_size)
if(bvh->pack.prim_index.size()) {
size_t bvh_prim_index_size = bvh->pack.prim_index.size();
int *bvh_prim_index = &bvh->pack.prim_index[0];
- int *bvh_prim_segment = &bvh->pack.prim_segment[0];
+ int *bvh_prim_type = &bvh->pack.prim_type[0];
uint *bvh_prim_visibility = &bvh->pack.prim_visibility[0];
for(size_t i = 0; i < bvh_prim_index_size; i++) {
- if(bvh->pack.prim_segment[i] != ~0)
+ if(bvh->pack.prim_type[i] & PRIMITIVE_ALL_CURVE)
pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + mesh_curve_offset;
else
pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + mesh_tri_offset;
- pack_prim_segment[pack_prim_index_offset] = bvh_prim_segment[i];
+ pack_prim_type[pack_prim_index_offset] = bvh_prim_type[i];
pack_prim_visibility[pack_prim_index_offset] = bvh_prim_visibility[i];
pack_prim_object[pack_prim_index_offset] = 0; // unused for instances
pack_prim_index_offset++;
@@ -629,37 +644,51 @@ void RegularBVH::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility
/* primitives */
const Mesh *mesh = ob->mesh;
- if(pack.prim_segment[prim] != ~0) {
+ if(pack.prim_type[prim] & PRIMITIVE_ALL_CURVE) {
/* curves */
int str_offset = (params.top_level)? mesh->curve_offset: 0;
- int k0 = mesh->curves[pidx - str_offset].first_key + pack.prim_segment[prim]; // XXX!
- int k1 = k0 + 1;
-
- float3 p[4];
- p[0] = mesh->curve_keys[max(k0 - 1,mesh->curves[pidx - str_offset].first_key)].co;
- p[1] = mesh->curve_keys[k0].co;
- p[2] = mesh->curve_keys[k1].co;
- p[3] = mesh->curve_keys[min(k1 + 1,mesh->curves[pidx - str_offset].first_key + mesh->curves[pidx - str_offset].num_keys - 1)].co;
- float3 lower;
- float3 upper;
- curvebounds(&lower.x, &upper.x, p, 0);
- curvebounds(&lower.y, &upper.y, p, 1);
- curvebounds(&lower.z, &upper.z, p, 2);
- float mr = max(mesh->curve_keys[k0].radius,mesh->curve_keys[k1].radius);
- bbox.grow(lower, mr);
- bbox.grow(upper, mr);
+ const Mesh::Curve& curve = mesh->curves[pidx - str_offset];
+ int k = PRIMITIVE_UNPACK_SEGMENT(pack.prim_type[prim]);
+
+ curve.bounds_grow(k, &mesh->curve_keys[0], bbox);
visibility |= PATH_RAY_CURVE;
+
+ /* motion curves */
+ if(mesh->use_motion_blur) {
+ Attribute *attr = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+
+ if(attr) {
+ size_t mesh_size = mesh->curve_keys.size();
+ size_t steps = mesh->motion_steps - 1;
+ float4 *key_steps = attr->data_float4();
+
+ for (size_t i = 0; i < steps; i++)
+ curve.bounds_grow(k, key_steps + i*mesh_size, bbox);
+ }
+ }
}
else {
/* triangles */
int tri_offset = (params.top_level)? mesh->tri_offset: 0;
- const int *vidx = mesh->triangles[pidx - tri_offset].v;
+ const Mesh::Triangle& triangle = mesh->triangles[pidx - tri_offset];
const float3 *vpos = &mesh->verts[0];
- bbox.grow(vpos[vidx[0]]);
- bbox.grow(vpos[vidx[1]]);
- bbox.grow(vpos[vidx[2]]);
+ triangle.bounds_grow(vpos, bbox);
+
+ /* motion triangles */
+ if(mesh->use_motion_blur) {
+ Attribute *attr = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+
+ if(attr) {
+ size_t mesh_size = mesh->verts.size();
+ size_t steps = mesh->motion_steps - 1;
+ float3 *vert_steps = attr->data_float3();
+
+ for (size_t i = 0; i < steps; i++)
+ triangle.bounds_grow(vert_steps + i*mesh_size, bbox);
+ }
+ }
}
}
diff --git a/intern/cycles/bvh/bvh.h b/intern/cycles/bvh/bvh.h
index f2c96638b84..5fcaaaa988c 100644
--- a/intern/cycles/bvh/bvh.h
+++ b/intern/cycles/bvh/bvh.h
@@ -52,8 +52,8 @@ struct PackedBVH {
array<int> object_node;
/* precomputed triangle intersection data, one triangle is 4x float4 */
array<float4> tri_woop;
- /* primitive type - triangle or strand (should be moved to flag?) */
- array<int> prim_segment;
+ /* primitive type - triangle or strand */
+ array<int> prim_type;
/* visibility visibilitys for primitives */
array<uint> prim_visibility;
/* mapping from BVH primitive index to true primitive index, as primitives
diff --git a/intern/cycles/bvh/bvh_binning.cpp b/intern/cycles/bvh/bvh_binning.cpp
index 05a674a47a7..bd37ffbcf38 100644
--- a/intern/cycles/bvh/bvh_binning.cpp
+++ b/intern/cycles/bvh/bvh_binning.cpp
@@ -83,14 +83,14 @@ BVHObjectBinning::BVHObjectBinning(const BVHRange& job, BVHReference *prims)
int4 bin1 = get_bin(prim1.bounds());
/* increase bounds for bins for even primitive */
- int b00 = extract<0>(bin0); bin_count[b00][0]++; bin_bounds[b00][0].grow(prim0.bounds());
- int b01 = extract<1>(bin0); bin_count[b01][1]++; bin_bounds[b01][1].grow(prim0.bounds());
- int b02 = extract<2>(bin0); bin_count[b02][2]++; bin_bounds[b02][2].grow(prim0.bounds());
+ int b00 = (int)extract<0>(bin0); bin_count[b00][0]++; bin_bounds[b00][0].grow(prim0.bounds());
+ int b01 = (int)extract<1>(bin0); bin_count[b01][1]++; bin_bounds[b01][1].grow(prim0.bounds());
+ int b02 = (int)extract<2>(bin0); bin_count[b02][2]++; bin_bounds[b02][2].grow(prim0.bounds());
/* increase bounds of bins for odd primitive */
- int b10 = extract<0>(bin1); bin_count[b10][0]++; bin_bounds[b10][0].grow(prim1.bounds());
- int b11 = extract<1>(bin1); bin_count[b11][1]++; bin_bounds[b11][1].grow(prim1.bounds());
- int b12 = extract<2>(bin1); bin_count[b12][2]++; bin_bounds[b12][2].grow(prim1.bounds());
+ int b10 = (int)extract<0>(bin1); bin_count[b10][0]++; bin_bounds[b10][0].grow(prim1.bounds());
+ int b11 = (int)extract<1>(bin1); bin_count[b11][1]++; bin_bounds[b11][1].grow(prim1.bounds());
+ int b12 = (int)extract<2>(bin1); bin_count[b12][2]++; bin_bounds[b12][2].grow(prim1.bounds());
}
/* for uneven number of primitives */
@@ -100,9 +100,9 @@ BVHObjectBinning::BVHObjectBinning(const BVHRange& job, BVHReference *prims)
int4 bin0 = get_bin(prim0.bounds());
/* increase bounds of bins */
- int b00 = extract<0>(bin0); bin_count[b00][0]++; bin_bounds[b00][0].grow(prim0.bounds());
- int b01 = extract<1>(bin0); bin_count[b01][1]++; bin_bounds[b01][1].grow(prim0.bounds());
- int b02 = extract<2>(bin0); bin_count[b02][2]++; bin_bounds[b02][2].grow(prim0.bounds());
+ int b00 = (int)extract<0>(bin0); bin_count[b00][0]++; bin_bounds[b00][0].grow(prim0.bounds());
+ int b01 = (int)extract<1>(bin0); bin_count[b01][1]++; bin_bounds[b01][1].grow(prim0.bounds());
+ int b02 = (int)extract<2>(bin0); bin_count[b02][2]++; bin_bounds[b02][2].grow(prim0.bounds());
}
}
diff --git a/intern/cycles/bvh/bvh_build.cpp b/intern/cycles/bvh/bvh_build.cpp
index b21b20a87e5..eb4cca92b6b 100644
--- a/intern/cycles/bvh/bvh_build.cpp
+++ b/intern/cycles/bvh/bvh_build.cpp
@@ -49,10 +49,10 @@ public:
/* Constructor / Destructor */
BVHBuild::BVHBuild(const vector<Object*>& objects_,
- vector<int>& prim_segment_, vector<int>& prim_index_, vector<int>& prim_object_,
+ vector<int>& prim_type_, vector<int>& prim_index_, vector<int>& prim_object_,
const BVHParams& params_, Progress& progress_)
: objects(objects_),
- prim_segment(prim_segment_),
+ prim_type(prim_type_),
prim_index(prim_index_),
prim_object(prim_object_),
params(params_),
@@ -70,45 +70,66 @@ BVHBuild::~BVHBuild()
void BVHBuild::add_reference_mesh(BoundBox& root, BoundBox& center, Mesh *mesh, int i)
{
+ Attribute *attr_mP = NULL;
+
+ if(mesh->has_motion_blur())
+ attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+
for(uint j = 0; j < mesh->triangles.size(); j++) {
Mesh::Triangle t = mesh->triangles[j];
BoundBox bounds = BoundBox::empty;
+ PrimitiveType type = PRIMITIVE_TRIANGLE;
+
+ t.bounds_grow(&mesh->verts[0], bounds);
- for(int k = 0; k < 3; k++) {
- float3 co = mesh->verts[t.v[k]];
- bounds.grow(co);
+ /* motion triangles */
+ if(attr_mP) {
+ size_t mesh_size = mesh->verts.size();
+ size_t steps = mesh->motion_steps - 1;
+ float3 *vert_steps = attr_mP->data_float3();
+
+ for(size_t i = 0; i < steps; i++)
+ t.bounds_grow(vert_steps + i*mesh_size, bounds);
+
+ type = PRIMITIVE_MOTION_TRIANGLE;
}
if(bounds.valid()) {
- references.push_back(BVHReference(bounds, j, i, ~0));
+ references.push_back(BVHReference(bounds, j, i, type));
root.grow(bounds);
center.grow(bounds.center2());
}
}
+ Attribute *curve_attr_mP = NULL;
+
+ if(mesh->has_motion_blur())
+ curve_attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+
for(uint j = 0; j < mesh->curves.size(); j++) {
Mesh::Curve curve = mesh->curves[j];
+ PrimitiveType type = PRIMITIVE_CURVE;
for(int k = 0; k < curve.num_keys - 1; k++) {
BoundBox bounds = BoundBox::empty;
+ curve.bounds_grow(k, &mesh->curve_keys[0], bounds);
+
+ /* motion curve */
+ if(curve_attr_mP) {
+ size_t mesh_size = mesh->curve_keys.size();
+ size_t steps = mesh->motion_steps - 1;
+ float4 *key_steps = curve_attr_mP->data_float4();
- float3 co[4];
- co[0] = mesh->curve_keys[max(curve.first_key + k - 1,curve.first_key)].co;
- co[1] = mesh->curve_keys[curve.first_key + k].co;
- co[2] = mesh->curve_keys[curve.first_key + k + 1].co;
- co[3] = mesh->curve_keys[min(curve.first_key + k + 2, curve.first_key + curve.num_keys - 1)].co;
-
- float3 lower;
- float3 upper;
- curvebounds(&lower.x, &upper.x, co, 0);
- curvebounds(&lower.y, &upper.y, co, 1);
- curvebounds(&lower.z, &upper.z, co, 2);
- float mr = max(mesh->curve_keys[curve.first_key + k].radius, mesh->curve_keys[curve.first_key + k + 1].radius);
- bounds.grow(lower, mr);
- bounds.grow(upper, mr);
+ for (size_t i = 0; i < steps; i++)
+ curve.bounds_grow(k, key_steps + i*mesh_size, bounds);
+
+ type = PRIMITIVE_MOTION_CURVE;
+ }
if(bounds.valid()) {
- references.push_back(BVHReference(bounds, j, i, k));
+ int packed_type = PRIMITIVE_PACK_SEGMENT(type, k);
+
+ references.push_back(BVHReference(bounds, j, i, packed_type));
root.grow(bounds);
center.grow(bounds.center2());
}
@@ -118,7 +139,7 @@ void BVHBuild::add_reference_mesh(BoundBox& root, BoundBox& center, Mesh *mesh,
void BVHBuild::add_reference_object(BoundBox& root, BoundBox& center, Object *ob, int i)
{
- references.push_back(BVHReference(ob->bounds, -1, i, false));
+ references.push_back(BVHReference(ob->bounds, -1, i, 0));
root.grow(ob->bounds);
center.grow(ob->bounds.center2());
}
@@ -207,7 +228,7 @@ BVHNode* BVHBuild::run()
progress_total = references.size();
progress_original_total = progress_total;
- prim_segment.resize(references.size());
+ prim_type.resize(references.size());
prim_index.resize(references.size());
prim_object.resize(references.size());
@@ -277,18 +298,41 @@ void BVHBuild::thread_build_node(InnerNode *inner, int child, BVHObjectBinning *
}
}
+bool BVHBuild::range_within_max_leaf_size(const BVHRange& range)
+{
+ size_t size = range.size();
+ size_t max_leaf_size = max(params.max_triangle_leaf_size, params.max_curve_leaf_size);
+
+ if(size > max_leaf_size)
+ return false;
+
+ size_t num_triangles = 0;
+ size_t num_curves = 0;
+
+ for(int i = 0; i < size; i++) {
+ BVHReference& ref = references[range.start() + i];
+
+ if(ref.prim_type() & PRIMITIVE_ALL_CURVE)
+ num_curves++;
+ else if(ref.prim_type() & PRIMITIVE_ALL_TRIANGLE)
+ num_triangles++;
+ }
+
+ return (num_triangles < params.max_triangle_leaf_size) && (num_curves < params.max_curve_leaf_size);
+}
+
/* multithreaded binning builder */
BVHNode* BVHBuild::build_node(const BVHObjectBinning& range, int level)
{
size_t size = range.size();
- float leafSAH = params.sah_triangle_cost * range.leafSAH;
- float splitSAH = params.sah_node_cost * range.bounds().half_area() + params.sah_triangle_cost * range.splitSAH;
+ float leafSAH = params.sah_primitive_cost * range.leafSAH;
+ float splitSAH = params.sah_node_cost * range.bounds().half_area() + params.sah_primitive_cost * range.splitSAH;
/* have at least one inner node on top level, for performance and correct
* visibility tests, since object instances do not check visibility flag */
if(!(range.size() > 0 && params.top_level && level == 0)) {
/* make leaf node when threshold reached or SAH tells us */
- if(params.small_enough_for_leaf(size, level) || (size <= params.max_leaf_size && leafSAH < splitSAH))
+ if(params.small_enough_for_leaf(size, level) || (range_within_max_leaf_size(range) && leafSAH < splitSAH))
return create_leaf_node(range);
}
@@ -373,12 +417,12 @@ BVHNode *BVHBuild::create_object_leaf_nodes(const BVHReference *ref, int start,
if(start == prim_index.size()) {
assert(params.use_spatial_split);
- prim_segment.push_back(ref->prim_segment());
+ prim_type.push_back(ref->prim_type());
prim_index.push_back(ref->prim_index());
prim_object.push_back(ref->prim_object());
}
else {
- prim_segment[start] = ref->prim_segment();
+ prim_type[start] = ref->prim_type();
prim_index[start] = ref->prim_index();
prim_object[start] = ref->prim_object();
}
@@ -401,7 +445,7 @@ BVHNode *BVHBuild::create_object_leaf_nodes(const BVHReference *ref, int start,
BVHNode* BVHBuild::create_leaf_node(const BVHRange& range)
{
- vector<int>& p_segment = prim_segment;
+ vector<int>& p_type = prim_type;
vector<int>& p_index = prim_index;
vector<int>& p_object = prim_object;
BoundBox bounds = BoundBox::empty;
@@ -415,12 +459,12 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range)
if(range.start() + num == prim_index.size()) {
assert(params.use_spatial_split);
- p_segment.push_back(ref.prim_segment());
+ p_type.push_back(ref.prim_type());
p_index.push_back(ref.prim_index());
p_object.push_back(ref.prim_object());
}
else {
- p_segment[range.start() + num] = ref.prim_segment();
+ p_type[range.start() + num] = ref.prim_type();
p_index[range.start() + num] = ref.prim_index();
p_object[range.start() + num] = ref.prim_object();
}
@@ -490,7 +534,7 @@ void BVHBuild::rotate(BVHNode *node, int max_depth)
/* find best rotation. we pick a target child of a first child, and swap
* this with an other child. we perform the best such swap. */
float best_cost = FLT_MAX;
- int best_child = -1, bets_target = -1, best_other = -1;
+ int best_child = -1, best_target = -1, best_other = -1;
for(size_t c = 0; c < 2; c++) {
/* ignore leaf nodes as we cannot descent into */
@@ -514,11 +558,11 @@ void BVHBuild::rotate(BVHNode *node, int max_depth)
if(cost0 < cost1) {
best_cost = cost0;
- bets_target = 0;
+ best_target = 0;
}
else {
best_cost = cost0;
- bets_target = 1;
+ best_target = 1;
}
}
}
@@ -527,10 +571,13 @@ void BVHBuild::rotate(BVHNode *node, int max_depth)
if(best_cost >= 0)
return;
+ assert(best_child == 0 || best_child == 1);
+ assert(best_target != -1);
+
/* perform the best found tree rotation */
InnerNode *child = (InnerNode*)parent->children[best_child];
- swap(parent->children[best_other], child->children[bets_target]);
+ swap(parent->children[best_other], child->children[best_target]);
child->m_bounds = merge(child->children[0]->m_bounds, child->children[1]->m_bounds);
}
diff --git a/intern/cycles/bvh/bvh_build.h b/intern/cycles/bvh/bvh_build.h
index 3df4da1739a..a6b9916de9b 100644
--- a/intern/cycles/bvh/bvh_build.h
+++ b/intern/cycles/bvh/bvh_build.h
@@ -44,7 +44,7 @@ public:
/* Constructor/Destructor */
BVHBuild(
const vector<Object*>& objects,
- vector<int>& prim_segment,
+ vector<int>& prim_type,
vector<int>& prim_index,
vector<int>& prim_object,
const BVHParams& params,
@@ -70,6 +70,8 @@ protected:
BVHNode *create_leaf_node(const BVHRange& range);
BVHNode *create_object_leaf_nodes(const BVHReference *ref, int start, int num);
+ bool range_within_max_leaf_size(const BVHRange& range);
+
/* threads */
enum { THREAD_TASK_SIZE = 4096 };
void thread_build_node(InnerNode *node, int child, BVHObjectBinning *range, int level);
@@ -88,7 +90,7 @@ protected:
int num_original_references;
/* output primitive indexes and objects */
- vector<int>& prim_segment;
+ vector<int>& prim_type;
vector<int>& prim_index;
vector<int>& prim_object;
diff --git a/intern/cycles/bvh/bvh_params.h b/intern/cycles/bvh/bvh_params.h
index ad36bdfa326..ed67690a07f 100644
--- a/intern/cycles/bvh/bvh_params.h
+++ b/intern/cycles/bvh/bvh_params.h
@@ -33,11 +33,12 @@ public:
/* SAH costs */
float sah_node_cost;
- float sah_triangle_cost;
+ float sah_primitive_cost;
- /* number of triangles in leaf */
+ /* number of primitives in leaf */
int min_leaf_size;
- int max_leaf_size;
+ int max_triangle_leaf_size;
+ int max_curve_leaf_size;
/* object or mesh level bvh */
int top_level;
@@ -62,11 +63,14 @@ public:
use_spatial_split = true;
spatial_split_alpha = 1e-5f;
+ /* todo: see if splitting up primitive cost to be separate for triangles
+ * and curves can help. so far in tests it doesn't help, but why? */
sah_node_cost = 1.0f;
- sah_triangle_cost = 1.0f;
+ sah_primitive_cost = 1.0f;
min_leaf_size = 1;
- max_leaf_size = 8;
+ max_triangle_leaf_size = 8;
+ max_curve_leaf_size = 2;
top_level = false;
use_cache = false;
@@ -75,11 +79,11 @@ public:
}
/* SAH costs */
- __forceinline float cost(int num_nodes, int num_tris) const
- { return node_cost(num_nodes) + triangle_cost(num_tris); }
+ __forceinline float cost(int num_nodes, int num_primitives) const
+ { return node_cost(num_nodes) + primitive_cost(num_primitives); }
- __forceinline float triangle_cost(int n) const
- { return n*sah_triangle_cost; }
+ __forceinline float primitive_cost(int n) const
+ { return n*sah_primitive_cost; }
__forceinline float node_cost(int n) const
{ return n*sah_node_cost; }
@@ -98,22 +102,22 @@ class BVHReference
public:
__forceinline BVHReference() {}
- __forceinline BVHReference(const BoundBox& bounds_, int prim_index_, int prim_object_, int prim_segment)
+ __forceinline BVHReference(const BoundBox& bounds_, int prim_index_, int prim_object_, int prim_type)
: rbounds(bounds_)
{
rbounds.min.w = __int_as_float(prim_index_);
rbounds.max.w = __int_as_float(prim_object_);
- segment = prim_segment;
+ type = prim_type;
}
__forceinline const BoundBox& bounds() const { return rbounds; }
__forceinline int prim_index() const { return __float_as_int(rbounds.min.w); }
__forceinline int prim_object() const { return __float_as_int(rbounds.max.w); }
- __forceinline int prim_segment() const { return segment; }
+ __forceinline int prim_type() const { return type; }
protected:
BoundBox rbounds;
- uint segment;
+ uint type;
};
/* BVH Range
diff --git a/intern/cycles/bvh/bvh_sort.cpp b/intern/cycles/bvh/bvh_sort.cpp
index d7dbae36336..3140bf23376 100644
--- a/intern/cycles/bvh/bvh_sort.cpp
+++ b/intern/cycles/bvh/bvh_sort.cpp
@@ -52,8 +52,8 @@ public:
else if(ra.prim_object() > rb.prim_object()) return false;
else if(ra.prim_index() < rb.prim_index()) return true;
else if(ra.prim_index() > rb.prim_index()) return false;
- else if(ra.prim_segment() < rb.prim_segment()) return true;
- else if(ra.prim_segment() > rb.prim_segment()) return false;
+ else if(ra.prim_type() < rb.prim_type()) return true;
+ else if(ra.prim_type() > rb.prim_type()) return false;
return false;
}
diff --git a/intern/cycles/bvh/bvh_split.cpp b/intern/cycles/bvh/bvh_split.cpp
index 03ff69d7b6d..07c35c08c18 100644
--- a/intern/cycles/bvh/bvh_split.cpp
+++ b/intern/cycles/bvh/bvh_split.cpp
@@ -54,8 +54,8 @@ BVHObjectSplit::BVHObjectSplit(BVHBuild *builder, const BVHRange& range, float n
right_bounds = builder->spatial_right_bounds[i - 1];
float sah = nodeSAH +
- left_bounds.safe_area() * builder->params.triangle_cost(i) +
- right_bounds.safe_area() * builder->params.triangle_cost(range.size() - i);
+ left_bounds.safe_area() * builder->params.primitive_cost(i) +
+ right_bounds.safe_area() * builder->params.primitive_cost(range.size() - i);
if(sah < min_sah) {
min_sah = sah;
@@ -150,8 +150,8 @@ BVHSpatialSplit::BVHSpatialSplit(BVHBuild *builder, const BVHRange& range, float
rightNum -= builder->spatial_bins[dim][i - 1].exit;
float sah = nodeSAH +
- left_bounds.safe_area() * builder->params.triangle_cost(leftNum) +
- builder->spatial_right_bounds[i - 1].safe_area() * builder->params.triangle_cost(rightNum);
+ left_bounds.safe_area() * builder->params.primitive_cost(leftNum) +
+ builder->spatial_right_bounds[i - 1].safe_area() * builder->params.primitive_cost(rightNum);
if(sah < this->sah) {
this->sah = sah;
@@ -209,10 +209,10 @@ void BVHSpatialSplit::split(BVHBuild *builder, BVHRange& left, BVHRange& right,
ldb.grow(lref.bounds());
rdb.grow(rref.bounds());
- float lac = builder->params.triangle_cost(left_end - left_start);
- float rac = builder->params.triangle_cost(right_end - right_start);
- float lbc = builder->params.triangle_cost(left_end - left_start + 1);
- float rbc = builder->params.triangle_cost(right_end - right_start + 1);
+ float lac = builder->params.primitive_cost(left_end - left_start);
+ float rac = builder->params.primitive_cost(right_end - right_start);
+ float lbc = builder->params.primitive_cost(left_end - left_start + 1);
+ float rbc = builder->params.primitive_cost(right_end - right_start + 1);
float unsplitLeftSAH = lub.safe_area() * lbc + right_bounds.safe_area() * rac;
float unsplitRightSAH = left_bounds.safe_area() * lac + rub.safe_area() * rbc;
@@ -253,7 +253,7 @@ void BVHSpatialSplit::split_reference(BVHBuild *builder, BVHReference& left, BVH
Object *ob = builder->objects[ref.prim_object()];
const Mesh *mesh = ob->mesh;
- if (ref.prim_segment() == ~0) {
+ if (ref.prim_type() & PRIMITIVE_ALL_TRIANGLE) {
const int *inds = mesh->triangles[ref.prim_index()].v;
const float3 *verts = &mesh->verts[0];
const float3* v1 = &verts[inds[2]];
@@ -282,30 +282,32 @@ void BVHSpatialSplit::split_reference(BVHBuild *builder, BVHReference& left, BVH
}
else {
/* curve split: NOTE - Currently ignores curve width and needs to be fixed.*/
- const int k0 = mesh->curves[ref.prim_index()].first_key + ref.prim_segment();
+ const int k0 = mesh->curves[ref.prim_index()].first_key + PRIMITIVE_UNPACK_SEGMENT(ref.prim_type());
const int k1 = k0 + 1;
- const float3* v0 = &mesh->curve_keys[k0].co;
- const float3* v1 = &mesh->curve_keys[k1].co;
+ const float4 key0 = mesh->curve_keys[k0];
+ const float4 key1 = mesh->curve_keys[k1];
+ const float3 v0 = float4_to_float3(key0);
+ const float3 v1 = float4_to_float3(key1);
- float v0p = (*v0)[dim];
- float v1p = (*v1)[dim];
+ float v0p = v0[dim];
+ float v1p = v1[dim];
/* insert vertex to the boxes it belongs to. */
if(v0p <= pos)
- left_bounds.grow(*v0);
+ left_bounds.grow(v0);
if(v0p >= pos)
- right_bounds.grow(*v0);
+ right_bounds.grow(v0);
if(v1p <= pos)
- left_bounds.grow(*v1);
+ left_bounds.grow(v1);
if(v1p >= pos)
- right_bounds.grow(*v1);
+ right_bounds.grow(v1);
/* edge intersects the plane => insert intersection to both boxes. */
if((v0p < pos && v1p > pos) || (v0p > pos && v1p < pos)) {
- float3 t = lerp(*v0, *v1, clamp((pos - v0p) / (v1p - v0p), 0.0f, 1.0f));
+ float3 t = lerp(v0, v1, clamp((pos - v0p) / (v1p - v0p), 0.0f, 1.0f));
left_bounds.grow(t);
right_bounds.grow(t);
}
@@ -318,8 +320,8 @@ void BVHSpatialSplit::split_reference(BVHBuild *builder, BVHReference& left, BVH
right_bounds.intersect(ref.bounds());
/* set references */
- left = BVHReference(left_bounds, ref.prim_index(), ref.prim_object(), ref.prim_segment());
- right = BVHReference(right_bounds, ref.prim_index(), ref.prim_object(), ref.prim_segment());
+ left = BVHReference(left_bounds, ref.prim_index(), ref.prim_object(), ref.prim_type());
+ right = BVHReference(right_bounds, ref.prim_index(), ref.prim_object(), ref.prim_type());
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/bvh/bvh_split.h b/intern/cycles/bvh/bvh_split.h
index 1f4befbe8e2..5b739311e5f 100644
--- a/intern/cycles/bvh/bvh_split.h
+++ b/intern/cycles/bvh/bvh_split.h
@@ -77,7 +77,7 @@ public:
/* find split candidates. */
float area = range.bounds().safe_area();
- leafSAH = area * builder->params.triangle_cost(range.size());
+ leafSAH = area * builder->params.primitive_cost(range.size());
nodeSAH = area * builder->params.node_cost(2);
object = BVHObjectSplit(builder, range, nodeSAH);
@@ -92,7 +92,7 @@ public:
/* leaf SAH is the lowest => create leaf. */
minSAH = min(min(leafSAH, object.sah), spatial.sah);
- no_split = (minSAH == leafSAH && range.size() <= builder->params.max_leaf_size);
+ no_split = (minSAH == leafSAH && builder->range_within_max_leaf_size(range));
}
__forceinline void split(BVHBuild *builder, BVHRange& left, BVHRange& right, const BVHRange& range)
diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp
index 9d60d062b8e..d9e68742c53 100644
--- a/intern/cycles/device/device.cpp
+++ b/intern/cycles/device/device.cpp
@@ -53,7 +53,8 @@ void Device::pixels_free(device_memory& mem)
mem_free(mem);
}
-void Device::draw_pixels(device_memory& rgba, int y, int w, int h, int dy, int width, int height, bool transparent)
+void Device::draw_pixels(device_memory& rgba, int y, int w, int h, int dy, int width, int height, bool transparent,
+ const DeviceDrawParams &draw_params)
{
pixels_copy_from(rgba, y, w, h);
@@ -80,6 +81,10 @@ void Device::draw_pixels(device_memory& rgba, int y, int w, int h, int dy, int w
glEnable(GL_TEXTURE_2D);
+ if(draw_params.bind_display_space_shader_cb) {
+ draw_params.bind_display_space_shader_cb();
+ }
+
glPushMatrix();
glTranslatef(0.0f, (float)dy, 0.0f);
@@ -98,6 +103,10 @@ void Device::draw_pixels(device_memory& rgba, int y, int w, int h, int dy, int w
glPopMatrix();
+ if(draw_params.unbind_display_space_shader_cb) {
+ draw_params.unbind_display_space_shader_cb();
+ }
+
glBindTexture(GL_TEXTURE_2D, 0);
glDisable(GL_TEXTURE_2D);
glDeleteTextures(1, &texid);
diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index bd309e35788..bcddd4f73e2 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -54,6 +54,7 @@ public:
bool display_device;
bool advanced_shading;
bool pack_images;
+ bool extended_images; /* flag for GPU and Multi device */
vector<DeviceInfo> multi_devices;
DeviceInfo()
@@ -64,11 +65,17 @@ public:
display_device = false;
advanced_shading = true;
pack_images = false;
+ extended_images = false;
}
};
/* Device */
+struct DeviceDrawParams {
+ boost::function<void(void)> bind_display_space_shader_cb;
+ boost::function<void(void)> unbind_display_space_shader_cb;
+};
+
class Device {
protected:
Device(DeviceInfo& info_, Stats &stats_, bool background) : background(background), info(info_), stats(stats_) {}
@@ -100,7 +107,7 @@ public:
/* texture memory */
virtual void tex_alloc(const char *name, device_memory& mem,
- bool interpolation = false, bool periodic = false) {};
+ InterpolationType interpolation = INTERPOLATION_NONE, bool periodic = false) {};
virtual void tex_free(device_memory& mem) {};
/* pixel memory */
@@ -121,7 +128,8 @@ public:
/* opengl drawing */
virtual void draw_pixels(device_memory& mem, int y, int w, int h,
- int dy, int width, int height, bool transparent);
+ int dy, int width, int height, bool transparent,
+ const DeviceDrawParams &draw_params);
#ifdef WITH_NETWORK
/* networking */
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 76123fe44d2..c9cc7592028 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -103,9 +103,9 @@ public:
kernel_const_copy(&kernel_globals, name, host, size);
}
- void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic)
+ void tex_alloc(const char *name, device_memory& mem, InterpolationType interpolation, bool periodic)
{
- kernel_tex_copy(&kernel_globals, name, mem.data_pointer, mem.data_width, mem.data_height);
+ kernel_tex_copy(&kernel_globals, name, mem.data_pointer, mem.data_width, mem.data_height, mem.data_depth, interpolation);
mem.device_pointer = mem.data_pointer;
stats.mem_alloc(mem.memory_size());
@@ -395,7 +395,7 @@ public:
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
kernel_cpu_avx_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
- if(task_pool.canceled())
+ if(task.get_cancel() || task_pool.canceled())
break;
}
}
@@ -406,7 +406,7 @@ public:
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
kernel_cpu_sse41_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
- if(task_pool.canceled())
+ if(task.get_cancel() || task_pool.canceled())
break;
}
}
@@ -417,7 +417,7 @@ public:
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
kernel_cpu_sse3_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
- if(task_pool.canceled())
+ if(task.get_cancel() || task_pool.canceled())
break;
}
}
@@ -428,7 +428,7 @@ public:
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
kernel_cpu_sse2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
- if(task_pool.canceled())
+ if(task.get_cancel() || task_pool.canceled())
break;
}
}
@@ -438,7 +438,7 @@ public:
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
kernel_cpu_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
- if(task_pool.canceled())
+ if(task.get_cancel() || task_pool.canceled())
break;
}
}
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 107ca16c4d2..93b89dc38d9 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -48,6 +48,7 @@ public:
int cuDevArchitecture;
bool first_error;
bool use_texture_storage;
+ unsigned int target_update_frequency;
struct PixelMem {
GLuint cuPBO;
@@ -138,7 +139,7 @@ public:
/*cuda_abort();*/ \
cuda_error_documentation(); \
} \
- }
+ } (void)0
bool cuda_error_(CUresult result, const string& stmt)
{
@@ -165,7 +166,7 @@ public:
void cuda_push_context()
{
- cuda_assert(cuCtxSetCurrent(cuContext))
+ cuda_assert(cuCtxSetCurrent(cuContext));
}
void cuda_pop_context()
@@ -173,12 +174,14 @@ public:
cuda_assert(cuCtxSetCurrent(NULL));
}
- CUDADevice(DeviceInfo& info, Stats &stats, bool background_)
+ CUDADevice(DeviceInfo& info, Stats &stats, bool background_)
: Device(info, stats, background_)
{
first_error = true;
background = background_;
use_texture_storage = true;
+ /* we try an update / sync every 1000 ms */
+ target_update_frequency = 1000;
cuDevId = info.num;
cuDevice = 0;
@@ -209,8 +212,8 @@ public:
if(cuda_error_(result, "cuCtxCreate"))
return;
- cuda_assert(cuStreamCreate(&cuStream, 0))
- cuda_assert(cuEventCreate(&tileDone, 0x1))
+ cuda_assert(cuStreamCreate(&cuStream, 0));
+ cuda_assert(cuEventCreate(&tileDone, 0x1));
int major, minor;
cuDeviceComputeCapability(&major, &minor, cuDevId);
@@ -219,7 +222,7 @@ public:
/* In order to use full 6GB of memory on Titan cards, use arrays instead
* of textures. On earlier cards this seems slower, but on Titan it is
* actually slightly faster in tests. */
- use_texture_storage = (cuDevArchitecture < 350);
+ use_texture_storage = (cuDevArchitecture < 300);
cuda_pop_context();
}
@@ -228,21 +231,22 @@ public:
{
task_pool.stop();
- cuda_assert(cuEventDestroy(tileDone))
- cuda_assert(cuStreamDestroy(cuStream))
- cuda_assert(cuCtxDestroy(cuContext))
+ cuda_assert(cuEventDestroy(tileDone));
+ cuda_assert(cuStreamDestroy(cuStream));
+ cuda_assert(cuCtxDestroy(cuContext));
}
- bool support_device(bool experimental)
+ bool support_device(bool experimental, bool branched)
{
int major, minor;
cuDeviceComputeCapability(&major, &minor, cuDevId);
-
+
+ /* We only support sm_20 and above */
if(major < 2) {
cuda_error_message(string_printf("CUDA device supported only with compute capability 2.0 or up, found %d.%d.", major, minor));
return false;
}
-
+
return true;
}
@@ -293,28 +297,16 @@ public:
return "";
}
if(cuda_version < 50) {
- printf("Unsupported CUDA version %d.%d detected, you need CUDA 5.0.\n", cuda_version/10, cuda_version%10);
+ printf("Unsupported CUDA version %d.%d detected, you need CUDA 6.0.\n", cuda_version/10, cuda_version%10);
return "";
}
-
- else if(cuda_version > 50)
- printf("CUDA version %d.%d detected, build may succeed but only CUDA 5.0 is officially supported.\n", cuda_version/10, cuda_version%10);
+ else if(cuda_version != 60)
+ printf("CUDA version %d.%d detected, build may succeed but only CUDA 6.0 is officially supported.\n", cuda_version/10, cuda_version%10);
/* compile */
string kernel = path_join(kernel_path, "kernel.cu");
string include = kernel_path;
const int machine = system_cpu_bits();
- string arch_flags;
-
- /* CUDA 5.x build flags for different archs */
- if(major == 2) {
- /* sm_2x */
- arch_flags = "--maxrregcount=32 --use_fast_math";
- }
- else if(major == 3) {
- /* sm_3x */
- arch_flags = "--maxrregcount=32 --use_fast_math";
- }
double starttime = time_dt();
printf("Compiling CUDA kernel ...\n");
@@ -322,8 +314,8 @@ public:
path_create_directories(cubin);
string command = string_printf("\"%s\" -arch=sm_%d%d -m%d --cubin \"%s\" "
- "-o \"%s\" --ptxas-options=\"-v\" %s -I\"%s\" -DNVCC -D__KERNEL_CUDA_VERSION__=%d",
- nvcc.c_str(), major, minor, machine, kernel.c_str(), cubin.c_str(), arch_flags.c_str(), include.c_str(), cuda_version);
+ "-o \"%s\" --ptxas-options=\"-v\" -I\"%s\" -DNVCC -D__KERNEL_CUDA_VERSION__=%d",
+ nvcc.c_str(), major, minor, machine, kernel.c_str(), cubin.c_str(), include.c_str(), cuda_version);
printf("%s\n", command.c_str());
@@ -349,8 +341,8 @@ public:
if(cuContext == 0)
return false;
- /* check if GPU is supported with current feature set */
- if(!support_device(experimental))
+ /* check if GPU is supported */
+ if(!support_device(experimental, false))
return false;
/* get kernel */
@@ -383,7 +375,7 @@ public:
cuda_push_context();
CUdeviceptr device_pointer;
size_t size = mem.memory_size();
- cuda_assert(cuMemAlloc(&device_pointer, size))
+ cuda_assert(cuMemAlloc(&device_pointer, size));
mem.device_pointer = (device_ptr)device_pointer;
stats.mem_alloc(size);
cuda_pop_context();
@@ -393,7 +385,7 @@ public:
{
cuda_push_context();
if(mem.device_pointer)
- cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), (void*)mem.data_pointer, mem.memory_size()))
+ cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), (void*)mem.data_pointer, mem.memory_size()));
cuda_pop_context();
}
@@ -405,7 +397,7 @@ public:
cuda_push_context();
if(mem.device_pointer) {
cuda_assert(cuMemcpyDtoH((uchar*)mem.data_pointer + offset,
- (CUdeviceptr)((uchar*)mem.device_pointer + offset), size))
+ (CUdeviceptr)((uchar*)mem.device_pointer + offset), size));
}
else {
memset((char*)mem.data_pointer + offset, 0, size);
@@ -419,7 +411,7 @@ public:
cuda_push_context();
if(mem.device_pointer)
- cuda_assert(cuMemsetD8(cuda_device_ptr(mem.device_pointer), 0, mem.memory_size()))
+ cuda_assert(cuMemsetD8(cuda_device_ptr(mem.device_pointer), 0, mem.memory_size()));
cuda_pop_context();
}
@@ -427,7 +419,7 @@ public:
{
if(mem.device_pointer) {
cuda_push_context();
- cuda_assert(cuMemFree(cuda_device_ptr(mem.device_pointer)))
+ cuda_assert(cuMemFree(cuda_device_ptr(mem.device_pointer)));
cuda_pop_context();
mem.device_pointer = 0;
@@ -442,19 +434,21 @@ public:
size_t bytes;
cuda_push_context();
- cuda_assert(cuModuleGetGlobal(&mem, &bytes, cuModule, name))
+ cuda_assert(cuModuleGetGlobal(&mem, &bytes, cuModule, name));
//assert(bytes == size);
- cuda_assert(cuMemcpyHtoD(mem, host, size))
+ cuda_assert(cuMemcpyHtoD(mem, host, size));
cuda_pop_context();
}
- void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic)
+ void tex_alloc(const char *name, device_memory& mem, InterpolationType interpolation, bool periodic)
{
+ /* todo: support 3D textures, only CPU for now */
+
/* determine format */
CUarray_format_enum format;
size_t dsize = datatype_size(mem.data_type);
size_t size = mem.memory_size();
- bool use_texture = interpolation || use_texture_storage;
+ bool use_texture = (interpolation != INTERPOLATION_NONE) || use_texture_storage;
if(use_texture) {
@@ -469,14 +463,14 @@ public:
CUtexref texref = NULL;
cuda_push_context();
- cuda_assert(cuModuleGetTexRef(&texref, cuModule, name))
+ cuda_assert(cuModuleGetTexRef(&texref, cuModule, name));
if(!texref) {
cuda_pop_context();
return;
}
- if(interpolation) {
+ if(interpolation != INTERPOLATION_NONE) {
CUarray handle = NULL;
CUDA_ARRAY_DESCRIPTOR desc;
@@ -485,7 +479,7 @@ public:
desc.Format = format;
desc.NumChannels = mem.data_elements;
- cuda_assert(cuArrayCreate(&handle, &desc))
+ cuda_assert(cuArrayCreate(&handle, &desc));
if(!handle) {
cuda_pop_context();
@@ -503,15 +497,23 @@ public:
param.WidthInBytes = param.srcPitch;
param.Height = mem.data_height;
- cuda_assert(cuMemcpy2D(&param))
+ cuda_assert(cuMemcpy2D(&param));
}
else
- cuda_assert(cuMemcpyHtoA(handle, 0, (void*)mem.data_pointer, size))
+ cuda_assert(cuMemcpyHtoA(handle, 0, (void*)mem.data_pointer, size));
- cuda_assert(cuTexRefSetArray(texref, handle, CU_TRSA_OVERRIDE_FORMAT))
+ cuda_assert(cuTexRefSetArray(texref, handle, CU_TRSA_OVERRIDE_FORMAT));
- cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_LINEAR))
- cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_NORMALIZED_COORDINATES))
+ if(interpolation == INTERPOLATION_CLOSEST) {
+ cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_POINT));
+ }
+ else if (interpolation == INTERPOLATION_LINEAR) {
+ cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_LINEAR));
+ }
+ else {/* CUBIC and SMART are unsupported for CUDA */
+ cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_LINEAR));
+ }
+ cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_NORMALIZED_COORDINATES));
mem.device_pointer = (device_ptr)handle;
@@ -525,20 +527,20 @@ public:
cuda_push_context();
- cuda_assert(cuTexRefSetAddress(NULL, texref, cuda_device_ptr(mem.device_pointer), size))
- cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_POINT))
- cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_READ_AS_INTEGER))
+ cuda_assert(cuTexRefSetAddress(NULL, texref, cuda_device_ptr(mem.device_pointer), size));
+ cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_POINT));
+ cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_READ_AS_INTEGER));
}
if(periodic) {
- cuda_assert(cuTexRefSetAddressMode(texref, 0, CU_TR_ADDRESS_MODE_WRAP))
- cuda_assert(cuTexRefSetAddressMode(texref, 1, CU_TR_ADDRESS_MODE_WRAP))
+ cuda_assert(cuTexRefSetAddressMode(texref, 0, CU_TR_ADDRESS_MODE_WRAP));
+ cuda_assert(cuTexRefSetAddressMode(texref, 1, CU_TR_ADDRESS_MODE_WRAP));
}
else {
- cuda_assert(cuTexRefSetAddressMode(texref, 0, CU_TR_ADDRESS_MODE_CLAMP))
- cuda_assert(cuTexRefSetAddressMode(texref, 1, CU_TR_ADDRESS_MODE_CLAMP))
+ cuda_assert(cuTexRefSetAddressMode(texref, 0, CU_TR_ADDRESS_MODE_CLAMP));
+ cuda_assert(cuTexRefSetAddressMode(texref, 1, CU_TR_ADDRESS_MODE_CLAMP));
}
- cuda_assert(cuTexRefSetFormat(texref, format, mem.data_elements))
+ cuda_assert(cuTexRefSetFormat(texref, format, mem.data_elements));
cuda_pop_context();
}
@@ -551,23 +553,23 @@ public:
CUdeviceptr cumem;
size_t cubytes;
- cuda_assert(cuModuleGetGlobal(&cumem, &cubytes, cuModule, name))
+ cuda_assert(cuModuleGetGlobal(&cumem, &cubytes, cuModule, name));
if(cubytes == 8) {
/* 64 bit device pointer */
uint64_t ptr = mem.device_pointer;
- cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes))
+ cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes));
}
else {
/* 32 bit device pointer */
uint32_t ptr = (uint32_t)mem.device_pointer;
- cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes))
+ cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes));
}
cuda_pop_context();
}
- tex_interp_map[mem.device_pointer] = interpolation;
+ tex_interp_map[mem.device_pointer] = (interpolation != INTERPOLATION_NONE);
}
void tex_free(device_memory& mem)
@@ -602,10 +604,12 @@ public:
CUdeviceptr d_rng_state = cuda_device_ptr(rtile.rng_state);
/* get kernel function */
- if(branched)
- cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_branched_path_trace"))
- else
- cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace"))
+ if(branched && support_device(true, branched)) {
+ cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_branched_path_trace"));
+ }
+ else {
+ cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace"));
+ }
if(have_error())
return;
@@ -613,49 +617,63 @@ public:
/* pass in parameters */
int offset = 0;
- cuda_assert(cuParamSetv(cuPathTrace, offset, &d_buffer, sizeof(d_buffer)))
+ cuda_assert(cuParamSetv(cuPathTrace, offset, &d_buffer, sizeof(d_buffer)));
offset += sizeof(d_buffer);
- cuda_assert(cuParamSetv(cuPathTrace, offset, &d_rng_state, sizeof(d_rng_state)))
+ cuda_assert(cuParamSetv(cuPathTrace, offset, &d_rng_state, sizeof(d_rng_state)));
offset += sizeof(d_rng_state);
offset = align_up(offset, __alignof(sample));
- cuda_assert(cuParamSeti(cuPathTrace, offset, sample))
+ cuda_assert(cuParamSeti(cuPathTrace, offset, sample));
offset += sizeof(sample);
- cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.x))
+ cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.x));
offset += sizeof(rtile.x);
- cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.y))
+ cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.y));
offset += sizeof(rtile.y);
- cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.w))
+ cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.w));
offset += sizeof(rtile.w);
- cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.h))
+ cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.h));
offset += sizeof(rtile.h);
- cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.offset))
+ cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.offset));
offset += sizeof(rtile.offset);
- cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.stride))
+ cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.stride));
offset += sizeof(rtile.stride);
- cuda_assert(cuParamSetSize(cuPathTrace, offset))
+ cuda_assert(cuParamSetSize(cuPathTrace, offset));
+
+ /* launch kernel */
+ int threads_per_block;
+ cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuPathTrace));
+
+ /*int num_registers;
+ cuda_assert(cuFuncGetAttribute(&num_registers, CU_FUNC_ATTRIBUTE_NUM_REGS, cuPathTrace));
+
+ printf("threads_per_block %d\n", threads_per_block);
+ printf("num_registers %d\n", num_registers);*/
- /* launch kernel: todo find optimal size, cache config for fermi */
- int xthreads = 16;
- int ythreads = 16;
+ int xthreads = (int)sqrt((float)threads_per_block);
+ int ythreads = (int)sqrt((float)threads_per_block);
int xblocks = (rtile.w + xthreads - 1)/xthreads;
int yblocks = (rtile.h + ythreads - 1)/ythreads;
- cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1))
- cuda_assert(cuFuncSetBlockShape(cuPathTrace, xthreads, ythreads, 1))
- cuda_assert(cuLaunchGridAsync(cuPathTrace, xblocks, yblocks, cuStream))
+ cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetBlockShape(cuPathTrace, xthreads, ythreads, 1));
- cuda_assert(cuEventRecord(tileDone, cuStream ))
- cuda_assert(cuEventSynchronize(tileDone))
+ if(info.display_device) {
+ /* don't use async for device used for display, locks up UI too much */
+ cuda_assert(cuLaunchGrid(cuPathTrace, xblocks, yblocks));
+ cuda_assert(cuCtxSynchronize());
+ }
+ else {
+ cuda_assert(cuLaunchGridAsync(cuPathTrace, xblocks, yblocks, cuStream));
+ }
cuda_pop_context();
}
@@ -672,55 +690,60 @@ public:
CUdeviceptr d_buffer = cuda_device_ptr(buffer);
/* get kernel function */
- if(rgba_half)
- cuda_assert(cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_convert_to_half_float"))
- else
- cuda_assert(cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_convert_to_byte"))
+ if(rgba_half) {
+ cuda_assert(cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_convert_to_half_float"));
+ }
+ else {
+ cuda_assert(cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_convert_to_byte"));
+ }
/* pass in parameters */
int offset = 0;
- cuda_assert(cuParamSetv(cuFilmConvert, offset, &d_rgba, sizeof(d_rgba)))
+ cuda_assert(cuParamSetv(cuFilmConvert, offset, &d_rgba, sizeof(d_rgba)));
offset += sizeof(d_rgba);
- cuda_assert(cuParamSetv(cuFilmConvert, offset, &d_buffer, sizeof(d_buffer)))
+ cuda_assert(cuParamSetv(cuFilmConvert, offset, &d_buffer, sizeof(d_buffer)));
offset += sizeof(d_buffer);
float sample_scale = 1.0f/(task.sample + 1);
offset = align_up(offset, __alignof(sample_scale));
- cuda_assert(cuParamSetf(cuFilmConvert, offset, sample_scale))
+ cuda_assert(cuParamSetf(cuFilmConvert, offset, sample_scale));
offset += sizeof(sample_scale);
- cuda_assert(cuParamSeti(cuFilmConvert, offset, task.x))
+ cuda_assert(cuParamSeti(cuFilmConvert, offset, task.x));
offset += sizeof(task.x);
- cuda_assert(cuParamSeti(cuFilmConvert, offset, task.y))
+ cuda_assert(cuParamSeti(cuFilmConvert, offset, task.y));
offset += sizeof(task.y);
- cuda_assert(cuParamSeti(cuFilmConvert, offset, task.w))
+ cuda_assert(cuParamSeti(cuFilmConvert, offset, task.w));
offset += sizeof(task.w);
- cuda_assert(cuParamSeti(cuFilmConvert, offset, task.h))
+ cuda_assert(cuParamSeti(cuFilmConvert, offset, task.h));
offset += sizeof(task.h);
- cuda_assert(cuParamSeti(cuFilmConvert, offset, task.offset))
+ cuda_assert(cuParamSeti(cuFilmConvert, offset, task.offset));
offset += sizeof(task.offset);
- cuda_assert(cuParamSeti(cuFilmConvert, offset, task.stride))
+ cuda_assert(cuParamSeti(cuFilmConvert, offset, task.stride));
offset += sizeof(task.stride);
- cuda_assert(cuParamSetSize(cuFilmConvert, offset))
+ cuda_assert(cuParamSetSize(cuFilmConvert, offset));
+
+ /* launch kernel */
+ int threads_per_block;
+ cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuFilmConvert));
- /* launch kernel: todo find optimal size, cache config for fermi */
- int xthreads = 16;
- int ythreads = 16;
+ int xthreads = (int)sqrt((float)threads_per_block);
+ int ythreads = (int)sqrt((float)threads_per_block);
int xblocks = (task.w + xthreads - 1)/xthreads;
int yblocks = (task.h + ythreads - 1)/ythreads;
- cuda_assert(cuFuncSetCacheConfig(cuFilmConvert, CU_FUNC_CACHE_PREFER_L1))
- cuda_assert(cuFuncSetBlockShape(cuFilmConvert, xthreads, ythreads, 1))
- cuda_assert(cuLaunchGrid(cuFilmConvert, xblocks, yblocks))
+ cuda_assert(cuFuncSetCacheConfig(cuFilmConvert, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetBlockShape(cuFilmConvert, xthreads, ythreads, 1));
+ cuda_assert(cuLaunchGrid(cuFilmConvert, xblocks, yblocks));
unmap_pixels((rgba_byte)? rgba_byte: rgba_half);
@@ -734,40 +757,55 @@ public:
cuda_push_context();
- CUfunction cuDisplace;
+ CUfunction cuShader;
CUdeviceptr d_input = cuda_device_ptr(task.shader_input);
CUdeviceptr d_output = cuda_device_ptr(task.shader_output);
/* get kernel function */
- cuda_assert(cuModuleGetFunction(&cuDisplace, cuModule, "kernel_cuda_shader"))
-
- /* pass in parameters */
- int offset = 0;
-
- cuda_assert(cuParamSetv(cuDisplace, offset, &d_input, sizeof(d_input)))
- offset += sizeof(d_input);
+ cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_shader"));
+
+ /* do tasks in smaller chunks, so we can cancel it */
+ const int shader_chunk_size = 65536;
+ const int start = task.shader_x;
+ const int end = task.shader_x + task.shader_w;
- cuda_assert(cuParamSetv(cuDisplace, offset, &d_output, sizeof(d_output)))
- offset += sizeof(d_output);
+ for(int shader_x = start; shader_x < end; shader_x += shader_chunk_size) {
+ if(task.get_cancel())
+ break;
- int shader_eval_type = task.shader_eval_type;
- offset = align_up(offset, __alignof(shader_eval_type));
+ /* pass in parameters */
+ int offset = 0;
- cuda_assert(cuParamSeti(cuDisplace, offset, task.shader_eval_type))
- offset += sizeof(task.shader_eval_type);
+ cuda_assert(cuParamSetv(cuShader, offset, &d_input, sizeof(d_input)));
+ offset += sizeof(d_input);
- cuda_assert(cuParamSeti(cuDisplace, offset, task.shader_x))
- offset += sizeof(task.shader_x);
+ cuda_assert(cuParamSetv(cuShader, offset, &d_output, sizeof(d_output)));
+ offset += sizeof(d_output);
- cuda_assert(cuParamSetSize(cuDisplace, offset))
+ int shader_eval_type = task.shader_eval_type;
+ offset = align_up(offset, __alignof(shader_eval_type));
- /* launch kernel: todo find optimal size, cache config for fermi */
- int xthreads = 16;
- int xblocks = (task.shader_w + xthreads - 1)/xthreads;
+ cuda_assert(cuParamSeti(cuShader, offset, task.shader_eval_type));
+ offset += sizeof(task.shader_eval_type);
- cuda_assert(cuFuncSetCacheConfig(cuDisplace, CU_FUNC_CACHE_PREFER_L1))
- cuda_assert(cuFuncSetBlockShape(cuDisplace, xthreads, 1, 1))
- cuda_assert(cuLaunchGrid(cuDisplace, xblocks, 1))
+ cuda_assert(cuParamSeti(cuShader, offset, shader_x));
+ offset += sizeof(shader_x);
+
+ cuda_assert(cuParamSetSize(cuShader, offset));
+
+ /* launch kernel */
+ int threads_per_block;
+ cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuShader));
+
+ int shader_w = min(shader_chunk_size, end - shader_x);
+ int xblocks = (shader_w + threads_per_block - 1)/threads_per_block;
+
+ cuda_assert(cuFuncSetCacheConfig(cuShader, CU_FUNC_CACHE_PREFER_L1));
+ cuda_assert(cuFuncSetBlockShape(cuShader, threads_per_block, 1, 1));
+ cuda_assert(cuLaunchGrid(cuShader, xblocks, 1));
+
+ cuda_assert(cuCtxSynchronize());
+ }
cuda_pop_context();
}
@@ -779,8 +817,8 @@ public:
CUdeviceptr buffer;
size_t bytes;
- cuda_assert(cuGraphicsMapResources(1, &pmem.cuPBOresource, 0))
- cuda_assert(cuGraphicsResourceGetMappedPointer(&buffer, &bytes, pmem.cuPBOresource))
+ cuda_assert(cuGraphicsMapResources(1, &pmem.cuPBOresource, 0));
+ cuda_assert(cuGraphicsResourceGetMappedPointer(&buffer, &bytes, pmem.cuPBOresource));
return buffer;
}
@@ -793,7 +831,7 @@ public:
if(!background) {
PixelMem pmem = pixel_mem_map[mem];
- cuda_assert(cuGraphicsUnmapResources(1, &pmem.cuPBOresource, 0))
+ cuda_assert(cuGraphicsUnmapResources(1, &pmem.cuPBOresource, 0));
}
}
@@ -882,7 +920,7 @@ public:
cuda_push_context();
- cuda_assert(cuGraphicsUnregisterResource(pmem.cuPBOresource))
+ cuda_assert(cuGraphicsUnregisterResource(pmem.cuPBOresource));
glDeleteBuffers(1, &pmem.cuPBO);
glDeleteTextures(1, &pmem.cuTexId);
@@ -900,7 +938,8 @@ public:
}
}
- void draw_pixels(device_memory& mem, int y, int w, int h, int dy, int width, int height, bool transparent)
+ void draw_pixels(device_memory& mem, int y, int w, int h, int dy, int width, int height, bool transparent,
+ const DeviceDrawParams &draw_params)
{
if(!background) {
PixelMem pmem = pixel_mem_map[mem.device_pointer];
@@ -933,6 +972,10 @@ public:
glColor3f(1.0f, 1.0f, 1.0f);
+ if(draw_params.bind_display_space_shader_cb) {
+ draw_params.bind_display_space_shader_cb();
+ }
+
glPushMatrix();
glTranslatef(0.0f, (float)dy, 0.0f);
@@ -951,6 +994,10 @@ public:
glPopMatrix();
+ if(draw_params.unbind_display_space_shader_cb) {
+ draw_params.unbind_display_space_shader_cb();
+ }
+
if(transparent) {
glDisable(GL_BLEND);
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); /* reset blender default */
@@ -964,7 +1011,7 @@ public:
return;
}
- Device::draw_pixels(mem, y, w, h, dy, width, height, transparent);
+ Device::draw_pixels(mem, y, w, h, dy, width, height, transparent, draw_params);
}
void thread_run(DeviceTask *task)
@@ -979,6 +1026,10 @@ public:
int start_sample = tile.start_sample;
int end_sample = tile.start_sample + tile.num_samples;
+ boost::posix_time::ptime start_time(boost::posix_time::microsec_clock::local_time());
+ boost::posix_time::ptime last_time = start_time;
+ int sync_sample = 10;
+
for(int sample = start_sample; sample < end_sample; sample++) {
if (task->get_cancel()) {
if(task->need_finish_queue == false)
@@ -988,8 +1039,28 @@ public:
path_trace(tile, sample, branched);
tile.sample = sample + 1;
-
task->update_progress(tile);
+
+ if(!info.display_device && sample == sync_sample) {
+ cuda_push_context();
+ cuda_assert(cuEventRecord(tileDone, cuStream));
+ cuda_assert(cuEventSynchronize(tileDone));
+
+ /* Do some time keeping to find out if we need to sync less */
+ boost::posix_time::ptime current_time(boost::posix_time::microsec_clock::local_time());
+ boost::posix_time::time_duration sample_duration = current_time - last_time;
+
+ long msec = sample_duration.total_milliseconds();
+ float scaling_factor = (float)target_update_frequency / (float)msec;
+
+ /* sync at earliest next sample and probably later */
+ sync_sample = (sample + 1) + sync_sample * (int)ceil(scaling_factor);
+
+ sync_sample = min(end_sample - 1, sync_sample); // make sure we sync the last sample always
+
+ last_time = current_time;
+ cuda_pop_context();
+ }
}
task->release_tile(tile);
@@ -999,7 +1070,7 @@ public:
shader(*task);
cuda_push_context();
- cuda_assert(cuCtxSynchronize())
+ cuda_assert(cuCtxSynchronize());
cuda_pop_context();
}
}
@@ -1020,7 +1091,7 @@ public:
film_convert(task, task.buffer, task.rgba_byte, task.rgba_half);
cuda_push_context();
- cuda_assert(cuCtxSynchronize())
+ cuda_assert(cuCtxSynchronize());
cuda_pop_context();
}
else {
@@ -1081,6 +1152,7 @@ void device_cuda_info(vector<DeviceInfo>& devices)
int major, minor;
cuDeviceComputeCapability(&major, &minor, num);
info.advanced_shading = (major >= 2);
+ info.extended_images = (major >= 3);
info.pack_images = false;
/* if device has a kernel timeout, assume it is used for display */
diff --git a/intern/cycles/device/device_memory.h b/intern/cycles/device/device_memory.h
index 1427d12cba2..8d6f4a49a9c 100644
--- a/intern/cycles/device/device_memory.h
+++ b/intern/cycles/device/device_memory.h
@@ -169,6 +169,7 @@ public:
size_t data_size;
size_t data_width;
size_t data_height;
+ size_t data_depth;
/* device pointer */
device_ptr device_pointer;
@@ -195,6 +196,7 @@ public:
data_size = 0;
data_width = 0;
data_height = 0;
+ data_depth = 0;
assert(data_elements > 0);
@@ -204,20 +206,21 @@ public:
virtual ~device_vector() {}
/* vector functions */
- T *resize(size_t width, size_t height = 0)
+ T *resize(size_t width, size_t height = 0, size_t depth = 0)
{
- data_size = (height == 0)? width: width*height;
+ data_size = width * ((height == 0)? 1: height) * ((depth == 0)? 1: depth);
data.resize(data_size);
data_pointer = (device_ptr)&data[0];
data_width = width;
data_height = height;
+ data_depth = depth;
return &data[0];
}
- T *copy(T *ptr, size_t width, size_t height = 0)
+ T *copy(T *ptr, size_t width, size_t height = 0, size_t depth = 0)
{
- T *mem = resize(width, height);
+ T *mem = resize(width, height, depth);
memcpy(mem, ptr, memory_size());
return mem;
}
@@ -230,13 +233,14 @@ public:
}
}
- void reference(T *ptr, size_t width, size_t height = 0)
+ void reference(T *ptr, size_t width, size_t height = 0, size_t depth = 0)
{
data.clear();
- data_size = (height == 0)? width: width*height;
+ data_size = width * ((height == 0)? 1: height) * ((depth == 0)? 1: depth);
data_pointer = (device_ptr)ptr;
data_width = width;
data_height = height;
+ data_depth = depth;
}
void clear()
@@ -245,6 +249,7 @@ public:
data_pointer = 0;
data_width = 0;
data_height = 0;
+ data_depth = 0;
data_size = 0;
}
diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp
index 27b9de0769e..c866ebaaea2 100644
--- a/intern/cycles/device/device_multi.cpp
+++ b/intern/cycles/device/device_multi.cpp
@@ -168,7 +168,7 @@ public:
sub.device->const_copy_to(name, host, size);
}
- void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic)
+ void tex_alloc(const char *name, device_memory& mem, InterpolationType interpolation, bool periodic)
{
foreach(SubDevice& sub, devices) {
mem.device_pointer = 0;
@@ -233,7 +233,8 @@ public:
mem.device_pointer = tmp;
}
- void draw_pixels(device_memory& rgba, int y, int w, int h, int dy, int width, int height, bool transparent)
+ void draw_pixels(device_memory& rgba, int y, int w, int h, int dy, int width, int height, bool transparent,
+ const DeviceDrawParams &draw_params)
{
device_ptr tmp = rgba.device_pointer;
int i = 0, sub_h = h/devices.size();
@@ -247,7 +248,7 @@ public:
/* adjust math for w/width */
rgba.device_pointer = sub.ptr_map[tmp];
- sub.device->draw_pixels(rgba, sy, w, sh, sdy, width, sheight, transparent);
+ sub.device->draw_pixels(rgba, sy, w, sh, sdy, width, sheight, transparent, draw_params);
i++;
}
@@ -327,6 +328,7 @@ static bool device_multi_add(vector<DeviceInfo>& devices, DeviceType type, bool
info.advanced_shading = with_advanced_shading;
info.pack_images = false;
+ info.extended_images = true;
foreach(DeviceInfo& subinfo, devices) {
if(subinfo.type == type) {
@@ -350,6 +352,7 @@ static bool device_multi_add(vector<DeviceInfo>& devices, DeviceType type, bool
if(subinfo.display_device)
info.display_device = true;
info.pack_images = info.pack_images || subinfo.pack_images;
+ info.extended_images = info.extended_images && subinfo.extended_images;
num_added++;
}
}
diff --git a/intern/cycles/device/device_network.cpp b/intern/cycles/device/device_network.cpp
index bffd993818f..af051076009 100644
--- a/intern/cycles/device/device_network.cpp
+++ b/intern/cycles/device/device_network.cpp
@@ -162,7 +162,7 @@ public:
snd.write_buffer(host, size);
}
- void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic)
+ void tex_alloc(const char *name, device_memory& mem, InterpolationType interpolation, bool periodic)
{
thread_scoped_lock lock(rpc_lock);
@@ -326,7 +326,7 @@ class DeviceServer {
public:
thread_mutex rpc_lock;
- void network_error(const string &message){
+ void network_error(const string &message) {
error_func.network_error(message);
}
@@ -366,7 +366,7 @@ protected:
{
/* create a new DataVector and insert it into mem_data */
pair<DataMap::iterator,bool> data_ins = mem_data.insert(
- DataMap::value_type(client_pointer, DataVector()));
+ DataMap::value_type(client_pointer, DataVector()));
/* make sure it was a unique insertion */
assert(data_ins.second);
@@ -559,7 +559,7 @@ protected:
else if(rcv.name == "tex_alloc") {
network_device_memory mem;
string name;
- bool interpolation;
+ InterpolationType interpolation;
bool periodic;
device_ptr client_pointer;
diff --git a/intern/cycles/device/device_network.h b/intern/cycles/device/device_network.h
index bf8f3c70c49..893841d1da7 100644
--- a/intern/cycles/device/device_network.h
+++ b/intern/cycles/device/device_network.h
@@ -118,7 +118,7 @@ public:
void add(const device_memory& mem)
{
archive & mem.data_type & mem.data_elements & mem.data_size;
- archive & mem.data_width & mem.data_height & mem.device_pointer;
+ archive & mem.data_width & mem.data_height & mem.data_depth & mem.device_pointer;
}
template<typename T> void add(const T& data)
@@ -209,7 +209,7 @@ public:
boost::system::error_code error;
size_t len = boost::asio::read(socket, boost::asio::buffer(header), error);
- if(error.value()){
+ if(error.value()) {
error_func->network_error(error.message());
}
@@ -261,7 +261,7 @@ public:
void read(network_device_memory& mem)
{
*archive & mem.data_type & mem.data_elements & mem.data_size;
- *archive & mem.data_width & mem.data_height & mem.device_pointer;
+ *archive & mem.data_width & mem.data_height & mem.data_depth & mem.device_pointer;
mem.data_pointer = 0;
}
@@ -276,7 +276,7 @@ public:
boost::system::error_code error;
size_t len = boost::asio::read(socket, boost::asio::buffer(buffer, size), error);
- if(error.value()){
+ if(error.value()) {
error_func->network_error(error.message());
}
@@ -391,7 +391,7 @@ private:
/* add address if it's not already in the list */
bool found = std::find(servers.begin(), servers.end(),
- address) != servers.end();
+ address) != servers.end();
if(!found)
servers.push_back(address);
diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index 9117b70d749..694ec9db036 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -101,9 +101,6 @@ static string opencl_kernel_build_options(const string& platform, const string *
if(opencl_kernel_use_debug())
build_options += "-D__KERNEL_OPENCL_DEBUG__ ";
-
- if(opencl_kernel_use_advanced_shading(platform))
- build_options += "-D__KERNEL_OPENCL_NEED_ADVANCED_SHADING__ ";
return build_options;
}
@@ -409,10 +406,22 @@ public:
fprintf(stderr, "%s\n", message.c_str());
}
- void opencl_assert(cl_int err)
+#define opencl_assert(stmt) \
+ { \
+ cl_int err = stmt; \
+ \
+ if(err != CL_SUCCESS) { \
+ string message = string_printf("OpenCL error: %s in %s", opencl_error_string(err), #stmt); \
+ if(error_msg == "") \
+ error_msg = message; \
+ fprintf(stderr, "%s\n", message.c_str()); \
+ } \
+ } (void)0
+
+ void opencl_assert_err(cl_int err, const char* where)
{
if(err != CL_SUCCESS) {
- string message = string_printf("OpenCL error (%d): %s", err, opencl_error_string(err));
+ string message = string_printf("OpenCL error (%d): %s in %s", err, opencl_error_string(err), where);
if(error_msg == "")
error_msg = message;
fprintf(stderr, "%s\n", message.c_str());
@@ -452,8 +461,10 @@ public:
vector<cl_platform_id> platforms(num_platforms, NULL);
ciErr = clGetPlatformIDs(num_platforms, &platforms[0], NULL);
- if(opencl_error(ciErr))
+ if(opencl_error(ciErr)) {
+ fprintf(stderr, "clGetPlatformIDs failed \n");
return;
+ }
int num_base = 0;
int total_devices = 0;
@@ -478,8 +489,10 @@ public:
/* get devices */
vector<cl_device_id> device_ids(num_devices, NULL);
- if(opencl_error(clGetDeviceIDs(cpPlatform, opencl_device_type(), num_devices, &device_ids[0], NULL)))
+ if(opencl_error(clGetDeviceIDs(cpPlatform, opencl_device_type(), num_devices, &device_ids[0], NULL))) {
+ fprintf(stderr, "clGetDeviceIDs failed \n");
return;
+ }
cdDevice = device_ids[info.num - num_base];
@@ -515,8 +528,10 @@ public:
cxContext = clCreateContext(context_props, 1, &cdDevice,
context_notify_callback, cdDevice, &ciErr);
- if(opencl_error(ciErr))
+ if(opencl_error(ciErr)) {
+ opencl_error("OpenCL: clCreateContext failed");
return;
+ }
/* cache it */
OpenCLCache::store_context(cpPlatform, cdDevice, cxContext, cache_locker);
@@ -531,6 +546,7 @@ public:
if(opencl_error(ciErr))
return;
+ fprintf(stderr,"Device init succes\n");
device_initialized = true;
}
@@ -821,7 +837,7 @@ public:
mem.device_pointer = (device_ptr)clCreateBuffer(cxContext, mem_flag, size, mem_ptr, &ciErr);
- opencl_assert(ciErr);
+ opencl_assert_err(ciErr, "clCreateBuffer");
stats.mem_alloc(size);
}
@@ -830,8 +846,7 @@ public:
{
/* this is blocking */
size_t size = mem.memory_size();
- ciErr = clEnqueueWriteBuffer(cqCommandQueue, CL_MEM_PTR(mem.device_pointer), CL_TRUE, 0, size, (void*)mem.data_pointer, 0, NULL, NULL);
- opencl_assert(ciErr);
+ opencl_assert(clEnqueueWriteBuffer(cqCommandQueue, CL_MEM_PTR(mem.device_pointer), CL_TRUE, 0, size, (void*)mem.data_pointer, 0, NULL, NULL));
}
void mem_copy_from(device_memory& mem, int y, int w, int h, int elem)
@@ -839,8 +854,7 @@ public:
size_t offset = elem*y*w;
size_t size = elem*w*h;
- ciErr = clEnqueueReadBuffer(cqCommandQueue, CL_MEM_PTR(mem.device_pointer), CL_TRUE, offset, size, (uchar*)mem.data_pointer + offset, 0, NULL, NULL);
- opencl_assert(ciErr);
+ opencl_assert(clEnqueueReadBuffer(cqCommandQueue, CL_MEM_PTR(mem.device_pointer), CL_TRUE, offset, size, (uchar*)mem.data_pointer + offset, 0, NULL, NULL));
}
void mem_zero(device_memory& mem)
@@ -854,9 +868,8 @@ public:
void mem_free(device_memory& mem)
{
if(mem.device_pointer) {
- ciErr = clReleaseMemObject(CL_MEM_PTR(mem.device_pointer));
+ opencl_assert(clReleaseMemObject(CL_MEM_PTR(mem.device_pointer)));
mem.device_pointer = 0;
- opencl_assert(ciErr);
stats.mem_free(mem.memory_size());
}
@@ -881,7 +894,7 @@ public:
mem_copy_to(*i->second);
}
- void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic)
+ void tex_alloc(const char *name, device_memory& mem, InterpolationType interpolation, bool periodic)
{
mem_alloc(mem, MEM_READ_ONLY);
mem_copy_to(mem);
@@ -919,7 +932,7 @@ public:
CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t)*3, max_work_items, NULL);
/* try to divide evenly over 2 dimensions */
- size_t sqrt_workgroup_size = max(sqrt((double)workgroup_size), 1.0);
+ size_t sqrt_workgroup_size = max((size_t)sqrt((double)workgroup_size), 1);
size_t local_size[2] = {sqrt_workgroup_size, sqrt_workgroup_size};
/* some implementations have max size 1 on 2nd dimension */
@@ -931,8 +944,7 @@ public:
size_t global_size[2] = {global_size_round_up(local_size[0], w), global_size_round_up(local_size[1], h)};
/* run kernel */
- ciErr = clEnqueueNDRangeKernel(cqCommandQueue, kernel, 2, NULL, global_size, NULL, 0, NULL, NULL);
- opencl_assert(ciErr);
+ opencl_assert(clEnqueueNDRangeKernel(cqCommandQueue, kernel, 2, NULL, global_size, NULL, 0, NULL, NULL));
opencl_assert(clFlush(cqCommandQueue));
}
@@ -952,33 +964,29 @@ public:
/* sample arguments */
cl_uint narg = 0;
- ciErr = 0;
- ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_data), (void*)&d_data);
- ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_buffer), (void*)&d_buffer);
- ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_rng_state), (void*)&d_rng_state);
+ opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_data), (void*)&d_data));
+ opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_buffer), (void*)&d_buffer));
+ opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_rng_state), (void*)&d_rng_state));
#define KERNEL_TEX(type, ttype, name) \
- ciErr |= set_kernel_arg_mem(ckPathTraceKernel, &narg, #name);
+ set_kernel_arg_mem(ckPathTraceKernel, &narg, #name);
#include "kernel_textures.h"
- ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_sample), (void*)&d_sample);
- ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_x), (void*)&d_x);
- ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_y), (void*)&d_y);
- ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_w), (void*)&d_w);
- ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_h), (void*)&d_h);
- ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_offset), (void*)&d_offset);
- ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_stride), (void*)&d_stride);
-
- opencl_assert(ciErr);
+ opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_sample), (void*)&d_sample));
+ opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_x), (void*)&d_x));
+ opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_y), (void*)&d_y));
+ opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_w), (void*)&d_w));
+ opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_h), (void*)&d_h));
+ opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_offset), (void*)&d_offset));
+ opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_stride), (void*)&d_stride));
enqueue_kernel(ckPathTraceKernel, d_w, d_h);
}
- cl_int set_kernel_arg_mem(cl_kernel kernel, cl_uint *narg, const char *name)
+ void set_kernel_arg_mem(cl_kernel kernel, cl_uint *narg, const char *name)
{
cl_mem ptr;
- cl_int err = 0;
MemMap::iterator i = mem_map.find(name);
if(i != mem_map.end()) {
@@ -989,10 +997,7 @@ public:
ptr = CL_MEM_PTR(null_mem);
}
- err |= clSetKernelArg(kernel, (*narg)++, sizeof(ptr), (void*)&ptr);
- opencl_assert(err);
-
- return err;
+ opencl_assert(clSetKernelArg(kernel, (*narg)++, sizeof(ptr), (void*)&ptr));
}
void film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half)
@@ -1011,27 +1016,27 @@ public:
/* sample arguments */
cl_uint narg = 0;
- ciErr = 0;
+
cl_kernel ckFilmConvertKernel = (rgba_byte)? ckFilmConvertByteKernel: ckFilmConvertHalfFloatKernel;
- ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_data), (void*)&d_data);
- ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_rgba), (void*)&d_rgba);
- ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_buffer), (void*)&d_buffer);
+ opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_data), (void*)&d_data));
+ opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_rgba), (void*)&d_rgba));
+ opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_buffer), (void*)&d_buffer));
#define KERNEL_TEX(type, ttype, name) \
- ciErr |= set_kernel_arg_mem(ckFilmConvertKernel, &narg, #name);
+ set_kernel_arg_mem(ckFilmConvertKernel, &narg, #name);
#include "kernel_textures.h"
- ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_sample_scale), (void*)&d_sample_scale);
- ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_x), (void*)&d_x);
- ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_y), (void*)&d_y);
- ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_w), (void*)&d_w);
- ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_h), (void*)&d_h);
- ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_offset), (void*)&d_offset);
- ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_stride), (void*)&d_stride);
+ opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_sample_scale), (void*)&d_sample_scale));
+ opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_x), (void*)&d_x));
+ opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_y), (void*)&d_y));
+ opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_w), (void*)&d_w));
+ opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_h), (void*)&d_h));
+ opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_offset), (void*)&d_offset));
+ opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_stride), (void*)&d_stride));
+
- opencl_assert(ciErr);
enqueue_kernel(ckFilmConvertKernel, d_w, d_h);
}
@@ -1048,21 +1053,18 @@ public:
/* sample arguments */
cl_uint narg = 0;
- ciErr = 0;
- ciErr |= clSetKernelArg(ckShaderKernel, narg++, sizeof(d_data), (void*)&d_data);
- ciErr |= clSetKernelArg(ckShaderKernel, narg++, sizeof(d_input), (void*)&d_input);
- ciErr |= clSetKernelArg(ckShaderKernel, narg++, sizeof(d_output), (void*)&d_output);
+ opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_data), (void*)&d_data));
+ opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_input), (void*)&d_input));
+ opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_output), (void*)&d_output));
#define KERNEL_TEX(type, ttype, name) \
- ciErr |= set_kernel_arg_mem(ckShaderKernel, &narg, #name);
+ set_kernel_arg_mem(ckShaderKernel, &narg, #name);
#include "kernel_textures.h"
- ciErr |= clSetKernelArg(ckShaderKernel, narg++, sizeof(d_shader_eval_type), (void*)&d_shader_eval_type);
- ciErr |= clSetKernelArg(ckShaderKernel, narg++, sizeof(d_shader_x), (void*)&d_shader_x);
- ciErr |= clSetKernelArg(ckShaderKernel, narg++, sizeof(d_shader_w), (void*)&d_shader_w);
-
- opencl_assert(ciErr);
+ opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_shader_eval_type), (void*)&d_shader_eval_type));
+ opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_shader_x), (void*)&d_shader_x));
+ opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_shader_w), (void*)&d_shader_w));
enqueue_kernel(ckShaderKernel, task.shader_w, 1);
}
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index cbe0d4b5d10..d18f4fa2998 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -12,10 +12,6 @@ set(INC_SYS
set(SRC
kernel.cpp
- kernel_sse2.cpp
- kernel_sse3.cpp
- kernel_sse41.cpp
- kernel_avx.cpp
kernel.cl
kernel.cu
)
@@ -23,14 +19,10 @@ set(SRC
set(SRC_HEADERS
kernel.h
kernel_accumulate.h
- kernel_bvh.h
- kernel_bvh_subsurface.h
- kernel_bvh_traversal.h
kernel_camera.h
kernel_compat_cpu.h
kernel_compat_cuda.h
kernel_compat_opencl.h
- kernel_curve.h
kernel_differential.h
kernel_displace.h
kernel_emission.h
@@ -40,18 +32,15 @@ set(SRC_HEADERS
kernel_light.h
kernel_math.h
kernel_montecarlo.h
- kernel_object.h
kernel_passes.h
kernel_path.h
kernel_path_state.h
- kernel_primitive.h
kernel_projection.h
kernel_random.h
kernel_shader.h
kernel_shadow.h
kernel_subsurface.h
kernel_textures.h
- kernel_triangle.h
kernel_types.h
kernel_volume.h
)
@@ -118,6 +107,21 @@ set(SRC_SVM_HEADERS
svm/svm_wave.h
)
+# Headers under geom/. This list feeds the CUDA cubin dependencies, the
+# cycles_kernel library sources and the install into kernel/geom, so every
+# header that geom.h pulls in (directly or indirectly) must be listed here.
+set(SRC_GEOM_HEADERS
+ geom/geom.h
+ geom/geom_attribute.h
+ geom/geom_bvh.h
+ geom/geom_bvh_shadow.h
+ geom/geom_bvh_subsurface.h
+ geom/geom_bvh_traversal.h
+ geom/geom_curve.h
+ geom/geom_motion_curve.h
+ geom/geom_motion_triangle.h
+ geom/geom_object.h
+ geom/geom_primitive.h
+ geom/geom_triangle.h
+ geom/geom_volume.h
+)
+
set(SRC_UTIL_HEADERS
../util/util_color.h
../util/util_half.h
@@ -142,37 +146,45 @@ if(WITH_CYCLES_CUDA_BINARIES)
set(CUDA_VERSION "${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}")
# warn for other versions
- if(CUDA_VERSION MATCHES "50")
+ if(CUDA_VERSION MATCHES "60")
else()
- message(WARNING "CUDA version ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} detected, build may succeed but only CUDA 5.0 is officially supported")
+ message(WARNING
+ "CUDA version ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} detected, "
+ "build may succeed but only CUDA 6.0 is officially supported")
endif()
# build for each arch
- set(cuda_sources kernel.cu ${SRC_HEADERS} ${SRC_SVM_HEADERS} ${SRC_CLOSURE_HEADERS} ${SRC_UTIL_HEADERS})
+ set(cuda_sources kernel.cu ${SRC_HEADERS} ${SRC_SVM_HEADERS} ${SRC_GEOM_HEADERS} ${SRC_CLOSURE_HEADERS} ${SRC_UTIL_HEADERS})
set(cuda_cubins)
foreach(arch ${CYCLES_CUDA_BINARIES_ARCH})
set(cuda_cubin kernel_${arch}.cubin)
set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${CUDA_VERSION}")
-
- # CUDA 5.x build flags for different archs
- if(${arch} MATCHES "sm_2[0-9]")
- # sm_2x
- set(cuda_arch_flags "--maxrregcount=32")
- elseif(${arch} MATCHES "sm_3[0-9]")
- # sm_3x
- set(cuda_arch_flags "--maxrregcount=32")
- endif()
-
set(cuda_math_flags "--use_fast_math")
-
- if(CUDA_VERSION LESS 50 AND ${arch} MATCHES "sm_35")
+
+ if(CUDA_VERSION LESS 60 AND ${arch} MATCHES "sm_50")
+ message(WARNING "Can't build kernel for CUDA sm_50 architecture, skipping")
+ elseif(CUDA_VERSION LESS 50 AND ${arch} MATCHES "sm_35")
message(WARNING "Can't build kernel for CUDA sm_35 architecture, skipping")
else()
add_custom_command(
OUTPUT ${cuda_cubin}
- COMMAND ${CUDA_NVCC_EXECUTABLE} -arch=${arch} -m${CUDA_BITS} --cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cu -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin} --ptxas-options="-v" ${cuda_arch_flags} ${cuda_version_flags} ${cuda_math_flags} -I${CMAKE_CURRENT_SOURCE_DIR}/../util -I${CMAKE_CURRENT_SOURCE_DIR}/svm -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -DNVCC
+ COMMAND ${CUDA_NVCC_EXECUTABLE}
+ -arch=${arch}
+ -m${CUDA_BITS}
+ --cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cu
+ -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin}
+ --ptxas-options="-v"
+ ${cuda_arch_flags}
+ ${cuda_version_flags}
+ ${cuda_math_flags}
+ -I${CMAKE_CURRENT_SOURCE_DIR}/../util
+ -I${CMAKE_CURRENT_SOURCE_DIR}/svm
+ -DCCL_NAMESPACE_BEGIN=
+ -DCCL_NAMESPACE_END=
+ -DNVCC
+
DEPENDS ${cuda_sources})
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib)
@@ -195,12 +207,22 @@ endif()
include_directories(${INC})
include_directories(SYSTEM ${INC_SYS})
-set_source_files_properties(kernel_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
-set_source_files_properties(kernel_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
-set_source_files_properties(kernel_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}")
-set_source_files_properties(kernel_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
+if(CXX_HAS_SSE)
+ list(APPEND SRC
+ kernel_sse2.cpp
+ kernel_sse3.cpp
+ kernel_sse41.cpp
+ kernel_avx.cpp
+ )
+
+ set_source_files_properties(kernel_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
+ set_source_files_properties(kernel_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
+ set_source_files_properties(kernel_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}")
+ set_source_files_properties(kernel_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
+endif()
+
-add_library(cycles_kernel ${SRC} ${SRC_HEADERS} ${SRC_CLOSURE_HEADERS} ${SRC_SVM_HEADERS})
+add_library(cycles_kernel ${SRC} ${SRC_HEADERS} ${SRC_CLOSURE_HEADERS} ${SRC_SVM_HEADERS} ${SRC_GEOM_HEADERS})
if(WITH_CYCLES_CUDA)
add_dependencies(cycles_kernel cycles_kernel_cuda)
@@ -221,5 +243,6 @@ delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernel.cu" ${CYCLES_INSTALL_PATH}/k
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_CLOSURE_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/closure)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_SVM_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/svm)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_GEOM_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/geom)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_UTIL_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel)
diff --git a/intern/cycles/kernel/SConscript b/intern/cycles/kernel/SConscript
index 5077d8c96b0..04e1bad7538 100644
--- a/intern/cycles/kernel/SConscript
+++ b/intern/cycles/kernel/SConscript
@@ -60,6 +60,7 @@ if env['WITH_BF_CYCLES_CUDA_BINARIES']:
kernel_file = os.path.join(source_dir, "kernel.cu")
util_dir = os.path.join(source_dir, "../util")
svm_dir = os.path.join(source_dir, "../svm")
+ geom_dir = os.path.join(source_dir, "../geom")
closure_dir = os.path.join(source_dir, "../closure")
# get CUDA version
@@ -68,37 +69,33 @@ if env['WITH_BF_CYCLES_CUDA_BINARIES']:
cuda_major_minor = re.findall(r'release (\d+).(\d+)', output)[0]
cuda_version = int(cuda_major_minor[0])*10 + int(cuda_major_minor[1])
- if cuda_version != 50:
- print("CUDA version %d.%d detected, build may succeed but only CUDA 5.0 is officially supported." % (cuda_version/10, cuda_version%10))
+ if cuda_version != 60:
+ print("CUDA version %d.%d detected, build may succeed but only CUDA 6.0 is officially supported." % (cuda_version/10, cuda_version%10))
# nvcc flags
nvcc_flags = "-m%s" % (bits)
nvcc_flags += " --cubin --ptxas-options=\"-v\""
nvcc_flags += " -D__KERNEL_CUDA_VERSION__=%d" % (cuda_version)
nvcc_flags += " -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -DNVCC"
- nvcc_flags += " -I \"%s\" -I \"%s\" -I \"%s\"" % (util_dir, svm_dir, closure_dir)
+ nvcc_flags += " -I \"%s\" -I \"%s\" -I \"%s\" -I \"%s\"" % (util_dir, svm_dir, geom_dir, closure_dir)
# dependencies
- dependencies = ['kernel.cu'] + kernel.Glob('*.h') + kernel.Glob('../util/*.h') + kernel.Glob('svm/*.h') + kernel.Glob('closure/*.h')
+ dependencies = ['kernel.cu'] + kernel.Glob('*.h') + kernel.Glob('../util/*.h') + kernel.Glob('svm/*.h') + kernel.Glob('geom/*.h') + kernel.Glob('closure/*.h')
last_cubin_file = None
# add command for each cuda architecture
for arch in cuda_archs:
- cubin_file = os.path.join(build_dir, "kernel_%s.cubin" % arch)
+ if cuda_version < 60 and arch == "sm_50":
+ print("Can't build kernel for CUDA sm_50 architecture, skipping")
+ continue
- # CUDA 5.x build flags for different archs
- if arch.startswith("sm_2"):
- # sm_2x
- cuda_arch_flags = "--maxrregcount=32 --use_fast_math"
- elif arch.startswith("sm_3"):
- # sm_3x
- cuda_arch_flags = "--maxrregcount=32 --use_fast_math"
+ cubin_file = os.path.join(build_dir, "kernel_%s.cubin" % arch)
if env['BF_CYCLES_CUDA_ENV']:
MS_SDK = "C:\\Program Files\\Microsoft SDKs\\Windows\\v7.1\\Bin\\SetEnv.cmd"
- command = "\"%s\" & \"%s\" -arch=%s %s %s \"%s\" -o \"%s\"" % (MS_SDK, nvcc, arch, nvcc_flags, cuda_arch_flags, kernel_file, cubin_file)
+ command = "\"%s\" & \"%s\" -arch=%s %s \"%s\" -o \"%s\"" % (MS_SDK, nvcc, arch, nvcc_flags, kernel_file, cubin_file)
else:
- command = "\"%s\" -arch=%s %s %s \"%s\" -o \"%s\"" % (nvcc, arch, nvcc_flags, cuda_arch_flags, kernel_file, cubin_file)
+ command = "\"%s\" -arch=%s %s \"%s\" -o \"%s\"" % (nvcc, arch, nvcc_flags, kernel_file, cubin_file)
kernel.Command(cubin_file, 'kernel.cu', command)
kernel.Depends(cubin_file, dependencies)
diff --git a/intern/cycles/kernel/closure/bsdf_hair.h b/intern/cycles/kernel/closure/bsdf_hair.h
index 163e7cc5ee2..19cdb773255 100644
--- a/intern/cycles/kernel/closure/bsdf_hair.h
+++ b/intern/cycles/kernel/closure/bsdf_hair.h
@@ -84,7 +84,7 @@ ccl_device float3 bsdf_hair_reflection_eval_reflect(const ShaderClosure *sc, con
float theta_i = M_PI_2_F - safe_acosf(omega_in_z);
float cosphi_i = dot(omega_in_y, locy);
- if(M_PI_2_F - fabsf(theta_i) < 0.001f || cosphi_i < 0.0f){
+ if(M_PI_2_F - fabsf(theta_i) < 0.001f || cosphi_i < 0.0f) {
*pdf = 0.0f;
return make_float3(*pdf, *pdf, *pdf);
}
@@ -99,7 +99,7 @@ ccl_device float3 bsdf_hair_reflection_eval_reflect(const ShaderClosure *sc, con
float theta_h = (theta_i + theta_r) * 0.5f;
float t = theta_h - offset;
- float phi_pdf = cos(phi_i * 0.5f) * 0.25f / roughness2;
+ float phi_pdf = cosf(phi_i * 0.5f) * 0.25f / roughness2;
float theta_pdf = roughness1 / (2 * (t*t + roughness1*roughness1) * (a_R - b_R)* costheta_i);
*pdf = phi_pdf * theta_pdf;
@@ -140,7 +140,7 @@ ccl_device float3 bsdf_hair_transmission_eval_transmit(const ShaderClosure *sc,
float theta_i = M_PI_2_F - safe_acosf(omega_in_z);
float phi_i = safe_acosf(dot(omega_in_y, locy));
- if(M_PI_2_F - fabsf(theta_i) < 0.001f){
+ if(M_PI_2_F - fabsf(theta_i) < 0.001f) {
*pdf = 0.0f;
return make_float3(*pdf, *pdf, *pdf);
}
@@ -191,7 +191,7 @@ ccl_device int bsdf_hair_reflection_sample(const ShaderClosure *sc, float3 Ng, f
float phi = 2 * safe_asinf(1 - 2 * randv) * roughness2;
- float phi_pdf = cos(phi * 0.5f) * 0.25f / roughness2;
+ float phi_pdf = cosf(phi * 0.5f) * 0.25f / roughness2;
float theta_pdf = roughness1 / (2 * (t*t + roughness1*roughness1) * (a_R - b_R)*costheta_i);
@@ -251,8 +251,8 @@ ccl_device int bsdf_hair_transmission_sample(const ShaderClosure *sc, float3 Ng,
float phi_pdf = roughness2 / (c_TT * (p * p + roughness2 * roughness2));
*omega_in =(cosf(phi) * costheta_i) * locy -
- (sinf(phi) * costheta_i) * locx +
- ( sintheta_i) * Tg;
+ (sinf(phi) * costheta_i) * locx +
+ ( sintheta_i) * Tg;
//differentials - TODO: find a better approximation for the transmission bounce
#ifdef __RAY_DIFFERENTIALS__
@@ -261,7 +261,7 @@ ccl_device int bsdf_hair_transmission_sample(const ShaderClosure *sc, float3 Ng,
#endif
*pdf = fabsf(phi_pdf * theta_pdf);
- if(M_PI_2_F - fabsf(theta_i) < 0.001f){
+ if(M_PI_2_F - fabsf(theta_i) < 0.001f) {
*pdf = 0.0f;
}
diff --git a/intern/cycles/kernel/closure/bsdf_microfacet.h b/intern/cycles/kernel/closure/bsdf_microfacet.h
index dfa8886c113..1ec35e444fe 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet.h
@@ -154,8 +154,8 @@ ccl_device int bsdf_microfacet_ggx_sample(const ShaderClosure *sc, float3 Ng, fl
float sinThetaM = cosThetaM * safe_sqrtf(tanThetaM2);
float phiM = M_2PI_F * randv;
float3 m = (cosf(phiM) * sinThetaM) * X +
- (sinf(phiM) * sinThetaM) * Y +
- cosThetaM * Z;
+ (sinf(phiM) * sinThetaM) * Y +
+ ( cosThetaM) * Z;
if(!m_refractive) {
float cosMO = dot(m, I);
if(cosMO > 0) {
@@ -383,8 +383,8 @@ ccl_device int bsdf_microfacet_beckmann_sample(const ShaderClosure *sc, float3 N
float sinThetaM = cosThetaM * tanThetaM;
float phiM = M_2PI_F * randv;
float3 m = (cosf(phiM) * sinThetaM) * X +
- (sinf(phiM) * sinThetaM) * Y +
- cosThetaM * Z;
+ (sinf(phiM) * sinThetaM) * Y +
+ ( cosThetaM) * Z;
if(!m_refractive) {
float cosMO = dot(m, I);
diff --git a/intern/cycles/kernel/closure/bsdf_phong_ramp.h b/intern/cycles/kernel/closure/bsdf_phong_ramp.h
index 219c5aea159..2b4e1c68640 100644
--- a/intern/cycles/kernel/closure/bsdf_phong_ramp.h
+++ b/intern/cycles/kernel/closure/bsdf_phong_ramp.h
@@ -109,8 +109,8 @@ ccl_device int bsdf_phong_ramp_sample(const ShaderClosure *sc, const float3 colo
float sinTheta2 = 1 - cosTheta * cosTheta;
float sinTheta = sinTheta2 > 0 ? sqrtf(sinTheta2) : 0;
*omega_in = (cosf(phi) * sinTheta) * T +
- (sinf(phi) * sinTheta) * B +
- ( cosTheta) * R;
+ (sinf(phi) * sinTheta) * B +
+ ( cosTheta) * R;
if (dot(Ng, *omega_in) > 0.0f)
{
// common terms for pdf and eval
diff --git a/intern/cycles/kernel/closure/bsdf_util.h b/intern/cycles/kernel/closure/bsdf_util.h
index f6dceb3ca82..b3dcb9dcc38 100644
--- a/intern/cycles/kernel/closure/bsdf_util.h
+++ b/intern/cycles/kernel/closure/bsdf_util.h
@@ -35,14 +35,15 @@
CCL_NAMESPACE_BEGIN
-ccl_device float fresnel_dielectric(float eta, const float3 N,
- const float3 I, float3 *R, float3 *T,
+ccl_device float fresnel_dielectric(
+ float eta, const float3 N,
+ const float3 I, float3 *R, float3 *T,
#ifdef __RAY_DIFFERENTIALS__
- const float3 dIdx, const float3 dIdy,
- float3 *dRdx, float3 *dRdy,
- float3 *dTdx, float3 *dTdy,
+ const float3 dIdx, const float3 dIdy,
+ float3 *dRdx, float3 *dRdy,
+ float3 *dTdx, float3 *dTdy,
#endif
- bool *is_inside)
+ bool *is_inside)
{
float cos = dot(N, I), neta;
float3 Nn;
diff --git a/intern/cycles/kernel/closure/bsdf_westin.h b/intern/cycles/kernel/closure/bsdf_westin.h
index ca4c05e91fe..9dc1c00bb3d 100644
--- a/intern/cycles/kernel/closure/bsdf_westin.h
+++ b/intern/cycles/kernel/closure/bsdf_westin.h
@@ -96,10 +96,9 @@ ccl_device int bsdf_westin_backscatter_sample(const ShaderClosure *sc, float3 Ng
float sinTheta2 = 1 - cosTheta * cosTheta;
float sinTheta = sinTheta2 > 0 ? sqrtf(sinTheta2) : 0;
*omega_in = (cosf(phi) * sinTheta) * T +
- (sinf(phi) * sinTheta) * B +
- (cosTheta) * I;
- if(dot(Ng, *omega_in) > 0)
- {
+ (sinf(phi) * sinTheta) * B +
+ (cosTheta) * I;
+ if(dot(Ng, *omega_in) > 0) {
// common terms for pdf and eval
float cosNI = dot(N, *omega_in);
// make sure the direction we chose is still in the right hemisphere
diff --git a/intern/cycles/kernel/geom/geom.h b/intern/cycles/kernel/geom/geom.h
new file mode 100644
index 00000000000..9495a2541f9
--- /dev/null
+++ b/intern/cycles/kernel/geom/geom.h
@@ -0,0 +1,44 @@
+/*
+ * Adapted from code Copyright 2009-2010 NVIDIA Corporation
+ * Modifications Copyright 2011, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* bottom-most stack entry, indicating the end of traversal; the traversal
+ * functions store it in slot 0 of their stack before starting */
+#define ENTRYPOINT_SENTINEL 0x76543210
+
+/* number of int entries in the traversal stack of each BVH function:
+ * 64 object BVH + 64 mesh BVH + 64 object node splitting (= 192) */
+#define BVH_STACK_SIZE 192
+#define BVH_NODE_SIZE 4 /* presumably the number of texture entries per BVH node -- TODO confirm */
+#define TRI_NODE_SIZE 3 /* presumably the number of texture entries per triangle -- TODO confirm */
+
+/* silly workaround for float extended precision that happens when compiling
+ * without sse support on x86, it results in different results for float ops
+ * that you would otherwise expect to compare correctly
+ *
+ * NO_EXTENDED_PRECISION expands to nothing, except on __i386__ builds
+ * without __SSE__ where it expands to `volatile`; presumably it is meant to
+ * be used as a qualifier on float locals -- TODO confirm at the use sites */
+#if !defined(__i386__) || defined(__SSE__)
+#define NO_EXTENDED_PRECISION
+#else
+#define NO_EXTENDED_PRECISION volatile
+#endif
+
+#include "geom_attribute.h"
+#include "geom_object.h"
+#include "geom_triangle.h"
+#include "geom_motion_triangle.h"
+#include "geom_motion_curve.h"
+#include "geom_curve.h"
+#include "geom_volume.h"
+#include "geom_primitive.h"
+#include "geom_bvh.h"
+
diff --git a/intern/cycles/kernel/geom/geom_attribute.h b/intern/cycles/kernel/geom/geom_attribute.h
new file mode 100644
index 00000000000..63ce31c492f
--- /dev/null
+++ b/intern/cycles/kernel/geom/geom_attribute.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License
+ */
+
+CCL_NAMESPACE_BEGIN
+
+/* Attributes
+ *
+ * We support an arbitrary number of attributes on various mesh elements.
+ * On vertices, triangles, curve keys, curves, meshes and volume grids.
+ * Most of the code for attribute reading is in the primitive files.
+ *
+ * Lookup of attributes is different between OSL and SVM, as OSL is ustring
+ * based while for SVM we use integer ids. */
+
+/* Find attribute based on ID
+ *
+ * Searches the __attributes_map entries belonging to sd->object for the
+ * entry whose x component equals id.
+ *
+ * On success the element type stored in the entry (y component) is written
+ * to *elem and the value stored in its z component is returned.
+ * ATTR_STD_NOT_FOUND is returned when sd->object is PRIM_NONE (*elem is left
+ * untouched in that case), when sd->prim is PRIM_NONE and the attribute is
+ * not a per-mesh one, or when the entry's element type is ATTR_ELEMENT_NONE. */
+
+ccl_device_inline int find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id, AttributeElement *elem)
+{
+ if(sd->object == PRIM_NONE)
+ return (int)ATTR_STD_NOT_FOUND;
+
+ /* for SVM, find attribute by unique id
+ *
+ * the search starts at sd->object * attributes_map_stride (with __HAIR__,
+ * shifted by ATTR_PRIM_CURVE for curve primitives) and advances by
+ * ATTR_PRIM_TYPES entries per step.
+ *
+ * NOTE(review): the loop below has no bound and no sentinel test, it only
+ * stops on a matching id; presumably the map is built so that a match
+ * always exists -- confirm against the code that fills __attributes_map */
+ uint attr_offset = sd->object*kernel_data.bvh.attributes_map_stride;
+#ifdef __HAIR__
+ attr_offset = (sd->type & PRIMITIVE_ALL_CURVE)? attr_offset + ATTR_PRIM_CURVE: attr_offset;
+#endif
+ uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
+
+ while(attr_map.x != id) {
+ attr_offset += ATTR_PRIM_TYPES;
+ attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
+ }
+
+ *elem = (AttributeElement)attr_map.y;
+
+ if(sd->prim == PRIM_NONE && (AttributeElement)attr_map.y != ATTR_ELEMENT_MESH)
+ return ATTR_STD_NOT_FOUND;
+
+ /* return result: the z component of the matching entry, unless the entry
+ * is marked ATTR_ELEMENT_NONE */
+ return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z;
+}
+
+/* Transform matrix attribute on meshes
+ *
+ * Builds a Transform from four consecutive __attributes_float3 entries,
+ * offset + 0 .. offset + 3, stored into the x, y, z and w members in that
+ * order. sd is not referenced by the body. */
+
+ccl_device Transform primitive_attribute_matrix(KernelGlobals *kg, const ShaderData *sd, int offset)
+{
+ Transform tfm;
+
+ tfm.x = kernel_tex_fetch(__attributes_float3, offset + 0);
+ tfm.y = kernel_tex_fetch(__attributes_float3, offset + 1);
+ tfm.z = kernel_tex_fetch(__attributes_float3, offset + 2);
+ tfm.w = kernel_tex_fetch(__attributes_float3, offset + 3);
+
+ return tfm;
+}
+
+CCL_NAMESPACE_END
+
diff --git a/intern/cycles/kernel/geom/geom_bvh.h b/intern/cycles/kernel/geom/geom_bvh.h
new file mode 100644
index 00000000000..dd7c25d581d
--- /dev/null
+++ b/intern/cycles/kernel/geom/geom_bvh.h
@@ -0,0 +1,318 @@
+/*
+ * Adapted from code Copyright 2009-2010 NVIDIA Corporation
+ * Modifications Copyright 2011, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* BVH
+ *
+ * Bounding volume hierarchy for ray tracing. We compile different variations
+ * of the same BVH traversal function for faster rendering when some types of
+ * primitives are not needed, using #includes to work around the lack of
+ * C++ templates in OpenCL.
+ *
+ * Originally based on "Understanding the Efficiency of Ray Traversal on GPUs",
+ * the code has been extended and modified to support more primitives and work
+ * with CPU/CUDA/OpenCL. */
+
+CCL_NAMESPACE_BEGIN
+
+/* BVH intersection function variations
+ *
+ * Each variation below sets BVH_FUNCTION_NAME to the name of the function to
+ * generate and BVH_FUNCTION_FEATURES to an OR of the BVH_* feature bits,
+ * then includes a template header. The feature value is not parenthesized
+ * here, so template code must wrap it before testing bits (the FEATURE()
+ * macro in geom_bvh_shadow.h does so).
+ *
+ * Both macros are redefined for every variation without an #undef in this
+ * file; presumably each template header #undefs them at its end -- TODO
+ * confirm in geom_bvh_traversal.h / geom_bvh_subsurface.h.
+ *
+ * The guards mirror the ones used by scene_intersect() and
+ * scene_intersect_subsurface() further down, which call these functions. */
+
+#define BVH_INSTANCING 1
+#define BVH_MOTION 2
+#define BVH_HAIR 4
+#define BVH_HAIR_MINIMUM_WIDTH 8
+
+#define BVH_FUNCTION_NAME bvh_intersect
+#define BVH_FUNCTION_FEATURES 0
+#include "geom_bvh_traversal.h"
+
+#if defined(__INSTANCING__)
+#define BVH_FUNCTION_NAME bvh_intersect_instancing
+#define BVH_FUNCTION_FEATURES BVH_INSTANCING
+#include "geom_bvh_traversal.h"
+#endif
+
+#if defined(__HAIR__)
+#define BVH_FUNCTION_NAME bvh_intersect_hair
+#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH
+#include "geom_bvh_traversal.h"
+#endif
+
+#if defined(__OBJECT_MOTION__)
+#define BVH_FUNCTION_NAME bvh_intersect_motion
+#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION
+#include "geom_bvh_traversal.h"
+#endif
+
+#if defined(__HAIR__) && defined(__OBJECT_MOTION__)
+#define BVH_FUNCTION_NAME bvh_intersect_hair_motion
+#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH|BVH_MOTION
+#include "geom_bvh_traversal.h"
+#endif
+
+#if defined(__SUBSURFACE__)
+#define BVH_FUNCTION_NAME bvh_intersect_subsurface
+#define BVH_FUNCTION_FEATURES 0
+#include "geom_bvh_subsurface.h"
+#endif
+
+#if defined(__SUBSURFACE__) && defined(__INSTANCING__)
+#define BVH_FUNCTION_NAME bvh_intersect_subsurface_instancing
+#define BVH_FUNCTION_FEATURES BVH_INSTANCING
+#include "geom_bvh_subsurface.h"
+#endif
+
+#if defined(__SUBSURFACE__) && defined(__HAIR__)
+#define BVH_FUNCTION_NAME bvh_intersect_subsurface_hair
+#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR
+#include "geom_bvh_subsurface.h"
+#endif
+
+#if defined(__SUBSURFACE__) && defined(__OBJECT_MOTION__)
+#define BVH_FUNCTION_NAME bvh_intersect_subsurface_motion
+#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION
+#include "geom_bvh_subsurface.h"
+#endif
+
+#if defined(__SUBSURFACE__) && defined(__HAIR__) && defined(__OBJECT_MOTION__)
+#define BVH_FUNCTION_NAME bvh_intersect_subsurface_hair_motion
+#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_MOTION
+#include "geom_bvh_subsurface.h"
+#endif
+
+/* Shadow-all variations, generated from geom_bvh_shadow.h.
+ *
+ * These functions are only called from scene_intersect_shadow_all(), which
+ * is compiled under __SHADOW_RECORD_ALL__, so every variation must be guarded
+ * by that same macro (combined with the feature it needs). Guarding them with
+ * __SUBSURFACE__ would leave the callers without definitions whenever shadow
+ * recording is enabled but subsurface scattering is not. */
+#if defined(__SHADOW_RECORD_ALL__)
+#define BVH_FUNCTION_NAME bvh_intersect_shadow_all
+#define BVH_FUNCTION_FEATURES 0
+#include "geom_bvh_shadow.h"
+#endif
+
+#if defined(__SHADOW_RECORD_ALL__) && defined(__INSTANCING__)
+#define BVH_FUNCTION_NAME bvh_intersect_shadow_all_instancing
+#define BVH_FUNCTION_FEATURES BVH_INSTANCING
+#include "geom_bvh_shadow.h"
+#endif
+
+#if defined(__SHADOW_RECORD_ALL__) && defined(__HAIR__)
+#define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair
+#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR
+#include "geom_bvh_shadow.h"
+#endif
+
+#if defined(__SHADOW_RECORD_ALL__) && defined(__OBJECT_MOTION__)
+#define BVH_FUNCTION_NAME bvh_intersect_shadow_all_motion
+#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION
+#include "geom_bvh_shadow.h"
+#endif
+
+#if defined(__SHADOW_RECORD_ALL__) && defined(__HAIR__) && defined(__OBJECT_MOTION__)
+#define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair_motion
+#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_MOTION
+#include "geom_bvh_shadow.h"
+#endif
+
+/* Intersect a ray with the scene BVH.
+ *
+ * Dispatches to one of the bvh_intersect* variations: the motion variations
+ * when kernel_data.bvh.have_motion is set, the hair variations when
+ * kernel_data.bvh.have_curves is set, and otherwise the instancing or plain
+ * variation. Only __KERNEL_CPU__ builds test have_instancing at runtime;
+ * other builds always use the instancing variation when __INSTANCING__ is
+ * defined.
+ *
+ * With __HAIR__ the signature gains lcg_state, difl and extmax, which are
+ * forwarded to the hair variations only.
+ *
+ * The inline/noinline selection below is
+ * to work around titan bug when using arrays instead of textures */
+#if !defined(__KERNEL_CUDA__) || defined(__KERNEL_CUDA_TEX_STORAGE__)
+ccl_device_inline
+#else
+ccl_device_noinline
+#endif
+#ifdef __HAIR__
+bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect, uint *lcg_state, float difl, float extmax)
+#else
+bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect)
+#endif
+{
+#ifdef __OBJECT_MOTION__
+ if(kernel_data.bvh.have_motion) {
+#ifdef __HAIR__
+ if(kernel_data.bvh.have_curves)
+ return bvh_intersect_hair_motion(kg, ray, isect, visibility, lcg_state, difl, extmax);
+#endif /* __HAIR__ */
+
+ return bvh_intersect_motion(kg, ray, isect, visibility);
+ }
+#endif /* __OBJECT_MOTION__ */
+
+#ifdef __HAIR__
+ if(kernel_data.bvh.have_curves)
+ return bvh_intersect_hair(kg, ray, isect, visibility, lcg_state, difl, extmax);
+#endif /* __HAIR__ */
+
+#ifdef __KERNEL_CPU__
+
+#ifdef __INSTANCING__
+ if(kernel_data.bvh.have_instancing)
+ return bvh_intersect_instancing(kg, ray, isect, visibility);
+#endif /* __INSTANCING__ */
+
+ return bvh_intersect(kg, ray, isect, visibility);
+#else /* __KERNEL_CPU__ */
+
+#ifdef __INSTANCING__
+ return bvh_intersect_instancing(kg, ray, isect, visibility);
+#else
+ return bvh_intersect(kg, ray, isect, visibility);
+#endif /* __INSTANCING__ */
+
+#endif /* __KERNEL_CPU__ */
+}
+
+/* Subsurface variant of scene_intersect(), only compiled with __SUBSURFACE__.
+ *
+ * Forwards ray, isect, subsurface_object, lcg_state and max_hits unchanged to
+ * one of the bvh_intersect_subsurface* variations and returns its result.
+ * The variation is chosen the same way as in scene_intersect(): motion first
+ * (have_motion), then hair (have_curves), then instancing or plain, with the
+ * runtime have_instancing test done on __KERNEL_CPU__ builds only.
+ *
+ * The inline/noinline selection below is
+ * to work around titan bug when using arrays instead of textures */
+#ifdef __SUBSURFACE__
+#if !defined(__KERNEL_CUDA__) || defined(__KERNEL_CUDA_TEX_STORAGE__)
+ccl_device_inline
+#else
+ccl_device_noinline
+#endif
+uint scene_intersect_subsurface(KernelGlobals *kg, const Ray *ray, Intersection *isect, int subsurface_object, uint *lcg_state, int max_hits)
+{
+#ifdef __OBJECT_MOTION__
+ if(kernel_data.bvh.have_motion) {
+#ifdef __HAIR__
+ if(kernel_data.bvh.have_curves)
+ return bvh_intersect_subsurface_hair_motion(kg, ray, isect, subsurface_object, lcg_state, max_hits);
+#endif /* __HAIR__ */
+
+ return bvh_intersect_subsurface_motion(kg, ray, isect, subsurface_object, lcg_state, max_hits);
+ }
+#endif /* __OBJECT_MOTION__ */
+
+#ifdef __HAIR__
+ if(kernel_data.bvh.have_curves)
+ return bvh_intersect_subsurface_hair(kg, ray, isect, subsurface_object, lcg_state, max_hits);
+#endif /* __HAIR__ */
+
+#ifdef __KERNEL_CPU__
+
+#ifdef __INSTANCING__
+ if(kernel_data.bvh.have_instancing)
+ return bvh_intersect_subsurface_instancing(kg, ray, isect, subsurface_object, lcg_state, max_hits);
+#endif /* __INSTANCING__ */
+
+ return bvh_intersect_subsurface(kg, ray, isect, subsurface_object, lcg_state, max_hits);
+#else /* __KERNEL_CPU__ */
+
+#ifdef __INSTANCING__
+ return bvh_intersect_subsurface_instancing(kg, ray, isect, subsurface_object, lcg_state, max_hits);
+#else
+ return bvh_intersect_subsurface(kg, ray, isect, subsurface_object, lcg_state, max_hits);
+#endif /* __INSTANCING__ */
+
+#endif /* __KERNEL_CPU__ */
+}
+#endif
+
+/* Shadow-all variant of scene_intersect(), only compiled with
+ * __SHADOW_RECORD_ALL__.
+ *
+ * Forwards ray, isect, max_hits and num_hits unchanged to one of the
+ * bvh_intersect_shadow_all* variations and returns its result. The variation
+ * is chosen the same way as in scene_intersect(): motion first (have_motion),
+ * then hair (have_curves), then instancing or plain, with the runtime
+ * have_instancing test done on __KERNEL_CPU__ builds only.
+ *
+ * NOTE(review): this wrapper is declared to return uint, while the template
+ * in geom_bvh_shadow.h declares the generated functions as returning bool;
+ * the value is therefore 0 or 1 -- confirm whether bool was intended here.
+ *
+ * The inline/noinline selection below is
+ * to work around titan bug when using arrays instead of textures */
+#ifdef __SHADOW_RECORD_ALL__
+#if !defined(__KERNEL_CUDA__) || defined(__KERNEL_CUDA_TEX_STORAGE__)
+ccl_device_inline
+#else
+ccl_device_noinline
+#endif
+uint scene_intersect_shadow_all(KernelGlobals *kg, const Ray *ray, Intersection *isect, uint max_hits, uint *num_hits)
+{
+#ifdef __OBJECT_MOTION__
+ if(kernel_data.bvh.have_motion) {
+#ifdef __HAIR__
+ if(kernel_data.bvh.have_curves)
+ return bvh_intersect_shadow_all_hair_motion(kg, ray, isect, max_hits, num_hits);
+#endif /* __HAIR__ */
+
+ return bvh_intersect_shadow_all_motion(kg, ray, isect, max_hits, num_hits);
+ }
+#endif /* __OBJECT_MOTION__ */
+
+#ifdef __HAIR__
+ if(kernel_data.bvh.have_curves)
+ return bvh_intersect_shadow_all_hair(kg, ray, isect, max_hits, num_hits);
+#endif /* __HAIR__ */
+
+#ifdef __KERNEL_CPU__
+
+#ifdef __INSTANCING__
+ if(kernel_data.bvh.have_instancing)
+ return bvh_intersect_shadow_all_instancing(kg, ray, isect, max_hits, num_hits);
+#endif /* __INSTANCING__ */
+
+ return bvh_intersect_shadow_all(kg, ray, isect, max_hits, num_hits);
+#else /* __KERNEL_CPU__ */
+
+#ifdef __INSTANCING__
+ return bvh_intersect_shadow_all_instancing(kg, ray, isect, max_hits, num_hits);
+#else
+ return bvh_intersect_shadow_all(kg, ray, isect, max_hits, num_hits);
+#endif /* __INSTANCING__ */
+
+#endif /* __KERNEL_CPU__ */
+}
+#endif
+
+
+/* Ray offset to avoid self intersection.
+ *
+ * This function should be used to compute a modified ray start position for
+ * rays leaving from a surface.
+ *
+ * P is the position to offset and Ng the direction to offset along; the
+ * offset position is returned.
+ *
+ * With __INTERSECTION_REFINE__ each component is handled separately:
+ * - if its magnitude is below epsilon_test, epsilon_f times the matching
+ *   Ng component is added;
+ * - otherwise the float's bit pattern is moved by epsilon_i integer steps.
+ *   When the sign bits of P and Ng differ the steps are subtracted, which
+ *   shrinks the magnitude, else they are added, which grows it; in both
+ *   cases the component moves in the direction of Ng's sign.
+ *
+ * Without __INTERSECTION_REFINE__ the result is simply P + 1e-4f*Ng. */
+
+ccl_device_inline float3 ray_offset(float3 P, float3 Ng)
+{
+#ifdef __INTERSECTION_REFINE__
+ const float epsilon_f = 1e-5f;
+ /* ideally this should match epsilon_f, but instancing and motion blur
+ * precision makes it problematic */
+ const float epsilon_test = 1.0f;
+ const int epsilon_i = 32;
+
+ float3 res;
+
+ /* x component */
+ if(fabsf(P.x) < epsilon_test) {
+ res.x = P.x + Ng.x*epsilon_f;
+ }
+ else {
+ uint ix = __float_as_uint(P.x);
+ ix += ((ix ^ __float_as_uint(Ng.x)) >> 31)? -epsilon_i: epsilon_i;
+ res.x = __uint_as_float(ix);
+ }
+
+ /* y component */
+ if(fabsf(P.y) < epsilon_test) {
+ res.y = P.y + Ng.y*epsilon_f;
+ }
+ else {
+ uint iy = __float_as_uint(P.y);
+ iy += ((iy ^ __float_as_uint(Ng.y)) >> 31)? -epsilon_i: epsilon_i;
+ res.y = __uint_as_float(iy);
+ }
+
+ /* z component */
+ if(fabsf(P.z) < epsilon_test) {
+ res.z = P.z + Ng.z*epsilon_f;
+ }
+ else {
+ uint iz = __float_as_uint(P.z);
+ iz += ((iz ^ __float_as_uint(Ng.z)) >> 31)? -epsilon_i: epsilon_i;
+ res.z = __uint_as_float(iz);
+ }
+
+ return res;
+#else
+ const float epsilon_f = 1e-4f;
+ return P + epsilon_f*Ng;
+#endif
+}
+
+CCL_NAMESPACE_END
+
diff --git a/intern/cycles/kernel/geom/geom_bvh_shadow.h b/intern/cycles/kernel/geom/geom_bvh_shadow.h
new file mode 100644
index 00000000000..98bf82b3b2d
--- /dev/null
+++ b/intern/cycles/kernel/geom/geom_bvh_shadow.h
@@ -0,0 +1,375 @@
+/*
+ * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
+ * and code copyright 2009-2012 Intel Corporation
+ *
+ * Modifications Copyright 2011-2013, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* This is a template BVH traversal function, where various features can be
+ * enabled/disabled. This way we can compile optimized versions for each case
+ * without new features slowing things down.
+ *
+ * BVH_INSTANCING: object instancing
+ * BVH_HAIR: hair curve rendering
+ * BVH_MOTION: motion blur rendering
+ *
+ */
+
+#define FEATURE(f) (((BVH_FUNCTION_FEATURES) & (f)) != 0)
+
+/* Shadow ray BVH traversal that records every transparent shadow hit.
+ *
+ * Starts at kernel_data.bvh.root and walks the tree with an explicit stack.
+ * Each primitive hit is written into the current isect_array entry; hits whose
+ * shader has SD_HAS_TRANSPARENT_SHADOW are kept and counted in *num_hits.
+ *
+ * Returns true as soon as a hit blocks all light: either its shader has no
+ * transparent shadow, or *num_hits already equals max_hits when the hit is
+ * found. Returns false when the traversal finishes without such a hit.
+ *
+ * The hit that trips the max_hits limit is stored before the limit is checked,
+ * so isect_array has to provide room for max_hits + 1 entries. */
+ccl_device bool BVH_FUNCTION_NAME
+(KernelGlobals *kg, const Ray *ray, Intersection *isect_array, const uint max_hits, uint *num_hits)
+{
+	/* todo:
+	 * - likely and unlikely for if() statements
+	 * - test restrict attribute for pointers
+	 */
+
+	/* traversal stack in CUDA thread-local memory */
+	int traversalStack[BVH_STACK_SIZE];
+	traversalStack[0] = ENTRYPOINT_SENTINEL;
+
+	/* traversal variables in registers */
+	int stackPtr = 0;
+	int nodeAddr = kernel_data.bvh.root;
+
+	/* ray parameters in registers */
+	const float tmax = ray->t;
+	float3 P = ray->P;
+	float3 dir = bvh_clamp_direction(ray->D);
+	float3 idir = bvh_inverse_direction(dir);
+	int object = OBJECT_NONE;
+	float isect_t = tmax;
+
+#if FEATURE(BVH_MOTION)
+	Transform ob_tfm;
+#endif
+
+#if FEATURE(BVH_INSTANCING)
+	int num_hits_in_instance = 0;
+#endif
+
+	*num_hits = 0;
+	isect_array->t = tmax;
+
+#if defined(__KERNEL_SSE2__)
+	const shuffle_swap_t shuf_identity = shuffle_swap_identity();
+	const shuffle_swap_t shuf_swap = shuffle_swap_swap();
+
+	const __m128 pn = _mm_castsi128_ps(_mm_set_epi32(0x80000000, 0x80000000, 0, 0));
+	__m128 Psplat[3], idirsplat[3];
+	shuffle_swap_t shufflexyz[3];
+
+	Psplat[0] = _mm_set_ps1(P.x);
+	Psplat[1] = _mm_set_ps1(P.y);
+	Psplat[2] = _mm_set_ps1(P.z);
+
+	__m128 tsplat = _mm_set_ps(-isect_t, -isect_t, 0.0f, 0.0f);
+
+	gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
+#endif
+
+	/* traversal loop */
+	do {
+		do {
+			/* traverse internal nodes */
+			while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
+				bool traverseChild0, traverseChild1;
+				int nodeAddrChild1;
+
+#if !defined(__KERNEL_SSE2__)
+				/* Intersect two child bounding boxes, non-SSE version */
+				float t = isect_t;
+
+				/* fetch node data */
+				float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+0);
+				float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+1);
+				float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+2);
+				float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+3);
+
+				/* intersect ray against child nodes */
+				NO_EXTENDED_PRECISION float c0lox = (node0.x - P.x) * idir.x;
+				NO_EXTENDED_PRECISION float c0hix = (node0.z - P.x) * idir.x;
+				NO_EXTENDED_PRECISION float c0loy = (node1.x - P.y) * idir.y;
+				NO_EXTENDED_PRECISION float c0hiy = (node1.z - P.y) * idir.y;
+				NO_EXTENDED_PRECISION float c0loz = (node2.x - P.z) * idir.z;
+				NO_EXTENDED_PRECISION float c0hiz = (node2.z - P.z) * idir.z;
+				NO_EXTENDED_PRECISION float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
+				NO_EXTENDED_PRECISION float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
+
+				NO_EXTENDED_PRECISION float c1lox = (node0.y - P.x) * idir.x;
+				NO_EXTENDED_PRECISION float c1hix = (node0.w - P.x) * idir.x;
+				NO_EXTENDED_PRECISION float c1loy = (node1.y - P.y) * idir.y;
+				NO_EXTENDED_PRECISION float c1hiy = (node1.w - P.y) * idir.y;
+				NO_EXTENDED_PRECISION float c1loz = (node2.y - P.z) * idir.z;
+				NO_EXTENDED_PRECISION float c1hiz = (node2.w - P.z) * idir.z;
+				NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
+				NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
+
+				/* decide which nodes to traverse next */
+#ifdef __VISIBILITY_FLAG__
+				/* this visibility test gives a 5% performance hit, how to solve? */
+				traverseChild0 = (c0max >= c0min) && (__float_as_uint(cnodes.z) & PATH_RAY_SHADOW);
+				traverseChild1 = (c1max >= c1min) && (__float_as_uint(cnodes.w) & PATH_RAY_SHADOW);
+#else
+				traverseChild0 = (c0max >= c0min);
+				traverseChild1 = (c1max >= c1min);
+#endif
+
+#else // __KERNEL_SSE2__
+				/* Intersect two child bounding boxes, SSE3 version adapted from Embree */
+
+				/* fetch node data */
+				const __m128 *bvh_nodes = (__m128*)kg->__bvh_nodes.data + nodeAddr*BVH_NODE_SIZE;
+				const float4 cnodes = ((float4*)bvh_nodes)[3];
+
+				/* intersect ray against child nodes */
+				const __m128 tminmaxx = _mm_mul_ps(_mm_sub_ps(shuffle_swap(bvh_nodes[0], shufflexyz[0]), Psplat[0]), idirsplat[0]);
+				const __m128 tminmaxy = _mm_mul_ps(_mm_sub_ps(shuffle_swap(bvh_nodes[1], shufflexyz[1]), Psplat[1]), idirsplat[1]);
+				const __m128 tminmaxz = _mm_mul_ps(_mm_sub_ps(shuffle_swap(bvh_nodes[2], shufflexyz[2]), Psplat[2]), idirsplat[2]);
+
+				/* calculate { c0min, c1min, -c0max, -c1max} */
+				__m128 minmax = _mm_max_ps(_mm_max_ps(tminmaxx, tminmaxy), _mm_max_ps(tminmaxz, tsplat));
+				const __m128 tminmax = _mm_xor_ps(minmax, pn);
+				const __m128 lrhit = _mm_cmple_ps(tminmax, shuffle<2, 3, 0, 1>(tminmax));
+
+				/* decide which nodes to traverse next */
+#ifdef __VISIBILITY_FLAG__
+				/* this visibility test gives a 5% performance hit, how to solve? */
+				traverseChild0 = (_mm_movemask_ps(lrhit) & 1) && (__float_as_uint(cnodes.z) & PATH_RAY_SHADOW);
+				traverseChild1 = (_mm_movemask_ps(lrhit) & 2) && (__float_as_uint(cnodes.w) & PATH_RAY_SHADOW);
+#else
+				traverseChild0 = (_mm_movemask_ps(lrhit) & 1);
+				traverseChild1 = (_mm_movemask_ps(lrhit) & 2);
+#endif
+#endif // __KERNEL_SSE2__
+
+				nodeAddr = __float_as_int(cnodes.x);
+				nodeAddrChild1 = __float_as_int(cnodes.y);
+
+				if(traverseChild0 && traverseChild1) {
+					/* both children were intersected, push the farther one */
+#if !defined(__KERNEL_SSE2__)
+					bool closestChild1 = (c1min < c0min);
+#else
+					union { __m128 m128; float v[4]; } uminmax;
+					uminmax.m128 = tminmax;
+					bool closestChild1 = uminmax.v[1] < uminmax.v[0];
+#endif
+
+					if(closestChild1) {
+						int tmp = nodeAddr;
+						nodeAddr = nodeAddrChild1;
+						nodeAddrChild1 = tmp;
+					}
+
+					++stackPtr;
+					traversalStack[stackPtr] = nodeAddrChild1;
+				}
+				else {
+					/* one child was intersected */
+					if(traverseChild1) {
+						nodeAddr = nodeAddrChild1;
+					}
+					else if(!traverseChild0) {
+						/* neither child was intersected */
+						nodeAddr = traversalStack[stackPtr];
+						--stackPtr;
+					}
+				}
+			}
+
+			/* if node is leaf, fetch triangle list */
+			if(nodeAddr < 0) {
+				float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_NODE_SIZE+(BVH_NODE_SIZE-1));
+				int primAddr = __float_as_int(leaf.x);
+
+#if FEATURE(BVH_INSTANCING)
+				if(primAddr >= 0) {
+#endif
+					int primAddr2 = __float_as_int(leaf.y);
+
+					/* pop */
+					nodeAddr = traversalStack[stackPtr];
+					--stackPtr;
+
+					/* primitive intersection */
+					while(primAddr < primAddr2) {
+						bool hit;
+						uint type = kernel_tex_fetch(__prim_type, primAddr);
+
+						/* todo: specialized intersect functions which don't fill in
+						 * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW?
+						 * might give a few % performance improvement */
+
+						switch(type & PRIMITIVE_ALL) {
+							case PRIMITIVE_TRIANGLE: {
+								hit = triangle_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr);
+								break;
+							}
+#if FEATURE(BVH_MOTION)
+							case PRIMITIVE_MOTION_TRIANGLE: {
+								hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, PATH_RAY_SHADOW, object, primAddr);
+								break;
+							}
+#endif
+#if FEATURE(BVH_HAIR)
+							case PRIMITIVE_CURVE:
+							case PRIMITIVE_MOTION_CURVE: {
+								if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
+									hit = bvh_cardinal_curve_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr, ray->time, type, NULL, 0, 0);
+								else
+									hit = bvh_curve_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr, ray->time, type, NULL, 0, 0);
+								break;
+							}
+#endif
+							default: {
+								hit = false;
+								break;
+							}
+						}
+
+						/* shadow ray early termination */
+						if(hit) {
+							/* detect if this surface has a shader with transparent shadows */
+
+							/* todo: optimize so primitive visibility flag indicates if
+							 * the primitive has a transparent shadow shader? */
+							int prim = kernel_tex_fetch(__prim_index, isect_array->prim);
+							int shader = 0;
+
+#ifdef __HAIR__
+							if(kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
+#endif
+							{
+								float4 Ns = kernel_tex_fetch(__tri_normal, prim);
+								shader = __float_as_int(Ns.w);
+							}
+#ifdef __HAIR__
+							else {
+								float4 str = kernel_tex_fetch(__curves, prim);
+								shader = __float_as_int(str.z);
+							}
+#endif
+							int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*2);
+
+							/* if no transparent shadows, all light is blocked */
+							if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
+								return true;
+							}
+							/* if maximum number of hits reached, block all light */
+							else if(*num_hits == max_hits) {
+								return true;
+							}
+
+							/* move on to next entry in intersections array */
+							isect_array++;
+							(*num_hits)++;
+#if FEATURE(BVH_INSTANCING)
+							num_hits_in_instance++;
+#endif
+
+							/* the fresh entry starts out with the current ray length as limit */
+							isect_array->t = isect_t;
+						}
+
+						primAddr++;
+					}
+				}
+#if FEATURE(BVH_INSTANCING)
+				else {
+					/* instance push */
+					object = kernel_tex_fetch(__prim_object, -primAddr-1);
+
+#if FEATURE(BVH_MOTION)
+					bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_tfm);
+#else
+					bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
+#endif
+
+					num_hits_in_instance = 0;
+
+					/* the push updates isect_t; the pending entry has to use the
+					 * same limit in every build, not only with SSE2 */
+					isect_array->t = isect_t;
+
+#if defined(__KERNEL_SSE2__)
+					Psplat[0] = _mm_set_ps1(P.x);
+					Psplat[1] = _mm_set_ps1(P.y);
+					Psplat[2] = _mm_set_ps1(P.z);
+
+					tsplat = _mm_set_ps(-isect_t, -isect_t, 0.0f, 0.0f);
+
+					gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
+#endif
+
+					++stackPtr;
+					traversalStack[stackPtr] = ENTRYPOINT_SENTINEL;
+
+					nodeAddr = kernel_tex_fetch(__object_node, object);
+				}
+			}
+#endif
+		} while(nodeAddr != ENTRYPOINT_SENTINEL);
+
+#if FEATURE(BVH_INSTANCING)
+		if(stackPtr >= 0) {
+			kernel_assert(object != OBJECT_NONE);
+
+			if(num_hits_in_instance) {
+				float t_fac;
+
+#if FEATURE(BVH_MOTION)
+				bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_tfm);
+#else
+				bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
+#endif
+
+				/* scale isect->t to adjust for instancing */
+				for(int i = 0; i < num_hits_in_instance; i++)
+					(isect_array-i-1)->t *= t_fac;
+			}
+			else {
+				float ignore_t = FLT_MAX;
+
+#if FEATURE(BVH_MOTION)
+				bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_tfm);
+#else
+				bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
+#endif
+			}
+
+			/* neither pop variant restores isect_t, so reset the ray length
+			 * and the pending entry here for all builds; previously this was
+			 * only done when compiling with SSE2 */
+			isect_t = tmax;
+			isect_array->t = isect_t;
+
+#if defined(__KERNEL_SSE2__)
+			Psplat[0] = _mm_set_ps1(P.x);
+			Psplat[1] = _mm_set_ps1(P.y);
+			Psplat[2] = _mm_set_ps1(P.z);
+
+			tsplat = _mm_set_ps(-isect_t, -isect_t, 0.0f, 0.0f);
+
+			gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
+#endif
+
+			object = OBJECT_NONE;
+			nodeAddr = traversalStack[stackPtr];
+			--stackPtr;
+		}
+#endif
+	} while(nodeAddr != ENTRYPOINT_SENTINEL);
+
+	return false;
+}
+
+#undef FEATURE
+#undef BVH_FUNCTION_NAME
+#undef BVH_FUNCTION_FEATURES
+
diff --git a/intern/cycles/kernel/kernel_bvh_subsurface.h b/intern/cycles/kernel/geom/geom_bvh_subsurface.h
index df82dda2435..a19f05dd371 100644
--- a/intern/cycles/kernel/kernel_bvh_subsurface.h
+++ b/intern/cycles/kernel/geom/geom_bvh_subsurface.h
@@ -48,12 +48,13 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio
int nodeAddr = kernel_data.bvh.root;
/* ray parameters in registers */
- const float tmax = ray->t;
float3 P = ray->P;
- float3 idir = bvh_inverse_direction(ray->D);
- int object = ~0;
+ float3 dir = bvh_clamp_direction(ray->D);
+ float3 idir = bvh_inverse_direction(dir);
+ int object = OBJECT_NONE;
+ float isect_t = ray->t;
- const uint visibility = ~0;
+ const uint visibility = PATH_RAY_ALL_VISIBILITY;
uint num_hits = 0;
#if FEATURE(BVH_MOTION)
@@ -72,7 +73,7 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio
Psplat[1] = _mm_set_ps1(P.y);
Psplat[2] = _mm_set_ps1(P.z);
- __m128 tsplat = _mm_set_ps(-tmax, -tmax, 0.0f, 0.0f);
+ __m128 tsplat = _mm_set_ps(-isect_t, -isect_t, 0.0f, 0.0f);
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
#endif
@@ -89,7 +90,7 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio
#if !defined(__KERNEL_SSE2__)
/* Intersect two child bounding boxes, non-SSE version */
- float t = tmax;
+ float t = isect_t;
/* fetch node data */
float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+0);
@@ -130,8 +131,8 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio
/* Intersect two child bounding boxes, SSE3 version adapted from Embree */
/* fetch node data */
- __m128 *bvh_nodes = (__m128*)kg->__bvh_nodes.data + nodeAddr*BVH_NODE_SIZE;
- float4 cnodes = ((float4*)bvh_nodes)[3];
+ const __m128 *bvh_nodes = (__m128*)kg->__bvh_nodes.data + nodeAddr*BVH_NODE_SIZE;
+ const float4 cnodes = ((float4*)bvh_nodes)[3];
/* intersect ray against child nodes */
const __m128 tminmaxx = _mm_mul_ps(_mm_sub_ps(shuffle_swap(bvh_nodes[0], shufflexyz[0]), Psplat[0]), idirsplat[0]);
@@ -203,19 +204,29 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio
/* primitive intersection */
for(; primAddr < primAddr2; primAddr++) {
-#if FEATURE(BVH_HAIR)
- uint segment = kernel_tex_fetch(__prim_segment, primAddr);
- if(segment != ~0)
- continue;
-#endif
-
/* only primitives from the same object */
- uint tri_object = (object == ~0)? kernel_tex_fetch(__prim_object, primAddr): object;
+ uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
- if(tri_object == subsurface_object) {
+ if(tri_object != subsurface_object)
+ continue;
- /* intersect ray against primitive */
- bvh_triangle_intersect_subsurface(kg, isect_array, P, idir, object, primAddr, tmax, &num_hits, lcg_state, max_hits);
+ /* intersect ray against primitive */
+ uint type = kernel_tex_fetch(__prim_type, primAddr);
+
+ switch(type & PRIMITIVE_ALL) {
+ case PRIMITIVE_TRIANGLE: {
+ triangle_intersect_subsurface(kg, isect_array, P, dir, object, primAddr, isect_t, &num_hits, lcg_state, max_hits);
+ break;
+ }
+#if FEATURE(BVH_MOTION)
+ case PRIMITIVE_MOTION_TRIANGLE: {
+ motion_triangle_intersect_subsurface(kg, isect_array, P, dir, ray->time, object, primAddr, isect_t, &num_hits, lcg_state, max_hits);
+ break;
+ }
+#endif
+ default: {
+ break;
+ }
}
}
}
@@ -225,11 +236,10 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio
if(subsurface_object == kernel_tex_fetch(__prim_object, -primAddr-1)) {
object = subsurface_object;
- float t_ignore = FLT_MAX;
#if FEATURE(BVH_MOTION)
- bvh_instance_motion_push(kg, object, ray, &P, &idir, &t_ignore, &ob_tfm, tmax);
+ bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_tfm);
#else
- bvh_instance_push(kg, object, ray, &P, &idir, &t_ignore, tmax);
+ bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
#endif
#if defined(__KERNEL_SSE2__)
@@ -237,7 +247,7 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio
Psplat[1] = _mm_set_ps1(P.y);
Psplat[2] = _mm_set_ps1(P.z);
- tsplat = _mm_set_ps(-tmax, -tmax, 0.0f, 0.0f);
+ tsplat = _mm_set_ps(-isect_t, -isect_t, 0.0f, 0.0f);
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
#endif
@@ -259,14 +269,13 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio
#if FEATURE(BVH_INSTANCING)
if(stackPtr >= 0) {
- kernel_assert(object != ~0);
+ kernel_assert(object != OBJECT_NONE);
/* instance pop */
- float t_ignore = FLT_MAX;
#if FEATURE(BVH_MOTION)
- bvh_instance_motion_pop(kg, object, ray, &P, &idir, &t_ignore, &ob_tfm, tmax);
+ bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_tfm);
#else
- bvh_instance_pop(kg, object, ray, &P, &idir, &t_ignore, tmax);
+ bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect_t);
#endif
#if defined(__KERNEL_SSE2__)
@@ -274,12 +283,12 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio
Psplat[1] = _mm_set_ps1(P.y);
Psplat[2] = _mm_set_ps1(P.z);
- tsplat = _mm_set_ps(-tmax, -tmax, 0.0f, 0.0f);
+ tsplat = _mm_set_ps(-isect_t, -isect_t, 0.0f, 0.0f);
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
#endif
- object = ~0;
+ object = OBJECT_NONE;
nodeAddr = traversalStack[stackPtr];
--stackPtr;
}
diff --git a/intern/cycles/kernel/kernel_bvh_traversal.h b/intern/cycles/kernel/geom/geom_bvh_traversal.h
index bfd72b0aa16..9fd40f91471 100644
--- a/intern/cycles/kernel/kernel_bvh_traversal.h
+++ b/intern/cycles/kernel/geom/geom_bvh_traversal.h
@@ -41,7 +41,6 @@ ccl_device bool BVH_FUNCTION_NAME
* - test if pushing distance on the stack helps (for non shadow rays)
* - separate version for shadow rays
* - likely and unlikely for if() statements
- * - SSE for hair
* - test restrict attribute for pointers
*/
@@ -54,18 +53,18 @@ ccl_device bool BVH_FUNCTION_NAME
int nodeAddr = kernel_data.bvh.root;
/* ray parameters in registers */
- const float tmax = ray->t;
- ccl_align(16) float3 P = ray->P;
- ccl_align(16) float3 idir = bvh_inverse_direction(ray->D);
- int object = ~0;
+ float3 P = ray->P;
+ float3 dir = bvh_clamp_direction(ray->D);
+ float3 idir = bvh_inverse_direction(dir);
+ int object = OBJECT_NONE;
#if FEATURE(BVH_MOTION)
Transform ob_tfm;
#endif
- isect->t = tmax;
- isect->object = ~0;
- isect->prim = ~0;
+ isect->t = ray->t;
+ isect->object = OBJECT_NONE;
+ isect->prim = PRIM_NONE;
isect->u = 0.0f;
isect->v = 0.0f;
@@ -88,11 +87,9 @@ ccl_device bool BVH_FUNCTION_NAME
/* traversal loop */
do {
- do
- {
+ do {
/* traverse internal nodes */
- while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL)
- {
+ while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
bool traverseChild0, traverseChild1;
int nodeAddrChild1;
@@ -250,26 +247,34 @@ ccl_device bool BVH_FUNCTION_NAME
/* primitive intersection */
while(primAddr < primAddr2) {
bool hit;
+ uint type = kernel_tex_fetch(__prim_type, primAddr);
- /* intersect ray against primitive */
+ switch(type & PRIMITIVE_ALL) {
+ case PRIMITIVE_TRIANGLE: {
+ hit = triangle_intersect(kg, isect, P, dir, visibility, object, primAddr);
+ break;
+ }
+#if FEATURE(BVH_MOTION)
+ case PRIMITIVE_MOTION_TRIANGLE: {
+ hit = motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr);
+ break;
+ }
+#endif
#if FEATURE(BVH_HAIR)
- uint segment = kernel_tex_fetch(__prim_segment, primAddr);
- if(segment != ~0) {
-
- if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
-#if FEATURE(BVH_HAIR_MINIMUM_WIDTH)
- hit = bvh_cardinal_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, segment, lcg_state, difl, extmax);
- else
- hit = bvh_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, segment, lcg_state, difl, extmax);
-#else
- hit = bvh_cardinal_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, segment);
- else
- hit = bvh_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, segment);
+ case PRIMITIVE_CURVE:
+ case PRIMITIVE_MOTION_CURVE: {
+ if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
+ hit = bvh_cardinal_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax);
+ else
+ hit = bvh_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax);
+ break;
+ }
#endif
+ default: {
+ hit = false;
+ break;
+ }
}
- else
-#endif
- hit = bvh_triangle_intersect(kg, isect, P, idir, visibility, object, primAddr);
/* shadow ray early termination */
#if defined(__KERNEL_SSE2__)
@@ -293,9 +298,9 @@ ccl_device bool BVH_FUNCTION_NAME
object = kernel_tex_fetch(__prim_object, -primAddr-1);
#if FEATURE(BVH_MOTION)
- bvh_instance_motion_push(kg, object, ray, &P, &idir, &isect->t, &ob_tfm, tmax);
+ bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_tfm);
#else
- bvh_instance_push(kg, object, ray, &P, &idir, &isect->t, tmax);
+ bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t);
#endif
#if defined(__KERNEL_SSE2__)
@@ -319,13 +324,13 @@ ccl_device bool BVH_FUNCTION_NAME
#if FEATURE(BVH_INSTANCING)
if(stackPtr >= 0) {
- kernel_assert(object != ~0);
+ kernel_assert(object != OBJECT_NONE);
/* instance pop */
#if FEATURE(BVH_MOTION)
- bvh_instance_motion_pop(kg, object, ray, &P, &idir, &isect->t, &ob_tfm, tmax);
+ bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_tfm);
#else
- bvh_instance_pop(kg, object, ray, &P, &idir, &isect->t, tmax);
+ bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
#endif
#if defined(__KERNEL_SSE2__)
@@ -338,14 +343,14 @@ ccl_device bool BVH_FUNCTION_NAME
gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
#endif
- object = ~0;
+ object = OBJECT_NONE;
nodeAddr = traversalStack[stackPtr];
--stackPtr;
}
#endif
} while(nodeAddr != ENTRYPOINT_SENTINEL);
- return (isect->prim != ~0);
+ return (isect->prim != PRIM_NONE);
}
#undef FEATURE
diff --git a/intern/cycles/kernel/geom/geom_curve.h b/intern/cycles/kernel/geom/geom_curve.h
new file mode 100644
index 00000000000..e1d225436a6
--- /dev/null
+++ b/intern/cycles/kernel/geom/geom_curve.h
@@ -0,0 +1,1035 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+/* Curve Primitive
+ *
+ * Curve primitive for rendering hair and fur. These can be rendered as flat ribbons
+ * or curves with actual thickness. The curve can also be rendered as line segments
+ * rather than curves for better performance */
+
+#ifdef __HAIR__
+
+/* Reading attributes on various curve elements */
+
+/* Read a float attribute for the curve point described by sd.
+ *
+ * Per-curve attributes are fetched at offset + sd->prim. Per-key attributes
+ * are fetched for the two keys of the segment and blended linearly with
+ * sd->u. Any other element yields 0.
+ *
+ * With __RAY_DIFFERENTIALS__, *dx and *dy are filled in when non-NULL; only
+ * the per-key case produces a non-zero dx. */
+ccl_device float curve_attribute_float(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float *dx, float *dy)
+{
+	switch(elem) {
+		case ATTR_ELEMENT_CURVE: {
+#ifdef __RAY_DIFFERENTIALS__
+			if(dx) *dx = 0.0f;
+			if(dy) *dy = 0.0f;
+#endif
+
+			return kernel_tex_fetch(__attributes_float, offset + sd->prim);
+		}
+		case ATTR_ELEMENT_CURVE_KEY:
+		case ATTR_ELEMENT_CURVE_KEY_MOTION: {
+			/* first key of the curve plus the segment index packed in the type */
+			float4 curve_info = kernel_tex_fetch(__curves, sd->prim);
+			int key_a = __float_as_int(curve_info.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
+			int key_b = key_a + 1;
+
+			float value_a = kernel_tex_fetch(__attributes_float, offset + key_a);
+			float value_b = kernel_tex_fetch(__attributes_float, offset + key_b);
+
+#ifdef __RAY_DIFFERENTIALS__
+			if(dx) *dx = sd->du.dx*(value_b - value_a);
+			if(dy) *dy = 0.0f;
+#endif
+
+			return (1.0f - sd->u)*value_a + sd->u*value_b;
+		}
+		default: {
+#ifdef __RAY_DIFFERENTIALS__
+			if(dx) *dx = 0.0f;
+			if(dy) *dy = 0.0f;
+#endif
+
+			return 0.0f;
+		}
+	}
+}
+
+/* Read a float3 attribute for the curve point described by sd.
+ *
+ * Mirrors the float variant: per-curve values are fetched at
+ * offset + sd->prim, per-key values are blended linearly between the two keys
+ * of the segment with sd->u, anything else gives a zero vector.
+ *
+ * With __RAY_DIFFERENTIALS__, *dx and *dy are filled in when non-NULL; only
+ * the per-key case produces a non-zero dx. */
+ccl_device float3 curve_attribute_float3(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float3 *dx, float3 *dy)
+{
+	switch(elem) {
+		case ATTR_ELEMENT_CURVE: {
+			/* idea: we can't derive any useful differentials here, but for tiled
+			 * mipmap image caching it would be useful to avoid reading the highest
+			 * detail level always. maybe a derivative based on the hair density
+			 * could be computed somehow? */
+#ifdef __RAY_DIFFERENTIALS__
+			if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
+			if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
+#endif
+
+			return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + sd->prim));
+		}
+		case ATTR_ELEMENT_CURVE_KEY:
+		case ATTR_ELEMENT_CURVE_KEY_MOTION: {
+			/* first key of the curve plus the segment index packed in the type */
+			float4 curve_info = kernel_tex_fetch(__curves, sd->prim);
+			int key_a = __float_as_int(curve_info.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
+			int key_b = key_a + 1;
+
+			float3 value_a = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + key_a));
+			float3 value_b = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + key_b));
+
+#ifdef __RAY_DIFFERENTIALS__
+			if(dx) *dx = sd->du.dx*(value_b - value_a);
+			if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
+#endif
+
+			return (1.0f - sd->u)*value_a + sd->u*value_b;
+		}
+		default: {
+#ifdef __RAY_DIFFERENTIALS__
+			if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
+			if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
+#endif
+
+			return make_float3(0.0f, 0.0f, 0.0f);
+		}
+	}
+}
+
+/* Curve thickness
+ *
+ * Interpolates the w component of the two segment keys linearly at sd->u and
+ * returns twice that value. Gives 0 when sd is not a curve primitive. */
+
+ccl_device float curve_thickness(KernelGlobals *kg, ShaderData *sd)
+{
+	if(!(sd->type & PRIMITIVE_ALL_CURVE))
+		return 0.0f;
+
+	float4 curve_info = kernel_tex_fetch(__curves, sd->prim);
+	int key_a = __float_as_int(curve_info.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
+	int key_b = key_a + 1;
+
+	float4 keys[2];
+
+	if(sd->type & PRIMITIVE_CURVE) {
+		/* static curve: keys come straight from the key array */
+		keys[0] = kernel_tex_fetch(__curve_keys, key_a);
+		keys[1] = kernel_tex_fetch(__curve_keys, key_b);
+	}
+	else {
+		/* otherwise the keys are looked up for the shading time */
+		motion_curve_keys(kg, sd->object, sd->prim, sd->time, key_a, key_b, keys);
+	}
+
+	float w_interp = (keys[1].w - keys[0].w) * sd->u + keys[0].w;
+
+	return w_interp*2.0f;
+}
+
+/* Curve location for motion pass, linear interpolation between keys and
+ * ignoring radius because we do the same for the motion keys.
+ *
+ * Only the keys stored in __curve_keys are read here. */
+
+ccl_device float3 curve_motion_center_location(KernelGlobals *kg, ShaderData *sd)
+{
+	float4 curve_info = kernel_tex_fetch(__curves, sd->prim);
+	int key_a = __float_as_int(curve_info.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
+	int key_b = key_a + 1;
+
+	/* w is dropped by the float3 conversion */
+	float3 center_a = float4_to_float3(kernel_tex_fetch(__curve_keys, key_a));
+	float3 center_b = float4_to_float3(kernel_tex_fetch(__curve_keys, key_b));
+
+	return center_b * sd->u + center_a * (1.0f - sd->u);
+}
+
+/* Curve tangent normal
+ *
+ * Removes from -sd->I its projection onto sd->dPdu, negates the result and
+ * normalizes it. Gives a zero vector when sd is not a curve primitive. */
+
+ccl_device float3 curve_tangent_normal(KernelGlobals *kg, ShaderData *sd)
+{
+	if(!(sd->type & PRIMITIVE_ALL_CURVE))
+		return make_float3(0.0f,0.0f,0.0f);
+
+	/* projection factor of -I onto dPdu */
+	float proj = dot(sd->dPdu,-sd->I) / len_squared(sd->dPdu);
+
+	float3 tgN = -(-sd->I - sd->dPdu * proj);
+	tgN = normalize(tgN);
+
+	/* need to find suitable scaled gd for corrected normal */
+#if 0
+	tgN = normalize(tgN - gd * sd->dPdu);
+#endif
+
+	return tgN;
+}
+
+/* Curve bounds utility function
+ *
+ * Bounds the polynomial f(t) = p3*t^3 + p2*t^2 + p1*t + p0 over t in [0, 1].
+ *
+ * lower, upper:     receive the minimum and maximum of f over the end points
+ *                   and every stationary point found strictly inside (0, 1).
+ * extremta, extrema: parameter and value of the first stationary point;
+ *                   extremta stays -1 when there is none in (0, 1), and
+ *                   extrema then holds f(0).
+ * extremtb, extremb: same for the second stationary point; extremb then
+ *                   holds f(1).
+ *
+ * Stationary points are the roots of f'(t) = 3*p3*t^2 + 2*p2*t + p1. When
+ * p3 is zero the cubic formula would divide by zero and yield only infinities
+ * or NaN, which silently dropped the single stationary point of the remaining
+ * quadratic; that case is solved separately and reported through
+ * extremta/extrema. */
+
+ccl_device_inline void curvebounds(float *lower, float *upper, float *extremta, float *extrema, float *extremtb, float *extremb, float p0, float p1, float p2, float p3)
+{
+	float halfdiscroot = (p2 * p2 - 3 * p3 * p1);
+	float ta = -1.0f;
+	float tb = -1.0f;
+
+	*extremta = -1.0f;
+	*extremtb = -1.0f;
+	*upper = p0;
+	*lower = (p0 + p1) + (p2 + p3);
+	*extrema = *upper;
+	*extremb = *lower;
+
+	/* order the two end point values */
+	if(*lower >= *upper) {
+		*upper = *lower;
+		*lower = p0;
+	}
+
+	if(p3 != 0.0f) {
+		/* genuine cubic: both roots of the derivative, if they are real */
+		if(halfdiscroot >= 0) {
+			float inv3p3 = (1.0f/3.0f)/p3;
+			halfdiscroot = sqrtf(halfdiscroot);
+			ta = (-p2 - halfdiscroot) * inv3p3;
+			tb = (-p2 + halfdiscroot) * inv3p3;
+		}
+	}
+	else if(p2 != 0.0f) {
+		/* degenerate to a quadratic: f'(t) = 2*p2*t + p1 has a single root */
+		ta = -p1 / (2.0f * p2);
+	}
+
+	float t2;
+	float t3;
+
+	if(ta > 0.0f && ta < 1.0f) {
+		t2 = ta * ta;
+		t3 = t2 * ta;
+		*extremta = ta;
+		*extrema = p3 * t3 + p2 * t2 + p1 * ta + p0;
+
+		*upper = fmaxf(*extrema, *upper);
+		*lower = fminf(*extrema, *lower);
+	}
+
+	if(tb > 0.0f && tb < 1.0f) {
+		t2 = tb * tb;
+		t3 = t2 * tb;
+		*extremtb = tb;
+		*extremb = p3 * t3 + p2 * t2 + p1 * tb + p0;
+
+		*upper = fmaxf(*extremb, *upper);
+		*lower = fminf(*extremb, *lower);
+	}
+}
+
+#ifdef __KERNEL_SSE2__
+/* Combine the three vectors t[0..2] using lanes 0, 1 and 2 of a.
+ *
+ * Presumably a.x*t[0] + a.y*t[1] + a.z*t[2], assuming broadcast<i>() splats
+ * lane i and the project's fma(a, b, c) computes a*b + c - TODO confirm. */
+ccl_device_inline __m128 transform_point_T3(const __m128 t[3], const __m128 &a)
+{
+	const __m128 lane0 = broadcast<0>(a);
+	const __m128 lane1 = broadcast<1>(a);
+	const __m128 lane2 = broadcast<2>(a);
+
+	/* innermost term first, same nesting as a single chained expression */
+	const __m128 term2 = _mm_mul_ps(lane2, t[2]);
+	const __m128 term12 = fma(lane1, t[1], term2);
+
+	return fma(lane0, t[0], term12);
+}
+#endif
+
+#ifdef __KERNEL_SSE2__
+/* Pass P and dir by reference to aligned vector */
+ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersection *isect,
+ const float3 &P, const float3 &dir, uint visibility, int object, int curveAddr, float time, int type, uint *lcg_state, float difl, float extmax)
+#else
+ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersection *isect,
+ float3 P, float3 dir, uint visibility, int object, int curveAddr, float time,int type, uint *lcg_state, float difl, float extmax)
+#endif
+{
+ int segment = PRIMITIVE_UNPACK_SEGMENT(type);
+ float epsilon = 0.0f;
+ float r_st, r_en;
+
+ int depth = kernel_data.curve.subdivisions;
+ int flags = kernel_data.curve.curveflags;
+ int prim = kernel_tex_fetch(__prim_index, curveAddr);
+
+#ifdef __KERNEL_SSE2__
+ __m128 vdir = load_m128(dir);
+ __m128 vcurve_coef[4];
+ const float3 *curve_coef = (float3 *)vcurve_coef;
+
+ {
+ __m128 dtmp = _mm_mul_ps(vdir, vdir);
+ __m128 d_ss = _mm_sqrt_ss(_mm_add_ss(dtmp, broadcast<2>(dtmp)));
+ __m128 rd_ss = _mm_div_ss(_mm_set_ss(1.0f), d_ss);
+
+ __m128i v00vec = _mm_load_si128((__m128i *)&kg->__curves.data[prim]);
+ int2 &v00 = (int2 &)v00vec;
+
+ int k0 = v00.x + segment;
+ int k1 = k0 + 1;
+ int ka = max(k0 - 1, v00.x);
+ int kb = min(k1 + 1, v00.x + v00.y - 1);
+
+ __m128 P_curve[4];
+
+ if(type & PRIMITIVE_CURVE) {
+ P_curve[0] = _mm_load_ps(&kg->__curve_keys.data[ka].x);
+ P_curve[1] = _mm_load_ps(&kg->__curve_keys.data[k0].x);
+ P_curve[2] = _mm_load_ps(&kg->__curve_keys.data[k1].x);
+ P_curve[3] = _mm_load_ps(&kg->__curve_keys.data[kb].x);
+ }
+ else {
+ int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, curveAddr): object;
+ motion_cardinal_curve_keys(kg, fobject, prim, time, ka, k0, k1, kb, (float4*)&P_curve);
+ }
+
+ __m128 rd_sgn = set_sign_bit<0, 1, 1, 1>(broadcast<0>(rd_ss));
+ __m128 mul_zxxy = _mm_mul_ps(shuffle<2, 0, 0, 1>(vdir), rd_sgn);
+ __m128 mul_yz = _mm_mul_ps(shuffle<1, 2, 1, 2>(vdir), mul_zxxy);
+ __m128 mul_shuf = shuffle<0, 1, 2, 3>(mul_zxxy, mul_yz);
+ __m128 vdir0 = _mm_and_ps(vdir, _mm_castsi128_ps(_mm_setr_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0)));
+
+ __m128 htfm0 = shuffle<0, 2, 0, 3>(mul_shuf, vdir0);
+ __m128 htfm1 = shuffle<1, 0, 1, 3>(_mm_set_ss(_mm_cvtss_f32(d_ss)), vdir0);
+ __m128 htfm2 = shuffle<1, 3, 2, 3>(mul_shuf, vdir0);
+
+ __m128 htfm[] = { htfm0, htfm1, htfm2 };
+ __m128 vP = load_m128(P);
+ __m128 p0 = transform_point_T3(htfm, _mm_sub_ps(P_curve[0], vP));
+ __m128 p1 = transform_point_T3(htfm, _mm_sub_ps(P_curve[1], vP));
+ __m128 p2 = transform_point_T3(htfm, _mm_sub_ps(P_curve[2], vP));
+ __m128 p3 = transform_point_T3(htfm, _mm_sub_ps(P_curve[3], vP));
+
+ float fc = 0.71f;
+ __m128 vfc = _mm_set1_ps(fc);
+ __m128 vfcxp3 = _mm_mul_ps(vfc, p3);
+
+ vcurve_coef[0] = p1;
+ vcurve_coef[1] = _mm_mul_ps(vfc, _mm_sub_ps(p2, p0));
+ vcurve_coef[2] = fma(_mm_set1_ps(fc * 2.0f), p0, fma(_mm_set1_ps(fc - 3.0f), p1, fms(_mm_set1_ps(3.0f - 2.0f * fc), p2, vfcxp3)));
+ vcurve_coef[3] = fms(_mm_set1_ps(fc - 2.0f), _mm_sub_ps(p2, p1), fms(vfc, p0, vfcxp3));
+
+ r_st = ((float4 &)P_curve[1]).w;
+ r_en = ((float4 &)P_curve[2]).w;
+ }
+#else
+ float3 curve_coef[4];
+
+ /* curve Intersection check */
+ /* obtain curve parameters */
+ {
+ /* ray transform created - this should be created at beginning of intersection loop */
+ Transform htfm;
+ float d = sqrtf(dir.x * dir.x + dir.z * dir.z);
+ htfm = make_transform(
+ dir.z / d, 0, -dir.x /d, 0,
+ -dir.x * dir.y /d, d, -dir.y * dir.z /d, 0,
+ dir.x, dir.y, dir.z, 0,
+ 0, 0, 0, 1);
+
+ float4 v00 = kernel_tex_fetch(__curves, prim);
+
+ int k0 = __float_as_int(v00.x) + segment;
+ int k1 = k0 + 1;
+
+ int ka = max(k0 - 1,__float_as_int(v00.x));
+ int kb = min(k1 + 1,__float_as_int(v00.x) + __float_as_int(v00.y) - 1);
+
+ float4 P_curve[4];
+
+ if(type & PRIMITIVE_CURVE) {
+ P_curve[0] = kernel_tex_fetch(__curve_keys, ka);
+ P_curve[1] = kernel_tex_fetch(__curve_keys, k0);
+ P_curve[2] = kernel_tex_fetch(__curve_keys, k1);
+ P_curve[3] = kernel_tex_fetch(__curve_keys, kb);
+ }
+ else {
+ int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, curveAddr): object;
+ motion_cardinal_curve_keys(kg, fobject, prim, time, ka, k0, k1, kb, P_curve);
+ }
+
+ float3 p0 = transform_point(&htfm, float4_to_float3(P_curve[0]) - P);
+ float3 p1 = transform_point(&htfm, float4_to_float3(P_curve[1]) - P);
+ float3 p2 = transform_point(&htfm, float4_to_float3(P_curve[2]) - P);
+ float3 p3 = transform_point(&htfm, float4_to_float3(P_curve[3]) - P);
+
+ float fc = 0.71f;
+ curve_coef[0] = p1;
+ curve_coef[1] = -fc*p0 + fc*p2;
+ curve_coef[2] = 2.0f * fc * p0 + (fc - 3.0f) * p1 + (3.0f - 2.0f * fc) * p2 - fc * p3;
+ curve_coef[3] = -fc * p0 + (2.0f - fc) * p1 + (fc - 2.0f) * p2 + fc * p3;
+ r_st = P_curve[1].w;
+ r_en = P_curve[2].w;
+ }
+#endif
+
+ float r_curr = max(r_st, r_en);
+
+ if((flags & CURVE_KN_RIBBONS) || !(flags & CURVE_KN_BACKFACING))
+ epsilon = 2 * r_curr;
+
+ /* find bounds - this is slow for cubic curves */
+ float upper, lower;
+
+ float zextrem[4];
+ curvebounds(&lower, &upper, &zextrem[0], &zextrem[1], &zextrem[2], &zextrem[3], curve_coef[0].z, curve_coef[1].z, curve_coef[2].z, curve_coef[3].z);
+ if(lower - r_curr > isect->t || upper + r_curr < epsilon)
+ return false;
+
+ /* minimum width extension */
+ float mw_extension = min(difl * fabsf(upper), extmax);
+ float r_ext = mw_extension + r_curr;
+
+ float xextrem[4];
+ curvebounds(&lower, &upper, &xextrem[0], &xextrem[1], &xextrem[2], &xextrem[3], curve_coef[0].x, curve_coef[1].x, curve_coef[2].x, curve_coef[3].x);
+ if(lower > r_ext || upper < -r_ext)
+ return false;
+
+ float yextrem[4];
+ curvebounds(&lower, &upper, &yextrem[0], &yextrem[1], &yextrem[2], &yextrem[3], curve_coef[0].y, curve_coef[1].y, curve_coef[2].y, curve_coef[3].y);
+ if(lower > r_ext || upper < -r_ext)
+ return false;
+
+ /* setup recurrent loop */
+ int level = 1 << depth;
+ int tree = 0;
+ float resol = 1.0f / (float)level;
+ bool hit = false;
+
+ /* begin loop */
+ while(!(tree >> (depth))) {
+ float i_st = tree * resol;
+ float i_en = i_st + (level * resol);
+#ifdef __KERNEL_SSE2__
+ __m128 vi_st = _mm_set1_ps(i_st), vi_en = _mm_set1_ps(i_en);
+ __m128 vp_st = fma(fma(fma(vcurve_coef[3], vi_st, vcurve_coef[2]), vi_st, vcurve_coef[1]), vi_st, vcurve_coef[0]);
+ __m128 vp_en = fma(fma(fma(vcurve_coef[3], vi_en, vcurve_coef[2]), vi_en, vcurve_coef[1]), vi_en, vcurve_coef[0]);
+
+ __m128 vbmin = _mm_min_ps(vp_st, vp_en);
+ __m128 vbmax = _mm_max_ps(vp_st, vp_en);
+
+ float3 &bmin = (float3 &)vbmin, &bmax = (float3 &)vbmax;
+ float &bminx = bmin.x, &bminy = bmin.y, &bminz = bmin.z;
+ float &bmaxx = bmax.x, &bmaxy = bmax.y, &bmaxz = bmax.z;
+ float3 &p_st = (float3 &)vp_st, &p_en = (float3 &)vp_en;
+#else
+ float3 p_st = ((curve_coef[3] * i_st + curve_coef[2]) * i_st + curve_coef[1]) * i_st + curve_coef[0];
+ float3 p_en = ((curve_coef[3] * i_en + curve_coef[2]) * i_en + curve_coef[1]) * i_en + curve_coef[0];
+
+ float bminx = min(p_st.x, p_en.x);
+ float bmaxx = max(p_st.x, p_en.x);
+ float bminy = min(p_st.y, p_en.y);
+ float bmaxy = max(p_st.y, p_en.y);
+ float bminz = min(p_st.z, p_en.z);
+ float bmaxz = max(p_st.z, p_en.z);
+#endif
+
+ if(xextrem[0] >= i_st && xextrem[0] <= i_en) {
+ bminx = min(bminx,xextrem[1]);
+ bmaxx = max(bmaxx,xextrem[1]);
+ }
+ if(xextrem[2] >= i_st && xextrem[2] <= i_en) {
+ bminx = min(bminx,xextrem[3]);
+ bmaxx = max(bmaxx,xextrem[3]);
+ }
+ if(yextrem[0] >= i_st && yextrem[0] <= i_en) {
+ bminy = min(bminy,yextrem[1]);
+ bmaxy = max(bmaxy,yextrem[1]);
+ }
+ if(yextrem[2] >= i_st && yextrem[2] <= i_en) {
+ bminy = min(bminy,yextrem[3]);
+ bmaxy = max(bmaxy,yextrem[3]);
+ }
+ if(zextrem[0] >= i_st && zextrem[0] <= i_en) {
+ bminz = min(bminz,zextrem[1]);
+ bmaxz = max(bmaxz,zextrem[1]);
+ }
+ if(zextrem[2] >= i_st && zextrem[2] <= i_en) {
+ bminz = min(bminz,zextrem[3]);
+ bmaxz = max(bmaxz,zextrem[3]);
+ }
+
+ float r1 = r_st + (r_en - r_st) * i_st;
+ float r2 = r_st + (r_en - r_st) * i_en;
+ r_curr = max(r1, r2);
+
+ mw_extension = min(difl * fabsf(bmaxz), extmax);
+ float r_ext = mw_extension + r_curr;
+ float coverage = 1.0f;
+
+ if (bminz - r_curr > isect->t || bmaxz + r_curr < epsilon || bminx > r_ext|| bmaxx < -r_ext|| bminy > r_ext|| bmaxy < -r_ext) {
+ /* the bounding box does not overlap the square centered at O */
+ tree += level;
+ level = tree & -tree;
+ }
+ else if (level == 1) {
+
+ /* the maximum recursion depth is reached.
+ * check if dP0.(Q-P0)>=0 and dPn.(Pn-Q)>=0.
+ * dP* is reversed if necessary.*/
+ float t = isect->t;
+ float u = 0.0f;
+ float gd = 0.0f;
+
+ if(flags & CURVE_KN_RIBBONS) {
+ float3 tg = (p_en - p_st);
+ float w = tg.x * tg.x + tg.y * tg.y;
+ if (w == 0) {
+ tree++;
+ level = tree & -tree;
+ continue;
+ }
+ w = -(p_st.x * tg.x + p_st.y * tg.y) / w;
+ w = clamp((float)w, 0.0f, 1.0f);
+
+ /* compute u on the curve segment */
+ u = i_st * (1 - w) + i_en * w;
+ r_curr = r_st + (r_en - r_st) * u;
+ /* compare x-y distances */
+ float3 p_curr = ((curve_coef[3] * u + curve_coef[2]) * u + curve_coef[1]) * u + curve_coef[0];
+
+ float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1];
+ if (dot(tg, dp_st)< 0)
+ dp_st *= -1;
+ if (dot(dp_st, -p_st) + p_curr.z * dp_st.z < 0) {
+ tree++;
+ level = tree & -tree;
+ continue;
+ }
+ float3 dp_en = (3 * curve_coef[3] * i_en + 2 * curve_coef[2]) * i_en + curve_coef[1];
+ if (dot(tg, dp_en) < 0)
+ dp_en *= -1;
+ if (dot(dp_en, p_en) - p_curr.z * dp_en.z < 0) {
+ tree++;
+ level = tree & -tree;
+ continue;
+ }
+
+ /* compute coverage */
+ float r_ext = r_curr;
+ coverage = 1.0f;
+ if(difl != 0.0f) {
+ mw_extension = min(difl * fabsf(bmaxz), extmax);
+ r_ext = mw_extension + r_curr;
+ float d = sqrtf(p_curr.x * p_curr.x + p_curr.y * p_curr.y);
+ float d0 = d - r_curr;
+ float d1 = d + r_curr;
+ float inv_mw_extension = 1.0f/mw_extension;
+ if (d0 >= 0)
+ coverage = (min(d1 * inv_mw_extension, 1.0f) - min(d0 * inv_mw_extension, 1.0f)) * 0.5f;
+ else // inside
+ coverage = (min(d1 * inv_mw_extension, 1.0f) + min(-d0 * inv_mw_extension, 1.0f)) * 0.5f;
+ }
+
+ if (p_curr.x * p_curr.x + p_curr.y * p_curr.y >= r_ext * r_ext || p_curr.z <= epsilon || isect->t < p_curr.z) {
+ tree++;
+ level = tree & -tree;
+ continue;
+ }
+
+ t = p_curr.z;
+
+ /* stochastic fade from minimum width */
+ if(difl != 0.0f && lcg_state) {
+ if(coverage != 1.0f && (lcg_step_float(lcg_state) > coverage))
+ return hit;
+ }
+ }
+ else {
+ float l = len(p_en - p_st);
+ /* minimum width extension */
+ float or1 = r1;
+ float or2 = r2;
+
+ if(difl != 0.0f) {
+ mw_extension = min(len(p_st - P) * difl, extmax);
+ or1 = r1 < mw_extension ? mw_extension : r1;
+ mw_extension = min(len(p_en - P) * difl, extmax);
+ or2 = r2 < mw_extension ? mw_extension : r2;
+ }
+ /* --- */
+ float invl = 1.0f/l;
+ float3 tg = (p_en - p_st) * invl;
+ gd = (or2 - or1) * invl;
+ float difz = -dot(p_st,tg);
+ float cyla = 1.0f - (tg.z * tg.z * (1 + gd*gd));
+ float invcyla = 1.0f/cyla;
+ float halfb = (-p_st.z - tg.z*(difz + gd*(difz*gd + or1)));
+ float tcentre = -halfb*invcyla;
+ float zcentre = difz + (tg.z * tcentre);
+ float3 tdif = - p_st;
+ tdif.z += tcentre;
+ float tdifz = dot(tdif,tg);
+ float tb = 2*(tdif.z - tg.z*(tdifz + gd*(tdifz*gd + or1)));
+ float tc = dot(tdif,tdif) - tdifz * tdifz * (1 + gd*gd) - or1*or1 - 2*or1*tdifz*gd;
+ float td = tb*tb - 4*cyla*tc;
+ if (td < 0.0f) {
+ tree++;
+ level = tree & -tree;
+ continue;
+ }
+
+ float rootd = sqrtf(td);
+ float correction = (-tb - rootd) * 0.5f * invcyla;
+ t = tcentre + correction;
+
+ float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1];
+ if (dot(tg, dp_st)< 0)
+ dp_st *= -1;
+ float3 dp_en = (3 * curve_coef[3] * i_en + 2 * curve_coef[2]) * i_en + curve_coef[1];
+ if (dot(tg, dp_en) < 0)
+ dp_en *= -1;
+
+ if(flags & CURVE_KN_BACKFACING && (dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 || isect->t < t || t <= 0.0f)) {
+ correction = (-tb + rootd) * 0.5f * invcyla;
+ t = tcentre + correction;
+ }
+
+ if (dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 || isect->t < t || t <= 0.0f) {
+ tree++;
+ level = tree & -tree;
+ continue;
+ }
+
+ float w = (zcentre + (tg.z * correction)) * invl;
+ w = clamp((float)w, 0.0f, 1.0f);
+ /* compute u on the curve segment */
+ u = i_st * (1 - w) + i_en * w;
+
+ /* stochastic fade from minimum width */
+ if(difl != 0.0f && lcg_state) {
+ r_curr = r1 + (r2 - r1) * w;
+ r_ext = or1 + (or2 - or1) * w;
+ coverage = r_curr/r_ext;
+
+ if(coverage != 1.0f && (lcg_step_float(lcg_state) > coverage))
+ return hit;
+ }
+ }
+ /* we found a new intersection */
+
+#ifdef __VISIBILITY_FLAG__
+ /* visibility flag test. we do it here under the assumption
+ * that most triangles are culled by node flags */
+ if(kernel_tex_fetch(__prim_visibility, curveAddr) & visibility)
+#endif
+ {
+ /* record intersection */
+ isect->prim = curveAddr;
+ isect->object = object;
+ isect->type = type;
+ isect->u = u;
+ isect->v = gd;
+ /*isect->transparency = 1.0f - coverage; */
+ isect->t = t;
+ hit = true;
+ }
+
+ tree++;
+ level = tree & -tree;
+ }
+ else {
+ /* split the curve into two curves and process */
+ level = level >> 1;
+ }
+ }
+
+ return hit;
+}
+
+/* Intersect a ray with one straight segment of a curve.
+ *
+ * The segment runs between curve keys k0 and k1 (k1 = k0 + 1); the w component
+ * of each key is used as the radius at that end. When the primitive type does
+ * not have PRIMITIVE_CURVE set, the keys are obtained from motion_curve_keys()
+ * at the given time instead of being read directly from __curve_keys.
+ *
+ * P, direction: ray origin and direction. NOTE(review): the quadratic below has
+ * no dot(dir, dir) term, so direction is presumably normalized - confirm with callers.
+ * visibility: mask tested against __prim_visibility when __VISIBILITY_FLAG__ is defined.
+ * object, curveAddr, type: copied into the intersection record on a hit.
+ * lcg_state: optional random state, only used for the stochastic fade.
+ * difl, extmax: minimum width extension; when difl != 0 each end radius is raised
+ * to at least min(distance from P to the key * difl, extmax).
+ *
+ * Returns true and overwrites *isect only when a hit with 0 < t < isect->t inside
+ * the segment is accepted; otherwise returns false and does not write to *isect. */
+ccl_device_inline bool bvh_curve_intersect(KernelGlobals *kg, Intersection *isect,
+ float3 P, float3 direction, uint visibility, int object, int curveAddr, float time, int type, uint *lcg_state, float difl, float extmax)
+{
+ /* define few macros to minimize code duplication for SSE */
+#ifndef __KERNEL_SSE2__
+#define len3_squared(x) len_squared(x)
+#define len3(x) len(x)
+#define dot3(x, y) dot(x, y)
+#endif
+
+ int segment = PRIMITIVE_UNPACK_SEGMENT(type);
+ /* curve Intersection check */
+ int flags = kernel_data.curve.curveflags;
+
+ int prim = kernel_tex_fetch(__prim_index, curveAddr);
+ float4 v00 = kernel_tex_fetch(__curves, prim);
+
+ /* v00.x holds the index of the curve's first key, stored as int bits in a float */
+ int cnum = __float_as_int(v00.x);
+ int k0 = cnum + segment;
+ int k1 = k0 + 1;
+
+#ifndef __KERNEL_SSE2__
+ float4 P_curve[2];
+
+ if(type & PRIMITIVE_CURVE) {
+ P_curve[0]= kernel_tex_fetch(__curve_keys, k0);
+ P_curve[1]= kernel_tex_fetch(__curve_keys, k1);
+ }
+ else {
+ int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, curveAddr): object;
+ motion_curve_keys(kg, fobject, prim, time, k0, k1, P_curve);
+ }
+
+ /* or1/or2: original key radii, r1/r2: radii after minimum width extension */
+ float or1 = P_curve[0].w;
+ float or2 = P_curve[1].w;
+ float3 p1 = float4_to_float3(P_curve[0]);
+ float3 p2 = float4_to_float3(P_curve[1]);
+
+ /* minimum width extension */
+ float r1 = or1;
+ float r2 = or2;
+ float3 dif = P - p1;
+ float3 dif_second = P - p2;
+ if(difl != 0.0f) {
+ float pixelsize = min(len3(dif) * difl, extmax);
+ r1 = or1 < pixelsize ? pixelsize : or1;
+ pixelsize = min(len3(dif_second) * difl, extmax);
+ r2 = or2 < pixelsize ? pixelsize : or2;
+ }
+ /* --- */
+
+ /* sphere_dif1 is the vector from the segment midpoint to P, sphere_dif2 is
+ * that vector with its component along dir removed */
+ float3 p21_diff = p2 - p1;
+ float3 sphere_dif1 = (dif + dif_second) * 0.5f;
+ float3 dir = direction;
+ float sphere_b_tmp = dot3(dir, sphere_dif1);
+ float3 sphere_dif2 = sphere_dif1 - sphere_b_tmp * dir;
+#else
+ /* SSE variant of the setup above: both keys are kept in __m128 registers and
+ * the two radii are processed together in or12/r12 */
+ __m128 P_curve[2];
+
+ if(type & PRIMITIVE_CURVE) {
+ P_curve[0] = _mm_load_ps(&kg->__curve_keys.data[k0].x);
+ P_curve[1] = _mm_load_ps(&kg->__curve_keys.data[k1].x);
+ }
+ else {
+ int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, curveAddr): object;
+ motion_curve_keys(kg, fobject, prim, time, k0, k1, (float4*)&P_curve);
+ }
+
+ const __m128 or12 = shuffle<3, 3, 3, 3>(P_curve[0], P_curve[1]);
+
+ __m128 r12 = or12;
+ const __m128 vP = load_m128(P);
+ const __m128 dif = _mm_sub_ps(vP, P_curve[0]);
+ const __m128 dif_second = _mm_sub_ps(vP, P_curve[1]);
+ if(difl != 0.0f) {
+ const __m128 len1_sq = len3_squared_splat(dif);
+ const __m128 len2_sq = len3_squared_splat(dif_second);
+ const __m128 len12 = _mm_sqrt_ps(shuffle<0, 0, 0, 0>(len1_sq, len2_sq));
+ const __m128 pixelsize12 = _mm_min_ps(_mm_mul_ps(len12, _mm_set1_ps(difl)), _mm_set1_ps(extmax));
+ r12 = _mm_max_ps(or12, pixelsize12);
+ }
+ float or1 = _mm_cvtss_f32(or12), or2 = _mm_cvtss_f32(broadcast<2>(or12));
+ float r1 = _mm_cvtss_f32(r12), r2 = _mm_cvtss_f32(broadcast<2>(r12));
+
+ const __m128 p21_diff = _mm_sub_ps(P_curve[1], P_curve[0]);
+ const __m128 sphere_dif1 = _mm_mul_ps(_mm_add_ps(dif, dif_second), _mm_set1_ps(0.5f));
+ const __m128 dir = load_m128(direction);
+ const __m128 sphere_b_tmp = dot3_splat(dir, sphere_dif1);
+ const __m128 sphere_dif2 = fnma(sphere_b_tmp, dir, sphere_dif1);
+#endif
+
+ /* early out against a sphere around the segment midpoint whose radius is half
+ * the segment length plus the larger of the two end radii */
+ float mr = max(r1, r2);
+ float l = len3(p21_diff);
+ float invl = 1.0f / l;
+ float sp_r = mr + 0.5f * l;
+
+ float sphere_b = dot3(dir, sphere_dif2);
+ float sdisc = sphere_b * sphere_b - len3_squared(sphere_dif2) + sp_r * sp_r;
+
+ if(sdisc < 0.0f)
+ return false;
+
+ /* obtain parameters and test midpoint distance for suitable modes */
+#ifndef __KERNEL_SSE2__
+ float3 tg = p21_diff * invl;
+#else
+ const __m128 tg = _mm_mul_ps(p21_diff, _mm_set1_ps(invl));
+#endif
+ /* gd: change of (extended) radius per unit length along the segment */
+ float gd = (r2 - r1) * invl;
+
+ /* components of the ray direction and of (P - p1) along the segment axis */
+ float dirz = dot3(dir, tg);
+ float difz = dot3(dif, tg);
+
+ float a = 1.0f - (dirz*dirz*(1 + gd*gd));
+
+ float halfb = dot3(dir, dif) - dirz*(difz + gd*(difz*gd + r1));
+
+ /* tcentre: ray distance at the vertex of the quadratic, zcentre: matching
+ * position along the segment axis measured from the first key */
+ float tcentre = -halfb/a;
+ float zcentre = difz + (dirz * tcentre);
+
+ if((tcentre > isect->t) && !(flags & CURVE_KN_ACCURATE))
+ return false;
+ if((zcentre < 0 || zcentre > l) && !(flags & CURVE_KN_ACCURATE) && !(flags & CURVE_KN_INTERSECTCORRECTION))
+ return false;
+
+ /* test minimum separation */
+#ifndef __KERNEL_SSE2__
+ float3 cprod = cross(tg, dir);
+ float cprod2sq = len3_squared(cross(tg, dif));
+#else
+ const __m128 cprod = cross(tg, dir);
+ float cprod2sq = len3_squared(cross_zxy(tg, dif));
+#endif
+ float cprodsq = len3_squared(cprod);
+ float distscaled = dot3(cprod, dif);
+
+ /* cprodsq == 0 means tg and dir are parallel; the squared length of
+ * cross(tg, dif) is used as the squared distance in that case */
+ if(cprodsq == 0)
+ distscaled = cprod2sq;
+ else
+ distscaled = (distscaled*distscaled)/cprodsq;
+
+ if(distscaled > mr*mr)
+ return false;
+
+ /* calculate true intersection */
+#ifndef __KERNEL_SSE2__
+ float3 tdif = dif + tcentre * dir;
+#else
+ const __m128 tdif = fma(_mm_set1_ps(tcentre), dir, dif);
+#endif
+ /* quadratic in the offset from tcentre: a*x^2 + tb*x + tc = 0, td is its discriminant */
+ float tdifz = dot3(tdif, tg);
+ float tdifma = tdifz*gd + r1;
+ float tb = 2*(dot3(dir, tdif) - dirz*(tdifz + gd*tdifma));
+ float tc = dot3(tdif, tdif) - tdifz*tdifz - tdifma*tdifma;
+ float td = tb*tb - 4*a*tc;
+
+ if (td < 0.0f)
+ return false;
+
+ /* without CURVE_KN_ACCURATE the correction stays zero here, so t starts out as tcentre */
+ float rootd = 0.0f;
+ float correction = 0.0f;
+ if(flags & CURVE_KN_ACCURATE) {
+ rootd = sqrtf(td);
+ correction = ((-tb - rootd)/(2*a));
+ }
+
+ float t = tcentre + correction;
+
+ if(t < isect->t) {
+
+ if(flags & CURVE_KN_INTERSECTCORRECTION) {
+ rootd = sqrtf(td);
+ correction = ((-tb - rootd)/(2*a));
+ t = tcentre + correction;
+ }
+
+ /* z: position of the hit along the segment axis */
+ float z = zcentre + (dirz * correction);
+ // bool backface = false;
+
+ /* when the first root is behind the ray origin or outside the segment, switch
+ * to the other root; rootd is still 0 here unless one of the branches above set it */
+ if(flags & CURVE_KN_BACKFACING && (t < 0.0f || z < 0 || z > l)) {
+ // backface = true;
+ correction = ((-tb + rootd)/(2*a));
+ t = tcentre + correction;
+ z = zcentre + (dirz * correction);
+ }
+
+ /* stochastic fade from minimum width */
+ /* adjradius is the ratio of the original radius to the extended radius at z;
+ * the hit is rejected when the random number exceeds it */
+ float adjradius = or1 + z * (or2 - or1) * invl;
+ adjradius = adjradius / (r1 + z * gd);
+ if(lcg_state && adjradius != 1.0f) {
+ if(lcg_step_float(lcg_state) > adjradius)
+ return false;
+ }
+ /* --- */
+
+ if(t > 0.0f && t < isect->t && z >= 0 && z <= l) {
+
+ /* NOTE(review): this looks like a rejection of hits when the ray origin lies
+ * inside the segment volume enlarged by enc_ratio - confirm intent */
+ if (flags & CURVE_KN_ENCLOSEFILTER) {
+ float enc_ratio = 1.01f;
+ if((difz > -r1 * enc_ratio) && (dot3(dif_second, tg) < r2 * enc_ratio)) {
+ float a2 = 1.0f - (dirz*dirz*(1 + gd*gd*enc_ratio*enc_ratio));
+ float c2 = dot3(dif, dif) - difz * difz * (1 + gd*gd*enc_ratio*enc_ratio) - r1*r1*enc_ratio*enc_ratio - 2*r1*difz*gd*enc_ratio;
+ if(a2*c2 < 0.0f)
+ return false;
+ }
+ }
+
+#ifdef __VISIBILITY_FLAG__
+ /* visibility flag test. we do it here under the assumption
+ * that most triangles are culled by node flags */
+ if(kernel_tex_fetch(__prim_visibility, curveAddr) & visibility)
+#endif
+ {
+ /* record intersection */
+ /* u is the normalized position along the segment, v carries the radius gradient */
+ isect->prim = curveAddr;
+ isect->object = object;
+ isect->type = type;
+ isect->u = z*invl;
+ isect->v = gd;
+ /*isect->transparency = 1.0f - adjradius;*/
+ isect->t = t;
+
+ return true;
+ }
+ }
+ }
+
+ return false;
+
+ /* preprocessor directives: these still take effect although they follow the return */
+#ifndef __KERNEL_SSE2__
+#undef len3_squared
+#undef len3
+#undef dot3
+#endif
+}
+
+/* Derivative with respect to t of the cardinal spline evaluated by
+ * curvepoint(), for control points p0..p3 and a tension factor of 0.71.
+ * The returned vector is not normalized. */
+ccl_device_inline float3 curvetangent(float t, float3 p0, float3 p1, float3 p2, float3 p3)
+{
+ const float fc = 0.71f;
+ const float tt = t * t;
+
+ /* derivatives of the four basis polynomials, one weight per control point */
+ const float w0 = -3.0f * fc * tt + 4.0f * fc * t - fc;
+ const float w1 = 3.0f * (2.0f - fc) * tt + 2.0f * (fc - 3.0f) * t;
+ const float w2 = 3.0f * (fc - 2.0f) * tt + 2.0f * (3.0f - 2.0f * fc) * t + fc;
+ const float w3 = 3.0f * fc * tt - 2.0f * fc * t;
+
+ return w0 * p0 + w1 * p1 + w2 * p2 + w3 * p3;
+}
+
+/* Evaluate a cardinal spline segment at parameter t from the control points
+ * p0..p3, using a tension factor of 0.71. At t = 0 only p1 has a non-zero
+ * weight, at t = 1 only p2. */
+ccl_device_inline float3 curvepoint(float t, float3 p0, float3 p1, float3 p2, float3 p3)
+{
+ const float fc = 0.71f;
+ const float tt = t * t;
+ const float ttt = tt * t;
+
+ /* cubic basis polynomials, one weight per control point */
+ const float w0 = -fc * ttt + 2.0f * fc * tt - fc * t;
+ const float w1 = (2.0f - fc) * ttt + (fc - 3.0f) * tt + 1.0f;
+ const float w2 = (fc - 2.0f) * ttt + (3.0f - 2.0f * fc) * tt + fc * t;
+ const float w3 = fc * ttt - fc * tt;
+
+ return w0 * p0 + w1 * p1 + w2 * p2 + w3 * p3;
+}
+
+/* Turn a curve intersection into a shading position and geometric normal.
+ *
+ * The hit point ray->P + ray->D * isect->t is recomputed (in object space when
+ * isect->object is set, using the inverse object transform), sd->Ng and sd->N
+ * are filled in together with sd->u/sd->v (__UV__) and sd->dPdu/sd->dPdv
+ * (__DPDU__), and the hit position is returned after being transformed back
+ * with the object transform.
+ *
+ * With CURVE_KN_INTERPOLATE the segment is evaluated as a cardinal spline
+ * through four keys and isect->u is used as the spline parameter; otherwise the
+ * segment is the straight line between two keys. In both cases isect->v is read
+ * as the radius gradient along the segment. */
+ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray)
+{
+ int flag = kernel_data.curve.curveflags;
+ float t = isect->t;
+ float3 P = ray->P;
+ float3 D = ray->D;
+
+ /* move the ray into object space; t is rescaled by normalize_len() so that
+ * P + D*t still addresses the same point */
+ if(isect->object != OBJECT_NONE) {
+#ifdef __OBJECT_MOTION__
+ Transform tfm = sd->ob_itfm;
+#else
+ Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
+#endif
+
+ P = transform_point(&tfm, P);
+ D = transform_direction(&tfm, D*t);
+ D = normalize_len(D, &t);
+ }
+
+ int prim = kernel_tex_fetch(__prim_index, isect->prim);
+ float4 v00 = kernel_tex_fetch(__curves, prim);
+
+ int k0 = __float_as_int(v00.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type);
+ int k1 = k0 + 1;
+
+ /* curve tangent at the hit, assigned on every path below */
+ float3 tg;
+
+ if(flag & CURVE_KN_INTERPOLATE) {
+ /* neighbouring keys, clamped to the first and last key of this curve */
+ int ka = max(k0 - 1,__float_as_int(v00.x));
+ int kb = min(k1 + 1,__float_as_int(v00.x) + __float_as_int(v00.y) - 1);
+
+ float4 P_curve[4];
+
+ if(sd->type & PRIMITIVE_CURVE) {
+ P_curve[0] = kernel_tex_fetch(__curve_keys, ka);
+ P_curve[1] = kernel_tex_fetch(__curve_keys, k0);
+ P_curve[2] = kernel_tex_fetch(__curve_keys, k1);
+ P_curve[3] = kernel_tex_fetch(__curve_keys, kb);
+ }
+ else {
+ motion_cardinal_curve_keys(kg, sd->object, sd->prim, sd->time, ka, k0, k1, kb, P_curve);
+ }
+
+ float3 p[4];
+ p[0] = float4_to_float3(P_curve[0]);
+ p[1] = float4_to_float3(P_curve[1]);
+ p[2] = float4_to_float3(P_curve[2]);
+ p[3] = float4_to_float3(P_curve[3]);
+
+ P = P + D*t;
+
+#ifdef __UV__
+ sd->u = isect->u;
+ sd->v = 0.0f;
+#endif
+
+ /* the tangent is needed by the ribbon normal, by the radius adjustment and by
+ * dPdu/dPdv further down, so evaluate it once for all of them; it used to be
+ * left uninitialized for tube curves with a constant radius (gd == 0) */
+ tg = normalize(curvetangent(isect->u, p[0], p[1], p[2], p[3]));
+
+ if(flag & CURVE_KN_RIBBONS) {
+ /* ribbon: normal is the reversed ray direction with its tangent component removed */
+ sd->Ng = normalize(-(D - tg * (dot(tg, D))));
+ }
+ else {
+ /* direction from inside to surface of curve */
+ float3 p_curr = curvepoint(isect->u, p[0], p[1], p[2], p[3]);
+ sd->Ng = normalize(P - p_curr);
+
+ /* adjustment for changing radius */
+ float gd = isect->v;
+
+ if(gd != 0.0f) {
+ sd->Ng = sd->Ng - gd * tg;
+ sd->Ng = normalize(sd->Ng);
+ }
+ }
+
+ /* todo: sometimes the normal is still so that this is detected as
+ * backfacing even if cull backfaces is enabled */
+
+ sd->N = sd->Ng;
+ }
+ else {
+ float4 P_curve[2];
+
+ if(sd->type & PRIMITIVE_CURVE) {
+ P_curve[0]= kernel_tex_fetch(__curve_keys, k0);
+ P_curve[1]= kernel_tex_fetch(__curve_keys, k1);
+ }
+ else {
+ motion_curve_keys(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve);
+ }
+
+ /* tg: unit direction from the first to the second key, l: segment length */
+ float l = 1.0f;
+ tg = normalize_len(float4_to_float3(P_curve[1] - P_curve[0]), &l);
+
+ P = P + D*t;
+
+ float3 dif = P - float4_to_float3(P_curve[0]);
+
+#ifdef __UV__
+ sd->u = dot(dif,tg)/l;
+ sd->v = 0.0f;
+#endif
+
+ if (flag & CURVE_KN_TRUETANGENTGNORMAL) {
+ sd->Ng = -(D - tg * dot(tg, D));
+ sd->Ng = normalize(sd->Ng);
+ }
+ else {
+ float gd = isect->v;
+
+ /* direction from inside to surface of curve */
+ /* NOTE(review): sd->u is read here but only assigned above when __UV__ is
+ * defined - confirm this path is never built without __UV__ */
+ sd->Ng = (dif - tg * sd->u * l) / (P_curve[0].w + sd->u * l * gd);
+
+ /* adjustment for changing radius */
+ if (gd != 0.0f) {
+ sd->Ng = sd->Ng - gd * tg;
+ sd->Ng = normalize(sd->Ng);
+ }
+ }
+
+ sd->N = sd->Ng;
+ }
+
+#ifdef __DPDU__
+ /* dPdu/dPdv */
+ sd->dPdu = tg;
+ sd->dPdv = cross(tg, sd->Ng);
+#endif
+
+ /*add fading parameter for minimum pixel width with transparency bsdf*/
+ /*sd->curve_transparency = isect->transparency;*/
+ /*sd->curve_radius = sd->u * gd * l + r1;*/
+
+ /* bring the hit position back out of object space */
+ if(isect->object != OBJECT_NONE) {
+#ifdef __OBJECT_MOTION__
+ Transform tfm = sd->ob_tfm;
+#else
+ Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
+#endif
+
+ P = transform_point(&tfm, P);
+ }
+
+ return P;
+}
+
+#endif
+
+CCL_NAMESPACE_END
+
diff --git a/intern/cycles/kernel/geom/geom_motion_curve.h b/intern/cycles/kernel/geom/geom_motion_curve.h
new file mode 100644
index 00000000000..1022a957b05
--- /dev/null
+++ b/intern/cycles/kernel/geom/geom_motion_curve.h
@@ -0,0 +1,148 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+/* Motion Curve Primitive
+ *
+ * These are stored as regular curves, plus extra positions and radii at times
+ * other than the frame center. Computing the curve keys at a given ray time is
+ * a matter of interpolation of the two steps between which the ray time lies.
+ *
+ * The extra curve keys are stored as ATTR_STD_MOTION_VERTEX_POSITION.
+ */
+
+#ifdef __HAIR__
+
+/* Look up attribute `id` in the curve slots of an object's attribute map.
+ *
+ * The search starts at the object's ATTR_PRIM_CURVE entry and advances by
+ * ATTR_PRIM_TYPES entries at a time until an entry with a matching id is found.
+ * The loop has no bound of its own, so the map must contain such an entry.
+ *
+ * The element type of the match is written to *elem. Returns the entry's offset
+ * (its z component), or ATTR_STD_NOT_FOUND when the element type is
+ * ATTR_ELEMENT_NONE. */
+ccl_device_inline int find_attribute_curve_motion(KernelGlobals *kg, int object, uint id, AttributeElement *elem)
+{
+ /* todo: find a better (faster) solution for this, maybe store offset per object */
+ uint map_index = object*kernel_data.bvh.attributes_map_stride + ATTR_PRIM_CURVE;
+ uint4 entry;
+
+ for(;;) {
+ entry = kernel_tex_fetch(__attributes_map, map_index);
+ if(entry.x == id)
+ break;
+ map_index += ATTR_PRIM_TYPES;
+ }
+
+ *elem = (AttributeElement)entry.y;
+
+ if(entry.y == ATTR_ELEMENT_NONE)
+ return (int)ATTR_STD_NOT_FOUND;
+
+ return (int)entry.z;
+}
+
+/* Fetch the two keys k0 and k1 of a curve segment for one motion step.
+ *
+ * Step `numsteps` is the center step and is read from the regular __curve_keys
+ * array. All other steps are read from __attributes_float3, starting at `offset`
+ * with `numkeys` keys per step; the center step is not stored there, so steps
+ * after it are shifted down by one. */
+ccl_device_inline void motion_curve_keys_for_step(KernelGlobals *kg, int offset, int numkeys, int numsteps, int step, int k0, int k1, float4 keys[2])
+{
+ if(step == numsteps) {
+ /* center step: regular vertex location */
+ keys[0] = kernel_tex_fetch(__curve_keys, k0);
+ keys[1] = kernel_tex_fetch(__curve_keys, k1);
+ return;
+ }
+
+ /* center step not stored in this array */
+ if(step > numsteps)
+ step -= 1;
+
+ /* start of the requested step inside the attribute array */
+ offset += step*numkeys;
+
+ keys[0] = kernel_tex_fetch(__attributes_float3, offset + k0);
+ keys[1] = kernel_tex_fetch(__attributes_float3, offset + k1);
+}
+
+/* Return the two keys k0 and k1 of a curve segment at the given ray time.
+ *
+ * The time is mapped onto the object's 2*numsteps motion intervals, the keys of
+ * the two surrounding steps are fetched and then blended linearly (position and
+ * radius alike, as whole float4 values). `prim` is not used by this function. */
+ccl_device_inline void motion_curve_keys(KernelGlobals *kg, int object, int prim, float time, int k0, int k1, float4 keys[2])
+{
+ /* get motion info */
+ int numsteps, numkeys;
+ object_motion_info(kg, object, &numsteps, NULL, &numkeys);
+
+ /* interval containing the time, clamped to the last one, and the blend factor inside it */
+ int last_step = numsteps*2;
+ int step = min((int)(time*last_step), last_step-1);
+ float t = time*last_step - step;
+
+ /* find attribute */
+ AttributeElement elem;
+ int offset = find_attribute_curve_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem);
+ kernel_assert(offset != ATTR_STD_NOT_FOUND);
+
+ /* keys at the start and at the end of the interval */
+ float4 next_keys[2];
+
+ motion_curve_keys_for_step(kg, offset, numkeys, numsteps, step, k0, k1, keys);
+ motion_curve_keys_for_step(kg, offset, numkeys, numsteps, step+1, k0, k1, next_keys);
+
+ /* interpolate between steps */
+ for(int i = 0; i < 2; i++)
+ keys[i] = (1.0f - t)*keys[i] + t*next_keys[i];
+}
+
+/* Fetch the four keys k0..k3 used by a cardinal curve segment for one motion step.
+ *
+ * Step `numsteps` is the center step and is read from the regular __curve_keys
+ * array. All other steps are read from __attributes_float3, starting at `offset`
+ * with `numkeys` keys per step; the center step is not stored there, so steps
+ * after it are shifted down by one. */
+ccl_device_inline void motion_cardinal_curve_keys_for_step(KernelGlobals *kg, int offset, int numkeys, int numsteps, int step, int k0, int k1, int k2, int k3, float4 keys[4])
+{
+ if(step == numsteps) {
+ /* center step: regular vertex location */
+ keys[0] = kernel_tex_fetch(__curve_keys, k0);
+ keys[1] = kernel_tex_fetch(__curve_keys, k1);
+ keys[2] = kernel_tex_fetch(__curve_keys, k2);
+ keys[3] = kernel_tex_fetch(__curve_keys, k3);
+ return;
+ }
+
+ /* center step not stored in this array */
+ if(step > numsteps)
+ step -= 1;
+
+ /* start of the requested step inside the attribute array */
+ offset += step*numkeys;
+
+ keys[0] = kernel_tex_fetch(__attributes_float3, offset + k0);
+ keys[1] = kernel_tex_fetch(__attributes_float3, offset + k1);
+ keys[2] = kernel_tex_fetch(__attributes_float3, offset + k2);
+ keys[3] = kernel_tex_fetch(__attributes_float3, offset + k3);
+}
+
+/* Return the four keys k0..k3 of a cardinal curve segment at the given ray time.
+ *
+ * The time is mapped onto the object's 2*numsteps motion intervals, the keys of
+ * the two surrounding steps are fetched and then blended linearly (position and
+ * radius alike, as whole float4 values). `prim` is not used by this function. */
+ccl_device_inline void motion_cardinal_curve_keys(KernelGlobals *kg, int object, int prim, float time, int k0, int k1, int k2, int k3, float4 keys[4])
+{
+ /* get motion info */
+ int numsteps, numkeys;
+ object_motion_info(kg, object, &numsteps, NULL, &numkeys);
+
+ /* interval containing the time, clamped to the last one, and the blend factor inside it */
+ int last_step = numsteps*2;
+ int step = min((int)(time*last_step), last_step-1);
+ float t = time*last_step - step;
+
+ /* find attribute */
+ AttributeElement elem;
+ int offset = find_attribute_curve_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem);
+ kernel_assert(offset != ATTR_STD_NOT_FOUND);
+
+ /* keys at the start and at the end of the interval */
+ float4 next_keys[4];
+
+ motion_cardinal_curve_keys_for_step(kg, offset, numkeys, numsteps, step, k0, k1, k2, k3, keys);
+ motion_cardinal_curve_keys_for_step(kg, offset, numkeys, numsteps, step+1, k0, k1, k2, k3, next_keys);
+
+ /* interpolate between steps */
+ for(int i = 0; i < 4; i++)
+ keys[i] = (1.0f - t)*keys[i] + t*next_keys[i];
+}
+
+#endif
+
+CCL_NAMESPACE_END
+
diff --git a/intern/cycles/kernel/geom/geom_motion_triangle.h b/intern/cycles/kernel/geom/geom_motion_triangle.h
new file mode 100644
index 00000000000..73338bb6b3b
--- /dev/null
+++ b/intern/cycles/kernel/geom/geom_motion_triangle.h
@@ -0,0 +1,392 @@
+/*
+ * Adapted from code Copyright 2009-2010 NVIDIA Corporation
+ * Modifications Copyright 2011, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Motion Triangle Primitive
+ *
+ * These are stored as regular triangles, plus extra positions and normals at
+ * times other than the frame center. Computing the triangle vertex positions
+ * or normals at a given ray time is a matter of interpolation of the two steps
+ * between which the ray time lies.
+ *
+ * The extra positions and normals are stored as ATTR_STD_MOTION_VERTEX_POSITION
+ * and ATTR_STD_MOTION_VERTEX_NORMAL mesh attributes.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+/* Time interpolation of vertex positions and normals */
+
+/* Look up attribute `id` in the attribute map of `object`.
+ *
+ * Writes the element type of the map entry to *elem and returns the entry's
+ * offset, or ATTR_STD_NOT_FOUND when the element type is ATTR_ELEMENT_NONE. */
+ccl_device_inline int find_attribute_motion(KernelGlobals *kg, int object, uint id, AttributeElement *elem)
+{
+	/* todo: find a better (faster) solution for this, maybe store offset per object */
+	uint map_index = object*kernel_data.bvh.attributes_map_stride;
+	uint4 entry;
+
+	/* walk the map in ATTR_PRIM_TYPES strides until the id matches; the loop has
+	 * no other exit, so the id is expected to be present in the map */
+	for(;;) {
+		entry = kernel_tex_fetch(__attributes_map, map_index);
+
+		if(entry.x == id)
+			break;
+
+		map_index += ATTR_PRIM_TYPES;
+	}
+
+	*elem = (AttributeElement)entry.y;
+
+	if(entry.y == ATTR_ELEMENT_NONE)
+		return (int)ATTR_STD_NOT_FOUND;
+
+	return (int)entry.z;
+}
+
+/* Fetch the three vertex positions of a triangle at a single motion step.
+ * tri_vindex holds the vertex indices as int bits in its float components;
+ * offset is the start of the motion position attribute. */
+ccl_device_inline void motion_triangle_verts_for_step(KernelGlobals *kg, float3 tri_vindex, int offset, int numverts, int numsteps, int step, float3 verts[3])
+{
+	const int v0 = __float_as_int(tri_vindex.x);
+	const int v1 = __float_as_int(tri_vindex.y);
+	const int v2 = __float_as_int(tri_vindex.z);
+
+	if(step == numsteps) {
+		/* center step: positions come from the regular vertex array */
+		verts[0] = float4_to_float3(kernel_tex_fetch(__tri_verts, v0));
+		verts[1] = float4_to_float3(kernel_tex_fetch(__tri_verts, v1));
+		verts[2] = float4_to_float3(kernel_tex_fetch(__tri_verts, v2));
+		return;
+	}
+
+	/* the center step is not stored in the attribute array, so steps past it
+	 * sit one slot earlier */
+	const int stored_step = (step > numsteps)? step - 1: step;
+	const int base = offset + stored_step*numverts;
+
+	verts[0] = float4_to_float3(kernel_tex_fetch(__attributes_float3, base + v0));
+	verts[1] = float4_to_float3(kernel_tex_fetch(__attributes_float3, base + v1));
+	verts[2] = float4_to_float3(kernel_tex_fetch(__attributes_float3, base + v2));
+}
+
+/* Fetch the three vertex normals of a triangle at a single motion step.
+ * tri_vindex holds the vertex indices as int bits in its float components;
+ * offset is the start of the motion normal attribute. */
+ccl_device_inline void motion_triangle_normals_for_step(KernelGlobals *kg, float3 tri_vindex, int offset, int numverts, int numsteps, int step, float3 normals[3])
+{
+	const int v0 = __float_as_int(tri_vindex.x);
+	const int v1 = __float_as_int(tri_vindex.y);
+	const int v2 = __float_as_int(tri_vindex.z);
+
+	if(step == numsteps) {
+		/* center step: normals come from the regular vertex normal array */
+		normals[0] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, v0));
+		normals[1] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, v1));
+		normals[2] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, v2));
+		return;
+	}
+
+	/* the center step is not stored in the attribute array, so steps past it
+	 * sit one slot earlier */
+	const int stored_step = (step > numsteps)? step - 1: step;
+	const int base = offset + stored_step*numverts;
+
+	normals[0] = float4_to_float3(kernel_tex_fetch(__attributes_float3, base + v0));
+	normals[1] = float4_to_float3(kernel_tex_fetch(__attributes_float3, base + v1));
+	normals[2] = float4_to_float3(kernel_tex_fetch(__attributes_float3, base + v2));
+}
+
+/* Compute the three vertex positions of motion triangle `prim` at the given
+ * time by linear interpolation between the two surrounding motion steps.
+ * The result is written to verts[0..2]. */
+ccl_device_inline void motion_triangle_vertices(KernelGlobals *kg, int object, int prim, float time, float3 verts[3])
+{
+	/* step and vertex counts stored with the object */
+	int numsteps, numverts;
+	object_motion_info(kg, object, &numsteps, &numverts, NULL);
+
+	/* locate the motion position attribute */
+	AttributeElement elem;
+	int offset = find_attribute_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem);
+	kernel_assert(offset != ATTR_STD_NOT_FOUND);
+
+	/* pick the step at or before `time`, clamped so that step+1 never exceeds
+	 * maxstep, and the blend factor t towards the following step */
+	const int maxstep = numsteps*2;
+	const int step = min((int)(time*maxstep), maxstep-1);
+	const float t = time*maxstep - step;
+
+	/* positions at both steps; verts[] doubles as storage for the earlier step */
+	const float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, prim));
+	float3 next_verts[3];
+
+	motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts);
+	motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step + 1, next_verts);
+
+	/* blend every vertex between the two steps */
+	const float s = 1.0f - t;
+	for(int i = 0; i < 3; i++)
+		verts[i] = s*verts[i] + t*next_verts[i];
+}
+
+/* Refine triangle intersection to more precise hit point. For rays that travel
+ * far the precision is often not so good, this reintersects the primitive from
+ * a closer distance.
+ *
+ * verts are the triangle vertex positions already interpolated to the ray time.
+ * isect->t is taken along ray->D in the space of the ray; for instanced objects
+ * the ray is moved into object space first and the result is transformed back.
+ * Returns the refined hit position, or ray->P + ray->D*isect->t when
+ * __INTERSECTION_REFINE__ is disabled. */
+
+ccl_device_inline float3 motion_triangle_refine(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray, float3 verts[3])
+{
+	float3 P = ray->P;
+	float3 D = ray->D;
+	float t = isect->t;
+
+#ifdef __INTERSECTION_REFINE__
+	if(isect->object != OBJECT_NONE) {
+#ifdef __OBJECT_MOTION__
+		Transform tfm = sd->ob_itfm;
+#else
+		Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
+#endif
+
+		/* transforming D*t and renormalizing converts t to object space */
+		P = transform_point(&tfm, P);
+		D = transform_direction(&tfm, D*t);
+		D = normalize_len(D, &t);
+	}
+
+	/* approximate hit point, used as origin of the second intersection */
+	P = P + D*t;
+
+	/* compute refined intersection distance */
+	const float3 e1 = verts[0] - verts[2];
+	const float3 e2 = verts[1] - verts[2];
+	const float3 s1 = cross(D, e2);
+	const float det = dot(s1, e1);
+
+	/* a zero determinant means the triangle is degenerate or the ray lies in
+	 * its plane; dividing by it would put inf/NaN into the position, so keep
+	 * the unrefined hit point in that case */
+	if(det != 0.0f) {
+		const float invdivisor = 1.0f/det;
+		const float3 d = P - verts[2];
+		const float3 s2 = cross(d, e1);
+		const float rt = dot(e2, s2)*invdivisor;
+
+		/* compute refined position */
+		P = P + D*rt;
+	}
+
+	if(isect->object != OBJECT_NONE) {
+#ifdef __OBJECT_MOTION__
+		Transform tfm = sd->ob_tfm;
+#else
+		Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
+#endif
+
+		/* back from object space */
+		P = transform_point(&tfm, P);
+	}
+
+	return P;
+#else
+	return P + D*t;
+#endif
+}
+
+/* Same as motion_triangle_refine(), except that isect->t is assumed to be in
+ * object space for instancing: the ray direction is transformed and normalized
+ * without rescaling t.
+ *
+ * Returns the refined hit position, or ray->P + ray->D*isect->t when
+ * __INTERSECTION_REFINE__ is disabled. */
+
+#ifdef __SUBSURFACE__
+ccl_device_inline float3 motion_triangle_refine_subsurface(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray, float3 verts[3])
+{
+	float3 P = ray->P;
+	float3 D = ray->D;
+	float t = isect->t;
+
+#ifdef __INTERSECTION_REFINE__
+	if(isect->object != OBJECT_NONE) {
+#ifdef __OBJECT_MOTION__
+		Transform tfm = sd->ob_itfm;
+#else
+		Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
+#endif
+
+		P = transform_point(&tfm, P);
+		D = transform_direction(&tfm, D);
+		D = normalize(D);
+	}
+
+	/* approximate hit point, used as origin of the second intersection */
+	P = P + D*t;
+
+	/* compute refined intersection distance */
+	const float3 e1 = verts[0] - verts[2];
+	const float3 e2 = verts[1] - verts[2];
+	const float3 s1 = cross(D, e2);
+	const float det = dot(s1, e1);
+
+	/* a zero determinant means the triangle is degenerate or the ray lies in
+	 * its plane; dividing by it would put inf/NaN into the position, so keep
+	 * the unrefined hit point in that case */
+	if(det != 0.0f) {
+		const float invdivisor = 1.0f/det;
+		const float3 d = P - verts[2];
+		const float3 s2 = cross(d, e1);
+		const float rt = dot(e2, s2)*invdivisor;
+
+		P = P + D*rt;
+	}
+
+	if(isect->object != OBJECT_NONE) {
+#ifdef __OBJECT_MOTION__
+		Transform tfm = sd->ob_tfm;
+#else
+		Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
+#endif
+
+		/* back from object space */
+		P = transform_point(&tfm, P);
+	}
+
+	return P;
+#else
+	return P + D*t;
+#endif
+}
+#endif
+
+/* Setup of motion triangle specific parts of ShaderData, moved into this one
+ * function to more easily share computation of interpolated positions and
+ * normals */
+
+/* Reads sd->prim, sd->object, sd->time, sd->u and sd->v. Writes sd->shader,
+ * sd->P, sd->Ng and sd->N, plus sd->dPdu and sd->dPdv when __DPDU__ is defined.
+ * With __SUBSURFACE__ defined, `subsurface` selects
+ * motion_triangle_refine_subsurface() over motion_triangle_refine() for sd->P. */
+ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray, bool subsurface)
+{
+ /* get shader: the shader value is stored as int bits in the w component of
+ * the triangle normal entry */
+ float4 Ns = kernel_tex_fetch(__tri_normal, sd->prim);
+ sd->shader = __float_as_int(Ns.w);
+
+ /* get motion info */
+ int numsteps, numverts;
+ object_motion_info(kg, sd->object, &numsteps, &numverts, NULL);
+
+ /* figure out which steps we need to fetch and their interpolation factor;
+ * step is clamped to maxstep-1 so that step+1 never exceeds maxstep */
+ int maxstep = numsteps*2;
+ int step = min((int)(sd->time*maxstep), maxstep-1);
+ float t = sd->time*maxstep - step;
+
+ /* find motion position attribute */
+ AttributeElement elem;
+ int offset = find_attribute_motion(kg, sd->object, ATTR_STD_MOTION_VERTEX_POSITION, &elem);
+ kernel_assert(offset != ATTR_STD_NOT_FOUND);
+
+ /* fetch vertex coordinates at the two steps */
+ float3 verts[3], next_verts[3];
+ float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, sd->prim));
+
+ motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts);
+ motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_verts);
+
+ /* interpolate between steps */
+ verts[0] = (1.0f - t)*verts[0] + t*next_verts[0];
+ verts[1] = (1.0f - t)*verts[1] + t*next_verts[1];
+ verts[2] = (1.0f - t)*verts[2] + t*next_verts[2];
+
+ /* compute refined position; without __SUBSURFACE__ the if/else disappears
+ * and motion_triangle_refine() is always used */
+#ifdef __SUBSURFACE__
+ if(!subsurface)
+#endif
+ sd->P = motion_triangle_refine(kg, sd, isect, ray, verts);
+#ifdef __SUBSURFACE__
+ else
+ sd->P = motion_triangle_refine_subsurface(kg, sd, isect, ray, verts);
+#endif
+
+ /* compute face normal from the interpolated vertex positions */
+ float3 Ng = normalize(cross(verts[1] - verts[0], verts[2] - verts[0]));
+
+ sd->Ng = Ng;
+ sd->N = Ng;
+
+ /* compute derivatives of P w.r.t. uv */
+#ifdef __DPDU__
+ sd->dPdu = (verts[0] - verts[2]);
+ sd->dPdv = (verts[1] - verts[2]);
+#endif
+
+ /* compute smooth normal, replacing the face normal stored in sd->N above */
+ if(sd->shader & SHADER_SMOOTH_NORMAL) {
+ /* find motion normal attribute; elem and offset shadow the outer ones */
+ AttributeElement elem;
+ int offset = find_attribute_motion(kg, sd->object, ATTR_STD_MOTION_VERTEX_NORMAL, &elem);
+ kernel_assert(offset != ATTR_STD_NOT_FOUND);
+
+ /* fetch vertex normals at the two steps */
+ float3 normals[3], next_normals[3];
+ motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step, normals);
+ motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_normals);
+
+ /* interpolate between steps */
+ normals[0] = (1.0f - t)*normals[0] + t*next_normals[0];
+ normals[1] = (1.0f - t)*normals[1] + t*next_normals[1];
+ normals[2] = (1.0f - t)*normals[2] + t*next_normals[2];
+
+ /* interpolate between vertices; the result is not renormalized here */
+ float u = sd->u;
+ float v = sd->v;
+ float w = 1.0f - u - v;
+ sd->N = (u*normals[0] + v*normals[1] + w*normals[2]);
+ }
+}
+
+/* Ray intersection. The triangle vertex positions are computed at the given
+ * ray time and the ray is intersected with the resulting triangle, passing
+ * isect->t as the distance limit. On a hit the intersection record is filled
+ * in and true is returned. The visibility argument is not used here. */
+
+ccl_device_inline bool motion_triangle_intersect(KernelGlobals *kg, Intersection *isect,
+	float3 P, float3 dir, float time, uint visibility, int object, int triAddr)
+{
+	/* primitive index for vertex location lookup */
+	const int prim = kernel_tex_fetch(__prim_index, triAddr);
+
+	/* without an explicit object, use the one recorded for the primitive */
+	int motion_object = object;
+	if(object == OBJECT_NONE)
+		motion_object = kernel_tex_fetch(__prim_object, triAddr);
+
+	/* get vertex locations for intersection */
+	float3 verts[3];
+	motion_triangle_vertices(kg, motion_object, prim, time, verts);
+
+	/* ray-triangle intersection, unoptimized */
+	float hit_t, hit_u, hit_v;
+
+	if(!ray_triangle_intersect_uv(P, dir, isect->t, verts[2], verts[0], verts[1], &hit_u, &hit_v, &hit_t))
+		return false;
+
+	/* record intersection; note the object is stored as passed in */
+	isect->prim = triAddr;
+	isect->object = object;
+	isect->type = PRIMITIVE_MOTION_TRIANGLE;
+	isect->u = hit_u;
+	isect->v = hit_v;
+	isect->t = hit_t;
+
+	return true;
+}
+
+/* Special ray intersection routines for subsurface scattering. In that case we
+ * only want to intersect with primitives in the same object, and in case of
+ * multiple hits we pick a single random primitive as the intersection point.
+ *
+ * Every hit increments *num_hits. The first max_hits hits are stored in order
+ * in isect_array; later hits randomly replace a stored one or are dropped. */
+
+#ifdef __SUBSURFACE__
+ccl_device_inline void motion_triangle_intersect_subsurface(KernelGlobals *kg, Intersection *isect_array,
+	float3 P, float3 dir, float time, int object, int triAddr, float tmax, uint *num_hits, uint *lcg_state, int max_hits)
+{
+	/* primitive index for vertex location lookup */
+	const int prim = kernel_tex_fetch(__prim_index, triAddr);
+
+	/* without an explicit object, use the one recorded for the primitive */
+	int motion_object = object;
+	if(object == OBJECT_NONE)
+		motion_object = kernel_tex_fetch(__prim_object, triAddr);
+
+	/* get vertex locations for intersection */
+	float3 verts[3];
+	motion_triangle_vertices(kg, motion_object, prim, time, verts);
+
+	/* ray-triangle intersection, unoptimized */
+	float hit_t, hit_u, hit_v;
+
+	if(!ray_triangle_intersect_uv(P, dir, tmax, verts[2], verts[0], verts[1], &hit_u, &hit_v, &hit_t))
+		return;
+
+	(*num_hits)++;
+
+	int hit;
+
+	if(*num_hits > max_hits) {
+		/* reservoir sampling: if we are at the maximum number of
+		 * hits, randomly replace element or skip it */
+		hit = lcg_step_uint(lcg_state) % *num_hits;
+
+		if(hit >= max_hits)
+			return;
+	}
+	else {
+		/* still room: append at the next free slot */
+		hit = *num_hits - 1;
+	}
+
+	/* record intersection */
+	Intersection *isect = &isect_array[hit];
+	isect->prim = triAddr;
+	isect->object = object;
+	isect->type = PRIMITIVE_MOTION_TRIANGLE;
+	isect->u = hit_u;
+	isect->v = hit_v;
+	isect->t = hit_t;
+}
+#endif
+
+CCL_NAMESPACE_END
+
diff --git a/intern/cycles/kernel/kernel_object.h b/intern/cycles/kernel/geom/geom_object.h
index a66277e10cd..91edd5863ac 100644
--- a/intern/cycles/kernel/kernel_object.h
+++ b/intern/cycles/kernel/geom/geom_object.h
@@ -1,6 +1,4 @@
/*
- * Copyright 2011-2013 Blender Foundation
- *
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
@@ -11,11 +9,23 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
+/* Object Primitive
+ *
+ * All mesh and curve primitives are part of an object. The same mesh and curves
+ * may be instanced multiple times by different objects.
+ *
+ * If the mesh is not instanced multiple times, the object will not be explicitly
+ * stored as a primitive in the BVH, rather the bare triangles and curves are
+ * directly primitives in the BVH with world space locations applied, and the object
+ * ID is looked up afterwards. */
+
CCL_NAMESPACE_BEGIN
+/* Object attributes, for now a fixed size and contents */
+
enum ObjectTransform {
OBJECT_TRANSFORM = 0,
OBJECT_TRANSFORM_MOTION_PRE = 0,
@@ -30,6 +40,8 @@ enum ObjectVectorTransform {
OBJECT_VECTOR_MOTION_POST = 3
};
+/* Object to world space transformation */
+
ccl_device_inline Transform object_fetch_transform(KernelGlobals *kg, int object, enum ObjectTransform type)
{
int offset = object*OBJECT_SIZE + (int)type;
@@ -43,6 +55,8 @@ ccl_device_inline Transform object_fetch_transform(KernelGlobals *kg, int object
return tfm;
}
+/* Object to world space transformation for motion vectors */
+
ccl_device_inline Transform object_fetch_vector_transform(KernelGlobals *kg, int object, enum ObjectVectorTransform type)
{
int offset = object*OBJECT_VECTOR_SIZE + (int)type;
@@ -56,6 +70,8 @@ ccl_device_inline Transform object_fetch_vector_transform(KernelGlobals *kg, int
return tfm;
}
+/* Motion blurred object transformations */
+
#ifdef __OBJECT_MOTION__
ccl_device_inline Transform object_fetch_transform_motion(KernelGlobals *kg, int object, float time)
{
@@ -102,7 +118,9 @@ ccl_device_inline Transform object_fetch_transform_motion_test(KernelGlobals *kg
}
#endif
-ccl_device_inline void object_position_transform(KernelGlobals *kg, ShaderData *sd, float3 *P)
+/* Transform position from object to world space */
+
+ccl_device_inline void object_position_transform(KernelGlobals *kg, const ShaderData *sd, float3 *P)
{
#ifdef __OBJECT_MOTION__
*P = transform_point(&sd->ob_tfm, *P);
@@ -112,7 +130,9 @@ ccl_device_inline void object_position_transform(KernelGlobals *kg, ShaderData *
#endif
}
-ccl_device_inline void object_inverse_position_transform(KernelGlobals *kg, ShaderData *sd, float3 *P)
+/* Transform position from world to object space */
+
+ccl_device_inline void object_inverse_position_transform(KernelGlobals *kg, const ShaderData *sd, float3 *P)
{
#ifdef __OBJECT_MOTION__
*P = transform_point(&sd->ob_itfm, *P);
@@ -122,7 +142,9 @@ ccl_device_inline void object_inverse_position_transform(KernelGlobals *kg, Shad
#endif
}
-ccl_device_inline void object_inverse_normal_transform(KernelGlobals *kg, ShaderData *sd, float3 *N)
+/* Transform normal from world to object space */
+
+ccl_device_inline void object_inverse_normal_transform(KernelGlobals *kg, const ShaderData *sd, float3 *N)
{
#ifdef __OBJECT_MOTION__
*N = normalize(transform_direction_transposed(&sd->ob_tfm, *N));
@@ -132,7 +154,9 @@ ccl_device_inline void object_inverse_normal_transform(KernelGlobals *kg, Shader
#endif
}
-ccl_device_inline void object_normal_transform(KernelGlobals *kg, ShaderData *sd, float3 *N)
+/* Transform normal from object to world space */
+
+ccl_device_inline void object_normal_transform(KernelGlobals *kg, const ShaderData *sd, float3 *N)
{
#ifdef __OBJECT_MOTION__
*N = normalize(transform_direction_transposed(&sd->ob_itfm, *N));
@@ -142,7 +166,9 @@ ccl_device_inline void object_normal_transform(KernelGlobals *kg, ShaderData *sd
#endif
}
-ccl_device_inline void object_dir_transform(KernelGlobals *kg, ShaderData *sd, float3 *D)
+/* Transform direction vector from object to world space */
+
+ccl_device_inline void object_dir_transform(KernelGlobals *kg, const ShaderData *sd, float3 *D)
{
#ifdef __OBJECT_MOTION__
*D = transform_direction(&sd->ob_tfm, *D);
@@ -152,7 +178,9 @@ ccl_device_inline void object_dir_transform(KernelGlobals *kg, ShaderData *sd, f
#endif
}
-ccl_device_inline void object_inverse_dir_transform(KernelGlobals *kg, ShaderData *sd, float3 *D)
+/* Transform direction vector from world to object space */
+
+ccl_device_inline void object_inverse_dir_transform(KernelGlobals *kg, const ShaderData *sd, float3 *D)
{
#ifdef __OBJECT_MOTION__
*D = transform_direction(&sd->ob_itfm, *D);
@@ -162,9 +190,11 @@ ccl_device_inline void object_inverse_dir_transform(KernelGlobals *kg, ShaderDat
#endif
}
-ccl_device_inline float3 object_location(KernelGlobals *kg, ShaderData *sd)
+/* Object center position */
+
+ccl_device_inline float3 object_location(KernelGlobals *kg, const ShaderData *sd)
{
- if(sd->object == ~0)
+ if(sd->object == OBJECT_NONE)
return make_float3(0.0f, 0.0f, 0.0f);
#ifdef __OBJECT_MOTION__
@@ -175,6 +205,8 @@ ccl_device_inline float3 object_location(KernelGlobals *kg, ShaderData *sd)
#endif
}
+/* Total surface area of object */
+
ccl_device_inline float object_surface_area(KernelGlobals *kg, int object)
{
int offset = object*OBJECT_SIZE + OBJECT_PROPERTIES;
@@ -182,9 +214,11 @@ ccl_device_inline float object_surface_area(KernelGlobals *kg, int object)
return f.x;
}
+/* Pass ID number of object */
+
ccl_device_inline float object_pass_id(KernelGlobals *kg, int object)
{
- if(object == ~0)
+ if(object == OBJECT_NONE)
return 0.0f;
int offset = object*OBJECT_SIZE + OBJECT_PROPERTIES;
@@ -192,9 +226,11 @@ ccl_device_inline float object_pass_id(KernelGlobals *kg, int object)
return f.y;
}
+/* Per object random number for shader variation */
+
ccl_device_inline float object_random_number(KernelGlobals *kg, int object)
{
- if(object == ~0)
+ if(object == OBJECT_NONE)
return 0.0f;
int offset = object*OBJECT_SIZE + OBJECT_PROPERTIES;
@@ -202,9 +238,11 @@ ccl_device_inline float object_random_number(KernelGlobals *kg, int object)
return f.z;
}
-ccl_device_inline uint object_particle_id(KernelGlobals *kg, int object)
+/* Particle ID from which this object was generated */
+
+ccl_device_inline int object_particle_id(KernelGlobals *kg, int object)
{
- if(object == ~0)
+ if(object == OBJECT_NONE)
return 0.0f;
int offset = object*OBJECT_SIZE + OBJECT_PROPERTIES;
@@ -212,9 +250,11 @@ ccl_device_inline uint object_particle_id(KernelGlobals *kg, int object)
return __float_as_uint(f.w);
}
+/* Generated texture coordinate on surface from where object was instanced */
+
ccl_device_inline float3 object_dupli_generated(KernelGlobals *kg, int object)
{
- if(object == ~0)
+ if(object == OBJECT_NONE)
return make_float3(0.0f, 0.0f, 0.0f);
int offset = object*OBJECT_SIZE + OBJECT_DUPLI;
@@ -222,9 +262,11 @@ ccl_device_inline float3 object_dupli_generated(KernelGlobals *kg, int object)
return make_float3(f.x, f.y, f.z);
}
+/* UV texture coordinate on surface from where object was instanced */
+
ccl_device_inline float3 object_dupli_uv(KernelGlobals *kg, int object)
{
- if(object == ~0)
+ if(object == OBJECT_NONE)
return make_float3(0.0f, 0.0f, 0.0f);
int offset = object*OBJECT_SIZE + OBJECT_DUPLI;
@@ -232,12 +274,33 @@ ccl_device_inline float3 object_dupli_uv(KernelGlobals *kg, int object)
return make_float3(f.x, f.y, 0.0f);
}
+/* Information about mesh for motion blurred triangles and curves.
+ *
+ * Each output pointer may be NULL, in which case that value is not written.
+ * The values are stored as int bits in float4 entries of the __objects array. */
+
+ccl_device_inline void object_motion_info(KernelGlobals *kg, int object, int *numsteps, int *numverts, int *numkeys)
+{
+	const int dupli_offset = object*OBJECT_SIZE + OBJECT_DUPLI;
+
+	/* key count sits in the w component of the OBJECT_DUPLI entry; that entry
+	 * is only fetched when the caller asks for it */
+	if(numkeys) {
+		float4 dupli_data = kernel_tex_fetch(__objects, dupli_offset);
+		*numkeys = __float_as_int(dupli_data.w);
+	}
+
+	/* step and vertex counts share the entry that follows */
+	float4 motion_data = kernel_tex_fetch(__objects, dupli_offset + 1);
+
+	if(numsteps)
+		*numsteps = __float_as_int(motion_data.z);
+
+	if(numverts)
+		*numverts = __float_as_int(motion_data.w);
+}
+
+/* Pass ID for shader */
-ccl_device int shader_pass_id(KernelGlobals *kg, ShaderData *sd)
+ccl_device int shader_pass_id(KernelGlobals *kg, const ShaderData *sd)
{
return kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2 + 1);
}
+/* Particle data from which object was instanced */
+
ccl_device_inline float particle_index(KernelGlobals *kg, int particle)
{
int offset = particle*PARTICLE_SIZE;
@@ -296,5 +359,107 @@ ccl_device float3 particle_angular_velocity(KernelGlobals *kg, int particle)
return make_float3(f3.z, f3.w, f4.x);
}
+/* Object intersection in BVH */
+
+/* Push direction components that are too close to zero out to a tiny value of
+ * the same sign, so that the inverse direction can be computed without
+ * dividing by zero. */
+ccl_device_inline float3 bvh_clamp_direction(float3 dir)
+{
+	/* clamp absolute values by exp2f(-80.0f) to avoid division by zero when calculating inverse direction */
+	const float ooeps = 8.271806E-25f;
+
+	const float x = (fabsf(dir.x) > ooeps)? dir.x: copysignf(ooeps, dir.x);
+	const float y = (fabsf(dir.y) > ooeps)? dir.y: copysignf(ooeps, dir.y);
+	const float z = (fabsf(dir.z) > ooeps)? dir.z: copysignf(ooeps, dir.z);
+
+	return make_float3(x, y, z);
+}
+
+/* Reciprocal of a ray direction; the callers in this file pass a direction
+ * that went through bvh_clamp_direction() first. */
+ccl_device_inline float3 bvh_inverse_direction(float3 dir)
+{
+	const float3 idir = 1.0f / dir;
+
+	return idir;
+}
+
+/* Transform ray into object space to enter static object in BVH.
+ *
+ * Writes the object space origin, clamped unit direction and inverse direction
+ * to *P, *dir and *idir, and rescales *t by the length of the transformed
+ * direction unless it is FLT_MAX. */
+
+ccl_device_inline void bvh_instance_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t)
+{
+	Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
+
+	*P = transform_point(&itfm, ray->P);
+
+	/* normalize the transformed direction, keeping its length as the factor
+	 * between world space and object space distances */
+	float scale;
+	const float3 object_D = transform_direction(&itfm, ray->D);
+	const float3 unit_D = normalize_len(object_D, &scale);
+
+	*dir = bvh_clamp_direction(unit_D);
+	*idir = bvh_inverse_direction(*dir);
+
+	/* FLT_MAX is left untouched */
+	if(*t != FLT_MAX)
+		*t *= scale;
+}
+
+/* Transform ray to exit static object in BVH.
+ *
+ * Rescales *t, unless it is FLT_MAX, by the length of the current direction
+ * (rebuilt from *idir) after applying the object transform, then resets *P,
+ * *dir and *idir from the original ray. */
+
+ccl_device_inline void bvh_instance_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t)
+{
+	if(*t != FLT_MAX) {
+		Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
+		const float3 object_D = 1.0f/(*idir);
+		const float scale = len(transform_direction(&tfm, object_D));
+
+		*t *= scale;
+	}
+
+	/* back to the untransformed ray */
+	*P = ray->P;
+	*dir = bvh_clamp_direction(ray->D);
+	*idir = bvh_inverse_direction(*dir);
+}
+
+/* Same as bvh_instance_pop(), but returns scale factor to apply to multiple
+ * intersection distances in *t_fac instead of rescaling a single distance. */
+
+ccl_device_inline void bvh_instance_pop_factor(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t_fac)
+{
+	Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
+	const float3 object_D = 1.0f/(*idir);
+
+	*t_fac = len(transform_direction(&tfm, object_D));
+
+	/* back to the untransformed ray */
+	*P = ray->P;
+	*dir = bvh_clamp_direction(ray->D);
+	*idir = bvh_inverse_direction(*dir);
+}
+
+
+#ifdef __OBJECT_MOTION__
+/* Transform ray into object space to enter motion blurred object in BVH.
+ *
+ * The transform fetched for ray->time is stored in *tfm for the matching pop
+ * call; its inverse is used to set *P, *dir and *idir. *t is rescaled by the
+ * length of the transformed direction unless it is FLT_MAX. */
+
+ccl_device_inline void bvh_instance_motion_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t, Transform *tfm)
+{
+	Transform itfm;
+	*tfm = object_fetch_transform_motion_test(kg, object, ray->time, &itfm);
+
+	*P = transform_point(&itfm, ray->P);
+
+	/* normalize the transformed direction, keeping its length as the factor
+	 * between world space and object space distances */
+	float scale;
+	const float3 object_D = transform_direction(&itfm, ray->D);
+	const float3 unit_D = normalize_len(object_D, &scale);
+
+	*dir = bvh_clamp_direction(unit_D);
+	*idir = bvh_inverse_direction(*dir);
+
+	/* FLT_MAX is left untouched */
+	if(*t != FLT_MAX)
+		*t *= scale;
+}
+
+/* Transform ray to exit motion blurred object in BVH.
+ *
+ * tfm is the transform to apply to the current direction (rebuilt from *idir)
+ * when rescaling *t; FLT_MAX is left untouched. *P, *dir and *idir are reset
+ * from the original ray. */
+
+ccl_device_inline void bvh_instance_motion_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t, Transform *tfm)
+{
+	if(*t != FLT_MAX) {
+		const float3 object_D = 1.0f/(*idir);
+		const float scale = len(transform_direction(tfm, object_D));
+
+		*t *= scale;
+	}
+
+	/* back to the untransformed ray */
+	*P = ray->P;
+	*dir = bvh_clamp_direction(ray->D);
+	*idir = bvh_inverse_direction(*dir);
+}
+
+/* Same as bvh_instance_motion_pop(), but returns scale factor to apply to
+ * multiple intersection distances in *t_fac instead of rescaling a single
+ * distance. */
+
+ccl_device_inline void bvh_instance_motion_pop_factor(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t_fac, Transform *tfm)
+{
+	const float3 object_D = 1.0f/(*idir);
+
+	*t_fac = len(transform_direction(tfm, object_D));
+
+	/* back to the untransformed ray */
+	*P = ray->P;
+	*dir = bvh_clamp_direction(ray->D);
+	*idir = bvh_inverse_direction(*dir);
+}
+
+#endif
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_primitive.h b/intern/cycles/kernel/geom/geom_primitive.h
index fa450c97cbf..533973621d7 100644
--- a/intern/cycles/kernel/kernel_primitive.h
+++ b/intern/cycles/kernel/geom/geom_primitive.h
@@ -14,82 +14,60 @@
* limitations under the License
*/
-#ifndef __KERNEL_ATTRIBUTE_CL__
-#define __KERNEL_ATTRIBUTE_CL__
+/* Primitive Utilities
+ *
+ * Generic functions to look up mesh, curve and volume primitive attributes for
+ * shading and render passes. */
CCL_NAMESPACE_BEGIN
-/* attribute lookup */
-
-ccl_device_inline int find_attribute(KernelGlobals *kg, ShaderData *sd, uint id, AttributeElement *elem)
-{
- if(sd->object == ~0)
- return (int)ATTR_STD_NOT_FOUND;
-
-#ifdef __OSL__
- if (kg->osl) {
- return OSLShader::find_attribute(kg, sd, id, elem);
- }
- else
-#endif
- {
- /* for SVM, find attribute by unique id */
- uint attr_offset = sd->object*kernel_data.bvh.attributes_map_stride;
-#ifdef __HAIR__
- attr_offset = (sd->segment == ~0)? attr_offset: attr_offset + ATTR_PRIM_CURVE;
-#endif
- uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
-
- while(attr_map.x != id) {
- attr_offset += ATTR_PRIM_TYPES;
- attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
- }
-
- *elem = (AttributeElement)attr_map.y;
-
- if(sd->prim == ~0 && (AttributeElement)attr_map.y != ATTR_ELEMENT_MESH)
- return ATTR_STD_NOT_FOUND;
-
- /* return result */
- return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z;
- }
-}
+/* Generic primitive attribute reading functions */
ccl_device float primitive_attribute_float(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float *dx, float *dy)
{
-#ifdef __HAIR__
- if(sd->segment == ~0)
-#endif
+ if(sd->type & PRIMITIVE_ALL_TRIANGLE) {
return triangle_attribute_float(kg, sd, elem, offset, dx, dy);
+ }
#ifdef __HAIR__
- else
+ else if(sd->type & PRIMITIVE_ALL_CURVE) {
return curve_attribute_float(kg, sd, elem, offset, dx, dy);
+ }
+#endif
+#ifdef __VOLUME__
+ else if(sd->object != OBJECT_NONE && elem == ATTR_ELEMENT_VOXEL) {
+ return volume_attribute_float(kg, sd, elem, offset, dx, dy);
+ }
#endif
+ else {
+ if(dx) *dx = 0.0f;
+ if(dy) *dy = 0.0f;
+ return 0.0f;
+ }
}
ccl_device float3 primitive_attribute_float3(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float3 *dx, float3 *dy)
{
-#ifdef __HAIR__
- if(sd->segment == ~0)
-#endif
+ if(sd->type & PRIMITIVE_ALL_TRIANGLE) {
return triangle_attribute_float3(kg, sd, elem, offset, dx, dy);
+ }
#ifdef __HAIR__
- else
+ else if(sd->type & PRIMITIVE_ALL_CURVE) {
return curve_attribute_float3(kg, sd, elem, offset, dx, dy);
+ }
+#endif
+#ifdef __VOLUME__
+ else if(sd->object != OBJECT_NONE && elem == ATTR_ELEMENT_VOXEL) {
+ return volume_attribute_float3(kg, sd, elem, offset, dx, dy);
+ }
#endif
+ else {
+ if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
+ if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
+ return make_float3(0.0f, 0.0f, 0.0f);
+ }
}
-ccl_device Transform primitive_attribute_matrix(KernelGlobals *kg, const ShaderData *sd, int offset)
-{
- Transform tfm;
-
- tfm.x = kernel_tex_fetch(__attributes_float3, offset + 0);
- tfm.y = kernel_tex_fetch(__attributes_float3, offset + 1);
- tfm.z = kernel_tex_fetch(__attributes_float3, offset + 2);
- tfm.w = kernel_tex_fetch(__attributes_float3, offset + 3);
-
- return tfm;
-}
+/* Default UV coordinate */
ccl_device float3 primitive_uv(KernelGlobals *kg, ShaderData *sd)
{
@@ -104,6 +82,8 @@ ccl_device float3 primitive_uv(KernelGlobals *kg, ShaderData *sd)
return uv;
}
+/* Ptex coordinates */
+
ccl_device bool primitive_ptex(KernelGlobals *kg, ShaderData *sd, float2 *uv, int *face_id)
{
/* storing ptex data as attributes is not memory efficient but simple for tests */
@@ -123,10 +103,12 @@ ccl_device bool primitive_ptex(KernelGlobals *kg, ShaderData *sd, float2 *uv, in
return true;
}
+/* Surface tangent */
+
ccl_device float3 primitive_tangent(KernelGlobals *kg, ShaderData *sd)
{
#ifdef __HAIR__
- if(sd->segment != ~0)
+ if(sd->type & PRIMITIVE_ALL_CURVE)
#ifdef __DPDU__
return normalize(sd->dPdu);
#else
@@ -154,21 +136,39 @@ ccl_device float3 primitive_tangent(KernelGlobals *kg, ShaderData *sd)
}
}
-/* motion */
+/* Motion vector for motion pass */
ccl_device float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *sd)
{
- float3 motion_pre = sd->P, motion_post = sd->P;
+ /* center position */
+ float3 center;
+
+ if(sd->type & PRIMITIVE_ALL_CURVE) {
+ center = curve_motion_center_location(kg, sd);
+
+ if(!(sd->flag & SD_TRANSFORM_APPLIED))
+ object_position_transform(kg, sd, &center);
+ }
+ else
+ center = sd->P;
+
+ float3 motion_pre = center, motion_post = center;
/* deformation motion */
- AttributeElement elem_pre, elem_post;
- int offset_pre = find_attribute(kg, sd, ATTR_STD_MOTION_PRE, &elem_pre);
- int offset_post = find_attribute(kg, sd, ATTR_STD_MOTION_POST, &elem_post);
+ AttributeElement elem;
+ int offset = find_attribute(kg, sd, ATTR_STD_MOTION_VERTEX_POSITION, &elem);
+
+ if(offset != ATTR_STD_NOT_FOUND) {
+ /* get motion info */
+ int numverts, numkeys;
+ object_motion_info(kg, sd->object, NULL, &numverts, &numkeys);
- if(offset_pre != ATTR_STD_NOT_FOUND)
- motion_pre = primitive_attribute_float3(kg, sd, elem_pre, offset_pre, NULL, NULL);
- if(offset_post != ATTR_STD_NOT_FOUND)
- motion_post = primitive_attribute_float3(kg, sd, elem_post, offset_post, NULL, NULL);
+ /* lookup attributes */
+ int offset_next = (sd->type & PRIMITIVE_ALL_TRIANGLE)? offset + numverts: offset + numkeys;
+
+ motion_pre = primitive_attribute_float3(kg, sd, elem, offset, NULL, NULL);
+ motion_post = primitive_attribute_float3(kg, sd, elem, offset_next, NULL, NULL);
+ }
/* object motion. note that depending on the mesh having motion vectors, this
* transformation was set to match the world/object space of motion_pre/post */
@@ -180,13 +180,13 @@ ccl_device float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *sd)
tfm = object_fetch_vector_transform(kg, sd->object, OBJECT_VECTOR_MOTION_POST);
motion_post = transform_point(&tfm, motion_post);
- float3 P;
+ float3 motion_center;
/* camera motion, for perspective/orthographic motion.pre/post will be a
* world-to-raster matrix, for panorama it's world-to-camera */
if (kernel_data.cam.type != CAMERA_PANORAMA) {
tfm = kernel_data.cam.worldtoraster;
- P = transform_perspective(&tfm, sd->P);
+ motion_center = transform_perspective(&tfm, center);
tfm = kernel_data.cam.motion.pre;
motion_pre = transform_perspective(&tfm, motion_pre);
@@ -196,10 +196,10 @@ ccl_device float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *sd)
}
else {
tfm = kernel_data.cam.worldtocamera;
- P = normalize(transform_point(&tfm, sd->P));
- P = float2_to_float3(direction_to_panorama(kg, P));
- P.x *= kernel_data.cam.width;
- P.y *= kernel_data.cam.height;
+ motion_center = normalize(transform_point(&tfm, center));
+ motion_center = float2_to_float3(direction_to_panorama(kg, motion_center));
+ motion_center.x *= kernel_data.cam.width;
+ motion_center.y *= kernel_data.cam.height;
tfm = kernel_data.cam.motion.pre;
motion_pre = normalize(transform_point(&tfm, motion_pre));
@@ -214,12 +214,11 @@ ccl_device float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *sd)
motion_post.y *= kernel_data.cam.height;
}
- motion_pre = motion_pre - P;
- motion_post = P - motion_post;
+ motion_pre = motion_pre - motion_center;
+ motion_post = motion_center - motion_post;
return make_float4(motion_pre.x, motion_pre.y, motion_post.x, motion_post.y);
}
CCL_NAMESPACE_END
-#endif /* __KERNEL_ATTRIBUTE_CL__ */
diff --git a/intern/cycles/kernel/geom/geom_triangle.h b/intern/cycles/kernel/geom/geom_triangle.h
new file mode 100644
index 00000000000..355e36fef0c
--- /dev/null
+++ b/intern/cycles/kernel/geom/geom_triangle.h
@@ -0,0 +1,379 @@
+/*
+ * Adapted from code Copyright 2009-2010 NVIDIA Corporation
+ * Modifications Copyright 2011, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Triangle Primitive
+ *
+ * Basic triangle with 3 vertices is used to represent mesh surfaces. For BVH
+ * ray intersection we use a precomputed triangle storage to accelerate
+ * intersection at the cost of more memory usage */
+
+CCL_NAMESPACE_BEGIN
+
+/* Refine triangle intersection to a more precise hit point. For rays that travel
+ * far the precision is often poor, so with __INTERSECTION_REFINE__ the hit is
+ * re-solved from the approximate hit point; otherwise P + D*t is returned as is. */
+
+ccl_device_inline float3 triangle_refine(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray)
+{
+ float3 P = ray->P;
+ float3 D = ray->D;
+ float t = isect->t;
+
+#ifdef __INTERSECTION_REFINE__
+ if(isect->object != OBJECT_NONE) { /* hit lies on an object with its own transform */
+#ifdef __OBJECT_MOTION__
+ Transform tfm = sd->ob_itfm;
+#else
+ Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
+#endif
+
+ P = transform_point(&tfm, P);
+ D = transform_direction(&tfm, D*t); /* transform the full ray segment, not just the direction */
+ D = normalize_len(D, &t); /* t is overwritten here, presumably with the transformed segment length */
+ }
+
+ P = P + D*t; /* approximate hit point */
+
+ float4 v00 = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0);
+ float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z;
+ float invDz = 1.0f/(D.x*v00.x + D.y*v00.y + D.z*v00.z);
+ float rt = Oz * invDz; /* same t-value expression as triangle_intersect, evaluated from the approximate hit */
+
+ P = P + D*rt; /* apply the correction */
+
+ if(isect->object != OBJECT_NONE) {
+#ifdef __OBJECT_MOTION__
+ Transform tfm = sd->ob_tfm;
+#else
+ Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
+#endif
+
+ P = transform_point(&tfm, P); /* undo the inverse transform applied above */
+ }
+
+ return P;
+#else
+ return P + D*t;
+#endif
+}
+
+/* Same as triangle_refine, except that isect->t is assumed to be in object space for instancing, so D is not scaled by t before the transform */
+ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray)
+{
+ float3 P = ray->P;
+ float3 D = ray->D;
+ float t = isect->t;
+
+#ifdef __INTERSECTION_REFINE__
+ if(isect->object != OBJECT_NONE) {
+#ifdef __OBJECT_MOTION__
+ Transform tfm = sd->ob_itfm;
+#else
+ Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
+#endif
+
+ P = transform_point(&tfm, P);
+ D = transform_direction(&tfm, D);
+ D = normalize(D); /* t is left untouched, unlike in triangle_refine */
+ }
+
+ P = P + D*t; /* approximate hit point */
+
+ float4 v00 = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0);
+ float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z;
+ float invDz = 1.0f/(D.x*v00.x + D.y*v00.y + D.z*v00.z);
+ float rt = Oz * invDz; /* same t-value expression as triangle_intersect, evaluated from the approximate hit */
+
+ P = P + D*rt; /* apply the correction */
+
+ if(isect->object != OBJECT_NONE) {
+#ifdef __OBJECT_MOTION__
+ Transform tfm = sd->ob_tfm;
+#else
+ Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
+#endif
+
+ P = transform_point(&tfm, P); /* undo the inverse transform applied above */
+ }
+
+ return P;
+#else
+ return P + D*t;
+#endif
+}
+
+/* Point at barycentric (u, v) on triangle prim, plus its stored normal (Ng) and shader id; all three are written through the output pointers */
+ccl_device_inline void triangle_point_normal(KernelGlobals *kg, int prim, float u, float v, float3 *P, float3 *Ng, int *shader)
+{
+ /* load triangle vertices; the vertex indices are ints stored as float bits */
+ float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, prim));
+
+ float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x)));
+ float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y)));
+ float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z)));
+
+ /* compute point: u weights v0, v weights v1, the remainder weights v2 */
+ float t = 1.0f - u - v;
+ *P = (u*v0 + v*v1 + t*v2);
+
+ float4 Nm = kernel_tex_fetch(__tri_normal, prim); /* xyz is the normal, w carries the shader id */
+ *Ng = make_float3(Nm.x, Nm.y, Nm.z);
+ *shader = __float_as_int(Nm.w); /* shader id is an int stored as float bits */
+}
+
+/* Triangle vertex locations: fills P[0..2] with the three vertices of triangle prim */
+
+ccl_device_inline void triangle_vertices(KernelGlobals *kg, int prim, float3 P[3])
+{
+ float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, prim)); /* indices are ints stored as float bits */
+
+ P[0] = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x)));
+ P[1] = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y)));
+ P[2] = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z)));
+}
+
+/* Interpolate smooth vertex normal from vertices at barycentric (u, v); the result is normalized */
+
+ccl_device_inline float3 triangle_smooth_normal(KernelGlobals *kg, int prim, float u, float v)
+{
+ /* load triangle vertices; the vertex indices are ints stored as float bits */
+ float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, prim));
+
+ float3 n0 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.x)));
+ float3 n1 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.y)));
+ float3 n2 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.z)));
+
+ return normalize((1.0f - u - v)*n2 + u*n0 + v*n1); /* u weights n0, v weights n1, the remainder weights n2 */
+}
+
+/* Ray differentials on triangle: writes the derivatives of position w.r.t. u and v to dPdu and dPdv */
+
+ccl_device_inline void triangle_dPdudv(KernelGlobals *kg, int prim, float3 *dPdu, float3 *dPdv)
+{
+ /* fetch triangle vertex coordinates; the vertex indices are ints stored as float bits */
+ float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, prim));
+
+ float3 p0 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x)));
+ float3 p1 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y)));
+ float3 p2 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z)));
+
+ /* compute derivatives of P w.r.t. uv, for P = u*p0 + v*p1 + (1 - u - v)*p2 */
+ *dPdu = (p0 - p2);
+ *dPdv = (p1 - p2);
+}
+
+/* Reading attributes on various triangle elements: per face, per vertex (incl. vertex motion) or per corner; dx and dy are optional outputs */
+
+ccl_device float triangle_attribute_float(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float *dx, float *dy)
+{
+ if(elem == ATTR_ELEMENT_FACE) {
+ if(dx) *dx = 0.0f;
+ if(dy) *dy = 0.0f;
+
+ return kernel_tex_fetch(__attributes_float, offset + sd->prim); /* one value per triangle, hence zero derivatives */
+ }
+ else if(elem == ATTR_ELEMENT_VERTEX || elem == ATTR_ELEMENT_VERTEX_MOTION) {
+ float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, sd->prim)); /* indices are ints stored as float bits */
+
+ float f0 = kernel_tex_fetch(__attributes_float, offset + __float_as_int(tri_vindex.x));
+ float f1 = kernel_tex_fetch(__attributes_float, offset + __float_as_int(tri_vindex.y));
+ float f2 = kernel_tex_fetch(__attributes_float, offset + __float_as_int(tri_vindex.z));
+
+#ifdef __RAY_DIFFERENTIALS__
+ if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2; /* interpolation below with u, v replaced by their x differentials */
+ if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2; /* NOTE(review): without __RAY_DIFFERENTIALS__ dx/dy are left unwritten in this branch */
+#endif
+
+ return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2; /* barycentric interpolation of the three vertex values */
+ }
+ else if(elem == ATTR_ELEMENT_CORNER) {
+ int tri = offset + sd->prim*3; /* three consecutive values per triangle */
+ float f0 = kernel_tex_fetch(__attributes_float, tri + 0);
+ float f1 = kernel_tex_fetch(__attributes_float, tri + 1);
+ float f2 = kernel_tex_fetch(__attributes_float, tri + 2);
+
+#ifdef __RAY_DIFFERENTIALS__
+ if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2;
+ if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2;
+#endif
+
+ return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2;
+ }
+ else {
+ if(dx) *dx = 0.0f;
+ if(dy) *dy = 0.0f;
+
+ return 0.0f; /* any other element type yields zero */
+ }
+}
+
+ccl_device float3 triangle_attribute_float3(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float3 *dx, float3 *dy)
+{
+ if(elem == ATTR_ELEMENT_FACE) { /* float3 counterpart of triangle_attribute_float; dx and dy are optional outputs */
+ if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
+ if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
+
+ return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + sd->prim)); /* one value per triangle, hence zero derivatives */
+ }
+ else if(elem == ATTR_ELEMENT_VERTEX || elem == ATTR_ELEMENT_VERTEX_MOTION) {
+ float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, sd->prim)); /* indices are ints stored as float bits */
+
+ float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.x)));
+ float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.y)));
+ float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.z)));
+
+#ifdef __RAY_DIFFERENTIALS__
+ if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2; /* interpolation below with u, v replaced by their x differentials */
+ if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2; /* NOTE(review): without __RAY_DIFFERENTIALS__ dx/dy are left unwritten in this branch */
+#endif
+
+ return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2; /* barycentric interpolation of the three vertex values */
+ }
+ else if(elem == ATTR_ELEMENT_CORNER) {
+ int tri = offset + sd->prim*3; /* three consecutive values per triangle */
+ float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 0));
+ float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 1));
+ float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 2));
+
+#ifdef __RAY_DIFFERENTIALS__
+ if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2;
+ if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2;
+#endif
+
+ return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2;
+ }
+ else {
+ if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
+ if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
+
+ return make_float3(0.0f, 0.0f, 0.0f); /* any other element type yields zero */
+ }
+}
+
+/* Ray-Triangle intersection for BVH traversal. Returns true and overwrites isect
+ * only for a hit with 0 < t < isect->t (and matching visibility, when enabled).
+ * Based on Sven Woop's algorithm with precomputed triangle storage */
+
+ccl_device_inline bool triangle_intersect(KernelGlobals *kg, Intersection *isect,
+ float3 P, float3 dir, uint visibility, int object, int triAddr)
+{
+ /* compute and check intersection t-value */
+ float4 v00 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0);
+ float4 v11 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1);
+
+ float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z;
+ float invDz = 1.0f/(dir.x*v00.x + dir.y*v00.y + dir.z*v00.z);
+ float t = Oz * invDz;
+
+ if(t > 0.0f && t < isect->t) { /* in front of the origin and closer than the hit already stored in isect */
+ /* compute and check barycentric u */
+ float Ox = v11.w + P.x*v11.x + P.y*v11.y + P.z*v11.z;
+ float Dx = dir.x*v11.x + dir.y*v11.y + dir.z*v11.z;
+ float u = Ox + t*Dx;
+
+ if(u >= 0.0f) {
+ /* compute and check barycentric v; the third row is only fetched once u has passed */
+ float4 v22 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2);
+ float Oy = v22.w + P.x*v22.x + P.y*v22.y + P.z*v22.z;
+ float Dy = dir.x*v22.x + dir.y*v22.y + dir.z*v22.z;
+ float v = Oy + t*Dy;
+
+ if(v >= 0.0f && u + v <= 1.0f) {
+#ifdef __VISIBILITY_FLAG__
+ /* visibility flag test. we do it here under the assumption
+ * that most triangles are culled by node flags */
+ if(kernel_tex_fetch(__prim_visibility, triAddr) & visibility)
+#endif
+ {
+ /* record intersection */
+ isect->prim = triAddr;
+ isect->object = object;
+ isect->type = PRIMITIVE_TRIANGLE;
+ isect->u = u;
+ isect->v = v;
+ isect->t = t; /* later calls with this isect must now beat this t */
+ return true;
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
+/* Special ray intersection routines for subsurface scattering. In that case we
+ * only want to intersect with primitives in the same object, and in case of
+ * multiple hits we pick a single random primitive as the intersection point. */
+
+#ifdef __SUBSURFACE__
+ccl_device_inline void triangle_intersect_subsurface(KernelGlobals *kg, Intersection *isect_array,
+ float3 P, float3 dir, int object, int triAddr, float tmax, uint *num_hits, uint *lcg_state, int max_hits)
+{
+ /* compute and check intersection t-value */
+ float4 v00 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0);
+ float4 v11 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1);
+
+ float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z;
+ float invDz = 1.0f/(dir.x*v00.x + dir.y*v00.y + dir.z*v00.z);
+ float t = Oz * invDz;
+
+ if(t > 0.0f && t < tmax) { /* fixed tmax: unlike triangle_intersect, the range is not narrowed by earlier hits */
+ /* compute and check barycentric u */
+ float Ox = v11.w + P.x*v11.x + P.y*v11.y + P.z*v11.z;
+ float Dx = dir.x*v11.x + dir.y*v11.y + dir.z*v11.z;
+ float u = Ox + t*Dx;
+
+ if(u >= 0.0f) {
+ /* compute and check barycentric v */
+ float4 v22 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2);
+ float Oy = v22.w + P.x*v22.x + P.y*v22.y + P.z*v22.z;
+ float Dy = dir.x*v22.x + dir.y*v22.y + dir.z*v22.z;
+ float v = Oy + t*Dy;
+
+ if(v >= 0.0f && u + v <= 1.0f) {
+ (*num_hits)++; /* counts every hit, including ones that end up not being stored */
+
+ int hit; /* slot in isect_array that receives this hit */
+
+ if(*num_hits <= max_hits) {
+ hit = *num_hits - 1; /* array not full yet: append */
+ }
+ else {
+ /* reservoir sampling: if we are at the maximum number of
+ * hits, randomly replace element or skip it */
+ hit = lcg_step_uint(lcg_state) % *num_hits;
+
+ if(hit >= max_hits)
+ return; /* drawn slot lies outside the array: this hit is dropped */
+ }
+
+ /* record intersection */
+ Intersection *isect = &isect_array[hit];
+ isect->prim = triAddr;
+ isect->object = object;
+ isect->type = PRIMITIVE_TRIANGLE;
+ isect->u = u;
+ isect->v = v;
+ isect->t = t;
+ }
+ }
+ }
+}
+#endif
+
+CCL_NAMESPACE_END
+
diff --git a/intern/cycles/kernel/geom/geom_volume.h b/intern/cycles/kernel/geom/geom_volume.h
new file mode 100644
index 00000000000..963d6cbee9c
--- /dev/null
+++ b/intern/cycles/kernel/geom/geom_volume.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License
+ */
+
+/* Volume Primitive
+ *
+ * Volumes are just regions inside meshes with the mesh surface as boundaries.
+ * There isn't as much data to access as for surfaces, there is only a position
+ * to do lookups in 3D voxel or procedural textures.
+ *
+ * 3D voxel textures can be assigned as attributes per mesh, which means the
+ * same shader can be used for volume objects with different densities, etc. */
+
+CCL_NAMESPACE_BEGIN
+
+#ifdef __VOLUME__
+
+/* Return position normalized to 0..1 in mesh bounds */
+
+ccl_device float3 volume_normalized_position(KernelGlobals *kg, const ShaderData *sd, float3 P)
+{
+ /* todo: optimize this so it's just a single matrix multiplication when
+ * possible (not motion blur), or perhaps even just translation + scale */
+ AttributeElement attr_elem;
+ int attr_offset = find_attribute(kg, sd, ATTR_STD_GENERATED_TRANSFORM, &attr_elem);
+
+ object_inverse_position_transform(kg, sd, &P); /* presumably world space to object space -- TODO confirm against the helper's definition */
+
+ if(attr_offset != ATTR_STD_NOT_FOUND) { /* without the attribute, P is returned after the inverse object transform only */
+ Transform tfm = primitive_attribute_matrix(kg, sd, attr_offset);
+ P = transform_point(&tfm, P);
+ }
+
+ return P;
+}
+
+ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int id, float *dx, float *dy)
+{
+ float3 P = volume_normalized_position(kg, sd, sd->P); /* lookup position for the 3D image */
+ float4 r = kernel_tex_image_interp_3d(id, P.x, P.y, P.z);
+ /* dx and dy are optional and independent: zero each one only if it was passed */
+ if(dx) *dx = 0.0f;
+ if(dy) *dy = 0.0f;
+
+ /* todo: support float textures to lower memory usage for single floats */
+ return average(float4_to_float3(r)); /* scalar result is the average of the float3 part of the texel */
+}
+
+ccl_device float3 volume_attribute_float3(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int id, float3 *dx, float3 *dy)
+{
+ float3 P = volume_normalized_position(kg, sd, sd->P); /* lookup position for the 3D image */
+ float4 r = kernel_tex_image_interp_3d(id, P.x, P.y, P.z); /* id is passed straight to the 3D image lookup; elem is unused here */
+
+ if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); /* derivative outputs are optional and always zeroed */
+ if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
+
+ return float4_to_float3(r);
+}
+
+#endif
+
+CCL_NAMESPACE_END
+
diff --git a/intern/cycles/kernel/kernel.cpp b/intern/cycles/kernel/kernel.cpp
index 6cd14d3c51c..173028d50c8 100644
--- a/intern/cycles/kernel/kernel.cpp
+++ b/intern/cycles/kernel/kernel.cpp
@@ -37,7 +37,7 @@ void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t s
assert(0);
}
-void kernel_tex_copy(KernelGlobals *kg, const char *name, device_ptr mem, size_t width, size_t height)
+void kernel_tex_copy(KernelGlobals *kg, const char *name, device_ptr mem, size_t width, size_t height, size_t depth, InterpolationType interpolation)
{
if(0) {
}
@@ -61,8 +61,8 @@ void kernel_tex_copy(KernelGlobals *kg, const char *name, device_ptr mem, size_t
if(tex) {
tex->data = (float4*)mem;
- tex->width = width;
- tex->height = height;
+ tex->dimensions_set(width, height, depth);
+ tex->interpolation = interpolation;
}
}
else if(strstr(name, "__tex_image")) {
@@ -76,8 +76,8 @@ void kernel_tex_copy(KernelGlobals *kg, const char *name, device_ptr mem, size_t
if(tex) {
tex->data = (uchar4*)mem;
- tex->width = width;
- tex->height = height;
+ tex->dimensions_set(width, height, depth);
+ tex->interpolation = interpolation;
}
}
else
diff --git a/intern/cycles/kernel/kernel.cu b/intern/cycles/kernel/kernel.cu
index 5e6748c66fc..636e48b5456 100644
--- a/intern/cycles/kernel/kernel.cu
+++ b/intern/cycles/kernel/kernel.cu
@@ -24,7 +24,83 @@
#include "kernel_path.h"
#include "kernel_displace.h"
-extern "C" __global__ void kernel_cuda_path_trace(float *buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh, int offset, int stride)
+/* device data taken from CUDA occupancy calculator */
+
+#ifdef __CUDA_ARCH__
+
+/* 2.0 and 2.1 */
+#if __CUDA_ARCH__ == 200 || __CUDA_ARCH__ == 210
+#define CUDA_MULTIPRESSOR_MAX_REGISTERS 32768
+#define CUDA_MULTIPROCESSOR_MAX_BLOCKS 8
+#define CUDA_BLOCK_MAX_THREADS 1024
+#define CUDA_THREAD_MAX_REGISTERS 63
+
+/* tunable parameters */
+#define CUDA_THREADS_BLOCK_WIDTH 16
+#define CUDA_KERNEL_MAX_REGISTERS 32
+#define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 40
+
+/* 3.0 and 3.5 */
+#elif __CUDA_ARCH__ == 300 || __CUDA_ARCH__ == 350
+#define CUDA_MULTIPRESSOR_MAX_REGISTERS 65536
+#define CUDA_MULTIPROCESSOR_MAX_BLOCKS 16
+#define CUDA_BLOCK_MAX_THREADS 1024
+#define CUDA_THREAD_MAX_REGISTERS 63
+
+/* tunable parameters */
+#define CUDA_THREADS_BLOCK_WIDTH 16
+#define CUDA_KERNEL_MAX_REGISTERS 63
+#define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63
+
+/* 5.0 */
+#elif __CUDA_ARCH__ == 500
+#define CUDA_MULTIPRESSOR_MAX_REGISTERS 65536
+#define CUDA_MULTIPROCESSOR_MAX_BLOCKS 32
+#define CUDA_BLOCK_MAX_THREADS 1024
+#define CUDA_THREAD_MAX_REGISTERS 255
+
+/* tunable parameters */
+#define CUDA_THREADS_BLOCK_WIDTH 16
+#define CUDA_KERNEL_MAX_REGISTERS 63
+#define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63
+
+/* unknown architecture */
+#else
+#error "Unknown or unuspported CUDA architecture, can't determine launch bounds"
+#endif
+
+/* compute number of threads per block and minimum blocks per multiprocessor
+ * given the maximum number of registers per thread */
+
+#define CUDA_LAUNCH_BOUNDS(threads_block_width, thread_num_registers) \
+ __launch_bounds__( \
+ threads_block_width*threads_block_width, \
+ CUDA_MULTIPRESSOR_MAX_REGISTERS/(threads_block_width*threads_block_width*thread_num_registers) \
+ )
+
+/* sanity checks */
+
+#if CUDA_THREADS_BLOCK_WIDTH*CUDA_THREADS_BLOCK_WIDTH > CUDA_BLOCK_MAX_THREADS
+#error "Maximum number of threads per block exceeded"
+#endif
+
+#if CUDA_MULTIPRESSOR_MAX_REGISTERS/(CUDA_THREADS_BLOCK_WIDTH*CUDA_THREADS_BLOCK_WIDTH*CUDA_KERNEL_MAX_REGISTERS) > CUDA_MULTIPROCESSOR_MAX_BLOCKS
+#error "Maximum number of blocks per multiprocessor exceeded"
+#endif
+
+#if CUDA_KERNEL_MAX_REGISTERS > CUDA_THREAD_MAX_REGISTERS
+#error "Maximum number of registers per thread exceeded"
+#endif
+
+#if CUDA_KERNEL_BRANCHED_MAX_REGISTERS > CUDA_THREAD_MAX_REGISTERS
+#error "Maximum number of registers per thread exceeded"
+#endif
+
+/* kernels */
+
+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
+kernel_cuda_path_trace(float *buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh, int offset, int stride)
{
int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
int y = sy + blockDim.y*blockIdx.y + threadIdx.y;
@@ -34,7 +110,9 @@ extern "C" __global__ void kernel_cuda_path_trace(float *buffer, uint *rng_state
}
#ifdef __BRANCHED_PATH__
-extern "C" __global__ void kernel_cuda_branched_path_trace(float *buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh, int offset, int stride)
+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_BRANCHED_MAX_REGISTERS)
+kernel_cuda_branched_path_trace(float *buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh, int offset, int stride)
{
int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
int y = sy + blockDim.y*blockIdx.y + threadIdx.y;
@@ -44,7 +122,9 @@ extern "C" __global__ void kernel_cuda_branched_path_trace(float *buffer, uint *
}
#endif
-extern "C" __global__ void kernel_cuda_convert_to_byte(uchar4 *rgba, float *buffer, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride)
+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
+kernel_cuda_convert_to_byte(uchar4 *rgba, float *buffer, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride)
{
int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
int y = sy + blockDim.y*blockIdx.y + threadIdx.y;
@@ -53,7 +133,9 @@ extern "C" __global__ void kernel_cuda_convert_to_byte(uchar4 *rgba, float *buff
kernel_film_convert_to_byte(NULL, rgba, buffer, sample_scale, x, y, offset, stride);
}
-extern "C" __global__ void kernel_cuda_convert_to_half_float(uchar4 *rgba, float *buffer, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride)
+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
+kernel_cuda_convert_to_half_float(uchar4 *rgba, float *buffer, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride)
{
int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
int y = sy + blockDim.y*blockIdx.y + threadIdx.y;
@@ -62,10 +144,14 @@ extern "C" __global__ void kernel_cuda_convert_to_half_float(uchar4 *rgba, float
kernel_film_convert_to_half_float(NULL, rgba, buffer, sample_scale, x, y, offset, stride);
}
-extern "C" __global__ void kernel_cuda_shader(uint4 *input, float4 *output, int type, int sx)
+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
+kernel_cuda_shader(uint4 *input, float4 *output, int type, int sx)
{
int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
kernel_shader_evaluate(NULL, input, output, (ShaderEvalType)type, x);
}
+#endif
+
diff --git a/intern/cycles/kernel/kernel.h b/intern/cycles/kernel/kernel.h
index 039dc791b08..c4a08646bab 100644
--- a/intern/cycles/kernel/kernel.h
+++ b/intern/cycles/kernel/kernel.h
@@ -32,7 +32,7 @@ void *kernel_osl_memory(KernelGlobals *kg);
bool kernel_osl_use(KernelGlobals *kg);
void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t size);
-void kernel_tex_copy(KernelGlobals *kg, const char *name, device_ptr mem, size_t width, size_t height);
+void kernel_tex_copy(KernelGlobals *kg, const char *name, device_ptr mem, size_t width, size_t height, size_t depth, InterpolationType interpolation=INTERPOLATION_LINEAR);
void kernel_cpu_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state,
int sample, int x, int y, int offset, int stride);
diff --git a/intern/cycles/kernel/kernel_accumulate.h b/intern/cycles/kernel/kernel_accumulate.h
index 582a220ab3c..b4f6dcdace9 100644
--- a/intern/cycles/kernel/kernel_accumulate.h
+++ b/intern/cycles/kernel/kernel_accumulate.h
@@ -407,5 +407,30 @@ ccl_device_inline float3 path_radiance_clamp_and_sum(KernelGlobals *kg, PathRadi
return L_sum;
}
+ccl_device_inline void path_radiance_accum_sample(PathRadiance *L, PathRadiance *L_sample, int num_samples)
+{
+ float fac = 1.0f/num_samples; /* L_sample is added into L weighted by 1/num_samples */
+
+#ifdef __PASSES__
+ L->direct_diffuse += L_sample->direct_diffuse*fac; /* direct lighting components */
+ L->direct_glossy += L_sample->direct_glossy*fac;
+ L->direct_transmission += L_sample->direct_transmission*fac;
+ L->direct_subsurface += L_sample->direct_subsurface*fac;
+
+ L->indirect_diffuse += L_sample->indirect_diffuse*fac; /* indirect lighting components */
+ L->indirect_glossy += L_sample->indirect_glossy*fac;
+ L->indirect_transmission += L_sample->indirect_transmission*fac;
+ L->indirect_subsurface += L_sample->indirect_subsurface*fac;
+
+ L->emission += L_sample->emission*fac; /* remaining accumulated fields */
+ L->background += L_sample->background*fac;
+ L->ao += L_sample->ao*fac;
+ L->shadow += L_sample->shadow*fac;
+ L->mist += L_sample->mist*fac;
+#else
+ *L += *L_sample * fac; /* without __PASSES__ the whole PathRadiance is scaled and added in one expression */
+#endif
+}
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_avx.cpp b/intern/cycles/kernel/kernel_avx.cpp
index d2a7142c551..354214c406e 100644
--- a/intern/cycles/kernel/kernel_avx.cpp
+++ b/intern/cycles/kernel/kernel_avx.cpp
@@ -77,6 +77,6 @@ CCL_NAMESPACE_END
/* needed for some linkers in combination with scons making empty compilation unit in a library */
void __dummy_function_cycles_avx(void);
-void __dummy_function_cycles_avx(void){}
+void __dummy_function_cycles_avx(void) {}
#endif
diff --git a/intern/cycles/kernel/kernel_bvh.h b/intern/cycles/kernel/kernel_bvh.h
deleted file mode 100644
index 93e546eaece..00000000000
--- a/intern/cycles/kernel/kernel_bvh.h
+++ /dev/null
@@ -1,1258 +0,0 @@
-/*
- * Adapted from code Copyright 2009-2010 NVIDIA Corporation
- * Modifications Copyright 2011, Blender Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-CCL_NAMESPACE_BEGIN
-
-/*
- * "Persistent while-while kernel" used in:
- *
- * "Understanding the Efficiency of Ray Traversal on GPUs",
- * Timo Aila and Samuli Laine,
- * Proc. High-Performance Graphics 2009
- */
-
-/* bottom-most stack entry, indicating the end of traversal */
-#define ENTRYPOINT_SENTINEL 0x76543210
-
-/* 64 object BVH + 64 mesh BVH + 64 object node splitting */
-#define BVH_STACK_SIZE 192
-#define BVH_NODE_SIZE 4
-#define TRI_NODE_SIZE 3
-
-/* silly workaround for float extended precision that happens when compiling
- * without sse support on x86, it results in different results for float ops
- * that you would otherwise expect to compare correctly */
-#if !defined(__i386__) || defined(__SSE__)
-#define NO_EXTENDED_PRECISION
-#else
-#define NO_EXTENDED_PRECISION volatile
-#endif
-
-ccl_device_inline float3 bvh_inverse_direction(float3 dir)
-{
- /* avoid divide by zero (ooeps = exp2f(-80.0f)) */
- float ooeps = 0.00000000000000000000000082718061255302767487140869206996285356581211090087890625f;
- float3 idir;
-
- idir.x = 1.0f/((fabsf(dir.x) > ooeps)? dir.x: copysignf(ooeps, dir.x));
- idir.y = 1.0f/((fabsf(dir.y) > ooeps)? dir.y: copysignf(ooeps, dir.y));
- idir.z = 1.0f/((fabsf(dir.z) > ooeps)? dir.z: copysignf(ooeps, dir.z));
-
- return idir;
-}
-
-ccl_device_inline void bvh_instance_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *idir, float *t, const float tmax)
-{
- Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
-
- *P = transform_point(&tfm, ray->P);
-
- float3 dir = transform_direction(&tfm, ray->D);
-
- float len;
- dir = normalize_len(dir, &len);
-
- *idir = bvh_inverse_direction(dir);
-
- if(*t != FLT_MAX)
- *t *= len;
-}
-
-ccl_device_inline void bvh_instance_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *idir, float *t, const float tmax)
-{
- if(*t != FLT_MAX) {
- Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
- *t *= len(transform_direction(&tfm, 1.0f/(*idir)));
- }
-
- *P = ray->P;
- *idir = bvh_inverse_direction(ray->D);
-}
-
-#ifdef __OBJECT_MOTION__
-ccl_device_inline void bvh_instance_motion_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *idir, float *t, Transform *tfm, const float tmax)
-{
- Transform itfm;
- *tfm = object_fetch_transform_motion_test(kg, object, ray->time, &itfm);
-
- *P = transform_point(&itfm, ray->P);
-
- float3 dir = transform_direction(&itfm, ray->D);
-
- float len;
- dir = normalize_len(dir, &len);
-
- *idir = bvh_inverse_direction(dir);
-
- if(*t != FLT_MAX)
- *t *= len;
-}
-
-ccl_device_inline void bvh_instance_motion_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *idir, float *t, Transform *tfm, const float tmax)
-{
- if(*t != FLT_MAX)
- *t *= len(transform_direction(tfm, 1.0f/(*idir)));
-
- *P = ray->P;
- *idir = bvh_inverse_direction(ray->D);
-}
-#endif
-
-/* Sven Woop's algorithm */
-ccl_device_inline bool bvh_triangle_intersect(KernelGlobals *kg, Intersection *isect,
- float3 P, float3 idir, uint visibility, int object, int triAddr)
-{
- /* compute and check intersection t-value */
- float4 v00 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0);
- float4 v11 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1);
- float3 dir = 1.0f/idir;
-
- float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z;
- float invDz = 1.0f/(dir.x*v00.x + dir.y*v00.y + dir.z*v00.z);
- float t = Oz * invDz;
-
- if(t > 0.0f && t < isect->t) {
- /* compute and check barycentric u */
- float Ox = v11.w + P.x*v11.x + P.y*v11.y + P.z*v11.z;
- float Dx = dir.x*v11.x + dir.y*v11.y + dir.z*v11.z;
- float u = Ox + t*Dx;
-
- if(u >= 0.0f) {
- /* compute and check barycentric v */
- float4 v22 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2);
- float Oy = v22.w + P.x*v22.x + P.y*v22.y + P.z*v22.z;
- float Dy = dir.x*v22.x + dir.y*v22.y + dir.z*v22.z;
- float v = Oy + t*Dy;
-
- if(v >= 0.0f && u + v <= 1.0f) {
-#ifdef __VISIBILITY_FLAG__
- /* visibility flag test. we do it here under the assumption
- * that most triangles are culled by node flags */
- if(kernel_tex_fetch(__prim_visibility, triAddr) & visibility)
-#endif
- {
- /* record intersection */
- isect->prim = triAddr;
- isect->object = object;
- isect->u = u;
- isect->v = v;
- isect->t = t;
- return true;
- }
- }
- }
- }
-
- return false;
-}
-
-#ifdef __HAIR__
-ccl_device_inline void curvebounds(float *lower, float *upper, float *extremta, float *extrema, float *extremtb, float *extremb, float p0, float p1, float p2, float p3)
-{
- float halfdiscroot = (p2 * p2 - 3 * p3 * p1);
- float ta = -1.0f;
- float tb = -1.0f;
- *extremta = -1.0f;
- *extremtb = -1.0f;
- *upper = p0;
- *lower = p0 + p1 + p2 + p3;
- *extrema = *upper;
- *extremb = *lower;
- if(*lower >= *upper) {
- *upper = *lower;
- *lower = p0;
- }
-
- if(halfdiscroot >= 0) {
- halfdiscroot = sqrt(halfdiscroot);
- ta = (-p2 - halfdiscroot) / (3 * p3);
- tb = (-p2 + halfdiscroot) / (3 * p3);
- }
-
- float t2;
- float t3;
- if(ta > 0.0f && ta < 1.0f) {
- t2 = ta * ta;
- t3 = t2 * ta;
- *extremta = ta;
- *extrema = p3 * t3 + p2 * t2 + p1 * ta + p0;
- if(*extrema > *upper) {
- *upper = *extrema;
- }
- if(*extrema < *lower) {
- *lower = *extrema;
- }
- }
- if(tb > 0.0f && tb < 1.0f) {
- t2 = tb * tb;
- t3 = t2 * tb;
- *extremtb = tb;
- *extremb = p3 * t3 + p2 * t2 + p1 * tb + p0;
- if(*extremb >= *upper) {
- *upper = *extremb;
- }
- if(*extremb <= *lower) {
- *lower = *extremb;
- }
- }
-}
-
-#ifdef __KERNEL_SSE2__
-ccl_device_inline __m128 transform_point_T3(const __m128 t[3], const __m128 &a)
-{
- return fma(broadcast<0>(a), t[0], fma(broadcast<1>(a), t[1], _mm_mul_ps(broadcast<2>(a), t[2])));
-}
-#endif
-
-#ifdef __KERNEL_SSE2__
-/* Pass P and idir by reference to aligned vector */
-ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersection *isect,
- const float3 &P, const float3 &idir, uint visibility, int object, int curveAddr, int segment, uint *lcg_state, float difl, float extmax)
-#else
-ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersection *isect,
- float3 P, float3 idir, uint visibility, int object, int curveAddr, int segment, uint *lcg_state, float difl, float extmax)
-#endif
-{
- float epsilon = 0.0f;
- float r_st, r_en;
-
- int depth = kernel_data.curve.subdivisions;
- int flags = kernel_data.curve.curveflags;
- int prim = kernel_tex_fetch(__prim_index, curveAddr);
-
-#ifdef __KERNEL_SSE2__
- __m128 vdir = _mm_div_ps(_mm_set1_ps(1.0f), (__m128 &)idir);
- __m128 vcurve_coef[4];
- const float3 *curve_coef = (float3 *)vcurve_coef;
-
- {
- __m128 dtmp = _mm_mul_ps(vdir, vdir);
- __m128 d_ss = _mm_sqrt_ss(_mm_add_ss(dtmp, broadcast<2>(dtmp)));
- __m128 rd_ss = _mm_div_ss(_mm_set_ss(1.0f), d_ss);
-
- __m128i v00vec = _mm_load_si128((__m128i *)&kg->__curves.data[prim]);
- int2 &v00 = (int2 &)v00vec;
-
- int k0 = v00.x + segment;
- int k1 = k0 + 1;
- int ka = max(k0 - 1, v00.x);
- int kb = min(k1 + 1, v00.x + v00.y - 1);
-
- __m128 P0 = _mm_load_ps(&kg->__curve_keys.data[ka].x);
- __m128 P1 = _mm_load_ps(&kg->__curve_keys.data[k0].x);
- __m128 P2 = _mm_load_ps(&kg->__curve_keys.data[k1].x);
- __m128 P3 = _mm_load_ps(&kg->__curve_keys.data[kb].x);
-
- __m128 rd_sgn = set_sign_bit<0, 1, 1, 1>(broadcast<0>(rd_ss));
- __m128 mul_zxxy = _mm_mul_ps(shuffle<2, 0, 0, 1>(vdir), rd_sgn);
- __m128 mul_yz = _mm_mul_ps(shuffle<1, 2, 1, 2>(vdir), mul_zxxy);
- __m128 mul_shuf = shuffle<0, 1, 2, 3>(mul_zxxy, mul_yz);
- __m128 vdir0 = _mm_and_ps(vdir, _mm_castsi128_ps(_mm_setr_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0)));
-
- __m128 htfm0 = shuffle<0, 2, 0, 3>(mul_shuf, vdir0);
- __m128 htfm1 = shuffle<1, 0, 1, 3>(_mm_set_ss(_mm_cvtss_f32(d_ss)), vdir0);
- __m128 htfm2 = shuffle<1, 3, 2, 3>(mul_shuf, vdir0);
-
- __m128 htfm[] = { htfm0, htfm1, htfm2 };
- __m128 p0 = transform_point_T3(htfm, _mm_sub_ps(P0, (__m128 &)P));
- __m128 p1 = transform_point_T3(htfm, _mm_sub_ps(P1, (__m128 &)P));
- __m128 p2 = transform_point_T3(htfm, _mm_sub_ps(P2, (__m128 &)P));
- __m128 p3 = transform_point_T3(htfm, _mm_sub_ps(P3, (__m128 &)P));
-
- float fc = 0.71f;
- __m128 vfc = _mm_set1_ps(fc);
- __m128 vfcxp3 = _mm_mul_ps(vfc, p3);
-
- vcurve_coef[0] = p1;
- vcurve_coef[1] = _mm_mul_ps(vfc, _mm_sub_ps(p2, p0));
- vcurve_coef[2] = fma(_mm_set1_ps(fc * 2.0f), p0, fma(_mm_set1_ps(fc - 3.0f), p1, fms(_mm_set1_ps(3.0f - 2.0f * fc), p2, vfcxp3)));
- vcurve_coef[3] = fms(_mm_set1_ps(fc - 2.0f), _mm_sub_ps(p2, p1), fms(vfc, p0, vfcxp3));
-
- r_st = ((float4 &)P1).w;
- r_en = ((float4 &)P2).w;
- }
-#else
- float3 curve_coef[4];
-
- /* curve Intersection check */
- float3 dir = 1.0f/idir;
-
- /* obtain curve parameters */
- {
- /* ray transform created - this should be created at beginning of intersection loop */
- Transform htfm;
- float d = sqrtf(dir.x * dir.x + dir.z * dir.z);
- htfm = make_transform(
- dir.z / d, 0, -dir.x /d, 0,
- -dir.x * dir.y /d, d, -dir.y * dir.z /d, 0,
- dir.x, dir.y, dir.z, 0,
- 0, 0, 0, 1);
-
- float4 v00 = kernel_tex_fetch(__curves, prim);
-
- int k0 = __float_as_int(v00.x) + segment;
- int k1 = k0 + 1;
-
- int ka = max(k0 - 1,__float_as_int(v00.x));
- int kb = min(k1 + 1,__float_as_int(v00.x) + __float_as_int(v00.y) - 1);
-
- float4 P0 = kernel_tex_fetch(__curve_keys, ka);
- float4 P1 = kernel_tex_fetch(__curve_keys, k0);
- float4 P2 = kernel_tex_fetch(__curve_keys, k1);
- float4 P3 = kernel_tex_fetch(__curve_keys, kb);
-
- float3 p0 = transform_point(&htfm, float4_to_float3(P0) - P);
- float3 p1 = transform_point(&htfm, float4_to_float3(P1) - P);
- float3 p2 = transform_point(&htfm, float4_to_float3(P2) - P);
- float3 p3 = transform_point(&htfm, float4_to_float3(P3) - P);
-
- float fc = 0.71f;
- curve_coef[0] = p1;
- curve_coef[1] = -fc*p0 + fc*p2;
- curve_coef[2] = 2.0f * fc * p0 + (fc - 3.0f) * p1 + (3.0f - 2.0f * fc) * p2 - fc * p3;
- curve_coef[3] = -fc * p0 + (2.0f - fc) * p1 + (fc - 2.0f) * p2 + fc * p3;
- r_st = P1.w;
- r_en = P2.w;
- }
-#endif
-
- float r_curr = max(r_st, r_en);
-
- if((flags & CURVE_KN_RIBBONS) || !(flags & CURVE_KN_BACKFACING))
- epsilon = 2 * r_curr;
-
- /* find bounds - this is slow for cubic curves */
- float upper, lower;
-
- float zextrem[4];
- curvebounds(&lower, &upper, &zextrem[0], &zextrem[1], &zextrem[2], &zextrem[3], curve_coef[0].z, curve_coef[1].z, curve_coef[2].z, curve_coef[3].z);
- if(lower - r_curr > isect->t || upper + r_curr < epsilon)
- return false;
-
- /* minimum width extension */
- float mw_extension = min(difl * fabsf(upper), extmax);
- float r_ext = mw_extension + r_curr;
-
- float xextrem[4];
- curvebounds(&lower, &upper, &xextrem[0], &xextrem[1], &xextrem[2], &xextrem[3], curve_coef[0].x, curve_coef[1].x, curve_coef[2].x, curve_coef[3].x);
- if(lower > r_ext || upper < -r_ext)
- return false;
-
- float yextrem[4];
- curvebounds(&lower, &upper, &yextrem[0], &yextrem[1], &yextrem[2], &yextrem[3], curve_coef[0].y, curve_coef[1].y, curve_coef[2].y, curve_coef[3].y);
- if(lower > r_ext || upper < -r_ext)
- return false;
-
- /* setup recurrent loop */
- int level = 1 << depth;
- int tree = 0;
- float resol = 1.0f / (float)level;
- bool hit = false;
-
- /* begin loop */
- while(!(tree >> (depth))) {
- float i_st = tree * resol;
- float i_en = i_st + (level * resol);
-#ifdef __KERNEL_SSE2__
- __m128 vi_st = _mm_set1_ps(i_st), vi_en = _mm_set1_ps(i_en);
- __m128 vp_st = fma(fma(fma(vcurve_coef[3], vi_st, vcurve_coef[2]), vi_st, vcurve_coef[1]), vi_st, vcurve_coef[0]);
- __m128 vp_en = fma(fma(fma(vcurve_coef[3], vi_en, vcurve_coef[2]), vi_en, vcurve_coef[1]), vi_en, vcurve_coef[0]);
-
- __m128 vbmin = _mm_min_ps(vp_st, vp_en);
- __m128 vbmax = _mm_max_ps(vp_st, vp_en);
-
- float3 &bmin = (float3 &)vbmin, &bmax = (float3 &)vbmax;
- float &bminx = bmin.x, &bminy = bmin.y, &bminz = bmin.z;
- float &bmaxx = bmax.x, &bmaxy = bmax.y, &bmaxz = bmax.z;
- float3 &p_st = (float3 &)vp_st, &p_en = (float3 &)vp_en;
-#else
- float3 p_st = ((curve_coef[3] * i_st + curve_coef[2]) * i_st + curve_coef[1]) * i_st + curve_coef[0];
- float3 p_en = ((curve_coef[3] * i_en + curve_coef[2]) * i_en + curve_coef[1]) * i_en + curve_coef[0];
-
- float bminx = min(p_st.x, p_en.x);
- float bmaxx = max(p_st.x, p_en.x);
- float bminy = min(p_st.y, p_en.y);
- float bmaxy = max(p_st.y, p_en.y);
- float bminz = min(p_st.z, p_en.z);
- float bmaxz = max(p_st.z, p_en.z);
-#endif
-
- if(xextrem[0] >= i_st && xextrem[0] <= i_en) {
- bminx = min(bminx,xextrem[1]);
- bmaxx = max(bmaxx,xextrem[1]);
- }
- if(xextrem[2] >= i_st && xextrem[2] <= i_en) {
- bminx = min(bminx,xextrem[3]);
- bmaxx = max(bmaxx,xextrem[3]);
- }
- if(yextrem[0] >= i_st && yextrem[0] <= i_en) {
- bminy = min(bminy,yextrem[1]);
- bmaxy = max(bmaxy,yextrem[1]);
- }
- if(yextrem[2] >= i_st && yextrem[2] <= i_en) {
- bminy = min(bminy,yextrem[3]);
- bmaxy = max(bmaxy,yextrem[3]);
- }
- if(zextrem[0] >= i_st && zextrem[0] <= i_en) {
- bminz = min(bminz,zextrem[1]);
- bmaxz = max(bmaxz,zextrem[1]);
- }
- if(zextrem[2] >= i_st && zextrem[2] <= i_en) {
- bminz = min(bminz,zextrem[3]);
- bmaxz = max(bmaxz,zextrem[3]);
- }
-
- float r1 = r_st + (r_en - r_st) * i_st;
- float r2 = r_st + (r_en - r_st) * i_en;
- r_curr = max(r1, r2);
-
- mw_extension = min(difl * fabsf(bmaxz), extmax);
- float r_ext = mw_extension + r_curr;
- float coverage = 1.0f;
-
- if (bminz - r_curr > isect->t || bmaxz + r_curr < epsilon || bminx > r_ext|| bmaxx < -r_ext|| bminy > r_ext|| bmaxy < -r_ext) {
- /* the bounding box does not overlap the square centered at O */
- tree += level;
- level = tree & -tree;
- }
- else if (level == 1) {
-
- /* the maximum recursion depth is reached.
- * check if dP0.(Q-P0)>=0 and dPn.(Pn-Q)>=0.
- * dP* is reversed if necessary.*/
- float t = isect->t;
- float u = 0.0f;
- if(flags & CURVE_KN_RIBBONS) {
- float3 tg = (p_en - p_st);
- float w = tg.x * tg.x + tg.y * tg.y;
- if (w == 0) {
- tree++;
- level = tree & -tree;
- continue;
- }
- w = -(p_st.x * tg.x + p_st.y * tg.y) / w;
- w = clamp((float)w, 0.0f, 1.0f);
-
- /* compute u on the curve segment */
- u = i_st * (1 - w) + i_en * w;
- r_curr = r_st + (r_en - r_st) * u;
- /* compare x-y distances */
- float3 p_curr = ((curve_coef[3] * u + curve_coef[2]) * u + curve_coef[1]) * u + curve_coef[0];
-
- float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1];
- if (dot(tg, dp_st)< 0)
- dp_st *= -1;
- if (dot(dp_st, -p_st) + p_curr.z * dp_st.z < 0) {
- tree++;
- level = tree & -tree;
- continue;
- }
- float3 dp_en = (3 * curve_coef[3] * i_en + 2 * curve_coef[2]) * i_en + curve_coef[1];
- if (dot(tg, dp_en) < 0)
- dp_en *= -1;
- if (dot(dp_en, p_en) - p_curr.z * dp_en.z < 0) {
- tree++;
- level = tree & -tree;
- continue;
- }
-
- /* compute coverage */
- float r_ext = r_curr;
- coverage = 1.0f;
- if(difl != 0.0f) {
- mw_extension = min(difl * fabsf(bmaxz), extmax);
- r_ext = mw_extension + r_curr;
- float d = sqrtf(p_curr.x * p_curr.x + p_curr.y * p_curr.y);
- float d0 = d - r_curr;
- float d1 = d + r_curr;
- if (d0 >= 0)
- coverage = (min(d1 / mw_extension, 1.0f) - min(d0 / mw_extension, 1.0f)) * 0.5f;
- else // inside
- coverage = (min(d1 / mw_extension, 1.0f) + min(-d0 / mw_extension, 1.0f)) * 0.5f;
- }
-
- if (p_curr.x * p_curr.x + p_curr.y * p_curr.y >= r_ext * r_ext || p_curr.z <= epsilon || isect->t < p_curr.z) {
- tree++;
- level = tree & -tree;
- continue;
- }
-
- t = p_curr.z;
- }
- else {
- float l = len(p_en - p_st);
- /* minimum width extension */
- float or1 = r1;
- float or2 = r2;
- if(difl != 0.0f) {
- mw_extension = min(len(p_st - P) * difl, extmax);
- or1 = r1 < mw_extension ? mw_extension : r1;
- mw_extension = min(len(p_en - P) * difl, extmax);
- or2 = r2 < mw_extension ? mw_extension : r2;
- }
- /* --- */
- float3 tg = (p_en - p_st) / l;
- float gd = (or2 - or1) / l;
- float difz = -dot(p_st,tg);
- float cyla = 1.0f - (tg.z * tg.z * (1 + gd*gd));
- float halfb = (-p_st.z - tg.z*(difz + gd*(difz*gd + or1)));
- float tcentre = -halfb/cyla;
- float zcentre = difz + (tg.z * tcentre);
- float3 tdif = - p_st;
- tdif.z += tcentre;
- float tdifz = dot(tdif,tg);
- float tb = 2*(tdif.z - tg.z*(tdifz + gd*(tdifz*gd + or1)));
- float tc = dot(tdif,tdif) - tdifz * tdifz * (1 + gd*gd) - or1*or1 - 2*or1*tdifz*gd;
- float td = tb*tb - 4*cyla*tc;
- if (td < 0.0f) {
- tree++;
- level = tree & -tree;
- continue;
- }
-
- float rootd = sqrtf(td);
- float correction = ((-tb - rootd)/(2*cyla));
- t = tcentre + correction;
-
- float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1];
- if (dot(tg, dp_st)< 0)
- dp_st *= -1;
- float3 dp_en = (3 * curve_coef[3] * i_en + 2 * curve_coef[2]) * i_en + curve_coef[1];
- if (dot(tg, dp_en) < 0)
- dp_en *= -1;
-
- if(flags & CURVE_KN_BACKFACING && (dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 || isect->t < t || t <= 0.0f)) {
- correction = ((-tb + rootd)/(2*cyla));
- t = tcentre + correction;
- }
-
- if (dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 || isect->t < t || t <= 0.0f) {
- tree++;
- level = tree & -tree;
- continue;
- }
-
- float w = (zcentre + (tg.z * correction))/l;
- w = clamp((float)w, 0.0f, 1.0f);
- /* compute u on the curve segment */
- u = i_st * (1 - w) + i_en * w;
- r_curr = r1 + (r2 - r1) * w;
- r_ext = or1 + (or2 - or1) * w;
- coverage = r_curr/r_ext;
-
- }
- /* we found a new intersection */
-
- /* stochastic fade from minimum width */
- if(lcg_state && coverage != 1.0f) {
- if(lcg_step_float(lcg_state) > coverage)
- return hit;
- }
-
-#ifdef __VISIBILITY_FLAG__
- /* visibility flag test. we do it here under the assumption
- * that most triangles are culled by node flags */
- if(kernel_tex_fetch(__prim_visibility, curveAddr) & visibility)
-#endif
- {
- /* record intersection */
- isect->prim = curveAddr;
- isect->segment = segment;
- isect->object = object;
- isect->u = u;
- isect->v = 0.0f;
- /*isect->v = 1.0f - coverage; */
- isect->t = t;
- hit = true;
- }
-
- tree++;
- level = tree & -tree;
- }
- else {
- /* split the curve into two curves and process */
- level = level >> 1;
- }
- }
-
- return hit;
-}
-
-ccl_device_inline bool bvh_curve_intersect(KernelGlobals *kg, Intersection *isect,
- float3 P, float3 idir, uint visibility, int object, int curveAddr, int segment, uint *lcg_state, float difl, float extmax)
-{
- /* curve Intersection check */
- int flags = kernel_data.curve.curveflags;
-
- int prim = kernel_tex_fetch(__prim_index, curveAddr);
- float4 v00 = kernel_tex_fetch(__curves, prim);
-
- int cnum = __float_as_int(v00.x);
- int k0 = cnum + segment;
- int k1 = k0 + 1;
-
- float4 P1 = kernel_tex_fetch(__curve_keys, k0);
- float4 P2 = kernel_tex_fetch(__curve_keys, k1);
-
- float or1 = P1.w;
- float or2 = P2.w;
- float3 p1 = float4_to_float3(P1);
- float3 p2 = float4_to_float3(P2);
-
- /* minimum width extension */
- float r1 = or1;
- float r2 = or2;
- if(difl != 0.0f) {
- float pixelsize = min(len(p1 - P) * difl, extmax);
- r1 = or1 < pixelsize ? pixelsize : or1;
- pixelsize = min(len(p2 - P) * difl, extmax);
- r2 = or2 < pixelsize ? pixelsize : or2;
- }
- /* --- */
-
- float mr = max(r1,r2);
- float3 dif = P - p1;
- float3 dir = 1.0f/idir;
- float l = len(p2 - p1);
-
- float sp_r = mr + 0.5f * l;
- float3 sphere_dif = P - ((p1 + p2) * 0.5f);
- float sphere_b = dot(dir,sphere_dif);
- sphere_dif = sphere_dif - sphere_b * dir;
- sphere_b = dot(dir,sphere_dif);
- float sdisc = sphere_b * sphere_b - len_squared(sphere_dif) + sp_r * sp_r;
- if(sdisc < 0.0f)
- return false;
-
- /* obtain parameters and test midpoint distance for suitable modes */
- float3 tg = (p2 - p1) / l;
- float gd = (r2 - r1) / l;
- float dirz = dot(dir,tg);
- float difz = dot(dif,tg);
-
- float a = 1.0f - (dirz*dirz*(1 + gd*gd));
- float halfb = dot(dir,dif) - dirz*(difz + gd*(difz*gd + r1));
-
- float tcentre = -halfb/a;
- float zcentre = difz + (dirz * tcentre);
-
- if((tcentre > isect->t) && !(flags & CURVE_KN_ACCURATE))
- return false;
- if((zcentre < 0 || zcentre > l) && !(flags & CURVE_KN_ACCURATE) && !(flags & CURVE_KN_INTERSECTCORRECTION))
- return false;
-
- /* test minimum separation */
- float3 cprod = cross(tg, dir);
- float3 cprod2 = cross(tg, dif);
- float cprodsq = len_squared(cprod);
- float cprod2sq = len_squared(cprod2);
- float distscaled = dot(cprod,dif);
-
- if(cprodsq == 0)
- distscaled = cprod2sq;
- else
- distscaled = (distscaled*distscaled)/cprodsq;
-
- if(distscaled > mr*mr)
- return false;
-
- /* calculate true intersection */
- float3 tdif = P - p1 + tcentre * dir;
- float tdifz = dot(tdif,tg);
- float tb = 2*(dot(dir,tdif) - dirz*(tdifz + gd*(tdifz*gd + r1)));
- float tc = dot(tdif,tdif) - tdifz * tdifz * (1 + gd*gd) - r1*r1 - 2*r1*tdifz*gd;
- float td = tb*tb - 4*a*tc;
-
- if (td < 0.0f)
- return false;
-
- float rootd = 0.0f;
- float correction = 0.0f;
- if(flags & CURVE_KN_ACCURATE) {
- rootd = sqrtf(td);
- correction = ((-tb - rootd)/(2*a));
- }
-
- float t = tcentre + correction;
-
- if(t < isect->t) {
-
- if(flags & CURVE_KN_INTERSECTCORRECTION) {
- rootd = sqrtf(td);
- correction = ((-tb - rootd)/(2*a));
- t = tcentre + correction;
- }
-
- float z = zcentre + (dirz * correction);
- bool backface = false;
-
- if(flags & CURVE_KN_BACKFACING && (t < 0.0f || z < 0 || z > l)) {
- backface = true;
- correction = ((-tb + rootd)/(2*a));
- t = tcentre + correction;
- z = zcentre + (dirz * correction);
- }
-
- /* stochastic fade from minimum width */
- float adjradius = or1 + z * (or2 - or1) / l;
- adjradius = adjradius / (r1 + z * gd);
- if(lcg_state && adjradius != 1.0f) {
- if(lcg_step_float(lcg_state) > adjradius)
- return false;
- }
- /* --- */
-
- if(t > 0.0f && t < isect->t && z >= 0 && z <= l) {
-
- if (flags & CURVE_KN_ENCLOSEFILTER) {
- float enc_ratio = 1.01f;
- if((dot(P - p1, tg) > -r1 * enc_ratio) && (dot(P - p2, tg) < r2 * enc_ratio)) {
- float a2 = 1.0f - (dirz*dirz*(1 + gd*gd*enc_ratio*enc_ratio));
- float c2 = dot(dif,dif) - difz * difz * (1 + gd*gd*enc_ratio*enc_ratio) - r1*r1*enc_ratio*enc_ratio - 2*r1*difz*gd*enc_ratio;
- if(a2*c2 < 0.0f)
- return false;
- }
- }
-
-#ifdef __VISIBILITY_FLAG__
- /* visibility flag test. we do it here under the assumption
- * that most triangles are culled by node flags */
- if(kernel_tex_fetch(__prim_visibility, curveAddr) & visibility)
-#endif
- {
- /* record intersection */
- isect->prim = curveAddr;
- isect->segment = segment;
- isect->object = object;
- isect->u = z/l;
- isect->v = td/(4*a*a);
- /*isect->v = 1.0f - adjradius;*/
- isect->t = t;
-
- if(backface)
- isect->u = -isect->u;
-
- return true;
- }
- }
- }
-
- return false;
-}
-#endif
-
-#ifdef __SUBSURFACE__
-/* Special ray intersection routines for subsurface scattering. In that case we
- * only want to intersect with primitives in the same object, and if case of
- * multiple hits we pick a single random primitive as the intersection point. */
-
-ccl_device_inline void bvh_triangle_intersect_subsurface(KernelGlobals *kg, Intersection *isect_array,
- float3 P, float3 idir, int object, int triAddr, float tmax, uint *num_hits, uint *lcg_state, int max_hits)
-{
- /* compute and check intersection t-value */
- float4 v00 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0);
- float4 v11 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1);
- float3 dir = 1.0f/idir;
-
- float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z;
- float invDz = 1.0f/(dir.x*v00.x + dir.y*v00.y + dir.z*v00.z);
- float t = Oz * invDz;
-
- if(t > 0.0f && t < tmax) {
- /* compute and check barycentric u */
- float Ox = v11.w + P.x*v11.x + P.y*v11.y + P.z*v11.z;
- float Dx = dir.x*v11.x + dir.y*v11.y + dir.z*v11.z;
- float u = Ox + t*Dx;
-
- if(u >= 0.0f) {
- /* compute and check barycentric v */
- float4 v22 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2);
- float Oy = v22.w + P.x*v22.x + P.y*v22.y + P.z*v22.z;
- float Dy = dir.x*v22.x + dir.y*v22.y + dir.z*v22.z;
- float v = Oy + t*Dy;
-
- if(v >= 0.0f && u + v <= 1.0f) {
- (*num_hits)++;
-
- int hit;
-
- if(*num_hits <= max_hits) {
- hit = *num_hits - 1;
- }
- else {
- /* reservoir sampling: if we are at the maximum number of
- * hits, randomly replace element or skip it */
- hit = lcg_step_uint(lcg_state) % *num_hits;
-
- if(hit >= max_hits)
- return;
- }
-
- /* record intersection */
- Intersection *isect = &isect_array[hit];
- isect->prim = triAddr;
- isect->object = object;
- isect->u = u;
- isect->v = v;
- isect->t = t;
- }
- }
- }
-}
-#endif
-
-/* BVH intersection function variations */
-
-#define BVH_INSTANCING 1
-#define BVH_MOTION 2
-#define BVH_HAIR 4
-#define BVH_HAIR_MINIMUM_WIDTH 8
-
-#define BVH_FUNCTION_NAME bvh_intersect
-#define BVH_FUNCTION_FEATURES 0
-#include "kernel_bvh_traversal.h"
-
-#if defined(__INSTANCING__)
-#define BVH_FUNCTION_NAME bvh_intersect_instancing
-#define BVH_FUNCTION_FEATURES BVH_INSTANCING
-#include "kernel_bvh_traversal.h"
-#endif
-
-#if defined(__HAIR__)
-#define BVH_FUNCTION_NAME bvh_intersect_hair
-#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH
-#include "kernel_bvh_traversal.h"
-#endif
-
-#if defined(__OBJECT_MOTION__)
-#define BVH_FUNCTION_NAME bvh_intersect_motion
-#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION
-#include "kernel_bvh_traversal.h"
-#endif
-
-#if defined(__HAIR__) && defined(__OBJECT_MOTION__)
-#define BVH_FUNCTION_NAME bvh_intersect_hair_motion
-#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH|BVH_MOTION
-#include "kernel_bvh_traversal.h"
-#endif
-
-#if defined(__SUBSURFACE__)
-#define BVH_FUNCTION_NAME bvh_intersect_subsurface
-#define BVH_FUNCTION_FEATURES 0
-#include "kernel_bvh_subsurface.h"
-#endif
-
-#if defined(__SUBSURFACE__) && defined(__INSTANCING__)
-#define BVH_FUNCTION_NAME bvh_intersect_subsurface_instancing
-#define BVH_FUNCTION_FEATURES BVH_INSTANCING
-#include "kernel_bvh_subsurface.h"
-#endif
-
-#if defined(__SUBSURFACE__) && defined(__HAIR__)
-#define BVH_FUNCTION_NAME bvh_intersect_subsurface_hair
-#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR
-#include "kernel_bvh_subsurface.h"
-#endif
-
-#if defined(__SUBSURFACE__) && defined(__OBJECT_MOTION__)
-#define BVH_FUNCTION_NAME bvh_intersect_subsurface_motion
-#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION
-#include "kernel_bvh_subsurface.h"
-#endif
-
-#if defined(__SUBSURFACE__) && defined(__HAIR__) && defined(__OBJECT_MOTION__)
-#define BVH_FUNCTION_NAME bvh_intersect_subsurface_hair_motion
-#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_MOTION
-#include "kernel_bvh_subsurface.h"
-#endif
-
-/* to work around titan bug when using arrays instead of textures */
-#if !defined(__KERNEL_CUDA__) || defined(__KERNEL_CUDA_TEX_STORAGE__)
-ccl_device_inline
-#else
-ccl_device_noinline
-#endif
-#ifdef __HAIR__
-bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect, uint *lcg_state, float difl, float extmax)
-#else
-bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect)
-#endif
-{
-#ifdef __OBJECT_MOTION__
- if(kernel_data.bvh.have_motion) {
-#ifdef __HAIR__
- if(kernel_data.bvh.have_curves)
- return bvh_intersect_hair_motion(kg, ray, isect, visibility, lcg_state, difl, extmax);
-#endif /* __HAIR__ */
-
- return bvh_intersect_motion(kg, ray, isect, visibility);
- }
-#endif /* __OBJECT_MOTION__ */
-
-#ifdef __HAIR__
- if(kernel_data.bvh.have_curves)
- return bvh_intersect_hair(kg, ray, isect, visibility, lcg_state, difl, extmax);
-#endif /* __HAIR__ */
-
-#ifdef __KERNEL_CPU__
-
-#ifdef __INSTANCING__
- if(kernel_data.bvh.have_instancing)
- return bvh_intersect_instancing(kg, ray, isect, visibility);
-#endif /* __INSTANCING__ */
-
- return bvh_intersect(kg, ray, isect, visibility);
-#else /* __KERNEL_CPU__ */
-
-#ifdef __INSTANCING__
- return bvh_intersect_instancing(kg, ray, isect, visibility);
-#else
- return bvh_intersect(kg, ray, isect, visibility);
-#endif /* __INSTANCING__ */
-
-#endif /* __KERNEL_CPU__ */
-}
-
-/* to work around titan bug when using arrays instead of textures */
-#ifdef __SUBSURFACE__
-#if !defined(__KERNEL_CUDA__) || defined(__KERNEL_CUDA_TEX_STORAGE__)
-ccl_device_inline
-#else
-ccl_device_noinline
-#endif
-uint scene_intersect_subsurface(KernelGlobals *kg, const Ray *ray, Intersection *isect, int subsurface_object, uint *lcg_state, int max_hits)
-{
-#ifdef __OBJECT_MOTION__
- if(kernel_data.bvh.have_motion) {
-#ifdef __HAIR__
- if(kernel_data.bvh.have_curves)
- return bvh_intersect_subsurface_hair_motion(kg, ray, isect, subsurface_object, lcg_state, max_hits);
-#endif /* __HAIR__ */
-
- return bvh_intersect_subsurface_motion(kg, ray, isect, subsurface_object, lcg_state, max_hits);
- }
-#endif /* __OBJECT_MOTION__ */
-
-#ifdef __HAIR__
- if(kernel_data.bvh.have_curves)
- return bvh_intersect_subsurface_hair(kg, ray, isect, subsurface_object, lcg_state, max_hits);
-#endif /* __HAIR__ */
-
-#ifdef __KERNEL_CPU__
-
-#ifdef __INSTANCING__
- if(kernel_data.bvh.have_instancing)
- return bvh_intersect_subsurface_instancing(kg, ray, isect, subsurface_object, lcg_state, max_hits);
-#endif /* __INSTANCING__ */
-
- return bvh_intersect_subsurface(kg, ray, isect, subsurface_object, lcg_state, max_hits);
-#else /* __KERNEL_CPU__ */
-
-#ifdef __INSTANCING__
- return bvh_intersect_subsurface_instancing(kg, ray, isect, subsurface_object, lcg_state, max_hits);
-#else
- return bvh_intersect_subsurface(kg, ray, isect, subsurface_object, lcg_state, max_hits);
-#endif /* __INSTANCING__ */
-
-#endif /* __KERNEL_CPU__ */
-}
-#endif
-
-/* Ray offset to avoid self intersection */
-
-ccl_device_inline float3 ray_offset(float3 P, float3 Ng)
-{
-#ifdef __INTERSECTION_REFINE__
- const float epsilon_f = 1e-5f;
- /* ideally this should match epsilon_f, but instancing/mblur
- * precision makes it problematic */
- const float epsilon_test = 1.0f;
- const int epsilon_i = 32;
-
- float3 res;
-
- /* x component */
- if(fabsf(P.x) < epsilon_test) {
- res.x = P.x + Ng.x*epsilon_f;
- }
- else {
- uint ix = __float_as_uint(P.x);
- ix += ((ix ^ __float_as_uint(Ng.x)) >> 31)? -epsilon_i: epsilon_i;
- res.x = __uint_as_float(ix);
- }
-
- /* y component */
- if(fabsf(P.y) < epsilon_test) {
- res.y = P.y + Ng.y*epsilon_f;
- }
- else {
- uint iy = __float_as_uint(P.y);
- iy += ((iy ^ __float_as_uint(Ng.y)) >> 31)? -epsilon_i: epsilon_i;
- res.y = __uint_as_float(iy);
- }
-
- /* z component */
- if(fabsf(P.z) < epsilon_test) {
- res.z = P.z + Ng.z*epsilon_f;
- }
- else {
- uint iz = __float_as_uint(P.z);
- iz += ((iz ^ __float_as_uint(Ng.z)) >> 31)? -epsilon_i: epsilon_i;
- res.z = __uint_as_float(iz);
- }
-
- return res;
-#else
- const float epsilon_f = 1e-4f;
- return P + epsilon_f*Ng;
-#endif
-}
-
-/* Refine triangle intersection to more precise hit point. For rays that travel
- * far the precision is often not so good, this reintersects the primitive from
- * a closer distance. */
-
-ccl_device_inline float3 bvh_triangle_refine(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray)
-{
- float3 P = ray->P;
- float3 D = ray->D;
- float t = isect->t;
-
-#ifdef __INTERSECTION_REFINE__
- if(isect->object != ~0) {
-#ifdef __OBJECT_MOTION__
- Transform tfm = sd->ob_itfm;
-#else
- Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
-#endif
-
- P = transform_point(&tfm, P);
- D = transform_direction(&tfm, D*t);
- D = normalize_len(D, &t);
- }
-
- P = P + D*t;
-
- float4 v00 = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0);
- float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z;
- float invDz = 1.0f/(D.x*v00.x + D.y*v00.y + D.z*v00.z);
- float rt = Oz * invDz;
-
- P = P + D*rt;
-
- if(isect->object != ~0) {
-#ifdef __OBJECT_MOTION__
- Transform tfm = sd->ob_tfm;
-#else
- Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
-#endif
-
- P = transform_point(&tfm, P);
- }
-
- return P;
-#else
- return P + D*t;
-#endif
-}
-
-/* same as above, except that isect->t is assumed to be in object space for instancing */
-ccl_device_inline float3 bvh_triangle_refine_subsurface(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray)
-{
- float3 P = ray->P;
- float3 D = ray->D;
- float t = isect->t;
-
-#ifdef __INTERSECTION_REFINE__
- if(isect->object != ~0) {
-#ifdef __OBJECT_MOTION__
- Transform tfm = sd->ob_itfm;
-#else
- Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
-#endif
-
- P = transform_point(&tfm, P);
- D = transform_direction(&tfm, D);
- D = normalize(D);
- }
-
- P = P + D*t;
-
- float4 v00 = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0);
- float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z;
- float invDz = 1.0f/(D.x*v00.x + D.y*v00.y + D.z*v00.z);
- float rt = Oz * invDz;
-
- P = P + D*rt;
-
- if(isect->object != ~0) {
-#ifdef __OBJECT_MOTION__
- Transform tfm = sd->ob_tfm;
-#else
- Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
-#endif
-
- P = transform_point(&tfm, P);
- }
-
- return P;
-#else
- return P + D*t;
-#endif
-}
-
-#ifdef __HAIR__
-
-ccl_device_inline float3 curvetangent(float t, float3 p0, float3 p1, float3 p2, float3 p3)
-{
- float fc = 0.71f;
- float data[4];
- float t2 = t * t;
- data[0] = -3.0f * fc * t2 + 4.0f * fc * t - fc;
- data[1] = 3.0f * (2.0f - fc) * t2 + 2.0f * (fc - 3.0f) * t;
- data[2] = 3.0f * (fc - 2.0f) * t2 + 2.0f * (3.0f - 2.0f * fc) * t + fc;
- data[3] = 3.0f * fc * t2 - 2.0f * fc * t;
- return data[0] * p0 + data[1] * p1 + data[2] * p2 + data[3] * p3;
-}
-
-ccl_device_inline float3 curvepoint(float t, float3 p0, float3 p1, float3 p2, float3 p3)
-{
- float data[4];
- float fc = 0.71f;
- float t2 = t * t;
- float t3 = t2 * t;
- data[0] = -fc * t3 + 2.0f * fc * t2 - fc * t;
- data[1] = (2.0f - fc) * t3 + (fc - 3.0f) * t2 + 1.0f;
- data[2] = (fc - 2.0f) * t3 + (3.0f - 2.0f * fc) * t2 + fc * t;
- data[3] = fc * t3 - fc * t2;
- return data[0] * p0 + data[1] * p1 + data[2] * p2 + data[3] * p3;
-}
-
-ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray)
-{
- int flag = kernel_data.curve.curveflags;
- float t = isect->t;
- float3 P = ray->P;
- float3 D = ray->D;
-
- if(isect->object != ~0) {
-#ifdef __OBJECT_MOTION__
- Transform tfm = sd->ob_itfm;
-#else
- Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
-#endif
-
- P = transform_point(&tfm, P);
- D = transform_direction(&tfm, D*t);
- D = normalize_len(D, &t);
- }
-
- int prim = kernel_tex_fetch(__prim_index, isect->prim);
- float4 v00 = kernel_tex_fetch(__curves, prim);
-
- int k0 = __float_as_int(v00.x) + isect->segment;
- int k1 = k0 + 1;
-
- float4 P1 = kernel_tex_fetch(__curve_keys, k0);
- float4 P2 = kernel_tex_fetch(__curve_keys, k1);
- float l = 1.0f;
- float3 tg = normalize_len(float4_to_float3(P2 - P1), &l);
- float r1 = P1.w;
- float r2 = P2.w;
- float gd = ((r2 - r1)/l);
-
- P = P + D*t;
-
- if(flag & CURVE_KN_INTERPOLATE) {
- int ka = max(k0 - 1,__float_as_int(v00.x));
- int kb = min(k1 + 1,__float_as_int(v00.x) + __float_as_int(v00.y) - 1);
-
- float4 P0 = kernel_tex_fetch(__curve_keys, ka);
- float4 P3 = kernel_tex_fetch(__curve_keys, kb);
-
- float3 p[4];
- p[0] = float4_to_float3(P0);
- p[1] = float4_to_float3(P1);
- p[2] = float4_to_float3(P2);
- p[3] = float4_to_float3(P3);
-
-#ifdef __UV__
- sd->u = isect->u;
- sd->v = 0.0f;
-#endif
-
- tg = normalize(curvetangent(isect->u, p[0], p[1], p[2], p[3]));
-
- if(kernel_data.curve.curveflags & CURVE_KN_RIBBONS)
- sd->Ng = normalize(-(D - tg * (dot(tg, D))));
- else {
- float3 p_curr = curvepoint(isect->u, p[0], p[1], p[2], p[3]);
- sd->Ng = normalize(P - p_curr);
- sd->Ng = sd->Ng - gd * tg;
- sd->Ng = normalize(sd->Ng);
- }
- sd->N = sd->Ng;
- }
- else {
- float3 dif = P - float4_to_float3(P1);
-
-#ifdef __UV__
- sd->u = dot(dif,tg)/l;
- sd->v = 0.0f;
-#endif
-
- if (flag & CURVE_KN_TRUETANGENTGNORMAL) {
- sd->Ng = -(D - tg * dot(tg, D));
- sd->Ng = normalize(sd->Ng);
- }
- else {
- sd->Ng = (dif - tg * sd->u * l) / (P1.w + sd->u * l * gd);
- if (gd != 0.0f) {
- sd->Ng = sd->Ng - gd * tg ;
- sd->Ng = normalize(sd->Ng);
- }
- }
-
- sd->N = sd->Ng;
- }
-
-#ifdef __DPDU__
- /* dPdu/dPdv */
- sd->dPdu = tg;
- sd->dPdv = cross(tg, sd->Ng);
-#endif
-
- /*add fading parameter for minimum pixel width with transparency bsdf*/
- /*sd->curve_transparency = isect->v;*/
- /*sd->curve_radius = sd->u * gd * l + r1;*/
-
- if(isect->object != ~0) {
-#ifdef __OBJECT_MOTION__
- Transform tfm = sd->ob_tfm;
-#else
- Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
-#endif
-
- P = transform_point(&tfm, P);
- }
-
- return P;
-}
-#endif
-
-CCL_NAMESPACE_END
-
diff --git a/intern/cycles/kernel/kernel_camera.h b/intern/cycles/kernel/kernel_camera.h
index 887b1afddd4..7fc66a9fdee 100644
--- a/intern/cycles/kernel/kernel_camera.h
+++ b/intern/cycles/kernel/kernel_camera.h
@@ -229,7 +229,7 @@ ccl_device void camera_sample(KernelGlobals *kg, int x, int y, float filter_u, f
if(kernel_data.cam.shuttertime == -1.0f)
ray->time = TIME_INVALID;
else
- ray->time = 0.5f + 0.5f*(time - 0.5f)*kernel_data.cam.shuttertime;
+ ray->time = time;
#endif
/* sample */
@@ -266,7 +266,7 @@ ccl_device_inline float3 camera_world_to_ndc(KernelGlobals *kg, ShaderData *sd,
{
if(kernel_data.cam.type != CAMERA_PANORAMA) {
/* perspective / ortho */
- if(sd->object == ~0 && kernel_data.cam.type == CAMERA_PERSPECTIVE)
+ if(sd->object == PRIM_NONE && kernel_data.cam.type == CAMERA_PERSPECTIVE)
P += camera_position(kg);
Transform tfm = kernel_data.cam.worldtondc;
@@ -276,7 +276,7 @@ ccl_device_inline float3 camera_world_to_ndc(KernelGlobals *kg, ShaderData *sd,
/* panorama */
Transform tfm = kernel_data.cam.worldtocamera;
- if(sd->object != ~0)
+ if(sd->object != OBJECT_NONE)
P = normalize(transform_point(&tfm, P));
else
P = normalize(transform_direction(&tfm, P));
diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h
index b213e91274d..d027bb62ebe 100644
--- a/intern/cycles/kernel/kernel_compat_cpu.h
+++ b/intern/cycles/kernel/kernel_compat_cpu.h
@@ -20,9 +20,9 @@
#define __KERNEL_CPU__
#include "util_debug.h"
-#include "util_half.h"
#include "util_math.h"
#include "util_simd.h"
+#include "util_half.h"
#include "util_types.h"
CCL_NAMESPACE_BEGIN
@@ -95,38 +95,128 @@ template<typename T> struct texture_image {
ccl_always_inline float4 interp(float x, float y, bool periodic = true)
{
- if(!data)
+ if(UNLIKELY(!data))
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
int ix, iy, nix, niy;
- float tx = frac(x*width - 0.5f, &ix);
- float ty = frac(y*height - 0.5f, &iy);
- if(periodic) {
- ix = wrap_periodic(ix, width);
- iy = wrap_periodic(iy, height);
-
- nix = wrap_periodic(ix+1, width);
- niy = wrap_periodic(iy+1, height);
+ if(interpolation == INTERPOLATION_CLOSEST) {
+ frac(x*(float)width, &ix);
+ frac(y*(float)height, &iy);
+ if(periodic) {
+ ix = wrap_periodic(ix, width);
+ iy = wrap_periodic(iy, height);
+
+ }
+ else {
+ ix = wrap_clamp(ix, width);
+ iy = wrap_clamp(iy, height);
+ }
+ return read(data[ix + iy*width]);
}
else {
- ix = wrap_clamp(ix, width);
- iy = wrap_clamp(iy, height);
-
- nix = wrap_clamp(ix+1, width);
- niy = wrap_clamp(iy+1, height);
+ float tx = frac(x*(float)width - 0.5f, &ix);
+ float ty = frac(y*(float)height - 0.5f, &iy);
+
+ if(periodic) {
+ ix = wrap_periodic(ix, width);
+ iy = wrap_periodic(iy, height);
+
+ nix = wrap_periodic(ix+1, width);
+ niy = wrap_periodic(iy+1, height);
+ }
+ else {
+ ix = wrap_clamp(ix, width);
+ iy = wrap_clamp(iy, height);
+
+ nix = wrap_clamp(ix+1, width);
+ niy = wrap_clamp(iy+1, height);
+ }
+
+ float4 r = (1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width]);
+ r += (1.0f - ty)*tx*read(data[nix + iy*width]);
+ r += ty*(1.0f - tx)*read(data[ix + niy*width]);
+ r += ty*tx*read(data[nix + niy*width]);
+
+ return r;
}
+ }
+
+ ccl_always_inline float4 interp_3d(float x, float y, float z, bool periodic = false)
+ {
+ if(UNLIKELY(!data))
+ return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- float4 r = (1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width]);
- r += (1.0f - ty)*tx*read(data[nix + iy*width]);
- r += ty*(1.0f - tx)*read(data[ix + niy*width]);
- r += ty*tx*read(data[nix + niy*width]);
+ int ix, iy, iz, nix, niy, niz;
+
+ if(interpolation == INTERPOLATION_CLOSEST) {
+ frac(x*(float)width, &ix);
+ frac(y*(float)height, &iy);
+ frac(z*(float)depth, &iz);
+
+ if(periodic) {
+ ix = wrap_periodic(ix, width);
+ iy = wrap_periodic(iy, height);
+ iz = wrap_periodic(iz, depth);
+ }
+ else {
+ ix = wrap_clamp(ix, width);
+ iy = wrap_clamp(iy, height);
+ iz = wrap_clamp(iz, depth);
+ }
+
+ return read(data[ix + iy*width + iz*width*height]);
+ }
+ else {
+ float tx = frac(x*(float)width - 0.5f, &ix);
+ float ty = frac(y*(float)height - 0.5f, &iy);
+ float tz = frac(z*(float)depth - 0.5f, &iz);
+
+ if(periodic) {
+ ix = wrap_periodic(ix, width);
+ iy = wrap_periodic(iy, height);
+ iz = wrap_periodic(iz, depth);
+
+ nix = wrap_periodic(ix+1, width);
+ niy = wrap_periodic(iy+1, height);
+ niz = wrap_periodic(iz+1, depth);
+ }
+ else {
+ ix = wrap_clamp(ix, width);
+ iy = wrap_clamp(iy, height);
+ iz = wrap_clamp(iz, depth);
+
+ nix = wrap_clamp(ix+1, width);
+ niy = wrap_clamp(iy+1, height);
+ niz = wrap_clamp(iz+1, depth);
+ }
+
+ float4 r;
+
+ r = (1.0f - tz)*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + iz*width*height]);
+ r += (1.0f - tz)*(1.0f - ty)*tx*read(data[nix + iy*width + iz*width*height]);
+ r += (1.0f - tz)*ty*(1.0f - tx)*read(data[ix + niy*width + iz*width*height]);
+ r += (1.0f - tz)*ty*tx*read(data[nix + niy*width + iz*width*height]);
+
+ r += tz*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + niz*width*height]);
+ r += tz*(1.0f - ty)*tx*read(data[nix + iy*width + niz*width*height]);
+ r += tz*ty*(1.0f - tx)*read(data[ix + niy*width + niz*width*height]);
+ r += tz*ty*tx*read(data[nix + niy*width + niz*width*height]);
+
+ return r;
+ }
+ }
- return r;
+ ccl_always_inline void dimensions_set(int width_, int height_, int depth_)
+ {
+ width = width_;
+ height = height_;
+ depth = depth_;
}
T *data;
- int width, height;
+ int interpolation;
+ int width, height, depth;
};
typedef texture<float4> texture_float4;
@@ -146,6 +236,7 @@ typedef texture_image<uchar4> texture_image_uchar4;
#define kernel_tex_fetch_m128i(tex, index) (kg->tex.fetch_m128i(index))
#define kernel_tex_lookup(tex, t, offset, size) (kg->tex.lookup(t, offset, size))
#define kernel_tex_image_interp(tex, x, y) ((tex < MAX_FLOAT_IMAGES) ? kg->texture_float_images[tex].interp(x, y) : kg->texture_byte_images[tex - MAX_FLOAT_IMAGES].interp(x, y))
+#define kernel_tex_image_interp_3d(tex, x, y, z) ((tex < MAX_FLOAT_IMAGES) ? kg->texture_float_images[tex].interp_3d(x, y, z) : kg->texture_byte_images[tex - MAX_FLOAT_IMAGES].interp_3d(x, y, z))
#define kernel_data (kg->__data)
diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h
index 15e7353ec38..e4c20d26ff1 100644
--- a/intern/cycles/kernel/kernel_compat_cuda.h
+++ b/intern/cycles/kernel/kernel_compat_cuda.h
@@ -60,7 +60,7 @@ typedef texture<uchar4, 2, cudaReadModeNormalizedFloat> texture_image_uchar4;
/* In order to use full 6GB of memory on Titan cards, use arrays instead
* of textures. On earlier cards this seems slower, but on Titan it is
* actually slightly faster in tests. */
-#if __CUDA_ARCH__ < 350
+#if __CUDA_ARCH__ < 300
#define __KERNEL_CUDA_TEX_STORAGE__
#endif
diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h
index 4f4414cc298..8346b09619e 100644
--- a/intern/cycles/kernel/kernel_compat_opencl.h
+++ b/intern/cycles/kernel/kernel_compat_opencl.h
@@ -85,27 +85,36 @@
#define __float_as_uint(x) as_uint(x)
#define __int_as_float(x) as_float(x)
#define __float_as_int(x) as_int(x)
-#define sqrtf(x) sqrt(((float)x))
-#define cosf(x) cos(((float)x))
-#define sinf(x) sin(((float)x))
#define powf(x, y) pow(((float)x), ((float)y))
#define fabsf(x) fabs(((float)x))
#define copysignf(x, y) copysign(((float)x), ((float)y))
-#define cosf(x) cos(((float)x))
#define asinf(x) asin(((float)x))
#define acosf(x) acos(((float)x))
#define atanf(x) atan(((float)x))
-#define tanf(x) tan(((float)x))
-#define logf(x) log(((float)x))
#define floorf(x) floor(((float)x))
#define ceilf(x) ceil(((float)x))
-#define expf(x) exp(((float)x))
#define hypotf(x, y) hypot(((float)x), ((float)y))
#define atan2f(x, y) atan2(((float)x), ((float)y))
#define fmaxf(x, y) fmax(((float)x), ((float)y))
#define fminf(x, y) fmin(((float)x), ((float)y))
#define fmodf(x, y) fmod((float)x, (float)y)
+/* Transcendental math wrappers.
+ *
+ * The native_* built-ins are selected when __CL_USE_NATIVE__ is NOT defined,
+ * and the regular built-ins are selected when it is defined.
+ * NOTE(review): the macro name suggests the opposite polarity; confirm the
+ * intent before flipping it, since that would change the default.
+ *
+ * The argument is parenthesized before the cast so that an expression such
+ * as sinf(a / b) casts the whole expression rather than only `a`. */
+#ifndef __CL_USE_NATIVE__
+#define sinf(x) native_sin(((float)(x)))
+#define cosf(x) native_cos(((float)(x)))
+#define tanf(x) native_tan(((float)(x)))
+#define expf(x) native_exp(((float)(x)))
+#define sqrtf(x) native_sqrt(((float)(x)))
+#define logf(x) native_log(((float)(x)))
+#else
+#define sinf(x) sin(((float)(x)))
+#define cosf(x) cos(((float)(x)))
+#define tanf(x) tan(((float)(x)))
+#define expf(x) exp(((float)(x)))
+#define sqrtf(x) sqrt(((float)(x)))
+#define logf(x) log(((float)(x)))
+#endif
+
/* data lookup defines */
#define kernel_data (*kg->data)
#define kernel_tex_fetch(t, index) kg->t[index]
diff --git a/intern/cycles/kernel/kernel_curve.h b/intern/cycles/kernel/kernel_curve.h
deleted file mode 100644
index 821ac50eaa9..00000000000
--- a/intern/cycles/kernel/kernel_curve.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License
- */
-
-CCL_NAMESPACE_BEGIN
-
-#ifdef __HAIR__
-
-/* curve attributes */
-
-ccl_device float curve_attribute_float(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float *dx, float *dy)
-{
- if(elem == ATTR_ELEMENT_CURVE) {
-#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = 0.0f;
- if(dy) *dy = 0.0f;
-#endif
-
- return kernel_tex_fetch(__attributes_float, offset + sd->prim);
- }
- else if(elem == ATTR_ELEMENT_CURVE_KEY) {
- float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
- int k0 = __float_as_int(curvedata.x) + sd->segment;
- int k1 = k0 + 1;
-
- float f0 = kernel_tex_fetch(__attributes_float, offset + k0);
- float f1 = kernel_tex_fetch(__attributes_float, offset + k1);
-
-#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = sd->du.dx*(f1 - f0);
- if(dy) *dy = 0.0f;
-#endif
-
- return (1.0f - sd->u)*f0 + sd->u*f1;
- }
- else {
-#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = 0.0f;
- if(dy) *dy = 0.0f;
-#endif
-
- return 0.0f;
- }
-}
-
-ccl_device float3 curve_attribute_float3(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float3 *dx, float3 *dy)
-{
- if(elem == ATTR_ELEMENT_CURVE) {
- /* idea: we can't derive any useful differentials here, but for tiled
- * mipmap image caching it would be useful to avoid reading the highest
- * detail level always. maybe a derivative based on the hair density
- * could be computed somehow? */
-#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
- if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
-#endif
-
- return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + sd->prim));
- }
- else if(elem == ATTR_ELEMENT_CURVE_KEY) {
- float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
- int k0 = __float_as_int(curvedata.x) + sd->segment;
- int k1 = k0 + 1;
-
- float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + k0));
- float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + k1));
-
-#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = sd->du.dx*(f1 - f0);
- if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
-#endif
-
- return (1.0f - sd->u)*f0 + sd->u*f1;
- }
- else {
-#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
- if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
-#endif
-
- return make_float3(0.0f, 0.0f, 0.0f);
- }
-}
-
-/* hair info node functions */
-
-ccl_device float curve_thickness(KernelGlobals *kg, ShaderData *sd)
-{
- float r = 0.0f;
-
- if(sd->segment != ~0) {
- float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
- int k0 = __float_as_int(curvedata.x) + sd->segment;
- int k1 = k0 + 1;
-
- float4 P1 = kernel_tex_fetch(__curve_keys, k0);
- float4 P2 = kernel_tex_fetch(__curve_keys, k1);
- r = (P2.w - P1.w) * sd->u + P1.w;
- }
-
- return r*2.0f;
-}
-
-ccl_device float3 curve_tangent_normal(KernelGlobals *kg, ShaderData *sd)
-{
- float3 tgN = make_float3(0.0f,0.0f,0.0f);
-
- if(sd->segment != ~0) {
-
- tgN = -(-sd->I - sd->dPdu * (dot(sd->dPdu,-sd->I) / len_squared(sd->dPdu)));
- tgN = normalize(tgN);
-
- /* need to find suitable scaled gd for corrected normal */
-#if 0
- tgN = normalize(tgN - gd * sd->dPdu);
-#endif
- }
-
- return tgN;
-}
-
-#endif
-
-CCL_NAMESPACE_END
-
diff --git a/intern/cycles/kernel/kernel_displace.h b/intern/cycles/kernel/kernel_displace.h
index c50e2166660..b8c64af658f 100644
--- a/intern/cycles/kernel/kernel_displace.h
+++ b/intern/cycles/kernel/kernel_displace.h
@@ -16,8 +16,308 @@
CCL_NAMESPACE_BEGIN
+/* Integrate lighting at the shading point described by `sd` and store the
+ * result in `L`.
+ *
+ * kernel_data.integrator.aa_samples samples are taken. For each sample the
+ * surface shader is evaluated and, depending on the flags, ambient occlusion,
+ * subsurface scattering and direct/indirect lighting are sampled into a
+ * per-sample PathRadiance that is then accumulated into `L`.
+ *
+ * is_combined: sample AO and subsurface in addition to light/BSDF.
+ * is_ao: sample AO only; light/BSDF sampling is skipped.
+ * is_sss: sample subsurface only; light/BSDF sampling is skipped.
+ *
+ * `rng` is passed by value, so the caller's generator state is not modified. */
+ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadiance *L, RNG rng,
+ bool is_combined, bool is_ao, bool is_sss)
+{
+ int samples = kernel_data.integrator.aa_samples;
+
+ /* initialize master radiance accumulator */
+ kernel_assert(kernel_data.film.use_light_pass);
+ path_radiance_init(L, kernel_data.film.use_light_pass);
+
+ /* take multiple samples */
+ for(int sample = 0; sample < samples; sample++) {
+ PathRadiance L_sample;
+ PathState state;
+ Ray ray;
+ float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
+
+ /* Per-sample flag: set when this sample's lighting must be skipped
+ * because subsurface handling takes over. It is kept local so that a
+ * successful scatter in one sample does not disable light/BSDF
+ * sampling for all of the following samples. */
+ bool is_sss_sample = is_sss;
+
+ /* init radiance */
+ path_radiance_init(&L_sample, kernel_data.film.use_light_pass);
+
+ /* init path state */
+ path_state_init(kg, &state, &rng, sample);
+ state.num_samples = samples;
+
+ /* evaluate surface shader */
+ float rbsdf = path_state_rng_1D(kg, &rng, &state, PRNG_BSDF);
+ shader_eval_surface(kg, sd, rbsdf, state.flag, SHADER_CONTEXT_MAIN);
+
+ /* TODO, disable the closures we won't need */
+
+ /* sample ambient occlusion */
+ if(is_combined || is_ao) {
+ kernel_path_ao(kg, sd, &L_sample, &state, &rng, throughput);
+ }
+
+ /* sample subsurface scattering */
+ if((is_combined || is_sss) && (sd->flag & SD_BSSRDF)) {
+#ifdef __SUBSURFACE__
+ /* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */
+ if (kernel_path_subsurface_scatter(kg, sd, &L_sample, &state, &rng, &ray, &throughput))
+ is_sss_sample = true;
+#endif
+ }
+
+ /* sample light and BSDF */
+ if((!is_sss_sample) && (!is_ao)) {
+ if(kernel_path_integrate_lighting(kg, &rng, sd, &throughput, &state, &L_sample, &ray)) {
+#ifdef __LAMP_MIS__
+ state.ray_t = 0.0f;
+#endif
+ /* compute indirect light */
+ kernel_path_indirect(kg, &rng, ray, throughput, state.num_samples, state, &L_sample);
+
+ /* sum and reset indirect light pass variables for the next samples */
+ path_radiance_sum_indirect(&L_sample);
+ path_radiance_reset_indirect(&L_sample);
+ }
+ }
+
+ /* accumulate into master L */
+ path_radiance_accum_sample(L, &L_sample, samples);
+ }
+}
+
+/* Return true when `type` is one of the bake types listed below (AO,
+ * combined, shadow and the direct/indirect diffuse, glossy, transmission and
+ * subsurface passes); every other type returns false. */
+ccl_device bool is_light_pass(ShaderEvalType type)
+{
+ bool needs_lighting = false;
+
+ switch (type) {
+ case SHADER_EVAL_AO:
+ case SHADER_EVAL_COMBINED:
+ case SHADER_EVAL_SHADOW:
+ case SHADER_EVAL_DIFFUSE_DIRECT:
+ case SHADER_EVAL_GLOSSY_DIRECT:
+ case SHADER_EVAL_TRANSMISSION_DIRECT:
+ case SHADER_EVAL_SUBSURFACE_DIRECT:
+ case SHADER_EVAL_DIFFUSE_INDIRECT:
+ case SHADER_EVAL_GLOSSY_INDIRECT:
+ case SHADER_EVAL_TRANSMISSION_INDIRECT:
+ case SHADER_EVAL_SUBSURFACE_INDIRECT:
+ needs_lighting = true;
+ break;
+ default:
+ break;
+ }
+
+ return needs_lighting;
+}
+
+/* Evaluate one bake element and write its color to output[i].
+ *
+ * Each element occupies two consecutive uint4 entries in `input`:
+ * input[2*i]     = (object, prim, u, v) with u/v stored as float bits
+ * input[2*i + 1] = (dudx, dudy, dvdx, dvdy) stored as float bits
+ *
+ * When prim is -1 the element is skipped and output[i] is left untouched.
+ * Otherwise shader data is set up on the triangle, lighting is integrated
+ * first for the light pass types, and the value selected by `type` is stored
+ * as (r, g, b, 1.0). */
+ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output, ShaderEvalType type, int i)
+{
+ ShaderData sd;
+ uint4 in = input[i * 2];
+ uint4 diff = input[i * 2 + 1];
+
+ /* Start from black so that cases compiled out below (e.g. the subsurface
+ * cases without __SUBSURFACE__) never write an indeterminate value. */
+ float3 out = make_float3(0.0f, 0.0f, 0.0f);
+
+ int object = in.x;
+ int prim = in.y;
+
+ if(prim == -1)
+ return;
+
+ float u = __uint_as_float(in.z);
+ float v = __uint_as_float(in.w);
+
+ float dudx = __uint_as_float(diff.x);
+ float dudy = __uint_as_float(diff.y);
+ float dvdx = __uint_as_float(diff.z);
+ float dvdy = __uint_as_float(diff.w);
+
+ int shader;
+ float3 P, Ng;
+
+ triangle_point_normal(kg, prim, u, v, &P, &Ng, &shader);
+
+ /* dummy initializations copied from SHADER_EVAL_DISPLACE */
+ float3 I = Ng;
+ float t = 0.0f;
+ float time = TIME_INVALID;
+ int bounce = 0;
+ int transparent_bounce = 0;
+
+ /* light passes */
+ PathRadiance L;
+
+ shader_setup_from_sample(kg, &sd, P, Ng, I, shader, object, prim, u, v, t, time, bounce, transparent_bounce);
+ /* use the shading normal as the incoming direction */
+ sd.I = sd.N;
+
+ /* update differentials */
+ sd.dP.dx = sd.dPdu * dudx + sd.dPdv * dvdx;
+ sd.dP.dy = sd.dPdu * dudy + sd.dPdv * dvdy;
+ sd.du.dx = dudx;
+ sd.du.dy = dudy;
+ sd.dv.dx = dvdx;
+ sd.dv.dy = dvdy;
+
+ /* L is only filled in (and only read below) for light pass types */
+ if(is_light_pass(type)) {
+ RNG rng = cmj_hash(i, 0);
+ compute_light_pass(kg, &sd, &L, rng, (type == SHADER_EVAL_COMBINED),
+ (type == SHADER_EVAL_AO),
+ (type == SHADER_EVAL_SUBSURFACE_DIRECT ||
+ type == SHADER_EVAL_SUBSURFACE_INDIRECT));
+ }
+
+ switch (type) {
+ /* data passes */
+ case SHADER_EVAL_NORMAL:
+ {
+ /* compression: normal = (2 * color) - 1 */
+ out = sd.N * 0.5f + make_float3(0.5f, 0.5f, 0.5f);
+ break;
+ }
+ case SHADER_EVAL_UV:
+ {
+ out = primitive_uv(kg, &sd);
+ break;
+ }
+ case SHADER_EVAL_DIFFUSE_COLOR:
+ {
+ shader_eval_surface(kg, &sd, 0.f, 0, SHADER_CONTEXT_MAIN);
+ out = shader_bsdf_diffuse(kg, &sd);
+ break;
+ }
+ case SHADER_EVAL_GLOSSY_COLOR:
+ {
+ shader_eval_surface(kg, &sd, 0.f, 0, SHADER_CONTEXT_MAIN);
+ out = shader_bsdf_glossy(kg, &sd);
+ break;
+ }
+ case SHADER_EVAL_TRANSMISSION_COLOR:
+ {
+ shader_eval_surface(kg, &sd, 0.f, 0, SHADER_CONTEXT_MAIN);
+ out = shader_bsdf_transmission(kg, &sd);
+ break;
+ }
+ case SHADER_EVAL_SUBSURFACE_COLOR:
+ {
+#ifdef __SUBSURFACE__
+ shader_eval_surface(kg, &sd, 0.f, 0, SHADER_CONTEXT_MAIN);
+ out = shader_bsdf_subsurface(kg, &sd);
+#endif
+ break;
+ }
+ case SHADER_EVAL_EMISSION:
+ {
+ shader_eval_surface(kg, &sd, 0.f, 0, SHADER_CONTEXT_EMISSION);
+ out = shader_emissive_eval(kg, &sd);
+ break;
+ }
+
+#ifdef __PASSES__
+ /* light passes */
+ case SHADER_EVAL_AO:
+ {
+ out = L.ao;
+ break;
+ }
+ case SHADER_EVAL_COMBINED:
+ {
+ out = path_radiance_clamp_and_sum(kg, &L);
+ break;
+ }
+ case SHADER_EVAL_SHADOW:
+ {
+ out = make_float3(L.shadow.x, L.shadow.y, L.shadow.z);
+ break;
+ }
+ case SHADER_EVAL_DIFFUSE_DIRECT:
+ {
+ shader_eval_surface(kg, &sd, 0.f, 0, SHADER_CONTEXT_MAIN);
+ out = safe_divide_color(L.direct_diffuse, shader_bsdf_diffuse(kg, &sd));
+ break;
+ }
+ case SHADER_EVAL_GLOSSY_DIRECT:
+ {
+ shader_eval_surface(kg, &sd, 0.f, 0, SHADER_CONTEXT_MAIN);
+ out = safe_divide_color(L.direct_glossy, shader_bsdf_glossy(kg, &sd));
+ break;
+ }
+ case SHADER_EVAL_TRANSMISSION_DIRECT:
+ {
+ shader_eval_surface(kg, &sd, 0.f, 0, SHADER_CONTEXT_MAIN);
+ out = safe_divide_color(L.direct_transmission, shader_bsdf_transmission(kg, &sd));
+ break;
+ }
+ case SHADER_EVAL_SUBSURFACE_DIRECT:
+ {
+#ifdef __SUBSURFACE__
+ shader_eval_surface(kg, &sd, 0.f, 0, SHADER_CONTEXT_MAIN);
+ out = safe_divide_color(L.direct_subsurface, shader_bsdf_subsurface(kg, &sd));
+#endif
+ break;
+ }
+ case SHADER_EVAL_DIFFUSE_INDIRECT:
+ {
+ shader_eval_surface(kg, &sd, 0.f, 0, SHADER_CONTEXT_MAIN);
+ out = safe_divide_color(L.indirect_diffuse, shader_bsdf_diffuse(kg, &sd));
+ break;
+ }
+ case SHADER_EVAL_GLOSSY_INDIRECT:
+ {
+ shader_eval_surface(kg, &sd, 0.f, 0, SHADER_CONTEXT_MAIN);
+ out = safe_divide_color(L.indirect_glossy, shader_bsdf_glossy(kg, &sd));
+ break;
+ }
+ case SHADER_EVAL_TRANSMISSION_INDIRECT:
+ {
+ shader_eval_surface(kg, &sd, 0.f, 0, SHADER_CONTEXT_MAIN);
+ out = safe_divide_color(L.indirect_transmission, shader_bsdf_transmission(kg, &sd));
+ break;
+ }
+ case SHADER_EVAL_SUBSURFACE_INDIRECT:
+ {
+#ifdef __SUBSURFACE__
+ shader_eval_surface(kg, &sd, 0.f, 0, SHADER_CONTEXT_MAIN);
+ out = safe_divide_color(L.indirect_subsurface, shader_bsdf_subsurface(kg, &sd));
+#endif
+ break;
+ }
+#endif
+
+ /* extra */
+ case SHADER_EVAL_ENVIRONMENT:
+ {
+ /* setup ray */
+ Ray ray;
+
+ ray.P = make_float3(0.0f, 0.0f, 0.0f);
+ ray.D = normalize(P);
+ ray.t = 0.0f;
+#ifdef __CAMERA_MOTION__
+ ray.time = 0.5f;
+#endif
+
+#ifdef __RAY_DIFFERENTIALS__
+ ray.dD = differential3_zero();
+ ray.dP = differential3_zero();
+#endif
+
+ /* setup shader data */
+ shader_setup_from_background(kg, &sd, &ray, 0, 0);
+
+ /* evaluate */
+ int flag = 0; /* we can't know which type of BSDF this is for */
+ out = shader_eval_background(kg, &sd, flag, SHADER_CONTEXT_MAIN);
+ break;
+ }
+ default:
+ {
+ /* no real shader, returning the position of the verts for debugging */
+ out = normalize(P);
+ break;
+ }
+ }
+
+ /* write output */
+ output[i] = make_float4(out.x, out.y, out.z, 1.0f);
+ return;
+}
+
ccl_device void kernel_shader_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output, ShaderEvalType type, int i)
{
+ if(type >= SHADER_EVAL_BAKE) {
+ kernel_bake_evaluate(kg, input, output, type, i);
+ return;
+ }
+
ShaderData sd;
uint4 in = input[i];
float3 out;
@@ -55,7 +355,7 @@ ccl_device void kernel_shader_evaluate(KernelGlobals *kg, ccl_global uint4 *inpu
#endif
/* setup shader data */
- shader_setup_from_background(kg, &sd, &ray, 0);
+ shader_setup_from_background(kg, &sd, &ray, 0, 0);
/* evaluate */
int flag = 0; /* we can't know which type of BSDF this is for */
diff --git a/intern/cycles/kernel/kernel_emission.h b/intern/cycles/kernel/kernel_emission.h
index 58bdc2b70ca..deffa7f2ba2 100644
--- a/intern/cycles/kernel/kernel_emission.h
+++ b/intern/cycles/kernel/kernel_emission.h
@@ -18,8 +18,8 @@ CCL_NAMESPACE_BEGIN
/* Direction Emission */
-ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg, float rando,
- LightSample *ls, float3 I, differential3 dI, float t, float time, int bounce)
+ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
+ LightSample *ls, float3 I, differential3 dI, float t, float time, int bounce, int transparent_bounce)
{
/* setup shading at emitter */
ShaderData sd;
@@ -36,27 +36,20 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg, float rando,
#endif
ray.dP = differential3_zero();
ray.dD = dI;
-#ifdef __CAMERA_MOTION__
- ray.time = time;
-#endif
- shader_setup_from_background(kg, &sd, &ray, bounce+1);
+
+ shader_setup_from_background(kg, &sd, &ray, bounce+1, transparent_bounce);
eval = shader_eval_background(kg, &sd, 0, SHADER_CONTEXT_EMISSION);
}
else
#endif
{
-#ifdef __HAIR__
- if(ls->type == LIGHT_STRAND)
- shader_setup_from_sample(kg, &sd, ls->P, ls->Ng, I, ls->shader, ls->object, ls->prim, ls->u, ls->v, t, time, bounce+1, ls->prim);
- else
-#endif
- shader_setup_from_sample(kg, &sd, ls->P, ls->Ng, I, ls->shader, ls->object, ls->prim, ls->u, ls->v, t, time, bounce+1, ~0);
+ shader_setup_from_sample(kg, &sd, ls->P, ls->Ng, I, ls->shader, ls->object, ls->prim, ls->u, ls->v, t, time, bounce+1, transparent_bounce);
ls->Ng = sd.Ng;
/* no path flag, we're evaluating this for all closures. that's weak but
* we'd have to do multiple evaluations otherwise */
- shader_eval_surface(kg, &sd, rando, 0, SHADER_CONTEXT_EMISSION);
+ shader_eval_surface(kg, &sd, 0.0f, 0, SHADER_CONTEXT_EMISSION);
/* evaluate emissive closure */
if(sd.flag & SD_EMISSION)
@@ -71,13 +64,13 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg, float rando,
}
ccl_device_noinline bool direct_emission(KernelGlobals *kg, ShaderData *sd, int lindex,
- float randt, float rando, float randu, float randv, Ray *ray, BsdfEval *eval,
- bool *is_lamp, int bounce)
+ float randt, float randu, float randv, Ray *ray, BsdfEval *eval,
+ bool *is_lamp, int bounce, int transparent_bounce)
{
LightSample ls;
#ifdef __BRANCHED_PATH__
- if(lindex != -1) {
+ if(lindex != LAMP_NONE) {
/* sample position on a specified light */
light_select(kg, lindex, randu, randv, sd->P, &ls);
}
@@ -95,7 +88,7 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg, ShaderData *sd, int
differential3 dD = differential3_zero();
/* evaluate closure */
- float3 light_eval = direct_emissive_eval(kg, rando, &ls, -ls.D, dD, ls.t, sd->time, bounce);
+ float3 light_eval = direct_emissive_eval(kg, &ls, -ls.D, dD, ls.t, sd->time, bounce, transparent_bounce);
if(is_zero(light_eval))
return false;
@@ -104,7 +97,7 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg, ShaderData *sd, int
float bsdf_pdf;
#ifdef __VOLUME__
- if(sd->prim != ~0)
+ if(sd->prim != PRIM_NONE)
shader_bsdf_eval(kg, sd, ls.D, eval, &bsdf_pdf);
else
shader_volume_phase_eval(kg, sd, ls.D, eval, &bsdf_pdf);
@@ -160,7 +153,7 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg, ShaderData *sd, int
}
/* return if it's a lamp for shadow pass */
- *is_lamp = (ls.prim == ~0 && ls.type != LIGHT_BACKGROUND);
+ *is_lamp = (ls.prim == PRIM_NONE && ls.type != LIGHT_BACKGROUND);
return true;
}
@@ -173,10 +166,11 @@ ccl_device_noinline float3 indirect_primitive_emission(KernelGlobals *kg, Shader
float3 L = shader_emissive_eval(kg, sd);
#ifdef __HAIR__
- if(!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS) && (sd->segment == ~0)) {
+ if(!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS) && (sd->type & PRIMITIVE_ALL_TRIANGLE))
#else
- if(!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS)) {
+ if(!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS))
#endif
+ {
/* multiple importance sampling, get triangle light pdf,
* and compute weight with respect to BSDF pdf */
float pdf = triangle_light_pdf(kg, sd->Ng, sd->I, t);
@@ -190,71 +184,75 @@ ccl_device_noinline float3 indirect_primitive_emission(KernelGlobals *kg, Shader
/* Indirect Lamp Emission */
-ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg, Ray *ray, int path_flag, float bsdf_pdf, float randt, float3 *emission, int bounce)
+ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg, PathState *state, Ray *ray, float3 *emission)
{
- LightSample ls;
- int lamp = lamp_light_eval_sample(kg, randt);
+ bool hit_lamp = false;
- if(lamp == ~0)
- return false;
+ *emission = make_float3(0.0f, 0.0f, 0.0f);
- if(!lamp_light_eval(kg, lamp, ray->P, ray->D, ray->t, &ls))
- return false;
+ for(int lamp = 0; lamp < kernel_data.integrator.num_all_lights; lamp++) {
+ LightSample ls;
+
+ if(!lamp_light_eval(kg, lamp, ray->P, ray->D, ray->t, &ls))
+ continue;
#ifdef __PASSES__
- /* use visibility flag to skip lights */
- if(ls.shader & SHADER_EXCLUDE_ANY) {
- if(((ls.shader & SHADER_EXCLUDE_DIFFUSE) && (path_flag & PATH_RAY_DIFFUSE)) ||
- ((ls.shader & SHADER_EXCLUDE_GLOSSY) && (path_flag & PATH_RAY_GLOSSY)) ||
- ((ls.shader & SHADER_EXCLUDE_TRANSMIT) && (path_flag & PATH_RAY_TRANSMIT)))
- return false;
- }
+ /* use visibility flag to skip lights */
+ if(ls.shader & SHADER_EXCLUDE_ANY) {
+ if(((ls.shader & SHADER_EXCLUDE_DIFFUSE) && (state->flag & PATH_RAY_DIFFUSE)) ||
+ ((ls.shader & SHADER_EXCLUDE_GLOSSY) && (state->flag & PATH_RAY_GLOSSY)) ||
+ ((ls.shader & SHADER_EXCLUDE_TRANSMIT) && (state->flag & PATH_RAY_TRANSMIT)))
+ continue;
+ }
#endif
- float3 L = direct_emissive_eval(kg, 0.0f, &ls, -ray->D, ray->dD, ls.t, ray->time, bounce);
+ float3 L = direct_emissive_eval(kg, &ls, -ray->D, ray->dD, ls.t, ray->time, state->bounce, state->transparent_bounce);
- if(!(path_flag & PATH_RAY_MIS_SKIP)) {
- /* multiple importance sampling, get regular light pdf,
- * and compute weight with respect to BSDF pdf */
- float mis_weight = power_heuristic(bsdf_pdf, ls.pdf);
- L *= mis_weight;
+ if(!(state->flag & PATH_RAY_MIS_SKIP)) {
+ /* multiple importance sampling, get regular light pdf,
+ * and compute weight with respect to BSDF pdf */
+ float mis_weight = power_heuristic(state->ray_pdf, ls.pdf);
+ L *= mis_weight;
+ }
+
+ *emission += L;
+ hit_lamp = true;
}
- *emission = L;
- return true;
+ return hit_lamp;
}
/* Indirect Background */
-ccl_device_noinline float3 indirect_background(KernelGlobals *kg, Ray *ray, int path_flag, float bsdf_pdf, int bounce)
+ccl_device_noinline float3 indirect_background(KernelGlobals *kg, PathState *state, Ray *ray)
{
#ifdef __BACKGROUND__
int shader = kernel_data.background.surface_shader;
/* use visibility flag to skip lights */
if(shader & SHADER_EXCLUDE_ANY) {
- if(((shader & SHADER_EXCLUDE_DIFFUSE) && (path_flag & PATH_RAY_DIFFUSE)) ||
- ((shader & SHADER_EXCLUDE_GLOSSY) && (path_flag & PATH_RAY_GLOSSY)) ||
- ((shader & SHADER_EXCLUDE_TRANSMIT) && (path_flag & PATH_RAY_TRANSMIT)) ||
- ((shader & SHADER_EXCLUDE_CAMERA) && (path_flag & PATH_RAY_CAMERA)))
+ if(((shader & SHADER_EXCLUDE_DIFFUSE) && (state->flag & PATH_RAY_DIFFUSE)) ||
+ ((shader & SHADER_EXCLUDE_GLOSSY) && (state->flag & PATH_RAY_GLOSSY)) ||
+ ((shader & SHADER_EXCLUDE_TRANSMIT) && (state->flag & PATH_RAY_TRANSMIT)) ||
+ ((shader & SHADER_EXCLUDE_CAMERA) && (state->flag & PATH_RAY_CAMERA)))
return make_float3(0.0f, 0.0f, 0.0f);
}
/* evaluate background closure */
ShaderData sd;
- shader_setup_from_background(kg, &sd, ray, bounce+1);
+ shader_setup_from_background(kg, &sd, ray, state->bounce+1, state->transparent_bounce);
- float3 L = shader_eval_background(kg, &sd, path_flag, SHADER_CONTEXT_EMISSION);
+ float3 L = shader_eval_background(kg, &sd, state->flag, SHADER_CONTEXT_EMISSION);
#ifdef __BACKGROUND_MIS__
/* check if background light exists or if we should skip pdf */
int res = kernel_data.integrator.pdf_background_res;
- if(!(path_flag & PATH_RAY_MIS_SKIP) && res) {
+ if(!(state->flag & PATH_RAY_MIS_SKIP) && res) {
/* multiple importance sampling, get background light pdf for ray
* direction, and compute weight with respect to BSDF pdf */
float pdf = background_light_pdf(kg, ray->D);
- float mis_weight = power_heuristic(bsdf_pdf, pdf);
+ float mis_weight = power_heuristic(state->ray_pdf, pdf);
return L*mis_weight;
}
diff --git a/intern/cycles/kernel/kernel_film.h b/intern/cycles/kernel/kernel_film.h
index cbd875e994c..dc5f6e7ce38 100644
--- a/intern/cycles/kernel/kernel_film.h
+++ b/intern/cycles/kernel/kernel_film.h
@@ -75,7 +75,7 @@ ccl_device void kernel_film_convert_to_half_float(KernelGlobals *kg,
float exposure = kernel_data.film.exposure;
- ccl_align(16) float4 rgba_in = *in;
+ float4 rgba_in = *in;
if(exposure != 1.0f) {
rgba_in.x *= exposure;
@@ -83,7 +83,7 @@ ccl_device void kernel_film_convert_to_half_float(KernelGlobals *kg,
rgba_in.z *= exposure;
}
- float4_store_half(out, &rgba_in, sample_scale);
+ float4_store_half(out, rgba_in, sample_scale);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h
index c32f0395744..ac432d3fe04 100644
--- a/intern/cycles/kernel/kernel_light.h
+++ b/intern/cycles/kernel/kernel_light.h
@@ -217,8 +217,8 @@ ccl_device void lamp_light_sample(KernelGlobals *kg, int lamp,
LightType type = (LightType)__float_as_int(data0.x);
ls->type = type;
ls->shader = __float_as_int(data1.x);
- ls->object = ~0;
- ls->prim = ~0;
+ ls->object = PRIM_NONE;
+ ls->prim = PRIM_NONE;
ls->lamp = lamp;
ls->u = randu;
ls->v = randv;
@@ -309,8 +309,8 @@ ccl_device bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D,
LightType type = (LightType)__float_as_int(data0.x);
ls->type = type;
ls->shader = __float_as_int(data1.x);
- ls->object = ~0;
- ls->prim = ~0;
+ ls->object = PRIM_NONE;
+ ls->prim = PRIM_NONE;
ls->lamp = lamp;
/* todo: missing texture coordinates */
ls->u = 0.0f;
@@ -421,7 +421,6 @@ ccl_device bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D,
/* compute pdf */
if(ls->t != FLT_MAX)
ls->pdf *= lamp_light_pdf(kg, ls->Ng, -ls->D, ls->t);
- ls->eval_fac *= kernel_data.integrator.inv_pdf_lights;
return true;
}
@@ -458,11 +457,10 @@ ccl_device void triangle_light_sample(KernelGlobals *kg, int prim, int object,
v = randv*randu;
/* triangle, so get position, normal, shader */
- ls->P = triangle_point_MT(kg, prim, u, v);
- ls->Ng = triangle_normal_MT(kg, prim, &ls->shader);
+ triangle_point_normal(kg, prim, u, v, &ls->P, &ls->Ng, &ls->shader);
ls->object = object;
ls->prim = prim;
- ls->lamp = ~0;
+ ls->lamp = LAMP_NONE;
ls->shader |= SHADER_USE_MIS;
ls->t = 0.0f;
ls->u = u;
@@ -485,52 +483,6 @@ ccl_device float triangle_light_pdf(KernelGlobals *kg,
return t*t*pdf/cos_pi;
}
-/* Curve Light */
-
-#ifdef __HAIR__
-
-ccl_device void curve_segment_light_sample(KernelGlobals *kg, int prim, int object,
- int segment, float randu, float randv, float time, LightSample *ls)
-{
- /* this strand code needs completion */
- float4 v00 = kernel_tex_fetch(__curves, prim);
-
- int k0 = __float_as_int(v00.x) + segment;
- int k1 = k0 + 1;
-
- float4 P1 = kernel_tex_fetch(__curve_keys, k0);
- float4 P2 = kernel_tex_fetch(__curve_keys, k1);
-
- float l = len(float4_to_float3(P2) - float4_to_float3(P1));
-
- float r1 = P1.w;
- float r2 = P2.w;
- float3 tg = (float4_to_float3(P2) - float4_to_float3(P1)) / l;
- float3 xc = make_float3(tg.x * tg.z, tg.y * tg.z, -(tg.x * tg.x + tg.y * tg.y));
- if (is_zero(xc))
- xc = make_float3(tg.x * tg.y, -(tg.x * tg.x + tg.z * tg.z), tg.z * tg.y);
- xc = normalize(xc);
- float3 yc = cross(tg, xc);
- float gd = ((r2 - r1)/l);
-
- /* normal currently ignores gradient */
- ls->Ng = sinf(M_2PI_F * randv) * xc + cosf(M_2PI_F * randv) * yc;
- ls->P = randu * l * tg + (gd * l + r1) * ls->Ng;
- ls->object = object;
- ls->prim = prim;
- ls->lamp = ~0;
- ls->t = 0.0f;
- ls->u = randu;
- ls->v = randv;
- ls->type = LIGHT_STRAND;
- ls->eval_fac = 1.0f;
- ls->shader = __float_as_int(v00.z) | SHADER_USE_MIS;
-
- object_transform_light_sample(kg, ls, object, time);
-}
-
-#endif
-
/* Light Distribution */
ccl_device int light_distribution_sample(KernelGlobals *kg, float randt)
@@ -573,21 +525,14 @@ ccl_device void light_sample(KernelGlobals *kg, float randt, float randu, float
if(prim >= 0) {
int object = __float_as_int(l.w);
-#ifdef __HAIR__
- int segment = __float_as_int(l.z) & SHADER_MASK;
-#endif
+ int shader_flag = __float_as_int(l.z);
-#ifdef __HAIR__
- if (segment != SHADER_MASK)
- curve_segment_light_sample(kg, prim, object, segment, randu, randv, time, ls);
- else
-#endif
- triangle_light_sample(kg, prim, object, randu, randv, time, ls);
+ triangle_light_sample(kg, prim, object, randu, randv, time, ls);
/* compute incoming direction, distance and pdf */
ls->D = normalize_len(ls->P - P, &ls->t);
ls->pdf = triangle_light_pdf(kg, ls->Ng, -ls->D, ls->t);
- ls->shader |= __float_as_int(l.z) & (~SHADER_MASK);
+ ls->shader |= shader_flag;
}
else {
int lamp = -prim-1;
@@ -620,7 +565,7 @@ ccl_device int lamp_light_eval_sample(KernelGlobals *kg, float randt)
return lamp;
}
else
- return ~0;
+ return LAMP_NONE;
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_montecarlo.h b/intern/cycles/kernel/kernel_montecarlo.h
index 92f3420a218..af7b727c1ba 100644
--- a/intern/cycles/kernel/kernel_montecarlo.h
+++ b/intern/cycles/kernel/kernel_montecarlo.h
@@ -131,6 +131,11 @@ ccl_device float power_heuristic_3(float a, float b, float c)
return (a*a)/(a*a + b*b + c*c);
}
+ccl_device float max_heuristic(float a, float b) /* all-or-nothing weight for a compared against b */
+{
+ return (a > b)? 1.0f: 0.0f; /* 1 only when a is strictly greater than b; ties and smaller values give 0 */
+}
+
/* distribute uniform xy on [0,1] over unit disk [-1,1], with concentric mapping
* to better preserve stratification for some RNG sequences */
ccl_device float2 concentric_sample_disk(float u1, float u2)
diff --git a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h
index 9cdcb8c5229..b3b6fc02894 100644
--- a/intern/cycles/kernel/kernel_passes.h
+++ b/intern/cycles/kernel/kernel_passes.h
@@ -51,7 +51,8 @@ ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, ccl_global fl
if(!(path_flag & PATH_RAY_SINGLE_PASS_DONE)) {
if(!(sd->flag & SD_TRANSPARENT) ||
kernel_data.film.pass_alpha_threshold == 0.0f ||
- average(shader_bsdf_alpha(kg, sd)) >= kernel_data.film.pass_alpha_threshold) {
+ average(shader_bsdf_alpha(kg, sd)) >= kernel_data.film.pass_alpha_threshold)
+ {
if(sample == 0) {
if(flag & PASS_DEPTH) {
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index 635201471e1..a80a0033712 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -18,18 +18,15 @@
#include "osl_shader.h"
#endif
-#include "kernel_differential.h"
-#include "kernel_montecarlo.h"
-#include "kernel_projection.h"
-#include "kernel_object.h"
-#include "kernel_triangle.h"
-#include "kernel_curve.h"
-#include "kernel_primitive.h"
-#include "kernel_projection.h"
#include "kernel_random.h"
-#include "kernel_bvh.h"
-#include "kernel_accumulate.h"
+#include "kernel_projection.h"
+#include "kernel_montecarlo.h"
+#include "kernel_differential.h"
#include "kernel_camera.h"
+
+#include "geom/geom.h"
+
+#include "kernel_accumulate.h"
#include "kernel_shader.h"
#include "kernel_light.h"
#include "kernel_emission.h"
@@ -59,11 +56,6 @@ ccl_device_inline bool kernel_path_integrate_scatter_lighting(KernelGlobals *kg,
/* sample illumination from lights to find path contribution */
if(sd->flag & SD_BSDF_HAS_EVAL) {
float light_t = path_state_rng_1D(kg, rng, state, PRNG_LIGHT);
-#ifdef __MULTI_CLOSURE__
- float light_o = 0.0f;
-#else
- float light_o = path_state_rng_1D(kg, rng, state, PRNG_LIGHT_F);
-#endif
float light_u, light_v;
path_state_rng_2D(kg, rng, state, PRNG_LIGHT_U, &light_u, &light_v);
@@ -75,7 +67,7 @@ ccl_device_inline bool kernel_path_integrate_scatter_lighting(KernelGlobals *kg,
light_ray.time = sd->time;
#endif
- if(direct_emission(kg, sd, -1, light_t, light_o, light_u, light_v, &light_ray, &L_light, &is_lamp, state->bounce)) {
+ if(direct_emission(kg, sd, LAMP_NONE, light_t, light_u, light_v, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) {
/* trace shadow ray */
float3 shadow;
@@ -133,7 +125,96 @@ ccl_device_inline bool kernel_path_integrate_scatter_lighting(KernelGlobals *kg,
#if defined(__BRANCHED_PATH__) || defined(__SUBSURFACE__)
-ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray, ccl_global float *buffer,
+ccl_device void kernel_branched_path_integrate_direct_lighting(KernelGlobals *kg, RNG *rng,
+ ShaderData *sd, PathState *state, float3 throughput, float num_samples_adjust, PathRadiance *L, bool sample_all_lights)
+{ /* direct lighting at sd, accumulated into L: either loops over all lamps plus mesh lights, or samples one light, depending on sample_all_lights */
+ /* sample illumination from lights to find path contribution; nothing is done when SD_BSDF_HAS_EVAL is not set */
+ if(sd->flag & SD_BSDF_HAS_EVAL) {
+ Ray light_ray;
+ BsdfEval L_light;
+ bool is_lamp;
+
+#ifdef __OBJECT_MOTION__
+ light_ray.time = sd->time;
+#endif
+
+ if(sample_all_lights) {
+ /* lamp sampling: every lamp index is visited, each with its own sample count */
+ for(int i = 0; i < kernel_data.integrator.num_all_lights; i++) {
+ int num_samples = ceil_to_int(num_samples_adjust*light_select_num_samples(kg, i)); /* per-lamp count scaled by num_samples_adjust, rounded up */
+ float num_samples_inv = num_samples_adjust/(num_samples*kernel_data.integrator.num_all_lights); /* weight of one sample of this lamp */
+ RNG lamp_rng = cmj_hash(*rng, i); /* separate RNG value per lamp, derived from the lamp index */
+
+ if(kernel_data.integrator.pdf_triangles != 0.0f)
+ num_samples_inv *= 0.5f; /* mesh lights are sampled below as well, so lamps get half of the weight */
+
+ for(int j = 0; j < num_samples; j++) {
+ float light_u, light_v;
+ path_branched_rng_2D(kg, &lamp_rng, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
+
+ if(direct_emission(kg, sd, i, 0.0f, light_u, light_v, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) { /* lamp index i is passed explicitly, with 0.0f in place of light_t */
+ /* trace shadow ray */
+ float3 shadow;
+
+ if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
+ /* accumulate, weighted by the per-sample factor */
+ path_radiance_accum_light(L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
+ }
+ }
+ }
+ }
+
+ /* mesh light sampling, only when pdf_triangles is non-zero */
+ if(kernel_data.integrator.pdf_triangles != 0.0f) {
+ int num_samples = ceil_to_int(num_samples_adjust*kernel_data.integrator.mesh_light_samples);
+ float num_samples_inv = num_samples_adjust/num_samples;
+
+ if(kernel_data.integrator.num_all_lights)
+ num_samples_inv *= 0.5f; /* lamps were sampled above as well, so mesh lights get the other half of the weight */
+
+ for(int j = 0; j < num_samples; j++) {
+ float light_t = path_branched_rng_1D(kg, rng, state, j, num_samples, PRNG_LIGHT);
+ float light_u, light_v;
+ path_branched_rng_2D(kg, rng, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
+
+ /* only sample triangle lights */
+ if(kernel_data.integrator.num_all_lights)
+ light_t = 0.5f*light_t; /* NOTE(review): presumably restricts the pick to the triangle part of the light distribution - confirm */
+
+ if(direct_emission(kg, sd, LAMP_NONE, light_t, light_u, light_v, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) {
+ /* trace shadow ray */
+ float3 shadow;
+
+ if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
+ /* accumulate, weighted by the per-sample factor */
+ path_radiance_accum_light(L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
+ }
+ }
+ }
+ }
+ }
+ else { /* single light sample; num_samples_adjust is not used on this path */
+ float light_t = path_state_rng_1D(kg, rng, state, PRNG_LIGHT);
+ float light_u, light_v;
+ path_state_rng_2D(kg, rng, state, PRNG_LIGHT_U, &light_u, &light_v);
+
+ /* sample random light */
+ if(direct_emission(kg, sd, LAMP_NONE, light_t, light_u, light_v, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) {
+ /* trace shadow ray */
+ float3 shadow;
+
+ if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
+ /* accumulate with unscaled throughput and a factor of 1 */
+ path_radiance_accum_light(L, throughput, &L_light, shadow, 1.0f, state->bounce, is_lamp);
+ }
+ }
+ }
+ }
+}
+
+#endif
+
+ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray,
float3 throughput, int num_samples, PathState state, PathRadiance *L)
{
/* path iteration */
@@ -161,17 +242,16 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray, ccl_g
light_ray.dP = ray.dP;
/* intersect with lamp */
- float light_t = path_state_rng_1D(kg, rng, &state, PRNG_LIGHT);
float3 emission;
- if(indirect_lamp_emission(kg, &light_ray, state.flag, state.ray_pdf, light_t, &emission, state.bounce))
+ if(indirect_lamp_emission(kg, &state, &light_ray, &emission))
path_radiance_accum_emission(L, throughput, emission, state.bounce);
}
#endif
#ifdef __VOLUME__
/* volume attenuation, emission, scatter */
- if(state.volume_stack[0].shader != SHADER_NO_ID) {
+ if(state.volume_stack[0].shader != SHADER_NONE) {
Ray volume_ray = ray;
volume_ray.t = (hit)? isect.t: FLT_MAX;
@@ -191,7 +271,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray, ccl_g
if(!hit) {
#ifdef __BACKGROUND__
/* sample background shader */
- float3 L_background = indirect_background(kg, &ray, state.flag, state.ray_pdf, state.bounce);
+ float3 L_background = indirect_background(kg, &state, &ray);
path_radiance_accum_background(L, throughput, L_background, state.bounce);
#endif
@@ -200,7 +280,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray, ccl_g
/* setup shading */
ShaderData sd;
- shader_setup_from_ray(kg, &sd, &isect, &ray, state.bounce);
+ shader_setup_from_ray(kg, &sd, &isect, &ray, state.bounce, state.transparent_bounce);
float rbsdf = path_state_rng_1D(kg, rng, &state, PRNG_BSDF);
shader_eval_surface(kg, &sd, rbsdf, state.flag, SHADER_CONTEXT_INDIRECT);
#ifdef __BRANCHED_PATH__
@@ -300,38 +380,10 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray, ccl_g
}
#endif
-#ifdef __EMISSION__
+#if defined(__EMISSION__) && defined(__BRANCHED_PATH__)
if(kernel_data.integrator.use_direct_light) {
- /* sample illumination from lights to find path contribution */
- if(sd.flag & SD_BSDF_HAS_EVAL) {
- float light_t = path_state_rng_1D(kg, rng, &state, PRNG_LIGHT);
-#ifdef __MULTI_CLOSURE__
- float light_o = 0.0f;
-#else
- float light_o = path_state_rng_1D(kg, rng, &state, PRNG_LIGHT_F);
-#endif
- float light_u, light_v;
- path_state_rng_2D(kg, rng, &state, PRNG_LIGHT_U, &light_u, &light_v);
-
- Ray light_ray;
- BsdfEval L_light;
- bool is_lamp;
-
-#ifdef __OBJECT_MOTION__
- light_ray.time = sd.time;
-#endif
-
- /* sample random light */
- if(direct_emission(kg, &sd, -1, light_t, light_o, light_u, light_v, &light_ray, &L_light, &is_lamp, state.bounce)) {
- /* trace shadow ray */
- float3 shadow;
-
- if(!shadow_blocked(kg, &state, &light_ray, &shadow)) {
- /* accumulate */
- path_radiance_accum_light(L, throughput, &L_light, shadow, 1.0f, state.bounce, is_lamp);
- }
- }
- }
+ bool all = kernel_data.integrator.sample_all_lights_indirect;
+ kernel_branched_path_integrate_direct_lighting(kg, rng, &sd, &state, throughput, 1.0f, L, all);
}
#endif
@@ -406,10 +458,6 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray, ccl_g
}
}
-#endif
-
-#ifdef __SUBSURFACE__
-
ccl_device_inline bool kernel_path_integrate_lighting(KernelGlobals *kg, RNG *rng,
ShaderData *sd, float3 *throughput, PathState *state, PathRadiance *L, Ray *ray)
{
@@ -418,11 +466,6 @@ ccl_device_inline bool kernel_path_integrate_lighting(KernelGlobals *kg, RNG *rn
/* sample illumination from lights to find path contribution */
if(sd->flag & SD_BSDF_HAS_EVAL) {
float light_t = path_state_rng_1D(kg, rng, state, PRNG_LIGHT);
-#ifdef __MULTI_CLOSURE__
- float light_o = 0.0f;
-#else
- float light_o = path_state_rng_1D(kg, rng, state, PRNG_LIGHT_F);
-#endif
float light_u, light_v;
path_state_rng_2D(kg, rng, state, PRNG_LIGHT_U, &light_u, &light_v);
@@ -434,7 +477,7 @@ ccl_device_inline bool kernel_path_integrate_lighting(KernelGlobals *kg, RNG *rn
light_ray.time = sd->time;
#endif
- if(direct_emission(kg, sd, -1, light_t, light_o, light_u, light_v, &light_ray, &L_light, &is_lamp, state->bounce)) {
+ if(direct_emission(kg, sd, LAMP_NONE, light_t, light_u, light_v, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) {
/* trace shadow ray */
float3 shadow;
@@ -524,6 +567,84 @@ ccl_device_inline bool kernel_path_integrate_lighting(KernelGlobals *kg, RNG *rn
}
}
+ccl_device void kernel_path_ao(KernelGlobals *kg, ShaderData *sd, PathRadiance *L, PathState *state, RNG *rng, float3 throughput)
+{ /* ambient occlusion at sd: samples one direction, tests it with a shadow ray and accumulates the result into L */
+ /* todo: solve correlation */
+ float bsdf_u, bsdf_v;
+
+ path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
+
+ float ao_factor = kernel_data.background.ao_factor;
+ float3 ao_N;
+ float3 ao_bsdf = shader_bsdf_ao(kg, sd, ao_factor, &ao_N); /* also fills ao_N, used as the sampling normal below */
+ float3 ao_D;
+ float ao_pdf;
+ float3 ao_alpha = shader_bsdf_alpha(kg, sd);
+
+ sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf); /* writes the sampled direction to ao_D and its pdf to ao_pdf */
+
+ if(dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f) { /* skip directions not on the positive side of the geometric normal, or with zero pdf */
+ Ray light_ray;
+ float3 ao_shadow;
+
+ light_ray.P = ray_offset(sd->P, sd->Ng);
+ light_ray.D = ao_D;
+ light_ray.t = kernel_data.background.ao_distance; /* ray length is limited to the configured AO distance */
+#ifdef __OBJECT_MOTION__
+ light_ray.time = sd->time;
+#endif
+ light_ray.dP = sd->dP;
+ light_ray.dD = differential3_zero();
+
+ if(!shadow_blocked(kg, state, &light_ray, &ao_shadow)) /* accumulate only when the ray is not blocked */
+ path_radiance_accum_ao(L, throughput, ao_alpha, ao_bsdf, ao_shadow, state->bounce);
+ }
+}
+
+#ifdef __SUBSURFACE__
+ccl_device bool kernel_path_subsurface_scatter(KernelGlobals *kg, ShaderData *sd, PathRadiance *L, PathState *state, RNG *rng, Ray *ray, float3 *throughput)
+{ /* picks a bssrdf or bsdf closure; for a bssrdf, lights every scatter hit and traces its indirect path into L. Returns true only when a bssrdf was picked */
+ float bssrdf_probability;
+ ShaderClosure *sc = subsurface_scatter_pick_closure(kg, sd, &bssrdf_probability);
+
+ /* modify throughput for picking bssrdf or bsdf; this is written back to the caller on both return paths */
+ *throughput *= bssrdf_probability;
+
+ /* do bssrdf scatter step if we picked a bssrdf closure */
+ if(sc) {
+ uint lcg_state = lcg_state_init(rng, state, 0x68bc21eb);
+
+ ShaderData bssrdf_sd[BSSRDF_MAX_HITS];
+ float bssrdf_u, bssrdf_v;
+ path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
+ int num_hits = subsurface_scatter_multi_step(kg, sd, bssrdf_sd, state->flag, sc, &lcg_state, bssrdf_u, bssrdf_v, false); /* fills bssrdf_sd; the return value is the number of entries used below */
+
+ /* compute lighting with the BSDF closure */
+ for(int hit = 0; hit < num_hits; hit++) {
+ float3 tp = *throughput; /* per-hit copies: the caller's throughput, state and ray are not modified inside the loop */
+ PathState hit_state = *state;
+ Ray hit_ray = *ray;
+
+ hit_state.flag |= PATH_RAY_BSSRDF_ANCESTOR;
+ hit_state.rng_offset += PRNG_BOUNCE_NUM;
+
+ if(kernel_path_integrate_lighting(kg, rng, &bssrdf_sd[hit], &tp, &hit_state, L, &hit_ray)) { /* the indirect path is traced only when this call returns true */
+#ifdef __LAMP_MIS__
+ hit_state.ray_t = 0.0f;
+#endif
+
+ kernel_path_indirect(kg, rng, hit_ray, tp, state->num_samples, hit_state, L);
+
+ /* for render passes, sum and reset indirect light pass variables
+ * for the next samples */
+ path_radiance_sum_indirect(L);
+ path_radiance_reset_indirect(L);
+ }
+ }
+ return true; /* a bssrdf closure was picked and the scatter step was done */
+ }
+ return false; /* no bssrdf closure was picked; only *throughput has been changed */
+}
#endif
ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, Ray ray, ccl_global float *buffer)
@@ -578,17 +699,16 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,
light_ray.dP = ray.dP;
/* intersect with lamp */
- float light_t = path_state_rng_1D(kg, rng, &state, PRNG_LIGHT);
float3 emission;
- if(indirect_lamp_emission(kg, &light_ray, state.flag, state.ray_pdf, light_t, &emission, state.bounce))
+ if(indirect_lamp_emission(kg, &state, &light_ray, &emission))
path_radiance_accum_emission(&L, throughput, emission, state.bounce);
}
#endif
#ifdef __VOLUME__
/* volume attenuation, emission, scatter */
- if(state.volume_stack[0].shader != SHADER_NO_ID) {
+ if(state.volume_stack[0].shader != SHADER_NONE) {
Ray volume_ray = ray;
volume_ray.t = (hit)? isect.t: FLT_MAX;
@@ -618,7 +738,7 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,
#ifdef __BACKGROUND__
/* sample background shader */
- float3 L_background = indirect_background(kg, &ray, state.flag, state.ray_pdf, state.bounce);
+ float3 L_background = indirect_background(kg, &state, &ray);
path_radiance_accum_background(&L, throughput, L_background, state.bounce);
#endif
@@ -627,7 +747,7 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,
/* setup shading */
ShaderData sd;
- shader_setup_from_ray(kg, &sd, &isect, &ray, state.bounce);
+ shader_setup_from_ray(kg, &sd, &isect, &ray, state.bounce, state.transparent_bounce);
float rbsdf = path_state_rng_1D(kg, rng, &state, PRNG_BSDF);
shader_eval_surface(kg, &sd, rbsdf, state.flag, SHADER_CONTEXT_MAIN);
@@ -694,35 +814,7 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,
#ifdef __AO__
/* ambient occlusion */
if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
- /* todo: solve correlation */
- float bsdf_u, bsdf_v;
- path_state_rng_2D(kg, rng, &state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
-
- float ao_factor = kernel_data.background.ao_factor;
- float3 ao_N;
- float3 ao_bsdf = shader_bsdf_ao(kg, &sd, ao_factor, &ao_N);
- float3 ao_D;
- float ao_pdf;
- float3 ao_alpha = shader_bsdf_alpha(kg, &sd);
-
- sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
-
- if(dot(sd.Ng, ao_D) > 0.0f && ao_pdf != 0.0f) {
- Ray light_ray;
- float3 ao_shadow;
-
- light_ray.P = ray_offset(sd.P, sd.Ng);
- light_ray.D = ao_D;
- light_ray.t = kernel_data.background.ao_distance;
-#ifdef __OBJECT_MOTION__
- light_ray.time = sd.time;
-#endif
- light_ray.dP = sd.dP;
- light_ray.dD = differential3_zero();
-
- if(!shadow_blocked(kg, &state, &light_ray, &ao_shadow))
- path_radiance_accum_ao(&L, throughput, ao_alpha, ao_bsdf, ao_shadow, state.bounce);
- }
+ kernel_path_ao(kg, &sd, &L, &state, rng, throughput);
}
#endif
@@ -730,60 +822,18 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,
/* bssrdf scatter to a different location on the same object, replacing
* the closures with a diffuse BSDF */
if(sd.flag & SD_BSSRDF) {
- float bssrdf_probability;
- ShaderClosure *sc = subsurface_scatter_pick_closure(kg, &sd, &bssrdf_probability);
-
- /* modify throughput for picking bssrdf or bsdf */
- throughput *= bssrdf_probability;
-
- /* do bssrdf scatter step if we picked a bssrdf closure */
- if(sc) {
- uint lcg_state = lcg_state_init(rng, &state, 0x68bc21eb);
-
- ShaderData bssrdf_sd[BSSRDF_MAX_HITS];
- float bssrdf_u, bssrdf_v;
- path_state_rng_2D(kg, rng, &state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
- int num_hits = subsurface_scatter_multi_step(kg, &sd, bssrdf_sd, state.flag, sc, &lcg_state, bssrdf_u, bssrdf_v, false);
-
- /* compute lighting with the BSDF closure */
- for(int hit = 0; hit < num_hits; hit++) {
- float3 tp = throughput;
- PathState hit_state = state;
- Ray hit_ray = ray;
-
- hit_state.flag |= PATH_RAY_BSSRDF_ANCESTOR;
- hit_state.rng_offset += PRNG_BOUNCE_NUM;
-
- if(kernel_path_integrate_lighting(kg, rng, &bssrdf_sd[hit], &tp, &hit_state, &L, &hit_ray)) {
-#ifdef __LAMP_MIS__
- hit_state.ray_t = 0.0f;
-#endif
-
- kernel_path_indirect(kg, rng, hit_ray, buffer, tp, state.num_samples, hit_state, &L);
-
- /* for render passes, sum and reset indirect light pass variables
- * for the next samples */
- path_radiance_sum_indirect(&L);
- path_radiance_reset_indirect(&L);
- }
- }
+ if(kernel_path_subsurface_scatter(kg, &sd, &L, &state, rng, &ray, &throughput))
break;
- }
}
#endif
- /* The following code is the same as in kernel_path_integrate_lighting(),
+ /* Same as kernel_path_integrate_lighting(kg, rng, &sd, &throughput, &state, &L, &ray),
but for CUDA the function call is slower. */
#ifdef __EMISSION__
if(kernel_data.integrator.use_direct_light) {
/* sample illumination from lights to find path contribution */
if(sd.flag & SD_BSDF_HAS_EVAL) {
float light_t = path_state_rng_1D(kg, rng, &state, PRNG_LIGHT);
-#ifdef __MULTI_CLOSURE__
- float light_o = 0.0f;
-#else
- float light_o = path_state_rng_1D(kg, rng, &state, PRNG_LIGHT_F);
-#endif
float light_u, light_v;
path_state_rng_2D(kg, rng, &state, PRNG_LIGHT_U, &light_u, &light_v);
@@ -795,7 +845,7 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample,
light_ray.time = sd.time;
#endif
- if(direct_emission(kg, &sd, -1, light_t, light_o, light_u, light_v, &light_ray, &L_light, &is_lamp, state.bounce)) {
+ if(direct_emission(kg, &sd, LAMP_NONE, light_t, light_u, light_v, &light_ray, &L_light, &is_lamp, state.bounce, state.transparent_bounce)) {
/* trace shadow ray */
float3 shadow;
@@ -898,69 +948,9 @@ ccl_device_noinline void kernel_branched_path_integrate_lighting(KernelGlobals *
PathState *state, PathRadiance *L, ccl_global float *buffer)
{
#ifdef __EMISSION__
- /* sample illumination from lights to find path contribution */
- if(sd->flag & SD_BSDF_HAS_EVAL) {
- Ray light_ray;
- BsdfEval L_light;
- bool is_lamp;
-
-#ifdef __OBJECT_MOTION__
- light_ray.time = sd->time;
-#endif
-
- /* lamp sampling */
- for(int i = 0; i < kernel_data.integrator.num_all_lights; i++) {
- int num_samples = ceil_to_int(num_samples_adjust*light_select_num_samples(kg, i));
- float num_samples_inv = num_samples_adjust/(num_samples*kernel_data.integrator.num_all_lights);
- RNG lamp_rng = cmj_hash(*rng, i);
-
- if(kernel_data.integrator.pdf_triangles != 0.0f)
- num_samples_inv *= 0.5f;
-
- for(int j = 0; j < num_samples; j++) {
- float light_u, light_v;
- path_branched_rng_2D(kg, &lamp_rng, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
-
- if(direct_emission(kg, sd, i, 0.0f, 0.0f, light_u, light_v, &light_ray, &L_light, &is_lamp, state->bounce)) {
- /* trace shadow ray */
- float3 shadow;
-
- if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
- /* accumulate */
- path_radiance_accum_light(L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
- }
- }
- }
- }
-
- /* mesh light sampling */
- if(kernel_data.integrator.pdf_triangles != 0.0f) {
- int num_samples = ceil_to_int(num_samples_adjust*kernel_data.integrator.mesh_light_samples);
- float num_samples_inv = num_samples_adjust/num_samples;
-
- if(kernel_data.integrator.num_all_lights)
- num_samples_inv *= 0.5f;
-
- for(int j = 0; j < num_samples; j++) {
- float light_t = path_branched_rng_1D(kg, rng, state, j, num_samples, PRNG_LIGHT);
- float light_u, light_v;
- path_branched_rng_2D(kg, rng, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
-
- /* only sample triangle lights */
- if(kernel_data.integrator.num_all_lights)
- light_t = 0.5f*light_t;
-
- if(direct_emission(kg, sd, -1, light_t, 0.0f, light_u, light_v, &light_ray, &L_light, &is_lamp, state->bounce)) {
- /* trace shadow ray */
- float3 shadow;
-
- if(!shadow_blocked(kg, state, &light_ray, &shadow)) {
- /* accumulate */
- path_radiance_accum_light(L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp);
- }
- }
- }
- }
+ if(kernel_data.integrator.use_direct_light) {
+ bool all = kernel_data.integrator.sample_all_lights_direct;
+ kernel_branched_path_integrate_direct_lighting(kg, rng, sd, state, throughput, num_samples_adjust, L, all);
}
#endif
@@ -1043,7 +1033,7 @@ ccl_device_noinline void kernel_branched_path_integrate_lighting(KernelGlobals *
ps.ray_t = 0.0f;
#endif
- kernel_path_indirect(kg, rng, bsdf_ray, buffer, tp*num_samples_inv, num_samples, ps, L);
+ kernel_path_indirect(kg, rng, bsdf_ray, tp*num_samples_inv, num_samples, ps, L);
/* for render passes, sum and reset indirect light pass variables
* for the next samples */
@@ -1092,13 +1082,66 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
#ifdef __VOLUME__
/* volume attenuation, emission, scatter */
- if(state.volume_stack[0].shader != SHADER_NO_ID) {
+ if(state.volume_stack[0].shader != SHADER_NONE) {
Ray volume_ray = ray;
volume_ray.t = (hit)? isect.t: FLT_MAX;
+#ifdef __KERNEL_CPU__
+ /* decoupled ray marching only supported on CPU */
+ bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack);
+
+ /* cache steps along volume for repeated sampling */
+ VolumeSegment volume_segment;
+ ShaderData volume_sd;
+
+ shader_setup_from_volume(kg, &volume_sd, &volume_ray, state.bounce, state.transparent_bounce);
+ kernel_volume_decoupled_record(kg, &state,
+ &volume_ray, &volume_sd, &volume_segment, heterogeneous);
+
+ /* sample scattering */
+ int num_samples = kernel_data.integrator.volume_samples;
+ float num_samples_inv = 1.0f/num_samples;
+
+ for(int j = 0; j < num_samples; j++) {
+ /* workaround to fix correlation bug in T38710, can find better solution
+ * in random number generator later, for now this is done here to not impact
+ * performance of rendering without volumes */
+ RNG tmp_rng = cmj_hash(*rng, state.rng_offset);
+
+ PathState ps = state;
+ Ray pray = ray;
+ float3 tp = throughput;
+
+ /* branch RNG state */
+ path_state_branch(&ps, j, num_samples);
+
+ VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
+ &ps, &volume_ray, &volume_sd, &tp, &tmp_rng, &volume_segment);
+
+ if(result == VOLUME_PATH_SCATTERED) {
+ /* todo: use all-light sampling */
+ if(kernel_path_integrate_scatter_lighting(kg, rng, &volume_sd, &tp, &ps, &L, &pray, num_samples_inv)) {
+ kernel_path_indirect(kg, rng, pray, tp*num_samples_inv, num_samples, ps, &L);
+
+ /* for render passes, sum and reset indirect light pass variables
+ * for the next samples */
+ path_radiance_sum_indirect(&L);
+ path_radiance_reset_indirect(&L);
+ }
+ }
+ }
+
+ /* emission and transmittance */
+ if(volume_segment.closure_flag & SD_EMISSION)
+ path_radiance_accum_emission(&L, throughput, volume_segment.accum_emission, state.bounce);
+ throughput *= volume_segment.accum_transmittance;
+
+ /* free cached steps */
+ kernel_volume_decoupled_free(kg, &volume_segment);
+#else
+ /* GPU: no decoupled ray marching, scatter probabilistically */
int num_samples = kernel_data.integrator.volume_samples;
float num_samples_inv = 1.0f/num_samples;
- float3 avg_tp = make_float3(0.0f, 0.0f, 0.0f);
/* todo: we should cache the shader evaluations from stepping
* through the volume, for now we redo them multiple times */
@@ -1118,7 +1161,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
if(result == VOLUME_PATH_SCATTERED) {
/* todo: use all-light sampling */
if(kernel_path_integrate_scatter_lighting(kg, rng, &volume_sd, &tp, &ps, &L, &pray, num_samples_inv)) {
- kernel_path_indirect(kg, rng, pray, buffer, tp*num_samples_inv, num_samples, ps, &L);
+ kernel_path_indirect(kg, rng, pray, tp*num_samples_inv, num_samples, ps, &L);
/* for render passes, sum and reset indirect light pass variables
* for the next samples */
@@ -1126,11 +1169,11 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
path_radiance_reset_indirect(&L);
}
}
- else
- avg_tp += tp;
}
- throughput = avg_tp * num_samples_inv;
+ /* todo: avoid this calculation using decoupled ray marching */
+ kernel_volume_shadow(kg, &state, &volume_ray, &throughput);
+#endif
}
#endif
@@ -1147,7 +1190,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
#ifdef __BACKGROUND__
/* sample background shader */
- float3 L_background = indirect_background(kg, &ray, state.flag, state.ray_pdf, state.bounce);
+ float3 L_background = indirect_background(kg, &state, &ray);
path_radiance_accum_background(&L, throughput, L_background, state.bounce);
#endif
@@ -1156,7 +1199,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
/* setup shading */
ShaderData sd;
- shader_setup_from_ray(kg, &sd, &isect, &ray, state.bounce);
+ shader_setup_from_ray(kg, &sd, &isect, &ray, state.bounce, state.transparent_bounce);
shader_eval_surface(kg, &sd, 0.0f, state.flag, SHADER_CONTEXT_MAIN);
shader_merge_closures(&sd);
@@ -1270,21 +1313,21 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
/* do subsurface scatter step with copy of shader data, this will
* replace the BSSRDF with a diffuse BSDF closure */
for(int j = 0; j < num_samples; j++) {
- ShaderData bssrdf_sd[BSSRDF_MAX_HITS];
- float bssrdf_u, bssrdf_v;
- path_branched_rng_2D(kg, &bssrdf_rng, &state, j, num_samples, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
- int num_hits = subsurface_scatter_multi_step(kg, &sd, bssrdf_sd, state.flag, sc, &lcg_state, bssrdf_u, bssrdf_v, true);
+ ShaderData bssrdf_sd[BSSRDF_MAX_HITS];
+ float bssrdf_u, bssrdf_v;
+ path_branched_rng_2D(kg, &bssrdf_rng, &state, j, num_samples, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
+ int num_hits = subsurface_scatter_multi_step(kg, &sd, bssrdf_sd, state.flag, sc, &lcg_state, bssrdf_u, bssrdf_v, true);
- /* compute lighting with the BSDF closure */
- for(int hit = 0; hit < num_hits; hit++) {
- PathState hit_state = state;
+ /* compute lighting with the BSDF closure */
+ for(int hit = 0; hit < num_hits; hit++) {
+ PathState hit_state = state;
- path_state_branch(&hit_state, j, num_samples);
+ path_state_branch(&hit_state, j, num_samples);
- kernel_branched_path_integrate_lighting(kg, rng,
- &bssrdf_sd[hit], throughput, num_samples_inv,
- &hit_state, &L, buffer);
- }
+ kernel_branched_path_integrate_lighting(kg, rng,
+ &bssrdf_sd[hit], throughput, num_samples_inv,
+ &hit_state, &L, buffer);
+ }
}
state.flag &= ~PATH_RAY_BSSRDF_ANCESTOR;
diff --git a/intern/cycles/kernel/kernel_path_state.h b/intern/cycles/kernel/kernel_path_state.h
index c3f617542a6..406654c1741 100644
--- a/intern/cycles/kernel/kernel_path_state.h
+++ b/intern/cycles/kernel/kernel_path_state.h
@@ -50,7 +50,7 @@ ccl_device_inline void path_state_init(KernelGlobals *kg, PathState *state, RNG
state->rng_congruential = lcg_init(*rng + sample*0x51633e2d);
}
else {
- state->volume_stack[0].shader = SHADER_NO_ID;
+ state->volume_stack[0].shader = SHADER_NONE;
}
#endif
}
@@ -132,6 +132,9 @@ ccl_device_inline uint path_state_ray_visibility(KernelGlobals *kg, PathState *s
/* for visibility, diffuse/glossy are for reflection only */
if(flag & PATH_RAY_TRANSMIT)
flag &= ~(PATH_RAY_DIFFUSE|PATH_RAY_GLOSSY);
+ /* todo: this is not supported as its own ray visibility yet */
+ if(state->flag & PATH_RAY_VOLUME_SCATTER)
+ flag |= PATH_RAY_DIFFUSE;
/* for camera visibility, use render layer flags */
if(flag & PATH_RAY_CAMERA)
flag |= kernel_data.integrator.layer_flag;
diff --git a/intern/cycles/kernel/kernel_projection.h b/intern/cycles/kernel/kernel_projection.h
index e2108604bc8..6744471d659 100644
--- a/intern/cycles/kernel/kernel_projection.h
+++ b/intern/cycles/kernel/kernel_projection.h
@@ -39,7 +39,7 @@ CCL_NAMESPACE_BEGIN
ccl_device float2 direction_to_spherical(float3 dir)
{
- float theta = acosf(dir.z);
+ float theta = safe_acosf(dir.z);
float phi = atan2f(dir.x, dir.y);
return make_float2(theta, phi);
@@ -97,7 +97,7 @@ ccl_device float3 fisheye_to_direction(float u, float v, float fov)
if(r > 1.0f)
return make_float3(0.0f, 0.0f, 0.0f);
- float phi = acosf((r != 0.0f)? u/r: 0.0f);
+ float phi = safe_acosf((r != 0.0f)? u/r: 0.0f);
float theta = r * fov * 0.5f;
if(v < 0.0f) phi = -phi;
@@ -111,7 +111,7 @@ ccl_device float3 fisheye_to_direction(float u, float v, float fov)
ccl_device float2 direction_to_fisheye_equisolid(float3 dir, float lens, float width, float height)
{
- float theta = acosf(dir.x);
+ float theta = safe_acosf(dir.x);
float r = 2.0f * lens * sinf(theta * 0.5f);
float phi = atan2f(dir.z, dir.y);
@@ -132,7 +132,7 @@ ccl_device float3 fisheye_equisolid_to_direction(float u, float v, float lens, f
if(r > rmax)
return make_float3(0.0f, 0.0f, 0.0f);
- float phi = acosf((r != 0.0f)? u/r: 0.0f);
+ float phi = safe_acosf((r != 0.0f)? u/r: 0.0f);
float theta = 2.0f * asinf(r/(2.0f * lens));
if(v < 0.0f) phi = -phi;
diff --git a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h
index ef397269ec2..31cb6ff6abd 100644
--- a/intern/cycles/kernel/kernel_random.h
+++ b/intern/cycles/kernel/kernel_random.h
@@ -120,6 +120,9 @@ ccl_device_inline float path_rng_1D(KernelGlobals *kg, RNG *rng, int sample, int
/* Cranly-Patterson rotation using rng seed */
float shift;
+ /* using the same *rng value to offset seems to give correlation issues,
+ * we could hash it with the dimension but this has a performance impact,
+ * we need to find a solution for this */
if(dimension & 1)
shift = (*rng >> 16) * (1.0f/(float)0xFFFF);
else
diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h
index b113e906e9d..58cec090410 100644
--- a/intern/cycles/kernel/kernel_shader.h
+++ b/intern/cycles/kernel/kernel_shader.h
@@ -39,7 +39,7 @@ ccl_device void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd
{
if(sd->flag & SD_OBJECT_MOTION) {
sd->ob_tfm = object_fetch_transform_motion(kg, sd->object, time);
- sd->ob_itfm= transform_quick_inverse(sd->ob_tfm);
+ sd->ob_itfm = transform_quick_inverse(sd->ob_tfm);
}
else {
sd->ob_tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
@@ -49,12 +49,13 @@ ccl_device void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd
#endif
ccl_device void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd,
- const Intersection *isect, const Ray *ray, int bounce)
+ const Intersection *isect, const Ray *ray, int bounce, int transparent_bounce)
{
#ifdef __INSTANCING__
- sd->object = (isect->object == ~0)? kernel_tex_fetch(__prim_object, isect->prim): isect->object;
+ sd->object = (isect->object == PRIM_NONE)? kernel_tex_fetch(__prim_object, isect->prim): isect->object;
#endif
+ sd->type = isect->type;
sd->flag = kernel_tex_fetch(__object_flag, sd->object);
/* matrices and time */
@@ -66,37 +67,31 @@ ccl_device void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd,
sd->prim = kernel_tex_fetch(__prim_index, isect->prim);
sd->ray_length = isect->t;
sd->ray_depth = bounce;
+ sd->transparent_depth = transparent_bounce;
+
+#ifdef __UV__
+ sd->u = isect->u;
+ sd->v = isect->v;
+#endif
#ifdef __HAIR__
- if(kernel_tex_fetch(__prim_segment, isect->prim) != ~0) {
- /* Strand Shader setting*/
+ if(sd->type & PRIMITIVE_ALL_CURVE) {
+ /* curve */
float4 curvedata = kernel_tex_fetch(__curves, sd->prim);
sd->shader = __float_as_int(curvedata.z);
- sd->segment = isect->segment;
sd->P = bvh_curve_refine(kg, sd, isect, ray);
}
- else {
+ else
#endif
- /* fetch triangle data */
+ if(sd->type & PRIMITIVE_TRIANGLE) {
+ /* static triangle */
float4 Ns = kernel_tex_fetch(__tri_normal, sd->prim);
float3 Ng = make_float3(Ns.x, Ns.y, Ns.z);
sd->shader = __float_as_int(Ns.w);
-#ifdef __HAIR__
- sd->segment = ~0;
- /*elements for minimum hair width using transparency bsdf*/
- /*sd->curve_transparency = 0.0f;*/
- /*sd->curve_radius = 0.0f;*/
-#endif
-
-#ifdef __UV__
- sd->u = isect->u;
- sd->v = isect->v;
-#endif
-
/* vectors */
- sd->P = bvh_triangle_refine(kg, sd, isect, ray);
+ sd->P = triangle_refine(kg, sd, isect, ray);
sd->Ng = Ng;
sd->N = Ng;
@@ -106,19 +101,20 @@ ccl_device void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd,
#ifdef __DPDU__
/* dPdu/dPdv */
- triangle_dPdudv(kg, &sd->dPdu, &sd->dPdv, sd->prim);
+ triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv);
#endif
-
-#ifdef __HAIR__
}
-#endif
+ else {
+ /* motion triangle */
+ motion_triangle_shader_setup(kg, sd, isect, ray, false);
+ }
sd->I = -ray->D;
sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2);
#ifdef __INSTANCING__
- if(isect->object != ~0) {
+ if(isect->object != OBJECT_NONE) {
/* instance transform */
object_normal_transform(kg, sd, &sd->N);
object_normal_transform(kg, sd, &sd->Ng);
@@ -161,39 +157,41 @@ ccl_device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderDat
/* object, matrices, time, ray_length stay the same */
sd->flag = kernel_tex_fetch(__object_flag, sd->object);
sd->prim = kernel_tex_fetch(__prim_index, isect->prim);
-
- /* fetch triangle data */
- float4 Ns = kernel_tex_fetch(__tri_normal, sd->prim);
- float3 Ng = make_float3(Ns.x, Ns.y, Ns.z);
- sd->shader = __float_as_int(Ns.w);
-
-#ifdef __HAIR__
- sd->segment = ~0;
-#endif
+ sd->type = isect->type;
#ifdef __UV__
sd->u = isect->u;
sd->v = isect->v;
#endif
- /* vectors */
- sd->P = bvh_triangle_refine_subsurface(kg, sd, isect, ray);
- sd->Ng = Ng;
- sd->N = Ng;
-
- /* smooth normal */
- if(sd->shader & SHADER_SMOOTH_NORMAL)
- sd->N = triangle_smooth_normal(kg, sd->prim, sd->u, sd->v);
+ /* fetch triangle data */
+ if(sd->type == PRIMITIVE_TRIANGLE) {
+ float4 Ns = kernel_tex_fetch(__tri_normal, sd->prim);
+ float3 Ng = make_float3(Ns.x, Ns.y, Ns.z);
+ sd->shader = __float_as_int(Ns.w);
+
+ /* static triangle */
+ sd->P = triangle_refine_subsurface(kg, sd, isect, ray);
+ sd->Ng = Ng;
+ sd->N = Ng;
+
+ if(sd->shader & SHADER_SMOOTH_NORMAL)
+ sd->N = triangle_smooth_normal(kg, sd->prim, sd->u, sd->v);
#ifdef __DPDU__
- /* dPdu/dPdv */
- triangle_dPdudv(kg, &sd->dPdu, &sd->dPdv, sd->prim);
+ /* dPdu/dPdv */
+ triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv);
#endif
+ }
+ else {
+ /* motion triangle */
+ motion_triangle_shader_setup(kg, sd, isect, ray, true);
+ }
sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2);
#ifdef __INSTANCING__
- if(isect->object != ~0) {
+ if(isect->object != OBJECT_NONE) {
/* instance transform */
object_normal_transform(kg, sd, &sd->N);
object_normal_transform(kg, sd, &sd->Ng);
@@ -231,7 +229,7 @@ ccl_device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderDat
ccl_device void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd,
const float3 P, const float3 Ng, const float3 I,
- int shader, int object, int prim, float u, float v, float t, float time, int bounce, int segment)
+ int shader, int object, int prim, float u, float v, float t, float time, int bounce, int transparent_bounce)
{
/* vectors */
sd->P = P;
@@ -239,9 +237,7 @@ ccl_device void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd,
sd->Ng = Ng;
sd->I = I;
sd->shader = shader;
-#ifdef __HAIR__
- sd->segment = segment;
-#endif
+ sd->type = (prim == PRIM_NONE)? PRIMITIVE_NONE: PRIMITIVE_TRIANGLE;
/* primitive */
#ifdef __INSTANCING__
@@ -255,12 +251,13 @@ ccl_device void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd,
#endif
sd->ray_length = t;
sd->ray_depth = bounce;
+ sd->transparent_depth = transparent_bounce;
/* detect instancing, for non-instanced the object index is -object-1 */
#ifdef __INSTANCING__
bool instanced = false;
- if(sd->prim != ~0) {
+ if(sd->prim != PRIM_NONE) {
if(sd->object >= 0)
instanced = true;
else
@@ -271,7 +268,7 @@ ccl_device void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd,
#endif
sd->flag = kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2);
- if(sd->object != -1) {
+ if(sd->object != OBJECT_NONE) {
sd->flag |= kernel_tex_fetch(__object_flag, sd->object);
#ifdef __OBJECT_MOTION__
@@ -283,36 +280,20 @@ ccl_device void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd,
}
#endif
- /* smooth normal */
-#ifdef __HAIR__
- if(sd->shader & SHADER_SMOOTH_NORMAL && sd->segment == ~0) {
- sd->N = triangle_smooth_normal(kg, sd->prim, sd->u, sd->v);
-#else
- if(sd->shader & SHADER_SMOOTH_NORMAL) {
- sd->N = triangle_smooth_normal(kg, sd->prim, sd->u, sd->v);
-#endif
+ if(sd->type & PRIMITIVE_TRIANGLE) {
+ /* smooth normal */
+ if(sd->shader & SHADER_SMOOTH_NORMAL) {
+ sd->N = triangle_smooth_normal(kg, sd->prim, sd->u, sd->v);
#ifdef __INSTANCING__
- if(instanced)
- object_normal_transform(kg, sd, &sd->N);
+ if(instanced)
+ object_normal_transform(kg, sd, &sd->N);
#endif
- }
+ }
+ /* dPdu/dPdv */
#ifdef __DPDU__
- /* dPdu/dPdv */
-#ifdef __HAIR__
- if(sd->prim == ~0 || sd->segment != ~0) {
- sd->dPdu = make_float3(0.0f, 0.0f, 0.0f);
- sd->dPdv = make_float3(0.0f, 0.0f, 0.0f);
- }
-#else
- if(sd->prim == ~0) {
- sd->dPdu = make_float3(0.0f, 0.0f, 0.0f);
- sd->dPdv = make_float3(0.0f, 0.0f, 0.0f);
- }
-#endif
- else {
- triangle_dPdudv(kg, &sd->dPdu, &sd->dPdv, sd->prim);
+ triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv);
#ifdef __INSTANCING__
if(instanced) {
@@ -320,11 +301,17 @@ ccl_device void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd,
object_dir_transform(kg, sd, &sd->dPdv);
}
#endif
+#endif
}
+ else {
+#ifdef __DPDU__
+ sd->dPdu = make_float3(0.0f, 0.0f, 0.0f);
+ sd->dPdv = make_float3(0.0f, 0.0f, 0.0f);
#endif
+ }
/* backfacing test */
- if(sd->prim != ~0) {
+ if(sd->prim != PRIM_NONE) {
bool backfacing = (dot(sd->Ng, sd->I) < 0.0f);
if(backfacing) {
@@ -355,20 +342,19 @@ ccl_device void shader_setup_from_displace(KernelGlobals *kg, ShaderData *sd,
float3 P, Ng, I = make_float3(0.0f, 0.0f, 0.0f);
int shader;
- P = triangle_point_MT(kg, prim, u, v);
- Ng = triangle_normal_MT(kg, prim, &shader);
+ triangle_point_normal(kg, prim, u, v, &P, &Ng, &shader);
/* force smooth shading for displacement */
shader |= SHADER_SMOOTH_NORMAL;
/* watch out: no instance transform currently */
- shader_setup_from_sample(kg, sd, P, Ng, I, shader, object, prim, u, v, 0.0f, TIME_INVALID, 0, ~0);
+ shader_setup_from_sample(kg, sd, P, Ng, I, shader, object, prim, u, v, 0.0f, TIME_INVALID, 0, 0);
}
/* ShaderData setup from ray into background */
-ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderData *sd, const Ray *ray, int bounce)
+ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderData *sd, const Ray *ray, int bounce, int transparent_bounce)
{
/* vectors */
sd->P = ray->D;
@@ -382,11 +368,12 @@ ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderDat
#endif
sd->ray_length = 0.0f;
sd->ray_depth = bounce;
+ sd->transparent_depth = transparent_bounce;
#ifdef __INSTANCING__
- sd->object = ~0;
+ sd->object = PRIM_NONE;
#endif
- sd->prim = ~0;
+ sd->prim = PRIM_NONE;
#ifdef __UV__
sd->u = 0.0f;
sd->v = 0.0f;
@@ -411,28 +398,27 @@ ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderDat
/* ShaderData setup from point inside volume */
-ccl_device_inline void shader_setup_from_volume(KernelGlobals *kg, ShaderData *sd, const Ray *ray, int bounce)
+ccl_device_inline void shader_setup_from_volume(KernelGlobals *kg, ShaderData *sd, const Ray *ray, int bounce, int transparent_bounce)
{
/* vectors */
sd->P = ray->P;
sd->N = -ray->D;
sd->Ng = -ray->D;
sd->I = -ray->D;
- sd->shader = SHADER_NO_ID;
+ sd->shader = SHADER_NONE;
sd->flag = 0;
#ifdef __OBJECT_MOTION__
sd->time = ray->time;
#endif
sd->ray_length = 0.0f; /* todo: can we set this to some useful value? */
sd->ray_depth = bounce;
+ sd->transparent_depth = transparent_bounce;
#ifdef __INSTANCING__
- sd->object = ~0; /* todo: fill this for texture coordinates */
-#endif
- sd->prim = ~0;
-#ifdef __HAIR__
- sd->segment = ~0;
+ sd->object = PRIM_NONE; /* todo: fill this for texture coordinates */
#endif
+ sd->prim = PRIM_NONE;
+ sd->type = PRIMITIVE_NONE;
#ifdef __UV__
sd->u = 0.0f;
@@ -471,23 +457,32 @@ ccl_device void shader_merge_closures(ShaderData *sd)
ShaderClosure *scj = &sd->closure[j];
#ifdef __OSL__
- if(!sci->prim && !scj->prim && sci->type == scj->type && sci->data0 == scj->data0 && sci->data1 == scj->data1) {
-#else
- if(sci->type == scj->type && sci->data0 == scj->data0 && sci->data1 == scj->data1) {
+ if(sci->prim || scj->prim)
+ continue;
#endif
- sci->weight += scj->weight;
- sci->sample_weight += scj->sample_weight;
-
- int size = sd->num_closure - (j+1);
- if(size > 0) {
- for(int k = 0; k < size; k++) {
- scj[k] = scj[k+1];
- }
- }
- sd->num_closure--;
- j--;
+ if(!(sci->type == scj->type && sci->data0 == scj->data0 && sci->data1 == scj->data1))
+ continue;
+
+ if(CLOSURE_IS_BSDF_OR_BSSRDF(sci->type)) {
+ if(sci->N != scj->N)
+ continue;
+ else if(CLOSURE_IS_BSDF_ANISOTROPIC(sci->type) && sci->T != scj->T)
+ continue;
}
+
+ sci->weight += scj->weight;
+ sci->sample_weight += scj->sample_weight;
+
+ int size = sd->num_closure - (j+1);
+ if(size > 0) {
+ for(int k = 0; k < size; k++) {
+ scj[k] = scj[k+1];
+ }
+ }
+
+ sd->num_closure--;
+ j--;
}
}
}
@@ -495,8 +490,6 @@ ccl_device void shader_merge_closures(ShaderData *sd)
/* BSDF */
-#ifdef __MULTI_CLOSURE__
-
ccl_device_inline void _shader_bsdf_multi_eval(KernelGlobals *kg, const ShaderData *sd, const float3 omega_in, float *pdf,
int skip_bsdf, BsdfEval *result_eval, float sum_pdf, float sum_sample_weight)
{
@@ -524,28 +517,18 @@ ccl_device_inline void _shader_bsdf_multi_eval(KernelGlobals *kg, const ShaderDa
*pdf = (sum_sample_weight > 0.0f)? sum_pdf/sum_sample_weight: 0.0f;
}
-#endif
-
ccl_device void shader_bsdf_eval(KernelGlobals *kg, const ShaderData *sd,
const float3 omega_in, BsdfEval *eval, float *pdf)
{
-#ifdef __MULTI_CLOSURE__
bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);
_shader_bsdf_multi_eval(kg, sd, omega_in, pdf, -1, eval, 0.0f, 0.0f);
-#else
- const ShaderClosure *sc = &sd->closure;
-
- *pdf = 0.0f;
- *eval = bsdf_eval(kg, sd, sc, omega_in, pdf)*sc->weight;
-#endif
}
ccl_device int shader_bsdf_sample(KernelGlobals *kg, const ShaderData *sd,
float randu, float randv, BsdfEval *bsdf_eval,
float3 *omega_in, differential3 *domega_in, float *pdf)
{
-#ifdef __MULTI_CLOSURE__
int sampled = 0;
if(sd->num_closure > 1) {
@@ -596,13 +579,6 @@ ccl_device int shader_bsdf_sample(KernelGlobals *kg, const ShaderData *sd,
}
return label;
-#else
- /* sample the single closure that we picked */
- *pdf = 0.0f;
- int label = bsdf_sample(kg, sd, &sd->closure, randu, randv, bsdf_eval, omega_in, domega_in, pdf);
- *bsdf_eval *= sd->closure.weight;
- return label;
-#endif
}
ccl_device int shader_bsdf_sample_closure(KernelGlobals *kg, const ShaderData *sd,
@@ -623,21 +599,16 @@ ccl_device int shader_bsdf_sample_closure(KernelGlobals *kg, const ShaderData *s
ccl_device void shader_bsdf_blur(KernelGlobals *kg, ShaderData *sd, float roughness)
{
-#ifdef __MULTI_CLOSURE__
for(int i = 0; i< sd->num_closure; i++) {
ShaderClosure *sc = &sd->closure[i];
if(CLOSURE_IS_BSDF(sc->type))
bsdf_blur(kg, sc, roughness);
}
-#else
- bsdf_blur(kg, &sd->closure, roughness);
-#endif
}
ccl_device float3 shader_bsdf_transparency(KernelGlobals *kg, ShaderData *sd)
{
-#ifdef __MULTI_CLOSURE__
float3 eval = make_float3(0.0f, 0.0f, 0.0f);
for(int i = 0; i< sd->num_closure; i++) {
@@ -648,12 +619,6 @@ ccl_device float3 shader_bsdf_transparency(KernelGlobals *kg, ShaderData *sd)
}
return eval;
-#else
- if(sd->closure.type == CLOSURE_BSDF_TRANSPARENT_ID)
- return sd->closure.weight;
- else
- return make_float3(0.0f, 0.0f, 0.0f);
-#endif
}
ccl_device float3 shader_bsdf_alpha(KernelGlobals *kg, ShaderData *sd)
@@ -668,7 +633,6 @@ ccl_device float3 shader_bsdf_alpha(KernelGlobals *kg, ShaderData *sd)
ccl_device float3 shader_bsdf_diffuse(KernelGlobals *kg, ShaderData *sd)
{
-#ifdef __MULTI_CLOSURE__
float3 eval = make_float3(0.0f, 0.0f, 0.0f);
for(int i = 0; i< sd->num_closure; i++) {
@@ -679,17 +643,10 @@ ccl_device float3 shader_bsdf_diffuse(KernelGlobals *kg, ShaderData *sd)
}
return eval;
-#else
- if(CLOSURE_IS_BSDF_DIFFUSE(sd->closure.type))
- return sd->closure.weight;
- else
- return make_float3(0.0f, 0.0f, 0.0f);
-#endif
}
ccl_device float3 shader_bsdf_glossy(KernelGlobals *kg, ShaderData *sd)
{
-#ifdef __MULTI_CLOSURE__
float3 eval = make_float3(0.0f, 0.0f, 0.0f);
for(int i = 0; i< sd->num_closure; i++) {
@@ -700,17 +657,10 @@ ccl_device float3 shader_bsdf_glossy(KernelGlobals *kg, ShaderData *sd)
}
return eval;
-#else
- if(CLOSURE_IS_BSDF_GLOSSY(sd->closure.type))
- return sd->closure.weight;
- else
- return make_float3(0.0f, 0.0f, 0.0f);
-#endif
}
ccl_device float3 shader_bsdf_transmission(KernelGlobals *kg, ShaderData *sd)
{
-#ifdef __MULTI_CLOSURE__
float3 eval = make_float3(0.0f, 0.0f, 0.0f);
for(int i = 0; i< sd->num_closure; i++) {
@@ -721,17 +671,10 @@ ccl_device float3 shader_bsdf_transmission(KernelGlobals *kg, ShaderData *sd)
}
return eval;
-#else
- if(CLOSURE_IS_BSDF_TRANSMISSION(sd->closure.type))
- return sd->closure.weight;
- else
- return make_float3(0.0f, 0.0f, 0.0f);
-#endif
}
ccl_device float3 shader_bsdf_subsurface(KernelGlobals *kg, ShaderData *sd)
{
-#ifdef __MULTI_CLOSURE__
float3 eval = make_float3(0.0f, 0.0f, 0.0f);
for(int i = 0; i< sd->num_closure; i++) {
@@ -742,17 +685,10 @@ ccl_device float3 shader_bsdf_subsurface(KernelGlobals *kg, ShaderData *sd)
}
return eval;
-#else
- if(CLOSURE_IS_BSSRDF(sd->closure.type))
- return sd->closure.weight;
- else
- return make_float3(0.0f, 0.0f, 0.0f);
-#endif
}
ccl_device float3 shader_bsdf_ao(KernelGlobals *kg, ShaderData *sd, float ao_factor, float3 *N_)
{
-#ifdef __MULTI_CLOSURE__
float3 eval = make_float3(0.0f, 0.0f, 0.0f);
float3 N = make_float3(0.0f, 0.0f, 0.0f);
@@ -776,21 +712,10 @@ ccl_device float3 shader_bsdf_ao(KernelGlobals *kg, ShaderData *sd, float ao_fac
*N_ = N;
return eval;
-#else
- *N_ = sd->N;
-
- if(CLOSURE_IS_BSDF_DIFFUSE(sd->closure.type))
- return sd->closure.weight*ao_factor;
- else if(CLOSURE_IS_AMBIENT_OCCLUSION(sd->closure.type))
- return sd->closure.weight;
- else
- return make_float3(0.0f, 0.0f, 0.0f);
-#endif
}
ccl_device float3 shader_bssrdf_sum(ShaderData *sd, float3 *N_, float *texture_blur_)
{
-#ifdef __MULTI_CLOSURE__
float3 eval = make_float3(0.0f, 0.0f, 0.0f);
float3 N = make_float3(0.0f, 0.0f, 0.0f);
float texture_blur = 0.0f, weight_sum = 0.0f;
@@ -815,20 +740,6 @@ ccl_device float3 shader_bssrdf_sum(ShaderData *sd, float3 *N_, float *texture_b
*texture_blur_ = texture_blur/weight_sum;
return eval;
-#else
- if(CLOSURE_IS_BSSRDF(sd->closure.type)) {
- if(N_) *N_ = sd->closure.N;
- if(texture_blur_) *texture_blur_ = sd->closure.data1;
-
- return sd->closure.weight;
- }
- else {
- if(N_) *N_ = sd->N;
- if(texture_blur_) *texture_blur_ = 0.0f;
-
- return make_float3(0.0f, 0.0f, 0.0f);
- }
-#endif
}
/* Emission */
@@ -841,7 +752,6 @@ ccl_device float3 emissive_eval(KernelGlobals *kg, ShaderData *sd, ShaderClosure
ccl_device float3 shader_emissive_eval(KernelGlobals *kg, ShaderData *sd)
{
float3 eval;
-#ifdef __MULTI_CLOSURE__
eval = make_float3(0.0f, 0.0f, 0.0f);
for(int i = 0; i < sd->num_closure; i++) {
@@ -850,9 +760,6 @@ ccl_device float3 shader_emissive_eval(KernelGlobals *kg, ShaderData *sd)
if(CLOSURE_IS_EMISSION(sc->type))
eval += emissive_eval(kg, sd, sc)*sc->weight;
}
-#else
- eval = emissive_eval(kg, sd, &sd->closure)*sd->closure.weight;
-#endif
return eval;
}
@@ -861,7 +768,6 @@ ccl_device float3 shader_emissive_eval(KernelGlobals *kg, ShaderData *sd)
ccl_device float3 shader_holdout_eval(KernelGlobals *kg, ShaderData *sd)
{
-#ifdef __MULTI_CLOSURE__
float3 weight = make_float3(0.0f, 0.0f, 0.0f);
for(int i = 0; i < sd->num_closure; i++) {
@@ -872,12 +778,6 @@ ccl_device float3 shader_holdout_eval(KernelGlobals *kg, ShaderData *sd)
}
return weight;
-#else
- if(sd->closure.type == CLOSURE_HOLDOUT_ID)
- return make_float3(1.0f, 1.0f, 1.0f);
-
- return make_float3(0.0f, 0.0f, 0.0f);
-#endif
}
/* Surface Evaluation */
@@ -885,12 +785,8 @@ ccl_device float3 shader_holdout_eval(KernelGlobals *kg, ShaderData *sd)
ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd,
float randb, int path_flag, ShaderContext ctx)
{
-#ifdef __MULTI_CLOSURE__
sd->num_closure = 0;
sd->randb_closure = randb;
-#else
- sd->closure.type = NBUILTIN_CLOSURES;
-#endif
#ifdef __OSL__
if(kg->osl)
@@ -899,7 +795,7 @@ ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd,
#endif
{
#ifdef __SVM__
- svm_eval_nodes(kg, sd, SHADER_TYPE_SURFACE, randb, path_flag);
+ svm_eval_nodes(kg, sd, SHADER_TYPE_SURFACE, path_flag);
#else
sd->closure.weight = make_float3(0.8f, 0.8f, 0.8f);
sd->closure.N = sd->N;
@@ -912,12 +808,8 @@ ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd,
ccl_device float3 shader_eval_background(KernelGlobals *kg, ShaderData *sd, int path_flag, ShaderContext ctx)
{
-#ifdef __MULTI_CLOSURE__
sd->num_closure = 0;
sd->randb_closure = 0.0f;
-#else
- sd->closure.type = NBUILTIN_CLOSURES;
-#endif
#ifdef __OSL__
if(kg->osl) {
@@ -928,9 +820,8 @@ ccl_device float3 shader_eval_background(KernelGlobals *kg, ShaderData *sd, int
{
#ifdef __SVM__
- svm_eval_nodes(kg, sd, SHADER_TYPE_SURFACE, 0.0f, path_flag);
+ svm_eval_nodes(kg, sd, SHADER_TYPE_SURFACE, path_flag);
-#ifdef __MULTI_CLOSURE__
float3 eval = make_float3(0.0f, 0.0f, 0.0f);
for(int i = 0; i< sd->num_closure; i++) {
@@ -942,13 +833,6 @@ ccl_device float3 shader_eval_background(KernelGlobals *kg, ShaderData *sd, int
return eval;
#else
- if(sd->closure.type == CLOSURE_BACKGROUND_ID)
- return sd->closure.weight;
- else
- return make_float3(0.0f, 0.0f, 0.0f);
-#endif
-
-#else
return make_float3(0.8f, 0.8f, 0.8f);
#endif
}
@@ -1067,14 +951,10 @@ ccl_device void shader_eval_volume(KernelGlobals *kg, ShaderData *sd,
{
/* reset closures once at the start, we will be accumulating the closures
* for all volumes in the stack into a single array of closures */
-#ifdef __MULTI_CLOSURE__
sd->num_closure = 0;
-#else
- sd->closure.type = NBUILTIN_CLOSURES;
-#endif
sd->flag = 0;
- for(int i = 0; stack[i].shader != SHADER_NO_ID; i++) {
+ for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
/* setup shaderdata from stack. it's mostly setup already in
* shader_setup_from_volume, this switching should be quick */
sd->object = stack[i].object;
@@ -1083,7 +963,7 @@ ccl_device void shader_eval_volume(KernelGlobals *kg, ShaderData *sd,
sd->flag &= ~(SD_SHADER_FLAGS|SD_OBJECT_FLAGS);
sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2);
- if(sd->object != ~0) {
+ if(sd->object != OBJECT_NONE) {
sd->flag |= kernel_tex_fetch(__object_flag, sd->object);
#ifdef __OBJECT_MOTION__
@@ -1102,7 +982,7 @@ ccl_device void shader_eval_volume(KernelGlobals *kg, ShaderData *sd,
else
#endif
{
- svm_eval_nodes(kg, sd, SHADER_TYPE_VOLUME, 0.0f, path_flag);
+ svm_eval_nodes(kg, sd, SHADER_TYPE_VOLUME, path_flag);
}
#endif
@@ -1118,12 +998,8 @@ ccl_device void shader_eval_volume(KernelGlobals *kg, ShaderData *sd,
ccl_device void shader_eval_displacement(KernelGlobals *kg, ShaderData *sd, ShaderContext ctx)
{
-#ifdef __MULTI_CLOSURE__
sd->num_closure = 0;
sd->randb_closure = 0.0f;
-#else
- sd->closure.type = NBUILTIN_CLOSURES;
-#endif
/* this will modify sd->P */
#ifdef __SVM__
@@ -1133,7 +1009,7 @@ ccl_device void shader_eval_displacement(KernelGlobals *kg, ShaderData *sd, Shad
else
#endif
{
- svm_eval_nodes(kg, sd, SHADER_TYPE_DISPLACEMENT, 0.0f, 0);
+ svm_eval_nodes(kg, sd, SHADER_TYPE_DISPLACEMENT, 0);
}
#endif
}
@@ -1147,7 +1023,7 @@ ccl_device bool shader_transparent_shadow(KernelGlobals *kg, Intersection *isect
int shader = 0;
#ifdef __HAIR__
- if(kernel_tex_fetch(__prim_segment, isect->prim) == ~0) {
+ if(kernel_tex_fetch(__prim_type, isect->prim) & PRIMITIVE_ALL_TRIANGLE) {
#endif
float4 Ns = kernel_tex_fetch(__tri_normal, prim);
shader = __float_as_int(Ns.w);
diff --git a/intern/cycles/kernel/kernel_shadow.h b/intern/cycles/kernel/kernel_shadow.h
index 9b015c98c40..ab7524c411a 100644
--- a/intern/cycles/kernel/kernel_shadow.h
+++ b/intern/cycles/kernel/kernel_shadow.h
@@ -16,6 +16,178 @@
CCL_NAMESPACE_BEGIN
+#ifdef __SHADOW_RECORD_ALL__
+
+/* Shadow function to compute how much light is blocked, CPU variation.
+ *
+ * We trace a single ray. If it hits any opaque surface, or more than a given
+ * number of transparent surfaces is hit, then we consider the geometry to be
+ * entirely blocked. If not, all transparent surfaces will be recorded and we
+ * will shade them one by one to determine how much light is blocked. This all
+ * happens in one scene intersection function.
+ *
+ * Recording all hits works well in some cases but may be slower in others. If
+ * we have many semi-transparent hairs, one intersection may be faster because
+ * you'd be reintersecting the same hairs a lot with each step otherwise. If
+ * however there is mostly binary transparency then we may be recording many
+ * unnecessary intersections when one of the first surfaces blocks all light.
+ *
+ * From tests in real scenes it seems the performance loss is either minimal,
+ * or there is a performance increase anyway due to avoiding the need to send
+ * two rays with transparent shadows.
+ *
+ * This is CPU only because of qsort, and malloc or high stack space usage to
+ * record all these intersections. */
+
+ccl_device_noinline int shadow_intersections_compare(const void *a, const void *b)
+{
+ const Intersection *isect_a = (const Intersection*)a;
+ const Intersection *isect_b = (const Intersection*)b;
+
+ if(isect_a->t < isect_b->t)
+ return -1;
+ else if(isect_a->t > isect_b->t)
+ return 1;
+ else
+ return 0;
+}
+
+#define STACK_MAX_HITS 64
+
+ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *ray, float3 *shadow)
+{
+ *shadow = make_float3(1.0f, 1.0f, 1.0f);
+
+ if(ray->t == 0.0f)
+ return false;
+
+ bool blocked;
+
+ if(kernel_data.integrator.transparent_shadows) {
+ /* intersect to find an opaque surface, or record all transparent surface hits */
+ Intersection hits_stack[STACK_MAX_HITS];
+ Intersection *hits;
+ uint max_hits = kernel_data.integrator.transparent_max_bounce - state->transparent_bounce - 1;
+
+ /* prefer to use the stack, but use dynamic allocation if max_hits is too
+ * large. we need max_hits + 1 storage space due to the logic in
+ * scene_intersect_shadow_all, which will first store and then check if
+ * the limit is exceeded */
+ if(max_hits + 1 <= STACK_MAX_HITS)
+ hits = hits_stack;
+ else
+ hits = (Intersection*)malloc(sizeof(Intersection)*(max_hits + 1));
+
+ uint num_hits;
+ blocked = scene_intersect_shadow_all(kg, ray, hits, max_hits, &num_hits);
+
+ /* if no opaque surface found but we did find transparent hits, shade them */
+ if(!blocked && num_hits > 0) {
+ float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
+ float3 Pend = ray->P + ray->D*ray->t;
+ float last_t = 0.0f;
+ int bounce = state->transparent_bounce;
+ Intersection *isect = hits;
+#ifdef __VOLUME__
+ PathState ps = *state;
+#endif
+
+ qsort(hits, num_hits, sizeof(Intersection), shadow_intersections_compare);
+
+ for(int hit = 0; hit < num_hits; hit++, isect++) {
+ /* adjust intersection distance for moving ray forward */
+ float new_t = isect->t;
+ isect->t -= last_t;
+
+ /* skip hit if we did not move forward, step by step raytracing
+ * would have skipped it as well then */
+ if(last_t == new_t)
+ continue;
+
+ last_t = new_t;
+
+#ifdef __VOLUME__
+ /* attenuation between last surface and next surface */
+ if(ps.volume_stack[0].shader != SHADER_NONE) {
+ Ray segment_ray = *ray;
+ segment_ray.t = isect->t;
+ kernel_volume_shadow(kg, &ps, &segment_ray, &throughput);
+ }
+#endif
+
+ /* setup shader data at surface */
+ ShaderData sd;
+ shader_setup_from_ray(kg, &sd, isect, ray, state->bounce+1, bounce);
+
+ /* attenuation from transparent surface */
+ if(!(sd.flag & SD_HAS_ONLY_VOLUME)) {
+ shader_eval_surface(kg, &sd, 0.0f, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW);
+ throughput *= shader_bsdf_transparency(kg, &sd);
+ }
+
+ /* stop if all light is blocked */
+ if(is_zero(throughput)) {
+ /* free dynamic storage */
+ if(hits != hits_stack)
+ free(hits);
+ return true;
+ }
+
+ /* move ray forward */
+ ray->P = sd.P;
+ if(ray->t != FLT_MAX)
+ ray->D = normalize_len(Pend - ray->P, &ray->t);
+
+#ifdef __VOLUME__
+ /* exit/enter volume */
+ kernel_volume_stack_enter_exit(kg, &sd, ps.volume_stack);
+#endif
+
+ bounce++;
+ }
+
+#ifdef __VOLUME__
+ /* attenuation for last line segment towards light */
+ if(ps.volume_stack[0].shader != SHADER_NONE)
+ kernel_volume_shadow(kg, &ps, ray, &throughput);
+#endif
+
+ *shadow *= throughput;
+ }
+
+ /* free dynamic storage */
+ if(hits != hits_stack)
+ free(hits);
+ }
+ else {
+ Intersection isect;
+#ifdef __HAIR__
+ blocked = scene_intersect(kg, ray, PATH_RAY_SHADOW_OPAQUE, &isect, NULL, 0.0f, 0.0f);
+#else
+ blocked = scene_intersect(kg, ray, PATH_RAY_SHADOW_OPAQUE, &isect);
+#endif
+ }
+
+#ifdef __VOLUME__
+ if(!blocked && state->volume_stack[0].shader != SHADER_NONE) {
+ /* apply attenuation from current volume shader */
+ kernel_volume_shadow(kg, state, ray, shadow);
+ }
+#endif
+
+ return blocked;
+}
+
+#else
+
+/* Shadow function to compute how much light is blocked, GPU variation.
+ *
+ * Here we raytrace from one transparent surface to the next step by step.
+ * To minimize overhead in cases where we don't need transparent shadows, we
+ * first trace a regular shadow ray. We check if the hit primitive was
+ * potentially transparent, and only in that case start marching. This gives
+ * one extra ray cast for the cases where we do want transparency. */
+
ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *ray, float3 *shadow)
{
*shadow = make_float3(1.0f, 1.0f, 1.0f);
@@ -25,21 +197,13 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
Intersection isect;
#ifdef __HAIR__
- bool result = scene_intersect(kg, ray, PATH_RAY_SHADOW_OPAQUE, &isect, NULL, 0.0f, 0.0f);
+ bool blocked = scene_intersect(kg, ray, PATH_RAY_SHADOW_OPAQUE, &isect, NULL, 0.0f, 0.0f);
#else
- bool result = scene_intersect(kg, ray, PATH_RAY_SHADOW_OPAQUE, &isect);
+ bool blocked = scene_intersect(kg, ray, PATH_RAY_SHADOW_OPAQUE, &isect);
#endif
#ifdef __TRANSPARENT_SHADOWS__
- if(result && kernel_data.integrator.transparent_shadows) {
- /* transparent shadows work in such a way to try to minimize overhead
- * in cases where we don't need them. after a regular shadow ray is
- * cast we check if the hit primitive was potentially transparent, and
- * only in that case start marching. this gives on extra ray cast for
- * the cases were we do want transparency.
- *
- * also note that for this to work correct, multi close sampling must
- * be used, since we don't pass a random number to shader_eval_surface */
+ if(blocked && kernel_data.integrator.transparent_shadows) {
if(shader_transparent_shadow(kg, &isect)) {
float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
float3 Pend = ray->P + ray->D*ray->t;
@@ -49,35 +213,24 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
#endif
for(;;) {
- if(bounce >= kernel_data.integrator.transparent_max_bounce) {
+ if(bounce >= kernel_data.integrator.transparent_max_bounce)
return true;
- }
- else if(bounce >= kernel_data.integrator.transparent_min_bounce) {
- /* todo: get random number somewhere for probabilistic terminate */
-#if 0
- float probability = average(throughput);
- float terminate = 0.0f;
-
- if(terminate >= probability)
- return true;
-
- throughput /= probability;
-#endif
- }
#ifdef __HAIR__
- if(!scene_intersect(kg, ray, PATH_RAY_SHADOW_TRANSPARENT, &isect, NULL, 0.0f, 0.0f)) {
+ if(!scene_intersect(kg, ray, PATH_RAY_SHADOW_TRANSPARENT, &isect, NULL, 0.0f, 0.0f))
#else
- if(!scene_intersect(kg, ray, PATH_RAY_SHADOW_TRANSPARENT, &isect)) {
+ if(!scene_intersect(kg, ray, PATH_RAY_SHADOW_TRANSPARENT, &isect))
#endif
+ {
#ifdef __VOLUME__
/* attenuation for last line segment towards light */
- if(ps.volume_stack[0].shader != SHADER_NO_ID)
+ if(ps.volume_stack[0].shader != SHADER_NONE)
kernel_volume_shadow(kg, &ps, ray, &throughput);
#endif
*shadow *= throughput;
+
return false;
}
@@ -86,7 +239,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
#ifdef __VOLUME__
/* attenuation between last surface and next surface */
- if(ps.volume_stack[0].shader != SHADER_NO_ID) {
+ if(ps.volume_stack[0].shader != SHADER_NONE) {
Ray segment_ray = *ray;
segment_ray.t = isect.t;
kernel_volume_shadow(kg, &ps, &segment_ray, &throughput);
@@ -95,7 +248,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
/* setup shader data at surface */
ShaderData sd;
- shader_setup_from_ray(kg, &sd, &isect, ray, state->bounce+1);
+ shader_setup_from_ray(kg, &sd, &isect, ray, state->bounce+1, bounce);
/* attenuation from transparent surface */
if(!(sd.flag & SD_HAS_ONLY_VOLUME)) {
@@ -103,6 +256,9 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
throughput *= shader_bsdf_transparency(kg, &sd);
}
+ if(is_zero(throughput))
+ return true;
+
/* move ray forward */
ray->P = ray_offset(sd.P, -sd.Ng);
if(ray->t != FLT_MAX)
@@ -118,15 +274,17 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *
}
}
#ifdef __VOLUME__
- else if(!result && state->volume_stack[0].shader != SHADER_NO_ID) {
+ else if(!blocked && state->volume_stack[0].shader != SHADER_NONE) {
/* apply attenuation from current volume shader */
kernel_volume_shadow(kg, state, ray, shadow);
}
#endif
#endif
- return result;
+ return blocked;
}
+#endif
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_sse2.cpp b/intern/cycles/kernel/kernel_sse2.cpp
index 6a2a7804146..2d5f6091908 100644
--- a/intern/cycles/kernel/kernel_sse2.cpp
+++ b/intern/cycles/kernel/kernel_sse2.cpp
@@ -75,6 +75,6 @@ CCL_NAMESPACE_END
/* needed for some linkers in combination with scons making empty compilation unit in a library */
void __dummy_function_cycles_sse2(void);
-void __dummy_function_cycles_sse2(void){}
+void __dummy_function_cycles_sse2(void) {}
#endif
diff --git a/intern/cycles/kernel/kernel_sse3.cpp b/intern/cycles/kernel/kernel_sse3.cpp
index 9d0abb93cc6..1062fd0c990 100644
--- a/intern/cycles/kernel/kernel_sse3.cpp
+++ b/intern/cycles/kernel/kernel_sse3.cpp
@@ -76,6 +76,6 @@ CCL_NAMESPACE_END
/* needed for some linkers in combination with scons making empty compilation unit in a library */
void __dummy_function_cycles_sse3(void);
-void __dummy_function_cycles_sse3(void){}
+void __dummy_function_cycles_sse3(void) {}
#endif
diff --git a/intern/cycles/kernel/kernel_sse41.cpp b/intern/cycles/kernel/kernel_sse41.cpp
index bc20de0ec20..ba3b4887650 100644
--- a/intern/cycles/kernel/kernel_sse41.cpp
+++ b/intern/cycles/kernel/kernel_sse41.cpp
@@ -77,6 +77,6 @@ CCL_NAMESPACE_END
/* needed for some linkers in combination with scons making empty compilation unit in a library */
void __dummy_function_cycles_sse41(void);
-void __dummy_function_cycles_sse41(void){}
+void __dummy_function_cycles_sse41(void) {}
#endif
diff --git a/intern/cycles/kernel/kernel_textures.h b/intern/cycles/kernel/kernel_textures.h
index f06fa119cfc..b07075c6c95 100644
--- a/intern/cycles/kernel/kernel_textures.h
+++ b/intern/cycles/kernel/kernel_textures.h
@@ -25,7 +25,7 @@
/* bvh */
KERNEL_TEX(float4, texture_float4, __bvh_nodes)
KERNEL_TEX(float4, texture_float4, __tri_woop)
-KERNEL_TEX(uint, texture_uint, __prim_segment)
+KERNEL_TEX(uint, texture_uint, __prim_type)
KERNEL_TEX(uint, texture_uint, __prim_visibility)
KERNEL_TEX(uint, texture_uint, __prim_index)
KERNEL_TEX(uint, texture_uint, __prim_object)
@@ -174,6 +174,61 @@ KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_097)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_098)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_099)
+/* Kepler and above */
+#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 300)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_100)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_101)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_102)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_103)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_104)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_105)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_106)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_107)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_108)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_109)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_110)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_111)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_112)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_113)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_114)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_115)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_116)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_117)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_118)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_119)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_120)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_121)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_122)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_123)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_124)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_125)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_126)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_127)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_128)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_129)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_130)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_131)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_132)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_133)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_134)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_135)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_136)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_137)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_138)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_139)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_140)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_141)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_142)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_143)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_144)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_145)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_146)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_147)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_148)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_149)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_150)
+#endif
+
/* packed image (opencl) */
KERNEL_TEX(uchar4, texture_uchar4, __tex_image_packed)
KERNEL_TEX(uint4, texture_uint4, __tex_image_packed_info)
diff --git a/intern/cycles/kernel/kernel_triangle.h b/intern/cycles/kernel/kernel_triangle.h
deleted file mode 100644
index 0455df85961..00000000000
--- a/intern/cycles/kernel/kernel_triangle.h
+++ /dev/null
@@ -1,180 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License
- */
-
-CCL_NAMESPACE_BEGIN
-
-/* Point on triangle for Moller-Trumbore triangles */
-ccl_device_inline float3 triangle_point_MT(KernelGlobals *kg, int tri_index, float u, float v)
-{
- /* load triangle vertices */
- float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, tri_index));
-
- float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x)));
- float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y)));
- float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z)));
-
- /* compute point */
- float t = 1.0f - u - v;
- return (u*v0 + v*v1 + t*v2);
-}
-
-/* Normal for Moller-Trumbore triangles */
-ccl_device_inline float3 triangle_normal_MT(KernelGlobals *kg, int tri_index, int *shader)
-{
-#if 0
- /* load triangle vertices */
- float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, tri_index));
-
- float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x)));
- float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y)));
- float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z)));
-
- /* compute normal */
- return normalize(cross(v2 - v0, v1 - v0));
-#else
- float4 Nm = kernel_tex_fetch(__tri_normal, tri_index);
- *shader = __float_as_int(Nm.w);
- return make_float3(Nm.x, Nm.y, Nm.z);
-#endif
-}
-
-/* Return 3 triangle vertex locations */
-ccl_device_inline void triangle_vertices(KernelGlobals *kg, int tri_index, float3 P[3])
-{
- /* load triangle vertices */
- float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, tri_index));
-
- P[0] = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x)));
- P[1] = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y)));
- P[2] = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z)));
-}
-
-ccl_device_inline float3 triangle_smooth_normal(KernelGlobals *kg, int tri_index, float u, float v)
-{
- /* load triangle vertices */
- float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, tri_index));
-
- float3 n0 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.x)));
- float3 n1 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.y)));
- float3 n2 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.z)));
-
- return normalize((1.0f - u - v)*n2 + u*n0 + v*n1);
-}
-
-ccl_device_inline void triangle_dPdudv(KernelGlobals *kg, float3 *dPdu, float3 *dPdv, int tri)
-{
- /* fetch triangle vertex coordinates */
- float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, tri));
-
- float3 p0 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x)));
- float3 p1 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y)));
- float3 p2 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z)));
-
- /* compute derivatives of P w.r.t. uv */
- *dPdu = (p0 - p2);
- *dPdv = (p1 - p2);
-}
-
-/* attributes */
-
-ccl_device float triangle_attribute_float(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float *dx, float *dy)
-{
- if(elem == ATTR_ELEMENT_FACE) {
- if(dx) *dx = 0.0f;
- if(dy) *dy = 0.0f;
-
- return kernel_tex_fetch(__attributes_float, offset + sd->prim);
- }
- else if(elem == ATTR_ELEMENT_VERTEX) {
- float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, sd->prim));
-
- float f0 = kernel_tex_fetch(__attributes_float, offset + __float_as_int(tri_vindex.x));
- float f1 = kernel_tex_fetch(__attributes_float, offset + __float_as_int(tri_vindex.y));
- float f2 = kernel_tex_fetch(__attributes_float, offset + __float_as_int(tri_vindex.z));
-
-#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2;
- if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2;
-#endif
-
- return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2;
- }
- else if(elem == ATTR_ELEMENT_CORNER) {
- int tri = offset + sd->prim*3;
- float f0 = kernel_tex_fetch(__attributes_float, tri + 0);
- float f1 = kernel_tex_fetch(__attributes_float, tri + 1);
- float f2 = kernel_tex_fetch(__attributes_float, tri + 2);
-
-#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2;
- if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2;
-#endif
-
- return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2;
- }
- else {
- if(dx) *dx = 0.0f;
- if(dy) *dy = 0.0f;
-
- return 0.0f;
- }
-}
-
-ccl_device float3 triangle_attribute_float3(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float3 *dx, float3 *dy)
-{
- if(elem == ATTR_ELEMENT_FACE) {
- if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
- if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
-
- return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + sd->prim));
- }
- else if(elem == ATTR_ELEMENT_VERTEX) {
- float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, sd->prim));
-
- float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.x)));
- float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.y)));
- float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.z)));
-
-#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2;
- if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2;
-#endif
-
- return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2;
- }
- else if(elem == ATTR_ELEMENT_CORNER) {
- int tri = offset + sd->prim*3;
- float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 0));
- float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 1));
- float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 2));
-
-#ifdef __RAY_DIFFERENTIALS__
- if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2;
- if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2;
-#endif
-
- return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2;
- }
- else {
- if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
- if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
-
- return make_float3(0.0f, 0.0f, 0.0f);
- }
-}
-
-CCL_NAMESPACE_END
-
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 5ee25a6cb98..11445aa1c93 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -46,7 +46,10 @@ CCL_NAMESPACE_BEGIN
#define TEX_NUM_FLOAT_IMAGES 5
-#define SHADER_NO_ID -1
+#define SHADER_NONE (~0)
+#define OBJECT_NONE (~0)
+#define PRIM_NONE (~0)
+#define LAMP_NONE (~0)
#define VOLUME_STACK_SIZE 16
@@ -61,13 +64,17 @@ CCL_NAMESPACE_BEGIN
#define __SUBSURFACE__
#define __CMJ__
#define __VOLUME__
+#define __SHADOW_RECORD_ALL__
#endif
#ifdef __KERNEL_CUDA__
#define __KERNEL_SHADING__
#define __KERNEL_ADV_SHADING__
#define __BRANCHED_PATH__
+
+/* Experimental on GPU */
//#define __VOLUME__
+//#define __SUBSURFACE__
#endif
#ifdef __KERNEL_OPENCL__
@@ -85,26 +92,24 @@ CCL_NAMESPACE_BEGIN
#endif
#ifdef __KERNEL_OPENCL_AMD__
-#define __SVM__
-#define __EMISSION__
-#define __IMAGE_TEXTURES__
-#define __PROCEDURAL_TEXTURES__
-#define __EXTRA_NODES__
-#define __HOLDOUT__
-#define __NORMAL_MAP__
-//#define __BACKGROUND_MIS__
-//#define __LAMP_MIS__
-//#define __AO__
-//#define __ANISOTROPIC__
+#define __CL_USE_NATIVE__
+#define __KERNEL_SHADING__
+//__KERNEL_ADV_SHADING__
+#define __MULTI_CLOSURE__
+#define __TRANSPARENT_SHADOWS__
+#define __PASSES__
+#define __BACKGROUND_MIS__
+#define __LAMP_MIS__
+#define __AO__
+#define __ANISOTROPIC__
//#define __CAMERA_MOTION__
//#define __OBJECT_MOTION__
//#define __HAIR__
-//#define __MULTI_CLOSURE__
-//#define __TRANSPARENT_SHADOWS__
-//#define __PASSES__
+//end __KERNEL_ADV_SHADING__
#endif
#ifdef __KERNEL_OPENCL_INTEL_CPU__
+#define __CL_USE_NATIVE__
#define __KERNEL_SHADING__
#define __KERNEL_ADV_SHADING__
#endif
@@ -147,12 +152,6 @@ CCL_NAMESPACE_BEGIN
#define __HAIR__
#endif
-/* Sanity check */
-
-#if defined(__KERNEL_OPENCL_NEED_ADVANCED_SHADING__) && !defined(__MULTI_CLOSURE__)
-#error "OpenCL: mismatch between advanced shading flags in device_opencl.cpp and kernel_types.h"
-#endif
-
/* Random Numbers */
typedef uint RNG;
@@ -161,7 +160,35 @@ typedef uint RNG;
typedef enum ShaderEvalType {
SHADER_EVAL_DISPLACE,
- SHADER_EVAL_BACKGROUND
+ SHADER_EVAL_BACKGROUND,
+ /* bake types */
+ SHADER_EVAL_BAKE, /* no real shade, it's used in the code to
+ * differentiate the type of shader eval from the above
+ */
+ /* data passes */
+ SHADER_EVAL_NORMAL,
+ SHADER_EVAL_UV,
+ SHADER_EVAL_DIFFUSE_COLOR,
+ SHADER_EVAL_GLOSSY_COLOR,
+ SHADER_EVAL_TRANSMISSION_COLOR,
+ SHADER_EVAL_SUBSURFACE_COLOR,
+ SHADER_EVAL_EMISSION,
+
+ /* light passes */
+ SHADER_EVAL_AO,
+ SHADER_EVAL_COMBINED,
+ SHADER_EVAL_SHADOW,
+ SHADER_EVAL_DIFFUSE_DIRECT,
+ SHADER_EVAL_GLOSSY_DIRECT,
+ SHADER_EVAL_TRANSMISSION_DIRECT,
+ SHADER_EVAL_SUBSURFACE_DIRECT,
+ SHADER_EVAL_DIFFUSE_INDIRECT,
+ SHADER_EVAL_GLOSSY_INDIRECT,
+ SHADER_EVAL_TRANSMISSION_INDIRECT,
+ SHADER_EVAL_SUBSURFACE_INDIRECT,
+
+ /* extra */
+ SHADER_EVAL_ENVIRONMENT,
} ShaderEvalType;
/* Path Tracing
@@ -177,10 +204,8 @@ enum PathTraceDimension {
PRNG_UNUSED_0 = 5,
PRNG_UNUSED_1 = 6, /* for some reason (6, 7) is a bad sobol pattern */
PRNG_UNUSED_2 = 7, /* with a low number of samples (< 64) */
- PRNG_BASE_NUM = 8,
-#else
- PRNG_BASE_NUM = 4,
#endif
+ PRNG_BASE_NUM = 8,
PRNG_BSDF_U = 0,
PRNG_BSDF_V = 1,
@@ -188,7 +213,7 @@ enum PathTraceDimension {
PRNG_LIGHT = 3,
PRNG_LIGHT_U = 4,
PRNG_LIGHT_V = 5,
- PRNG_LIGHT_F = 6,
+ PRNG_UNUSED_3 = 6,
PRNG_TERMINATE = 7,
#ifdef __VOLUME__
@@ -220,7 +245,6 @@ enum PathRayFlag {
PATH_RAY_GLOSSY = 16,
PATH_RAY_SINGULAR = 32,
PATH_RAY_TRANSPARENT = 64,
- PATH_RAY_VOLUME_SCATTER = 128,
PATH_RAY_SHADOW_OPAQUE = 128,
PATH_RAY_SHADOW_TRANSPARENT = 256,
@@ -228,16 +252,17 @@ enum PathRayFlag {
PATH_RAY_CURVE = 512, /* visibility flag to define curve segments*/
+ /* note that these can use at most 12 bits, the others are for layers */
PATH_RAY_ALL_VISIBILITY = (1|2|4|8|16|32|64|128|256|512),
PATH_RAY_MIS_SKIP = 1024,
PATH_RAY_DIFFUSE_ANCESTOR = 2048,
PATH_RAY_GLOSSY_ANCESTOR = 4096,
PATH_RAY_BSSRDF_ANCESTOR = 8192,
- PATH_RAY_SINGLE_PASS_DONE = 8192,
+ PATH_RAY_SINGLE_PASS_DONE = 16384,
+ PATH_RAY_VOLUME_SCATTER = 32768,
- /* this gives collisions with localview bits
- * see: blender_util.h, grr - Campbell */
+ /* we need layer member flags to be the 20 upper bits */
PATH_RAY_LAYER_SHIFT = (32-20)
};
@@ -282,7 +307,8 @@ typedef enum PassType {
PASS_MIST = 2097152,
PASS_SUBSURFACE_DIRECT = 4194304,
PASS_SUBSURFACE_INDIRECT = 8388608,
- PASS_SUBSURFACE_COLOR = 16777216
+ PASS_SUBSURFACE_COLOR = 16777216,
+ PASS_LIGHT = 33554432, /* no real pass, used to force use_light_pass */
} PassType;
#define PASS_ALL (~0)
@@ -418,9 +444,27 @@ typedef struct Intersection {
float t, u, v;
int prim;
int object;
- int segment;
+ int type;
} Intersection;
+/* Primitives */
+
+typedef enum PrimitiveType {
+ PRIMITIVE_NONE = 0,
+ PRIMITIVE_TRIANGLE = 1,
+ PRIMITIVE_MOTION_TRIANGLE = 2,
+ PRIMITIVE_CURVE = 4,
+ PRIMITIVE_MOTION_CURVE = 8,
+
+ PRIMITIVE_ALL_TRIANGLE = (PRIMITIVE_TRIANGLE|PRIMITIVE_MOTION_TRIANGLE),
+ PRIMITIVE_ALL_CURVE = (PRIMITIVE_CURVE|PRIMITIVE_MOTION_CURVE),
+ PRIMITIVE_ALL_MOTION = (PRIMITIVE_MOTION_TRIANGLE|PRIMITIVE_MOTION_CURVE),
+ PRIMITIVE_ALL = (PRIMITIVE_ALL_TRIANGLE|PRIMITIVE_ALL_CURVE)
+} PrimitiveType;
+
+#define PRIMITIVE_PACK_SEGMENT(type, segment) ((segment << 16) | type)
+#define PRIMITIVE_UNPACK_SEGMENT(type) (type >> 16)
+
/* Attributes */
#define ATTR_PRIM_TYPES 2
@@ -432,9 +476,12 @@ typedef enum AttributeElement {
ATTR_ELEMENT_MESH,
ATTR_ELEMENT_FACE,
ATTR_ELEMENT_VERTEX,
+ ATTR_ELEMENT_VERTEX_MOTION,
ATTR_ELEMENT_CORNER,
ATTR_ELEMENT_CURVE,
- ATTR_ELEMENT_CURVE_KEY
+ ATTR_ELEMENT_CURVE_KEY,
+ ATTR_ELEMENT_CURVE_KEY_MOTION,
+ ATTR_ELEMENT_VOXEL
} AttributeElement;
typedef enum AttributeStandard {
@@ -448,12 +495,17 @@ typedef enum AttributeStandard {
ATTR_STD_GENERATED_TRANSFORM,
ATTR_STD_POSITION_UNDEFORMED,
ATTR_STD_POSITION_UNDISPLACED,
- ATTR_STD_MOTION_PRE,
- ATTR_STD_MOTION_POST,
+ ATTR_STD_MOTION_VERTEX_POSITION,
+ ATTR_STD_MOTION_VERTEX_NORMAL,
ATTR_STD_PARTICLE,
ATTR_STD_CURVE_INTERCEPT,
ATTR_STD_PTEX_FACE_ID,
ATTR_STD_PTEX_UV,
+ ATTR_STD_VOLUME_DENSITY,
+ ATTR_STD_VOLUME_COLOR,
+ ATTR_STD_VOLUME_FLAME,
+ ATTR_STD_VOLUME_HEAT,
+ ATTR_STD_VOLUME_VELOCITY,
ATTR_STD_NUM,
ATTR_STD_NOT_FOUND = ~0
@@ -461,15 +513,17 @@ typedef enum AttributeStandard {
/* Closure data */
+#ifdef __MULTI_CLOSURE__
#define MAX_CLOSURE 64
+#else
+#define MAX_CLOSURE 1
+#endif
typedef struct ShaderClosure {
ClosureType type;
float3 weight;
-#ifdef __MULTI_CLOSURE__
float sample_weight;
-#endif
float data0;
float data1;
@@ -561,13 +615,9 @@ typedef struct ShaderData {
/* primitive id if there is one, ~0 otherwise */
int prim;
-#ifdef __HAIR__
- /* for curves, segment number in curve, ~0 for triangles */
- int segment;
- /* variables for minimum hair width using transparency bsdf */
- /*float curve_transparency; */
- /*float curve_radius; */
-#endif
+ /* combined type and curve segment for hair */
+ int type;
+
/* parametric coordinates
* - barycentric weights for triangles */
float u, v;
@@ -583,6 +633,9 @@ typedef struct ShaderData {
/* ray bounce depth */
int ray_depth;
+ /* ray transparent depth */
+ int transparent_depth;
+
#ifdef __RAY_DIFFERENTIALS__
/* differential of P. these are orthogonal to Ng, not N */
differential3 dP;
@@ -605,15 +658,10 @@ typedef struct ShaderData {
Transform ob_itfm;
#endif
-#ifdef __MULTI_CLOSURE__
/* Closure data, we store a fixed array of closures */
ShaderClosure closure[MAX_CLOSURE];
int num_closure;
float randb_closure;
-#else
- /* Closure data, with a single sampled closure for low memory usage */
- ShaderClosure closure;
-#endif
/* ray start position, only set for backgrounds */
float3 ray_P;
@@ -824,25 +872,27 @@ typedef struct KernelIntegrator {
/* clamp */
float sample_clamp_direct;
float sample_clamp_indirect;
- float pad1, pad2, pad3;
/* branched path */
int branched;
- int aa_samples;
int diffuse_samples;
int glossy_samples;
int transmission_samples;
int ao_samples;
int mesh_light_samples;
int subsurface_samples;
-
+ int sample_all_lights_direct;
+ int sample_all_lights_indirect;
+
/* mis */
int use_lamp_mis;
/* sampler */
int sampling_pattern;
+ int aa_samples;
/* volume render */
+ int volume_homogeneous_sampling;
int use_volumes;
int volume_max_steps;
float volume_step_size;
diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h
index dc2ddf1098e..faaa68e3309 100644
--- a/intern/cycles/kernel/kernel_volume.h
+++ b/intern/cycles/kernel/kernel_volume.h
@@ -16,6 +16,8 @@
CCL_NAMESPACE_BEGIN
+/* Events for probabilistic scattering */
+
typedef enum VolumeIntegrateResult {
VOLUME_PATH_SCATTERED = 0,
VOLUME_PATH_ATTENUATED = 1,
@@ -92,14 +94,19 @@ ccl_device bool volume_shader_sample(KernelGlobals *kg, ShaderData *sd, PathStat
return true;
}
-ccl_device float3 volume_color_attenuation(float3 sigma, float t)
+ccl_device float3 volume_color_transmittance(float3 sigma, float t)
{
return make_float3(expf(-sigma.x * t), expf(-sigma.y * t), expf(-sigma.z * t));
}
+ccl_device float kernel_volume_channel_get(float3 value, int channel)
+{
+ return (channel == 0)? value.x: ((channel == 1)? value.y: value.z);
+}
+
ccl_device bool volume_stack_is_heterogeneous(KernelGlobals *kg, VolumeStack *stack)
{
- for(int i = 0; stack[i].shader != SHADER_NO_ID; i++) {
+ for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
int shader_flag = kernel_tex_fetch(__shader_flag, (stack[i].shader & SHADER_MASK)*2);
if(shader_flag & SD_HETEROGENEOUS_VOLUME)
@@ -114,14 +121,14 @@ ccl_device bool volume_stack_is_heterogeneous(KernelGlobals *kg, VolumeStack *st
* These functions are used to attenuate shadow rays to lights. Both absorption
* and scattering will block light, represented by the extinction coefficient. */
-/* homogenous volume: assume shader evaluation at the starts gives
+/* homogeneous volume: assume shader evaluation at the start gives
* the extinction coefficient for the entire line segment */
ccl_device void kernel_volume_shadow_homogeneous(KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd, float3 *throughput)
{
float3 sigma_t;
if(volume_shader_extinction_sample(kg, sd, state, ray->P, &sigma_t))
- *throughput *= volume_color_attenuation(sigma_t, ray->t);
+ *throughput *= volume_color_transmittance(sigma_t, ray->t);
}
/* heterogeneous volume: integrate stepping through the volume until we
@@ -138,34 +145,29 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState
/* compute extinction at the start */
float t = 0.0f;
- float3 P = ray->P;
- float3 sigma_t;
-
- if(!volume_shader_extinction_sample(kg, sd, state, P, &sigma_t))
- sigma_t = make_float3(0.0f, 0.0f, 0.0f);
for(int i = 0; i < max_steps; i++) {
/* advance to new position */
- float new_t = min(ray->t, t + random_jitter_offset + i * step);
- float3 new_P = ray->P + ray->D * new_t;
- float3 new_sigma_t;
+ float new_t = min(ray->t, (i+1) * step);
+ float dt = new_t - t;
+
+ /* use random position inside this segment to sample shader */
+ if(new_t == ray->t)
+ random_jitter_offset = lcg_step_float(&state->rng_congruential) * dt;
+
+ float3 new_P = ray->P + ray->D * (t + random_jitter_offset);
+ float3 sigma_t;
/* compute attenuation over segment */
- if(volume_shader_extinction_sample(kg, sd, state, new_P, &new_sigma_t)) {
+ if(volume_shader_extinction_sample(kg, sd, state, new_P, &sigma_t)) {
/* todo: we could avoid computing expf() for each step by summing,
* because exp(a)*exp(b) = exp(a+b), but we still want a quick
* tp_eps check too */
- tp *= volume_color_attenuation(0.5f*(sigma_t + new_sigma_t), new_t - t);
+ tp *= volume_color_transmittance(sigma_t, new_t - t);
/* stop if nearly all light blocked */
if(tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps)
break;
-
- sigma_t = new_sigma_t;
- }
- else {
- /* skip empty space */
- sigma_t = make_float3(0.0f, 0.0f, 0.0f);
}
/* stop if at the end of the volume */
@@ -182,7 +184,7 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState
ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg, PathState *state, Ray *ray, float3 *throughput)
{
ShaderData sd;
- shader_setup_from_volume(kg, &sd, ray, state->bounce);
+ shader_setup_from_volume(kg, &sd, ray, state->bounce, state->transparent_bounce);
if(volume_stack_is_heterogeneous(kg, state->volume_stack))
kernel_volume_shadow_heterogeneous(kg, state, ray, &sd, throughput);
@@ -190,9 +192,123 @@ ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg, PathState *stat
kernel_volume_shadow_homogeneous(kg, state, ray, &sd, throughput);
}
+/* Equi-angular sampling as in:
+ * "Importance Sampling Techniques for Path Tracing in Participating Media" */
+
+ccl_device float kernel_volume_equiangular_sample(Ray *ray, float3 light_P, float xi, float *pdf)
+{
+ float t = ray->t;
+
+ float delta = dot((light_P - ray->P) , ray->D);
+ float D = sqrtf(len_squared(light_P - ray->P) - delta * delta);
+ float theta_a = -atan2f(delta, D);
+ float theta_b = atan2f(t - delta, D);
+ float t_ = D * tanf((xi * theta_b) + (1 - xi) * theta_a);
+
+ *pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
+
+ return min(t, delta + t_); /* min is only for float precision errors */
+}
+
+ccl_device float kernel_volume_equiangular_pdf(Ray *ray, float3 light_P, float sample_t)
+{
+ float delta = dot((light_P - ray->P) , ray->D);
+ float D = sqrtf(len_squared(light_P - ray->P) - delta * delta);
+
+ float t = ray->t;
+ float t_ = sample_t - delta;
+
+ float theta_a = -atan2f(delta, D);
+ float theta_b = atan2f(t - delta, D);
+
+ float pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_));
+
+ return pdf;
+}
+
+ccl_device bool kernel_volume_equiangular_light_position(KernelGlobals *kg, PathState *state, Ray *ray, RNG *rng, float3 *light_P)
+{
+ /* light RNGs */
+ float light_t = path_state_rng_1D(kg, rng, state, PRNG_LIGHT);
+ float light_u, light_v;
+ path_state_rng_2D(kg, rng, state, PRNG_LIGHT_U, &light_u, &light_v);
+
+ /* light sample */
+ LightSample ls;
+ light_sample(kg, light_t, light_u, light_v, ray->time, ray->P, &ls);
+ if(ls.pdf == 0.0f)
+ return false;
+
+ *light_P = ls.P;
+ return true;
+}
+
+ccl_device float kernel_volume_decoupled_equiangular_pdf(KernelGlobals *kg, PathState *state, Ray *ray, RNG *rng, float sample_t)
+{
+ float3 light_P;
+
+ if(!kernel_volume_equiangular_light_position(kg, state, ray, rng, &light_P))
+ return 0.0f;
+
+ return kernel_volume_equiangular_pdf(ray, light_P, sample_t);
+}
+
+/* Distance sampling */
+
+ccl_device float kernel_volume_distance_sample(float max_t, float3 sigma_t, int channel, float xi, float3 *transmittance, float3 *pdf)
+{
+ /* xi is [0, 1[ so log(0) should never happen, division by zero is
+ * avoided because sample_sigma_t > 0 when SD_SCATTER is set */
+ float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
+ float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
+ float sample_transmittance = kernel_volume_channel_get(full_transmittance, channel);
+
+ float sample_t = min(max_t, -logf(1.0f - xi*(1.0f - sample_transmittance))/sample_sigma_t);
+
+ *transmittance = volume_color_transmittance(sigma_t, sample_t);
+ *pdf = (sigma_t * *transmittance)/(make_float3(1.0f, 1.0f, 1.0f) - full_transmittance);
+
+ /* todo: optimization: when taken together with hit/miss decision,
+ * the full_transmittance cancels out and xi does not
+ * need to be remapped */
+
+ return sample_t;
+}
+
+ccl_device float3 kernel_volume_distance_pdf(float max_t, float3 sigma_t, float sample_t)
+{
+ float3 full_transmittance = volume_color_transmittance(sigma_t, max_t);
+ float3 transmittance = volume_color_transmittance(sigma_t, sample_t);
+
+ return (sigma_t * transmittance)/(make_float3(1.0f, 1.0f, 1.0f) - full_transmittance);
+}
+
+/* Emission */
+
+ccl_device float3 kernel_volume_emission_integrate(VolumeShaderCoefficients *coeff, int closure_flag, float3 transmittance, float t)
+{
+ /* integral E * exp(-sigma_t * t) from 0 to t = E * (1 - exp(-sigma_t * t))/sigma_t
+ * this goes to E * t as sigma_t goes to zero
+ *
+ * todo: we should use an epsilon to avoid precision issues near zero sigma_t */
+ float3 emission = coeff->emission;
+
+ if(closure_flag & SD_ABSORPTION) {
+ float3 sigma_t = coeff->sigma_a + coeff->sigma_s;
+
+ emission.x *= (sigma_t.x > 0.0f)? (1.0f - transmittance.x)/sigma_t.x: t;
+ emission.y *= (sigma_t.y > 0.0f)? (1.0f - transmittance.y)/sigma_t.y: t;
+ emission.z *= (sigma_t.z > 0.0f)? (1.0f - transmittance.z)/sigma_t.z: t;
+ }
+ else
+ emission *= t;
+
+ return emission;
+}
+
/* Volume Path */
-/* homogenous volume: assume shader evaluation at the starts gives
+/* homogeneous volume: assume shader evaluation at the start gives
* the volume shading coefficient for the entire line segment */
ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(KernelGlobals *kg,
PathState *state, Ray *ray, ShaderData *sd, PathRadiance *L, float3 *throughput,
@@ -206,69 +322,73 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(KernelGloba
int closure_flag = sd->flag;
float t = ray->t;
float3 new_tp;
- float3 transmittance;
/* randomly scatter, and if we do t is shortened */
if(closure_flag & SD_SCATTER) {
+ /* extinction coefficient */
float3 sigma_t = coeff.sigma_a + coeff.sigma_s;
- /* set up variables for sampling */
+ /* pick random color channel, we use the Veach one-sample
+ * model with balance heuristic for the channels */
float rphase = path_state_rng_1D(kg, rng, state, PRNG_PHASE);
int channel = (int)(rphase*3.0f);
sd->randb_closure = rphase*3.0f - channel;
- /* pick random color channel, we use the Veach one-sample
- * model with balance heuristic for the channels */
- float sample_sigma_t;
+ float xi = path_state_rng_1D(kg, rng, state, PRNG_SCATTER_DISTANCE);
- if(channel == 0)
- sample_sigma_t = sigma_t.x;
- else if(channel == 1)
- sample_sigma_t = sigma_t.y;
- else
- sample_sigma_t = sigma_t.z;
+ /* decide if we will hit or miss */
+ float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
+ float sample_transmittance = expf(-sample_sigma_t * t);
- /* xi is [0, 1[ so log(0) should never happen, division by zero is
- * avoided because sample_sigma_t > 0 when SD_SCATTER is set */
- float xi = path_state_rng_1D(kg, rng, state, PRNG_SCATTER_DISTANCE);
- float sample_t = min(t, -logf(1.0f - xi)/sample_sigma_t);
+ if(xi >= sample_transmittance) {
+ /* scattering */
+ float3 pdf;
+ float3 transmittance;
+ float sample_t;
- transmittance = volume_color_attenuation(sigma_t, sample_t);
+ /* rescale random number so we can reuse it */
+ xi = (xi - sample_transmittance)/(1.0f - sample_transmittance);
- if(sample_t < t) {
- float pdf = dot(sigma_t, transmittance);
- new_tp = *throughput * coeff.sigma_s * transmittance * (3.0f / pdf);
+ if(kernel_data.integrator.volume_homogeneous_sampling == 0 || !kernel_data.integrator.num_all_lights) {
+ /* distance sampling */
+ sample_t = kernel_volume_distance_sample(ray->t, sigma_t, channel, xi, &transmittance, &pdf);
+ }
+ else {
+ /* equiangular sampling */
+ float3 light_P;
+ float equi_pdf;
+ if(!kernel_volume_equiangular_light_position(kg, state, ray, rng, &light_P))
+ return VOLUME_PATH_MISSED;
+
+ sample_t = kernel_volume_equiangular_sample(ray, light_P, xi, &equi_pdf);
+ transmittance = volume_color_transmittance(sigma_t, sample_t);
+ pdf = make_float3(equi_pdf, equi_pdf, equi_pdf);
+ }
+
+ /* modify pdf for hit/miss decision */
+ pdf *= make_float3(1.0f, 1.0f, 1.0f) - volume_color_transmittance(sigma_t, t);
+
+ new_tp = *throughput * coeff.sigma_s * transmittance / average(pdf);
t = sample_t;
}
else {
- float pdf = (transmittance.x + transmittance.y + transmittance.z);
- new_tp = *throughput * transmittance * (3.0f / pdf);
+ /* no scattering */
+ float3 transmittance = volume_color_transmittance(sigma_t, t);
+ float pdf = average(transmittance);
+ new_tp = *throughput * transmittance / pdf;
}
}
else if(closure_flag & SD_ABSORPTION) {
/* absorption only, no sampling needed */
- transmittance = volume_color_attenuation(coeff.sigma_a, t);
+ float3 transmittance = volume_color_transmittance(coeff.sigma_a, t);
new_tp = *throughput * transmittance;
}
- /* integrate emission attenuated by extinction
- * integral E * exp(-sigma_t * t) from 0 to t = E * (1 - exp(-sigma_t * t))/sigma_t
- * this goes to E * t as sigma_t goes to zero
- *
- * todo: we should use an epsilon to avoid precision issues near zero sigma_t */
+ /* integrate emission attenuated by extinction */
if(closure_flag & SD_EMISSION) {
- float3 emission = coeff.emission;
-
- if(closure_flag & SD_ABSORPTION) {
- float3 sigma_t = coeff.sigma_a + coeff.sigma_s;
-
- emission.x *= (sigma_t.x > 0.0f)? (1.0f - transmittance.x)/sigma_t.x: t;
- emission.y *= (sigma_t.y > 0.0f)? (1.0f - transmittance.y)/sigma_t.y: t;
- emission.z *= (sigma_t.z > 0.0f)? (1.0f - transmittance.z)/sigma_t.z: t;
- }
- else
- emission *= t;
-
+ float3 sigma_t = coeff.sigma_a + coeff.sigma_s;
+ float3 transmittance = volume_color_transmittance(sigma_t, ray->t);
+ float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, ray->t);
path_radiance_accum_emission(L, *throughput, emission, state->bounce);
}
@@ -293,45 +413,38 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(KernelGloba
ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous(KernelGlobals *kg,
PathState *state, Ray *ray, ShaderData *sd, PathRadiance *L, float3 *throughput, RNG *rng)
{
- VolumeShaderCoefficients coeff;
float3 tp = *throughput;
const float tp_eps = 1e-10f; /* todo: this is likely not the right value */
/* prepare for stepping */
int max_steps = kernel_data.integrator.volume_max_steps;
- float step = kernel_data.integrator.volume_step_size;
- float random_jitter_offset = lcg_step_float(&state->rng_congruential) * step;
+ float step_size = kernel_data.integrator.volume_step_size;
+ float random_jitter_offset = lcg_step_float(&state->rng_congruential) * step_size;
/* compute coefficients at the start */
float t = 0.0f;
- float3 P = ray->P;
-
- if(!volume_shader_sample(kg, sd, state, P, &coeff)) {
- coeff.sigma_a = make_float3(0.0f, 0.0f, 0.0f);
- coeff.sigma_s = make_float3(0.0f, 0.0f, 0.0f);
- coeff.emission = make_float3(0.0f, 0.0f, 0.0f);
- }
-
- /* accumulate these values so we can use a single stratified number to sample */
float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f);
- float3 accum_sigma_t = make_float3(0.0f, 0.0f, 0.0f);
- float3 accum_sigma_s = make_float3(0.0f, 0.0f, 0.0f);
/* cache some constant variables */
- float nlogxi;
+ float xi;
int channel = -1;
bool has_scatter = false;
for(int i = 0; i < max_steps; i++) {
/* advance to new position */
- float new_t = min(ray->t, t + random_jitter_offset + i * step);
- float3 new_P = ray->P + ray->D * new_t;
- VolumeShaderCoefficients new_coeff;
+ float new_t = min(ray->t, (i+1) * step_size);
+ float dt = new_t - t;
+
+ /* use random position inside this segment to sample shader */
+ if(new_t == ray->t)
+ random_jitter_offset = lcg_step_float(&state->rng_congruential) * dt;
+
+ float3 new_P = ray->P + ray->D * (t + random_jitter_offset);
+ VolumeShaderCoefficients coeff;
/* compute segment */
- if(volume_shader_sample(kg, sd, state, new_P, &new_coeff)) {
+ if(volume_shader_sample(kg, sd, state, new_P, &coeff)) {
int closure_flag = sd->flag;
- float dt = new_t - t;
float3 new_tp;
float3 transmittance;
bool scatter = false;
@@ -341,94 +454,58 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous(KernelGlo
has_scatter = true;
/* average sigma_t and sigma_s over segment */
- float3 last_sigma_t = coeff.sigma_a + coeff.sigma_s;
- float3 new_sigma_t = new_coeff.sigma_a + new_coeff.sigma_s;
- float3 sigma_t = 0.5f*(last_sigma_t + new_sigma_t);
- float3 sigma_s = 0.5f*(coeff.sigma_s + new_coeff.sigma_s);
+ float3 sigma_t = coeff.sigma_a + coeff.sigma_s;
+ float3 sigma_s = coeff.sigma_s;
/* lazily set up variables for sampling */
if(channel == -1) {
- float xi = path_state_rng_1D(kg, rng, state, PRNG_SCATTER_DISTANCE);
- nlogxi = -logf(1.0f - xi);
+ /* pick random color channel, we use the Veach one-sample
+ * model with balance heuristic for the channels */
+ xi = path_state_rng_1D(kg, rng, state, PRNG_SCATTER_DISTANCE);
float rphase = path_state_rng_1D(kg, rng, state, PRNG_PHASE);
channel = (int)(rphase*3.0f);
sd->randb_closure = rphase*3.0f - channel;
}
- /* pick random color channel, we use the Veach one-sample
- * model with balance heuristic for the channels */
- float sample_sigma_t;
+ /* compute transmittance over full step */
+ transmittance = volume_color_transmittance(sigma_t, dt);
- if(channel == 0)
- sample_sigma_t = accum_sigma_t.x + dt*sigma_t.x;
- else if(channel == 1)
- sample_sigma_t = accum_sigma_t.y + dt*sigma_t.y;
- else
- sample_sigma_t = accum_sigma_t.z + dt*sigma_t.z;
+ /* decide if we will scatter or continue */
+ float sample_transmittance = kernel_volume_channel_get(transmittance, channel);
- if(nlogxi < sample_sigma_t) {
+ if(1.0f - xi >= sample_transmittance) {
/* compute sampling distance */
- sample_sigma_t /= new_t;
- new_t = nlogxi/sample_sigma_t;
- dt = new_t - t;
-
- transmittance = volume_color_attenuation(sigma_t, dt);
-
- accum_transmittance *= transmittance;
- accum_sigma_t = (accum_sigma_t + dt*sigma_t)/new_t;
- accum_sigma_s = (accum_sigma_s + dt*sigma_s)/new_t;
-
- /* todo: it's not clear to me that this is correct if we move
- * through a color volumed, needs verification */
- float pdf = dot(accum_sigma_t, accum_transmittance);
- new_tp = tp * accum_sigma_s * transmittance * (3.0f / pdf);
-
+ float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
+ float new_dt = -logf(1.0f - xi)/sample_sigma_t;
+ new_t = t + new_dt;
+
+ /* transmittance, throughput */
+ float3 new_transmittance = volume_color_transmittance(sigma_t, new_dt);
+ float pdf = average(sigma_t * new_transmittance);
+ new_tp = tp * sigma_s * new_transmittance / pdf;
scatter = true;
}
else {
- transmittance = volume_color_attenuation(sigma_t, dt);
-
- accum_transmittance *= transmittance;
- accum_sigma_t += dt*sigma_t;
- accum_sigma_s += dt*sigma_s;
+ /* throughput */
+ float pdf = average(transmittance);
+ new_tp = tp * transmittance / pdf;
- new_tp = tp * transmittance;
+ /* remap xi so we can reuse it and keep things stratified */
+ xi = 1.0f - (1.0f - xi)/sample_transmittance;
}
}
else if(closure_flag & SD_ABSORPTION) {
/* absorption only, no sampling needed */
- float3 sigma_a = 0.5f*(coeff.sigma_a + new_coeff.sigma_a);
- transmittance = volume_color_attenuation(sigma_a, dt);
-
- accum_transmittance *= transmittance;
- accum_sigma_t += dt*sigma_a;
+ float3 sigma_a = coeff.sigma_a;
+ transmittance = volume_color_transmittance(sigma_a, dt);
new_tp = tp * transmittance;
-
- /* todo: we could avoid computing expf() for each step by summing,
- * because exp(a)*exp(b) = exp(a+b), but we still want a quick
- * tp_eps check too */
}
- /* integrate emission attenuated by absorption
- * integral E * exp(-sigma_t * t) from 0 to t = E * (1 - exp(-sigma_t * t))/sigma_t
- * this goes to E * t as sigma_t goes to zero
- *
- * todo: we should use an epsilon to avoid precision issues near zero sigma_t */
+ /* integrate emission attenuated by absorption */
if(closure_flag & SD_EMISSION) {
- float3 emission = 0.5f*(coeff.emission + new_coeff.emission);
-
- if(closure_flag & SD_ABSORPTION) {
- float3 sigma_t = 0.5f*(coeff.sigma_a + coeff.sigma_s + new_coeff.sigma_a + new_coeff.sigma_s);
-
- emission.x *= (sigma_t.x > 0.0f)? (1.0f - transmittance.x)/sigma_t.x: dt;
- emission.y *= (sigma_t.y > 0.0f)? (1.0f - transmittance.y)/sigma_t.y: dt;
- emission.z *= (sigma_t.z > 0.0f)? (1.0f - transmittance.z)/sigma_t.z: dt;
- }
- else
- emission *= dt;
-
+ float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, dt);
path_radiance_accum_emission(L, tp, emission, state->bounce);
}
@@ -450,47 +527,323 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous(KernelGlo
return VOLUME_PATH_SCATTERED;
}
+ else {
+ /* accumulate transmittance */
+ accum_transmittance *= transmittance;
+ }
}
+ }
+
+ /* stop if at the end of the volume */
+ t = new_t;
+ if(t == ray->t)
+ break;
+ }
+
+ *throughput = tp;
- coeff = new_coeff;
+ return VOLUME_PATH_ATTENUATED;
+}
+
+/* Decoupled Volume Sampling
+ *
+ * VolumeSegment is a list of coefficients and transmittance stored at all steps
+ * through a volume. This can then later be used for decoupled sampling as in:
+ * "Importance Sampling Techniques for Path Tracing in Participating Media" */
+
+/* CPU only because of malloc/free */
+#ifdef __KERNEL_CPU__
+
+typedef struct VolumeStep {
+ float3 sigma_s; /* scatter coefficient from the shader evaluation of this step,
+  * zero for an empty step */
+ float3 sigma_t; /* extinction coefficient (sigma_a + sigma_s), zero for an empty step */
+ float3 accum_transmittance; /* accumulated transmittance including this step,
+  * i.e. the product of the transmittance of all steps from the ray start */
+ float3 cdf_distance; /* cumulative density function for distance sampling: running sum
+  * of dt * accum_transmittance * sigma_s per channel, divided by the value of
+  * the last step once recording is done (when that value is not zero) */
+ float t; /* distance at end of this step */
+ float shade_t; /* jittered distance where shading was done in step */
+ int closure_flag; /* shader evaluation closure flags (sd->flag), 0 for an empty step */
+} VolumeStep;
+
+typedef struct VolumeSegment {
+ VolumeStep *steps; /* recorded steps, allocated by kernel_volume_decoupled_record
+  * and released by kernel_volume_decoupled_free */
+ int numsteps; /* number of steps written to the steps array */
+ int closure_flag; /* accumulated (or-ed) closure flags from all steps */
+
+ float3 accum_emission; /* accumulated emission at end of segment, each step weighted
+  * by the transmittance accumulated before it */
+ float3 accum_transmittance; /* accumulated transmittance at end of segment */
+} VolumeSegment;
+
+/* record volume steps to the end of the volume.
+ *
+ * it would be nice if we could only record up to the point that we need to scatter,
+ * but the entire segment is needed to do always scattering, rather than probabilistically
+ * hitting or missing the volume. if we don't know the transmittance at the end of the
+ * volume we can't generate stratified distance samples up to that transmittance
+ *
+ * for heterogeneous volumes the number of recorded steps is limited to
+ * integrator.volume_max_steps, so a very long ray can not request an unbounded
+ * allocation (or overflow the float to int conversion of the step count). when the
+ * limit is hit, recording stops before ray->t, the same way the step loop of
+ * kernel_volume_integrate_heterogeneous is bounded.
+ *
+ * segment->steps is allocated here and must be released with
+ * kernel_volume_decoupled_free. if the allocation fails the segment is left
+ * empty: no steps, no closure flags, zero emission and full transmittance. */
+ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *state,
+ Ray *ray, ShaderData *sd, VolumeSegment *segment, bool heterogeneous)
+{
+ /* prepare for volume stepping */
+ int max_steps;
+ float step_size, random_jitter_offset;
+
+ if(heterogeneous) {
+ const int global_max_steps = max(kernel_data.integrator.volume_max_steps, 1);
+ step_size = kernel_data.integrator.volume_step_size;
+ random_jitter_offset = lcg_step_float(&state->rng_congruential) * step_size;
+
+ /* compute exact steps in advance for malloc, but never more than the
+ * configured maximum; the comparison is done in float so that a huge
+ * ray->t never reaches the int conversion */
+ if(ray->t > global_max_steps*step_size)
+ max_steps = global_max_steps;
+ else
+ max_steps = max((int)ceilf(ray->t/step_size), 1);
+ }
+ else {
+ max_steps = 1;
+ step_size = ray->t;
+ random_jitter_offset = 0.0f;
+ }
+
+ /* init accumulation variables */
+ float3 accum_emission = make_float3(0.0f, 0.0f, 0.0f);
+ float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f);
+ float3 cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
+ float t = 0.0f;
+
+ segment->closure_flag = 0;
+ segment->numsteps = 0;
+ segment->steps = (VolumeStep*)malloc(sizeof(VolumeStep)*max_steps);
+
+ /* out of memory: leave an empty segment behind. closure_flag is zero, so
+ * kernel_volume_decoupled_scatter returns before touching the steps */
+ if(!segment->steps) {
+ segment->accum_emission = accum_emission;
+ segment->accum_transmittance = accum_transmittance;
+ return;
+ }
+
+ VolumeStep *step = segment->steps;
+
+ for(int i = 0; i < max_steps; i++, step++) {
+ /* advance to new position */
+ float new_t = min(ray->t, (i+1) * step_size);
+ float dt = new_t - t;
+
+ /* use random position inside this segment to sample shader */
+ if(heterogeneous && new_t == ray->t)
+ random_jitter_offset = lcg_step_float(&state->rng_congruential) * dt;
+
+ float3 new_P = ray->P + ray->D * (t + random_jitter_offset);
+ VolumeShaderCoefficients coeff;
+
+ /* compute segment */
+ if(volume_shader_sample(kg, sd, state, new_P, &coeff)) {
+ int closure_flag = sd->flag;
+ float3 sigma_t = coeff.sigma_a + coeff.sigma_s;
+
+ /* compute accumulated transmittance */
+ float3 transmittance = volume_color_transmittance(sigma_t, dt);
+
+ /* compute emission attenuated by absorption */
+ if(closure_flag & SD_EMISSION) {
+ float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, dt);
+ accum_emission += accum_transmittance * emission;
+ }
+
+ accum_transmittance *= transmittance;
+
+ /* compute pdf for distance sampling */
+ float3 pdf_distance = dt * accum_transmittance * coeff.sigma_s;
+ cdf_distance = cdf_distance + pdf_distance;
+
+ /* write step data */
+ step->sigma_t = sigma_t;
+ step->sigma_s = coeff.sigma_s;
+ step->closure_flag = closure_flag;
+
+ segment->closure_flag |= closure_flag;
}
else {
- /* skip empty space */
- coeff.sigma_a = make_float3(0.0f, 0.0f, 0.0f);
- coeff.sigma_s = make_float3(0.0f, 0.0f, 0.0f);
- coeff.emission = make_float3(0.0f, 0.0f, 0.0f);
+ /* store empty step (todo: skip consecutive empty steps) */
+ step->sigma_t = make_float3(0.0f, 0.0f, 0.0f);
+ step->sigma_s = make_float3(0.0f, 0.0f, 0.0f);
+ step->closure_flag = 0;
}
+ step->accum_transmittance = accum_transmittance;
+ step->cdf_distance = cdf_distance;
+ step->t = new_t;
+ step->shade_t = t + random_jitter_offset;
+
+ segment->numsteps++;
+
/* stop if at the end of the volume */
t = new_t;
if(t == ray->t)
break;
}
- /* include pdf for volumes with scattering */
- if(has_scatter) {
- float pdf = (accum_transmittance.x + accum_transmittance.y + accum_transmittance.z);
- if(pdf > 0.0f)
- tp *= (3.0f/pdf);
+ /* store total emission and transmittance */
+ segment->accum_emission = accum_emission;
+ segment->accum_transmittance = accum_transmittance;
+
+ /* normalize cumulative density function for distance sampling,
+ * max_steps is at least one so there is always a last step */
+ VolumeStep *last_step = segment->steps + segment->numsteps - 1;
+
+ if(!is_zero(last_step->cdf_distance)) {
+ VolumeStep *step = &segment->steps[0];
+ int numsteps = segment->numsteps;
+ float3 inv_cdf_distance_sum = safe_invert_color(last_step->cdf_distance);
+
+ for(int i = 0; i < numsteps; i++, step++)
+ step->cdf_distance *= inv_cdf_distance_sum;
+ }
+}
+
+/* release the step array recorded by kernel_volume_decoupled_record.
+ *
+ * the segment is reset afterwards, so a stale pointer can not be read or
+ * freed a second time; calling this on a segment without steps is harmless
+ * because free(NULL) is a no-op. kg is unused. */
+ccl_device void kernel_volume_decoupled_free(KernelGlobals *kg, VolumeSegment *segment)
+{
+	free(segment->steps);
+
+	segment->steps = NULL;
+	segment->numsteps = 0;
+}
+
+/* scattering for homogeneous and heterogeneous volumes, using decoupled ray
+ * marching. unlike the non-decoupled functions, these do not do probabilistic
+ * scattering, they always scatter if there is any non-zero scattering
+ * coefficient.
+ *
+ * these also do not do emission. the only change to throughput is the
+ * multiplication by sigma_s * transmittance / pdf at the sampled position.
+ *
+ * returns VOLUME_PATH_MISSED when no recorded step has the SD_SCATTER flag, or
+ * when no light position could be picked for equi-angular sampling. otherwise
+ * returns VOLUME_PATH_SCATTERED with sd->P moved to the sampled position. */
+ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter(
+ KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd,
+ float3 *throughput, RNG *rng, VolumeSegment *segment)
+{
+ int closure_flag = segment->closure_flag;
+
+ if(!(closure_flag & SD_SCATTER))
+ return VOLUME_PATH_MISSED;
+
+ /* pick random color channel, we use the Veach one-sample
+ * model with balance heuristic for the channels */
+ float rphase = path_state_rng_1D(kg, rng, state, PRNG_PHASE);
+ int channel = (int)(rphase*3.0f);
+ sd->randb_closure = rphase*3.0f - channel;
+
+ float xi = path_state_rng_1D(kg, rng, state, PRNG_SCATTER_DISTANCE);
+
+ VolumeStep *step;
+ float3 transmittance;
+ float pdf, sample_t;
+
+ /* distance sampling, used when equi-angular sampling is not selected
+ * or when there are no lights */
+ if(kernel_data.integrator.volume_homogeneous_sampling == 0 || !kernel_data.integrator.num_all_lights) {
+ /* find step in cdf */
+ step = segment->steps;
+
+ float prev_t = 0.0f;
+ float3 step_pdf = make_float3(1.0f, 1.0f, 1.0f);
+
+ if(segment->numsteps > 1) {
+ float prev_cdf = 0.0f;
+ float step_cdf = 1.0f;
+ float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f);
+
+ for(int i = 0; ; i++, step++) {
+ /* todo: optimize using binary search
+ * the last step is always accepted, so step never runs past the array */
+ step_cdf = kernel_volume_channel_get(step->cdf_distance, channel);
+
+ if(xi < step_cdf || i == segment->numsteps-1)
+ break;
+
+ prev_cdf = step_cdf;
+ prev_t = step->t;
+ prev_cdf_distance = step->cdf_distance;
+ }
+
+ /* remap xi so we can reuse it
+ * NOTE(review): if step_cdf equals prev_cdf (for example when the picked
+ * channel's cdf never increases) this divides by zero - confirm whether
+ * that can be reached */
+ xi = (xi - prev_cdf)/(step_cdf - prev_cdf);
+
+ /* pdf for picking step */
+ step_pdf = step->cdf_distance - prev_cdf_distance;
+ }
+
+ /* determine range in which we will sample */
+ float step_t = step->t - prev_t;
+
+ /* sample distance and compute transmittance, the returned distance is
+ * relative to the start of the picked step */
+ float3 distance_pdf;
+ sample_t = prev_t + kernel_volume_distance_sample(step_t, step->sigma_t, channel, xi, &transmittance, &distance_pdf);
+ pdf = average(distance_pdf * step_pdf);
}
+ /* equi-angular sampling */
+ else {
+ /* pick position on light */
+ float3 light_P;
+ if(!kernel_volume_equiangular_light_position(kg, state, ray, rng, &light_P))
+ return VOLUME_PATH_MISSED;
- *throughput = tp;
+ /* sample distance */
+ sample_t = kernel_volume_equiangular_sample(ray, light_P, xi, &pdf);
- return VOLUME_PATH_ATTENUATED;
+ /* find step in which sampled distance is located */
+ step = segment->steps;
+
+ float prev_t = 0.0f;
+
+ if(segment->numsteps > 1) {
+ /* todo: optimize using binary search */
+ for(int i = 0; i < segment->numsteps-1; i++, step++) {
+ if(sample_t < step->t)
+ break;
+
+ prev_t = step->t;
+ }
+ }
+
+ /* compute transmittance inside the step only */
+ transmittance = volume_color_transmittance(step->sigma_t, sample_t - prev_t);
+ }
+
+ /* compute transmittance up to this step, by including the transmittance
+ * accumulated over all previous steps */
+ if(step != segment->steps)
+ transmittance *= (step-1)->accum_transmittance;
+
+ /* modify throughput */
+ *throughput *= step->sigma_s * transmittance / pdf;
+
+ /* evaluate shader to create closures at shading point, this is done at the
+ * jittered position where the step was originally shaded
+ * NOTE(review): skipped for a single step, presumably because sd still holds
+ * the closures from recording - confirm */
+ if(segment->numsteps > 1) {
+ sd->P = ray->P + step->shade_t*ray->D;
+
+ VolumeShaderCoefficients coeff;
+ volume_shader_sample(kg, sd, state, sd->P, &coeff);
+ }
+
+ /* move to new position */
+ sd->P = ray->P + sample_t*ray->D;
+
+ return VOLUME_PATH_SCATTERED;
}
+#endif
+
/* get the volume attenuation and emission over line segment defined by
* ray, with the assumption that there are no surfaces blocking light
* between the endpoints */
ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(KernelGlobals *kg,
PathState *state, ShaderData *sd, Ray *ray, PathRadiance *L, float3 *throughput, RNG *rng)
{
- shader_setup_from_volume(kg, sd, ray, state->bounce);
+ /* workaround to fix correlation bug in T38710, can find better solution
+ * in random number generator later, for now this is done here to not impact
+ * performance of rendering without volumes
+ *
+ * the rng is hashed together with the rng offset of the path state, and the
+ * integrators below are handed this hashed copy (tmp_rng) instead of rng */
+ RNG tmp_rng = cmj_hash(*rng, state->rng_offset);
+ bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
- if(volume_stack_is_heterogeneous(kg, state->volume_stack))
- return kernel_volume_integrate_heterogeneous(kg, state, ray, sd, L, throughput, rng);
+#if 0
+ /* debugging code to compare decoupled ray marching, compiled out: records
+ * the full segment, scatters from it and frees it again. L is not used by
+ * this path */
+ VolumeSegment segment;
+
+ shader_setup_from_volume(kg, sd, ray, state->bounce, state->transparent_bounce);
+ kernel_volume_decoupled_record(kg, state, ray, sd, &segment, heterogeneous);
+
+ VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg, state, ray, sd, throughput, &tmp_rng, &segment);
+
+ kernel_volume_decoupled_free(kg, &segment);
+
+ return result;
+#else
+ shader_setup_from_volume(kg, sd, ray, state->bounce, state->transparent_bounce);
+
+ if(heterogeneous)
+ return kernel_volume_integrate_heterogeneous(kg, state, ray, sd, L, throughput, &tmp_rng);
else
- return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, rng);
+ return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, &tmp_rng);
+#endif
}
/* Volume Stack
@@ -501,13 +854,13 @@ ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(KernelGlobals
ccl_device void kernel_volume_stack_init(KernelGlobals *kg, VolumeStack *stack)
{
/* todo: this assumes camera is always in air, need to detect when it isn't
* without a background volume shader the stack only holds the terminating
* entry, otherwise the background shader is stored in the first entry
* (with no object) and the terminating entry follows it */
- if(kernel_data.background.volume_shader == SHADER_NO_ID) {
- stack[0].shader = SHADER_NO_ID;
+ if(kernel_data.background.volume_shader == SHADER_NONE) {
+ stack[0].shader = SHADER_NONE;
}
else {
stack[0].shader = kernel_data.background.volume_shader;
- stack[0].object = ~0;
- stack[1].shader = SHADER_NO_ID;
+ stack[0].object = PRIM_NONE;
+ stack[1].shader = SHADER_NONE;
}
}
@@ -522,14 +875,14 @@ ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg, ShaderData *sd
if(sd->flag & SD_BACKFACING) {
/* exit volume object: remove from stack */
- for(int i = 0; stack[i].shader != SHADER_NO_ID; i++) {
+ for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
if(stack[i].object == sd->object) {
/* shift back next stack entries */
do {
stack[i] = stack[i+1];
i++;
}
- while(stack[i].shader != SHADER_NO_ID);
+ while(stack[i].shader != SHADER_NONE);
return;
}
@@ -539,7 +892,7 @@ ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg, ShaderData *sd
/* enter volume object: add to stack */
int i;
- for(i = 0; stack[i].shader != SHADER_NO_ID; i++) {
+ for(i = 0; stack[i].shader != SHADER_NONE; i++) {
/* already in the stack? then we have nothing to do */
if(stack[i].object == sd->object)
return;
@@ -552,7 +905,7 @@ ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg, ShaderData *sd
/* add to the end of the stack */
stack[i].shader = sd->shader;
stack[i].object = sd->object;
- stack[i+1].shader = SHADER_NO_ID;
+ stack[i+1].shader = SHADER_NONE;
}
}
diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp
index 4fad66be6e1..54894ea19eb 100644
--- a/intern/cycles/kernel/osl/osl_services.cpp
+++ b/intern/cycles/kernel/osl/osl_services.cpp
@@ -30,18 +30,16 @@
#include "kernel_compat_cpu.h"
#include "kernel_globals.h"
-#include "kernel_montecarlo.h"
+#include "kernel_random.h"
#include "kernel_projection.h"
#include "kernel_differential.h"
-#include "kernel_object.h"
-#include "kernel_random.h"
-#include "kernel_bvh.h"
-#include "kernel_triangle.h"
-#include "kernel_curve.h"
-#include "kernel_primitive.h"
+#include "kernel_montecarlo.h"
+#include "kernel_camera.h"
+
+#include "geom/geom.h"
+
#include "kernel_projection.h"
#include "kernel_accumulate.h"
-#include "kernel_camera.h"
#include "kernel_shader.h"
#ifdef WITH_PTEX
@@ -52,11 +50,16 @@ CCL_NAMESPACE_BEGIN
/* RenderServices implementation */
-#define COPY_MATRIX44(m1, m2) memcpy(m1, m2, sizeof(*m2))
+#define COPY_MATRIX44(m1, m2) { \
+ CHECK_TYPE(m1, OSL::Matrix44*); \
+ CHECK_TYPE(m2, Transform*); \
+ memcpy(m1, m2, sizeof(*m2)); \
+} (void)0
/* static ustrings */
ustring OSLRenderServices::u_distance("distance");
ustring OSLRenderServices::u_index("index");
+ustring OSLRenderServices::u_world("world");
ustring OSLRenderServices::u_camera("camera");
ustring OSLRenderServices::u_screen("screen");
ustring OSLRenderServices::u_raster("raster");
@@ -87,6 +90,7 @@ ustring OSLRenderServices::u_curve_tangent_normal("geom:curve_tangent_normal");
#endif
ustring OSLRenderServices::u_path_ray_length("path:ray_length");
ustring OSLRenderServices::u_path_ray_depth("path:ray_depth");
+ustring OSLRenderServices::u_path_transparent_depth("path:transparent_depth");
ustring OSLRenderServices::u_trace("trace");
ustring OSLRenderServices::u_hit("hit");
ustring OSLRenderServices::u_hitdist("hitdist");
@@ -131,7 +135,7 @@ bool OSLRenderServices::get_matrix(OSL::Matrix44 &result, OSL::TransformationPtr
KernelGlobals *kg = sd->osl_globals;
int object = sd->object;
- if (object != ~0) {
+ if (object != OBJECT_NONE) {
#ifdef __OBJECT_MOTION__
Transform tfm;
@@ -161,7 +165,7 @@ bool OSLRenderServices::get_inverse_matrix(OSL::Matrix44 &result, OSL::Transform
KernelGlobals *kg = sd->osl_globals;
int object = sd->object;
- if (object != ~0) {
+ if (object != OBJECT_NONE) {
#ifdef __OBJECT_MOTION__
Transform itfm;
@@ -206,6 +210,10 @@ bool OSLRenderServices::get_matrix(OSL::Matrix44 &result, ustring from, float ti
COPY_MATRIX44(&result, &tfm);
return true;
}
+ else if (from == u_world) {
+ result.makeIdentity();
+ return true;
+ }
return false;
}
@@ -234,6 +242,10 @@ bool OSLRenderServices::get_inverse_matrix(OSL::Matrix44 &result, ustring to, fl
COPY_MATRIX44(&result, &tfm);
return true;
}
+ else if (to == u_world) {
+ result.makeIdentity();
+ return true;
+ }
return false;
}
@@ -246,7 +258,7 @@ bool OSLRenderServices::get_matrix(OSL::Matrix44 &result, OSL::TransformationPtr
const ShaderData *sd = (const ShaderData *)xform;
int object = sd->object;
- if (object != ~0) {
+ if (object != OBJECT_NONE) {
#ifdef __OBJECT_MOTION__
Transform tfm = sd->ob_tfm;
#else
@@ -271,7 +283,7 @@ bool OSLRenderServices::get_inverse_matrix(OSL::Matrix44 &result, OSL::Transform
const ShaderData *sd = (const ShaderData *)xform;
int object = sd->object;
- if (object != ~0) {
+ if (object != OBJECT_NONE) {
#ifdef __OBJECT_MOTION__
Transform tfm = sd->ob_itfm;
#else
@@ -525,7 +537,8 @@ static bool get_mesh_element_attribute(KernelGlobals *kg, const ShaderData *sd,
const TypeDesc& type, bool derivatives, void *val)
{
if (attr.type == TypeDesc::TypePoint || attr.type == TypeDesc::TypeVector ||
- attr.type == TypeDesc::TypeNormal || attr.type == TypeDesc::TypeColor) {
+ attr.type == TypeDesc::TypeNormal || attr.type == TypeDesc::TypeColor)
+ {
float3 fval[3];
fval[0] = primitive_attribute_float3(kg, sd, attr.elem, attr.offset,
(derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL);
@@ -596,44 +609,44 @@ bool OSLRenderServices::get_object_standard_attribute(KernelGlobals *kg, ShaderD
/* Particle Attributes */
else if (name == u_particle_index) {
- uint particle_id = object_particle_id(kg, sd->object);
+ int particle_id = object_particle_id(kg, sd->object);
float f = particle_index(kg, particle_id);
return set_attribute_float(f, type, derivatives, val);
}
else if (name == u_particle_age) {
- uint particle_id = object_particle_id(kg, sd->object);
+ int particle_id = object_particle_id(kg, sd->object);
float f = particle_age(kg, particle_id);
return set_attribute_float(f, type, derivatives, val);
}
else if (name == u_particle_lifetime) {
- uint particle_id = object_particle_id(kg, sd->object);
- float f= particle_lifetime(kg, particle_id);
+ int particle_id = object_particle_id(kg, sd->object);
+ float f = particle_lifetime(kg, particle_id);
return set_attribute_float(f, type, derivatives, val);
}
else if (name == u_particle_location) {
- uint particle_id = object_particle_id(kg, sd->object);
+ int particle_id = object_particle_id(kg, sd->object);
float3 f = particle_location(kg, particle_id);
return set_attribute_float3(f, type, derivatives, val);
}
#if 0 /* unsupported */
else if (name == u_particle_rotation) {
- uint particle_id = object_particle_id(kg, sd->object);
+ int particle_id = object_particle_id(kg, sd->object);
float4 f = particle_rotation(kg, particle_id);
return set_attribute_float4(f, type, derivatives, val);
}
#endif
else if (name == u_particle_size) {
- uint particle_id = object_particle_id(kg, sd->object);
+ int particle_id = object_particle_id(kg, sd->object);
float f = particle_size(kg, particle_id);
return set_attribute_float(f, type, derivatives, val);
}
else if (name == u_particle_velocity) {
- uint particle_id = object_particle_id(kg, sd->object);
+ int particle_id = object_particle_id(kg, sd->object);
float3 f = particle_velocity(kg, particle_id);
return set_attribute_float3(f, type, derivatives, val);
}
else if (name == u_particle_angular_velocity) {
- uint particle_id = object_particle_id(kg, sd->object);
+ int particle_id = object_particle_id(kg, sd->object);
float3 f = particle_angular_velocity(kg, particle_id);
return set_attribute_float3(f, type, derivatives, val);
}
@@ -644,12 +657,17 @@ bool OSLRenderServices::get_object_standard_attribute(KernelGlobals *kg, ShaderD
}
else if ((name == u_geom_trianglevertices || name == u_geom_polyvertices)
#ifdef __HAIR__
- && sd->segment == ~0) {
+ && sd->type & PRIMITIVE_ALL_TRIANGLE)
#else
- ) {
+ )
#endif
+ {
float3 P[3];
- triangle_vertices(kg, sd->prim, P);
+
+ if(sd->type & PRIMITIVE_TRIANGLE)
+ triangle_vertices(kg, sd->prim, P);
+ else
+ motion_triangle_vertices(kg, sd->object, sd->prim, sd->time, P);
if(!(sd->flag & SD_TRANSFORM_APPLIED)) {
object_position_transform(kg, sd, &P[0]);
@@ -670,7 +688,7 @@ bool OSLRenderServices::get_object_standard_attribute(KernelGlobals *kg, ShaderD
#ifdef __HAIR__
/* Hair Attributes */
else if (name == u_is_curve) {
- float f = (sd->segment != ~0);
+ float f = (sd->type & PRIMITIVE_ALL_CURVE) != 0;
return set_attribute_float(f, type, derivatives, val);
}
else if (name == u_curve_thickness) {
@@ -699,13 +717,18 @@ bool OSLRenderServices::get_background_attribute(KernelGlobals *kg, ShaderData *
int f = sd->ray_depth;
return set_attribute_int(f, type, derivatives, val);
}
+ else if (name == u_path_transparent_depth) {
+ /* Transparent Ray Depth */
+ int f = sd->transparent_depth;
+ return set_attribute_int(f, type, derivatives, val);
+ }
else if (name == u_ndc) {
/* NDC coordinates with special exception for orthographic cameras */
OSLThreadData *tdata = kg->osl_tdata;
OSL::ShaderGlobals *globals = &tdata->globals;
float3 ndc[3];
- if((globals->raytype & PATH_RAY_CAMERA) && sd->object == ~0 && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) {
+ if((globals->raytype & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) {
ndc[0] = camera_world_to_ndc(kg, sd, sd->ray_P);
if(derivatives) {
@@ -733,7 +756,9 @@ bool OSLRenderServices::get_attribute(void *renderstate, bool derivatives, ustri
{
ShaderData *sd = (ShaderData *)renderstate;
KernelGlobals *kg = sd->osl_globals;
- int object, prim, segment;
+ bool is_curve;
+ int object;
+ // int prim;
/* lookup of attribute on another object */
if (object_name != u_empty) {
@@ -743,24 +768,20 @@ bool OSLRenderServices::get_attribute(void *renderstate, bool derivatives, ustri
return false;
object = it->second;
- prim = ~0;
- segment = ~0;
+ // prim = PRIM_NONE;
+ is_curve = false;
}
else {
object = sd->object;
- prim = sd->prim;
-#ifdef __HAIR__
- segment = sd->segment;
-#else
- segment = ~0;
-#endif
+ // prim = sd->prim;
+ is_curve = (sd->type & PRIMITIVE_ALL_CURVE) != 0;
- if (object == ~0)
+ if (object == OBJECT_NONE)
return get_background_attribute(kg, sd, name, type, derivatives, val);
}
/* find attribute on object */
- object = object*ATTR_PRIM_TYPES + (segment != ~0);
+ object = object*ATTR_PRIM_TYPES + (is_curve == true);
OSLGlobals::AttributeMap& attribute_map = kg->osl->attribute_map[object];
OSLGlobals::AttributeMap::iterator it = attribute_map.find(name);
@@ -769,8 +790,8 @@ bool OSLRenderServices::get_attribute(void *renderstate, bool derivatives, ustri
if (attr.elem != ATTR_ELEMENT_OBJECT) {
/* triangle and vertex attributes */
- if (prim != ~0)
- return get_mesh_element_attribute(kg, sd, attr, type, derivatives, val);
+ if(get_mesh_element_attribute(kg, sd, attr, type, derivatives, val))
+ return true;
else
return get_mesh_attribute(kg, sd, attr, type, derivatives, val);
}
@@ -1001,12 +1022,13 @@ bool OSLRenderServices::trace(TraceOpt &options, OSL::ShaderGlobals *sg,
tracedata->ray = ray;
tracedata->setup = false;
tracedata->init = true;
+ tracedata->sd.osl_globals = sd->osl_globals;
/* raytrace */
#ifdef __HAIR__
- return scene_intersect(sd->osl_globals, &ray, ~0, &tracedata->isect, NULL, 0.0f, 0.0f);
+ return scene_intersect(sd->osl_globals, &ray, PATH_RAY_ALL_VISIBILITY, &tracedata->isect, NULL, 0.0f, 0.0f);
#else
- return scene_intersect(sd->osl_globals, &ray, ~0, &tracedata->isect);
+ return scene_intersect(sd->osl_globals, &ray, PATH_RAY_ALL_VISIBILITY, &tracedata->isect);
#endif
}
@@ -1018,9 +1040,9 @@ bool OSLRenderServices::getmessage(OSL::ShaderGlobals *sg, ustring source, ustri
if(source == u_trace && tracedata->init) {
if(name == u_hit) {
- return set_attribute_int((tracedata->isect.prim != ~0), type, derivatives, val);
+ return set_attribute_int((tracedata->isect.prim != PRIM_NONE), type, derivatives, val);
}
- else if(tracedata->isect.prim != ~0) {
+ else if(tracedata->isect.prim != PRIM_NONE) {
if(name == u_hitdist) {
float f[3] = {tracedata->isect.t, 0.0f, 0.0f};
return set_attribute_float(f, type, derivatives, val);
@@ -1033,8 +1055,9 @@ bool OSLRenderServices::getmessage(OSL::ShaderGlobals *sg, ustring source, ustri
/* lazy shader data setup */
ShaderData *original_sd = (ShaderData *)(sg->renderstate);
int bounce = original_sd->ray_depth + 1;
+ int transparent_bounce = original_sd->transparent_depth;
- shader_setup_from_ray(kg, sd, &tracedata->isect, &tracedata->ray, bounce);
+ shader_setup_from_ray(kg, sd, &tracedata->isect, &tracedata->ray, bounce, transparent_bounce);
tracedata->setup = true;
}
diff --git a/intern/cycles/kernel/osl/osl_services.h b/intern/cycles/kernel/osl/osl_services.h
index 479b6da1afb..069722d81b6 100644
--- a/intern/cycles/kernel/osl/osl_services.h
+++ b/intern/cycles/kernel/osl/osl_services.h
@@ -110,12 +110,13 @@ public:
ustring dataname, TypeDesc datatype, void *data);
static bool get_background_attribute(KernelGlobals *kg, ShaderData *sd, ustring name,
- TypeDesc type, bool derivatives, void *val);
+ TypeDesc type, bool derivatives, void *val);
static bool get_object_standard_attribute(KernelGlobals *kg, ShaderData *sd, ustring name,
- TypeDesc type, bool derivatives, void *val);
+ TypeDesc type, bool derivatives, void *val);
static ustring u_distance;
static ustring u_index;
+ static ustring u_world;
static ustring u_camera;
static ustring u_screen;
static ustring u_raster;
@@ -144,6 +145,7 @@ public:
static ustring u_curve_tangent_normal;
static ustring u_path_ray_length;
static ustring u_path_ray_depth;
+ static ustring u_path_transparent_depth;
static ustring u_trace;
static ustring u_hit;
static ustring u_hitdist;
diff --git a/intern/cycles/kernel/osl/osl_shader.cpp b/intern/cycles/kernel/osl/osl_shader.cpp
index 554f647df7c..843dcdd0985 100644
--- a/intern/cycles/kernel/osl/osl_shader.cpp
+++ b/intern/cycles/kernel/osl/osl_shader.cpp
@@ -18,7 +18,8 @@
#include "kernel_montecarlo.h"
#include "kernel_types.h"
#include "kernel_globals.h"
-#include "kernel_object.h"
+
+#include "geom/geom_object.h"
#include "closure/bsdf_diffuse.h"
#include "closure/bssrdf.h"
@@ -112,7 +113,7 @@ static void shaderdata_to_shaderglobals(KernelGlobals *kg, ShaderData *sd,
globals->dvdy = sd->dv.dy;
globals->dPdu = TO_VEC3(sd->dPdu);
globals->dPdv = TO_VEC3(sd->dPdv);
- globals->surfacearea = (sd->object == ~0) ? 1.0f : object_surface_area(kg, sd->object);
+ globals->surfacearea = (sd->object == OBJECT_NONE) ? 1.0f : object_surface_area(kg, sd->object);
globals->time = sd->time;
/* booleans */
@@ -408,8 +409,9 @@ static void flatten_volume_closure_tree(ShaderData *sd,
sc.data1 = volume->sc.data1;
/* add */
- if(sc.sample_weight > CLOSURE_WEIGHT_CUTOFF &&
- sd->num_closure < MAX_CLOSURE) {
+ if((sc.sample_weight > CLOSURE_WEIGHT_CUTOFF) &&
+ (sd->num_closure < MAX_CLOSURE))
+ {
sd->closure[sd->num_closure++] = sc;
sd->flag |= volume->shaderdata_flag();
}
@@ -535,7 +537,7 @@ int OSLShader::find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id,
/* for OSL, a hash map is used to lookup the attribute by name. */
int object = sd->object*ATTR_PRIM_TYPES;
#ifdef __HAIR__
- if(sd->segment != ~0) object += ATTR_PRIM_CURVE;
+ if(sd->type & PRIMITIVE_ALL_CURVE) object += ATTR_PRIM_CURVE;
#endif
OSLGlobals::AttributeMap &attr_map = kg->osl->attribute_map[object];
@@ -546,7 +548,7 @@ int OSLShader::find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id,
const OSLGlobals::Attribute &osl_attr = it->second;
*elem = osl_attr.elem;
- if(sd->prim == ~0 && (AttributeElement)osl_attr.elem != ATTR_ELEMENT_MESH)
+ if(sd->prim == PRIM_NONE && (AttributeElement)osl_attr.elem != ATTR_ELEMENT_MESH)
return ATTR_STD_NOT_FOUND;
/* return result */
diff --git a/intern/cycles/kernel/shaders/CMakeLists.txt b/intern/cycles/kernel/shaders/CMakeLists.txt
index 045abdb80af..5518d652bf9 100644
--- a/intern/cycles/kernel/shaders/CMakeLists.txt
+++ b/intern/cycles/kernel/shaders/CMakeLists.txt
@@ -77,6 +77,7 @@ set(SRC_OSL
node_wave_texture.osl
node_wireframe.osl
node_hair_bsdf.osl
+ node_uv_map.osl
)
set(SRC_OSL_HEADERS
diff --git a/intern/cycles/kernel/shaders/node_absorption_volume.osl b/intern/cycles/kernel/shaders/node_absorption_volume.osl
index 69c4c0ef7af..6bac83ba4f5 100644
--- a/intern/cycles/kernel/shaders/node_absorption_volume.osl
+++ b/intern/cycles/kernel/shaders/node_absorption_volume.osl
@@ -21,6 +21,6 @@ shader node_absorption_volume(
float Density = 1.0,
output closure color Volume = 0)
{
- Volume = ((color(1.0, 1.0, 1.0) - Color) * Density) * absorption();
+ Volume = ((color(1.0, 1.0, 1.0) - Color) * max(Density, 0.0)) * absorption();
}
diff --git a/intern/cycles/kernel/shaders/node_fresnel.osl b/intern/cycles/kernel/shaders/node_fresnel.osl
index 8c59d5bb512..7ef553c0f39 100644
--- a/intern/cycles/kernel/shaders/node_fresnel.osl
+++ b/intern/cycles/kernel/shaders/node_fresnel.osl
@@ -23,7 +23,7 @@ shader node_fresnel(
output float Fac = 0.0)
{
float f = max(IOR, 1e-5);
- float eta = backfacing() ? 1.0 / f: f;
+ float eta = backfacing() ? 1.0 / f : f;
float cosi = dot(I, Normal);
Fac = fresnel_dielectric_cos(cosi, eta);
}
diff --git a/intern/cycles/kernel/shaders/node_glass_bsdf.osl b/intern/cycles/kernel/shaders/node_glass_bsdf.osl
index 96934199621..b3d6133553b 100644
--- a/intern/cycles/kernel/shaders/node_glass_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_glass_bsdf.osl
@@ -26,7 +26,7 @@ shader node_glass_bsdf(
output closure color BSDF = 0)
{
float f = max(IOR, 1e-5);
- float eta = backfacing() ? 1.0 / f: f;
+ float eta = backfacing() ? 1.0 / f : f;
float cosi = dot(I, Normal);
float Fr = fresnel_dielectric_cos(cosi, eta);
diff --git a/intern/cycles/kernel/shaders/node_image_texture.osl b/intern/cycles/kernel/shaders/node_image_texture.osl
index caa755636b9..7238a1e8862 100644
--- a/intern/cycles/kernel/shaders/node_image_texture.osl
+++ b/intern/cycles/kernel/shaders/node_image_texture.osl
@@ -17,9 +17,9 @@
#include "stdosl.h"
#include "node_color.h"
-color image_texture_lookup(string filename, string color_space, float u, float v, output float Alpha, int use_alpha, int is_float)
+color image_texture_lookup(string filename, string color_space, float u, float v, output float Alpha, int use_alpha, int is_float, string interpolation)
{
- color rgb = (color)texture(filename, u, 1.0 - v, "wrap", "periodic", "alpha", Alpha);
+ color rgb = (color)texture(filename, u, 1.0 - v, "wrap", "periodic", "interp", interpolation, "alpha", Alpha);
if (use_alpha) {
rgb = color_unpremultiply(rgb, Alpha);
@@ -42,6 +42,7 @@ shader node_image_texture(
string filename = "",
string color_space = "sRGB",
string projection = "Flat",
+ string interpolation = "smartcubic",
float projection_blend = 0.0,
int is_float = 1,
int use_alpha = 1,
@@ -54,7 +55,7 @@ shader node_image_texture(
p = transform(mapping, p);
if (projection == "Flat") {
- Color = image_texture_lookup(filename, color_space, p[0], p[1], Alpha, use_alpha, is_float);
+ Color = image_texture_lookup(filename, color_space, p[0], p[1], Alpha, use_alpha, is_float, interpolation);
}
else if (projection == "Box") {
/* object space normal */
@@ -119,15 +120,15 @@ shader node_image_texture(
float tmp_alpha;
if (weight[0] > 0.0) {
- Color += weight[0] * image_texture_lookup(filename, color_space, p[1], p[2], tmp_alpha, use_alpha, is_float);
+ Color += weight[0] * image_texture_lookup(filename, color_space, p[1], p[2], tmp_alpha, use_alpha, is_float, interpolation);
Alpha += weight[0] * tmp_alpha;
}
if (weight[1] > 0.0) {
- Color += weight[1] * image_texture_lookup(filename, color_space, p[0], p[2], tmp_alpha, use_alpha, is_float);
+ Color += weight[1] * image_texture_lookup(filename, color_space, p[0], p[2], tmp_alpha, use_alpha, is_float, interpolation);
Alpha += weight[1] * tmp_alpha;
}
if (weight[2] > 0.0) {
- Color += weight[2] * image_texture_lookup(filename, color_space, p[1], p[0], tmp_alpha, use_alpha, is_float);
+ Color += weight[2] * image_texture_lookup(filename, color_space, p[1], p[0], tmp_alpha, use_alpha, is_float, interpolation);
Alpha += weight[2] * tmp_alpha;
}
}
diff --git a/intern/cycles/kernel/shaders/node_light_path.osl b/intern/cycles/kernel/shaders/node_light_path.osl
index 599c7f5a262..95fbcabf917 100644
--- a/intern/cycles/kernel/shaders/node_light_path.osl
+++ b/intern/cycles/kernel/shaders/node_light_path.osl
@@ -26,7 +26,8 @@ shader node_light_path(
output float IsTransmissionRay = 0.0,
output float IsVolumeScatterRay = 0.0,
output float RayLength = 0.0,
- output float RayDepth = 0.0)
+ output float RayDepth = 0.0,
+ output float TransparentDepth = 0.0)
{
IsCameraRay = raytype("camera");
IsShadowRay = raytype("shadow");
@@ -42,5 +43,9 @@ shader node_light_path(
int ray_depth;
getattribute("path:ray_depth", ray_depth);
RayDepth = (float)ray_depth;
+
+ int transparent_depth;
+ getattribute("path:transparent_depth", transparent_depth);
+ TransparentDepth = (float)transparent_depth;
}
diff --git a/intern/cycles/kernel/shaders/node_math.osl b/intern/cycles/kernel/shaders/node_math.osl
index 066e5f8dbe1..abb6a359e75 100644
--- a/intern/cycles/kernel/shaders/node_math.osl
+++ b/intern/cycles/kernel/shaders/node_math.osl
@@ -93,6 +93,8 @@ shader node_math(
Value = Value1 > Value2;
else if (type == "Modulo")
Value = safe_modulo(Value1, Value2);
+ else if (type == "Absolute")
+ Value = fabs(Value1);
if (Clamp)
Value = clamp(Value, 0.0, 1.0);
diff --git a/intern/cycles/kernel/shaders/node_mix.osl b/intern/cycles/kernel/shaders/node_mix.osl
index c2c397c6446..dd54fd814de 100644
--- a/intern/cycles/kernel/shaders/node_mix.osl
+++ b/intern/cycles/kernel/shaders/node_mix.osl
@@ -88,7 +88,7 @@ color node_mix_diff(float t, color col1, color col2)
color node_mix_dark(float t, color col1, color col2)
{
- return min(col1, col2 * t);
+ return min(col1, col2) * t + col1 * (1.0 - t);
}
color node_mix_light(float t, color col1, color col2)
diff --git a/intern/cycles/kernel/shaders/node_refraction_bsdf.osl b/intern/cycles/kernel/shaders/node_refraction_bsdf.osl
index f87b3a5dd86..4a32415b482 100644
--- a/intern/cycles/kernel/shaders/node_refraction_bsdf.osl
+++ b/intern/cycles/kernel/shaders/node_refraction_bsdf.osl
@@ -25,7 +25,7 @@ shader node_refraction_bsdf(
output closure color BSDF = 0)
{
float f = max(IOR, 1e-5);
- float eta = backfacing() ? 1.0 / f: f;
+ float eta = backfacing() ? 1.0 / f : f;
if (distribution == "Sharp")
BSDF = Color * refraction(Normal, eta);
diff --git a/intern/cycles/kernel/shaders/node_scatter_volume.osl b/intern/cycles/kernel/shaders/node_scatter_volume.osl
index bf23abbf933..77c157bd92b 100644
--- a/intern/cycles/kernel/shaders/node_scatter_volume.osl
+++ b/intern/cycles/kernel/shaders/node_scatter_volume.osl
@@ -22,6 +22,6 @@ shader node_scatter_volume(
float Anisotropy = 0.0,
output closure color Volume = 0)
{
- Volume = (Color * Density) * henyey_greenstein(Anisotropy);
+ Volume = (Color * max(Density, 0.0)) * henyey_greenstein(Anisotropy);
}
diff --git a/intern/cycles/kernel/shaders/node_uv_map.osl b/intern/cycles/kernel/shaders/node_uv_map.osl
new file mode 100644
index 00000000000..01c984aff4c
--- /dev/null
+++ b/intern/cycles/kernel/shaders/node_uv_map.osl
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License
+ */
+
+#include "stdosl.h"
+
+shader node_uv_map( /* UV Map node: outputs a UV coordinate read from a geometry attribute */
+ int from_dupli = 0, /* non-zero: read "geom:dupli_uv" instead of a UV layer */
+ string name = "", /* attribute name to read; empty selects "geom:uv" */
+ string bump_offset = "center", /* "dx" / "dy" shift UV by its derivative; any other value leaves UV as read */
+ output point UV = point(0.0, 0.0, 0.0)) /* result; NOTE(review): getattribute() results are unchecked, presumably UV keeps this default when the attribute is missing - confirm */
+{
+ if (from_dupli) {
+ getattribute("geom:dupli_uv", UV);
+ }
+ else {
+ if (name == "")
+ getattribute("geom:uv", UV); /* no explicit name given */
+ else
+ getattribute(name, UV); /* look up the attribute called `name` */
+ }
+
+ if (bump_offset == "dx") { /* NOTE(review): presumably used to sample neighboring positions for bump mapping - confirm with caller */
+ if (!from_dupli) { /* offset is skipped for the dupli UV */
+ UV += Dx(UV);
+ }
+ }
+ else if (bump_offset == "dy") {
+ if (!from_dupli) { /* offset is skipped for the dupli UV */
+ UV += Dy(UV);
+ }
+ }
+}
diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h
index 96c7cefbcb2..dbf59c60cb0 100644
--- a/intern/cycles/kernel/svm/svm.h
+++ b/intern/cycles/kernel/svm/svm.h
@@ -182,10 +182,9 @@ CCL_NAMESPACE_BEGIN
/* Main Interpreter Loop */
-ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ShaderType type, float randb, int path_flag)
+ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ShaderType type, int path_flag)
{
float stack[SVM_STACK_SIZE];
- float closure_weight = 1.0f;
int offset = sd->shader & SHADER_MASK;
while(1) {
@@ -200,7 +199,7 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, Shade
break;
}
case NODE_CLOSURE_BSDF:
- svm_node_closure_bsdf(kg, sd, stack, node, randb, path_flag, &offset);
+ svm_node_closure_bsdf(kg, sd, stack, node, path_flag, &offset);
break;
case NODE_CLOSURE_EMISSION:
svm_node_closure_emission(sd, stack, node);
@@ -227,13 +226,15 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, Shade
svm_node_emission_weight(kg, sd, stack, node);
break;
case NODE_MIX_CLOSURE:
- svm_node_mix_closure(sd, stack, node, &offset, &randb);
+ svm_node_mix_closure(sd, stack, node);
break;
- case NODE_ADD_CLOSURE:
- svm_node_add_closure(sd, stack, node.y, node.z, &offset, &randb, &closure_weight);
+ case NODE_JUMP_IF_ZERO:
+ if(stack_load_float(stack, node.z) == 0.0f)
+ offset += node.y;
break;
- case NODE_JUMP:
- offset = node.y;
+ case NODE_JUMP_IF_ONE:
+ if(stack_load_float(stack, node.z) == 1.0f)
+ offset += node.y;
break;
#ifdef __IMAGE_TEXTURES__
case NODE_TEX_IMAGE:
@@ -437,9 +438,6 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, Shade
#endif
case NODE_END:
default:
-#ifndef __MULTI_CLOSURE__
- sd->closure.weight *= closure_weight;
-#endif
return;
}
}
diff --git a/intern/cycles/kernel/svm/svm_attribute.h b/intern/cycles/kernel/svm/svm_attribute.h
index 4c53bfd74fa..fd0ea7fef31 100644
--- a/intern/cycles/kernel/svm/svm_attribute.h
+++ b/intern/cycles/kernel/svm/svm_attribute.h
@@ -22,12 +22,12 @@ ccl_device void svm_node_attr_init(KernelGlobals *kg, ShaderData *sd,
uint4 node, NodeAttributeType *type,
NodeAttributeType *mesh_type, AttributeElement *elem, int *offset, uint *out_offset)
{
- if(sd->object != ~0 && sd->prim != ~0) {
+ if(sd->object != OBJECT_NONE) {
/* find attribute by unique id */
uint id = node.y;
uint attr_offset = sd->object*kernel_data.bvh.attributes_map_stride;
#ifdef __HAIR__
- attr_offset = (sd->segment == ~0)? attr_offset: attr_offset + ATTR_PRIM_CURVE;
+ attr_offset = (sd->type & PRIMITIVE_ALL_CURVE)? attr_offset + ATTR_PRIM_CURVE: attr_offset;
#endif
uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset);
diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h
index 2813e38d8f7..a3770877544 100644
--- a/intern/cycles/kernel/svm/svm_closure.h
+++ b/intern/cycles/kernel/svm/svm_closure.h
@@ -51,7 +51,6 @@ ccl_device void svm_node_glass_setup(ShaderData *sd, ShaderClosure *sc, int type
ccl_device_inline ShaderClosure *svm_node_closure_get_non_bsdf(ShaderData *sd, ClosureType type, float mix_weight)
{
-#ifdef __MULTI_CLOSURE__
ShaderClosure *sc = &sd->closure[sd->num_closure];
if(sd->num_closure < MAX_CLOSURE) {
@@ -65,14 +64,10 @@ ccl_device_inline ShaderClosure *svm_node_closure_get_non_bsdf(ShaderData *sd, C
}
return NULL;
-#else
- return &sd->closure;
-#endif
}
ccl_device_inline ShaderClosure *svm_node_closure_get_bsdf(ShaderData *sd, float mix_weight)
{
-#ifdef __MULTI_CLOSURE__
ShaderClosure *sc = &sd->closure[sd->num_closure];
float3 weight = sc->weight * mix_weight;
float sample_weight = fabsf(average(weight));
@@ -88,14 +83,10 @@ ccl_device_inline ShaderClosure *svm_node_closure_get_bsdf(ShaderData *sd, float
}
return NULL;
-#else
- return &sd->closure;
-#endif
}
ccl_device_inline ShaderClosure *svm_node_closure_get_absorption(ShaderData *sd, float mix_weight)
{
-#ifdef __MULTI_CLOSURE__
ShaderClosure *sc = &sd->closure[sd->num_closure];
float3 weight = (make_float3(1.0f, 1.0f, 1.0f) - sc->weight) * mix_weight;
float sample_weight = fabsf(average(weight));
@@ -111,16 +102,12 @@ ccl_device_inline ShaderClosure *svm_node_closure_get_absorption(ShaderData *sd,
}
return NULL;
-#else
- return &sd->closure;
-#endif
}
-ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, float randb, int path_flag, int *offset)
+ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int path_flag, int *offset)
{
uint type, param1_offset, param2_offset;
-#ifdef __MULTI_CLOSURE__
uint mix_weight_offset;
decode_node_uchar4(node.y, &type, &param1_offset, &param2_offset, &mix_weight_offset);
float mix_weight = (stack_valid(mix_weight_offset)? stack_load_float(stack, mix_weight_offset): 1.0f);
@@ -132,13 +119,6 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
return;
float3 N = stack_valid(data_node.x)? stack_load_float3(stack, data_node.x): sd->N;
-#else
- decode_node_uchar4(node.y, &type, &param1_offset, &param2_offset, NULL);
- float mix_weight = 1.0f;
-
- uint4 data_node = read_node(kg, offset);
- float3 N = stack_valid(data_node.x)? stack_load_float3(stack, data_node.x): sd->N;
-#endif
float param1 = (stack_valid(param1_offset))? stack_load_float(stack, param1_offset): __uint_as_float(node.z);
float param2 = (stack_valid(param2_offset))? stack_load_float(stack, param2_offset): __uint_as_float(node.w);
@@ -255,7 +235,6 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
float fresnel = fresnel_dielectric_cos(cosNO, eta);
float roughness = param1;
-#ifdef __MULTI_CLOSURE__
/* reflection */
ShaderClosure *sc = &sd->closure[sd->num_closure];
float3 weight = sc->weight;
@@ -279,15 +258,6 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
sc->N = N;
svm_node_glass_setup(sd, sc, type, eta, roughness, true);
}
-#else
- ShaderClosure *sc = svm_node_closure_get_bsdf(sd, mix_weight);
-
- if(sc) {
- sc->N = N;
- bool refract = (randb > fresnel);
- svm_node_glass_setup(sd, sc, type, eta, roughness, refract);
- }
-#endif
break;
}
@@ -364,10 +334,16 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
case CLOSURE_BSDF_HAIR_REFLECTION_ID:
case CLOSURE_BSDF_HAIR_TRANSMISSION_ID: {
- if(sd->flag & SD_BACKFACING && sd->segment != ~0) {
+ if(sd->flag & SD_BACKFACING && sd->type & PRIMITIVE_ALL_CURVE) {
ShaderClosure *sc = svm_node_closure_get_bsdf(sd, mix_weight);
+
if(sc) {
- sc->weight = make_float3(1.0f,1.0f,1.0f);
+ /* todo: giving a fixed weight here will cause issues when
+ * mixing multiple BSDFs. energy will not be conserved and
+ * the throughput can blow up after multiple bounces. we
+ * better figure out a way to skip backfaces from rays
+ * spawned by transmission from the front */
+ sc->weight = make_float3(1.0f, 1.0f, 1.0f);
sc->N = N;
sd->flag |= bsdf_transparent_setup(sc);
}
@@ -381,12 +357,14 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
sc->data0 = param1;
sc->data1 = param2;
sc->offset = -stack_load_float(stack, data_node.z);
- if(sd->segment == ~0) {
+
+ if(!(sd->type & PRIMITIVE_ALL_CURVE)) {
sc->T = normalize(sd->dPdv);
sc->offset = 0.0f;
}
else
sc->T = sd->dPdu;
+
if(type == CLOSURE_BSDF_HAIR_REFLECTION_ID) {
sd->flag |= bsdf_hair_reflection_setup(sc);
}
@@ -484,21 +462,16 @@ ccl_device void svm_node_closure_volume(KernelGlobals *kg, ShaderData *sd, float
#ifdef __VOLUME__
uint type, param1_offset, param2_offset;
-#ifdef __MULTI_CLOSURE__
uint mix_weight_offset;
decode_node_uchar4(node.y, &type, &param1_offset, &param2_offset, &mix_weight_offset);
float mix_weight = (stack_valid(mix_weight_offset)? stack_load_float(stack, mix_weight_offset): 1.0f);
if(mix_weight == 0.0f)
return;
-#else
- decode_node_uchar4(node.y, &type, &param1_offset, &param2_offset, NULL);
- float mix_weight = 1.0f;
-#endif
float param1 = (stack_valid(param1_offset))? stack_load_float(stack, param1_offset): __uint_as_float(node.z);
float param2 = (stack_valid(param2_offset))? stack_load_float(stack, param2_offset): __uint_as_float(node.w);
- float density = param1;
+ float density = fmaxf(param1, 0.0f);
switch(type) {
case CLOSURE_VOLUME_ABSORPTION_ID: {
@@ -527,7 +500,6 @@ ccl_device void svm_node_closure_volume(KernelGlobals *kg, ShaderData *sd, float
ccl_device void svm_node_closure_emission(ShaderData *sd, float *stack, uint4 node)
{
-#ifdef __MULTI_CLOSURE__
uint mix_weight_offset = node.y;
if(stack_valid(mix_weight_offset)) {
@@ -540,17 +512,12 @@ ccl_device void svm_node_closure_emission(ShaderData *sd, float *stack, uint4 no
}
else
svm_node_closure_get_non_bsdf(sd, CLOSURE_EMISSION_ID, 1.0f);
-#else
- ShaderClosure *sc = &sd->closure;
- sc->type = CLOSURE_EMISSION_ID;
-#endif
sd->flag |= SD_EMISSION;
}
ccl_device void svm_node_closure_background(ShaderData *sd, float *stack, uint4 node)
{
-#ifdef __MULTI_CLOSURE__
uint mix_weight_offset = node.y;
if(stack_valid(mix_weight_offset)) {
@@ -563,15 +530,10 @@ ccl_device void svm_node_closure_background(ShaderData *sd, float *stack, uint4
}
else
svm_node_closure_get_non_bsdf(sd, CLOSURE_BACKGROUND_ID, 1.0f);
-#else
- ShaderClosure *sc = &sd->closure;
- sc->type = CLOSURE_BACKGROUND_ID;
-#endif
}
ccl_device void svm_node_closure_holdout(ShaderData *sd, float *stack, uint4 node)
{
-#ifdef __MULTI_CLOSURE__
uint mix_weight_offset = node.y;
if(stack_valid(mix_weight_offset)) {
@@ -584,17 +546,12 @@ ccl_device void svm_node_closure_holdout(ShaderData *sd, float *stack, uint4 nod
}
else
svm_node_closure_get_non_bsdf(sd, CLOSURE_HOLDOUT_ID, 1.0f);
-#else
- ShaderClosure *sc = &sd->closure;
- sc->type = CLOSURE_HOLDOUT_ID;
-#endif
sd->flag |= SD_HOLDOUT;
}
ccl_device void svm_node_closure_ambient_occlusion(ShaderData *sd, float *stack, uint4 node)
{
-#ifdef __MULTI_CLOSURE__
uint mix_weight_offset = node.y;
if(stack_valid(mix_weight_offset)) {
@@ -607,10 +564,6 @@ ccl_device void svm_node_closure_ambient_occlusion(ShaderData *sd, float *stack,
}
else
svm_node_closure_get_non_bsdf(sd, CLOSURE_AMBIENT_OCCLUSION_ID, 1.0f);
-#else
- ShaderClosure *sc = &sd->closure;
- sc->type = CLOSURE_AMBIENT_OCCLUSION_ID;
-#endif
sd->flag |= SD_AO;
}
@@ -619,12 +572,8 @@ ccl_device void svm_node_closure_ambient_occlusion(ShaderData *sd, float *stack,
ccl_device_inline void svm_node_closure_store_weight(ShaderData *sd, float3 weight)
{
-#ifdef __MULTI_CLOSURE__
if(sd->num_closure < MAX_CLOSURE)
sd->closure[sd->num_closure].weight = weight;
-#else
- sd->closure.weight = weight;
-#endif
}
ccl_device void svm_node_closure_set_weight(ShaderData *sd, uint r, uint g, uint b)
@@ -637,7 +586,7 @@ ccl_device void svm_node_emission_set_weight_total(KernelGlobals *kg, ShaderData
{
float3 weight = make_float3(__uint_as_float(r), __uint_as_float(g), __uint_as_float(b));
- if(sd->object != ~0)
+ if(sd->object != OBJECT_NONE)
weight /= object_surface_area(kg, sd->object);
svm_node_closure_store_weight(sd, weight);
@@ -659,16 +608,14 @@ ccl_device void svm_node_emission_weight(KernelGlobals *kg, ShaderData *sd, floa
float strength = stack_load_float(stack, strength_offset);
float3 weight = stack_load_float3(stack, color_offset)*strength;
- if(total_power && sd->object != ~0)
+ if(total_power && sd->object != OBJECT_NONE)
weight /= object_surface_area(kg, sd->object);
svm_node_closure_store_weight(sd, weight);
}
-ccl_device void svm_node_mix_closure(ShaderData *sd, float *stack,
- uint4 node, int *offset, float *randb)
+ccl_device void svm_node_mix_closure(ShaderData *sd, float *stack, uint4 node)
{
-#ifdef __MULTI_CLOSURE__
/* fetch weight from blend input, previous mix closures,
* and write to stack to be used by closure nodes later */
uint weight_offset, in_weight_offset, weight1_offset, weight2_offset;
@@ -683,44 +630,6 @@ ccl_device void svm_node_mix_closure(ShaderData *sd, float *stack,
stack_store_float(stack, weight1_offset, in_weight*(1.0f - weight));
if(stack_valid(weight2_offset))
stack_store_float(stack, weight2_offset, in_weight*weight);
-#else
- /* pick a closure and make the random number uniform over 0..1 again.
- * closure 1 starts on the next node, for closure 2 the start is at an
- * offset from the current node, so we jump */
- uint weight_offset = node.y;
- uint node_jump = node.z;
- float weight = stack_load_float(stack, weight_offset);
- weight = clamp(weight, 0.0f, 1.0f);
-
- if(*randb < weight) {
- *offset += node_jump;
- *randb = *randb/weight;
- }
- else
- *randb = (*randb - weight)/(1.0f - weight);
-#endif
-}
-
-ccl_device void svm_node_add_closure(ShaderData *sd, float *stack, uint unused,
- uint node_jump, int *offset, float *randb, float *closure_weight)
-{
-#ifdef __MULTI_CLOSURE__
- /* nothing to do, handled in compiler */
-#else
- /* pick one of the two closures with probability 0.5. sampling quality
- * is not going to be great, for that we'd need to evaluate the weights
- * of the two closures being added */
- float weight = 0.5f;
-
- if(*randb < weight) {
- *offset += node_jump;
- *randb = *randb/weight;
- }
- else
- *randb = (*randb - weight)/(1.0f - weight);
-
- *closure_weight *= 2.0f;
-#endif
}
/* (Bump) normal */
diff --git a/intern/cycles/kernel/svm/svm_geometry.h b/intern/cycles/kernel/svm/svm_geometry.h
index ad0cacb027a..fe681ec92af 100644
--- a/intern/cycles/kernel/svm/svm_geometry.h
+++ b/intern/cycles/kernel/svm/svm_geometry.h
@@ -98,44 +98,44 @@ ccl_device void svm_node_particle_info(KernelGlobals *kg, ShaderData *sd, float
{
switch(type) {
case NODE_INFO_PAR_INDEX: {
- uint particle_id = object_particle_id(kg, sd->object);
+ int particle_id = object_particle_id(kg, sd->object);
stack_store_float(stack, out_offset, particle_index(kg, particle_id));
break;
}
case NODE_INFO_PAR_AGE: {
- uint particle_id = object_particle_id(kg, sd->object);
+ int particle_id = object_particle_id(kg, sd->object);
stack_store_float(stack, out_offset, particle_age(kg, particle_id));
break;
}
case NODE_INFO_PAR_LIFETIME: {
- uint particle_id = object_particle_id(kg, sd->object);
+ int particle_id = object_particle_id(kg, sd->object);
stack_store_float(stack, out_offset, particle_lifetime(kg, particle_id));
break;
}
case NODE_INFO_PAR_LOCATION: {
- uint particle_id = object_particle_id(kg, sd->object);
+ int particle_id = object_particle_id(kg, sd->object);
stack_store_float3(stack, out_offset, particle_location(kg, particle_id));
break;
}
- #if 0 /* XXX float4 currently not supported in SVM stack */
+#if 0 /* XXX float4 currently not supported in SVM stack */
case NODE_INFO_PAR_ROTATION: {
- uint particle_id = object_particle_id(kg, sd->object);
+ int particle_id = object_particle_id(kg, sd->object);
stack_store_float4(stack, out_offset, particle_rotation(kg, particle_id));
break;
}
- #endif
+#endif
case NODE_INFO_PAR_SIZE: {
- uint particle_id = object_particle_id(kg, sd->object);
+ int particle_id = object_particle_id(kg, sd->object);
stack_store_float(stack, out_offset, particle_size(kg, particle_id));
break;
}
case NODE_INFO_PAR_VELOCITY: {
- uint particle_id = object_particle_id(kg, sd->object);
+ int particle_id = object_particle_id(kg, sd->object);
stack_store_float3(stack, out_offset, particle_velocity(kg, particle_id));
break;
}
case NODE_INFO_PAR_ANGULAR_VELOCITY: {
- uint particle_id = object_particle_id(kg, sd->object);
+ int particle_id = object_particle_id(kg, sd->object);
stack_store_float3(stack, out_offset, particle_angular_velocity(kg, particle_id));
break;
}
@@ -153,7 +153,7 @@ ccl_device void svm_node_hair_info(KernelGlobals *kg, ShaderData *sd, float *sta
switch(type) {
case NODE_INFO_CURVE_IS_STRAND: {
- data = (sd->segment != ~0);
+ data = (sd->type & PRIMITIVE_ALL_CURVE) != 0;
stack_store_float(stack, out_offset, data);
break;
}
diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h
index bc76ea1e662..daf7c6652d2 100644
--- a/intern/cycles/kernel/svm/svm_image.h
+++ b/intern/cycles/kernel/svm/svm_image.h
@@ -60,31 +60,51 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
uint width = info.x;
uint height = info.y;
uint offset = info.z;
- uint periodic = info.w;
+ uint periodic = (info.w & 0x1);
+ uint interpolation = info.w >> 1;
+ float4 r;
int ix, iy, nix, niy;
- float tx = svm_image_texture_frac(x*width, &ix);
- float ty = svm_image_texture_frac(y*height, &iy);
+ if (interpolation == INTERPOLATION_CLOSEST) {
+ svm_image_texture_frac(x*width, &ix);
+ svm_image_texture_frac(y*height, &iy);
- if(periodic) {
- ix = svm_image_texture_wrap_periodic(ix, width);
- iy = svm_image_texture_wrap_periodic(iy, height);
+ if(periodic) {
+ ix = svm_image_texture_wrap_periodic(ix, width);
+ iy = svm_image_texture_wrap_periodic(iy, height);
+ }
+ else {
+ ix = svm_image_texture_wrap_clamp(ix, width);
+ iy = svm_image_texture_wrap_clamp(iy, height);
- nix = svm_image_texture_wrap_periodic(ix+1, width);
- niy = svm_image_texture_wrap_periodic(iy+1, height);
+ }
+ r = svm_image_texture_read(kg, offset + ix + iy*width);
}
- else {
- ix = svm_image_texture_wrap_clamp(ix, width);
- iy = svm_image_texture_wrap_clamp(iy, height);
+ else { /* We default to linear interpolation if it is not closest */
+ float tx = svm_image_texture_frac(x*width, &ix);
+ float ty = svm_image_texture_frac(y*height, &iy);
- nix = svm_image_texture_wrap_clamp(ix+1, width);
- niy = svm_image_texture_wrap_clamp(iy+1, height);
- }
+ if(periodic) {
+ ix = svm_image_texture_wrap_periodic(ix, width);
+ iy = svm_image_texture_wrap_periodic(iy, height);
- float4 r = (1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, offset + ix + iy*width);
- r += (1.0f - ty)*tx*svm_image_texture_read(kg, offset + nix + iy*width);
- r += ty*(1.0f - tx)*svm_image_texture_read(kg, offset + ix + niy*width);
- r += ty*tx*svm_image_texture_read(kg, offset + nix + niy*width);
+ nix = svm_image_texture_wrap_periodic(ix+1, width);
+ niy = svm_image_texture_wrap_periodic(iy+1, height);
+ }
+ else {
+ ix = svm_image_texture_wrap_clamp(ix, width);
+ iy = svm_image_texture_wrap_clamp(iy, height);
+
+ nix = svm_image_texture_wrap_clamp(ix+1, width);
+ niy = svm_image_texture_wrap_clamp(iy+1, height);
+ }
+
+
+ r = (1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, offset + ix + iy*width);
+ r += (1.0f - ty)*tx*svm_image_texture_read(kg, offset + nix + iy*width);
+ r += ty*(1.0f - tx)*svm_image_texture_read(kg, offset + ix + niy*width);
+ r += ty*tx*svm_image_texture_read(kg, offset + nix + niy*width);
+ }
if(use_alpha && r.w != 1.0f && r.w != 0.0f) {
float invw = 1.0f/r.w;
@@ -129,8 +149,8 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
* - group by size and use a 3d texture, performance impact
* - group into larger texture with some padding for correct lerp
*
- * also note that cuda has 128 textures limit, we use 100 now, since
- * we still need some for other storage */
+ * also note that cuda has a textures limit (128 for Fermi, 256 for Kepler),
+ * and we cannot use all since we still need some for other storage */
switch(id) {
case 0: r = kernel_tex_image_interp(__tex_image_float_000, x, y); break;
@@ -233,7 +253,62 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
case 97: r = kernel_tex_image_interp(__tex_image_097, x, y); break;
case 98: r = kernel_tex_image_interp(__tex_image_098, x, y); break;
case 99: r = kernel_tex_image_interp(__tex_image_099, x, y); break;
- default:
+
+#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 300)
+ case 100: r = kernel_tex_image_interp(__tex_image_100, x, y); break;
+ case 101: r = kernel_tex_image_interp(__tex_image_101, x, y); break;
+ case 102: r = kernel_tex_image_interp(__tex_image_102, x, y); break;
+ case 103: r = kernel_tex_image_interp(__tex_image_103, x, y); break;
+ case 104: r = kernel_tex_image_interp(__tex_image_104, x, y); break;
+ case 105: r = kernel_tex_image_interp(__tex_image_105, x, y); break;
+ case 106: r = kernel_tex_image_interp(__tex_image_106, x, y); break;
+ case 107: r = kernel_tex_image_interp(__tex_image_107, x, y); break;
+ case 108: r = kernel_tex_image_interp(__tex_image_108, x, y); break;
+ case 109: r = kernel_tex_image_interp(__tex_image_109, x, y); break;
+ case 110: r = kernel_tex_image_interp(__tex_image_110, x, y); break;
+ case 111: r = kernel_tex_image_interp(__tex_image_111, x, y); break;
+ case 112: r = kernel_tex_image_interp(__tex_image_112, x, y); break;
+ case 113: r = kernel_tex_image_interp(__tex_image_113, x, y); break;
+ case 114: r = kernel_tex_image_interp(__tex_image_114, x, y); break;
+ case 115: r = kernel_tex_image_interp(__tex_image_115, x, y); break;
+ case 116: r = kernel_tex_image_interp(__tex_image_116, x, y); break;
+ case 117: r = kernel_tex_image_interp(__tex_image_117, x, y); break;
+ case 118: r = kernel_tex_image_interp(__tex_image_118, x, y); break;
+ case 119: r = kernel_tex_image_interp(__tex_image_119, x, y); break;
+ case 120: r = kernel_tex_image_interp(__tex_image_120, x, y); break;
+ case 121: r = kernel_tex_image_interp(__tex_image_121, x, y); break;
+ case 122: r = kernel_tex_image_interp(__tex_image_122, x, y); break;
+ case 123: r = kernel_tex_image_interp(__tex_image_123, x, y); break;
+ case 124: r = kernel_tex_image_interp(__tex_image_124, x, y); break;
+ case 125: r = kernel_tex_image_interp(__tex_image_125, x, y); break;
+ case 126: r = kernel_tex_image_interp(__tex_image_126, x, y); break;
+ case 127: r = kernel_tex_image_interp(__tex_image_127, x, y); break;
+ case 128: r = kernel_tex_image_interp(__tex_image_128, x, y); break;
+ case 129: r = kernel_tex_image_interp(__tex_image_129, x, y); break;
+ case 130: r = kernel_tex_image_interp(__tex_image_130, x, y); break;
+ case 131: r = kernel_tex_image_interp(__tex_image_131, x, y); break;
+ case 132: r = kernel_tex_image_interp(__tex_image_132, x, y); break;
+ case 133: r = kernel_tex_image_interp(__tex_image_133, x, y); break;
+ case 134: r = kernel_tex_image_interp(__tex_image_134, x, y); break;
+ case 135: r = kernel_tex_image_interp(__tex_image_135, x, y); break;
+ case 136: r = kernel_tex_image_interp(__tex_image_136, x, y); break;
+ case 137: r = kernel_tex_image_interp(__tex_image_137, x, y); break;
+ case 138: r = kernel_tex_image_interp(__tex_image_138, x, y); break;
+ case 139: r = kernel_tex_image_interp(__tex_image_139, x, y); break;
+ case 140: r = kernel_tex_image_interp(__tex_image_140, x, y); break;
+ case 141: r = kernel_tex_image_interp(__tex_image_141, x, y); break;
+ case 142: r = kernel_tex_image_interp(__tex_image_142, x, y); break;
+ case 143: r = kernel_tex_image_interp(__tex_image_143, x, y); break;
+ case 144: r = kernel_tex_image_interp(__tex_image_144, x, y); break;
+ case 145: r = kernel_tex_image_interp(__tex_image_145, x, y); break;
+ case 146: r = kernel_tex_image_interp(__tex_image_146, x, y); break;
+ case 147: r = kernel_tex_image_interp(__tex_image_147, x, y); break;
+ case 148: r = kernel_tex_image_interp(__tex_image_148, x, y); break;
+ case 149: r = kernel_tex_image_interp(__tex_image_149, x, y); break;
+ case 150: r = kernel_tex_image_interp(__tex_image_150, x, y); break;
+#endif
+
+ default:
kernel_assert(0);
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
@@ -302,7 +377,7 @@ ccl_device void svm_node_tex_image_box(KernelGlobals *kg, ShaderData *sd, float
float3 N = sd->N;
N = sd->N;
- if(sd->object != ~0)
+ if(sd->object != OBJECT_NONE)
object_inverse_normal_transform(kg, sd, &N);
/* project from direction vector to barycentric coordinates in triangles */
diff --git a/intern/cycles/kernel/svm/svm_light_path.h b/intern/cycles/kernel/svm/svm_light_path.h
index 8968146c5e2..da544c63ae0 100644
--- a/intern/cycles/kernel/svm/svm_light_path.h
+++ b/intern/cycles/kernel/svm/svm_light_path.h
@@ -34,6 +34,7 @@ ccl_device void svm_node_light_path(ShaderData *sd, float *stack, uint type, uin
case NODE_LP_backfacing: info = (sd->flag & SD_BACKFACING)? 1.0f: 0.0f; break;
case NODE_LP_ray_length: info = sd->ray_length; break;
case NODE_LP_ray_depth: info = (float)sd->ray_depth; break;
+ case NODE_LP_ray_transparent: info = sd->transparent_depth; break;
}
stack_store_float(stack, out_offset, info);
diff --git a/intern/cycles/kernel/svm/svm_math.h b/intern/cycles/kernel/svm/svm_math.h
index bb46d443a6b..1ce9386e40e 100644
--- a/intern/cycles/kernel/svm/svm_math.h
+++ b/intern/cycles/kernel/svm/svm_math.h
@@ -56,6 +56,8 @@ ccl_device float svm_math(NodeMath type, float Fac1, float Fac2)
Fac = Fac1 > Fac2;
else if(type == NODE_MATH_MODULO)
Fac = safe_modulo(Fac1, Fac2);
+ else if(type == NODE_MATH_ABSOLUTE)
+ Fac = fabsf(Fac1);
else if(type == NODE_MATH_CLAMP)
Fac = clamp(Fac1, 0.0f, 1.0f);
else
diff --git a/intern/cycles/kernel/svm/svm_mix.h b/intern/cycles/kernel/svm/svm_mix.h
index 4e834b7c500..edc3903865e 100644
--- a/intern/cycles/kernel/svm/svm_mix.h
+++ b/intern/cycles/kernel/svm/svm_mix.h
@@ -89,7 +89,7 @@ ccl_device float3 svm_mix_diff(float t, float3 col1, float3 col2)
ccl_device float3 svm_mix_dark(float t, float3 col1, float3 col2)
{
- return min(col1, col2*t);
+ return min(col1, col2)*t + col1*(1.0f - t);
}
ccl_device float3 svm_mix_light(float t, float3 col1, float3 col2)
diff --git a/intern/cycles/kernel/svm/svm_noise.h b/intern/cycles/kernel/svm/svm_noise.h
index 282ad191470..91dda8972f9 100644
--- a/intern/cycles/kernel/svm/svm_noise.h
+++ b/intern/cycles/kernel/svm/svm_noise.h
@@ -357,15 +357,13 @@ ccl_device float3 cellnoise_color(float3 p)
return make_float3(r, g, b);
}
#else
-ccl_device float3 cellnoise_color(const float3& p)
+ccl_device __m128 cellnoise_color(const __m128& p)
{
- __m128i v_yxz = quick_floor_sse(_mm_setr_ps(p.y, p.x, p.z, 0.0f));
- __m128i v_xyy = shuffle<1, 0, 0, 3>(v_yxz);
- __m128i v_zzx = shuffle<2, 2, 1, 3>(v_yxz);
- __m128 rgb = bits_to_01_sse(hash_sse(v_xyy, v_yxz, v_zzx));
-
- float3 result = *(float3*)&rgb;
- return result;
+ __m128i ip = quick_floor_sse(p);
+ __m128i ip_yxz = shuffle<1, 0, 2, 3>(ip);
+ __m128i ip_xyy = shuffle<0, 1, 1, 3>(ip);
+ __m128i ip_zzx = shuffle<2, 2, 0, 3>(ip);
+ return bits_to_01_sse(hash_sse(ip_xyy, ip_yxz, ip_zzx));
}
#endif
diff --git a/intern/cycles/kernel/svm/svm_sepcomb_hsv.h b/intern/cycles/kernel/svm/svm_sepcomb_hsv.h
index 0f68ecbea03..111d5d47988 100644
--- a/intern/cycles/kernel/svm/svm_sepcomb_hsv.h
+++ b/intern/cycles/kernel/svm/svm_sepcomb_hsv.h
@@ -42,12 +42,12 @@ ccl_device void svm_node_separate_hsv(KernelGlobals *kg, ShaderData *sd, float *
/* Convert to HSV */
color = rgb_to_hsv(color);
- if (stack_valid(hue_out))
- stack_store_float(stack, hue_out, color.x);
- if (stack_valid(saturation_out))
- stack_store_float(stack, saturation_out, color.y);
- if (stack_valid(value_out))
- stack_store_float(stack, value_out, color.z);
+ if (stack_valid(hue_out))
+ stack_store_float(stack, hue_out, color.x);
+ if (stack_valid(saturation_out))
+ stack_store_float(stack, saturation_out, color.y);
+ if (stack_valid(value_out))
+ stack_store_float(stack, value_out, color.z);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_sky.h b/intern/cycles/kernel/svm/svm_sky.h
index 1e3552647bd..500b5146931 100644
--- a/intern/cycles/kernel/svm/svm_sky.h
+++ b/intern/cycles/kernel/svm/svm_sky.h
@@ -74,7 +74,7 @@ ccl_device float sky_radiance_internal(float *configuration, float theta, float
float expM = expf(configuration[4] * gamma);
float rayM = cgamma * cgamma;
float mieM = (1.0f + rayM) / powf((1.0f + configuration[8]*configuration[8] - 2.0f*configuration[8]*cgamma), 1.5f);
- float zenith = sqrt(ctheta);
+ float zenith = sqrtf(ctheta);
return (1.0f + configuration[0] * expf(configuration[1] / (ctheta + 0.01f))) *
(configuration[2] + configuration[3] * expM + configuration[5] * rayM + configuration[6] * mieM + configuration[7] * zenith);
diff --git a/intern/cycles/kernel/svm/svm_tex_coord.h b/intern/cycles/kernel/svm/svm_tex_coord.h
index 4b1f30e55bb..a17e4a25efe 100644
--- a/intern/cycles/kernel/svm/svm_tex_coord.h
+++ b/intern/cycles/kernel/svm/svm_tex_coord.h
@@ -25,27 +25,27 @@ ccl_device void svm_node_tex_coord(KernelGlobals *kg, ShaderData *sd, int path_f
switch(type) {
case NODE_TEXCO_OBJECT: {
data = sd->P;
- if(sd->object != ~0)
+ if(sd->object != OBJECT_NONE)
object_inverse_position_transform(kg, sd, &data);
break;
}
case NODE_TEXCO_NORMAL: {
data = sd->N;
- if(sd->object != ~0)
+ if(sd->object != OBJECT_NONE)
object_inverse_normal_transform(kg, sd, &data);
break;
}
case NODE_TEXCO_CAMERA: {
Transform tfm = kernel_data.cam.worldtocamera;
- if(sd->object != ~0)
+ if(sd->object != OBJECT_NONE)
data = transform_point(&tfm, sd->P);
else
data = transform_point(&tfm, sd->P + camera_position(kg));
break;
}
case NODE_TEXCO_WINDOW: {
- if((path_flag & PATH_RAY_CAMERA) && sd->object == ~0 && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC)
+ if((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC)
data = camera_world_to_ndc(kg, sd, sd->ray_P);
else
data = camera_world_to_ndc(kg, sd, sd->P);
@@ -53,7 +53,7 @@ ccl_device void svm_node_tex_coord(KernelGlobals *kg, ShaderData *sd, int path_f
break;
}
case NODE_TEXCO_REFLECTION: {
- if(sd->object != ~0)
+ if(sd->object != OBJECT_NONE)
data = 2.0f*dot(sd->N, sd->I)*sd->N - sd->I;
else
data = sd->I;
@@ -70,17 +70,10 @@ ccl_device void svm_node_tex_coord(KernelGlobals *kg, ShaderData *sd, int path_f
case NODE_TEXCO_VOLUME_GENERATED: {
data = sd->P;
- if(sd->object != ~0) {
- AttributeElement attr_elem;
- int attr_offset = find_attribute(kg, sd, ATTR_STD_GENERATED_TRANSFORM, &attr_elem);
-
- object_inverse_position_transform(kg, sd, &data);
-
- if(attr_offset != ATTR_STD_NOT_FOUND) {
- Transform tfm = primitive_attribute_matrix(kg, sd, attr_offset);
- data = transform_point(&tfm, data);
- }
- }
+#ifdef __VOLUME__
+ if(sd->object != OBJECT_NONE)
+ data = volume_normalized_position(kg, sd, data);
+#endif
break;
}
}
@@ -96,27 +89,27 @@ ccl_device void svm_node_tex_coord_bump_dx(KernelGlobals *kg, ShaderData *sd, in
switch(type) {
case NODE_TEXCO_OBJECT: {
data = sd->P + sd->dP.dx;
- if(sd->object != ~0)
+ if(sd->object != OBJECT_NONE)
object_inverse_position_transform(kg, sd, &data);
break;
}
case NODE_TEXCO_NORMAL: {
data = sd->N;
- if(sd->object != ~0)
+ if(sd->object != OBJECT_NONE)
object_inverse_normal_transform(kg, sd, &data);
break;
}
case NODE_TEXCO_CAMERA: {
Transform tfm = kernel_data.cam.worldtocamera;
- if(sd->object != ~0)
+ if(sd->object != OBJECT_NONE)
data = transform_point(&tfm, sd->P + sd->dP.dx);
else
data = transform_point(&tfm, sd->P + sd->dP.dx + camera_position(kg));
break;
}
case NODE_TEXCO_WINDOW: {
- if((path_flag & PATH_RAY_CAMERA) && sd->object == ~0 && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC)
+ if((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC)
data = camera_world_to_ndc(kg, sd, sd->ray_P + sd->ray_dP.dx);
else
data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dx);
@@ -124,7 +117,7 @@ ccl_device void svm_node_tex_coord_bump_dx(KernelGlobals *kg, ShaderData *sd, in
break;
}
case NODE_TEXCO_REFLECTION: {
- if(sd->object != ~0)
+ if(sd->object != OBJECT_NONE)
data = 2.0f*dot(sd->N, sd->I)*sd->N - sd->I;
else
data = sd->I;
@@ -141,17 +134,10 @@ ccl_device void svm_node_tex_coord_bump_dx(KernelGlobals *kg, ShaderData *sd, in
case NODE_TEXCO_VOLUME_GENERATED: {
data = sd->P + sd->dP.dx;
- if(sd->object != ~0) {
- AttributeElement attr_elem;
- int attr_offset = find_attribute(kg, sd, ATTR_STD_GENERATED_TRANSFORM, &attr_elem);
-
- object_inverse_position_transform(kg, sd, &data);
-
- if(attr_offset != ATTR_STD_NOT_FOUND) {
- Transform tfm = primitive_attribute_matrix(kg, sd, attr_offset);
- data = transform_point(&tfm, data);
- }
- }
+#ifdef __VOLUME__
+ if(sd->object != OBJECT_NONE)
+ data = volume_normalized_position(kg, sd, data);
+#endif
break;
}
}
@@ -170,27 +156,27 @@ ccl_device void svm_node_tex_coord_bump_dy(KernelGlobals *kg, ShaderData *sd, in
switch(type) {
case NODE_TEXCO_OBJECT: {
data = sd->P + sd->dP.dy;
- if(sd->object != ~0)
+ if(sd->object != OBJECT_NONE)
object_inverse_position_transform(kg, sd, &data);
break;
}
case NODE_TEXCO_NORMAL: {
data = sd->N;
- if(sd->object != ~0)
+ if(sd->object != OBJECT_NONE)
object_inverse_normal_transform(kg, sd, &data);
break;
}
case NODE_TEXCO_CAMERA: {
Transform tfm = kernel_data.cam.worldtocamera;
- if(sd->object != ~0)
+ if(sd->object != OBJECT_NONE)
data = transform_point(&tfm, sd->P + sd->dP.dy);
else
data = transform_point(&tfm, sd->P + sd->dP.dy + camera_position(kg));
break;
}
case NODE_TEXCO_WINDOW: {
- if((path_flag & PATH_RAY_CAMERA) && sd->object == ~0 && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC)
+ if((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC)
data = camera_world_to_ndc(kg, sd, sd->ray_P + sd->ray_dP.dy);
else
data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dy);
@@ -198,7 +184,7 @@ ccl_device void svm_node_tex_coord_bump_dy(KernelGlobals *kg, ShaderData *sd, in
break;
}
case NODE_TEXCO_REFLECTION: {
- if(sd->object != ~0)
+ if(sd->object != OBJECT_NONE)
data = 2.0f*dot(sd->N, sd->I)*sd->N - sd->I;
else
data = sd->I;
@@ -215,17 +201,10 @@ ccl_device void svm_node_tex_coord_bump_dy(KernelGlobals *kg, ShaderData *sd, in
case NODE_TEXCO_VOLUME_GENERATED: {
data = sd->P + sd->dP.dy;
- if(sd->object != ~0) {
- AttributeElement attr_elem;
- int attr_offset = find_attribute(kg, sd, ATTR_STD_GENERATED_TRANSFORM, &attr_elem);
-
- object_inverse_position_transform(kg, sd, &data);
-
- if(attr_offset != ATTR_STD_NOT_FOUND) {
- Transform tfm = primitive_attribute_matrix(kg, sd, attr_offset);
- data = transform_point(&tfm, data);
- }
- }
+#ifdef __VOLUME__
+ if(sd->object != OBJECT_NONE)
+ data = volume_normalized_position(kg, sd, data);
+#endif
break;
}
}
@@ -248,7 +227,7 @@ ccl_device void svm_node_normal_map(KernelGlobals *kg, ShaderData *sd, float *st
if(space == NODE_NORMAL_MAP_TANGENT) {
/* tangent space */
- if(sd->object == ~0) {
+ if(sd->object == OBJECT_NONE) {
stack_store_float3(stack, normal_offset, make_float3(0.0f, 0.0f, 0.0f));
return;
}
diff --git a/intern/cycles/kernel/svm/svm_texture.h b/intern/cycles/kernel/svm/svm_texture.h
index 8ced8390b0b..5fd9204cbf6 100644
--- a/intern/cycles/kernel/svm/svm_texture.h
+++ b/intern/cycles/kernel/svm/svm_texture.h
@@ -18,6 +18,7 @@ CCL_NAMESPACE_BEGIN
/* Voronoi Distances */
+#if 0
ccl_device float voronoi_distance(NodeDistanceMetric distance_metric, float3 d, float e)
{
#if 0
@@ -43,8 +44,7 @@ ccl_device float voronoi_distance(NodeDistanceMetric distance_metric, float3 d,
}
/* Voronoi / Worley like */
-
-ccl_device_noinline float4 voronoi_Fn(float3 p, float e, int n1, int n2)
+ccl_device_inline float4 voronoi_Fn(float3 p, float e, int n1, int n2)
{
float da[4];
float3 pa[4];
@@ -119,7 +119,95 @@ ccl_device_noinline float4 voronoi_Fn(float3 p, float e, int n1, int n2)
return result;
}
+#endif
+
+ccl_device float voronoi_F1_distance(float3 p)
+{
+ /* returns squared distance in da */
+ float da = 1e10f;
+
+#ifndef __KERNEL_SSE2__
+ int ix = floor_to_int(p.x), iy = floor_to_int(p.y), iz = floor_to_int(p.z);
+
+ for (int xx = -1; xx <= 1; xx++) {
+ for (int yy = -1; yy <= 1; yy++) {
+ for (int zz = -1; zz <= 1; zz++) {
+ float3 ip = make_float3(ix + xx, iy + yy, iz + zz);
+ float3 vp = ip + cellnoise_color(ip);
+ float d = len_squared(p - vp);
+ da = min(d, da);
+ }
+ }
+ }
+#else
+ __m128 vec_p = load_m128(p);
+ __m128i xyzi = quick_floor_sse(vec_p);
+
+ for (int xx = -1; xx <= 1; xx++) {
+ for (int yy = -1; yy <= 1; yy++) {
+ for (int zz = -1; zz <= 1; zz++) {
+ __m128 ip = _mm_cvtepi32_ps(_mm_add_epi32(xyzi, _mm_setr_epi32(xx, yy, zz, 0)));
+ __m128 vp = _mm_add_ps(ip, cellnoise_color(ip));
+ float d = len_squared<1, 1, 1, 0>(_mm_sub_ps(vec_p, vp));
+ da = min(d, da);
+ }
+ }
+ }
+#endif
+
+ return da;
+}
+
+ccl_device float3 voronoi_F1_color(float3 p)
+{
+ /* returns color of the nearest point */
+ float da = 1e10f;
+
+#ifndef __KERNEL_SSE2__
+ float3 pa;
+ int ix = floor_to_int(p.x), iy = floor_to_int(p.y), iz = floor_to_int(p.z);
+
+ for (int xx = -1; xx <= 1; xx++) {
+ for (int yy = -1; yy <= 1; yy++) {
+ for (int zz = -1; zz <= 1; zz++) {
+ float3 ip = make_float3(ix + xx, iy + yy, iz + zz);
+ float3 vp = ip + cellnoise_color(ip);
+ float d = len_squared(p - vp);
+
+ if(d < da) {
+ da = d;
+ pa = vp;
+ }
+ }
+ }
+ }
+
+ return cellnoise_color(pa);
+#else
+ __m128 pa, vec_p = load_m128(p);
+ __m128i xyzi = quick_floor_sse(vec_p);
+
+ for (int xx = -1; xx <= 1; xx++) {
+ for (int yy = -1; yy <= 1; yy++) {
+ for (int zz = -1; zz <= 1; zz++) {
+ __m128 ip = _mm_cvtepi32_ps(_mm_add_epi32(xyzi, _mm_setr_epi32(xx, yy, zz, 0)));
+ __m128 vp = _mm_add_ps(ip, cellnoise_color(ip));
+ float d = len_squared<1, 1, 1, 0>(_mm_sub_ps(vec_p, vp));
+
+ if(d < da) {
+ da = d;
+ pa = vp;
+ }
+ }
+ }
+ }
+
+ __m128 color = cellnoise_color(pa);
+ return (float3 &)color;
+#endif
+}
+#if 0
ccl_device float voronoi_F1(float3 p) { return voronoi_Fn(p, 0.0f, 0, -1).w; }
ccl_device float voronoi_F2(float3 p) { return voronoi_Fn(p, 0.0f, 1, -1).w; }
ccl_device float voronoi_F3(float3 p) { return voronoi_Fn(p, 0.0f, 2, -1).w; }
@@ -139,6 +227,7 @@ ccl_device float voronoi_F3S(float3 p) { return 2.0f*voronoi_F3(p) - 1.0f; }
ccl_device float voronoi_F4S(float3 p) { return 2.0f*voronoi_F4(p) - 1.0f; }
ccl_device float voronoi_F1F2S(float3 p) { return 2.0f*voronoi_F1F2(p) - 1.0f; }
ccl_device float voronoi_CrS(float3 p) { return 2.0f*voronoi_Cr(p) - 1.0f; }
+#endif
/* Noise Bases */
diff --git a/intern/cycles/kernel/svm/svm_types.h b/intern/cycles/kernel/svm/svm_types.h
index ad5e1ea6d2e..80972ec82bc 100644
--- a/intern/cycles/kernel/svm/svm_types.h
+++ b/intern/cycles/kernel/svm/svm_types.h
@@ -36,7 +36,8 @@ typedef enum NodeType {
NODE_CLOSURE_SET_WEIGHT,
NODE_CLOSURE_WEIGHT,
NODE_MIX_CLOSURE,
- NODE_JUMP,
+ NODE_JUMP_IF_ZERO,
+ NODE_JUMP_IF_ONE,
NODE_TEX_IMAGE,
NODE_TEX_IMAGE_BOX,
NODE_TEX_SKY,
@@ -71,7 +72,6 @@ typedef enum NodeType {
NODE_TEX_COORD,
NODE_TEX_COORD_BUMP_DX,
NODE_TEX_COORD_BUMP_DY,
- NODE_ADD_CLOSURE,
NODE_EMISSION_SET_WEIGHT_TOTAL,
NODE_ATTR_BUMP_DX,
NODE_ATTR_BUMP_DY,
@@ -102,7 +102,8 @@ typedef enum NodeType {
NODE_CLOSURE_AMBIENT_OCCLUSION,
NODE_TANGENT,
NODE_NORMAL_MAP,
- NODE_HAIR_INFO
+ NODE_HAIR_INFO,
+ NODE_UVMAP
} NodeType;
typedef enum NodeAttributeType {
@@ -158,7 +159,8 @@ typedef enum NodeLightPath {
NODE_LP_volume_scatter,
NODE_LP_backfacing,
NODE_LP_ray_length,
- NODE_LP_ray_depth
+ NODE_LP_ray_depth,
+ NODE_LP_ray_transparent
} NodeLightPath;
typedef enum NodeLightFalloff {
@@ -219,6 +221,7 @@ typedef enum NodeMath {
NODE_MATH_LESS_THAN,
NODE_MATH_GREATER_THAN,
NODE_MATH_MODULO,
+ NODE_MATH_ABSOLUTE,
NODE_MATH_CLAMP /* used for the clamp UI option */
} NodeMath;
@@ -401,6 +404,8 @@ typedef enum ClosureType {
#define CLOSURE_IS_BSDF_GLOSSY(type) (type >= CLOSURE_BSDF_GLOSSY_ID && type <= CLOSURE_BSDF_HAIR_REFLECTION_ID)
#define CLOSURE_IS_BSDF_TRANSMISSION(type) (type >= CLOSURE_BSDF_TRANSMISSION_ID && type <= CLOSURE_BSDF_HAIR_TRANSMISSION_ID)
#define CLOSURE_IS_BSDF_BSSRDF(type) (type == CLOSURE_BSDF_BSSRDF_ID)
+#define CLOSURE_IS_BSDF_ANISOTROPIC(type) (type == CLOSURE_BSDF_WARD_ID)
+#define CLOSURE_IS_BSDF_OR_BSSRDF(type) (type <= CLOSURE_BSSRDF_GAUSSIAN_ID)
#define CLOSURE_IS_BSSRDF(type) (type >= CLOSURE_BSSRDF_CUBIC_ID && type <= CLOSURE_BSSRDF_GAUSSIAN_ID)
#define CLOSURE_IS_VOLUME(type) (type >= CLOSURE_VOLUME_ID && type <= CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID)
#define CLOSURE_IS_EMISSION(type) (type == CLOSURE_EMISSION_ID)
diff --git a/intern/cycles/kernel/svm/svm_vector_transform.h b/intern/cycles/kernel/svm/svm_vector_transform.h
index 1e3fc2fa03b..61d33aeb8cf 100644
--- a/intern/cycles/kernel/svm/svm_vector_transform.h
+++ b/intern/cycles/kernel/svm/svm_vector_transform.h
@@ -33,7 +33,7 @@ ccl_device void svm_node_vector_transform(KernelGlobals *kg, ShaderData *sd, flo
NodeVectorTransformConvertSpace to = (NodeVectorTransformConvertSpace)ito;
Transform tfm;
- bool is_object = (sd->object != ~0);
+ bool is_object = (sd->object != OBJECT_NONE);
bool is_direction = (type == NODE_VECTOR_TRANSFORM_TYPE_VECTOR || type == NODE_VECTOR_TRANSFORM_TYPE_NORMAL);
/* From world */
@@ -91,9 +91,9 @@ ccl_device void svm_node_vector_transform(KernelGlobals *kg, ShaderData *sd, flo
if(type == NODE_VECTOR_TRANSFORM_TYPE_NORMAL)
in = normalize(in);
- /* Output */
+ /* Output */
if(stack_valid(vector_out)) {
- stack_store_float3(stack, vector_out, in);
+ stack_store_float3(stack, vector_out, in);
}
}
diff --git a/intern/cycles/kernel/svm/svm_voronoi.h b/intern/cycles/kernel/svm/svm_voronoi.h
index 7f597dc8bff..083a2f30e06 100644
--- a/intern/cycles/kernel/svm/svm_voronoi.h
+++ b/intern/cycles/kernel/svm/svm_voronoi.h
@@ -20,23 +20,16 @@ CCL_NAMESPACE_BEGIN
ccl_device_noinline float4 svm_voronoi(NodeVoronoiColoring coloring, float3 p)
{
- /* compute distance and point coordinate of 4 nearest neighbours */
- float4 dpa0 = voronoi_Fn(p, 1.0f, 0, -1);
-
- /* output */
- float fac;
- float3 color;
-
if(coloring == NODE_VORONOI_INTENSITY) {
- fac = fabsf(dpa0.w);
- color = make_float3(fac, fac, fac);
+ /* compute squared distance to the nearest neighbour */
+ float fac = voronoi_F1_distance(p);
+ return make_float4(fac, fac, fac, fac);
}
else {
- color = cellnoise_color(float4_to_float3(dpa0));
- fac = average(color);
+ /* compute color of the nearest neighbour */
+ float3 color = voronoi_F1_color(p);
+ return make_float4(color.x, color.y, color.z, average(color));
}
-
- return make_float4(color.x, color.y, color.z, fac);
}
ccl_device void svm_node_tex_voronoi(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
diff --git a/intern/cycles/kernel/svm/svm_wavelength.h b/intern/cycles/kernel/svm/svm_wavelength.h
index dca4003b89a..9e57c470c0f 100644
--- a/intern/cycles/kernel/svm/svm_wavelength.h
+++ b/intern/cycles/kernel/svm/svm_wavelength.h
@@ -43,33 +43,33 @@ ccl_device void svm_node_wavelength(ShaderData *sd, float *stack, uint wavelengt
// cie_colour_match[(lambda - 380) / 5][1] = yBar
// cie_colour_match[(lambda - 380) / 5][2] = zBar
const float cie_colour_match[81][3] = {
- {0.0014,0.0000,0.0065}, {0.0022,0.0001,0.0105}, {0.0042,0.0001,0.0201},
- {0.0076,0.0002,0.0362}, {0.0143,0.0004,0.0679}, {0.0232,0.0006,0.1102},
- {0.0435,0.0012,0.2074}, {0.0776,0.0022,0.3713}, {0.1344,0.0040,0.6456},
- {0.2148,0.0073,1.0391}, {0.2839,0.0116,1.3856}, {0.3285,0.0168,1.6230},
- {0.3483,0.0230,1.7471}, {0.3481,0.0298,1.7826}, {0.3362,0.0380,1.7721},
- {0.3187,0.0480,1.7441}, {0.2908,0.0600,1.6692}, {0.2511,0.0739,1.5281},
- {0.1954,0.0910,1.2876}, {0.1421,0.1126,1.0419}, {0.0956,0.1390,0.8130},
- {0.0580,0.1693,0.6162}, {0.0320,0.2080,0.4652}, {0.0147,0.2586,0.3533},
- {0.0049,0.3230,0.2720}, {0.0024,0.4073,0.2123}, {0.0093,0.5030,0.1582},
- {0.0291,0.6082,0.1117}, {0.0633,0.7100,0.0782}, {0.1096,0.7932,0.0573},
- {0.1655,0.8620,0.0422}, {0.2257,0.9149,0.0298}, {0.2904,0.9540,0.0203},
- {0.3597,0.9803,0.0134}, {0.4334,0.9950,0.0087}, {0.5121,1.0000,0.0057},
- {0.5945,0.9950,0.0039}, {0.6784,0.9786,0.0027}, {0.7621,0.9520,0.0021},
- {0.8425,0.9154,0.0018}, {0.9163,0.8700,0.0017}, {0.9786,0.8163,0.0014},
- {1.0263,0.7570,0.0011}, {1.0567,0.6949,0.0010}, {1.0622,0.6310,0.0008},
- {1.0456,0.5668,0.0006}, {1.0026,0.5030,0.0003}, {0.9384,0.4412,0.0002},
- {0.8544,0.3810,0.0002}, {0.7514,0.3210,0.0001}, {0.6424,0.2650,0.0000},
- {0.5419,0.2170,0.0000}, {0.4479,0.1750,0.0000}, {0.3608,0.1382,0.0000},
- {0.2835,0.1070,0.0000}, {0.2187,0.0816,0.0000}, {0.1649,0.0610,0.0000},
- {0.1212,0.0446,0.0000}, {0.0874,0.0320,0.0000}, {0.0636,0.0232,0.0000},
- {0.0468,0.0170,0.0000}, {0.0329,0.0119,0.0000}, {0.0227,0.0082,0.0000},
- {0.0158,0.0057,0.0000}, {0.0114,0.0041,0.0000}, {0.0081,0.0029,0.0000},
- {0.0058,0.0021,0.0000}, {0.0041,0.0015,0.0000}, {0.0029,0.0010,0.0000},
- {0.0020,0.0007,0.0000}, {0.0014,0.0005,0.0000}, {0.0010,0.0004,0.0000},
- {0.0007,0.0002,0.0000}, {0.0005,0.0002,0.0000}, {0.0003,0.0001,0.0000},
- {0.0002,0.0001,0.0000}, {0.0002,0.0001,0.0000}, {0.0001,0.0000,0.0000},
- {0.0001,0.0000,0.0000}, {0.0001,0.0000,0.0000}, {0.0000,0.0000,0.0000}
+ {0.0014f,0.0000f,0.0065f}, {0.0022f,0.0001f,0.0105f}, {0.0042f,0.0001f,0.0201f},
+ {0.0076f,0.0002f,0.0362f}, {0.0143f,0.0004f,0.0679f}, {0.0232f,0.0006f,0.1102f},
+ {0.0435f,0.0012f,0.2074f}, {0.0776f,0.0022f,0.3713f}, {0.1344f,0.0040f,0.6456f},
+ {0.2148f,0.0073f,1.0391f}, {0.2839f,0.0116f,1.3856f}, {0.3285f,0.0168f,1.6230f},
+ {0.3483f,0.0230f,1.7471f}, {0.3481f,0.0298f,1.7826f}, {0.3362f,0.0380f,1.7721f},
+ {0.3187f,0.0480f,1.7441f}, {0.2908f,0.0600f,1.6692f}, {0.2511f,0.0739f,1.5281f},
+ {0.1954f,0.0910f,1.2876f}, {0.1421f,0.1126f,1.0419f}, {0.0956f,0.1390f,0.8130f},
+ {0.0580f,0.1693f,0.6162f}, {0.0320f,0.2080f,0.4652f}, {0.0147f,0.2586f,0.3533f},
+ {0.0049f,0.3230f,0.2720f}, {0.0024f,0.4073f,0.2123f}, {0.0093f,0.5030f,0.1582f},
+ {0.0291f,0.6082f,0.1117f}, {0.0633f,0.7100f,0.0782f}, {0.1096f,0.7932f,0.0573f},
+ {0.1655f,0.8620f,0.0422f}, {0.2257f,0.9149f,0.0298f}, {0.2904f,0.9540f,0.0203f},
+ {0.3597f,0.9803f,0.0134f}, {0.4334f,0.9950f,0.0087f}, {0.5121f,1.0000f,0.0057f},
+ {0.5945f,0.9950f,0.0039f}, {0.6784f,0.9786f,0.0027f}, {0.7621f,0.9520f,0.0021f},
+ {0.8425f,0.9154f,0.0018f}, {0.9163f,0.8700f,0.0017f}, {0.9786f,0.8163f,0.0014f},
+ {1.0263f,0.7570f,0.0011f}, {1.0567f,0.6949f,0.0010f}, {1.0622f,0.6310f,0.0008f},
+ {1.0456f,0.5668f,0.0006f}, {1.0026f,0.5030f,0.0003f}, {0.9384f,0.4412f,0.0002f},
+ {0.8544f,0.3810f,0.0002f}, {0.7514f,0.3210f,0.0001f}, {0.6424f,0.2650f,0.0000f},
+ {0.5419f,0.2170f,0.0000f}, {0.4479f,0.1750f,0.0000f}, {0.3608f,0.1382f,0.0000f},
+ {0.2835f,0.1070f,0.0000f}, {0.2187f,0.0816f,0.0000f}, {0.1649f,0.0610f,0.0000f},
+ {0.1212f,0.0446f,0.0000f}, {0.0874f,0.0320f,0.0000f}, {0.0636f,0.0232f,0.0000f},
+ {0.0468f,0.0170f,0.0000f}, {0.0329f,0.0119f,0.0000f}, {0.0227f,0.0082f,0.0000f},
+ {0.0158f,0.0057f,0.0000f}, {0.0114f,0.0041f,0.0000f}, {0.0081f,0.0029f,0.0000f},
+ {0.0058f,0.0021f,0.0000f}, {0.0041f,0.0015f,0.0000f}, {0.0029f,0.0010f,0.0000f},
+ {0.0020f,0.0007f,0.0000f}, {0.0014f,0.0005f,0.0000f}, {0.0010f,0.0004f,0.0000f},
+ {0.0007f,0.0002f,0.0000f}, {0.0005f,0.0002f,0.0000f}, {0.0003f,0.0001f,0.0000f},
+ {0.0002f,0.0001f,0.0000f}, {0.0002f,0.0001f,0.0000f}, {0.0001f,0.0000f,0.0000f},
+ {0.0001f,0.0000f,0.0000f}, {0.0001f,0.0000f,0.0000f}, {0.0000f,0.0000f,0.0000f}
};
float lambda_nm = stack_load_float(stack, wavelength);
diff --git a/intern/cycles/kernel/svm/svm_wireframe.h b/intern/cycles/kernel/svm/svm_wireframe.h
index e560e6303cc..660e6e2ca47 100644
--- a/intern/cycles/kernel/svm/svm_wireframe.h
+++ b/intern/cycles/kernel/svm/svm_wireframe.h
@@ -45,17 +45,21 @@ ccl_device void svm_node_wireframe(KernelGlobals *kg, ShaderData *sd, float *sta
/* Calculate wireframe */
#ifdef __HAIR__
- if (sd->prim != ~0 && sd->segment == ~0) {
+ if (sd->prim != PRIM_NONE && sd->type & PRIMITIVE_ALL_TRIANGLE)
#else
- if (sd->prim != ~0) {
+ if (sd->prim != PRIM_NONE)
#endif
+ {
float3 Co[3];
float pixelwidth = 1.0f;
/* Triangles */
- float np = 3;
+ int np = 3;
- triangle_vertices(kg, sd->prim, Co);
+ if(sd->type & PRIMITIVE_TRIANGLE)
+ triangle_vertices(kg, sd->prim, Co);
+ else
+ motion_triangle_vertices(kg, sd->object, sd->prim, sd->time, Co);
if(!(sd->flag & SD_TRANSFORM_APPLIED)) {
object_position_transform(kg, sd, &Co[0]);
diff --git a/intern/cycles/render/CMakeLists.txt b/intern/cycles/render/CMakeLists.txt
index 7d00ed92164..449c1391980 100644
--- a/intern/cycles/render/CMakeLists.txt
+++ b/intern/cycles/render/CMakeLists.txt
@@ -16,6 +16,7 @@ set(INC_SYS
set(SRC
attribute.cpp
background.cpp
+ bake.cpp
blackbody.cpp
buffers.cpp
camera.cpp
@@ -43,6 +44,7 @@ set(SRC
set(SRC_HEADERS
attribute.h
+ bake.h
background.h
blackbody.h
buffers.h
diff --git a/intern/cycles/render/attribute.cpp b/intern/cycles/render/attribute.cpp
index 61b9cf2f3bc..14805b6f11a 100644
--- a/intern/cycles/render/attribute.cpp
+++ b/intern/cycles/render/attribute.cpp
@@ -14,6 +14,7 @@
* limitations under the License
*/
+#include "image.h"
#include "mesh.h"
#include "attribute.h"
@@ -25,6 +26,17 @@ CCL_NAMESPACE_BEGIN
/* Attribute */
+Attribute::~Attribute()
+{
+ /* for voxel data, we need to remove the image from the image manager */
+ if(element == ATTR_ELEMENT_VOXEL) {
+ VoxelAttribute *voxel_data = data_voxel();
+
+ if(voxel_data)
+ voxel_data->manager->remove_image(voxel_data->slot);
+ }
+}
+
void Attribute::set(ustring name_, TypeDesc type_, AttributeElement element_)
{
name = name_;
@@ -38,9 +50,14 @@ void Attribute::set(ustring name_, TypeDesc type_, AttributeElement element_)
type == TypeDesc::TypeNormal || type == TypeDesc::TypeMatrix);
}
-void Attribute::reserve(int numverts, int numtris, int numcurves, int numkeys)
+void Attribute::reserve(int numverts, int numtris, int numsteps, int numcurves, int numkeys, bool resize)
{
- buffer.resize(buffer_size(numverts, numtris, numcurves, numkeys), 0);
+ if (resize) {
+ buffer.resize(buffer_size(numverts, numtris, numsteps, numcurves, numkeys), 0);
+ }
+ else {
+ buffer.reserve(buffer_size(numverts, numtris, numsteps, numcurves, numkeys));
+ }
}
void Attribute::add(const float& f)
@@ -70,9 +87,28 @@ void Attribute::add(const Transform& f)
buffer.push_back(data[i]);
}
+void Attribute::add(const VoxelAttribute& f)
+{
+ char *data = (char*)&f;
+ size_t size = sizeof(f);
+
+ for(size_t i = 0; i < size; i++)
+ buffer.push_back(data[i]);
+}
+
+void Attribute::add(const char *data)
+{
+ size_t size = data_sizeof();
+
+ for(size_t i = 0; i < size; i++)
+ buffer.push_back(data[i]);
+}
+
size_t Attribute::data_sizeof() const
{
- if(type == TypeDesc::TypeFloat)
+ if(element == ATTR_ELEMENT_VOXEL)
+ return sizeof(VoxelAttribute);
+ else if(type == TypeDesc::TypeFloat)
return sizeof(float);
else if(type == TypeDesc::TypeMatrix)
return sizeof(Transform);
@@ -80,18 +116,22 @@ size_t Attribute::data_sizeof() const
return sizeof(float3);
}
-size_t Attribute::element_size(int numverts, int numtris, int numcurves, int numkeys) const
+size_t Attribute::element_size(int numverts, int numtris, int numsteps, int numcurves, int numkeys) const
{
size_t size;
switch(element) {
case ATTR_ELEMENT_OBJECT:
case ATTR_ELEMENT_MESH:
+ case ATTR_ELEMENT_VOXEL:
size = 1;
break;
case ATTR_ELEMENT_VERTEX:
size = numverts;
break;
+ case ATTR_ELEMENT_VERTEX_MOTION:
+ size = numverts * (numsteps - 1);
+ break;
case ATTR_ELEMENT_FACE:
size = numtris;
break;
@@ -104,6 +144,9 @@ size_t Attribute::element_size(int numverts, int numtris, int numcurves, int num
case ATTR_ELEMENT_CURVE_KEY:
size = numkeys;
break;
+ case ATTR_ELEMENT_CURVE_KEY_MOTION:
+ size = numkeys * (numsteps - 1);
+ break;
default:
size = 0;
break;
@@ -112,9 +155,9 @@ size_t Attribute::element_size(int numverts, int numtris, int numcurves, int num
return size;
}
-size_t Attribute::buffer_size(int numverts, int numtris, int numcurves, int numkeys) const
+size_t Attribute::buffer_size(int numverts, int numtris, int numsteps, int numcurves, int numkeys) const
{
- return element_size(numverts, numtris, numcurves, numkeys)*data_sizeof();
+ return element_size(numverts, numtris, numsteps, numcurves, numkeys)*data_sizeof();
}
bool Attribute::same_storage(TypeDesc a, TypeDesc b)
@@ -136,40 +179,65 @@ bool Attribute::same_storage(TypeDesc a, TypeDesc b)
const char *Attribute::standard_name(AttributeStandard std)
{
- if(std == ATTR_STD_VERTEX_NORMAL)
- return "N";
- else if(std == ATTR_STD_FACE_NORMAL)
- return "Ng";
- else if(std == ATTR_STD_UV)
- return "uv";
- else if(std == ATTR_STD_GENERATED)
- return "generated";
- else if(std == ATTR_STD_UV_TANGENT)
- return "tangent";
- else if(std == ATTR_STD_UV_TANGENT_SIGN)
- return "tangent_sign";
- else if(std == ATTR_STD_POSITION_UNDEFORMED)
- return "undeformed";
- else if(std == ATTR_STD_POSITION_UNDISPLACED)
- return "undisplaced";
- else if(std == ATTR_STD_MOTION_PRE)
- return "motion_pre";
- else if(std == ATTR_STD_MOTION_POST)
- return "motion_post";
- else if(std == ATTR_STD_PARTICLE)
- return "particle";
- else if(std == ATTR_STD_CURVE_INTERCEPT)
- return "curve_intercept";
- else if(std == ATTR_STD_PTEX_FACE_ID)
- return "ptex_face_id";
- else if(std == ATTR_STD_PTEX_UV)
- return "ptex_uv";
- else if(std == ATTR_STD_GENERATED_TRANSFORM)
- return "generated_transform";
+ switch(std) {
+ case ATTR_STD_VERTEX_NORMAL:
+ return "N";
+ case ATTR_STD_FACE_NORMAL:
+ return "Ng";
+ case ATTR_STD_UV:
+ return "uv";
+ case ATTR_STD_GENERATED:
+ return "generated";
+ case ATTR_STD_GENERATED_TRANSFORM:
+ return "generated_transform";
+ case ATTR_STD_UV_TANGENT:
+ return "tangent";
+ case ATTR_STD_UV_TANGENT_SIGN:
+ return "tangent_sign";
+ case ATTR_STD_POSITION_UNDEFORMED:
+ return "undeformed";
+ case ATTR_STD_POSITION_UNDISPLACED:
+ return "undisplaced";
+ case ATTR_STD_MOTION_VERTEX_POSITION:
+ return "motion_P";
+ case ATTR_STD_MOTION_VERTEX_NORMAL:
+ return "motion_N";
+ case ATTR_STD_PARTICLE:
+ return "particle";
+ case ATTR_STD_CURVE_INTERCEPT:
+ return "curve_intercept";
+ case ATTR_STD_PTEX_FACE_ID:
+ return "ptex_face_id";
+ case ATTR_STD_PTEX_UV:
+ return "ptex_uv";
+ case ATTR_STD_VOLUME_DENSITY:
+ return "density";
+ case ATTR_STD_VOLUME_COLOR:
+ return "color";
+ case ATTR_STD_VOLUME_FLAME:
+ return "flame";
+ case ATTR_STD_VOLUME_HEAT:
+ return "heat";
+ case ATTR_STD_VOLUME_VELOCITY:
+ return "velocity";
+ case ATTR_STD_NOT_FOUND:
+ case ATTR_STD_NONE:
+ case ATTR_STD_NUM:
+ return "";
+ }
return "";
}
+AttributeStandard Attribute::name_standard(const char *name)
+{
+ for(int std = ATTR_STD_NONE; std < ATTR_STD_NUM; std++)
+ if(strcmp(name, Attribute::standard_name((AttributeStandard)std)) == 0)
+ return (AttributeStandard)std;
+
+ return ATTR_STD_NONE;
+}
+
/* Attribute Set */
AttributeSet::AttributeSet()
@@ -182,7 +250,7 @@ AttributeSet::~AttributeSet()
{
}
-Attribute *AttributeSet::add(ustring name, TypeDesc type, AttributeElement element)
+Attribute *AttributeSet::add(ustring name, TypeDesc type, AttributeElement element, bool resize)
{
Attribute *attr = find(name);
@@ -202,9 +270,9 @@ Attribute *AttributeSet::add(ustring name, TypeDesc type, AttributeElement eleme
/* this is weak .. */
if(triangle_mesh)
- attr->reserve(triangle_mesh->verts.size(), triangle_mesh->triangles.size(), 0, 0);
+ attr->reserve(triangle_mesh->verts.size(), triangle_mesh->triangles.size(), triangle_mesh->motion_steps, 0, 0, resize);
if(curve_mesh)
- attr->reserve(0, 0, curve_mesh->curves.size(), curve_mesh->curve_keys.size());
+ attr->reserve(0, 0, curve_mesh->motion_steps, curve_mesh->curves.size(), curve_mesh->curve_keys.size(), resize);
return attr;
}
@@ -261,10 +329,14 @@ Attribute *AttributeSet::add(AttributeStandard std, ustring name)
case ATTR_STD_GENERATED:
case ATTR_STD_POSITION_UNDEFORMED:
case ATTR_STD_POSITION_UNDISPLACED:
- case ATTR_STD_MOTION_PRE:
- case ATTR_STD_MOTION_POST:
attr = add(name, TypeDesc::TypePoint, ATTR_ELEMENT_VERTEX);
break;
+ case ATTR_STD_MOTION_VERTEX_POSITION:
+ attr = add(name, TypeDesc::TypePoint, ATTR_ELEMENT_VERTEX_MOTION);
+ break;
+ case ATTR_STD_MOTION_VERTEX_NORMAL:
+ attr = add(name, TypeDesc::TypeNormal, ATTR_ELEMENT_VERTEX_MOTION);
+ break;
case ATTR_STD_PTEX_FACE_ID:
attr = add(name, TypeDesc::TypeFloat, ATTR_ELEMENT_FACE);
break;
@@ -274,6 +346,17 @@ Attribute *AttributeSet::add(AttributeStandard std, ustring name)
case ATTR_STD_GENERATED_TRANSFORM:
attr = add(name, TypeDesc::TypeMatrix, ATTR_ELEMENT_MESH);
break;
+ case ATTR_STD_VOLUME_DENSITY:
+ case ATTR_STD_VOLUME_FLAME:
+ case ATTR_STD_VOLUME_HEAT:
+ attr = add(name, TypeDesc::TypeFloat, ATTR_ELEMENT_VOXEL);
+ break;
+ case ATTR_STD_VOLUME_COLOR:
+ attr = add(name, TypeDesc::TypeColor, ATTR_ELEMENT_VOXEL);
+ break;
+ case ATTR_STD_VOLUME_VELOCITY:
+ attr = add(name, TypeDesc::TypeVector, ATTR_ELEMENT_VOXEL);
+ break;
default:
assert(0);
break;
@@ -285,9 +368,8 @@ Attribute *AttributeSet::add(AttributeStandard std, ustring name)
case ATTR_STD_GENERATED:
attr = add(name, TypeDesc::TypePoint, ATTR_ELEMENT_CURVE);
break;
- case ATTR_STD_MOTION_PRE:
- case ATTR_STD_MOTION_POST:
- attr = add(name, TypeDesc::TypePoint, ATTR_ELEMENT_CURVE_KEY);
+ case ATTR_STD_MOTION_VERTEX_POSITION:
+ attr = add(name, TypeDesc::TypePoint, ATTR_ELEMENT_CURVE_KEY_MOTION);
break;
case ATTR_STD_CURVE_INTERCEPT:
attr = add(name, TypeDesc::TypeFloat, ATTR_ELEMENT_CURVE_KEY);
@@ -343,9 +425,9 @@ void AttributeSet::reserve()
{
foreach(Attribute& attr, attributes) {
if(triangle_mesh)
- attr.reserve(triangle_mesh->verts.size(), triangle_mesh->triangles.size(), 0, 0);
+ attr.reserve(triangle_mesh->verts.size(), triangle_mesh->triangles.size(), triangle_mesh->motion_steps, 0, 0, true);
if(curve_mesh)
- attr.reserve(0, 0, curve_mesh->curves.size(), curve_mesh->curve_keys.size());
+ attr.reserve(0, 0, 0, curve_mesh->curves.size(), curve_mesh->curve_keys.size(), true);
}
}
diff --git a/intern/cycles/render/attribute.h b/intern/cycles/render/attribute.h
index 0b8905ae5a3..9fc32db8444 100644
--- a/intern/cycles/render/attribute.h
+++ b/intern/cycles/render/attribute.h
@@ -27,12 +27,20 @@
CCL_NAMESPACE_BEGIN
class Attribute;
-class AttributeSet;
class AttributeRequest;
class AttributeRequestSet;
+class AttributeSet;
+class ImageManager;
class Mesh;
struct Transform;
+/* Attributes for voxels are images */
+
+struct VoxelAttribute {
+ ImageManager *manager;
+ int slot;
+};
+
/* Attribute
*
* Arbitrary data layers on meshes.
@@ -48,29 +56,37 @@ public:
AttributeElement element;
Attribute() {}
+ ~Attribute();
void set(ustring name, TypeDesc type, AttributeElement element);
- void reserve(int numverts, int numfaces, int numcurves, int numkeys);
+ void reserve(int numverts, int numfaces, int numsteps, int numcurves, int numkeys, bool resize);
size_t data_sizeof() const;
- size_t element_size(int numverts, int numfaces, int numcurves, int numkeys) const;
- size_t buffer_size(int numverts, int numfaces, int numcurves, int numkeys) const;
+ size_t element_size(int numverts, int numfaces, int numsteps, int numcurves, int numkeys) const;
+ size_t buffer_size(int numverts, int numfaces, int numsteps, int numcurves, int numkeys) const;
char *data() { return (buffer.size())? &buffer[0]: NULL; };
float3 *data_float3() { return (float3*)data(); }
+ float4 *data_float4() { return (float4*)data(); }
float *data_float() { return (float*)data(); }
Transform *data_transform() { return (Transform*)data(); }
+ VoxelAttribute *data_voxel() { return ( VoxelAttribute*)data(); }
const char *data() const { return (buffer.size())? &buffer[0]: NULL; }
const float3 *data_float3() const { return (const float3*)data(); }
+ const float4 *data_float4() const { return (const float4*)data(); }
const float *data_float() const { return (const float*)data(); }
const Transform *data_transform() const { return (const Transform*)data(); }
+ const VoxelAttribute *data_voxel() const { return (const VoxelAttribute*)data(); }
void add(const float& f);
void add(const float3& f);
void add(const Transform& f);
+ void add(const VoxelAttribute& f);
+ void add(const char *data);
static bool same_storage(TypeDesc a, TypeDesc b);
static const char *standard_name(AttributeStandard std);
+ static AttributeStandard name_standard(const char *name);
};
/* Attribute Set
@@ -86,7 +102,7 @@ public:
AttributeSet();
~AttributeSet();
- Attribute *add(ustring name, TypeDesc type, AttributeElement element);
+ Attribute *add(ustring name, TypeDesc type, AttributeElement element, bool resize = true);
Attribute *find(ustring name) const;
void remove(ustring name);
diff --git a/intern/cycles/render/background.cpp b/intern/cycles/render/background.cpp
index c9c66dad3fe..a877c52fbed 100644
--- a/intern/cycles/render/background.cpp
+++ b/intern/cycles/render/background.cpp
@@ -35,7 +35,7 @@ Background::Background()
use = true;
- visibility = ~0;
+ visibility = PATH_RAY_ALL_VISIBILITY;
shader = 0;
transparent = false;
@@ -70,7 +70,7 @@ void Background::device_update(Device *device, DeviceScene *dscene, Scene *scene
if(scene->shaders[shader]->has_volume)
kbackground->volume_shader = kbackground->surface_shader;
else
- kbackground->volume_shader = SHADER_NO_ID;
+ kbackground->volume_shader = SHADER_NONE;
if(!(visibility & PATH_RAY_DIFFUSE))
kbackground->surface_shader |= SHADER_EXCLUDE_DIFFUSE;
diff --git a/intern/cycles/render/bake.cpp b/intern/cycles/render/bake.cpp
new file mode 100644
index 00000000000..aa317ab672f
--- /dev/null
+++ b/intern/cycles/render/bake.cpp
@@ -0,0 +1,206 @@
+/*
+ * Copyright 2011-2014 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License
+ */
+
+#include "bake.h"
+
+CCL_NAMESPACE_BEGIN
+
+BakeData::BakeData(const int object, const int tri_offset, const int num_pixels):
+m_object(object),
+m_tri_offset(tri_offset),
+m_num_pixels(num_pixels)
+{
+ m_primitive.resize(num_pixels);
+ m_u.resize(num_pixels);
+ m_v.resize(num_pixels);
+ m_dudx.resize(num_pixels);
+ m_dudy.resize(num_pixels);
+ m_dvdx.resize(num_pixels);
+ m_dvdy.resize(num_pixels);
+}
+
+BakeData::~BakeData()
+{
+ m_primitive.clear();
+ m_u.clear();
+ m_v.clear();
+ m_dudx.clear();
+ m_dudy.clear();
+ m_dvdx.clear();
+ m_dvdy.clear();
+}
+
+void BakeData::set(int i, int prim, float uv[2], float dudx, float dudy, float dvdx, float dvdy)
+{
+ m_primitive[i] = (prim == -1 ? -1 : m_tri_offset + prim);
+ m_u[i] = uv[0];
+ m_v[i] = uv[1];
+ m_dudx[i] = dudx;
+ m_dudy[i] = dudy;
+ m_dvdx[i] = dvdx;
+ m_dvdy[i] = dvdy;
+}
+
+int BakeData::object()
+{
+ return m_object;
+}
+
+int BakeData::size()
+{
+ return m_num_pixels;
+}
+
+bool BakeData::is_valid(int i)
+{
+ return m_primitive[i] != -1;
+}
+
+uint4 BakeData::data(int i)
+{
+ return make_uint4(
+ m_object,
+ m_primitive[i],
+ __float_as_int(m_u[i]),
+ __float_as_int(m_v[i])
+ );
+}
+
+uint4 BakeData::differentials(int i)
+{
+ return make_uint4(
+ __float_as_int(m_dudx[i]),
+ __float_as_int(m_dudy[i]),
+ __float_as_int(m_dvdx[i]),
+ __float_as_int(m_dvdy[i])
+ );
+}
+
+BakeManager::BakeManager()
+{
+ m_bake_data = NULL;
+ m_is_baking = false;
+ need_update = true;
+}
+
+BakeManager::~BakeManager()
+{
+ if(m_bake_data)
+ delete m_bake_data;
+}
+
+bool BakeManager::get_baking()
+{
+ return m_is_baking;
+}
+
+void BakeManager::set_baking(const bool value)
+{
+ m_is_baking = value;
+}
+
+BakeData *BakeManager::init(const int object, const int tri_offset, const int num_pixels)
+{
+ m_bake_data = new BakeData(object, tri_offset, num_pixels);
+ return m_bake_data;
+}
+
+bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress, ShaderEvalType shader_type, BakeData *bake_data, float result[])
+{
+ size_t limit = bake_data->size();
+
+ /* setup input for device task */
+ device_vector<uint4> d_input;
+ uint4 *d_input_data = d_input.resize(limit * 2);
+ size_t d_input_size = 0;
+
+ for(size_t i = 0; i < limit; i++) {
+ d_input_data[d_input_size++] = bake_data->data(i);
+ d_input_data[d_input_size++] = bake_data->differentials(i);
+ }
+
+ if(d_input_size == 0)
+ return false;
+
+ /* run device task */
+ device_vector<float4> d_output;
+ d_output.resize(limit);
+
+ /* needs to be up to data for attribute access */
+ device->const_copy_to("__data", &dscene->data, sizeof(dscene->data));
+
+ device->mem_alloc(d_input, MEM_READ_ONLY);
+ device->mem_copy_to(d_input);
+ device->mem_alloc(d_output, MEM_WRITE_ONLY);
+
+ DeviceTask task(DeviceTask::SHADER);
+ task.shader_input = d_input.device_pointer;
+ task.shader_output = d_output.device_pointer;
+ task.shader_eval_type = shader_type;
+ task.shader_x = 0;
+ task.shader_w = d_output.size();
+ task.get_cancel = function_bind(&Progress::get_cancel, &progress);
+
+ device->task_add(task);
+ device->task_wait();
+
+ if(progress.get_cancel()) {
+ device->mem_free(d_input);
+ device->mem_free(d_output);
+ m_is_baking = false;
+ return false;
+ }
+
+ device->mem_copy_from(d_output, 0, 1, d_output.size(), sizeof(float4));
+ device->mem_free(d_input);
+ device->mem_free(d_output);
+
+ /* read result */
+ int k = 0;
+
+ float4 *offset = (float4*)d_output.data_pointer;
+
+ size_t depth = 4;
+ for(size_t i = 0; i < limit; i++) {
+ size_t index = i * depth;
+ float4 out = offset[k++];
+
+ if(bake_data->is_valid(i)) {
+ for(size_t j=0; j < 4; j++) {
+ result[index + j] = out[j];
+ }
+ }
+ }
+
+ m_is_baking = false;
+ return true;
+}
+
+void BakeManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress)
+{
+ if(!need_update)
+ return;
+
+ if(progress.get_cancel()) return;
+
+ need_update = false;
+}
+
+void BakeManager::device_free(Device *device, DeviceScene *dscene)
+{
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/render/bake.h b/intern/cycles/render/bake.h
new file mode 100644
index 00000000000..ea403f7d39a
--- /dev/null
+++ b/intern/cycles/render/bake.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright 2011-2014 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License
+ */
+
+#ifndef __BAKE_H__
+#define __BAKE_H__
+
+#include "util_vector.h"
+#include "device.h"
+#include "scene.h"
+#include "session.h"
+
+CCL_NAMESPACE_BEGIN
+
+class BakeData {
+public:
+ BakeData(const int object, const int tri_offset, const int num_pixels);
+ ~BakeData();
+
+ void set(int i, int prim, float uv[2], float dudx, float dudy, float dvdx, float dvdy);
+ int object();
+ int size();
+ uint4 data(int i);
+ uint4 differentials(int i);
+ bool is_valid(int i);
+
+private:
+ int m_object;
+ int m_tri_offset;
+ int m_num_pixels;
+ vector<int>m_primitive;
+ vector<float>m_u;
+ vector<float>m_v;
+ vector<float>m_dudx;
+ vector<float>m_dudy;
+ vector<float>m_dvdx;
+ vector<float>m_dvdy;
+};
+
+class BakeManager {
+public:
+ BakeManager();
+ ~BakeManager();
+
+ bool get_baking();
+ void set_baking(const bool value);
+
+ BakeData *init(const int object, const int tri_offset, const int num_pixels);
+
+ bool bake(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress, ShaderEvalType shader_type, BakeData *bake_data, float result[]);
+
+ void device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress);
+ void device_free(Device *device, DeviceScene *dscene);
+
+ bool need_update;
+
+private:
+ BakeData *m_bake_data;
+ bool m_is_baking;
+};
+
+CCL_NAMESPACE_END
+
+#endif /* __BAKE_H__ */
+
diff --git a/intern/cycles/render/blackbody.cpp b/intern/cycles/render/blackbody.cpp
index ab61886e262..89af714e8ec 100644
--- a/intern/cycles/render/blackbody.cpp
+++ b/intern/cycles/render/blackbody.cpp
@@ -59,33 +59,33 @@ vector<float> blackbody_table()
*/
const float cie_colour_match[81][3] = {
- {0.0014,0.0000,0.0065}, {0.0022,0.0001,0.0105}, {0.0042,0.0001,0.0201},
- {0.0076,0.0002,0.0362}, {0.0143,0.0004,0.0679}, {0.0232,0.0006,0.1102},
- {0.0435,0.0012,0.2074}, {0.0776,0.0022,0.3713}, {0.1344,0.0040,0.6456},
- {0.2148,0.0073,1.0391}, {0.2839,0.0116,1.3856}, {0.3285,0.0168,1.6230},
- {0.3483,0.0230,1.7471}, {0.3481,0.0298,1.7826}, {0.3362,0.0380,1.7721},
- {0.3187,0.0480,1.7441}, {0.2908,0.0600,1.6692}, {0.2511,0.0739,1.5281},
- {0.1954,0.0910,1.2876}, {0.1421,0.1126,1.0419}, {0.0956,0.1390,0.8130},
- {0.0580,0.1693,0.6162}, {0.0320,0.2080,0.4652}, {0.0147,0.2586,0.3533},
- {0.0049,0.3230,0.2720}, {0.0024,0.4073,0.2123}, {0.0093,0.5030,0.1582},
- {0.0291,0.6082,0.1117}, {0.0633,0.7100,0.0782}, {0.1096,0.7932,0.0573},
- {0.1655,0.8620,0.0422}, {0.2257,0.9149,0.0298}, {0.2904,0.9540,0.0203},
- {0.3597,0.9803,0.0134}, {0.4334,0.9950,0.0087}, {0.5121,1.0000,0.0057},
- {0.5945,0.9950,0.0039}, {0.6784,0.9786,0.0027}, {0.7621,0.9520,0.0021},
- {0.8425,0.9154,0.0018}, {0.9163,0.8700,0.0017}, {0.9786,0.8163,0.0014},
- {1.0263,0.7570,0.0011}, {1.0567,0.6949,0.0010}, {1.0622,0.6310,0.0008},
- {1.0456,0.5668,0.0006}, {1.0026,0.5030,0.0003}, {0.9384,0.4412,0.0002},
- {0.8544,0.3810,0.0002}, {0.7514,0.3210,0.0001}, {0.6424,0.2650,0.0000},
- {0.5419,0.2170,0.0000}, {0.4479,0.1750,0.0000}, {0.3608,0.1382,0.0000},
- {0.2835,0.1070,0.0000}, {0.2187,0.0816,0.0000}, {0.1649,0.0610,0.0000},
- {0.1212,0.0446,0.0000}, {0.0874,0.0320,0.0000}, {0.0636,0.0232,0.0000},
- {0.0468,0.0170,0.0000}, {0.0329,0.0119,0.0000}, {0.0227,0.0082,0.0000},
- {0.0158,0.0057,0.0000}, {0.0114,0.0041,0.0000}, {0.0081,0.0029,0.0000},
- {0.0058,0.0021,0.0000}, {0.0041,0.0015,0.0000}, {0.0029,0.0010,0.0000},
- {0.0020,0.0007,0.0000}, {0.0014,0.0005,0.0000}, {0.0010,0.0004,0.0000},
- {0.0007,0.0002,0.0000}, {0.0005,0.0002,0.0000}, {0.0003,0.0001,0.0000},
- {0.0002,0.0001,0.0000}, {0.0002,0.0001,0.0000}, {0.0001,0.0000,0.0000},
- {0.0001,0.0000,0.0000}, {0.0001,0.0000,0.0000}, {0.0000,0.0000,0.0000}
+ {0.0014f,0.0000f,0.0065f}, {0.0022f,0.0001f,0.0105f}, {0.0042f,0.0001f,0.0201f},
+ {0.0076f,0.0002f,0.0362f}, {0.0143f,0.0004f,0.0679f}, {0.0232f,0.0006f,0.1102f},
+ {0.0435f,0.0012f,0.2074f}, {0.0776f,0.0022f,0.3713f}, {0.1344f,0.0040f,0.6456f},
+ {0.2148f,0.0073f,1.0391f}, {0.2839f,0.0116f,1.3856f}, {0.3285f,0.0168f,1.6230f},
+ {0.3483f,0.0230f,1.7471f}, {0.3481f,0.0298f,1.7826f}, {0.3362f,0.0380f,1.7721f},
+ {0.3187f,0.0480f,1.7441f}, {0.2908f,0.0600f,1.6692f}, {0.2511f,0.0739f,1.5281f},
+ {0.1954f,0.0910f,1.2876f}, {0.1421f,0.1126f,1.0419f}, {0.0956f,0.1390f,0.8130f},
+ {0.0580f,0.1693f,0.6162f}, {0.0320f,0.2080f,0.4652f}, {0.0147f,0.2586f,0.3533f},
+ {0.0049f,0.3230f,0.2720f}, {0.0024f,0.4073f,0.2123f}, {0.0093f,0.5030f,0.1582f},
+ {0.0291f,0.6082f,0.1117f}, {0.0633f,0.7100f,0.0782f}, {0.1096f,0.7932f,0.0573f},
+ {0.1655f,0.8620f,0.0422f}, {0.2257f,0.9149f,0.0298f}, {0.2904f,0.9540f,0.0203f},
+ {0.3597f,0.9803f,0.0134f}, {0.4334f,0.9950f,0.0087f}, {0.5121f,1.0000f,0.0057f},
+ {0.5945f,0.9950f,0.0039f}, {0.6784f,0.9786f,0.0027f}, {0.7621f,0.9520f,0.0021f},
+ {0.8425f,0.9154f,0.0018f}, {0.9163f,0.8700f,0.0017f}, {0.9786f,0.8163f,0.0014f},
+ {1.0263f,0.7570f,0.0011f}, {1.0567f,0.6949f,0.0010f}, {1.0622f,0.6310f,0.0008f},
+ {1.0456f,0.5668f,0.0006f}, {1.0026f,0.5030f,0.0003f}, {0.9384f,0.4412f,0.0002f},
+ {0.8544f,0.3810f,0.0002f}, {0.7514f,0.3210f,0.0001f}, {0.6424f,0.2650f,0.0000f},
+ {0.5419f,0.2170f,0.0000f}, {0.4479f,0.1750f,0.0000f}, {0.3608f,0.1382f,0.0000f},
+ {0.2835f,0.1070f,0.0000f}, {0.2187f,0.0816f,0.0000f}, {0.1649f,0.0610f,0.0000f},
+ {0.1212f,0.0446f,0.0000f}, {0.0874f,0.0320f,0.0000f}, {0.0636f,0.0232f,0.0000f},
+ {0.0468f,0.0170f,0.0000f}, {0.0329f,0.0119f,0.0000f}, {0.0227f,0.0082f,0.0000f},
+ {0.0158f,0.0057f,0.0000f}, {0.0114f,0.0041f,0.0000f}, {0.0081f,0.0029f,0.0000f},
+ {0.0058f,0.0021f,0.0000f}, {0.0041f,0.0015f,0.0000f}, {0.0029f,0.0010f,0.0000f},
+ {0.0020f,0.0007f,0.0000f}, {0.0014f,0.0005f,0.0000f}, {0.0010f,0.0004f,0.0000f},
+ {0.0007f,0.0002f,0.0000f}, {0.0005f,0.0002f,0.0000f}, {0.0003f,0.0001f,0.0000f},
+ {0.0002f,0.0001f,0.0000f}, {0.0002f,0.0001f,0.0000f}, {0.0001f,0.0000f,0.0000f},
+ {0.0001f,0.0000f,0.0000f}, {0.0001f,0.0000f,0.0000f}, {0.0000f,0.0000f,0.0000f}
};
const double c1 = 3.74183e-16; // 2*pi*h*c^2, W*m^2
diff --git a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp
index da1b7484b77..fc65922fc87 100644
--- a/intern/cycles/render/buffers.cpp
+++ b/intern/cycles/render/buffers.cpp
@@ -358,14 +358,14 @@ void DisplayBuffer::draw_set(int width, int height)
draw_height = height;
}
-void DisplayBuffer::draw(Device *device)
+void DisplayBuffer::draw(Device *device, const DeviceDrawParams& draw_params)
{
if(draw_width != 0 && draw_height != 0) {
glPushMatrix();
glTranslatef(params.full_x, params.full_y, 0.0f);
device_memory& rgba = rgba_data();
- device->draw_pixels(rgba, 0, draw_width, draw_height, 0, params.width, params.height, transparent);
+ device->draw_pixels(rgba, 0, draw_width, draw_height, 0, params.width, params.height, transparent, draw_params);
glPopMatrix();
}
diff --git a/intern/cycles/render/buffers.h b/intern/cycles/render/buffers.h
index 81eaf41077f..27ab20bbafd 100644
--- a/intern/cycles/render/buffers.h
+++ b/intern/cycles/render/buffers.h
@@ -31,6 +31,7 @@
CCL_NAMESPACE_BEGIN
class Device;
+struct DeviceDrawParams;
struct float4;
/* Buffer Parameters
@@ -114,7 +115,7 @@ public:
void write(Device *device, const string& filename);
void draw_set(int width, int height);
- void draw(Device *device);
+ void draw(Device *device, const DeviceDrawParams& draw_params);
bool draw_ready();
device_memory& rgba_data();
diff --git a/intern/cycles/render/camera.cpp b/intern/cycles/render/camera.cpp
index edf7f7fb09d..8659fe4f7a3 100644
--- a/intern/cycles/render/camera.cpp
+++ b/intern/cycles/render/camera.cpp
@@ -44,8 +44,8 @@ Camera::Camera()
fisheye_lens = 10.5f;
fov = M_PI_4_F;
- sensorwidth = 0.036;
- sensorheight = 0.024;
+ sensorwidth = 0.036f;
+ sensorheight = 0.024f;
nearclip = 1e-5f;
farclip = 1e5f;
@@ -78,6 +78,24 @@ Camera::~Camera()
{
}
+void Camera::compute_auto_viewplane()
+{
+ float aspect = (float)width/(float)height;
+
+ if(width >= height) {
+ viewplane.left = -aspect;
+ viewplane.right = aspect;
+ viewplane.bottom = -1.0f;
+ viewplane.top = 1.0f;
+ }
+ else {
+ viewplane.left = -1.0f;
+ viewplane.right = 1.0f;
+ viewplane.bottom = -1.0f/aspect;
+ viewplane.top = 1.0f/aspect;
+ }
+}
+
void Camera::update()
{
if(!need_update)
diff --git a/intern/cycles/render/camera.h b/intern/cycles/render/camera.h
index 4e8f3d72111..c28670bc55f 100644
--- a/intern/cycles/render/camera.h
+++ b/intern/cycles/render/camera.h
@@ -102,6 +102,8 @@ public:
/* functions */
Camera();
~Camera();
+
+ void compute_auto_viewplane();
void update();
diff --git a/intern/cycles/render/curves.cpp b/intern/cycles/render/curves.cpp
index 6e6b11ca92f..2c96ffa655e 100644
--- a/intern/cycles/render/curves.cpp
+++ b/intern/cycles/render/curves.cpp
@@ -110,7 +110,7 @@ void CurveSystemManager::device_update(Device *device, DeviceScene *dscene, Scen
progress.set_status("Updating Hair settings", "Copying Hair settings to device");
- KernelCurves *kcurve= &dscene->data.curve;
+ KernelCurves *kcurve = &dscene->data.curve;
kcurve->curveflags = 0;
diff --git a/intern/cycles/render/film.cpp b/intern/cycles/render/film.cpp
index 30ad86a8d4c..c1aefbcfbbc 100644
--- a/intern/cycles/render/film.cpp
+++ b/intern/cycles/render/film.cpp
@@ -155,6 +155,9 @@ void Pass::add(PassType type, vector<Pass>& passes)
pass.components = 4;
pass.exposure = false;
break;
+ case PASS_LIGHT:
+ /* ignores */
+ break;
}
passes.push_back(pass);
@@ -393,6 +396,10 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene)
kfilm->pass_shadow = kfilm->pass_stride;
kfilm->use_light_pass = 1;
break;
+
+ case PASS_LIGHT:
+ kfilm->use_light_pass = 1;
+ break;
case PASS_NONE:
break;
}
diff --git a/intern/cycles/render/graph.cpp b/intern/cycles/render/graph.cpp
index 9142eb5308c..0ff904d06e7 100644
--- a/intern/cycles/render/graph.cpp
+++ b/intern/cycles/render/graph.cpp
@@ -227,7 +227,7 @@ void ShaderGraph::disconnect(ShaderInput *to)
from->links.erase(remove(from->links.begin(), from->links.end(), to), from->links.end());
}
-void ShaderGraph::finalize(bool do_bump, bool do_osl, bool do_multi_transform)
+void ShaderGraph::finalize(bool do_bump, bool do_osl)
{
/* before compiling, the shader graph may undergo a number of modifications.
* currently we set default geometry shader inputs, and create automatic bump
@@ -242,17 +242,15 @@ void ShaderGraph::finalize(bool do_bump, bool do_osl, bool do_multi_transform)
if(do_bump)
bump_from_displacement();
- if(do_multi_transform) {
- ShaderInput *surface_in = output()->input("Surface");
- ShaderInput *volume_in = output()->input("Volume");
+ ShaderInput *surface_in = output()->input("Surface");
+ ShaderInput *volume_in = output()->input("Volume");
- /* todo: make this work when surface and volume closures are tangled up */
+ /* todo: make this work when surface and volume closures are tangled up */
- if(surface_in->link)
- transform_multi_closure(surface_in->link->parent, NULL, false);
- if(volume_in->link)
- transform_multi_closure(volume_in->link->parent, NULL, true);
- }
+ if(surface_in->link)
+ transform_multi_closure(surface_in->link->parent, NULL, false);
+ if(volume_in->link)
+ transform_multi_closure(volume_in->link->parent, NULL, true);
finalized = true;
}
diff --git a/intern/cycles/render/graph.h b/intern/cycles/render/graph.h
index f31e2103229..89a066195d6 100644
--- a/intern/cycles/render/graph.h
+++ b/intern/cycles/render/graph.h
@@ -193,6 +193,7 @@ public:
virtual bool has_surface_bssrdf() { return false; }
virtual bool has_converter_blackbody() { return false; }
virtual bool has_bssrdf_bump() { return false; }
+ virtual bool has_spatial_varying() { return false; }
vector<ShaderInput*> inputs;
vector<ShaderOutput*> outputs;
@@ -246,7 +247,7 @@ public:
void disconnect(ShaderInput *to);
void remove_unneeded_nodes();
- void finalize(bool do_bump = false, bool do_osl = false, bool do_multi_closure = false);
+ void finalize(bool do_bump = false, bool do_osl = false);
protected:
typedef pair<ShaderNode* const, ShaderNode*> NodePair;
diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp
index 91aae6f3ec3..86755badc42 100644
--- a/intern/cycles/render/image.cpp
+++ b/intern/cycles/render/image.cpp
@@ -59,11 +59,16 @@ void ImageManager::set_osl_texture_system(void *texture_system)
osl_texture_system = texture_system;
}
-void ImageManager::set_extended_image_limits(void)
+void ImageManager::set_extended_image_limits(const DeviceInfo& info)
{
- tex_num_images = TEX_EXTENDED_NUM_IMAGES;
- tex_num_float_images = TEX_EXTENDED_NUM_FLOAT_IMAGES;
- tex_image_byte_start = TEX_EXTENDED_IMAGE_BYTE_START;
+ if(info.type == DEVICE_CPU) {
+ tex_num_images = TEX_EXTENDED_NUM_IMAGES_CPU;
+ tex_num_float_images = TEX_EXTENDED_NUM_FLOAT_IMAGES;
+ tex_image_byte_start = TEX_EXTENDED_IMAGE_BYTE_START;
+ }
+ else if((info.type == DEVICE_CUDA || info.type == DEVICE_MULTI) && info.extended_images) {
+ tex_num_images = TEX_EXTENDED_NUM_IMAGES_GPU;
+ }
}
bool ImageManager::set_animation_frame_update(int frame)
@@ -90,8 +95,8 @@ bool ImageManager::is_float_image(const string& filename, void *builtin_data, bo
if(builtin_data) {
if(builtin_image_info_cb) {
- int width, height, channels;
- builtin_image_info_cb(filename, builtin_data, is_float, width, height, channels);
+ int width, height, depth, channels;
+ builtin_image_info_cb(filename, builtin_data, is_float, width, height, depth, channels);
}
if(is_float)
@@ -145,7 +150,14 @@ bool ImageManager::is_float_image(const string& filename, void *builtin_data, bo
return is_float;
}
-int ImageManager::add_image(const string& filename, void *builtin_data, bool animated, bool& is_float, bool& is_linear)
+static bool image_equals(ImageManager::Image *image, const string& filename, void *builtin_data, InterpolationType interpolation)
+{
+ return image->filename == filename &&
+ image->builtin_data == builtin_data &&
+ image->interpolation == interpolation;
+}
+
+int ImageManager::add_image(const string& filename, void *builtin_data, bool animated, bool& is_float, bool& is_linear, InterpolationType interpolation, bool use_alpha)
{
Image *img;
size_t slot;
@@ -156,7 +168,7 @@ int ImageManager::add_image(const string& filename, void *builtin_data, bool ani
if(is_float) {
/* find existing image */
for(slot = 0; slot < float_images.size(); slot++) {
- if(float_images[slot] && float_images[slot]->filename == filename) {
+ if(float_images[slot] && image_equals(float_images[slot], filename, builtin_data, interpolation)) {
float_images[slot]->users++;
return slot;
}
@@ -185,13 +197,15 @@ int ImageManager::add_image(const string& filename, void *builtin_data, bool ani
img->builtin_data = builtin_data;
img->need_load = true;
img->animated = animated;
+ img->interpolation = interpolation;
img->users = 1;
+ img->use_alpha = use_alpha;
float_images[slot] = img;
}
else {
for(slot = 0; slot < images.size(); slot++) {
- if(images[slot] && images[slot]->filename == filename) {
+ if(images[slot] && image_equals(images[slot], filename, builtin_data, interpolation)) {
images[slot]->users++;
return slot+tex_image_byte_start;
}
@@ -220,7 +234,9 @@ int ImageManager::add_image(const string& filename, void *builtin_data, bool ani
img->builtin_data = builtin_data;
img->need_load = true;
img->animated = animated;
+ img->interpolation = interpolation;
img->users = 1;
+ img->use_alpha = use_alpha;
images[slot] = img;
@@ -231,22 +247,43 @@ int ImageManager::add_image(const string& filename, void *builtin_data, bool ani
return slot;
}
-void ImageManager::remove_image(const string& filename, void *builtin_data)
+void ImageManager::remove_image(int slot)
{
- size_t slot;
+ if(slot >= tex_image_byte_start) {
+ slot -= tex_image_byte_start;
- for(slot = 0; slot < images.size(); slot++) {
- if(images[slot] && images[slot]->filename == filename && images[slot]->builtin_data == builtin_data) {
- /* decrement user count */
- images[slot]->users--;
- assert(images[slot]->users >= 0);
+ assert(images[slot] != NULL);
+
+ /* decrement user count */
+ images[slot]->users--;
+ assert(images[slot]->users >= 0);
+
+ /* don't remove immediately, rather do it all together later on. one of
+ * the reasons for this is that on shader changes we add and remove nodes
+ * that use them, but we do not want to reload the image all the time. */
+ if(images[slot]->users == 0)
+ need_update = true;
+ }
+ else {
+ /* decrement user count */
+ float_images[slot]->users--;
+ assert(float_images[slot]->users >= 0);
+
+ /* don't remove immediately, rather do it all together later on. one of
+ * the reasons for this is that on shader changes we add and remove nodes
+ * that use them, but we do not want to reload the image all the time. */
+ if(float_images[slot]->users == 0)
+ need_update = true;
+ }
+}
- /* don't remove immediately, rather do it all together later on. one of
- * the reasons for this is that on shader changes we add and remove nodes
- * that use them, but we do not want to reload the image all the time. */
- if(images[slot]->users == 0)
- need_update = true;
+void ImageManager::remove_image(const string& filename, void *builtin_data, InterpolationType interpolation)
+{
+ size_t slot;
+ for(slot = 0; slot < images.size(); slot++) {
+ if(images[slot] && image_equals(images[slot], filename, builtin_data, interpolation)) {
+ remove_image(slot+tex_image_byte_start);
break;
}
}
@@ -254,17 +291,8 @@ void ImageManager::remove_image(const string& filename, void *builtin_data)
if(slot == images.size()) {
/* see if it's in a float texture slot */
for(slot = 0; slot < float_images.size(); slot++) {
- if(float_images[slot] && float_images[slot]->filename == filename && float_images[slot]->builtin_data == builtin_data) {
- /* decrement user count */
- float_images[slot]->users--;
- assert(float_images[slot]->users >= 0);
-
- /* don't remove immediately, rather do it all together later on. one of
- * the reasons for this is that on shader changes we add and remove nodes
- * that use them, but we do not want to reload the image all the time. */
- if(float_images[slot]->users == 0)
- need_update = true;
-
+ if(float_images[slot] && image_equals(float_images[slot], filename, builtin_data, interpolation)) {
+ remove_image(slot);
break;
}
}
@@ -277,7 +305,7 @@ bool ImageManager::file_load_image(Image *img, device_vector<uchar4>& tex_img)
return false;
ImageInput *in = NULL;
- int width, height, components;
+ int width, height, depth, components;
if(!img->builtin_data) {
/* load image from file through OIIO */
@@ -286,15 +314,20 @@ bool ImageManager::file_load_image(Image *img, device_vector<uchar4>& tex_img)
if(!in)
return false;
- ImageSpec spec;
+ ImageSpec spec = ImageSpec();
+ ImageSpec config = ImageSpec();
+
+ if(img->use_alpha == false)
+ config.attribute("oiio:UnassociatedAlpha", 1);
- if(!in->open(img->filename, spec)) {
+ if(!in->open(img->filename, spec, config)) {
delete in;
return false;
}
width = spec.width;
height = spec.height;
+ depth = spec.depth;
components = spec.nchannels;
}
else {
@@ -303,7 +336,7 @@ bool ImageManager::file_load_image(Image *img, device_vector<uchar4>& tex_img)
return false;
bool is_float;
- builtin_image_info_cb(img->filename, img->builtin_data, is_float, width, height, components);
+ builtin_image_info_cb(img->filename, img->builtin_data, is_float, width, height, depth, components);
}
/* we only handle certain number of components */
@@ -317,15 +350,21 @@ bool ImageManager::file_load_image(Image *img, device_vector<uchar4>& tex_img)
}
/* read RGBA pixels */
- uchar *pixels = (uchar*)tex_img.resize(width, height);
- int scanlinesize = width*components*sizeof(uchar);
+ uchar *pixels = (uchar*)tex_img.resize(width, height, depth);
if(in) {
- in->read_image(TypeDesc::UINT8,
- (uchar*)pixels + (height-1)*scanlinesize,
- AutoStride,
- -scanlinesize,
- AutoStride);
+ if(depth <= 1) {
+ int scanlinesize = width*components*sizeof(uchar);
+
+ in->read_image(TypeDesc::UINT8,
+ (uchar*)pixels + (height-1)*scanlinesize,
+ AutoStride,
+ -scanlinesize,
+ AutoStride);
+ }
+ else {
+ in->read_image(TypeDesc::UINT8, (uchar*)pixels);
+ }
in->close();
delete in;
@@ -335,7 +374,7 @@ bool ImageManager::file_load_image(Image *img, device_vector<uchar4>& tex_img)
}
if(components == 2) {
- for(int i = width*height-1; i >= 0; i--) {
+ for(int i = width*height*depth-1; i >= 0; i--) {
pixels[i*4+3] = pixels[i*2+1];
pixels[i*4+2] = pixels[i*2+0];
pixels[i*4+1] = pixels[i*2+0];
@@ -343,7 +382,7 @@ bool ImageManager::file_load_image(Image *img, device_vector<uchar4>& tex_img)
}
}
else if(components == 3) {
- for(int i = width*height-1; i >= 0; i--) {
+ for(int i = width*height*depth-1; i >= 0; i--) {
pixels[i*4+3] = 255;
pixels[i*4+2] = pixels[i*3+2];
pixels[i*4+1] = pixels[i*3+1];
@@ -351,7 +390,7 @@ bool ImageManager::file_load_image(Image *img, device_vector<uchar4>& tex_img)
}
}
else if(components == 1) {
- for(int i = width*height-1; i >= 0; i--) {
+ for(int i = width*height*depth-1; i >= 0; i--) {
pixels[i*4+3] = 255;
pixels[i*4+2] = pixels[i];
pixels[i*4+1] = pixels[i];
@@ -359,6 +398,12 @@ bool ImageManager::file_load_image(Image *img, device_vector<uchar4>& tex_img)
}
}
+ if(img->use_alpha == false) {
+ for(int i = width*height*depth-1; i >= 0; i--) {
+ pixels[i*4+3] = 255;
+ }
+ }
+
return true;
}
@@ -368,7 +413,7 @@ bool ImageManager::file_load_float_image(Image *img, device_vector<float4>& tex_
return false;
ImageInput *in = NULL;
- int width, height, components;
+ int width, height, depth, components;
if(!img->builtin_data) {
/* load image from file through OIIO */
@@ -377,9 +422,13 @@ bool ImageManager::file_load_float_image(Image *img, device_vector<float4>& tex_
if(!in)
return false;
- ImageSpec spec;
+ ImageSpec spec = ImageSpec();
+ ImageSpec config = ImageSpec();
+
+ if(img->use_alpha == false)
+ config.attribute("oiio:UnassociatedAlpha",1);
- if(!in->open(img->filename, spec)) {
+ if(!in->open(img->filename, spec, config)) {
delete in;
return false;
}
@@ -387,6 +436,7 @@ bool ImageManager::file_load_float_image(Image *img, device_vector<float4>& tex_
/* we only handle certain number of components */
width = spec.width;
height = spec.height;
+ depth = spec.depth;
components = spec.nchannels;
}
else {
@@ -395,7 +445,7 @@ bool ImageManager::file_load_float_image(Image *img, device_vector<float4>& tex_
return false;
bool is_float;
- builtin_image_info_cb(img->filename, img->builtin_data, is_float, width, height, components);
+ builtin_image_info_cb(img->filename, img->builtin_data, is_float, width, height, depth, components);
}
if(!(components >= 1 && components <= 4)) {
@@ -407,15 +457,21 @@ bool ImageManager::file_load_float_image(Image *img, device_vector<float4>& tex_
}
/* read RGBA pixels */
- float *pixels = (float*)tex_img.resize(width, height);
- int scanlinesize = width*components*sizeof(float);
+ float *pixels = (float*)tex_img.resize(width, height, depth);
if(in) {
- in->read_image(TypeDesc::FLOAT,
- (uchar*)pixels + (height-1)*scanlinesize,
- AutoStride,
- -scanlinesize,
- AutoStride);
+ if(depth <= 1) {
+ int scanlinesize = width*components*sizeof(float);
+
+ in->read_image(TypeDesc::FLOAT,
+ (uchar*)pixels + (height-1)*scanlinesize,
+ AutoStride,
+ -scanlinesize,
+ AutoStride);
+ }
+ else {
+ in->read_image(TypeDesc::FLOAT, (uchar*)pixels);
+ }
in->close();
delete in;
@@ -425,7 +481,7 @@ bool ImageManager::file_load_float_image(Image *img, device_vector<float4>& tex_
}
if(components == 2) {
- for(int i = width*height-1; i >= 0; i--) {
+ for(int i = width*height*depth-1; i >= 0; i--) {
pixels[i*4+3] = pixels[i*2+1];
pixels[i*4+2] = pixels[i*2+0];
pixels[i*4+1] = pixels[i*2+0];
@@ -433,7 +489,7 @@ bool ImageManager::file_load_float_image(Image *img, device_vector<float4>& tex_
}
}
else if(components == 3) {
- for(int i = width*height-1; i >= 0; i--) {
+ for(int i = width*height*depth-1; i >= 0; i--) {
pixels[i*4+3] = 1.0f;
pixels[i*4+2] = pixels[i*3+2];
pixels[i*4+1] = pixels[i*3+1];
@@ -441,7 +497,7 @@ bool ImageManager::file_load_float_image(Image *img, device_vector<float4>& tex_
}
}
else if(components == 1) {
- for(int i = width*height-1; i >= 0; i--) {
+ for(int i = width*height*depth-1; i >= 0; i--) {
pixels[i*4+3] = 1.0f;
pixels[i*4+2] = pixels[i];
pixels[i*4+1] = pixels[i];
@@ -449,6 +505,12 @@ bool ImageManager::file_load_float_image(Image *img, device_vector<float4>& tex_
}
}
+ if(img->use_alpha == false) {
+ for(int i = width*height*depth-1; i >= 0; i--) {
+ pixels[i*4+3] = 1.0f;
+ }
+ }
+
return true;
}
@@ -456,9 +518,7 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, int sl
{
if(progress->get_cancel())
return;
- if(osl_texture_system)
- return;
-
+
Image *img;
bool is_float;
@@ -471,6 +531,9 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, int sl
is_float = true;
}
+ if(osl_texture_system && !img->builtin_data)
+ return;
+
if(is_float) {
string filename = path_filename(float_images[slot]->filename);
progress->set_status("Updating Images", "Loading " + filename);
@@ -499,7 +562,7 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, int sl
if(!pack_images) {
thread_scoped_lock device_lock(device_mutex);
- device->tex_alloc(name.c_str(), tex_img, true, true);
+ device->tex_alloc(name.c_str(), tex_img, img->interpolation, true);
}
}
else {
@@ -530,7 +593,7 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, int sl
if(!pack_images) {
thread_scoped_lock device_lock(device_mutex);
- device->tex_alloc(name.c_str(), tex_img, true, true);
+ device->tex_alloc(name.c_str(), tex_img, img->interpolation, true);
}
}
@@ -552,7 +615,7 @@ void ImageManager::device_free_image(Device *device, DeviceScene *dscene, int sl
}
if(img) {
- if(osl_texture_system) {
+ if(osl_texture_system && !img->builtin_data) {
#ifdef WITH_OSL
ustring filename(images[slot]->filename);
((OSL::TextureSystem*)osl_texture_system)->invalidate(filename);
@@ -602,7 +665,7 @@ void ImageManager::device_update(Device *device, DeviceScene *dscene, Progress&
device_free_image(device, dscene, slot + tex_image_byte_start);
}
else if(images[slot]->need_load) {
- if(!osl_texture_system)
+ if(!osl_texture_system || images[slot]->builtin_data)
pool.push(function_bind(&ImageManager::device_load_image, this, device, dscene, slot + tex_image_byte_start, &progress));
}
}
@@ -615,7 +678,7 @@ void ImageManager::device_update(Device *device, DeviceScene *dscene, Progress&
device_free_image(device, dscene, slot);
}
else if(float_images[slot]->need_load) {
- if(!osl_texture_system)
+ if(!osl_texture_system || float_images[slot]->builtin_data)
pool.push(function_bind(&ImageManager::device_load_image, this, device, dscene, slot, &progress));
}
}
@@ -653,16 +716,32 @@ void ImageManager::device_pack_images(Device *device, DeviceScene *dscene, Progr
device_vector<uchar4>& tex_img = dscene->tex_image[slot];
- info[slot] = make_uint4(tex_img.data_width, tex_img.data_height, offset, 1);
+ /* todo: support 3D textures, only CPU for now */
+
+ /* The image options are packed
+ bit 0 -> periodic
+ bit 1 + 2 -> interpolation type */
+ uint8_t interpolation = (images[slot]->interpolation << 1) + 1;
+ info[slot] = make_uint4(tex_img.data_width, tex_img.data_height, offset, interpolation);
memcpy(pixels+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
offset += tex_img.size();
}
- if(dscene->tex_image_packed.size())
+ if(dscene->tex_image_packed.size()) {
+ if(dscene->tex_image_packed.device_pointer) {
+ thread_scoped_lock device_lock(device_mutex);
+ device->tex_free(dscene->tex_image_packed);
+ }
device->tex_alloc("__tex_image_packed", dscene->tex_image_packed);
- if(dscene->tex_image_packed_info.size())
+ }
+ if(dscene->tex_image_packed_info.size()) {
+ if(dscene->tex_image_packed_info.device_pointer) {
+ thread_scoped_lock device_lock(device_mutex);
+ device->tex_free(dscene->tex_image_packed_info);
+ }
device->tex_alloc("__tex_image_packed_info", dscene->tex_image_packed_info);
+ }
}
void ImageManager::device_free(Device *device, DeviceScene *dscene)
diff --git a/intern/cycles/render/image.h b/intern/cycles/render/image.h
index 187c5fd0f02..561550fe0d2 100644
--- a/intern/cycles/render/image.h
+++ b/intern/cycles/render/image.h
@@ -17,6 +17,7 @@
#ifndef __IMAGE_H__
#define __IMAGE_H__
+#include "device.h"
#include "device_memory.h"
#include "util_string.h"
@@ -27,11 +28,16 @@
CCL_NAMESPACE_BEGIN
+/* generic */
#define TEX_NUM_IMAGES 95
#define TEX_IMAGE_BYTE_START TEX_NUM_FLOAT_IMAGES
+/* extended gpu */
+#define TEX_EXTENDED_NUM_IMAGES_GPU 145
+
+/* extended cpu */
#define TEX_EXTENDED_NUM_FLOAT_IMAGES 1024
-#define TEX_EXTENDED_NUM_IMAGES 1024
+#define TEX_EXTENDED_NUM_IMAGES_CPU 1024
#define TEX_EXTENDED_IMAGE_BYTE_START TEX_EXTENDED_NUM_FLOAT_IMAGES
/* color to use when textures are not found */
@@ -49,8 +55,9 @@ public:
ImageManager();
~ImageManager();
- int add_image(const string& filename, void *builtin_data, bool animated, bool& is_float, bool& is_linear);
- void remove_image(const string& filename, void *builtin_data);
+ int add_image(const string& filename, void *builtin_data, bool animated, bool& is_float, bool& is_linear, InterpolationType interpolation, bool use_alpha);
+ void remove_image(int slot);
+ void remove_image(const string& filename, void *builtin_data, InterpolationType interpolation);
bool is_float_image(const string& filename, void *builtin_data, bool& is_linear);
void device_update(Device *device, DeviceScene *dscene, Progress& progress);
@@ -58,30 +65,34 @@ public:
void set_osl_texture_system(void *texture_system);
void set_pack_images(bool pack_images_);
- void set_extended_image_limits(void);
+ void set_extended_image_limits(const DeviceInfo& info);
bool set_animation_frame_update(int frame);
bool need_update;
- boost::function<void(const string &filename, void *data, bool &is_float, int &width, int &height, int &channels)> builtin_image_info_cb;
+ boost::function<void(const string &filename, void *data, bool &is_float, int &width, int &height, int &depth, int &channels)> builtin_image_info_cb;
boost::function<bool(const string &filename, void *data, unsigned char *pixels)> builtin_image_pixels_cb;
boost::function<bool(const string &filename, void *data, float *pixels)> builtin_image_float_pixels_cb;
-private:
- int tex_num_images;
- int tex_num_float_images;
- int tex_image_byte_start;
- thread_mutex device_mutex;
- int animation_frame;
struct Image {
string filename;
void *builtin_data;
+ bool use_alpha;
bool need_load;
bool animated;
+ InterpolationType interpolation;
+
int users;
};
+private:
+ int tex_num_images;
+ int tex_num_float_images;
+ int tex_image_byte_start;
+ thread_mutex device_mutex;
+ int animation_frame;
+
vector<Image*> images;
vector<Image*> float_images;
void *osl_texture_system;
diff --git a/intern/cycles/render/integrator.cpp b/intern/cycles/render/integrator.cpp
index f48e04f31e1..59a0de07e5a 100644
--- a/intern/cycles/render/integrator.cpp
+++ b/intern/cycles/render/integrator.cpp
@@ -34,15 +34,14 @@ Integrator::Integrator()
max_glossy_bounce = max_bounce;
max_transmission_bounce = max_bounce;
max_volume_bounce = max_bounce;
- probalistic_termination = true;
transparent_min_bounce = min_bounce;
transparent_max_bounce = max_bounce;
- transparent_probalistic = true;
transparent_shadows = false;
+ volume_homogeneous_sampling = 0;
volume_max_steps = 1024;
- volume_step_size = 0.1;
+ volume_step_size = 0.1f;
no_caustics = false;
filter_glossy = 0.0f;
@@ -82,10 +81,7 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene
/* integrator parameters */
kintegrator->max_bounce = max_bounce + 1;
- if(probalistic_termination)
- kintegrator->min_bounce = min_bounce + 1;
- else
- kintegrator->min_bounce = kintegrator->max_bounce;
+ kintegrator->min_bounce = min_bounce + 1;
kintegrator->max_diffuse_bounce = max_diffuse_bounce + 1;
kintegrator->max_glossy_bounce = max_glossy_bounce + 1;
@@ -97,13 +93,11 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene
kintegrator->max_volume_bounce = 1;
kintegrator->transparent_max_bounce = transparent_max_bounce + 1;
- if(transparent_probalistic)
- kintegrator->transparent_min_bounce = transparent_min_bounce + 1;
- else
- kintegrator->transparent_min_bounce = kintegrator->transparent_max_bounce;
+ kintegrator->transparent_min_bounce = transparent_min_bounce + 1;
kintegrator->transparent_shadows = transparent_shadows;
+ kintegrator->volume_homogeneous_sampling = volume_homogeneous_sampling;
kintegrator->volume_max_steps = volume_max_steps;
kintegrator->volume_step_size = volume_step_size;
@@ -120,7 +114,6 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene
kintegrator->sample_clamp_indirect = (sample_clamp_indirect == 0.0f)? FLT_MAX: sample_clamp_indirect*3.0f;
kintegrator->branched = (method == BRANCHED_PATH);
- kintegrator->aa_samples = aa_samples;
kintegrator->diffuse_samples = diffuse_samples;
kintegrator->glossy_samples = glossy_samples;
kintegrator->transmission_samples = transmission_samples;
@@ -128,8 +121,11 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene
kintegrator->mesh_light_samples = mesh_light_samples;
kintegrator->subsurface_samples = subsurface_samples;
kintegrator->volume_samples = volume_samples;
+ kintegrator->sample_all_lights_direct = sample_all_lights_direct;
+ kintegrator->sample_all_lights_indirect = sample_all_lights_indirect;
kintegrator->sampling_pattern = sampling_pattern;
+ kintegrator->aa_samples = aa_samples;
/* sobol directions table */
int max_samples = 1;
@@ -171,11 +167,10 @@ bool Integrator::modified(const Integrator& integrator)
max_glossy_bounce == integrator.max_glossy_bounce &&
max_transmission_bounce == integrator.max_transmission_bounce &&
max_volume_bounce == integrator.max_volume_bounce &&
- probalistic_termination == integrator.probalistic_termination &&
transparent_min_bounce == integrator.transparent_min_bounce &&
transparent_max_bounce == integrator.transparent_max_bounce &&
- transparent_probalistic == integrator.transparent_probalistic &&
transparent_shadows == integrator.transparent_shadows &&
+ volume_homogeneous_sampling == integrator.volume_homogeneous_sampling &&
volume_max_steps == integrator.volume_max_steps &&
volume_step_size == integrator.volume_step_size &&
no_caustics == integrator.no_caustics &&
@@ -194,7 +189,9 @@ bool Integrator::modified(const Integrator& integrator)
subsurface_samples == integrator.subsurface_samples &&
volume_samples == integrator.volume_samples &&
motion_blur == integrator.motion_blur &&
- sampling_pattern == integrator.sampling_pattern);
+ sampling_pattern == integrator.sampling_pattern &&
+ sample_all_lights_direct == integrator.sample_all_lights_direct &&
+ sample_all_lights_indirect == integrator.sample_all_lights_indirect);
}
void Integrator::tag_update(Scene *scene)
diff --git a/intern/cycles/render/integrator.h b/intern/cycles/render/integrator.h
index 573b258af60..380c1a65722 100644
--- a/intern/cycles/render/integrator.h
+++ b/intern/cycles/render/integrator.h
@@ -34,13 +34,12 @@ public:
int max_glossy_bounce;
int max_transmission_bounce;
int max_volume_bounce;
- bool probalistic_termination;
int transparent_min_bounce;
int transparent_max_bounce;
- bool transparent_probalistic;
bool transparent_shadows;
+ int volume_homogeneous_sampling;
int volume_max_steps;
float volume_step_size;
@@ -62,6 +61,8 @@ public:
int mesh_light_samples;
int subsurface_samples;
int volume_samples;
+ bool sample_all_lights_direct;
+ bool sample_all_lights_indirect;
enum Method {
BRANCHED_PATH = 0,
diff --git a/intern/cycles/render/light.cpp b/intern/cycles/render/light.cpp
index bab4218aae9..7bdb1fbf8af 100644
--- a/intern/cycles/render/light.cpp
+++ b/intern/cycles/render/light.cpp
@@ -29,7 +29,7 @@
CCL_NAMESPACE_BEGIN
-static void shade_background_pixels(Device *device, DeviceScene *dscene, int res, vector<float3>& pixels)
+static void shade_background_pixels(Device *device, DeviceScene *dscene, int res, vector<float3>& pixels, Progress& progress)
{
/* create input */
int width = res;
@@ -66,6 +66,7 @@ static void shade_background_pixels(Device *device, DeviceScene *dscene, int res
main_task.shader_eval_type = SHADER_EVAL_BACKGROUND;
main_task.shader_x = 0;
main_task.shader_w = width*height;
+ main_task.get_cancel = function_bind(&Progress::get_cancel, &progress);
/* disabled splitting for now, there's an issue with multi-GPU mem_copy_from */
list<DeviceTask> split_tasks;
@@ -149,7 +150,6 @@ void LightManager::device_update_distribution(Device *device, DeviceScene *dscen
size_t num_lights = scene->lights.size();
size_t num_background_lights = 0;
size_t num_triangles = 0;
- size_t num_curve_segments = 0;
foreach(Object *object, scene->objects) {
Mesh *mesh = object->mesh;
@@ -159,6 +159,10 @@ void LightManager::device_update_distribution(Device *device, DeviceScene *dscen
if(!(object->visibility & (PATH_RAY_DIFFUSE|PATH_RAY_GLOSSY|PATH_RAY_TRANSMIT)))
continue;
+ /* skip motion blurred deforming meshes, not supported yet */
+ if(mesh->has_motion_blur())
+ continue;
+
/* skip if we have no emission shaders */
foreach(uint sindex, mesh->used_shaders) {
Shader *shader = scene->shaders[sindex];
@@ -177,20 +181,10 @@ void LightManager::device_update_distribution(Device *device, DeviceScene *dscen
if(shader->use_mis && shader->has_surface_emission)
num_triangles++;
}
-
- /* disabled for curves */
-#if 0
- foreach(Mesh::Curve& curve, mesh->curves) {
- Shader *shader = scene->shaders[curve.shader];
-
- if(shader->use_mis && shader->has_surface_emission)
- num_curve_segments += curve.num_segments();
-#endif
}
}
- size_t num_distribution = num_triangles + num_curve_segments;
- num_distribution += num_lights;
+ size_t num_distribution = num_triangles + num_lights;
/* emission area */
float4 *distribution = dscene->light_distribution.resize(num_distribution + 1);
@@ -210,6 +204,10 @@ void LightManager::device_update_distribution(Device *device, DeviceScene *dscen
continue;
}
+ /* skip motion blurred deforming meshes, not supported yet */
+ if(mesh->has_motion_blur())
+ continue;
+
/* skip if we have no emission shaders */
foreach(uint sindex, mesh->used_shaders) {
Shader *shader = scene->shaders[sindex];
@@ -225,21 +223,21 @@ void LightManager::device_update_distribution(Device *device, DeviceScene *dscen
bool transform_applied = mesh->transform_applied;
Transform tfm = object->tfm;
int object_id = j;
- int shader_id = SHADER_MASK;
+ int shader_flag = 0;
if(transform_applied)
object_id = ~object_id;
if(!(object->visibility & PATH_RAY_DIFFUSE)) {
- shader_id |= SHADER_EXCLUDE_DIFFUSE;
+ shader_flag |= SHADER_EXCLUDE_DIFFUSE;
use_light_visibility = true;
}
if(!(object->visibility & PATH_RAY_GLOSSY)) {
- shader_id |= SHADER_EXCLUDE_GLOSSY;
+ shader_flag |= SHADER_EXCLUDE_GLOSSY;
use_light_visibility = true;
}
if(!(object->visibility & PATH_RAY_TRANSMIT)) {
- shader_id |= SHADER_EXCLUDE_TRANSMIT;
+ shader_flag |= SHADER_EXCLUDE_TRANSMIT;
use_light_visibility = true;
}
@@ -249,7 +247,7 @@ void LightManager::device_update_distribution(Device *device, DeviceScene *dscen
if(shader->use_mis && shader->has_surface_emission) {
distribution[offset].x = totarea;
distribution[offset].y = __int_as_float(i + mesh->tri_offset);
- distribution[offset].z = __int_as_float(shader_id);
+ distribution[offset].z = __int_as_float(shader_flag);
distribution[offset].w = __int_as_float(object_id);
offset++;
@@ -267,40 +265,6 @@ void LightManager::device_update_distribution(Device *device, DeviceScene *dscen
totarea += triangle_area(p1, p2, p3);
}
}
-
- /* sample as light disabled for strands */
-#if 0
- size_t i = 0;
-
- foreach(Mesh::Curve& curve, mesh->curves) {
- Shader *shader = scene->shaders[curve.shader];
- int first_key = curve.first_key;
-
- if(shader->use_mis && shader->has_surface_emission) {
- for(int j = 0; j < curve.num_segments(); j++) {
- distribution[offset].x = totarea;
- distribution[offset].y = __int_as_float(i + mesh->curve_offset); // XXX fix kernel code
- distribution[offset].z = __int_as_float(j) & SHADER_MASK;
- distribution[offset].w = __int_as_float(object_id);
- offset++;
-
- float3 p1 = mesh->curve_keys[first_key + j].loc;
- float r1 = mesh->curve_keys[first_key + j].radius;
- float3 p2 = mesh->curve_keys[first_key + j + 1].loc;
- float r2 = mesh->curve_keys[first_key + j + 1].radius;
-
- if(!transform_applied) {
- p1 = transform_point(&tfm, p1);
- p2 = transform_point(&tfm, p2);
- }
-
- totarea += M_PI_F * (r1 + r2) * len(p1 - p2);
- }
- }
-
- i++;
- }
-#endif
}
if(progress.get_cancel()) return;
@@ -432,7 +396,7 @@ void LightManager::device_update_background(Device *device, DeviceScene *dscene,
assert(res > 0);
vector<float3> pixels;
- shade_background_pixels(device, dscene, res, pixels);
+ shade_background_pixels(device, dscene, res, pixels, progress);
if(progress.get_cancel())
return;
diff --git a/intern/cycles/render/mesh.cpp b/intern/cycles/render/mesh.cpp
index 93f24886dc9..9c5ddd55010 100644
--- a/intern/cycles/render/mesh.cpp
+++ b/intern/cycles/render/mesh.cpp
@@ -18,6 +18,7 @@
#include "bvh_build.h"
#include "camera.h"
+#include "curves.h"
#include "device.h"
#include "shader.h"
#include "light.h"
@@ -34,6 +35,39 @@
CCL_NAMESPACE_BEGIN
+/* Triangle */
+
+void Mesh::Triangle::bounds_grow(const float3 *verts, BoundBox& bounds) const
+{
+ bounds.grow(verts[v[0]]);
+ bounds.grow(verts[v[1]]);
+ bounds.grow(verts[v[2]]);
+}
+
+/* Curve */
+
+void Mesh::Curve::bounds_grow(const int k, const float4 *curve_keys, BoundBox& bounds) const
+{
+ float3 P[4];
+
+ P[0] = float4_to_float3(curve_keys[max(first_key + k - 1,first_key)]);
+ P[1] = float4_to_float3(curve_keys[first_key + k]);
+ P[2] = float4_to_float3(curve_keys[first_key + k + 1]);
+ P[3] = float4_to_float3(curve_keys[min(first_key + k + 2, first_key + num_keys - 1)]);
+
+ float3 lower;
+ float3 upper;
+
+ curvebounds(&lower.x, &upper.x, P, 0);
+ curvebounds(&lower.y, &upper.y, P, 1);
+ curvebounds(&lower.z, &upper.z, P, 2);
+
+ float mr = max(curve_keys[first_key + k].w, curve_keys[first_key + k + 1].w);
+
+ bounds.grow(lower, mr);
+ bounds.grow(upper, mr);
+}
+
/* Mesh */
Mesh::Mesh()
@@ -46,6 +80,9 @@ Mesh::Mesh()
displacement_method = DISPLACE_BUMP;
bounds = BoundBox::empty;
+ motion_steps = 3;
+ use_motion_blur = false;
+
bvh = NULL;
tri_offset = 0;
@@ -97,6 +134,22 @@ void Mesh::clear()
transform_normal = transform_identity();
}
+int Mesh::split_vertex(int vertex)
+{
+ /* copy vertex location and vertex attributes */
+ verts.push_back(verts[vertex]);
+
+ foreach(Attribute& attr, attributes.attributes) {
+ if(attr.element == ATTR_ELEMENT_VERTEX) {
+ vector<char> tmp(attr.data_sizeof());
+ memcpy(&tmp[0], attr.data() + tmp.size()*vertex, tmp.size());
+ attr.add(&tmp[0]);
+ }
+ }
+
+ return verts.size() - 1;
+}
+
void Mesh::set_triangle(int i, int v0, int v1, int v2, int shader_, bool smooth_)
{
Triangle tri;
@@ -123,9 +176,8 @@ void Mesh::add_triangle(int v0, int v1, int v2, int shader_, bool smooth_)
void Mesh::add_curve_key(float3 co, float radius)
{
- CurveKey key;
- key.co = co;
- key.radius = radius;
+ float4 key = float3_to_float4(co);
+ key.w = radius;
curve_keys.push_back(key);
}
@@ -151,7 +203,25 @@ void Mesh::compute_bounds()
bnds.grow(verts[i]);
for(size_t i = 0; i < curve_keys_size; i++)
- bnds.grow(curve_keys[i].co, curve_keys[i].radius);
+ bnds.grow(float4_to_float3(curve_keys[i]), curve_keys[i].w);
+
+ Attribute *attr = attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+ if (use_motion_blur && attr) {
+ size_t steps_size = verts.size() * (motion_steps - 1);
+ float3 *vert_steps = attr->data_float3();
+
+ for (size_t i = 0; i < steps_size; i++)
+ bnds.grow(vert_steps[i]);
+ }
+
+ Attribute *curve_attr = curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+ if(use_motion_blur && curve_attr) {
+ size_t steps_size = curve_keys.size() * (motion_steps - 1);
+ float3 *key_steps = curve_attr->data_float3();
+
+ for (size_t i = 0; i < steps_size; i++)
+ bnds.grow(key_steps[i]);
+ }
if(!bnds.valid()) {
bnds = BoundBox::empty;
@@ -161,7 +231,23 @@ void Mesh::compute_bounds()
bnds.grow_safe(verts[i]);
for(size_t i = 0; i < curve_keys_size; i++)
- bnds.grow_safe(curve_keys[i].co, curve_keys[i].radius);
+ bnds.grow_safe(float4_to_float3(curve_keys[i]), curve_keys[i].w);
+
+ if (use_motion_blur && attr) {
+ size_t steps_size = verts.size() * (motion_steps - 1);
+ float3 *vert_steps = attr->data_float3();
+
+ for (size_t i = 0; i < steps_size; i++)
+ bnds.grow_safe(vert_steps[i]);
+ }
+
+ if (use_motion_blur && curve_attr) {
+ size_t steps_size = curve_keys.size() * (motion_steps - 1);
+ float3 *key_steps = curve_attr->data_float3();
+
+ for (size_t i = 0; i < steps_size; i++)
+ bnds.grow_safe(key_steps[i]);
+ }
}
}
@@ -173,6 +259,21 @@ void Mesh::compute_bounds()
bounds = bnds;
}
+static float3 compute_face_normal(const Mesh::Triangle& t, float3 *verts)
+{
+ float3 v0 = verts[t.v[0]];
+ float3 v1 = verts[t.v[1]];
+ float3 v2 = verts[t.v[2]];
+
+ float3 norm = cross(v1 - v0, v2 - v0);
+ float normlen = len(norm);
+
+ if(normlen == 0.0f)
+ return make_float3(0.0f, 0.0f, 0.0f);
+
+ return norm / normlen;
+}
+
void Mesh::add_face_normals()
{
/* don't compute if already there */
@@ -192,17 +293,7 @@ void Mesh::add_face_normals()
Triangle *triangles_ptr = &triangles[0];
for(size_t i = 0; i < triangles_size; i++) {
- Triangle t = triangles_ptr[i];
- float3 v0 = verts_ptr[t.v[0]];
- float3 v1 = verts_ptr[t.v[1]];
- float3 v2 = verts_ptr[t.v[2]];
-
- float3 norm = cross(v1 - v0, v2 - v0);
- float normlen = len(norm);
- if(normlen == 0.0f)
- fN[i] = make_float3(0.0f, 0.0f, 0.0f);
- else
- fN[i] = norm / normlen;
+ fN[i] = compute_face_normal(triangles_ptr[i], verts_ptr);
if(flip)
fN[i] = -fN[i];
@@ -220,36 +311,69 @@ void Mesh::add_face_normals()
void Mesh::add_vertex_normals()
{
- /* don't compute if already there */
- if(attributes.find(ATTR_STD_VERTEX_NORMAL))
- return;
-
- /* get attributes */
- Attribute *attr_fN = attributes.find(ATTR_STD_FACE_NORMAL);
- Attribute *attr_vN = attributes.add(ATTR_STD_VERTEX_NORMAL);
+ bool flip = transform_negative_scaled;
+ size_t verts_size = verts.size();
+ size_t triangles_size = triangles.size();
- float3 *fN = attr_fN->data_float3();
- float3 *vN = attr_vN->data_float3();
+ /* static vertex normals */
+ if(!attributes.find(ATTR_STD_VERTEX_NORMAL)) {
+ /* get attributes */
+ Attribute *attr_fN = attributes.find(ATTR_STD_FACE_NORMAL);
+ Attribute *attr_vN = attributes.add(ATTR_STD_VERTEX_NORMAL);
- /* compute vertex normals */
- memset(vN, 0, verts.size()*sizeof(float3));
+ float3 *fN = attr_fN->data_float3();
+ float3 *vN = attr_vN->data_float3();
- size_t verts_size = verts.size();
- size_t triangles_size = triangles.size();
- bool flip = transform_negative_scaled;
+ /* compute vertex normals */
+ memset(vN, 0, verts.size()*sizeof(float3));
- if(triangles_size) {
- Triangle *triangles_ptr = &triangles[0];
+ if(triangles_size) {
+ Triangle *triangles_ptr = &triangles[0];
- for(size_t i = 0; i < triangles_size; i++)
- for(size_t j = 0; j < 3; j++)
- vN[triangles_ptr[i].v[j]] += fN[i];
+ for(size_t i = 0; i < triangles_size; i++)
+ for(size_t j = 0; j < 3; j++)
+ vN[triangles_ptr[i].v[j]] += fN[i];
+ }
+
+ for(size_t i = 0; i < verts_size; i++) {
+ vN[i] = normalize(vN[i]);
+ if(flip)
+ vN[i] = -vN[i];
+ }
}
- for(size_t i = 0; i < verts_size; i++) {
- vN[i] = normalize(vN[i]);
- if(flip)
- vN[i] = -vN[i];
+ /* motion vertex normals */
+ Attribute *attr_mP = attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+ Attribute *attr_mN = attributes.find(ATTR_STD_MOTION_VERTEX_NORMAL);
+
+ if(has_motion_blur() && attr_mP && !attr_mN) {
+ /* create attribute */
+ attr_mN = attributes.add(ATTR_STD_MOTION_VERTEX_NORMAL);
+
+ for(int step = 0; step < motion_steps - 1; step++) {
+ float3 *mP = attr_mP->data_float3() + step*verts.size();
+ float3 *mN = attr_mN->data_float3() + step*verts.size();
+
+ /* compute */
+ memset(mN, 0, verts.size()*sizeof(float3));
+
+ if(triangles_size) {
+ Triangle *triangles_ptr = &triangles[0];
+
+ for(size_t i = 0; i < triangles_size; i++) {
+ for(size_t j = 0; j < 3; j++) {
+ float3 fN = compute_face_normal(triangles_ptr[i], mP);
+ mN[triangles_ptr[i].v[j]] += fN;
+ }
+ }
+ }
+
+ for(size_t i = 0; i < verts_size; i++) {
+ mN[i] = normalize(mN[i]);
+ if(flip)
+ mN[i] = -mN[i];
+ }
+ }
}
}
@@ -335,18 +459,14 @@ void Mesh::pack_verts(float4 *tri_verts, float4 *tri_vindex, size_t vert_offset)
void Mesh::pack_curves(Scene *scene, float4 *curve_key_co, float4 *curve_data, size_t curvekey_offset)
{
size_t curve_keys_size = curve_keys.size();
- CurveKey *keys_ptr = NULL;
+ float4 *keys_ptr = NULL;
/* pack curve keys */
if(curve_keys_size) {
keys_ptr = &curve_keys[0];
- for(size_t i = 0; i < curve_keys_size; i++) {
- float3 p = keys_ptr[i].co;
- float radius = keys_ptr[i].radius;
-
- curve_key_co[i] = make_float4(p.x, p.y, p.z, radius);
- }
+ for(size_t i = 0; i < curve_keys_size; i++)
+ curve_key_co[i] = keys_ptr[i];
}
/* pack curve segments */
@@ -430,6 +550,13 @@ void Mesh::tag_update(Scene *scene, bool rebuild)
scene->object_manager->need_update = true;
}
+bool Mesh::has_motion_blur() const
+{
+ return (use_motion_blur &&
+ (attributes.find(ATTR_STD_MOTION_VERTEX_POSITION) ||
+ curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION)));
+}
+
/* Mesh Manager */
MeshManager::MeshManager()
@@ -641,10 +768,16 @@ static void update_attribute_element_offset(Mesh *mesh, vector<float>& attr_floa
size_t size = mattr->element_size(
mesh->verts.size(),
mesh->triangles.size(),
+ mesh->motion_steps,
mesh->curves.size(),
mesh->curve_keys.size());
- if(mattr->type == TypeDesc::TypeFloat) {
+ if(mattr->element == ATTR_ELEMENT_VOXEL) {
+ /* store slot in offset value */
+ VoxelAttribute *voxel_data = mattr->data_voxel();
+ offset = voxel_data->slot;
+ }
+ else if(mattr->type == TypeDesc::TypeFloat) {
float *data = mattr->data_float();
offset = attr_float.size();
@@ -663,19 +796,21 @@ static void update_attribute_element_offset(Mesh *mesh, vector<float>& attr_floa
attr_float3[offset+k] = (&tfm->x)[k];
}
else {
- float3 *data = mattr->data_float3();
+ float4 *data = mattr->data_float4();
offset = attr_float3.size();
attr_float3.resize(attr_float3.size() + size);
for(size_t k = 0; k < size; k++)
- attr_float3[offset+k] = float3_to_float4(data[k]);
+ attr_float3[offset+k] = data[k];
}
/* mesh vertex/curve index is global, not per object, so we sneak
* a correction for that in here */
if(element == ATTR_ELEMENT_VERTEX)
offset -= mesh->vert_offset;
+ else if(element == ATTR_ELEMENT_VERTEX_MOTION)
+ offset -= mesh->vert_offset;
else if(element == ATTR_ELEMENT_FACE)
offset -= mesh->tri_offset;
else if(element == ATTR_ELEMENT_CORNER)
@@ -684,6 +819,8 @@ static void update_attribute_element_offset(Mesh *mesh, vector<float>& attr_floa
offset -= mesh->curve_offset;
else if(element == ATTR_ELEMENT_CURVE_KEY)
offset -= mesh->curvekey_offset;
+ else if(element == ATTR_ELEMENT_CURVE_KEY_MOTION)
+ offset -= mesh->curvekey_offset;
}
else {
/* attribute not found */
@@ -750,8 +887,8 @@ void MeshManager::device_update_attributes(Device *device, DeviceScene *dscene,
/* create attribute lookup maps */
if(scene->shader_manager->use_osl())
update_osl_attributes(device, scene, mesh_attributes);
- else
- update_svm_attributes(device, dscene, scene, mesh_attributes);
+
+ update_svm_attributes(device, dscene, scene, mesh_attributes);
if(progress.get_cancel()) return;
@@ -866,9 +1003,9 @@ void MeshManager::device_update_bvh(Device *device, DeviceScene *dscene, Scene *
dscene->tri_woop.reference(&pack.tri_woop[0], pack.tri_woop.size());
device->tex_alloc("__tri_woop", dscene->tri_woop);
}
- if(pack.prim_segment.size()) {
- dscene->prim_segment.reference((uint*)&pack.prim_segment[0], pack.prim_segment.size());
- device->tex_alloc("__prim_segment", dscene->prim_segment);
+ if(pack.prim_type.size()) {
+ dscene->prim_type.reference((uint*)&pack.prim_type[0], pack.prim_type.size());
+ device->tex_alloc("__prim_type", dscene->prim_type);
}
if(pack.prim_visibility.size()) {
dscene->prim_visibility.reference((uint*)&pack.prim_visibility[0], pack.prim_visibility.size());
@@ -956,7 +1093,6 @@ void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scen
foreach(Shader *shader, scene->shaders)
shader->need_update_attributes = false;
- float shuttertime = scene->camera->shuttertime;
#ifdef __OBJECT_MOTION__
Scene::MotionType need_motion = scene->need_motion(device->info.advanced_shading);
bool motion_blur = need_motion == Scene::MOTION_BLUR;
@@ -965,7 +1101,7 @@ void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scen
#endif
foreach(Object *object, scene->objects)
- object->compute_bounds(motion_blur, shuttertime);
+ object->compute_bounds(motion_blur);
if(progress.get_cancel()) return;
@@ -979,7 +1115,7 @@ void MeshManager::device_free(Device *device, DeviceScene *dscene)
device->tex_free(dscene->bvh_nodes);
device->tex_free(dscene->object_node);
device->tex_free(dscene->tri_woop);
- device->tex_free(dscene->prim_segment);
+ device->tex_free(dscene->prim_type);
device->tex_free(dscene->prim_visibility);
device->tex_free(dscene->prim_index);
device->tex_free(dscene->prim_object);
@@ -996,7 +1132,7 @@ void MeshManager::device_free(Device *device, DeviceScene *dscene)
dscene->bvh_nodes.clear();
dscene->object_node.clear();
dscene->tri_woop.clear();
- dscene->prim_segment.clear();
+ dscene->prim_type.clear();
dscene->prim_visibility.clear();
dscene->prim_index.clear();
dscene->prim_object.clear();
diff --git a/intern/cycles/render/mesh.h b/intern/cycles/render/mesh.h
index 281a8f0645e..247e3dd555e 100644
--- a/intern/cycles/render/mesh.h
+++ b/intern/cycles/render/mesh.h
@@ -46,6 +46,8 @@ public:
/* Mesh Triangle */
struct Triangle {
int v[3];
+
+ void bounds_grow(const float3 *verts, BoundBox& bounds) const;
};
/* Mesh Curve */
@@ -55,11 +57,8 @@ public:
uint shader;
int num_segments() { return num_keys - 1; }
- };
- struct CurveKey {
- float3 co;
- float radius;
+ void bounds_grow(const int k, const float4 *curve_keys, BoundBox& bounds) const;
};
/* Displacement */
@@ -77,7 +76,7 @@ public:
vector<uint> shader;
vector<bool> smooth;
- vector<CurveKey> curve_keys;
+ vector<float4> curve_keys; /* co + radius */
vector<Curve> curves;
vector<uint> used_shaders;
@@ -90,6 +89,9 @@ public:
Transform transform_normal;
DisplacementMethod displacement_method;
+ uint motion_steps;
+ bool use_motion_blur;
+
/* Update Flags */
bool need_update;
bool need_update_rebuild;
@@ -112,6 +114,7 @@ public:
void add_triangle(int v0, int v1, int v2, int shader, bool smooth);
void add_curve_key(float3 loc, float radius);
void add_curve(int first_key, int num_keys, int shader);
+ int split_vertex(int vertex);
void compute_bounds();
void add_face_normals();
@@ -126,6 +129,8 @@ public:
bool need_attribute(Scene *scene, ustring name);
void tag_update(Scene *scene, bool rebuild);
+
+ bool has_motion_blur() const;
};
/* Mesh Manager */
diff --git a/intern/cycles/render/mesh_displace.cpp b/intern/cycles/render/mesh_displace.cpp
index 2fd8a978511..661fd9c66c1 100644
--- a/intern/cycles/render/mesh_displace.cpp
+++ b/intern/cycles/render/mesh_displace.cpp
@@ -44,7 +44,7 @@ bool MeshManager::displace(Device *device, DeviceScene *dscene, Scene *scene, Me
progress.set_status("Updating Mesh", msg);
/* find object index. todo: is arbitrary */
- size_t object_index = ~0;
+ size_t object_index = OBJECT_NONE;
for(size_t i = 0; i < scene->objects.size(); i++) {
if(scene->objects[i]->mesh == mesh) {
@@ -119,17 +119,21 @@ bool MeshManager::displace(Device *device, DeviceScene *dscene, Scene *scene, Me
task.shader_eval_type = SHADER_EVAL_DISPLACE;
task.shader_x = 0;
task.shader_w = d_output.size();
+ task.get_cancel = function_bind(&Progress::get_cancel, &progress);
device->task_add(task);
device->task_wait();
+ if(progress.get_cancel()) {
+ device->mem_free(d_input);
+ device->mem_free(d_output);
+ return false;
+ }
+
device->mem_copy_from(d_output, 0, 1, d_output.size(), sizeof(float4));
device->mem_free(d_input);
device->mem_free(d_output);
- if(progress.get_cancel())
- return false;
-
/* read result */
done.clear();
done.resize(mesh->verts.size(), false);
diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp
index af6fca29ab0..a53e0b39435 100644
--- a/intern/cycles/render/nodes.cpp
+++ b/intern/cycles/render/nodes.cpp
@@ -189,10 +189,12 @@ ImageTextureNode::ImageTextureNode()
slot = -1;
is_float = -1;
is_linear = false;
+ use_alpha = true;
filename = "";
builtin_data = NULL;
color_space = ustring("Color");
projection = ustring("Flat");
+ interpolation = INTERPOLATION_LINEAR;
projection_blend = 0.0f;
animated = false;
@@ -204,7 +206,7 @@ ImageTextureNode::ImageTextureNode()
ImageTextureNode::~ImageTextureNode()
{
if(image_manager)
- image_manager->remove_image(filename, builtin_data);
+ image_manager->remove_image(filename, builtin_data, interpolation);
}
ShaderNode *ImageTextureNode::clone() const
@@ -241,7 +243,7 @@ void ImageTextureNode::compile(SVMCompiler& compiler)
image_manager = compiler.image_manager;
if(is_float == -1) {
bool is_float_bool;
- slot = image_manager->add_image(filename, builtin_data, animated, is_float_bool, is_linear);
+ slot = image_manager->add_image(filename, builtin_data, animated, is_float_bool, is_linear, interpolation, use_alpha);
is_float = (int)is_float_bool;
}
@@ -315,6 +317,22 @@ void ImageTextureNode::compile(OSLCompiler& compiler)
compiler.parameter("projection_blend", projection_blend);
compiler.parameter("is_float", is_float);
compiler.parameter("use_alpha", !alpha_out->links.empty());
+
+ switch (interpolation) {
+ case INTERPOLATION_CLOSEST:
+ compiler.parameter("interpolation", "closest");
+ break;
+ case INTERPOLATION_CUBIC:
+ compiler.parameter("interpolation", "cubic");
+ break;
+ case INTERPOLATION_SMART:
+ compiler.parameter("interpolation", "smart");
+ break;
+ case INTERPOLATION_LINEAR:
+ default:
+ compiler.parameter("interpolation", "linear");
+ break;
+ }
compiler.add(this, "node_image_texture");
}
@@ -340,6 +358,7 @@ EnvironmentTextureNode::EnvironmentTextureNode()
slot = -1;
is_float = -1;
is_linear = false;
+ use_alpha = true;
filename = "";
builtin_data = NULL;
color_space = ustring("Color");
@@ -354,7 +373,7 @@ EnvironmentTextureNode::EnvironmentTextureNode()
EnvironmentTextureNode::~EnvironmentTextureNode()
{
if(image_manager)
- image_manager->remove_image(filename, builtin_data);
+ image_manager->remove_image(filename, builtin_data, INTERPOLATION_LINEAR);
}
ShaderNode *EnvironmentTextureNode::clone() const
@@ -389,7 +408,7 @@ void EnvironmentTextureNode::compile(SVMCompiler& compiler)
image_manager = compiler.image_manager;
if(slot == -1) {
bool is_float_bool;
- slot = image_manager->add_image(filename, builtin_data, animated, is_float_bool, is_linear);
+ slot = image_manager->add_image(filename, builtin_data, animated, is_float_bool, is_linear, INTERPOLATION_LINEAR, use_alpha);
is_float = (int)is_float_bool;
}
@@ -565,13 +584,13 @@ static void sky_texture_precompute_new(SunSky *sunsky, float3 dir, float turbidi
/* Copy values from sky_state to SunSky */
for (int i = 0; i < 9; ++i) {
- sunsky->config_x[i] = sky_state->configs[0][i];
- sunsky->config_y[i] = sky_state->configs[1][i];
- sunsky->config_z[i] = sky_state->configs[2][i];
+ sunsky->config_x[i] = (float)sky_state->configs[0][i];
+ sunsky->config_y[i] = (float)sky_state->configs[1][i];
+ sunsky->config_z[i] = (float)sky_state->configs[2][i];
}
- sunsky->radiance_x = sky_state->radiances[0];
- sunsky->radiance_y = sky_state->radiances[1];
- sunsky->radiance_z = sky_state->radiances[2];
+ sunsky->radiance_x = (float)sky_state->radiances[0];
+ sunsky->radiance_y = (float)sky_state->radiances[1];
+ sunsky->radiance_z = (float)sky_state->radiances[2];
/* Free sky_state */
arhosekskymodelstate_free(sky_state);
@@ -612,6 +631,8 @@ void SkyTextureNode::compile(SVMCompiler& compiler)
sky_texture_precompute_old(&sunsky, sun_direction, turbidity);
else if(type_enum[type] == NODE_SKY_NEW)
sky_texture_precompute_new(&sunsky, sun_direction, turbidity, ground_albedo);
+ else
+ assert(false);
if(vector_in->link)
compiler.stack_assign(vector_in);
@@ -649,6 +670,8 @@ void SkyTextureNode::compile(OSLCompiler& compiler)
sky_texture_precompute_old(&sunsky, sun_direction, turbidity);
else if(type_enum[type] == NODE_SKY_NEW)
sky_texture_precompute_new(&sunsky, sun_direction, turbidity, ground_albedo);
+ else
+ assert(false);
compiler.parameter("sky_model", type);
compiler.parameter("theta", sunsky.theta);
@@ -2192,8 +2215,9 @@ void TextureCoordinateNode::attributes(Shader *shader, AttributeRequestSet *attr
if(shader->has_volume) {
if(!from_dupli) {
- if(!output("Generated")->links.empty())
+ if(!output("Generated")->links.empty()) {
attributes->add(ATTR_STD_GENERATED_TRANSFORM);
+ }
}
}
@@ -2310,6 +2334,78 @@ void TextureCoordinateNode::compile(OSLCompiler& compiler)
compiler.add(this, "node_texture_coordinate");
}
+UVMapNode::UVMapNode()
+: ShaderNode("uvmap")
+{
+ attribute = "";
+ from_dupli = false;
+
+ add_output("UV", SHADER_SOCKET_POINT);
+}
+
+void UVMapNode::attributes(Shader *shader, AttributeRequestSet *attributes)
+{
+ if(shader->has_surface) {
+ if(!from_dupli) {
+ if(!output("UV")->links.empty()) {
+ if (attribute != "")
+ attributes->add(attribute);
+ else
+ attributes->add(ATTR_STD_UV);
+ }
+ }
+ }
+
+ ShaderNode::attributes(shader, attributes);
+}
+
+void UVMapNode::compile(SVMCompiler& compiler)
+{
+ ShaderOutput *out = output("UV");
+ NodeType texco_node = NODE_TEX_COORD;
+ NodeType attr_node = NODE_ATTR;
+ int attr;
+
+ if(bump == SHADER_BUMP_DX) {
+ texco_node = NODE_TEX_COORD_BUMP_DX;
+ attr_node = NODE_ATTR_BUMP_DX;
+ }
+ else if(bump == SHADER_BUMP_DY) {
+ texco_node = NODE_TEX_COORD_BUMP_DY;
+ attr_node = NODE_ATTR_BUMP_DY;
+ }
+
+ if(!out->links.empty()) {
+ if(from_dupli) {
+ compiler.stack_assign(out);
+ compiler.add_node(texco_node, NODE_TEXCO_DUPLI_UV, out->stack_offset);
+ }
+ else {
+ if (attribute != "")
+ attr = compiler.attribute(attribute);
+ else
+ attr = compiler.attribute(ATTR_STD_UV);
+
+ compiler.stack_assign(out);
+ compiler.add_node(attr_node, attr, out->stack_offset, NODE_ATTR_FLOAT3);
+ }
+ }
+}
+
+void UVMapNode::compile(OSLCompiler& compiler)
+{
+ if(bump == SHADER_BUMP_DX)
+ compiler.parameter("bump_offset", "dx");
+ else if(bump == SHADER_BUMP_DY)
+ compiler.parameter("bump_offset", "dy");
+ else
+ compiler.parameter("bump_offset", "center");
+
+ compiler.parameter("from_dupli", from_dupli);
+ compiler.parameter("name", attribute.c_str());
+ compiler.add(this, "node_uv_map");
+}
+
/* Light Path */
LightPathNode::LightPathNode()
@@ -2325,6 +2421,7 @@ LightPathNode::LightPathNode()
add_output("Is Volume Scatter Ray", SHADER_SOCKET_FLOAT);
add_output("Ray Length", SHADER_SOCKET_FLOAT);
add_output("Ray Depth", SHADER_SOCKET_FLOAT);
+ add_output("Transparent Depth", SHADER_SOCKET_FLOAT);
}
void LightPathNode::compile(SVMCompiler& compiler)
@@ -2392,6 +2489,11 @@ void LightPathNode::compile(SVMCompiler& compiler)
compiler.add_node(NODE_LIGHT_PATH, NODE_LP_ray_depth, out->stack_offset);
}
+ out = output("Transparent Depth");
+ if(!out->links.empty()) {
+ compiler.stack_assign(out);
+ compiler.add_node(NODE_LIGHT_PATH, NODE_LP_ray_transparent, out->stack_offset);
+ }
}
void LightPathNode::compile(OSLCompiler& compiler)
@@ -2612,7 +2714,7 @@ void HairInfoNode::attributes(Shader *shader, AttributeRequestSet *attributes)
if(!intercept_out->links.empty())
attributes->add(ATTR_STD_CURVE_INTERCEPT);
}
-
+
ShaderNode::attributes(shader, attributes);
}
@@ -3126,15 +3228,22 @@ AttributeNode::AttributeNode()
void AttributeNode::attributes(Shader *shader, AttributeRequestSet *attributes)
{
- if(shader->has_surface) {
- ShaderOutput *color_out = output("Color");
- ShaderOutput *vector_out = output("Vector");
- ShaderOutput *fac_out = output("Fac");
+ ShaderOutput *color_out = output("Color");
+ ShaderOutput *vector_out = output("Vector");
+ ShaderOutput *fac_out = output("Fac");
- if(!color_out->links.empty() || !vector_out->links.empty() || !fac_out->links.empty())
+ if(!color_out->links.empty() || !vector_out->links.empty() || !fac_out->links.empty()) {
+ AttributeStandard std = Attribute::name_standard(attribute.c_str());
+
+ if(std != ATTR_STD_NONE)
+ attributes->add(std);
+ else
attributes->add(attribute);
}
-
+
+ if(shader->has_volume)
+ attributes->add(ATTR_STD_GENERATED_TRANSFORM);
+
ShaderNode::attributes(shader, attributes);
}
@@ -3144,6 +3253,13 @@ void AttributeNode::compile(SVMCompiler& compiler)
ShaderOutput *vector_out = output("Vector");
ShaderOutput *fac_out = output("Fac");
NodeType attr_node = NODE_ATTR;
+ AttributeStandard std = Attribute::name_standard(attribute.c_str());
+ int attr;
+
+ if(std != ATTR_STD_NONE)
+ attr = compiler.attribute(std);
+ else
+ attr = compiler.attribute(attribute);
if(bump == SHADER_BUMP_DX)
attr_node = NODE_ATTR_BUMP_DX;
@@ -3151,8 +3267,6 @@ void AttributeNode::compile(SVMCompiler& compiler)
attr_node = NODE_ATTR_BUMP_DY;
if(!color_out->links.empty() || !vector_out->links.empty()) {
- int attr = compiler.attribute(attribute);
-
if(!color_out->links.empty()) {
compiler.stack_assign(color_out);
compiler.add_node(attr_node, attr, color_out->stack_offset, NODE_ATTR_FLOAT3);
@@ -3164,8 +3278,6 @@ void AttributeNode::compile(SVMCompiler& compiler)
}
if(!fac_out->links.empty()) {
- int attr = compiler.attribute(attribute);
-
compiler.stack_assign(fac_out);
compiler.add_node(attr_node, attr, fac_out->stack_offset, NODE_ATTR_FLOAT);
}
@@ -3179,8 +3291,12 @@ void AttributeNode::compile(OSLCompiler& compiler)
compiler.parameter("bump_offset", "dy");
else
compiler.parameter("bump_offset", "center");
+
+ if(Attribute::name_standard(attribute.c_str()) != ATTR_STD_NONE)
+ compiler.parameter("name", (string("geom:") + attribute.c_str()).c_str());
+ else
+ compiler.parameter("name", attribute.c_str());
- compiler.parameter("name", attribute.c_str());
compiler.add(this, "node_attribute");
}
@@ -3428,6 +3544,7 @@ static ShaderEnum math_type_init()
enm.insert("Less Than", NODE_MATH_LESS_THAN);
enm.insert("Greater Than", NODE_MATH_GREATER_THAN);
enm.insert("Modulo", NODE_MATH_MODULO);
+ enm.insert("Absolute", NODE_MATH_ABSOLUTE);
return enm;
}
diff --git a/intern/cycles/render/nodes.h b/intern/cycles/render/nodes.h
index 86c4f490875..d94d8ce6033 100644
--- a/intern/cycles/render/nodes.h
+++ b/intern/cycles/render/nodes.h
@@ -72,10 +72,12 @@ public:
int slot;
int is_float;
bool is_linear;
+ bool use_alpha;
string filename;
void *builtin_data;
ustring color_space;
ustring projection;
+ InterpolationType interpolation;
float projection_blend;
bool animated;
@@ -94,6 +96,7 @@ public:
int slot;
int is_float;
bool is_linear;
+ bool use_alpha;
string filename;
void *builtin_data;
ustring color_space;
@@ -208,6 +211,7 @@ public:
BsdfNode(bool scattering = false);
SHADER_NODE_BASE_CLASS(BsdfNode);
+ bool has_spatial_varying() { return true; }
void compile(SVMCompiler& compiler, ShaderInput *param1, ShaderInput *param2, ShaderInput *param3 = NULL, ShaderInput *param4 = NULL);
ClosureType closure;
@@ -279,6 +283,7 @@ public:
SHADER_NODE_CLASS(SubsurfaceScatteringNode)
bool has_surface_bssrdf() { return true; }
bool has_bssrdf_bump();
+ bool has_spatial_varying() { return true; }
static ShaderEnum falloff_enum;
};
@@ -288,6 +293,7 @@ public:
SHADER_NODE_CLASS(EmissionNode)
bool has_surface_emission() { return true; }
+ bool has_spatial_varying() { return true; }
bool total_power;
};
@@ -305,6 +311,8 @@ public:
class AmbientOcclusionNode : public ShaderNode {
public:
SHADER_NODE_CLASS(AmbientOcclusionNode)
+
+ bool has_spatial_varying() { return true; }
};
class VolumeNode : public ShaderNode {
@@ -339,16 +347,28 @@ class GeometryNode : public ShaderNode {
public:
SHADER_NODE_CLASS(GeometryNode)
void attributes(Shader *shader, AttributeRequestSet *attributes);
+ bool has_spatial_varying() { return true; }
};
class TextureCoordinateNode : public ShaderNode {
public:
SHADER_NODE_CLASS(TextureCoordinateNode)
void attributes(Shader *shader, AttributeRequestSet *attributes);
+ bool has_spatial_varying() { return true; }
bool from_dupli;
};
+class UVMapNode : public ShaderNode {
+public:
+ SHADER_NODE_CLASS(UVMapNode)
+ void attributes(Shader *shader, AttributeRequestSet *attributes);
+ bool has_spatial_varying() { return true; }
+
+ ustring attribute;
+ bool from_dupli;
+};
+
class LightPathNode : public ShaderNode {
public:
SHADER_NODE_CLASS(LightPathNode)
@@ -357,6 +377,7 @@ public:
class LightFalloffNode : public ShaderNode {
public:
SHADER_NODE_CLASS(LightFalloffNode)
+ bool has_spatial_varying() { return true; }
};
class ObjectInfoNode : public ShaderNode {
@@ -375,6 +396,7 @@ public:
SHADER_NODE_CLASS(HairInfoNode)
void attributes(Shader *shader, AttributeRequestSet *attributes);
+ bool has_spatial_varying() { return true; }
};
class ValueNode : public ShaderNode {
@@ -460,6 +482,7 @@ class AttributeNode : public ShaderNode {
public:
SHADER_NODE_CLASS(AttributeNode)
void attributes(Shader *shader, AttributeRequestSet *attributes);
+ bool has_spatial_varying() { return true; }
ustring attribute;
};
@@ -467,21 +490,25 @@ public:
class CameraNode : public ShaderNode {
public:
SHADER_NODE_CLASS(CameraNode)
+ bool has_spatial_varying() { return true; }
};
class FresnelNode : public ShaderNode {
public:
SHADER_NODE_CLASS(FresnelNode)
+ bool has_spatial_varying() { return true; }
};
class LayerWeightNode : public ShaderNode {
public:
SHADER_NODE_CLASS(LayerWeightNode)
+ bool has_spatial_varying() { return true; }
};
class WireframeNode : public ShaderNode {
public:
SHADER_NODE_CLASS(WireframeNode)
+ bool has_spatial_varying() { return true; }
bool use_pixel_size;
};
@@ -538,6 +565,8 @@ public:
class BumpNode : public ShaderNode {
public:
SHADER_NODE_CLASS(BumpNode)
+ bool has_spatial_varying() { return true; }
+
bool invert;
};
@@ -568,6 +597,10 @@ public:
class OSLScriptNode : public ShaderNode {
public:
SHADER_NODE_CLASS(OSLScriptNode)
+
+ /* ideally we could better detect this, but we can't query this now */
+ bool has_spatial_varying() { return true; }
+
string filepath;
string bytecode_hash;
@@ -581,6 +614,7 @@ class NormalMapNode : public ShaderNode {
public:
SHADER_NODE_CLASS(NormalMapNode)
void attributes(Shader *shader, AttributeRequestSet *attributes);
+ bool has_spatial_varying() { return true; }
ustring space;
static ShaderEnum space_enum;
@@ -592,6 +626,7 @@ class TangentNode : public ShaderNode {
public:
SHADER_NODE_CLASS(TangentNode)
void attributes(Shader *shader, AttributeRequestSet *attributes);
+ bool has_spatial_varying() { return true; }
ustring direction_type;
static ShaderEnum direction_type_enum;
diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp
index 3edb934ef2c..027bfd71931 100644
--- a/intern/cycles/render/object.cpp
+++ b/intern/cycles/render/object.cpp
@@ -19,6 +19,7 @@
#include "mesh.h"
#include "curves.h"
#include "object.h"
+#include "particles.h"
#include "scene.h"
#include "util_foreach.h"
@@ -38,7 +39,8 @@ Object::Object()
visibility = ~0;
random_id = 0;
pass_id = 0;
- particle_id = 0;
+ particle_system = NULL;
+ particle_index = 0;
bounds = BoundBox::empty;
motion.pre = transform_identity();
motion.mid = transform_identity();
@@ -53,7 +55,7 @@ Object::~Object()
{
}
-void Object::compute_bounds(bool motion_blur, float shuttertime)
+void Object::compute_bounds(bool motion_blur)
{
BoundBox mbounds = mesh->bounds;
@@ -66,10 +68,7 @@ void Object::compute_bounds(bool motion_blur, float shuttertime)
/* todo: this is really terrible. according to pbrt there is a better
* way to find this iteratively, but did not find implementation yet
* or try to implement myself */
- float start_t = 0.5f - shuttertime*0.25f;
- float end_t = 0.5f + shuttertime*0.25f;
-
- for(float t = start_t; t < end_t; t += (1.0f/128.0f)*shuttertime) {
+ for(float t = 0.0f; t < 1.0f; t += (1.0f/128.0f)) {
Transform ttfm;
transform_motion_interpolate(&ttfm, &decomp, t);
@@ -80,29 +79,83 @@ void Object::compute_bounds(bool motion_blur, float shuttertime)
bounds = mbounds.transformed(&tfm);
}
-void Object::apply_transform()
+void Object::apply_transform(bool apply_to_motion)
{
if(!mesh || tfm == transform_identity())
return;
+
+ /* triangles */
+ if(mesh->verts.size()) {
+ /* store matrix to transform later. when accessing these as attributes we
+ * do not want the transform to be applied for consistency between static
+ * and dynamic BVH, so we do it on packing. */
+ mesh->transform_normal = transform_transpose(transform_inverse(tfm));
+
+ /* apply to mesh vertices */
+ for(size_t i = 0; i < mesh->verts.size(); i++)
+ mesh->verts[i] = transform_point(&tfm, mesh->verts[i]);
+
+ if(apply_to_motion) {
+ Attribute *attr = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+
+ if (attr) {
+ size_t steps_size = mesh->verts.size() * (mesh->motion_steps - 1);
+ float3 *vert_steps = attr->data_float3();
+
+ for (size_t i = 0; i < steps_size; i++)
+ vert_steps[i] = transform_point(&tfm, vert_steps[i]);
+ }
- float3 c0 = transform_get_column(&tfm, 0);
- float3 c1 = transform_get_column(&tfm, 1);
- float3 c2 = transform_get_column(&tfm, 2);
- float scalar = pow(fabsf(dot(cross(c0, c1), c2)), 1.0f/3.0f);
+ Attribute *attr_N = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_NORMAL);
- for(size_t i = 0; i < mesh->verts.size(); i++)
- mesh->verts[i] = transform_point(&tfm, mesh->verts[i]);
+ if(attr_N) {
+ Transform ntfm = mesh->transform_normal;
+ size_t steps_size = mesh->verts.size() * (mesh->motion_steps - 1);
+ float3 *normal_steps = attr_N->data_float3();
- for(size_t i = 0; i < mesh->curve_keys.size(); i++) {
- mesh->curve_keys[i].co = transform_point(&tfm, mesh->curve_keys[i].co);
- /* scale for strand radius - only correct for uniform transforms*/
- mesh->curve_keys[i].radius *= scalar;
+ for (size_t i = 0; i < steps_size; i++)
+ normal_steps[i] = normalize(transform_direction(&ntfm, normal_steps[i]));
+ }
+ }
}
- /* store matrix to transform later. when accessing these as attributes we
- * do not want the transform to be applied for consistency between static
- * and dynamic BVH, so we do it on packing. */
- mesh->transform_normal = transform_transpose(transform_inverse(tfm));
+ /* curves */
+ if(mesh->curve_keys.size()) {
+ /* compute uniform scale */
+ float3 c0 = transform_get_column(&tfm, 0);
+ float3 c1 = transform_get_column(&tfm, 1);
+ float3 c2 = transform_get_column(&tfm, 2);
+ float scalar = pow(fabsf(dot(cross(c0, c1), c2)), 1.0f/3.0f);
+
+ /* apply transform to curve keys */
+ for(size_t i = 0; i < mesh->curve_keys.size(); i++) {
+ float3 co = transform_point(&tfm, float4_to_float3(mesh->curve_keys[i]));
+ float radius = mesh->curve_keys[i].w * scalar;
+
+ /* scale for curve radius is only correct for uniform scale */
+ mesh->curve_keys[i] = float3_to_float4(co);
+ mesh->curve_keys[i].w = radius;
+ }
+
+ if(apply_to_motion) {
+ Attribute *curve_attr = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+
+ if (curve_attr) {
+ /* apply transform to motion curve keys */
+ size_t steps_size = mesh->curve_keys.size() * (mesh->motion_steps - 1);
+ float4 *key_steps = curve_attr->data_float4();
+
+ for (size_t i = 0; i < steps_size; i++) {
+ float3 co = transform_point(&tfm, float4_to_float3(key_steps[i]));
+ float radius = key_steps[i].w * scalar;
+
+ /* scale for curve radius is only correct for uniform scale */
+ key_steps[i] = float3_to_float4(co);
+ key_steps[i].w = radius;
+ }
+ }
+ }
+ }
/* we keep normals pointing in same direction on negative scale, notify
* mesh about this in case it (re)calculates normals */
@@ -111,7 +164,7 @@ void Object::apply_transform()
if(bounds.valid()) {
mesh->compute_bounds();
- compute_bounds(false, 0.0f);
+ compute_bounds(false);
}
/* tfm is not reset to identity, all code that uses it needs to check the
@@ -137,6 +190,26 @@ void Object::tag_update(Scene *scene)
scene->object_manager->need_update = true;
}
+vector<float> Object::motion_times()
+{
+ /* compute times at which we sample motion for this object */
+ vector<float> times;
+
+ if(!mesh || mesh->motion_steps == 1)
+ return times;
+
+ int motion_steps = mesh->motion_steps;
+
+ for(int step = 0; step < motion_steps; step++) {
+ if(step != motion_steps / 2) {
+ float time = 2.0f * step / (motion_steps - 1) - 1.0f;
+ times.push_back(time);
+ }
+ }
+
+ return times;
+}
+
/* Object Manager */
ObjectManager::ObjectManager()
@@ -154,6 +227,7 @@ void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene
float4 *objects_vector = NULL;
int i = 0;
map<Mesh*, float> surface_area_map;
+ map<ParticleSystem*, int> particle_offset;
Scene::MotionType need_motion = scene->need_motion(device->info.advanced_shading);
bool have_motion = false;
bool have_curves = false;
@@ -162,6 +236,15 @@ void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene
if(need_motion == Scene::MOTION_PASS)
objects_vector = dscene->objects_vector.resize(OBJECT_VECTOR_SIZE*scene->objects.size());
+ /* particle system device offsets
+ * 0 is dummy particle, index starts at 1
+ */
+ int numparticles = 1;
+ foreach(ParticleSystem *psys, scene->particle_systems) {
+ particle_offset[psys] = numparticles;
+ numparticles += psys->particles.size();
+ }
+
foreach(Object *ob, scene->objects) {
Mesh *mesh = ob->mesh;
uint flag = 0;
@@ -177,6 +260,7 @@ void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene
float surface_area = 0.0f;
float pass_id = ob->pass_id;
float random_number = (float)ob->random_id * (1.0f/(float)0xFFFFFFFF);
+ int particle_index = (ob->particle_system)? ob->particle_index + particle_offset[ob->particle_system]: 0;
if(transform_uniform_scale(tfm, uniform_scale)) {
map<Mesh*, float>::iterator it = surface_area_map.find(mesh);
@@ -190,20 +274,6 @@ void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene
surface_area += triangle_area(p1, p2, p3);
}
- foreach(Mesh::Curve& curve, mesh->curves) {
- int first_key = curve.first_key;
-
- for(int i = 0; i < curve.num_segments(); i++) {
- float3 p1 = mesh->curve_keys[first_key + i].co;
- float r1 = mesh->curve_keys[first_key + i].radius;
- float3 p2 = mesh->curve_keys[first_key + i + 1].co;
- float r2 = mesh->curve_keys[first_key + i + 1].radius;
-
- /* currently ignores segment overlaps*/
- surface_area += M_PI_F *(r1 + r2) * len(p1 - p2);
- }
- }
-
surface_area_map[mesh] = surface_area;
}
else
@@ -219,31 +289,17 @@ void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene
surface_area += triangle_area(p1, p2, p3);
}
-
- foreach(Mesh::Curve& curve, mesh->curves) {
- int first_key = curve.first_key;
-
- for(int i = 0; i < curve.num_segments(); i++) {
- float3 p1 = mesh->curve_keys[first_key + i].co;
- float r1 = mesh->curve_keys[first_key + i].radius;
- float3 p2 = mesh->curve_keys[first_key + i + 1].co;
- float r2 = mesh->curve_keys[first_key + i + 1].radius;
-
- p1 = transform_point(&tfm, p1);
- p2 = transform_point(&tfm, p2);
-
- /* currently ignores segment overlaps*/
- surface_area += M_PI_F *(r1 + r2) * len(p1 - p2);
- }
- }
}
/* pack in texture */
int offset = i*OBJECT_SIZE;
+ /* OBJECT_TRANSFORM */
memcpy(&objects[offset], &tfm, sizeof(float4)*3);
+ /* OBJECT_INVERSE_TRANSFORM */
memcpy(&objects[offset+4], &itfm, sizeof(float4)*3);
- objects[offset+8] = make_float4(surface_area, pass_id, random_number, __int_as_float(ob->particle_id));
+ /* OBJECT_PROPERTIES */
+ objects[offset+8] = make_float4(surface_area, pass_id, random_number, __int_as_float(particle_index));
if(need_motion == Scene::MOTION_PASS) {
/* motion transformations, is world/object space depending if mesh
@@ -252,10 +308,10 @@ void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene
Transform mtfm_pre = ob->motion.pre;
Transform mtfm_post = ob->motion.post;
- if(!(mesh->attributes.find(ATTR_STD_MOTION_PRE) || mesh->curve_attributes.find(ATTR_STD_MOTION_PRE)))
+ if(!mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION)) {
mtfm_pre = mtfm_pre * itfm;
- if(!(mesh->attributes.find(ATTR_STD_MOTION_POST) || mesh->curve_attributes.find(ATTR_STD_MOTION_POST)))
mtfm_post = mtfm_post * itfm;
+ }
memcpy(&objects_vector[i*OBJECT_VECTOR_SIZE+0], &mtfm_pre, sizeof(float4)*3);
memcpy(&objects_vector[i*OBJECT_VECTOR_SIZE+3], &mtfm_post, sizeof(float4)*3);
@@ -274,9 +330,17 @@ void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene
}
#endif
- /* dupli object coords */
- objects[offset+9] = make_float4(ob->dupli_generated[0], ob->dupli_generated[1], ob->dupli_generated[2], 0.0f);
- objects[offset+10] = make_float4(ob->dupli_uv[0], ob->dupli_uv[1], 0.0f, 0.0f);
+ if(mesh->use_motion_blur)
+ have_motion = true;
+
+ /* dupli object coords and motion info */
+ int totalsteps = mesh->motion_steps;
+ int numsteps = (totalsteps - 1)/2;
+ int numverts = mesh->verts.size();
+ int numkeys = mesh->curve_keys.size();
+
+ objects[offset+9] = make_float4(ob->dupli_generated[0], ob->dupli_generated[1], ob->dupli_generated[2], __int_as_float(numkeys));
+ objects[offset+10] = make_float4(ob->dupli_uv[0], ob->dupli_uv[1], __int_as_float(numsteps), __int_as_float(numverts));
/* object flag */
if(ob->use_holdout)
@@ -355,6 +419,7 @@ void ObjectManager::apply_static_transforms(DeviceScene *dscene, Scene *scene, u
#ifdef __OBJECT_MOTION__
Scene::MotionType need_motion = scene->need_motion();
bool motion_blur = need_motion == Scene::MOTION_BLUR;
+ bool apply_to_motion = need_motion != Scene::MOTION_PASS;
#else
bool motion_blur = false;
#endif
@@ -377,7 +442,7 @@ void ObjectManager::apply_static_transforms(DeviceScene *dscene, Scene *scene, u
if(mesh_users[object->mesh] == 1) {
if(!(motion_blur && object->use_motion)) {
if(!object->mesh->transform_applied) {
- object->apply_transform();
+ object->apply_transform(apply_to_motion);
object->mesh->transform_applied = true;
if(progress.get_cancel()) return;
diff --git a/intern/cycles/render/object.h b/intern/cycles/render/object.h
index 5da85be3873..677526b715f 100644
--- a/intern/cycles/render/object.h
+++ b/intern/cycles/render/object.h
@@ -27,6 +27,7 @@ CCL_NAMESPACE_BEGIN
class Device;
class DeviceScene;
class Mesh;
+class ParticleSystem;
class Progress;
class Scene;
struct Transform;
@@ -50,15 +51,18 @@ public:
float3 dupli_generated;
float2 dupli_uv;
- int particle_id;
-
+ ParticleSystem *particle_system;
+ int particle_index;
+
Object();
~Object();
void tag_update(Scene *scene);
- void compute_bounds(bool motion_blur, float shuttertime);
- void apply_transform();
+ void compute_bounds(bool motion_blur);
+ void apply_transform(bool apply_to_motion);
+
+ vector<float> motion_times();
};
/* Object Manager */
diff --git a/intern/cycles/render/osl.cpp b/intern/cycles/render/osl.cpp
index e2798f438e2..94866102f60 100644
--- a/intern/cycles/render/osl.cpp
+++ b/intern/cycles/render/osl.cpp
@@ -203,7 +203,6 @@ void OSLShaderManager::shading_system_init()
"glossy", /* PATH_RAY_GLOSSY */
"singular", /* PATH_RAY_SINGULAR */
"transparent", /* PATH_RAY_TRANSPARENT */
- "volume_scatter", /* PATH_RAY_VOLUME_SCATTER */
"shadow", /* PATH_RAY_SHADOW_OPAQUE */
"shadow", /* PATH_RAY_SHADOW_TRANSPARENT */
@@ -212,6 +211,8 @@ void OSLShaderManager::shading_system_init()
"diffuse_ancestor", /* PATH_RAY_DIFFUSE_ANCESTOR */
"glossy_ancestor", /* PATH_RAY_GLOSSY_ANCESTOR */
"bssrdf_ancestor", /* PATH_RAY_BSSRDF_ANCESTOR */
+ "__unused__", /* PATH_RAY_SINGLE_PASS_DONE */
+ "volume_scatter", /* PATH_RAY_VOLUME_SCATTER */
};
const int nraytypes = sizeof(raytypes)/sizeof(raytypes[0]);
@@ -512,16 +513,14 @@ void OSLCompiler::add(ShaderNode *node, const char *name, bool isfilepath)
}
}
- /* create shader of the appropriate type. we pass "surface" to all shaders,
- * because "volume" and "displacement" don't work yet in OSL. the shaders
- * work fine, but presumably these values would be used for more strict
- * checking, so when that is fixed, we should update the code here too. */
+ /* create shader of the appropriate type. OSL only distinguishes between "surface"
+ * and "displacement" atm */
if(current_type == SHADER_TYPE_SURFACE)
ss->Shader("surface", name, id(node).c_str());
else if(current_type == SHADER_TYPE_VOLUME)
ss->Shader("surface", name, id(node).c_str());
else if(current_type == SHADER_TYPE_DISPLACEMENT)
- ss->Shader("surface", name, id(node).c_str());
+ ss->Shader("displacement", name, id(node).c_str());
else
assert(0);
@@ -544,7 +543,7 @@ void OSLCompiler::add(ShaderNode *node, const char *name, bool isfilepath)
/* test if we shader contains specific closures */
OSLShaderInfo *info = ((OSLShaderManager*)manager)->shader_loaded_info(name);
- if(info) {
+ if(info && current_type == SHADER_TYPE_SURFACE) {
if(info->has_surface_emission)
current_shader->has_surface_emission = true;
if(info->has_surface_transparent)
@@ -554,6 +553,10 @@ void OSLCompiler::add(ShaderNode *node, const char *name, bool isfilepath)
current_shader->has_bssrdf_bump = true; /* can't detect yet */
}
}
+ else if(current_type == SHADER_TYPE_VOLUME) {
+ if(node->has_spatial_varying())
+ current_shader->has_heterogeneous_volume = true;
+ }
}
void OSLCompiler::parameter(const char *name, float f)
@@ -709,14 +712,20 @@ void OSLCompiler::generate_nodes(const set<ShaderNode*>& nodes)
node->compile(*this);
done.insert(node);
- if(node->has_surface_emission())
- current_shader->has_surface_emission = true;
- if(node->has_surface_transparent())
- current_shader->has_surface_transparent = true;
- if(node->has_surface_bssrdf()) {
- current_shader->has_surface_bssrdf = true;
- if(node->has_bssrdf_bump())
- current_shader->has_bssrdf_bump = true;
+ if(current_type == SHADER_TYPE_SURFACE) {
+ if(node->has_surface_emission())
+ current_shader->has_surface_emission = true;
+ if(node->has_surface_transparent())
+ current_shader->has_surface_transparent = true;
+ if(node->has_surface_bssrdf()) {
+ current_shader->has_surface_bssrdf = true;
+ if(node->has_bssrdf_bump())
+ current_shader->has_bssrdf_bump = true;
+ }
+ }
+ else if(current_type == SHADER_TYPE_VOLUME) {
+ if(node->has_spatial_varying())
+ current_shader->has_heterogeneous_volume = true;
}
}
else
@@ -798,6 +807,7 @@ void OSLCompiler::compile(OSLGlobals *og, Shader *shader)
shader->has_bssrdf_bump = false;
shader->has_volume = false;
shader->has_displacement = false;
+ shader->has_heterogeneous_volume = false;
/* generate surface shader */
if(shader->used && graph && output->input("Surface")->link) {
diff --git a/intern/cycles/render/scene.cpp b/intern/cycles/render/scene.cpp
index 71f5a9dafed..4f5ad439520 100644
--- a/intern/cycles/render/scene.cpp
+++ b/intern/cycles/render/scene.cpp
@@ -17,6 +17,7 @@
#include <stdlib.h>
#include "background.h"
+#include "bake.h"
#include "camera.h"
#include "curves.h"
#include "device.h"
@@ -54,6 +55,7 @@ Scene::Scene(const SceneParams& params_, const DeviceInfo& device_info_)
image_manager = new ImageManager();
particle_system_manager = new ParticleSystemManager();
curve_system_manager = new CurveSystemManager();
+ bake_manager = new BakeManager();
/* OSL only works on the CPU */
if(device_info_.type == DEVICE_CPU)
@@ -61,8 +63,8 @@ Scene::Scene(const SceneParams& params_, const DeviceInfo& device_info_)
else
shader_manager = ShaderManager::create(this, SceneParams::SVM);
- if (device_info_.type == DEVICE_CPU)
- image_manager->set_extended_image_limits();
+ /* Extended image limits for CPU and GPUs */
+ image_manager->set_extended_image_limits(device_info_);
}
Scene::~Scene()
@@ -103,6 +105,8 @@ void Scene::free_memory(bool final)
particle_system_manager->device_free(device, &dscene);
curve_system_manager->device_free(device, &dscene);
+ bake_manager->device_free(device, &dscene);
+
if(!params.persistent_data || final)
image_manager->device_free(device, &dscene);
@@ -122,6 +126,7 @@ void Scene::free_memory(bool final)
delete particle_system_manager;
delete curve_system_manager;
delete image_manager;
+ delete bake_manager;
}
}
@@ -137,6 +142,8 @@ void Scene::device_update(Device *device_, Progress& progress)
* - Camera may be used for adapative subdivison.
* - Displacement shader must have all shader data available.
* - Light manager needs lookup tables and final mesh data to compute emission CDF.
+ * - Film needs light manager to run for use_light_visibility
+ * - Lookup tables are done a second time to handle film tables
*/
image_manager->set_pack_images(device->info.pack_images);
@@ -171,11 +178,6 @@ void Scene::device_update(Device *device_, Progress& progress)
if(progress.get_cancel()) return;
- progress.set_status("Updating Film");
- film->device_update(device, &dscene, this);
-
- if(progress.get_cancel()) return;
-
progress.set_status("Updating Lookup Tables");
lookup_tables->device_update(device, &dscene);
@@ -196,11 +198,26 @@ void Scene::device_update(Device *device_, Progress& progress)
if(progress.get_cancel()) return;
+ progress.set_status("Updating Film");
+ film->device_update(device, &dscene, this);
+
+ if(progress.get_cancel()) return;
+
progress.set_status("Updating Integrator");
integrator->device_update(device, &dscene, this);
if(progress.get_cancel()) return;
+ progress.set_status("Updating Lookup Tables");
+ lookup_tables->device_update(device, &dscene);
+
+ if(progress.get_cancel()) return;
+
+ progress.set_status("Updating Baking");
+ bake_manager->device_update(device, &dscene, this, progress);
+
+ if(progress.get_cancel()) return;
+
progress.set_status("Updating Device", "Writing constant memory");
device->const_copy_to("__data", &dscene.data, sizeof(dscene.data));
}
@@ -219,8 +236,10 @@ bool Scene::need_global_attribute(AttributeStandard std)
{
if(std == ATTR_STD_UV)
return Pass::contains(film->passes, PASS_UV);
- if(std == ATTR_STD_MOTION_PRE || std == ATTR_STD_MOTION_POST)
- return need_motion() == MOTION_PASS;
+ else if(std == ATTR_STD_MOTION_VERTEX_POSITION)
+ return need_motion() != MOTION_NONE;
+ else if(std == ATTR_STD_MOTION_VERTEX_NORMAL)
+ return need_motion() == MOTION_BLUR;
return false;
}
@@ -249,7 +268,8 @@ bool Scene::need_reset()
|| integrator->need_update
|| shader_manager->need_update
|| particle_system_manager->need_update
- || curve_system_manager->need_update);
+ || curve_system_manager->need_update
+ || bake_manager->need_update);
}
void Scene::reset()
diff --git a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h
index 2c223192536..0f0bb725823 100644
--- a/intern/cycles/render/scene.h
+++ b/intern/cycles/render/scene.h
@@ -51,6 +51,8 @@ class CurveSystemManager;
class Shader;
class ShaderManager;
class Progress;
+class BakeManager;
+class BakeData;
/* Scene Device Data */
@@ -60,7 +62,7 @@ public:
device_vector<float4> bvh_nodes;
device_vector<uint> object_node;
device_vector<float4> tri_woop;
- device_vector<uint> prim_segment;
+ device_vector<uint> prim_type;
device_vector<uint> prim_visibility;
device_vector<uint> prim_index;
device_vector<uint> prim_object;
@@ -103,8 +105,8 @@ public:
/* integrator */
device_vector<uint> sobol_directions;
- /* images */
- device_vector<uchar4> tex_image[TEX_EXTENDED_NUM_IMAGES];
+ /* cpu images */
+ device_vector<uchar4> tex_image[TEX_EXTENDED_NUM_IMAGES_CPU];
device_vector<float4> tex_float_image[TEX_EXTENDED_NUM_FLOAT_IMAGES];
/* opencl images */
@@ -174,6 +176,7 @@ public:
ObjectManager *object_manager;
ParticleSystemManager *particle_system_manager;
CurveSystemManager *curve_system_manager;
+ BakeManager *bake_manager;
/* default shaders */
int default_surface;
diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp
index 0805a685467..28b44df6b36 100644
--- a/intern/cycles/render/session.cpp
+++ b/intern/cycles/render/session.cpp
@@ -23,6 +23,7 @@
#include "integrator.h"
#include "scene.h"
#include "session.h"
+#include "bake.h"
#include "util_foreach.h"
#include "util_function.h"
@@ -50,7 +51,7 @@ Session::Session(const SessionParams& params_)
device = Device::create(params.device, stats, params.background);
- if(params.background) {
+ if(params.background && params.output_path.empty()) {
buffers = NULL;
display = NULL;
}
@@ -81,6 +82,7 @@ Session::Session(const SessionParams& params_)
Session::~Session()
{
if(session_thread) {
+ /* wait for session thread to end */
progress.set_cancel("Exiting");
gpu_need_tonemap = false;
@@ -95,13 +97,19 @@ Session::~Session()
wait();
}
- if(display && !params.output_path.empty()) {
- tonemap();
+ if(!params.output_path.empty()) {
+ /* tonemap and write out image if requested */
+ delete display;
+
+ display = new DisplayBuffer(device, false);
+ display->reset(device, buffers->params);
+ tonemap(params.samples);
progress.set_status("Writing Image", params.output_path);
display->write(device, params.output_path);
}
+ /* clean up */
foreach(RenderBuffers *buffers, tile_buffers)
delete buffers;
@@ -151,7 +159,7 @@ void Session::reset_gpu(BufferParams& buffer_params, int samples)
pause_cond.notify_all();
}
-bool Session::draw_gpu(BufferParams& buffer_params)
+bool Session::draw_gpu(BufferParams& buffer_params, DeviceDrawParams& draw_params)
{
/* block for buffer access */
thread_scoped_lock display_lock(display_mutex);
@@ -165,12 +173,12 @@ bool Session::draw_gpu(BufferParams& buffer_params)
* only access GL buffers from the main thread */
if(gpu_need_tonemap) {
thread_scoped_lock buffers_lock(buffers_mutex);
- tonemap();
+ tonemap(tile_manager.state.sample);
gpu_need_tonemap = false;
gpu_need_tonemap_cond.notify_all();
}
- display->draw(device);
+ display->draw(device, draw_params);
if(display_outdated && (time_dt() - reset_time) > params.text_timeout)
return false;
@@ -315,7 +323,7 @@ void Session::reset_cpu(BufferParams& buffer_params, int samples)
pause_cond.notify_all();
}
-bool Session::draw_cpu(BufferParams& buffer_params)
+bool Session::draw_cpu(BufferParams& buffer_params, DeviceDrawParams& draw_params)
{
thread_scoped_lock display_lock(display_mutex);
@@ -324,7 +332,7 @@ bool Session::draw_cpu(BufferParams& buffer_params)
/* then verify the buffers have the expected size, so we don't
* draw previous results in a resized window */
if(!buffer_params.modified(display->params)) {
- display->draw(device);
+ display->draw(device, draw_params);
if(display_outdated && (time_dt() - reset_time) > params.text_timeout)
return false;
@@ -367,7 +375,7 @@ bool Session::acquire_tile(Device *tile_device, RenderTile& rtile)
/* in case of a permanent buffer, return it, otherwise we will allocate
* a new temporary buffer */
- if(!params.background) {
+ if(!(params.background && params.output_path.empty())) {
tile_manager.state.buffer.get_offset_stride(rtile.offset, rtile.stride);
rtile.buffer = buffers->buffer.device_pointer;
@@ -567,8 +575,8 @@ void Session::run_cpu()
}
else if(need_tonemap) {
/* tonemap only if we do not reset, we don't we don't
- * want to show the result of an incomplete sample*/
- tonemap();
+ * want to show the result of an incomplete sample */
+ tonemap(tile_manager.state.sample);
}
if(!device->error_message().empty())
@@ -624,12 +632,12 @@ void Session::run()
progress.set_update();
}
-bool Session::draw(BufferParams& buffer_params)
+bool Session::draw(BufferParams& buffer_params, DeviceDrawParams &draw_params)
{
if(device_use_gl)
- return draw_gpu(buffer_params);
+ return draw_gpu(buffer_params, draw_params);
else
- return draw_cpu(buffer_params);
+ return draw_cpu(buffer_params, draw_params);
}
void Session::reset_(BufferParams& buffer_params, int samples)
@@ -726,10 +734,14 @@ void Session::update_scene()
cam->tag_update();
}
- /* number of samples is needed by multi jittered sampling pattern */
+ /* number of samples is needed by multi jittered
+ * sampling pattern and by baking */
Integrator *integrator = scene->integrator;
+ BakeManager *bake_manager = scene->bake_manager;
- if(integrator->sampling_pattern == SAMPLING_PATTERN_CMJ) {
+ if(integrator->sampling_pattern == SAMPLING_PATTERN_CMJ ||
+ bake_manager->get_baking())
+ {
int aa_samples = tile_manager.num_samples;
if(aa_samples != integrator->aa_samples) {
@@ -834,7 +846,7 @@ void Session::path_trace()
device->task_add(task);
}
-void Session::tonemap()
+void Session::tonemap(int sample)
{
/* add tonemap task */
DeviceTask task(DeviceTask::FILM_CONVERT);
@@ -846,7 +858,7 @@ void Session::tonemap()
task.rgba_byte = display->rgba_byte.device_pointer;
task.rgba_half = display->rgba_half.device_pointer;
task.buffer = buffers->buffer.device_pointer;
- task.sample = tile_manager.state.sample;
+ task.sample = sample;
tile_manager.state.buffer.get_offset_stride(task.offset, task.stride);
if(task.w > 0 && task.h > 0) {
diff --git a/intern/cycles/render/session.h b/intern/cycles/render/session.h
index 1227edf81b6..1e625158652 100644
--- a/intern/cycles/render/session.h
+++ b/intern/cycles/render/session.h
@@ -128,7 +128,7 @@ public:
~Session();
void start();
- bool draw(BufferParams& params);
+ bool draw(BufferParams& params, DeviceDrawParams& draw_params);
void wait();
bool ready_to_reset();
@@ -136,6 +136,7 @@ public:
void set_samples(int samples);
void set_pause(bool pause);
+ void update_scene();
void device_free();
protected:
struct DelayedReset {
@@ -147,19 +148,18 @@ protected:
void run();
- void update_scene();
void update_status_time(bool show_pause = false, bool show_done = false);
- void tonemap();
+ void tonemap(int sample);
void path_trace();
void reset_(BufferParams& params, int samples);
void run_cpu();
- bool draw_cpu(BufferParams& params);
+ bool draw_cpu(BufferParams& params, DeviceDrawParams& draw_params);
void reset_cpu(BufferParams& params, int samples);
void run_gpu();
- bool draw_gpu(BufferParams& params);
+ bool draw_gpu(BufferParams& params, DeviceDrawParams& draw_params);
void reset_gpu(BufferParams& params, int samples);
bool acquire_tile(Device *tile_device, RenderTile& tile);
diff --git a/intern/cycles/render/shader.cpp b/intern/cycles/render/shader.cpp
index 20f0fd7ed1e..b25673b36c3 100644
--- a/intern/cycles/render/shader.cpp
+++ b/intern/cycles/render/shader.cpp
@@ -53,6 +53,7 @@ Shader::Shader()
has_volume = false;
has_displacement = false;
has_bssrdf_bump = false;
+ has_heterogeneous_volume = false;
used = false;
@@ -249,7 +250,7 @@ void ShaderManager::device_update_common(Device *device, DeviceScene *dscene, Sc
* the case with camera inside volumes too */
flag |= SD_HAS_TRANSPARENT_SHADOW;
}
- if(shader->heterogeneous_volume)
+ if(shader->heterogeneous_volume && shader->has_heterogeneous_volume)
flag |= SD_HETEROGENEOUS_VOLUME;
if(shader->has_bssrdf_bump)
flag |= SD_HAS_BSSRDF_BUMP;
diff --git a/intern/cycles/render/shader.h b/intern/cycles/render/shader.h
index 5f87050fe19..874e8face7a 100644
--- a/intern/cycles/render/shader.h
+++ b/intern/cycles/render/shader.h
@@ -77,6 +77,7 @@ public:
bool has_surface_bssrdf;
bool has_converter_blackbody;
bool has_bssrdf_bump;
+ bool has_heterogeneous_volume;
/* requested mesh attributes */
AttributeRequestSet attributes;
diff --git a/intern/cycles/render/sky_model.cpp b/intern/cycles/render/sky_model.cpp
index 6f250c06bc1..adb07d9e288 100644
--- a/intern/cycles/render/sky_model.cpp
+++ b/intern/cycles/render/sky_model.cpp
@@ -310,7 +310,7 @@ double arhosekskymodel_radiance(
double wavelength
)
{
- int low_wl = (wavelength - 320.0 ) / 40.0;
+ int low_wl = (int)((wavelength - 320.0) / 40.0);
if ( low_wl < 0 || low_wl >= 11 )
return 0.0f;
diff --git a/intern/cycles/render/svm.cpp b/intern/cycles/render/svm.cpp
index 538b1aae313..576c176759c 100644
--- a/intern/cycles/render/svm.cpp
+++ b/intern/cycles/render/svm.cpp
@@ -63,8 +63,6 @@ void SVMShaderManager::device_update(Device *device, DeviceScene *dscene, Scene
svm_nodes.push_back(make_int4(NODE_SHADER_JUMP, 0, 0, 0));
}
- bool use_multi_closure = device->info.advanced_shading;
-
for(i = 0; i < scene->shaders.size(); i++) {
Shader *shader = scene->shaders[i];
@@ -75,8 +73,7 @@ void SVMShaderManager::device_update(Device *device, DeviceScene *dscene, Scene
if(shader->use_mis && shader->has_surface_emission)
scene->light_manager->need_update = true;
- SVMCompiler compiler(scene->shader_manager, scene->image_manager,
- use_multi_closure);
+ SVMCompiler compiler(scene->shader_manager, scene->image_manager);
compiler.background = ((int)i == scene->default_background);
compiler.compile(shader, svm_nodes, i);
}
@@ -104,7 +101,7 @@ void SVMShaderManager::device_free(Device *device, DeviceScene *dscene, Scene *s
/* Graph Compiler */
-SVMCompiler::SVMCompiler(ShaderManager *shader_manager_, ImageManager *image_manager_, bool use_multi_closure_)
+SVMCompiler::SVMCompiler(ShaderManager *shader_manager_, ImageManager *image_manager_)
{
shader_manager = shader_manager_;
image_manager = image_manager_;
@@ -114,7 +111,6 @@ SVMCompiler::SVMCompiler(ShaderManager *shader_manager_, ImageManager *image_man
current_graph = NULL;
background = false;
mix_weight_offset = SVM_STACK_INVALID;
- use_multi_closure = use_multi_closure_;
compile_failed = false;
}
@@ -230,7 +226,8 @@ void SVMCompiler::stack_assign(ShaderInput *input)
else if(input->type == SHADER_SOCKET_VECTOR ||
input->type == SHADER_SOCKET_NORMAL ||
input->type == SHADER_SOCKET_POINT ||
- input->type == SHADER_SOCKET_COLOR) {
+ input->type == SHADER_SOCKET_COLOR)
+ {
add_node(NODE_VALUE_V, input->stack_offset);
add_node(NODE_VALUE_V, input->value);
@@ -379,6 +376,22 @@ void SVMCompiler::find_dependencies(set<ShaderNode*>& dependencies, const set<Sh
}
}
+void SVMCompiler::generate_node(ShaderNode *node, set<ShaderNode*>& done)
+{
+ node->compile(*this);
+ stack_clear_users(node, done);
+ stack_clear_temporary(node);
+
+ if(current_type == SHADER_TYPE_VOLUME) {
+ if(node->has_spatial_varying())
+ current_shader->has_heterogeneous_volume = true;
+ }
+
+ /* detect if we have a blackbody converter, to prepare lookup table */
+ if(node->has_converter_blackbody())
+ current_shader->has_converter_blackbody = true;
+}
+
void SVMCompiler::generate_svm_nodes(const set<ShaderNode*>& nodes, set<ShaderNode*>& done)
{
bool nodes_done;
@@ -396,13 +409,7 @@ void SVMCompiler::generate_svm_nodes(const set<ShaderNode*>& nodes, set<ShaderNo
inputs_done = false;
if(inputs_done) {
- /* Detect if we have a blackbody converter, to prepare lookup table */
- if(node->has_converter_blackbody())
- current_shader->has_converter_blackbody = true;
-
- node->compile(*this);
- stack_clear_users(node, done);
- stack_clear_temporary(node);
+ generate_node(node, done);
done.insert(node);
}
else
@@ -412,83 +419,34 @@ void SVMCompiler::generate_svm_nodes(const set<ShaderNode*>& nodes, set<ShaderNo
} while(!nodes_done);
}
-void SVMCompiler::generate_closure(ShaderNode *node, set<ShaderNode*>& done)
+void SVMCompiler::generate_closure_node(ShaderNode *node, set<ShaderNode*>& done)
{
- if(node->name == ustring("mix_closure") || node->name == ustring("add_closure")) {
- ShaderInput *fin = node->input("Fac");
- ShaderInput *cl1in = node->input("Closure1");
- ShaderInput *cl2in = node->input("Closure2");
-
- /* execute dependencies for mix weight */
- if(fin) {
+ /* execute dependencies for closure */
+ foreach(ShaderInput *in, node->inputs) {
+ if(!node_skip_input(node, in) && in->link) {
set<ShaderNode*> dependencies;
- find_dependencies(dependencies, done, fin);
+ find_dependencies(dependencies, done, in);
generate_svm_nodes(dependencies, done);
-
- /* add mix node */
- stack_assign(fin);
- }
-
- int mix_offset = svm_nodes.size();
-
- if(fin)
- add_node(NODE_MIX_CLOSURE, fin->stack_offset, 0, 0);
- else
- add_node(NODE_ADD_CLOSURE, 0, 0, 0);
-
- /* generate code for closure 1
- * note we backup all compiler state and restore it afterwards, so one
- * closure choice doesn't influence the other*/
- if(cl1in->link) {
- StackBackup backup;
- stack_backup(backup, done);
-
- generate_closure(cl1in->link->parent, done);
- add_node(NODE_END, 0, 0, 0);
-
- stack_restore(backup, done);
}
- else
- add_node(NODE_END, 0, 0, 0);
-
- /* generate code for closure 2 */
- int cl2_offset = svm_nodes.size();
-
- if(cl2in->link) {
- StackBackup backup;
- stack_backup(backup, done);
-
- generate_closure(cl2in->link->parent, done);
- add_node(NODE_END, 0, 0, 0);
-
- stack_restore(backup, done);
- }
- else
- add_node(NODE_END, 0, 0, 0);
+ }
- /* set jump for mix node, -1 because offset is already
- * incremented when this jump is added to it */
- svm_nodes[mix_offset].z = cl2_offset - mix_offset - 1;
+ /* closure mix weight */
+ const char *weight_name = (current_type == SHADER_TYPE_VOLUME)? "VolumeMixWeight": "SurfaceMixWeight";
+ ShaderInput *weight_in = node->input(weight_name);
- done.insert(node);
- stack_clear_users(node, done);
- stack_clear_temporary(node);
+ if(weight_in && (weight_in->link || weight_in->value.x != 1.0f)) {
+ stack_assign(weight_in);
+ mix_weight_offset = weight_in->stack_offset;
}
- else {
- /* execute dependencies for closure */
- foreach(ShaderInput *in, node->inputs) {
- if(!node_skip_input(node, in) && in->link) {
- set<ShaderNode*> dependencies;
- find_dependencies(dependencies, done, in);
- generate_svm_nodes(dependencies, done);
- }
- }
+ else
+ mix_weight_offset = SVM_STACK_INVALID;
- /* compile closure itself */
- node->compile(*this);
- stack_clear_users(node, done);
- stack_clear_temporary(node);
+ /* compile closure itself */
+ generate_node(node, done);
+ mix_weight_offset = SVM_STACK_INVALID;
+
+ if(current_type == SHADER_TYPE_SURFACE) {
if(node->has_surface_emission())
current_shader->has_surface_emission = true;
if(node->has_surface_transparent())
@@ -498,18 +456,24 @@ void SVMCompiler::generate_closure(ShaderNode *node, set<ShaderNode*>& done)
if(node->has_bssrdf_bump())
current_shader->has_bssrdf_bump = true;
}
+ }
+}
- /* end node is added outside of this */
+void SVMCompiler::generated_shared_closure_nodes(ShaderNode *node, set<ShaderNode*>& done, set<ShaderNode*>& closure_done, const set<ShaderNode*>& shared)
+{
+ if(shared.find(node) != shared.end()) {
+ generate_multi_closure(node, done, closure_done);
+ }
+ else {
+ foreach(ShaderInput *in, node->inputs) {
+ if(in->type == SHADER_SOCKET_CLOSURE && in->link)
+ generated_shared_closure_nodes(in->link->parent, done, closure_done, shared);
+ }
}
}
void SVMCompiler::generate_multi_closure(ShaderNode *node, set<ShaderNode*>& done, set<ShaderNode*>& closure_done)
{
- /* todo: the weak point here is that unlike the single closure sampling
- * we will evaluate all nodes even if they are used as input for closures
- * that are unused. it's not clear what would be the best way to skip such
- * nodes at runtime, especially if they are tangled up */
-
/* only generate once */
if(closure_done.find(node) != closure_done.end())
return;
@@ -520,50 +484,81 @@ void SVMCompiler::generate_multi_closure(ShaderNode *node, set<ShaderNode*>& don
/* weighting is already taken care of in ShaderGraph::transform_multi_closure */
ShaderInput *cl1in = node->input("Closure1");
ShaderInput *cl2in = node->input("Closure2");
+ ShaderInput *facin = node->input("Fac");
- if(cl1in->link)
- generate_multi_closure(cl1in->link->parent, done, closure_done);
- if(cl2in->link)
- generate_multi_closure(cl2in->link->parent, done, closure_done);
- }
- else {
- /* execute dependencies for closure */
- foreach(ShaderInput *in, node->inputs) {
- if(!node_skip_input(node, in) && in->link) {
- set<ShaderNode*> dependencies;
- find_dependencies(dependencies, done, in);
- generate_svm_nodes(dependencies, done);
+ /* skip empty mix/add closure nodes */
+ if(!cl1in->link && !cl2in->link)
+ return;
+
+ if(facin && facin->link) {
+ /* mix closure: generate instructions to compute mix weight */
+ set<ShaderNode*> dependencies;
+ find_dependencies(dependencies, done, facin);
+ generate_svm_nodes(dependencies, done);
+
+ stack_assign(facin);
+
+ /* execute shared dependencies. this is needed to allow skipping
+ * of zero weight closures and their dependencies later, so we
+ * ensure that they only skip dependencies that are unique to them */
+ set<ShaderNode*> cl1deps, cl2deps, shareddeps;
+
+ find_dependencies(cl1deps, done, cl1in);
+ find_dependencies(cl2deps, done, cl2in);
+
+ set_intersection(cl1deps.begin(), cl1deps.end(),
+ cl2deps.begin(), cl2deps.end(),
+ std::inserter(shareddeps, shareddeps.begin()));
+
+ if(!shareddeps.empty()) {
+ if(cl1in->link)
+ generated_shared_closure_nodes(cl1in->link->parent, done, closure_done, shareddeps);
+ if(cl2in->link)
+ generated_shared_closure_nodes(cl2in->link->parent, done, closure_done, shareddeps);
+
+ generate_svm_nodes(shareddeps, done);
}
- }
- /* closure mix weight */
- const char *weight_name = (current_type == SHADER_TYPE_VOLUME)? "VolumeMixWeight": "SurfaceMixWeight";
- ShaderInput *weight_in = node->input(weight_name);
+ /* generate instructions for input closure 1 */
+ if(cl1in->link) {
+ /* add instruction to skip closure and its dependencies if mix weight is zero */
+ svm_nodes.push_back(make_int4(NODE_JUMP_IF_ONE, 0, facin->stack_offset, 0));
+ int node_jump_skip_index = svm_nodes.size() - 1;
- if(weight_in && (weight_in->link || weight_in->value.x != 1.0f)) {
- stack_assign(weight_in);
- mix_weight_offset = weight_in->stack_offset;
- }
- else
- mix_weight_offset = SVM_STACK_INVALID;
+ generate_multi_closure(cl1in->link->parent, done, closure_done);
- /* compile closure itself */
- node->compile(*this);
- stack_clear_users(node, done);
- stack_clear_temporary(node);
+ /* fill in jump instruction location to be after closure */
+ svm_nodes[node_jump_skip_index].y = svm_nodes.size() - node_jump_skip_index - 1;
+ }
- mix_weight_offset = SVM_STACK_INVALID;
+ /* generate instructions for input closure 2 */
+ if(cl2in->link) {
+ /* add instruction to skip closure and its dependencies if mix weight is zero */
+ svm_nodes.push_back(make_int4(NODE_JUMP_IF_ZERO, 0, facin->stack_offset, 0));
+ int node_jump_skip_index = svm_nodes.size() - 1;
- if(node->has_surface_emission())
- current_shader->has_surface_emission = true;
- if(node->has_surface_transparent())
- current_shader->has_surface_transparent = true;
- if(node->has_surface_bssrdf()) {
- current_shader->has_surface_bssrdf = true;
- if(node->has_bssrdf_bump())
- current_shader->has_bssrdf_bump = true;
+ generate_multi_closure(cl2in->link->parent, done, closure_done);
+
+ /* fill in jump instruction location to be after closure */
+ svm_nodes[node_jump_skip_index].y = svm_nodes.size() - node_jump_skip_index - 1;
+ }
+
+ /* unassign */
+ facin->stack_offset = SVM_STACK_INVALID;
+ }
+ else {
+ /* execute closures and their dependencies, no runtime checks
+ * to skip closures here because was already optimized due to
+ * fixed weight or add closure that always needs both */
+ if(cl1in->link)
+ generate_multi_closure(cl1in->link->parent, done, closure_done);
+ if(cl2in->link)
+ generate_multi_closure(cl2in->link->parent, done, closure_done);
}
}
+ else {
+ generate_closure_node(node, done);
+ }
done.insert(node);
}
@@ -642,14 +637,8 @@ void SVMCompiler::compile_type(Shader *shader, ShaderGraph *graph, ShaderType ty
}
if(generate) {
- set<ShaderNode*> done;
-
- if(use_multi_closure) {
- set<ShaderNode*> closure_done;
- generate_multi_closure(clin->link->parent, done, closure_done);
- }
- else
- generate_closure(clin->link->parent, done);
+ set<ShaderNode*> done, closure_done;
+ generate_multi_closure(clin->link->parent, done, closure_done);
}
}
@@ -676,9 +665,9 @@ void SVMCompiler::compile(Shader *shader, vector<int4>& global_svm_nodes, int in
shader->graph_bump = shader->graph->copy();
/* finalize */
- shader->graph->finalize(false, false, use_multi_closure);
+ shader->graph->finalize(false, false);
if(shader->graph_bump)
- shader->graph_bump->finalize(true, false, use_multi_closure);
+ shader->graph_bump->finalize(true, false);
current_shader = shader;
@@ -690,6 +679,7 @@ void SVMCompiler::compile(Shader *shader, vector<int4>& global_svm_nodes, int in
shader->has_converter_blackbody = false;
shader->has_volume = false;
shader->has_displacement = false;
+ shader->has_heterogeneous_volume = false;
/* generate surface shader */
compile_type(shader, shader->graph, SHADER_TYPE_SURFACE);
diff --git a/intern/cycles/render/svm.h b/intern/cycles/render/svm.h
index 3d84a67e173..45aa4d26926 100644
--- a/intern/cycles/render/svm.h
+++ b/intern/cycles/render/svm.h
@@ -52,8 +52,7 @@ public:
class SVMCompiler {
public:
- SVMCompiler(ShaderManager *shader_manager, ImageManager *image_manager,
- bool use_multi_closure_);
+ SVMCompiler(ShaderManager *shader_manager, ImageManager *image_manager);
void compile(Shader *shader, vector<int4>& svm_nodes, int index);
void stack_assign(ShaderOutput *output);
@@ -123,9 +122,13 @@ protected:
bool node_skip_input(ShaderNode *node, ShaderInput *input);
/* single closure */
- void find_dependencies(set<ShaderNode*>& dependencies, const set<ShaderNode*>& done, ShaderInput *input);
+ void find_dependencies(set<ShaderNode*>& dependencies,
+ const set<ShaderNode*>& done, ShaderInput *input);
+ void generate_node(ShaderNode *node, set<ShaderNode*>& done);
+ void generate_closure_node(ShaderNode *node, set<ShaderNode*>& done);
+ void generated_shared_closure_nodes(ShaderNode *node, set<ShaderNode*>& done,
+ set<ShaderNode*>& closure_done, const set<ShaderNode*>& shared);
void generate_svm_nodes(const set<ShaderNode*>& nodes, set<ShaderNode*>& done);
- void generate_closure(ShaderNode *node, set<ShaderNode*>& done);
/* multi closure */
void generate_multi_closure(ShaderNode *node, set<ShaderNode*>& done, set<ShaderNode*>& closure_done);
@@ -140,7 +143,6 @@ protected:
Stack active_stack;
int max_stack_use;
uint mix_weight_offset;
- bool use_multi_closure;
bool compile_failed;
};
diff --git a/intern/cycles/render/tables.cpp b/intern/cycles/render/tables.cpp
index be0d4afbe2c..a8d502c432d 100644
--- a/intern/cycles/render/tables.cpp
+++ b/intern/cycles/render/tables.cpp
@@ -39,7 +39,10 @@ void LookupTables::device_update(Device *device, DeviceScene *dscene)
if(!need_update)
return;
- device->tex_alloc("__lookup_table", dscene->lookup_table);
+ device->tex_free(dscene->lookup_table);
+
+ if(lookup_tables.size() > 0)
+ device->tex_alloc("__lookup_table", dscene->lookup_table);
need_update = false;
}
diff --git a/intern/cycles/subd/subd_split.cpp b/intern/cycles/subd/subd_split.cpp
index 417ecfffd49..6bbf4af3f85 100644
--- a/intern/cycles/subd/subd_split.cpp
+++ b/intern/cycles/subd/subd_split.cpp
@@ -94,7 +94,7 @@ void DiagSplit::partition_edge(Patch *patch, float2 *P, int *t0, int *t1, float2
*t1 = T(patch, *P, Pend);
}
else {
- int I = floor(t*0.5f);
+ int I = (int)floor((float)t*0.5f);
*P = interp(Pstart, Pend, (t == 0)? 0: I/(float)t); /* XXX is t faces or verts */
*t0 = I;
*t1 = t - I;
diff --git a/intern/cycles/util/util_color.h b/intern/cycles/util/util_color.h
index 0cfa4049d3e..b72cc6bc873 100644
--- a/intern/cycles/util/util_color.h
+++ b/intern/cycles/util/util_color.h
@@ -61,22 +61,22 @@ ccl_device float3 rgb_to_hsv(float3 rgb)
h = 0.0f;
}
- if(s == 0.0f) {
- h = 0.0f;
- }
- else {
+ if(s != 0.0f) {
float3 cmax3 = make_float3(cmax, cmax, cmax);
c = (cmax3 - rgb)/cdelta;
- if(rgb.x == cmax) h = c.z - c.y;
- else if(rgb.y == cmax) h = 2.0f + c.x - c.z;
- else h = 4.0f + c.y - c.x;
+ if (rgb.x == cmax) h = c.z - c.y;
+ else if(rgb.y == cmax) h = 2.0f + c.x - c.z;
+ else h = 4.0f + c.y - c.x;
h /= 6.0f;
if(h < 0.0f)
h += 1.0f;
}
+ else {
+ h = 0.0f;
+ }
return make_float3(h, s, v);
}
@@ -90,13 +90,10 @@ ccl_device float3 hsv_to_rgb(float3 hsv)
s = hsv.y;
v = hsv.z;
- if(s == 0.0f) {
- rgb = make_float3(v, v, v);
- }
- else {
+ if(s != 0.0f) {
if(h == 1.0f)
h = 0.0f;
-
+
h *= 6.0f;
i = floorf(h);
f = h - i;
@@ -104,13 +101,16 @@ ccl_device float3 hsv_to_rgb(float3 hsv)
p = v*(1.0f-s);
q = v*(1.0f-(s*f));
t = v*(1.0f-(s*(1.0f-f)));
-
- if(i == 0.0f) rgb = make_float3(v, t, p);
+
+ if (i == 0.0f) rgb = make_float3(v, t, p);
else if(i == 1.0f) rgb = make_float3(q, v, p);
else if(i == 2.0f) rgb = make_float3(p, v, t);
else if(i == 3.0f) rgb = make_float3(p, q, v);
else if(i == 4.0f) rgb = make_float3(t, p, v);
- else rgb = make_float3(v, p, q);
+ else rgb = make_float3(v, p, q);
+ }
+ else {
+ rgb = make_float3(v, v, v);
}
return rgb;
@@ -132,8 +132,8 @@ ccl_device float3 xyY_to_xyz(float x, float y, float Y)
ccl_device float3 xyz_to_rgb(float x, float y, float z)
{
return make_float3(3.240479f * x + -1.537150f * y + -0.498535f * z,
- -0.969256f * x + 1.875991f * y + 0.041556f * z,
- 0.055648f * x + -0.204043f * y + 1.057311f * z);
+ -0.969256f * x + 1.875991f * y + 0.041556f * z,
+ 0.055648f * x + -0.204043f * y + 1.057311f * z);
}
#ifndef __KERNEL_OPENCL__
diff --git a/intern/cycles/util/util_cuda.h b/intern/cycles/util/util_cuda.h
index deb2ff969d6..0c80303df9b 100644
--- a/intern/cycles/util/util_cuda.h
+++ b/intern/cycles/util/util_cuda.h
@@ -206,7 +206,8 @@ typedef enum CUjit_target_enum
CU_TARGET_COMPUTE_20,
CU_TARGET_COMPUTE_21,
CU_TARGET_COMPUTE_30,
- CU_TARGET_COMPUTE_35
+ CU_TARGET_COMPUTE_35,
+ CU_TARGET_COMPUTE_50
} CUjit_target;
typedef enum CUjit_fallback_enum
diff --git a/intern/cycles/util/util_half.h b/intern/cycles/util/util_half.h
index 21192024f7f..da6fae79bb9 100644
--- a/intern/cycles/util/util_half.h
+++ b/intern/cycles/util/util_half.h
@@ -19,13 +19,17 @@
#include "util_types.h"
+#ifdef __KERNEL_SSE2__
+#include "util_simd.h"
+#endif
+
CCL_NAMESPACE_BEGIN
/* Half Floats */
#ifdef __KERNEL_OPENCL__
-#define float4_store_half(h, f, scale) vstore_half4(*(f) * (scale), 0, h);
+#define float4_store_half(h, f, scale) vstore_half4(f * (scale), 0, h);
#else
@@ -34,24 +38,24 @@ struct half4 { half x, y, z, w; };
#ifdef __KERNEL_CUDA__
-ccl_device_inline void float4_store_half(half *h, const float4 *f, float scale)
+ccl_device_inline void float4_store_half(half *h, float4 f, float scale)
{
- h[0] = __float2half_rn(f->x * scale);
- h[1] = __float2half_rn(f->y * scale);
- h[2] = __float2half_rn(f->z * scale);
- h[3] = __float2half_rn(f->w * scale);
+ h[0] = __float2half_rn(f.x * scale);
+ h[1] = __float2half_rn(f.y * scale);
+ h[2] = __float2half_rn(f.z * scale);
+ h[3] = __float2half_rn(f.w * scale);
}
#else
-ccl_device_inline void float4_store_half(half *h, const float4 *f, float scale)
+ccl_device_inline void float4_store_half(half *h, float4 f, float scale)
{
#ifndef __KERNEL_SSE2__
for(int i = 0; i < 4; i++) {
/* optimized float to half for pixels:
* assumes no negative, no nan, no inf, and sets denormal to 0 */
union { uint i; float f; } in;
- float fscale = (*f)[i] * scale;
+ float fscale = f[i] * scale;
in.f = (fscale > 0.0f)? ((fscale < 65500.0f)? fscale: 65500.0f): 0.0f;
int x = in.i;
@@ -70,7 +74,7 @@ ccl_device_inline void float4_store_half(half *h, const float4 *f, float scale)
const __m128i mm_7FFFFFFF = _mm_set1_epi32(0x7FFFFFFF);
const __m128i mm_C8000000 = _mm_set1_epi32(0xC8000000);
- __m128 mm_fscale = _mm_mul_ps(*(__m128*)f, mm_scale);
+ __m128 mm_fscale = _mm_mul_ps(load_m128(f), mm_scale);
__m128i x = _mm_castps_si128(_mm_min_ps(_mm_max_ps(mm_fscale, _mm_set_ps1(0.0f)), _mm_set_ps1(65500.0f)));
__m128i absolute = _mm_and_si128(x, mm_7FFFFFFF);
__m128i Z = _mm_add_epi32(absolute, mm_C8000000);
diff --git a/intern/cycles/util/util_hash.h b/intern/cycles/util/util_hash.h
index ded25c92b90..edd2448efa4 100644
--- a/intern/cycles/util/util_hash.h
+++ b/intern/cycles/util/util_hash.h
@@ -23,7 +23,7 @@ CCL_NAMESPACE_BEGIN
static inline uint hash_int_2d(uint kx, uint ky)
{
- #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
+#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
uint a, b, c;
@@ -41,7 +41,7 @@ static inline uint hash_int_2d(uint kx, uint ky)
return c;
- #undef rot
+#undef rot
}
static inline uint hash_int(uint k)
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h
index 2e73639d2bb..ded75762cd2 100644
--- a/intern/cycles/util/util_math.h
+++ b/intern/cycles/util/util_math.h
@@ -163,11 +163,7 @@ ccl_device_inline float clamp(float a, float mn, float mx)
ccl_device_inline int float_to_int(float f)
{
-#if defined(__KERNEL_SSE2__) && !defined(_MSC_VER)
- return _mm_cvtt_ss2si(_mm_load_ss(&f));
-#else
return (int)f;
-#endif
}
ccl_device_inline int floor_to_int(float f)
@@ -469,6 +465,15 @@ ccl_device_inline float dot(const float3 a, const float3 b)
#endif
}
+ccl_device_inline float dot(const float4 a, const float4 b)
+{
+#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
+ return _mm_cvtss_f32(_mm_dp_ps(a, b, 0xFF));
+#else
+ return (a.x*b.x + a.y*b.y) + (a.z*b.z + a.w*b.w);
+#endif
+}
+
ccl_device_inline float3 cross(const float3 a, const float3 b)
{
float3 r = make_float3(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x);
@@ -493,6 +498,11 @@ ccl_device_inline float len_squared(const float3 a)
#ifndef __KERNEL_OPENCL__
+ccl_device_inline float len_squared(const float4 a)
+{
+ return dot(a, a);
+}
+
ccl_device_inline float3 normalize(const float3 a)
{
#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
@@ -812,11 +822,6 @@ ccl_device_inline float average(const float4& a)
return reduce_add(a) * 0.25f;
}
-ccl_device_inline float dot(const float4& a, const float4& b)
-{
- return reduce_add(a * b);
-}
-
ccl_device_inline float len(const float4 a)
{
return sqrtf(dot(a, a));
@@ -1113,6 +1118,17 @@ ccl_device_inline void make_orthonormals(const float3 N, float3 *a, float3 *b)
/* Color division */
+ccl_device_inline float3 safe_invert_color(float3 a)
+{
+ float x, y, z;
+
+ x = (a.x != 0.0f)? 1.0f/a.x: 0.0f;
+ y = (a.y != 0.0f)? 1.0f/a.y: 0.0f;
+ z = (a.z != 0.0f)? 1.0f/a.z: 0.0f;
+
+ return make_float3(x, y, z);
+}
+
ccl_device_inline float3 safe_divide_color(float3 a, float3 b)
{
float x, y, z;
@@ -1221,7 +1237,7 @@ ccl_device float compatible_powf(float x, float y)
ccl_device float safe_powf(float a, float b)
{
- if(a < 0.0f && b != float_to_int(b))
+ if(UNLIKELY(a < 0.0f && b != float_to_int(b)))
return 0.0f;
return compatible_powf(a, b);
@@ -1229,7 +1245,7 @@ ccl_device float safe_powf(float a, float b)
ccl_device float safe_logf(float a, float b)
{
- if(a < 0.0f || b < 0.0f)
+ if(UNLIKELY(a < 0.0f || b < 0.0f))
return 0.0f;
return logf(a)/logf(b);
@@ -1289,7 +1305,7 @@ ccl_device bool ray_aligned_disk_intersect(
float3 disk_N = normalize_len(ray_P - disk_P, &disk_t);
float div = dot(ray_D, disk_N);
- if(div == 0.0f)
+ if(UNLIKELY(div == 0.0f))
return false;
/* compute t to intersection point */
@@ -1319,7 +1335,7 @@ ccl_device bool ray_triangle_intersect(
float3 s1 = cross(ray_D, e2);
const float divisor = dot(s1, e1);
- if(divisor == 0.0f)
+ if(UNLIKELY(divisor == 0.0f))
return false;
const float invdivisor = 1.0f/divisor;
@@ -1351,6 +1367,50 @@ ccl_device bool ray_triangle_intersect(
return true;
}
+ccl_device bool ray_triangle_intersect_uv(
+ float3 ray_P, float3 ray_D, float ray_t,
+ float3 v0, float3 v1, float3 v2,
+ float *isect_u, float *isect_v, float *isect_t)
+{
+ /* Calculate intersection */
+ float3 e1 = v1 - v0;
+ float3 e2 = v2 - v0;
+ float3 s1 = cross(ray_D, e2);
+
+ const float divisor = dot(s1, e1);
+ if(UNLIKELY(divisor == 0.0f))
+ return false;
+
+ const float invdivisor = 1.0f/divisor;
+
+ /* compute first barycentric coordinate */
+ const float3 d = ray_P - v0;
+ const float u = dot(d, s1)*invdivisor;
+ if(u < 0.0f)
+ return false;
+
+ /* Compute second barycentric coordinate */
+ const float3 s2 = cross(d, e1);
+ const float v = dot(ray_D, s2)*invdivisor;
+ if(v < 0.0f)
+ return false;
+
+ const float b0 = 1.0f - u - v;
+ if(b0 < 0.0f)
+ return false;
+
+ /* compute t to intersection point */
+ const float t = dot(e2, s2)*invdivisor;
+ if(t < 0.0f || t > ray_t)
+ return false;
+
+ *isect_u = u;
+ *isect_v = v;
+ *isect_t = t;
+
+ return true;
+}
+
ccl_device bool ray_quad_intersect(
float3 ray_P, float3 ray_D, float ray_t,
float3 quad_P, float3 quad_u, float3 quad_v,
diff --git a/intern/cycles/util/util_md5.cpp b/intern/cycles/util/util_md5.cpp
index c53fbd90c67..add0d18c742 100644
--- a/intern/cycles/util/util_md5.cpp
+++ b/intern/cycles/util/util_md5.cpp
@@ -367,7 +367,7 @@ string MD5Hash::get_hex()
finish(digest);
for(int i = 0; i < 16; i++)
- sprintf(buf + i*2, "%02X", digest[i]);
+ sprintf(buf + i*2, "%02X", (unsigned int)digest[i]);
buf[sizeof(buf)-1] = '\0';
return string(buf);
diff --git a/intern/cycles/util/util_opencl.h b/intern/cycles/util/util_opencl.h
index 5f3f1667bcc..141c5e38273 100644
--- a/intern/cycles/util/util_opencl.h
+++ b/intern/cycles/util/util_opencl.h
@@ -304,7 +304,9 @@ typedef struct _cl_kernel * cl_kernel;
typedef struct _cl_event * cl_event;
typedef struct _cl_sampler * cl_sampler;
-typedef cl_uint cl_bool; /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */
+/* WARNING! Unlike cl_ types in cl_platform.h,
+ * cl_bool is not guaranteed to be the same size as the bool in kernels. */
+typedef cl_uint cl_bool;
typedef cl_ulong cl_bitfield;
typedef cl_bitfield cl_device_type;
typedef cl_uint cl_platform_info;
diff --git a/intern/cycles/util/util_path.cpp b/intern/cycles/util/util_path.cpp
index 4fd5df4316d..85d19b6a325 100644
--- a/intern/cycles/util/util_path.cpp
+++ b/intern/cycles/util/util_path.cpp
@@ -111,6 +111,11 @@ string path_escape(const string& path)
return result;
}
+bool path_is_relative(const string& path)
+{
+ return to_boost(path).is_relative();
+}
+
bool path_exists(const string& path)
{
return boost::filesystem::exists(to_boost(path));
diff --git a/intern/cycles/util/util_path.h b/intern/cycles/util/util_path.h
index e9041e63dae..fd9ea11740d 100644
--- a/intern/cycles/util/util_path.h
+++ b/intern/cycles/util/util_path.h
@@ -41,6 +41,7 @@ string path_filename(const string& path);
string path_dirname(const string& path);
string path_join(const string& dir, const string& file);
string path_escape(const string& path);
+bool path_is_relative(const string& path);
/* file info */
bool path_exists(const string& path);
diff --git a/intern/cycles/util/util_simd.h b/intern/cycles/util/util_simd.h
index fd5ba1de37b..f0f37fa57aa 100644
--- a/intern/cycles/util/util_simd.h
+++ b/intern/cycles/util/util_simd.h
@@ -71,7 +71,7 @@ ccl_device_inline const __m128 shuffle_swap(const __m128& a, shuffle_swap_t shuf
#ifdef __KERNEL_SSE41__
ccl_device_inline void gen_idirsplat_swap(const __m128 &pn, const shuffle_swap_t &shuf_identity, const shuffle_swap_t &shuf_swap,
- const float3& idir, __m128 idirsplat[3], shuffle_swap_t shufflexyz[3])
+ const float3& idir, __m128 idirsplat[3], shuffle_swap_t shufflexyz[3])
{
const __m128 idirsplat_raw[] = { _mm_set_ps1(idir.x), _mm_set_ps1(idir.y), _mm_set_ps1(idir.z) };
idirsplat[0] = _mm_xor_ps(idirsplat_raw[0], pn);
@@ -87,7 +87,7 @@ ccl_device_inline void gen_idirsplat_swap(const __m128 &pn, const shuffle_swap_t
}
#else
ccl_device_inline void gen_idirsplat_swap(const __m128 &pn, const shuffle_swap_t &shuf_identity, const shuffle_swap_t &shuf_swap,
- const float3& idir, __m128 idirsplat[3], shuffle_swap_t shufflexyz[3])
+ const float3& idir, __m128 idirsplat[3], shuffle_swap_t shufflexyz[3])
{
idirsplat[0] = _mm_xor_ps(_mm_set_ps1(idir.x), pn);
idirsplat[1] = _mm_xor_ps(_mm_set_ps1(idir.y), pn);
@@ -154,6 +154,12 @@ ccl_device_inline const __m128 fms(const __m128& a, const __m128& b, const __m12
return _mm_sub_ps(_mm_mul_ps(a, b), c);
}
+/* calculate -a*b+c (replacement for fused negated-multiply-subtract on SSE CPUs) */
+ccl_device_inline const __m128 fnma(const __m128& a, const __m128& b, const __m128& c)
+{
+ return _mm_sub_ps(c, _mm_mul_ps(a, b));
+}
+
template<size_t N> ccl_device_inline const __m128 broadcast(const __m128& a)
{
return _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(a), _MM_SHUFFLE(N, N, N, N)));
@@ -180,6 +186,88 @@ ccl_device_inline const __m128 set_sign_bit(const __m128 &a)
return _mm_xor_ps(a, _mm_castsi128_ps(_mm_setr_epi32(S1 << 31, S2 << 31, S3 << 31, S4 << 31)));
}
+#ifdef __KERNEL_WITH_SSE_ALIGN__
+ccl_device_inline const __m128 load_m128(const float4 &vec)
+{
+ return _mm_load_ps(&vec.x);
+}
+
+ccl_device_inline const __m128 load_m128(const float3 &vec)
+{
+ return _mm_load_ps(&vec.x);
+}
+
+#else
+
+ccl_device_inline const __m128 load_m128(const float4 &vec)
+{
+ return _mm_loadu_ps(&vec.x);
+}
+
+ccl_device_inline const __m128 load_m128(const float3 &vec)
+{
+ return _mm_loadu_ps(&vec.x);
+}
+#endif /* __KERNEL_WITH_SSE_ALIGN__ */
+
+ccl_device_inline const __m128 dot3_splat(const __m128& a, const __m128& b)
+{
+#ifdef __KERNEL_SSE41__
+ return _mm_dp_ps(a, b, 0x7f);
+#else
+ __m128 t = _mm_mul_ps(a, b);
+ return _mm_set1_ps(((float*)&t)[0] + ((float*)&t)[1] + ((float*)&t)[2]);
+#endif
+}
+
+/* squared length taking only specified axes into account */
+template<size_t X, size_t Y, size_t Z, size_t W>
+ccl_device_inline float len_squared(const __m128& a)
+{
+#ifndef __KERNEL_SSE41__
+ float4& t = (float4 &)a;
+ return (X ? t.x * t.x : 0.0f) + (Y ? t.y * t.y : 0.0f) + (Z ? t.z * t.z : 0.0f) + (W ? t.w * t.w : 0.0f);
+#else
+ return _mm_cvtss_f32(_mm_dp_ps(a, a, (X << 4) | (Y << 5) | (Z << 6) | (W << 7) | 0xf));
+#endif
+}
+
+ccl_device_inline float dot3(const __m128& a, const __m128& b)
+{
+#ifdef __KERNEL_SSE41__
+ return _mm_cvtss_f32(_mm_dp_ps(a, b, 0x7f));
+#else
+ __m128 t = _mm_mul_ps(a, b);
+ return ((float*)&t)[0] + ((float*)&t)[1] + ((float*)&t)[2];
+#endif
+}
+
+ccl_device_inline const __m128 len3_squared_splat(const __m128& a)
+{
+ return dot3_splat(a, a);
+}
+
+ccl_device_inline float len3_squared(const __m128& a)
+{
+ return dot3(a, a);
+}
+
+ccl_device_inline float len3(const __m128& a)
+{
+ return _mm_cvtss_f32(_mm_sqrt_ss(dot3_splat(a, a)));
+}
+
+/* calculate shuffled cross product, useful when order of components does not matter */
+ccl_device_inline const __m128 cross_zxy(const __m128& a, const __m128& b)
+{
+ return fms(a, shuffle<1, 2, 0, 3>(b), _mm_mul_ps(b, shuffle<1, 2, 0, 3>(a)));
+}
+
+ccl_device_inline const __m128 cross(const __m128& a, const __m128& b)
+{
+ return shuffle<1, 2, 0, 3>(cross_zxy(a, b));
+}
+
#endif /* __KERNEL_SSE2__ */
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_system.cpp b/intern/cycles/util/util_system.cpp
index 3d7781f6146..0764f7d9345 100644
--- a/intern/cycles/util/util_system.cpp
+++ b/intern/cycles/util/util_system.cpp
@@ -161,8 +161,25 @@ static CPUCapabilities& system_cpu_capabilities()
caps.sse41 = (result[2] & ((int)1 << 19)) != 0;
caps.sse42 = (result[2] & ((int)1 << 20)) != 0;
- caps.avx = (result[2] & ((int)1 << 28)) != 0;
caps.fma3 = (result[2] & ((int)1 << 12)) != 0;
+ caps.avx = false;
+ bool os_uses_xsave_xrestore = (result[2] & ((int)1 << 27)) != 0;
+ bool cpu_avx_support = (result[2] & ((int)1 << 28)) != 0;
+
+ if( os_uses_xsave_xrestore && cpu_avx_support) {
+ // Check if the OS will save the YMM registers
+ uint32_t xcr_feature_mask;
+#if defined(__GNUC__)
+ int edx; /* not used */
+ /* actual opcode for xgetbv */
+ __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a" (xcr_feature_mask) , "=d" (edx) : "c" (0) );
+#elif defined(_MSC_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
+ xcr_feature_mask = (uint32_t)_xgetbv(_XCR_XFEATURE_ENABLED_MASK); /* min VS2010 SP1 compiler is required */
+#else
+ xcr_feature_mask = 0;
+#endif
+ caps.avx = (xcr_feature_mask & 0x6) == 0x6;
+ }
}
#if 0
diff --git a/intern/cycles/util/util_transform.cpp b/intern/cycles/util/util_transform.cpp
index 12c2270a8d4..14613558501 100644
--- a/intern/cycles/util/util_transform.cpp
+++ b/intern/cycles/util/util_transform.cpp
@@ -75,7 +75,7 @@ static bool transform_matrix4_gj_inverse(float R[][4], float M[][4])
}
}
- if(pivotsize == 0)
+ if(UNLIKELY(pivotsize == 0.0f))
return false;
if(pivot != i) {
@@ -106,7 +106,7 @@ static bool transform_matrix4_gj_inverse(float R[][4], float M[][4])
for(int i = 3; i >= 0; --i) {
float f;
- if((f = M[i][i]) == 0)
+ if(UNLIKELY((f = M[i][i]) == 0.0f))
return false;
for(int j = 0; j < 4; j++) {
@@ -135,15 +135,16 @@ Transform transform_inverse(const Transform& tfm)
memcpy(R, &tfmR, sizeof(R));
memcpy(M, &tfm, sizeof(M));
- if(!transform_matrix4_gj_inverse(R, M)) {
+ if(UNLIKELY(!transform_matrix4_gj_inverse(R, M))) {
/* matrix is degenerate (e.g. 0 scale on some axis), ideally we should
* never be in this situation, but try to invert it anyway with tweak */
M[0][0] += 1e-8f;
M[1][1] += 1e-8f;
M[2][2] += 1e-8f;
- if(!transform_matrix4_gj_inverse(R, M))
+ if(UNLIKELY(!transform_matrix4_gj_inverse(R, M))) {
return transform_identity();
+ }
}
memcpy(&tfmR, R, sizeof(R));
diff --git a/intern/cycles/util/util_transform.h b/intern/cycles/util/util_transform.h
index 4c7ce12d1de..5b3dbe42f69 100644
--- a/intern/cycles/util/util_transform.h
+++ b/intern/cycles/util/util_transform.h
@@ -108,9 +108,9 @@ ccl_device_inline Transform transform_transpose(const Transform a)
}
ccl_device_inline Transform make_transform(float a, float b, float c, float d,
- float e, float f, float g, float h,
- float i, float j, float k, float l,
- float m, float n, float o, float p)
+ float e, float f, float g, float h,
+ float i, float j, float k, float l,
+ float m, float n, float o, float p)
{
Transform t;
diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h
index c770931c69b..bfaab3dba3b 100644
--- a/intern/cycles/util/util_types.h
+++ b/intern/cycles/util/util_types.h
@@ -37,6 +37,7 @@
#define ccl_device_noinline static
#define ccl_global
#define ccl_constant
+#define __KERNEL_WITH_SSE_ALIGN__
#if defined(_WIN32) && !defined(FREE_WINDOWS)
@@ -45,6 +46,7 @@
#ifdef __KERNEL_64_BIT__
#define ccl_try_align(...) __declspec(align(__VA_ARGS__))
#else
+#undef __KERNEL_WITH_SSE_ALIGN__
#define ccl_try_align(...) /* not support for function arguments (error C2719) */
#endif
#define ccl_may_alias
@@ -63,8 +65,6 @@
#endif
-#else
-#define ccl_align(...)
#endif
/* Standard Integer Types */
@@ -159,8 +159,8 @@ struct int2 {
__forceinline int& operator[](int i) { return *(&x + i); }
};
-#ifdef __KERNEL_SSE__
struct ccl_try_align(16) int3 {
+#ifdef __KERNEL_SSE__
union {
__m128i m128;
struct { int x, y, z, w; };
@@ -171,7 +171,6 @@ struct ccl_try_align(16) int3 {
__forceinline operator const __m128i&(void) const { return m128; }
__forceinline operator __m128i&(void) { return m128; }
#else
-struct ccl_try_align(16) int3 {
int x, y, z, w;
#endif
@@ -179,8 +178,8 @@ struct ccl_try_align(16) int3 {
__forceinline int& operator[](int i) { return *(&x + i); }
};
-#ifdef __KERNEL_SSE__
struct ccl_try_align(16) int4 {
+#ifdef __KERNEL_SSE__
union {
__m128i m128;
struct { int x, y, z, w; };
@@ -191,7 +190,6 @@ struct ccl_try_align(16) int4 {
__forceinline operator const __m128i&(void) const { return m128; }
__forceinline operator __m128i&(void) { return m128; }
#else
-struct ccl_try_align(16) int4 {
int x, y, z, w;
#endif
@@ -227,8 +225,8 @@ struct float2 {
__forceinline float& operator[](int i) { return *(&x + i); }
};
-#ifdef __KERNEL_SSE__
struct ccl_try_align(16) float3 {
+#ifdef __KERNEL_SSE__
union {
__m128 m128;
struct { float x, y, z, w; };
@@ -239,7 +237,6 @@ struct ccl_try_align(16) float3 {
__forceinline operator const __m128&(void) const { return m128; }
__forceinline operator __m128&(void) { return m128; }
#else
-struct ccl_try_align(16) float3 {
float x, y, z, w;
#endif
@@ -247,8 +244,8 @@ struct ccl_try_align(16) float3 {
__forceinline float& operator[](int i) { return *(&x + i); }
};
-#ifdef __KERNEL_SSE__
struct ccl_try_align(16) float4 {
+#ifdef __KERNEL_SSE__
union {
__m128 m128;
struct { float x, y, z, w; };
@@ -259,7 +256,6 @@ struct ccl_try_align(16) float4 {
__forceinline operator const __m128&(void) const { return m128; }
__forceinline operator __m128&(void) { return m128; }
#else
-struct ccl_try_align(16) float4 {
float x, y, z, w;
#endif
@@ -450,6 +446,53 @@ ccl_device_inline int4 make_int4(const float3& f)
#endif
+/* Interpolation types for textures
+ * cuda also use texture space to store other objects */
+enum InterpolationType {
+ INTERPOLATION_NONE = -1,
+ INTERPOLATION_LINEAR = 0,
+ INTERPOLATION_CLOSEST = 1,
+ INTERPOLATION_CUBIC = 2,
+ INTERPOLATION_SMART = 3,
+};
+
+
+/* macros */
+
+/* hints for branch prediction, only use in code that runs a _lot_ */
+#if defined(__GNUC__) && defined(__KERNEL_CPU__)
+# define LIKELY(x) __builtin_expect(!!(x), 1)
+# define UNLIKELY(x) __builtin_expect(!!(x), 0)
+#else
+# define LIKELY(x) (x)
+# define UNLIKELY(x) (x)
+#endif
+
+/* Causes warning:
+ * incompatible types when assigning to type 'Foo' from type 'Bar'
+ * ... the compiler optimizes away the temp var */
+#ifdef __GNUC__
+#define CHECK_TYPE(var, type) { \
+ __typeof(var) *__tmp; \
+ __tmp = (type *)NULL; \
+ (void)__tmp; \
+} (void)0
+
+#define CHECK_TYPE_PAIR(var_a, var_b) { \
+ __typeof(var_a) *__tmp; \
+ __tmp = (__typeof(var_b) *)NULL; \
+ (void)__tmp; \
+} (void)0
+#else
+# define CHECK_TYPE(var, type)
+# define CHECK_TYPE_PAIR(var_a, var_b)
+#endif
+
+/* can be used in simple macros */
+#define CHECK_TYPE_INLINE(val, type) \
+ ((void)(((type)0) != (val)))
+
+
CCL_NAMESPACE_END
#endif /* __UTIL_TYPES_H__ */
diff --git a/intern/cycles/util/util_view.cpp b/intern/cycles/util/util_view.cpp
index 361a7bc95f2..6bf9c9ed8c0 100644
--- a/intern/cycles/util/util_view.cpp
+++ b/intern/cycles/util/util_view.cpp
@@ -80,8 +80,8 @@ void view_display_info(const char *info)
void view_display_help()
{
- const int w = V.width / 1.15;
- const int h = V.height / 1.15;
+ const int w = (int)((float)V.width / 1.15f);
+ const int h = (int)((float)V.height / 1.15f);
const int x1 = (V.width - w) / 2;
const int x2 = x1 + w;
@@ -100,14 +100,16 @@ void view_display_help()
view_display_text(x1+20, y2-20, "Cycles Renderer");
view_display_text(x1+20, y2-40, "(C) 2011-2014 Blender Foundation");
view_display_text(x1+20, y2-80, "Controls:");
- view_display_text(x1+20, y2-100, "h: Show/Hide this help message");
- view_display_text(x1+20, y2-120, "r: Restart the render");
- view_display_text(x1+20, y2-140, "q: Quit the program");
- view_display_text(x1+20, y2-160, "esc: Cancel the render");
+ view_display_text(x1+20, y2-100, "h: Info/Help");
+ view_display_text(x1+20, y2-120, "r: Reset");
+ view_display_text(x1+20, y2-140, "p: Pause");
+ view_display_text(x1+20, y2-160, "esc: Cancel");
+ view_display_text(x1+20, y2-180, "q: Quit program");
- view_display_text(x1+20, y2-190, "Interactive Mode (i-key):");
- view_display_text(x1+20, y2-210, "LMB: Move camera");
- view_display_text(x1+20, y2-230, "RMB: Rotate camera");
+ view_display_text(x1+20, y2-210, "i: Interactive mode");
+ view_display_text(x1+20, y2-230, "Left mouse: Move camera");
+ view_display_text(x1+20, y2-250, "Right mouse: Rotate camera");
+ view_display_text(x1+20, y2-270, "W/A/S/D: Move camera");
glColor3f(1.0f, 1.0f, 1.0f);
}
@@ -246,9 +248,7 @@ void view_main_loop(const char *title, int width, int height,
glutInitDisplayMode(GLUT_RGB|GLUT_DOUBLE|GLUT_DEPTH);
glutCreateWindow(title);
-#ifndef __APPLE__
glewInit();
-#endif
view_reshape(width, height);
diff --git a/intern/elbeem/intern/mvmcoords.cpp b/intern/elbeem/intern/mvmcoords.cpp
index 281a9656fcf..838fc54491d 100644
--- a/intern/elbeem/intern/mvmcoords.cpp
+++ b/intern/elbeem/intern/mvmcoords.cpp
@@ -18,7 +18,7 @@
#include <algorithm>
#if defined(_MSC_VER) && _MSC_VER > 1600
-// sdt::greater
+// std::greater
#include <functional>
#endif
diff --git a/intern/ffmpeg/ffmpeg_compat.h b/intern/ffmpeg/ffmpeg_compat.h
index ff2cc405f4c..ac4da5b6133 100644
--- a/intern/ffmpeg/ffmpeg_compat.h
+++ b/intern/ffmpeg/ffmpeg_compat.h
@@ -103,6 +103,7 @@ FFMPEG_INLINE
int av_sample_fmt_is_planar(enum AVSampleFormat sample_fmt)
{
/* no planar formats in FFmpeg < 0.9 */
+ (void) sample_fmt;
return 0;
}
@@ -172,6 +173,7 @@ FFMPEG_INLINE
int av_opt_set(void *obj, const char *name, const char *val, int search_flags)
{
const AVOption *rv = NULL;
+ (void) search_flags;
av_set_string3(obj, name, val, 1, &rv);
return rv != NULL;
}
@@ -180,6 +182,7 @@ FFMPEG_INLINE
int av_opt_set_int(void *obj, const char *name, int64_t val, int search_flags)
{
const AVOption *rv = NULL;
+ (void) search_flags;
rv = av_set_int(obj, name, val);
return rv != NULL;
}
@@ -188,6 +191,7 @@ FFMPEG_INLINE
int av_opt_set_double(void *obj, const char *name, double val, int search_flags)
{
const AVOption *rv = NULL;
+ (void) search_flags;
rv = av_set_double(obj, name, val);
return rv != NULL;
}
@@ -210,15 +214,12 @@ enum AVSampleFormat av_get_packed_sample_fmt(enum AVSampleFormat sample_fmt)
}
#endif
-#if ((LIBAVFORMAT_VERSION_MAJOR < 53) || ((LIBAVFORMAT_VERSION_MAJOR == 53) && (LIBAVFORMAT_VERSION_MINOR < 24)) || ((LIBAVFORMAT_VERSION_MAJOR == 53) && (LIBAVFORMAT_VERSION_MINOR < 24) && (LIBAVFORMAT_VERSION_MICRO < 2)))
-# define avformat_close_input(x) av_close_input_file(*(x))
-#endif
-
#if ((LIBAVCODEC_VERSION_MAJOR < 53) || (LIBAVCODEC_VERSION_MAJOR == 53 && LIBAVCODEC_VERSION_MINOR < 35))
FFMPEG_INLINE
int avcodec_open2(AVCodecContext *avctx, AVCodec *codec, AVDictionary **options)
{
/* TODO: no options are taking into account */
+ (void) options;
return avcodec_open(avctx, codec);
}
#endif
@@ -228,6 +229,7 @@ FFMPEG_INLINE
AVStream *avformat_new_stream(AVFormatContext *s, AVCodec *c)
{
/* TODO: no codec is taking into account */
+ (void) c;
return av_new_stream(s, 0);
}
@@ -235,6 +237,7 @@ FFMPEG_INLINE
int avformat_find_stream_info(AVFormatContext *ic, AVDictionary **options)
{
/* TODO: no options are taking into account */
+ (void) options;
return av_find_stream_info(ic);
}
#endif
@@ -435,4 +438,12 @@ AVRational av_get_r_frame_rate_compat(const AVStream *stream)
#endif
}
+#if LIBAVUTIL_VERSION_INT < AV_VERSION_INT(51, 32, 0)
+# define AV_OPT_SEARCH_FAKE_OBJ 0
+#endif
+
+#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(54, 59, 100)
+# define FFMPEG_HAVE_DEPRECATED_FLAGS2
+#endif
+
#endif
diff --git a/intern/ghost/intern/GHOST_NDOFManager.cpp b/intern/ghost/intern/GHOST_NDOFManager.cpp
index f8c707b668c..c99680641c3 100644
--- a/intern/ghost/intern/GHOST_NDOFManager.cpp
+++ b/intern/ghost/intern/GHOST_NDOFManager.cpp
@@ -295,14 +295,14 @@ bool GHOST_NDOFManager::setDevice(unsigned short vendor_id, unsigned short produ
return m_deviceType != NDOF_UnknownDevice;
}
-void GHOST_NDOFManager::updateTranslation(short t[3], GHOST_TUns64 time)
+void GHOST_NDOFManager::updateTranslation(const short t[3], GHOST_TUns64 time)
{
memcpy(m_translation, t, sizeof(m_translation));
m_motionTime = time;
m_motionEventPending = true;
}
-void GHOST_NDOFManager::updateRotation(short r[3], GHOST_TUns64 time)
+void GHOST_NDOFManager::updateRotation(const short r[3], GHOST_TUns64 time)
{
memcpy(m_rotation, r, sizeof(m_rotation));
m_motionTime = time;
@@ -506,7 +506,5 @@ bool GHOST_NDOFManager::sendMotionEvent()
m_system.pushEvent(event);
- m_prevMotionTime = m_motionTime;
-
return true;
}
diff --git a/intern/ghost/intern/GHOST_NDOFManager.h b/intern/ghost/intern/GHOST_NDOFManager.h
index 50f784d89c4..98aebfa4f30 100644
--- a/intern/ghost/intern/GHOST_NDOFManager.h
+++ b/intern/ghost/intern/GHOST_NDOFManager.h
@@ -128,8 +128,8 @@ public:
// rotations are + when CCW, - when CW
// each platform is responsible for getting axis data into this form
// these values should not be scaled (just shuffled or flipped)
- void updateTranslation(short t[3], GHOST_TUns64 time);
- void updateRotation(short r[3], GHOST_TUns64 time);
+ void updateTranslation(const short t[3], GHOST_TUns64 time);
+ void updateRotation(const short r[3], GHOST_TUns64 time);
// the latest raw button data from the device
// use HID button encoding (not NDOF_ButtonT)
diff --git a/intern/ghost/intern/GHOST_NDOFManagerCocoa.mm b/intern/ghost/intern/GHOST_NDOFManagerCocoa.mm
index 4fc4f8016e5..1a029257f09 100644
--- a/intern/ghost/intern/GHOST_NDOFManagerCocoa.mm
+++ b/intern/ghost/intern/GHOST_NDOFManagerCocoa.mm
@@ -79,8 +79,8 @@ static void NDOF_DeviceEvent(io_connect_t connection, natural_t messageType, voi
case kConnexionCmdHandleAxis:
{
// convert to blender view coordinates
- short t[3] = {s->axis[0], -(s->axis[2]), s->axis[1]};
- short r[3] = {-(s->axis[3]), s->axis[5], -(s->axis[4])};
+ const short t[3] = {s->axis[0], -(s->axis[2]), s->axis[1]};
+ const short r[3] = {-(s->axis[3]), s->axis[5], -(s->axis[4])};
ndof_manager->updateTranslation(t, now);
ndof_manager->updateRotation(r, now);
@@ -162,7 +162,7 @@ GHOST_NDOFManagerCocoa::~GHOST_NDOFManagerCocoa()
if (GHOST_NDOFManager3Dconnexion_available())
{
GHOST_NDOFManager3Dconnexion_UnregisterConnexionClient(m_clientID);
- GHOST_NDOFManager3Dconnexion_UnregisterConnexionClient(m_clientID);
+ GHOST_NDOFManager3Dconnexion_UnregisterConnexionClient(m_clientID);
GHOST_NDOFManager3Dconnexion_CleanupConnexionHandlers();
ghost_system = NULL;
diff --git a/intern/ghost/intern/GHOST_NDOFManagerX11.cpp b/intern/ghost/intern/GHOST_NDOFManagerX11.cpp
index 947d8d74461..77e09e7ef49 100644
--- a/intern/ghost/intern/GHOST_NDOFManagerX11.cpp
+++ b/intern/ghost/intern/GHOST_NDOFManagerX11.cpp
@@ -77,23 +77,46 @@ bool GHOST_NDOFManagerX11::available()
return m_available;
}
+/*
+ * Workaround for a problem where we don't enter the 'GHOST_kFinished' state,
+ * this causes any proceeding event to have a very high 'dt' (time delta),
+ * many seconds for eg, causing the view to jump.
+ *
+ * this workaround expect's continuous events, if we miss a motion event,
+ * immediately send a dummy event with no motion to ensure the finished state is reached.
+ */
+#define USE_FINISH_GLITCH_WORKAROUND
+
+
+#ifdef USE_FINISH_GLITCH_WORKAROUND
+static bool motion_test_prev = false;
+#endif
+
bool GHOST_NDOFManagerX11::processEvents()
{
bool anyProcessed = false;
if (m_available) {
spnav_event e;
+
+#ifdef USE_FINISH_GLITCH_WORKAROUND
+ bool motion_test = false;
+#endif
+
while (spnav_poll_event(&e)) {
switch (e.type) {
case SPNAV_EVENT_MOTION:
{
/* convert to blender view coords */
GHOST_TUns64 now = m_system.getMilliSeconds();
- short t[3] = {(short)e.motion.x, (short)e.motion.y, (short)-e.motion.z};
- short r[3] = {(short)-e.motion.rx, (short)-e.motion.ry, (short)e.motion.rz};
+ const short t[3] = {(short)e.motion.x, (short)e.motion.y, (short)-e.motion.z};
+ const short r[3] = {(short)-e.motion.rx, (short)-e.motion.ry, (short)e.motion.rz};
updateTranslation(t, now);
updateRotation(r, now);
+#ifdef USE_FINISH_GLITCH_WORKAROUND
+ motion_test = true;
+#endif
break;
}
case SPNAV_EVENT_BUTTON:
@@ -103,6 +126,20 @@ bool GHOST_NDOFManagerX11::processEvents()
}
anyProcessed = true;
}
+
+#ifdef USE_FINISH_GLITCH_WORKAROUND
+ if (motion_test_prev == true && motion_test == false) {
+ GHOST_TUns64 now = m_system.getMilliSeconds();
+ const short v[3] = {0, 0, 0};
+
+ updateTranslation(v, now);
+ updateRotation(v, now);
+
+ anyProcessed = true;
+ }
+ motion_test_prev = motion_test;
+#endif
+
}
return anyProcessed;
diff --git a/intern/ghost/intern/GHOST_SystemWin32.cpp b/intern/ghost/intern/GHOST_SystemWin32.cpp
index 8280474437b..070dd86c0fb 100644
--- a/intern/ghost/intern/GHOST_SystemWin32.cpp
+++ b/intern/ghost/intern/GHOST_SystemWin32.cpp
@@ -843,14 +843,14 @@ bool GHOST_SystemWin32::processNDOF(RAWINPUT const& raw)
{
case 1: // translation
{
- short *axis = (short *)(data + 1);
+ const short *axis = (short *)(data + 1);
// massage into blender view coords (same goes for rotation)
- short t[3] = {axis[0], -axis[2], axis[1]};
+ const short t[3] = {axis[0], -axis[2], axis[1]};
m_ndofManager->updateTranslation(t, now);
if (raw.data.hid.dwSizeHid == 13)
{ // this report also includes rotation
- short r[3] = {-axis[3], axis[5], -axis[4]};
+ const short r[3] = {-axis[3], axis[5], -axis[4]};
m_ndofManager->updateRotation(r, now);
// I've never gotten one of these, has anyone else?
@@ -860,8 +860,8 @@ bool GHOST_SystemWin32::processNDOF(RAWINPUT const& raw)
}
case 2: // rotation
{
- short *axis = (short *)(data + 1);
- short r[3] = {-axis[0], axis[2], -axis[1]};
+ const short *axis = (short *)(data + 1);
+ const short r[3] = {-axis[0], axis[2], -axis[1]};
m_ndofManager->updateRotation(r, now);
break;
}
diff --git a/intern/ghost/intern/GHOST_SystemX11.cpp b/intern/ghost/intern/GHOST_SystemX11.cpp
index 9900f7e153f..8f1f9867724 100644
--- a/intern/ghost/intern/GHOST_SystemX11.cpp
+++ b/intern/ghost/intern/GHOST_SystemX11.cpp
@@ -755,7 +755,7 @@ GHOST_SystemX11::processEvent(XEvent *xe)
case KeyRelease:
{
XKeyEvent *xke = &(xe->xkey);
- KeySym key_sym = XLookupKeysym(xke, 0);
+ KeySym key_sym;
char ascii;
#if defined(WITH_X11_XINPUT) && defined(X_HAVE_UTF8_STRING)
/* utf8_array[] is initial buffer used for Xutf8LookupString().
@@ -771,7 +771,29 @@ GHOST_SystemX11::processEvent(XEvent *xe)
char *utf8_buf = NULL;
#endif
- GHOST_TKey gkey = convertXKey(key_sym);
+ GHOST_TKey gkey;
+
+ /* In keyboards like latin ones,
+ * numbers need a 'Shift' to be accessed but key_sym
+ * is unmodified (or anyone swapping the keys with xmodmap).
+ *
+ * Here we look at the 'Shifted' version of the key.
+ * If it is a number, then we take it instead of the normal key.
+ *
+ * The modified key is sent in the 'ascii' variable anyway.
+ */
+ if ((xke->keycode >= 10 && xke->keycode < 20) &&
+ ((key_sym = XLookupKeysym(xke, ShiftMask)) >= XK_0) && (key_sym <= XK_9))
+ {
+ /* pass (keep shift'ed key_sym) */
+ }
+ else {
+ /* regular case */
+ key_sym = XLookupKeysym(xke, 0);
+ }
+
+ gkey = convertXKey(key_sym);
+
GHOST_TEventType type = (xke->type == KeyPress) ?
GHOST_kEventKeyDown : GHOST_kEventKeyUp;
diff --git a/intern/ghost/intern/GHOST_WindowX11.cpp b/intern/ghost/intern/GHOST_WindowX11.cpp
index 4e3fcd4da3f..56e225e94a2 100644
--- a/intern/ghost/intern/GHOST_WindowX11.cpp
+++ b/intern/ghost/intern/GHOST_WindowX11.cpp
@@ -186,7 +186,8 @@ GHOST_WindowX11(
m_valid_setup(false),
m_invalid_window(false),
m_empty_cursor(None),
- m_custom_cursor(None)
+ m_custom_cursor(None),
+ m_visible_cursor(None)
{
/* Set up the minimum atrributes that we require and see if
@@ -1454,7 +1455,10 @@ setWindowCursorVisibility(
Cursor xcursor;
if (visible) {
- xcursor = getStandardCursor(getCursorShape() );
+ if (m_visible_cursor)
+ xcursor = m_visible_cursor;
+ else
+ xcursor = getStandardCursor(getCursorShape() );
}
else {
xcursor = getEmptyCursor();
@@ -1517,6 +1521,8 @@ setWindowCursorShape(
GHOST_TStandardCursor shape)
{
Cursor xcursor = getStandardCursor(shape);
+
+ m_visible_cursor = xcursor;
XDefineCursor(m_display, m_window, xcursor);
XFlush(m_display);
@@ -1566,6 +1572,8 @@ setWindowCustomCursorShape(
m_custom_cursor = XCreatePixmapCursor(m_display, bitmap_pix, mask_pix, &fg, &bg, hotX, hotY);
XDefineCursor(m_display, m_window, m_custom_cursor);
XFlush(m_display);
+
+ m_visible_cursor = m_custom_cursor;
XFreePixmap(m_display, bitmap_pix);
XFreePixmap(m_display, mask_pix);
diff --git a/intern/ghost/intern/GHOST_WindowX11.h b/intern/ghost/intern/GHOST_WindowX11.h
index ff7b7409627..93ee9edda0e 100644
--- a/intern/ghost/intern/GHOST_WindowX11.h
+++ b/intern/ghost/intern/GHOST_WindowX11.h
@@ -391,6 +391,9 @@ private:
/** XCursor structure of the custom cursor */
Cursor m_custom_cursor;
+
+ /** XCursor to show when cursor is visible */
+ Cursor m_visible_cursor;
/** Cache of XC_* ID's to XCursor structures */
std::map<unsigned int, Cursor> m_standard_cursors;
diff --git a/intern/ghost/test/multitest/MultiTest.c b/intern/ghost/test/multitest/MultiTest.c
index 8fb46ffc385..9a192c17180 100644
--- a/intern/ghost/test/multitest/MultiTest.c
+++ b/intern/ghost/test/multitest/MultiTest.c
@@ -74,7 +74,7 @@ void multitestapp_exit(MultiTestApp *app);
/**/
-void rect_bevel_side(int rect[2][2], int side, float *lt, float *dk, float *col, int width)
+void rect_bevel_side(int rect[2][2], int side, float *lt, float *dk, const float col[3], int width)
{
int ltidx = (side / 2) % 4;
int dkidx = (ltidx + 1 + (side & 1)) % 4;
diff --git a/intern/guardedalloc/intern/mallocn.c b/intern/guardedalloc/intern/mallocn.c
index 2ac01a6c7e4..e85fba7a6d0 100644
--- a/intern/guardedalloc/intern/mallocn.c
+++ b/intern/guardedalloc/intern/mallocn.c
@@ -15,11 +15,6 @@
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
- * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
- * All rights reserved.
- *
- * The Original Code is: all of this file.
- *
* Contributor(s): Brecht Van Lommel
* Campbell Barton
*
@@ -43,7 +38,7 @@ size_t (*MEM_allocN_len)(const void *vmemh) = MEM_lockfree_allocN_len;
void (*MEM_freeN)(void *vmemh) = MEM_lockfree_freeN;
void *(*MEM_dupallocN)(const void *vmemh) = MEM_lockfree_dupallocN;
void *(*MEM_reallocN_id)(void *vmemh, size_t len, const char *str) = MEM_lockfree_reallocN_id;
-void *(*MEM_recallocN_id)(void *vmemh, size_t len, const char *str) = MEM_lockfree_recallocN_id;;
+void *(*MEM_recallocN_id)(void *vmemh, size_t len, const char *str) = MEM_lockfree_recallocN_id;
void *(*MEM_callocN)(size_t len, const char *str) = MEM_lockfree_callocN;
void *(*MEM_mallocN)(size_t len, const char *str) = MEM_lockfree_mallocN;
void *(*MEM_mapallocN)(size_t len, const char *str) = MEM_lockfree_mapallocN;
@@ -71,7 +66,7 @@ void MEM_use_guarded_allocator(void)
MEM_freeN = MEM_guarded_freeN;
MEM_dupallocN = MEM_guarded_dupallocN;
MEM_reallocN_id = MEM_guarded_reallocN_id;
- MEM_recallocN_id = MEM_guarded_recallocN_id;;
+ MEM_recallocN_id = MEM_guarded_recallocN_id;
MEM_callocN = MEM_guarded_callocN;
MEM_mallocN = MEM_guarded_mallocN;
MEM_mapallocN = MEM_guarded_mapallocN;
diff --git a/intern/guardedalloc/intern/mallocn_guarded_impl.c b/intern/guardedalloc/intern/mallocn_guarded_impl.c
index 352d18df732..172c79d50cd 100644
--- a/intern/guardedalloc/intern/mallocn_guarded_impl.c
+++ b/intern/guardedalloc/intern/mallocn_guarded_impl.c
@@ -497,9 +497,9 @@ void *MEM_guarded_mallocN(size_t len, const char *str)
memh = (MemHead *)malloc(len + sizeof(MemHead) + sizeof(MemTail));
- if (memh) {
+ if (LIKELY(memh)) {
make_memhead_header(memh, len, str);
- if (malloc_debug_memset && len)
+ if (UNLIKELY(malloc_debug_memset && len))
memset(memh + 1, 255, len);
#ifdef DEBUG_MEMCOUNTER
@@ -544,7 +544,7 @@ void *MEM_guarded_mapallocN(size_t len, const char *str)
/* on 64 bit, simply use calloc instead, as mmap does not support
* allocating > 4 GB on Windows. the only reason mapalloc exists
* is to get around address space limitations in 32 bit OSes. */
- if(sizeof(void*) >= 8)
+ if (sizeof(void *) >= 8)
return MEM_guarded_callocN(len, str);
len = SIZET_ALIGN_4(len);
@@ -735,7 +735,7 @@ static void MEM_guarded_printmemlist_internal(int pydict)
membl->_count);
#else
print_error("%s len: " SIZET_FORMAT " %p\n",
- membl->name, SIZET_ARG(membl->len), membl + 1);
+ membl->name, SIZET_ARG(membl->len), (void *)(membl + 1));
#endif
#ifdef DEBUG_BACKTRACE
print_memhead_backtrace(membl);
@@ -951,7 +951,7 @@ static void rem_memblock(MemHead *memh)
#endif
}
else {
- if (malloc_debug_memset && memh->len)
+ if (UNLIKELY(malloc_debug_memset && memh->len))
memset(memh + 1, 255, memh->len);
free(memh);
}
diff --git a/intern/guardedalloc/intern/mallocn_intern.h b/intern/guardedalloc/intern/mallocn_intern.h
index db45b59b884..b0fd52d2766 100644
--- a/intern/guardedalloc/intern/mallocn_intern.h
+++ b/intern/guardedalloc/intern/mallocn_intern.h
@@ -77,6 +77,14 @@
#define SIZET_ALIGN_4(len) ((len + 3) & ~(size_t)3)
+#ifdef __GNUC__
+# define LIKELY(x) __builtin_expect(!!(x), 1)
+# define UNLIKELY(x) __builtin_expect(!!(x), 0)
+#else
+# define LIKELY(x) (x)
+# define UNLIKELY(x) (x)
+#endif
+
/* Prototypes for counted allocator functions */
size_t MEM_lockfree_allocN_len(const void *vmemh) ATTR_WARN_UNUSED_RESULT;
void MEM_lockfree_freeN(void *vmemh);
diff --git a/intern/guardedalloc/intern/mallocn_lockfree_impl.c b/intern/guardedalloc/intern/mallocn_lockfree_impl.c
index 2c7c087966a..6fc01807af3 100644
--- a/intern/guardedalloc/intern/mallocn_lockfree_impl.c
+++ b/intern/guardedalloc/intern/mallocn_lockfree_impl.c
@@ -15,11 +15,6 @@
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
- * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
- * All rights reserved.
- *
- * The Original Code is: all of this file.
- *
* Contributor(s): Brecht Van Lommel
* Campbell Barton
* Sergey Sharybin
@@ -126,7 +121,7 @@ void MEM_lockfree_freeN(void *vmemh)
#endif
}
else {
- if (malloc_debug_memset && len) {
+ if (UNLIKELY(malloc_debug_memset && len)) {
memset(memh + 1, 255, len);
}
free(memh);
@@ -219,7 +214,7 @@ void *MEM_lockfree_callocN(size_t len, const char *str)
memh = (MemHead *)calloc(1, len + sizeof(MemHead));
- if (memh) {
+ if (LIKELY(memh)) {
memh->len = len;
atomic_add_u(&totblock, 1);
atomic_add_z(&mem_in_use, len);
@@ -242,8 +237,8 @@ void *MEM_lockfree_mallocN(size_t len, const char *str)
memh = (MemHead *)malloc(len + sizeof(MemHead));
- if (memh) {
- if (malloc_debug_memset && len) {
+ if (LIKELY(memh)) {
+ if (UNLIKELY(malloc_debug_memset && len)) {
memset(memh + 1, 255, len);
}
@@ -268,7 +263,7 @@ void *MEM_lockfree_mapallocN(size_t len, const char *str)
/* on 64 bit, simply use calloc instead, as mmap does not support
* allocating > 4 GB on Windows. the only reason mapalloc exists
* is to get around address space limitations in 32 bit OSes. */
- if(sizeof(void*) >= 8)
+ if (sizeof(void *) >= 8)
return MEM_lockfree_callocN(len, str);
len = SIZET_ALIGN_4(len);
diff --git a/intern/itasc/SConscript b/intern/itasc/SConscript
index 1b7709bb986..bd20368f001 100644
--- a/intern/itasc/SConscript
+++ b/intern/itasc/SConscript
@@ -35,7 +35,4 @@ incs = '. ../../extern/Eigen3'
defs = []
-if env['OURPLATFORM']=='darwin' and env['C_COMPILER_ID'] == 'clang' and env['CCVERSION'] >= '3.4': # workaround for friend declaration specifies a default argument expression, not allowed anymore
- env.BlenderLib ('bf_intern_itasc', sources, Split(incs), defs, libtype=['intern','player'], priority=[20,100], cc_compilerchange='/usr/bin/gcc', cxx_compilerchange='/usr/bin/g++' )
-else:
- env.BlenderLib ('bf_intern_itasc', sources, Split(incs), defs, libtype=['intern','player'], priority=[20,100])
+env.BlenderLib ('bf_intern_itasc', sources, Split(incs), defs, libtype=['intern','player'], priority=[20,100])
diff --git a/intern/itasc/kdl/frameacc.hpp b/intern/itasc/kdl/frameacc.hpp
index 40dd5bfa712..bccd229804d 100644
--- a/intern/itasc/kdl/frameacc.hpp
+++ b/intern/itasc/kdl/frameacc.hpp
@@ -78,9 +78,9 @@ public:
IMETHOD friend VectorAcc operator / (const VectorAcc& r2,const doubleAcc& r1);
- IMETHOD friend bool Equal(const VectorAcc& r1,const VectorAcc& r2,double eps=epsilon);
- IMETHOD friend bool Equal(const Vector& r1,const VectorAcc& r2,double eps=epsilon);
- IMETHOD friend bool Equal(const VectorAcc& r1,const Vector& r2,double eps=epsilon);
+ IMETHOD friend bool Equal(const VectorAcc& r1,const VectorAcc& r2,double eps);
+ IMETHOD friend bool Equal(const Vector& r1,const VectorAcc& r2,double eps);
+ IMETHOD friend bool Equal(const VectorAcc& r1,const Vector& r2,double eps);
IMETHOD friend VectorAcc operator - (const VectorAcc& r);
IMETHOD friend doubleAcc dot(const VectorAcc& lhs,const VectorAcc& rhs);
IMETHOD friend doubleAcc dot(const VectorAcc& lhs,const Vector& rhs);
@@ -132,9 +132,9 @@ public:
IMETHOD friend RotationAcc operator* (const RotationAcc& r1,const RotationAcc& r2);
IMETHOD friend RotationAcc operator* (const Rotation& r1,const RotationAcc& r2);
IMETHOD friend RotationAcc operator* (const RotationAcc& r1,const Rotation& r2);
- IMETHOD friend bool Equal(const RotationAcc& r1,const RotationAcc& r2,double eps=epsilon);
- IMETHOD friend bool Equal(const Rotation& r1,const RotationAcc& r2,double eps=epsilon);
- IMETHOD friend bool Equal(const RotationAcc& r1,const Rotation& r2,double eps=epsilon);
+ IMETHOD friend bool Equal(const RotationAcc& r1,const RotationAcc& r2,double eps);
+ IMETHOD friend bool Equal(const Rotation& r1,const RotationAcc& r2,double eps);
+ IMETHOD friend bool Equal(const RotationAcc& r1,const Rotation& r2,double eps);
IMETHOD TwistAcc Inverse(const TwistAcc& arg) const;
IMETHOD TwistAcc Inverse(const Twist& arg) const;
IMETHOD TwistAcc operator * (const TwistAcc& arg) const;
@@ -170,9 +170,9 @@ public:
IMETHOD friend FrameAcc operator * (const FrameAcc& f1,const FrameAcc& f2);
IMETHOD friend FrameAcc operator * (const Frame& f1,const FrameAcc& f2);
IMETHOD friend FrameAcc operator * (const FrameAcc& f1,const Frame& f2);
- IMETHOD friend bool Equal(const FrameAcc& r1,const FrameAcc& r2,double eps=epsilon);
- IMETHOD friend bool Equal(const Frame& r1,const FrameAcc& r2,double eps=epsilon);
- IMETHOD friend bool Equal(const FrameAcc& r1,const Frame& r2,double eps=epsilon);
+ IMETHOD friend bool Equal(const FrameAcc& r1,const FrameAcc& r2,double eps);
+ IMETHOD friend bool Equal(const Frame& r1,const FrameAcc& r2,double eps);
+ IMETHOD friend bool Equal(const FrameAcc& r1,const Frame& r2,double eps);
IMETHOD TwistAcc Inverse(const TwistAcc& arg) const;
IMETHOD TwistAcc Inverse(const Twist& arg) const;
@@ -226,9 +226,9 @@ public:
// the new point.
// Complexity : 6M+6A
- IMETHOD friend bool Equal(const TwistAcc& a,const TwistAcc& b,double eps=epsilon);
- IMETHOD friend bool Equal(const Twist& a,const TwistAcc& b,double eps=epsilon);
- IMETHOD friend bool Equal(const TwistAcc& a,const Twist& b,double eps=epsilon);
+ IMETHOD friend bool Equal(const TwistAcc& a,const TwistAcc& b,double eps);
+ IMETHOD friend bool Equal(const Twist& a,const TwistAcc& b,double eps);
+ IMETHOD friend bool Equal(const TwistAcc& a,const Twist& b,double eps);
IMETHOD Twist GetTwist() const;
@@ -240,9 +240,18 @@ public:
};
-
-
-
+IMETHOD bool Equal(const VectorAcc&, const VectorAcc&, double = epsilon);
+IMETHOD bool Equal(const Vector&, const VectorAcc&, double = epsilon);
+IMETHOD bool Equal(const VectorAcc&, const Vector&, double = epsilon);
+IMETHOD bool Equal(const RotationAcc&, const RotationAcc&, double = epsilon);
+IMETHOD bool Equal(const Rotation&, const RotationAcc&, double = epsilon);
+IMETHOD bool Equal(const RotationAcc&, const Rotation&, double = epsilon);
+IMETHOD bool Equal(const FrameAcc&, const FrameAcc&, double = epsilon);
+IMETHOD bool Equal(const Frame&, const FrameAcc&, double = epsilon);
+IMETHOD bool Equal(const FrameAcc&, const Frame&, double = epsilon);
+IMETHOD bool Equal(const TwistAcc&, const TwistAcc&, double = epsilon);
+IMETHOD bool Equal(const Twist&, const TwistAcc&, double = epsilon);
+IMETHOD bool Equal(const TwistAcc&, const Twist&, double = epsilon);
#ifdef KDL_INLINE
diff --git a/intern/itasc/kdl/frames.hpp b/intern/itasc/kdl/frames.hpp
index 28a59898e20..87eedea29f7 100644
--- a/intern/itasc/kdl/frames.hpp
+++ b/intern/itasc/kdl/frames.hpp
@@ -248,10 +248,10 @@ public:
//! do not use operator == because the definition of Equal(.,.) is slightly
//! different. It compares whether the 2 arguments are equal in an eps-interval
- inline friend bool Equal(const Vector& a,const Vector& b,double eps=epsilon);
+ inline friend bool Equal(const Vector& a,const Vector& b,double eps);
//! return a normalized vector
- inline friend Vector Normalize(const Vector& a, double eps=epsilon);
+ inline friend Vector Normalize(const Vector& a, double eps);
//! The literal equality operator==(), also identical.
inline friend bool operator==(const Vector& a,const Vector& b);
@@ -261,7 +261,7 @@ public:
friend class Rotation;
friend class Frame;
};
-
+ inline Vector Normalize(const Vector&, double eps=epsilon);
/**
\brief represents rotations in 3 dimensional space.
@@ -502,7 +502,7 @@ public:
//! do not use operator == because the definition of Equal(.,.) is slightly
//! different. It compares whether the 2 arguments are equal in an eps-interval
- friend bool Equal(const Rotation& a,const Rotation& b,double eps=epsilon);
+
//! The literal equality operator==(), also identical.
friend bool operator==(const Rotation& a,const Rotation& b);
@@ -663,7 +663,7 @@ public:
//! do not use operator == because the definition of Equal(.,.) is slightly
//! different. It compares whether the 2 arguments are equal in an eps-interval
- inline friend bool Equal(const Frame& a,const Frame& b,double eps=epsilon);
+ inline friend bool Equal(const Frame& a,const Frame& b,double eps);
//! The literal equality operator==(), also identical.
inline friend bool operator==(const Frame& a,const Frame& b);
@@ -735,7 +735,7 @@ public:
//! do not use operator == because the definition of Equal(.,.) is slightly
//! different. It compares whether the 2 arguments are equal in an eps-interval
- inline friend bool Equal(const Twist& a,const Twist& b,double eps=epsilon);
+ inline friend bool Equal(const Twist& a,const Twist& b,double eps);
//! The literal equality operator==(), also identical.
inline friend bool operator==(const Twist& a,const Twist& b);
@@ -898,7 +898,7 @@ public:
//! do not use operator == because the definition of Equal(.,.) is slightly
//! different. It compares whether the 2 arguments are equal in an eps-interval
- inline friend bool Equal(const Wrench& a,const Wrench& b,double eps=epsilon);
+ inline friend bool Equal(const Wrench& a,const Wrench& b,double eps);
//! The literal equality operator==(), also identical.
inline friend bool operator==(const Wrench& a,const Wrench& b);
@@ -979,7 +979,7 @@ public:
//! do not use operator == because the definition of Equal(.,.) is slightly
//! different. It compares whether the 2 arguments are equal in an eps-interval
- inline friend bool Equal(const Vector2& a,const Vector2& b,double eps=epsilon);
+ inline friend bool Equal(const Vector2& a,const Vector2& b,double eps);
friend class Rotation2;
};
@@ -1026,7 +1026,7 @@ public:
//! do not use operator == because the definition of Equal(.,.) is slightly
//! different. It compares whether the 2 arguments are equal in an eps-interval
- inline friend bool Equal(const Rotation2& a,const Rotation2& b,double eps=epsilon);
+ inline friend bool Equal(const Rotation2& a,const Rotation2& b,double eps);
};
//! A 2D frame class, for further documentation see the Frames class
@@ -1067,9 +1067,18 @@ public:
tmp.SetIdentity();
return tmp;
}
- inline friend bool Equal(const Frame2& a,const Frame2& b,double eps=epsilon);
+ inline friend bool Equal(const Frame2& a,const Frame2& b,double eps);
};
+inline bool Equal(const Vector&, const Vector&, double = epsilon);
+ bool Equal(const Rotation&, const Rotation&, double = epsilon);
+inline bool Equal(const Frame&, const Frame&, double = epsilon);
+inline bool Equal(const Twist&, const Twist&, double = epsilon);
+inline bool Equal(const Wrench&, const Wrench&, double = epsilon);
+inline bool Equal(const Vector2&, const Vector2&, double = epsilon);
+inline bool Equal(const Rotation2&, const Rotation2&, double = epsilon);
+inline bool Equal(const Frame2&, const Frame2&, double = epsilon);
+
IMETHOD Vector diff(const Vector& a,const Vector& b,double dt=1);
IMETHOD Vector diff(const Rotation& R_a_b1,const Rotation& R_a_b2,double dt=1);
IMETHOD Twist diff(const Frame& F_a_b1,const Frame& F_a_b2,double dt=1);
diff --git a/intern/itasc/kdl/framevel.hpp b/intern/itasc/kdl/framevel.hpp
index e95c5ef7907..17e1f2adfa0 100644
--- a/intern/itasc/kdl/framevel.hpp
+++ b/intern/itasc/kdl/framevel.hpp
@@ -110,9 +110,9 @@ public:
IMETHOD friend void SetToZero(VectorVel& v);
- IMETHOD friend bool Equal(const VectorVel& r1,const VectorVel& r2,double eps=epsilon);
- IMETHOD friend bool Equal(const Vector& r1,const VectorVel& r2,double eps=epsilon);
- IMETHOD friend bool Equal(const VectorVel& r1,const Vector& r2,double eps=epsilon);
+ IMETHOD friend bool Equal(const VectorVel& r1,const VectorVel& r2,double eps);
+ IMETHOD friend bool Equal(const Vector& r1,const VectorVel& r2,double eps);
+ IMETHOD friend bool Equal(const VectorVel& r1,const Vector& r2,double eps);
IMETHOD friend VectorVel operator - (const VectorVel& r);
IMETHOD friend doubleVel dot(const VectorVel& lhs,const VectorVel& rhs);
IMETHOD friend doubleVel dot(const VectorVel& lhs,const Vector& rhs);
@@ -166,9 +166,9 @@ public:
IMETHOD friend RotationVel operator* (const RotationVel& r1,const RotationVel& r2);
IMETHOD friend RotationVel operator* (const Rotation& r1,const RotationVel& r2);
IMETHOD friend RotationVel operator* (const RotationVel& r1,const Rotation& r2);
- IMETHOD friend bool Equal(const RotationVel& r1,const RotationVel& r2,double eps=epsilon);
- IMETHOD friend bool Equal(const Rotation& r1,const RotationVel& r2,double eps=epsilon);
- IMETHOD friend bool Equal(const RotationVel& r1,const Rotation& r2,double eps=epsilon);
+ IMETHOD friend bool Equal(const RotationVel& r1,const RotationVel& r2,double eps);
+ IMETHOD friend bool Equal(const Rotation& r1,const RotationVel& r2,double eps);
+ IMETHOD friend bool Equal(const RotationVel& r1,const Rotation& r2,double eps);
IMETHOD TwistVel Inverse(const TwistVel& arg) const;
IMETHOD TwistVel Inverse(const Twist& arg) const;
@@ -220,9 +220,9 @@ public:
IMETHOD friend FrameVel operator * (const FrameVel& f1,const FrameVel& f2);
IMETHOD friend FrameVel operator * (const Frame& f1,const FrameVel& f2);
IMETHOD friend FrameVel operator * (const FrameVel& f1,const Frame& f2);
- IMETHOD friend bool Equal(const FrameVel& r1,const FrameVel& r2,double eps=epsilon);
- IMETHOD friend bool Equal(const Frame& r1,const FrameVel& r2,double eps=epsilon);
- IMETHOD friend bool Equal(const FrameVel& r1,const Frame& r2,double eps=epsilon);
+ IMETHOD friend bool Equal(const FrameVel& r1,const FrameVel& r2,double eps);
+ IMETHOD friend bool Equal(const Frame& r1,const FrameVel& r2,double eps);
+ IMETHOD friend bool Equal(const FrameVel& r1,const Frame& r2,double eps);
IMETHOD TwistVel Inverse(const TwistVel& arg) const;
IMETHOD TwistVel Inverse(const Twist& arg) const;
@@ -292,9 +292,9 @@ public:
// = Equality operators
// do not use operator == because the definition of Equal(.,.) is slightly
// different. It compares whether the 2 arguments are equal in an eps-interval
- IMETHOD friend bool Equal(const TwistVel& a,const TwistVel& b,double eps=epsilon);
- IMETHOD friend bool Equal(const Twist& a,const TwistVel& b,double eps=epsilon);
- IMETHOD friend bool Equal(const TwistVel& a,const Twist& b,double eps=epsilon);
+ IMETHOD friend bool Equal(const TwistVel& a,const TwistVel& b,double eps);
+ IMETHOD friend bool Equal(const Twist& a,const TwistVel& b,double eps);
+ IMETHOD friend bool Equal(const TwistVel& a,const Twist& b,double eps);
// = Conversion to other entities
IMETHOD Twist GetTwist() const;
@@ -305,6 +305,19 @@ public:
};
+IMETHOD bool Equal(const VectorVel&, const VectorVel&, double = epsilon);
+IMETHOD bool Equal(const Vector&, const VectorVel&, double = epsilon);
+IMETHOD bool Equal(const VectorVel&, const Vector&, double = epsilon);
+IMETHOD bool Equal(const RotationVel&, const RotationVel&, double = epsilon);
+IMETHOD bool Equal(const Rotation&, const RotationVel&, double = epsilon);
+IMETHOD bool Equal(const RotationVel&, const Rotation&, double = epsilon);
+IMETHOD bool Equal(const FrameVel&, const FrameVel&, double = epsilon);
+IMETHOD bool Equal(const Frame&, const FrameVel&, double = epsilon);
+IMETHOD bool Equal(const FrameVel&, const Frame&, double = epsilon);
+IMETHOD bool Equal(const TwistVel&, const TwistVel&, double = epsilon);
+IMETHOD bool Equal(const Twist&, const TwistVel&, double = epsilon);
+IMETHOD bool Equal(const TwistVel&, const Twist&, double = epsilon);
+
IMETHOD VectorVel diff(const VectorVel& a,const VectorVel& b,double dt=1.0) {
return VectorVel(diff(a.p,b.p,dt),diff(a.v,b.v,dt));
}
diff --git a/intern/itasc/kdl/jacobian.hpp b/intern/itasc/kdl/jacobian.hpp
index e9057451c9f..9708ebd37be 100644
--- a/intern/itasc/kdl/jacobian.hpp
+++ b/intern/itasc/kdl/jacobian.hpp
@@ -45,7 +45,7 @@ namespace KDL
bool operator ==(const Jacobian& arg);
bool operator !=(const Jacobian& arg);
- friend bool Equal(const Jacobian& a,const Jacobian& b,double eps=epsilon);
+ friend bool Equal(const Jacobian& a,const Jacobian& b,double eps);
~Jacobian();
@@ -63,6 +63,7 @@ namespace KDL
};
+ bool Equal(const Jacobian&, const Jacobian&, double = epsilon);
}
#endif
diff --git a/intern/itasc/kdl/jntarray.hpp b/intern/itasc/kdl/jntarray.hpp
index ece6b0bdb6b..886171b11db 100644
--- a/intern/itasc/kdl/jntarray.hpp
+++ b/intern/itasc/kdl/jntarray.hpp
@@ -209,12 +209,12 @@ class MyTask : public RTT::TaskContext
* @return true if each element of src1 is within eps of the same
* element in src2, or if both src1 and src2 have no data (ie 0==rows())
*/
- friend bool Equal(const JntArray& src1,const JntArray& src2,double eps=epsilon);
+ friend bool Equal(const JntArray& src1,const JntArray& src2,double eps);
friend bool operator==(const JntArray& src1,const JntArray& src2);
//friend bool operator!=(const JntArray& src1,const JntArray& src2);
};
-
+ bool Equal(const JntArray&,const JntArray&, double = epsilon);
bool operator==(const JntArray& src1,const JntArray& src2);
//bool operator!=(const JntArray& src1,const JntArray& src2);
diff --git a/intern/itasc/kdl/jntarrayacc.hpp b/intern/itasc/kdl/jntarrayacc.hpp
index 275aa58f21e..fd1c26430e8 100644
--- a/intern/itasc/kdl/jntarrayacc.hpp
+++ b/intern/itasc/kdl/jntarrayacc.hpp
@@ -58,9 +58,10 @@ namespace KDL
friend void Divide(const JntArrayAcc& src,const doubleVel& factor,JntArrayAcc& dest);
friend void Divide(const JntArrayAcc& src,const doubleAcc& factor,JntArrayAcc& dest);
friend void SetToZero(JntArrayAcc& array);
- friend bool Equal(const JntArrayAcc& src1,const JntArrayAcc& src2,double eps=epsilon);
-
+ friend bool Equal(const JntArrayAcc& src1,const JntArrayAcc& src2,double eps);
};
+
+ bool Equal(const JntArrayAcc&, const JntArrayAcc&, double = epsilon);
}
#endif
diff --git a/intern/itasc/kdl/jntarrayvel.hpp b/intern/itasc/kdl/jntarrayvel.hpp
index faa82076ebb..480f84f1708 100644
--- a/intern/itasc/kdl/jntarrayvel.hpp
+++ b/intern/itasc/kdl/jntarrayvel.hpp
@@ -51,9 +51,10 @@ namespace KDL
friend void Divide(const JntArrayVel& src,const double& factor,JntArrayVel& dest);
friend void Divide(const JntArrayVel& src,const doubleVel& factor,JntArrayVel& dest);
friend void SetToZero(JntArrayVel& array);
- friend bool Equal(const JntArrayVel& src1,const JntArrayVel& src2,double eps=epsilon);
-
+ friend bool Equal(const JntArrayVel& src1,const JntArrayVel& src2,double eps);
};
+
+ bool Equal(const JntArrayVel&, const JntArrayVel&, double = epsilon);
}
#endif
diff --git a/intern/locale/CMakeLists.txt b/intern/locale/CMakeLists.txt
index 3599aa68545..217fe9a8c71 100644
--- a/intern/locale/CMakeLists.txt
+++ b/intern/locale/CMakeLists.txt
@@ -36,6 +36,14 @@ set(SRC
boost_locale_wrapper.h
)
+if(WITH_HEADLESS)
+ add_definitions(-DWITH_HEADLESS)
+endif()
+
+if(WITH_GHOST_SDL)
+ add_definitions(-DWITH_GHOST_SDL)
+endif()
+
if(WITH_INTERNATIONAL)
list(APPEND INC_SYS
${BOOST_INCLUDE_DIR}
@@ -51,5 +59,10 @@ blender_add_lib(bf_intern_locale "${SRC}" "${INC}" "${INC_SYS}")
set(MSFFMT_SRC
msgfmt.cc
)
-
add_executable(msgfmt ${MSFFMT_SRC})
+
+if(CMAKE_C_COMPILER_ID MATCHES "Clang" AND (NOT (CMAKE_C_COMPILER_VERSION VERSION_LESS 3.4)))
+ # needed for clang 3.4+
+ target_link_libraries(msgfmt ${PLATFORM_LINKLIBS})
+endif()
+
diff --git a/intern/locale/SConscript b/intern/locale/SConscript
index 4136ac8237d..24828c120ec 100644
--- a/intern/locale/SConscript
+++ b/intern/locale/SConscript
@@ -66,10 +66,6 @@ if env['WITH_BF_INTERNATIONAL']:
locale = env.Clone()
- msgfmt_executable = targetpath
- if env['OURPLATFORM'] in ('win32-vc', 'win64-vc', 'win32-mingw', 'win64-mingw'):
- msgfmt_executable += ".exe"
-
# dependencies
dependencies = [msgfmt_target]
@@ -82,7 +78,7 @@ if env['WITH_BF_INTERNATIONAL']:
po_file = os.path.join(po_dir, f)
mo_file = os.path.join(build_dir, os.path.splitext(f)[0] + ".mo")
- command = "\"%s\" \"%s\" \"%s\"" % (msgfmt_executable, po_file, mo_file)
+ command = "\"%s\" \"%s\" \"%s\"" % (targetpath, po_file, mo_file)
locale.Command(mo_file, po_file, command)
locale.Depends(mo_file, dependencies)
diff --git a/intern/locale/boost_locale_wrapper.cpp b/intern/locale/boost_locale_wrapper.cpp
index 945d0bbc5da..25843d60578 100644
--- a/intern/locale/boost_locale_wrapper.cpp
+++ b/intern/locale/boost_locale_wrapper.cpp
@@ -64,7 +64,7 @@ void bl_locale_set(const char *locale)
_locale = gen(locale);
}
else {
-#ifdef __APPLE__
+#if defined(__APPLE__) && !defined(WITH_HEADLESS) && !defined(WITH_GHOST_SDL)
extern char GHOST_user_locale[128]; // pulled from Ghost_SystemCocoa
std::string locale_osx = GHOST_user_locale + std::string(".UTF-8");
_locale = gen(locale_osx.c_str());
@@ -113,7 +113,11 @@ const char *bl_locale_pgettext(const char *msgctxt, const char *msgid)
return r;
return msgid;
}
- catch(std::exception const &) {
+ catch(std::bad_cast const &e) { /* if std::has_facet<char_message_facet>(l) == false, LC_ALL = "C" case */
+// std::cout << "bl_locale_pgettext(" << msgid << "): " << e.what() << " \n";
+ return msgid;
+ }
+ catch(std::exception const &e) {
// std::cout << "bl_locale_pgettext(" << msgctxt << ", " << msgid << "): " << e.what() << " \n";
return msgid;
}
diff --git a/intern/rigidbody/rb_bullet_api.cpp b/intern/rigidbody/rb_bullet_api.cpp
index ab7b851911a..6d39e328e82 100644
--- a/intern/rigidbody/rb_bullet_api.cpp
+++ b/intern/rigidbody/rb_bullet_api.cpp
@@ -726,8 +726,8 @@ rbMeshData *RB_trimesh_data_new(int num_tris, int num_verts)
static void RB_trimesh_data_delete(rbMeshData *mesh)
{
delete mesh->index_array;
- delete mesh->vertices;
- delete mesh->triangles;
+ delete[] mesh->vertices;
+ delete[] mesh->triangles;
delete mesh;
}
diff --git a/intern/utfconv/utfconv.c b/intern/utfconv/utfconv.c
index 7f7a612528d..e5f8756917f 100644
--- a/intern/utfconv/utfconv.c
+++ b/intern/utfconv/utfconv.c
@@ -170,7 +170,7 @@ int conv_utf_8_to_16(const char *in8, wchar_t *out16, size_t size16)
{
char u;
char type = 0;
- wchar_t u32 = 0;
+ unsigned int u32 = 0;
wchar_t *out16end = out16 + size16;
int err = 0;
if (!size16 || !in8 || !out16) return UTF_ERROR_NULL_IN;