diff options
author | Jason Wilkins <Jason.A.Wilkins@gmail.com> | 2014-05-22 04:02:02 +0400 |
---|---|---|
committer | Jason Wilkins <Jason.A.Wilkins@gmail.com> | 2014-05-22 04:02:02 +0400 |
commit | 6eff1cbebcf0766d2fe69db9b0fb3f76ede2c06b (patch) | |
tree | 3af4122e291f53f88b63ec6ded2e0fa7790e04ac /intern | |
parent | 49de1ada8dcba35862759e0f7da5ca2209b4f588 (diff) | |
parent | 146a1c77eacb925eb7c86bb49495c0f09adc607c (diff) |
Merge branch 'soc-2014-viewport' into soc-2013-viewport_fx
Conflicts:
intern/cycles/device/device_cuda.cpp
intern/ghost/intern/GHOST_WindowCocoa.mm
source/blender/blenfont/intern/blf_font.c
source/blender/blenfont/intern/blf_translation.c
source/blender/blenkernel/BKE_brush.h
source/blender/blenkernel/BKE_pbvh.h
source/blender/blenkernel/intern/cdderivedmesh.c
source/blender/blenkernel/intern/editderivedmesh.c
source/blender/blenkernel/intern/mesh.c
source/blender/blenkernel/intern/subsurf_ccg.c
source/blender/blenlib/BLI_fileops.h
source/blender/blenlib/BLI_math_matrix.h
source/blender/blenlib/intern/fileops.c
source/blender/blenlib/intern/math_matrix.c
source/blender/editors/animation/anim_channels_defines.c
source/blender/editors/animation/anim_draw.c
source/blender/editors/animation/keyframes_draw.c
source/blender/editors/include/ED_armature.h
source/blender/editors/interface/interface.c
source/blender/editors/interface/interface_draw.c
source/blender/editors/interface/interface_icons.c
source/blender/editors/interface/interface_panel.c
source/blender/editors/interface/interface_widgets.c
source/blender/editors/interface/view2d.c
source/blender/editors/mask/mask_draw.c
source/blender/editors/mesh/editmesh_select.c
source/blender/editors/render/render_opengl.c
source/blender/editors/screen/area.c
source/blender/editors/screen/glutil.c
source/blender/editors/sculpt_paint/paint_cursor.c
source/blender/editors/sculpt_paint/paint_image.c
source/blender/editors/sculpt_paint/paint_image_proj.c
source/blender/editors/sculpt_paint/paint_utils.c
source/blender/editors/sculpt_paint/sculpt_intern.h
source/blender/editors/space_buttons/space_buttons.c
source/blender/editors/space_clip/clip_dopesheet_draw.c
source/blender/editors/space_clip/clip_draw.c
source/blender/editors/space_clip/clip_graph_draw.c
source/blender/editors/space_clip/clip_utils.c
source/blender/editors/space_console/console_draw.c
source/blender/editors/space_file/file_draw.c
source/blender/editors/space_file/file_ops.c
source/blender/editors/space_graph/graph_draw.c
source/blender/editors/space_info/info_draw.c
source/blender/editors/space_info/textview.c
source/blender/editors/space_logic/logic_window.c
source/blender/editors/space_nla/nla_draw.c
source/blender/editors/space_outliner/outliner_draw.c
source/blender/editors/space_sequencer/sequencer_draw.c
source/blender/editors/space_view3d/drawanimviz.c
source/blender/editors/space_view3d/drawarmature.c
source/blender/editors/space_view3d/drawmesh.c
source/blender/editors/space_view3d/drawobject.c
source/blender/editors/space_view3d/view3d_draw.c
source/blender/editors/space_view3d/view3d_fly.c
source/blender/editors/space_view3d/view3d_intern.h
source/blender/editors/space_view3d/view3d_walk.c
source/blender/editors/transform/transform.c
source/blender/editors/transform/transform_manipulator.c
source/blender/editors/util/ed_util.c
source/blender/editors/uvedit/uvedit_draw.c
source/blender/gpu/GPU_buffers.h
source/blender/gpu/intern/gpu_buffers.c
source/blender/gpu/intern/gpu_codegen.c
source/blender/gpu/intern/gpu_codegen.h
source/blender/gpu/intern/gpu_draw.c
source/blender/render/intern/source/convertblender.c
source/blender/windowmanager/intern/wm_operators.c
source/blender/windowmanager/intern/wm_subwindow.c
source/blender/windowmanager/intern/wm_window.c
Diffstat (limited to 'intern')
214 files changed, 9512 insertions, 4502 deletions
diff --git a/intern/SConscript b/intern/SConscript index 828c1adc20d..20803884a39 100644 --- a/intern/SConscript +++ b/intern/SConscript @@ -53,9 +53,6 @@ if env['WITH_BF_FLUID']: if env['WITH_BF_CYCLES']: SConscript(['cycles/SConscript']) -if env['WITH_BF_BOOLEAN']: - SConscript(['bsp/SConscript']) - if env['WITH_BF_INTERNATIONAL']: SConscript(['locale/SConscript']) diff --git a/intern/audaspace/OpenAL/AUD_OpenALDevice.cpp b/intern/audaspace/OpenAL/AUD_OpenALDevice.cpp index c3877c2c9f2..d055c131183 100644 --- a/intern/audaspace/OpenAL/AUD_OpenALDevice.cpp +++ b/intern/audaspace/OpenAL/AUD_OpenALDevice.cpp @@ -994,7 +994,7 @@ void AUD_OpenALDevice::updateStreams() if(info != AL_PLAYING) { // if it really stopped - if(sound->m_eos) + if(sound->m_eos && info != AL_INITIAL) { if(sound->m_stop) sound->m_stop(sound->m_stop_data); diff --git a/intern/audaspace/ffmpeg/AUD_FFMPEGWriter.cpp b/intern/audaspace/ffmpeg/AUD_FFMPEGWriter.cpp index d8f0d837fec..d30835da4e5 100644 --- a/intern/audaspace/ffmpeg/AUD_FFMPEGWriter.cpp +++ b/intern/audaspace/ffmpeg/AUD_FFMPEGWriter.cpp @@ -169,14 +169,29 @@ AUD_FFMPEGWriter::AUD_FFMPEGWriter(std::string filename, AUD_DeviceSpecs specs, if(!codec) AUD_THROW(AUD_ERROR_FFMPEG, codec_error); + if(codec->sample_fmts) { + // Check if the prefered sample format for this codec is supported. + const enum AVSampleFormat *p = codec->sample_fmts; + for(; *p != -1; p++) { + if(*p == m_stream->codec->sample_fmt) + break; + } + if(*p == -1) { + // Sample format incompatible with codec. Defaulting to a format known to work. 
+ m_stream->codec->sample_fmt = codec->sample_fmts[0]; + } + } + if(avcodec_open2(m_codecCtx, codec, NULL)) AUD_THROW(AUD_ERROR_FFMPEG, codec_error); m_output_buffer.resize(FF_MIN_BUFFER_SIZE); int samplesize = AUD_MAX(AUD_SAMPLE_SIZE(m_specs), AUD_DEVICE_SAMPLE_SIZE(m_specs)); - if(m_codecCtx->frame_size <= 1) - m_input_size = 0; + if(m_codecCtx->frame_size <= 1) { + m_input_size = FF_MIN_BUFFER_SIZE * 8 / m_codecCtx->bits_per_coded_sample / m_codecCtx->channels; + m_input_buffer.resize(m_input_size * samplesize); + } else { m_input_buffer.resize(m_codecCtx->frame_size * samplesize); @@ -187,14 +202,21 @@ AUD_FFMPEGWriter::AUD_FFMPEGWriter(std::string filename, AUD_DeviceSpecs specs, m_frame = av_frame_alloc(); if (!m_frame) AUD_THROW(AUD_ERROR_FFMPEG, codec_error); + avcodec_get_frame_defaults(m_frame); m_frame->linesize[0] = m_input_size * samplesize; m_frame->format = m_codecCtx->sample_fmt; + m_frame->nb_samples = m_input_size; # ifdef FFMPEG_HAVE_AVFRAME_SAMPLE_RATE m_frame->sample_rate = m_codecCtx->sample_rate; # endif # ifdef FFMPEG_HAVE_FRAME_CHANNEL_LAYOUT m_frame->channel_layout = m_codecCtx->channel_layout; # endif + m_sample_size = av_get_bytes_per_sample(m_codecCtx->sample_fmt); + m_frame_pts = 0; + m_deinterleave = av_sample_fmt_is_planar(m_codecCtx->sample_fmt); + if(m_deinterleave) + m_deinterleave_buffer.resize(m_input_size * m_codecCtx->channels * m_sample_size); #endif try @@ -272,13 +294,31 @@ void AUD_FFMPEGWriter::encode(sample_t* data) #ifdef FFMPEG_HAVE_ENCODE_AUDIO2 int got_output, ret; + m_frame->pts = m_frame_pts / av_q2d(m_codecCtx->time_base); + m_frame_pts++; +#ifdef FFMPEG_HAVE_FRAME_CHANNEL_LAYOUT + m_frame->channel_layout = m_codecCtx->channel_layout; +#endif + + if(m_deinterleave) { + for(int channel = 0; channel < m_codecCtx->channels; channel++) { + for(int i = 0; i < m_frame->nb_samples; i++) { + memcpy(reinterpret_cast<uint8_t*>(m_deinterleave_buffer.getBuffer()) + (i + channel * m_frame->nb_samples) * m_sample_size, + 
reinterpret_cast<uint8_t*>(data) + (m_codecCtx->channels * i + channel) * m_sample_size, m_sample_size); + } + } + + data = m_deinterleave_buffer.getBuffer(); + } + + avcodec_fill_audio_frame(m_frame, m_codecCtx->channels, m_codecCtx->sample_fmt, reinterpret_cast<uint8_t*>(data), + m_frame->nb_samples * av_get_bytes_per_sample(m_codecCtx->sample_fmt) * m_codecCtx->channels, 1); - m_frame->data[0] = reinterpret_cast<uint8_t*>(data); ret = avcodec_encode_audio2(m_codecCtx, &packet, m_frame, &got_output); - if (ret < 0) + if(ret < 0) AUD_THROW(AUD_ERROR_FFMPEG, codec_error); - if (!got_output) + if(!got_output) return; #else sample_t* outbuf = m_output_buffer.getBuffer(); @@ -290,10 +330,23 @@ void AUD_FFMPEGWriter::encode(sample_t* data) packet.data = reinterpret_cast<uint8_t*>(outbuf); #endif + if(packet.pts != AV_NOPTS_VALUE) + packet.pts = av_rescale_q(packet.pts, m_codecCtx->time_base, m_stream->time_base); + if(packet.dts != AV_NOPTS_VALUE) + packet.dts = av_rescale_q(packet.dts, m_codecCtx->time_base, m_stream->time_base); + if(packet.duration > 0) + packet.duration = av_rescale_q(packet.duration, m_codecCtx->time_base, m_stream->time_base); + packet.stream_index = m_stream->index; - if(av_interleaved_write_frame(m_formatCtx, &packet)) + packet.flags |= AV_PKT_FLAG_KEY; + + if(av_interleaved_write_frame(m_formatCtx, &packet)) { + av_free_packet(&packet); AUD_THROW(AUD_ERROR_FFMPEG, write_error); + } + + av_free_packet(&packet); } void AUD_FFMPEGWriter::write(unsigned int length, sample_t* buffer) diff --git a/intern/audaspace/ffmpeg/AUD_FFMPEGWriter.h b/intern/audaspace/ffmpeg/AUD_FFMPEGWriter.h index 310f69258ea..492aa35ff12 100644 --- a/intern/audaspace/ffmpeg/AUD_FFMPEGWriter.h +++ b/intern/audaspace/ffmpeg/AUD_FFMPEGWriter.h @@ -83,6 +83,23 @@ private: AVFrame *m_frame; /** + * PTS of next frame to write. + */ + int m_frame_pts; + + /** + * Number of bytes per sample. 
+ */ + int m_sample_size; + + /** + * Need to de-interleave audio for planar sample formats. + */ + bool m_deinterleave; + + AUD_Buffer m_deinterleave_buffer; + + /** * The input buffer for the format converted data before encoding. */ AUD_Buffer m_input_buffer; diff --git a/intern/audaspace/intern/AUD_AnimateableProperty.cpp b/intern/audaspace/intern/AUD_AnimateableProperty.cpp index 61adae4b34b..9f399a0b99f 100644 --- a/intern/audaspace/intern/AUD_AnimateableProperty.cpp +++ b/intern/audaspace/intern/AUD_AnimateableProperty.cpp @@ -47,6 +47,23 @@ AUD_AnimateableProperty::AUD_AnimateableProperty(int count) : pthread_mutexattr_destroy(&attr); } +AUD_AnimateableProperty::AUD_AnimateableProperty(int count, float value) : + AUD_Buffer(count * sizeof(float)), m_count(count), m_isAnimated(false) +{ + sample_t* buf = getBuffer(); + + for(int i = 0; i < count; i++) + buf[i] = value; + + pthread_mutexattr_t attr; + pthread_mutexattr_init(&attr); + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE); + + pthread_mutex_init(&m_mutex, &attr); + + pthread_mutexattr_destroy(&attr); +} + void AUD_AnimateableProperty::updateUnknownCache(int start, int end) { float* buf = getBuffer(); @@ -104,7 +121,8 @@ void AUD_AnimateableProperty::write(const float* data, int position, int count) if(pos == 0) { - memset(buf, 0, position * m_count * sizeof(float)); + for(int i = 0; i < position; i++) + memcpy(buf + i * m_count, data, m_count * sizeof(float)); } else updateUnknownCache(pos, position - 1); diff --git a/intern/audaspace/intern/AUD_AnimateableProperty.h b/intern/audaspace/intern/AUD_AnimateableProperty.h index 37eb8f84550..f07e5916b25 100644 --- a/intern/audaspace/intern/AUD_AnimateableProperty.h +++ b/intern/audaspace/intern/AUD_AnimateableProperty.h @@ -76,6 +76,13 @@ public: AUD_AnimateableProperty(int count = 1); /** + * Creates a new animateable property. + * \param count The count of floats for a single property. 
+ * \param value The value that the property should get initialized with. All count floats will be initialized to the same value. + */ + AUD_AnimateableProperty(int count, float value); + + /** * Destroys the animateable property. */ ~AUD_AnimateableProperty(); diff --git a/intern/audaspace/intern/AUD_ConverterFunctions.h b/intern/audaspace/intern/AUD_ConverterFunctions.h index 1ffcf6c4ef0..7817ee88c07 100644 --- a/intern/audaspace/intern/AUD_ConverterFunctions.h +++ b/intern/audaspace/intern/AUD_ConverterFunctions.h @@ -34,12 +34,11 @@ #include <cstring> #ifdef _MSC_VER -#if (_MSC_VER < 1300) +#if (_MSC_VER <= 1500) typedef short int16_t; typedef int int32_t; #else - typedef __int16 int16_t; - typedef __int32 int32_t; +# include <stdint.h> #endif #else #include <stdint.h> diff --git a/intern/audaspace/intern/AUD_Sequencer.cpp b/intern/audaspace/intern/AUD_Sequencer.cpp index c59c56a4479..6c5e48c73f0 100644 --- a/intern/audaspace/intern/AUD_Sequencer.cpp +++ b/intern/audaspace/intern/AUD_Sequencer.cpp @@ -42,6 +42,7 @@ AUD_Sequencer::AUD_Sequencer(AUD_Specs specs, float fps, bool muted) : m_speed_of_sound(434), m_doppler_factor(1), m_distance_model(AUD_DISTANCE_MODEL_INVERSE_CLAMPED), + m_volume(1, 1.0f), m_location(3), m_orientation(4) { diff --git a/intern/audaspace/intern/AUD_SequencerEntry.cpp b/intern/audaspace/intern/AUD_SequencerEntry.cpp index 005557bbed1..6ef8479cdb8 100644 --- a/intern/audaspace/intern/AUD_SequencerEntry.cpp +++ b/intern/audaspace/intern/AUD_SequencerEntry.cpp @@ -53,6 +53,8 @@ AUD_SequencerEntry::AUD_SequencerEntry(boost::shared_ptr<AUD_IFactory> sound, fl m_cone_angle_outer(360), m_cone_angle_inner(360), m_cone_volume_outer(0), + m_volume(1, 1.0f), + m_pitch(1, 1.0f), m_location(3), m_orientation(4) { diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt index 5c8d68b07ee..a1b0030491e 100644 --- a/intern/cycles/CMakeLists.txt +++ b/intern/cycles/CMakeLists.txt @@ -1,4 +1,3 @@ - # Standalone or with Blender if(NOT 
WITH_BLENDER AND WITH_CYCLES_STANDALONE) set(CYCLES_INSTALL_PATH "") @@ -13,8 +12,11 @@ include(cmake/external_libs.cmake) # Build Flags # todo: refactor this code to match scons +# note: CXX_HAS_SSE is needed in case passing SSE flags fails altogether (gcc-arm) if(WIN32 AND MSVC) + set(CXX_HAS_SSE TRUE) + # /arch:AVX for VC2012 and above if(NOT MSVC_VERSION LESS 1700) set(CYCLES_AVX_ARCH_FLAGS "/arch:AVX") @@ -24,36 +26,49 @@ if(WIN32 AND MSVC) # there is no /arch:SSE3, but intrinsics are available anyway if(CMAKE_CL_64) - set(CYCLES_SSE2_KERNEL_FLAGS "/fp:fast -D_CRT_SECURE_NO_WARNINGS /Gs-") - set(CYCLES_SSE3_KERNEL_FLAGS "/fp:fast -D_CRT_SECURE_NO_WARNINGS /Gs-") - set(CYCLES_SSE41_KERNEL_FLAGS "/fp:fast -D_CRT_SECURE_NO_WARNINGS /Gs-") - set(CYCLES_AVX_KERNEL_FLAGS "${CYCLES_AVX_ARCH_FLAGS} /fp:fast -D_CRT_SECURE_NO_WARNINGS /Gs-") + set(CYCLES_SSE2_KERNEL_FLAGS "/fp:fast -D_CRT_SECURE_NO_WARNINGS /GS-") + set(CYCLES_SSE3_KERNEL_FLAGS "/fp:fast -D_CRT_SECURE_NO_WARNINGS /GS-") + set(CYCLES_SSE41_KERNEL_FLAGS "/fp:fast -D_CRT_SECURE_NO_WARNINGS /GS-") + set(CYCLES_AVX_KERNEL_FLAGS "${CYCLES_AVX_ARCH_FLAGS} /fp:fast -D_CRT_SECURE_NO_WARNINGS /GS-") else() - set(CYCLES_SSE2_KERNEL_FLAGS "/arch:SSE2 /fp:fast -D_CRT_SECURE_NO_WARNINGS /Gs-") - set(CYCLES_SSE3_KERNEL_FLAGS "/arch:SSE2 /fp:fast -D_CRT_SECURE_NO_WARNINGS /Gs-") - set(CYCLES_SSE41_KERNEL_FLAGS "/arch:SSE2 /fp:fast -D_CRT_SECURE_NO_WARNINGS /Gs-") - set(CYCLES_AVX_KERNEL_FLAGS "${CYCLES_AVX_ARCH_FLAGS} /fp:fast -D_CRT_SECURE_NO_WARNINGS /Gs-") + set(CYCLES_SSE2_KERNEL_FLAGS "/arch:SSE2 /fp:fast -D_CRT_SECURE_NO_WARNINGS /GS-") + set(CYCLES_SSE3_KERNEL_FLAGS "/arch:SSE2 /fp:fast -D_CRT_SECURE_NO_WARNINGS /GS-") + set(CYCLES_SSE41_KERNEL_FLAGS "/arch:SSE2 /fp:fast -D_CRT_SECURE_NO_WARNINGS /GS-") + set(CYCLES_AVX_KERNEL_FLAGS "${CYCLES_AVX_ARCH_FLAGS} /fp:fast -D_CRT_SECURE_NO_WARNINGS /GS-") endif() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /fp:fast -D_CRT_SECURE_NO_WARNINGS /Gs-") + set(CMAKE_CXX_FLAGS 
"${CMAKE_CXX_FLAGS} /fp:fast -D_CRT_SECURE_NO_WARNINGS /GS-") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Ox") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} /Ox") set(CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS_MINSIZEREL} /Ox") elseif(CMAKE_COMPILER_IS_GNUCC) - set(CYCLES_SSE2_KERNEL_FLAGS "-ffast-math -msse -msse2 -mfpmath=sse") - set(CYCLES_SSE3_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -mfpmath=sse") - set(CYCLES_SSE41_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mfpmath=sse") - set(CYCLES_AVX_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mavx -mfpmath=sse") + check_cxx_compiler_flag(-msse CXX_HAS_SSE) + if(CXX_HAS_SSE) + set(CYCLES_SSE2_KERNEL_FLAGS "-ffast-math -msse -msse2 -mfpmath=sse") + set(CYCLES_SSE3_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -mfpmath=sse") + set(CYCLES_SSE41_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mfpmath=sse") + set(CYCLES_AVX_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mavx -mfpmath=sse") + endif() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math") elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang") - set(CYCLES_SSE2_KERNEL_FLAGS "-ffast-math -msse -msse2") - set(CYCLES_SSE3_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3") - set(CYCLES_SSE41_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1") - set(CYCLES_AVX_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mavx") + check_cxx_compiler_flag(-msse CXX_HAS_SSE) + if(CXX_HAS_SSE) + set(CYCLES_SSE2_KERNEL_FLAGS "-ffast-math -msse -msse2") + set(CYCLES_SSE3_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3") + set(CYCLES_SSE41_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1") + set(CYCLES_AVX_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mssse3 -msse4.1 -mavx") + endif() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math") endif() -add_definitions(-DWITH_KERNEL_SSE2 -DWITH_KERNEL_SSE3 
-DWITH_KERNEL_SSE41 -DWITH_KERNEL_AVX) +if(CXX_HAS_SSE) + add_definitions( + -DWITH_KERNEL_SSE2 + -DWITH_KERNEL_SSE3 + -DWITH_KERNEL_SSE41 + -DWITH_KERNEL_AVX + ) +endif() # for OSL if(WIN32 AND MSVC) @@ -64,10 +79,15 @@ endif() # Definitions and Includes -add_definitions(${BOOST_DEFINITIONS} ${OPENIMAGEIO_DEFINITIONS}) +add_definitions( + ${BOOST_DEFINITIONS} + ${OPENIMAGEIO_DEFINITIONS} +) -add_definitions(-DCCL_NAMESPACE_BEGIN=namespace\ ccl\ {) -add_definitions(-DCCL_NAMESPACE_END=}) +add_definitions( + -DCCL_NAMESPACE_BEGIN=namespace\ ccl\ { + -DCCL_NAMESPACE_END=} +) if(WITH_CYCLES_NETWORK) add_definitions(-DWITH_NETWORK) @@ -91,9 +111,11 @@ if(WITH_CYCLES_OSL) include_directories(${OSL_INCLUDES}) endif() -add_definitions(-DWITH_OPENCL) -add_definitions(-DWITH_CUDA) -add_definitions(-DWITH_MULTI) +add_definitions( + -DWITH_OPENCL + -DWITH_CUDA + -DWITH_MULTI +) include_directories( SYSTEM @@ -101,7 +123,16 @@ include_directories( ${OPENIMAGEIO_INCLUDE_DIRS} ${OPENIMAGEIO_INCLUDE_DIRS}/OpenImageIO ${OPENEXR_INCLUDE_DIR} - ${OPENEXR_INCLUDE_DIRS}) + ${OPENEXR_INCLUDE_DIRS} +) + + +# Warnings +if(CMAKE_COMPILER_IS_GNUCXX) + ADD_CHECK_CXX_COMPILER_FLAG(CMAKE_CXX_FLAGS _has_cxxflag_float_conversion "-Werror=float-conversion") + unset(_has_cxxflag_float_conversion) +endif() + # Subdirectories diff --git a/intern/cycles/SConscript b/intern/cycles/SConscript index b8c731e3315..532238b9d7e 100644 --- a/intern/cycles/SConscript +++ b/intern/cycles/SConscript @@ -72,6 +72,12 @@ if env['OURPLATFORM'] in ('win32-vc', 'win64-vc'): else: cxxflags.append('-ffast-math'.split()) +# Warnings +# XXX Not supported by gcc < 4.9, since we do not have any 'supported flags' test as in cmake, +# simpler to comment for now. 
+#if env['C_COMPILER_ID'] == 'gcc': +# cxxflags.append(['-Werror=float-conversion']) + if env['OURPLATFORM'] in ('win32-vc', 'win32-mingw', 'linuxcross', 'win64-vc', 'win64-mingw'): incs.append(env['BF_PTHREADS_INC']) @@ -81,12 +87,12 @@ kernel_flags = {} if env['OURPLATFORM'] == 'win32-vc': # there is no /arch:SSE3, but intrinsics are available anyway - kernel_flags['sse2'] = '/arch:SSE /arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /fp:fast /Ox /Gs-' + kernel_flags['sse2'] = '/arch:SSE /arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /fp:fast /Ox /GS-' kernel_flags['sse3'] = kernel_flags['sse2'] elif env['OURPLATFORM'] == 'win64-vc': # /arch:AVX only available from visual studio 2012 - kernel_flags['sse2'] = '-D_CRT_SECURE_NO_WARNINGS /fp:fast /Ox /Gs-' + kernel_flags['sse2'] = '-D_CRT_SECURE_NO_WARNINGS /fp:fast /Ox /GS-' kernel_flags['sse3'] = kernel_flags['sse2'] if env['MSVC_VERSION'] in ('11.0', '12.0'): diff --git a/intern/cycles/app/cycles_standalone.cpp b/intern/cycles/app/cycles_standalone.cpp index 230833802b0..7ea1ca2d8fb 100644 --- a/intern/cycles/app/cycles_standalone.cpp +++ b/intern/cycles/app/cycles_standalone.cpp @@ -46,7 +46,8 @@ struct Options { int width, height; SceneParams scene_params; SessionParams session_params; - bool quiet, show_help, interactive; + bool quiet; + bool show_help, interactive, pause; } options; static void session_print(const string& str) @@ -114,15 +115,25 @@ static void session_init() options.scene = NULL; } -static void scene_init(int width, int height) +static void scene_init() { options.scene = new Scene(options.scene_params, options.session_params.device); + + /* Read XML */ xml_read_file(options.scene, options.filepath.c_str()); - if (width == 0 || height == 0) { + /* Camera width/height override? 
*/ + if (!(options.width == 0 || options.height == 0)) { + options.scene->camera->width = options.width; + options.scene->camera->height = options.height; + } + else { options.width = options.scene->camera->width; options.height = options.scene->camera->height; } + + /* Calculate Viewplane */ + options.scene->camera->compute_auto_viewplane(); } static void session_exit() @@ -166,8 +177,14 @@ static void display_info(Progress& progress) interactive = options.interactive? "On":"Off"; - str = string_printf("%s Time: %.2f Latency: %.4f Sample: %d Average: %.4f Interactive: %s", - status.c_str(), total_time, latency, sample, sample_time, interactive.c_str()); + str = string_printf( + "%s" + " Time: %.2f" + " Latency: %.4f" + " Sample: %d" + " Average: %.4f" + " Interactive: %s", + status.c_str(), total_time, latency, sample, sample_time, interactive.c_str()); view_display_info(str.c_str()); @@ -177,7 +194,9 @@ static void display_info(Progress& progress) static void display() { - options.session->draw(session_buffer_params()); + static DeviceDrawParams draw_params = DeviceDrawParams(); + + options.session->draw(session_buffer_params(), draw_params); display_info(options.session->progress); } @@ -195,11 +214,11 @@ static void motion(int x, int y, int button) /* Rotate */ else if(button == 2) { - float4 r1= make_float4(x * 0.1f, 0.0f, 1.0f, 0.0f); - matrix = matrix * transform_rotate(r1.x * M_PI/180.0f, make_float3(r1.y, r1.z, r1.w)); + float4 r1 = make_float4((float)x * 0.1f, 0.0f, 1.0f, 0.0f); + matrix = matrix * transform_rotate(DEG2RADF(r1.x), make_float3(r1.y, r1.z, r1.w)); - float4 r2 = make_float4(y * 0.1, 1.0f, 0.0f, 0.0f); - matrix = matrix * transform_rotate(r2.x * M_PI/180.0f, make_float3(r2.y, r2.z, r2.w)); + float4 r2 = make_float4(y * 0.1f, 1.0f, 0.0f, 0.0f); + matrix = matrix * transform_rotate(DEG2RADF(r2.x), make_float3(r2.y, r2.z, r2.w)); } /* Update and Reset */ @@ -216,20 +235,64 @@ static void resize(int width, int height) options.width = width; 
options.height = height; - if(options.session) + if(options.session) { + /* Update camera */ + options.session->scene->camera->width = width; + options.session->scene->camera->height = height; + options.session->scene->camera->compute_auto_viewplane(); + options.session->scene->camera->need_update = true; + options.session->scene->camera->need_device_update = true; + options.session->reset(session_buffer_params(), options.session_params.samples); + } } static void keyboard(unsigned char key) { - if(key == 'r') - options.session->reset(session_buffer_params(), options.session_params.samples); - else if(key == 'h') + /* Toggle help */ + if(key == 'h') options.show_help = !(options.show_help); - else if(key == 'i') - options.interactive = !(options.interactive); + + /* Reset */ + else if(key == 'r') + options.session->reset(session_buffer_params(), options.session_params.samples); + + /* Cancel */ else if(key == 27) // escape options.session->progress.set_cancel("Canceled"); + + /* Pause */ + else if(key == 'p') { + options.pause = !options.pause; + options.session->set_pause(options.pause); + } + + /* Interactive Mode */ + else if(key == 'i') + options.interactive = !(options.interactive); + + else if(options.interactive && (key == 'w' || key == 'a' || key == 's' || key == 'd')) { + Transform matrix = options.session->scene->camera->matrix; + float3 translate; + + if(key == 'w') + translate = make_float3(0.0f, 0.0f, 0.1f); + else if(key == 's') + translate = make_float3(0.0f, 0.0f, -0.1f); + else if(key == 'a') + translate = make_float3(-0.1f, 0.0f, 0.0f); + else if(key == 'd') + translate = make_float3(0.1f, 0.0f, 0.0f); + + matrix = matrix * transform_translate(translate); + + /* Update and Reset */ + options.session->scene->camera->matrix = matrix; + options.session->scene->camera->need_update = true; + options.session->scene->camera->need_device_update = true; + + options.session->reset(session_buffer_params(), options.session_params.samples); + } } #endif @@ 
-314,15 +377,13 @@ static void options_parse(int argc, const char **argv) else if(ssname == "svm") options.scene_params.shadingsystem = SceneParams::SVM; -#ifdef WITH_CYCLES_STANDALONE_GUI - /* Progressive rendering for GUI */ - if(!options.session_params.background) - options.session_params.progressive = true; -#else - /* When building without GUI, set background */ +#ifndef WITH_CYCLES_STANDALONE_GUI options.session_params.background = true; #endif + /* Use progressive rendering */ + options.session_params.progressive = true; + /* find matching device */ DeviceType device_type = Device::type_from_string(devicename.c_str()); vector<DeviceInfo>& devices = Device::available_devices(); @@ -360,12 +421,12 @@ static void options_parse(int argc, const char **argv) fprintf(stderr, "No file path specified\n"); exit(EXIT_FAILURE); } - + /* For smoother Viewport */ options.session_params.start_resolution = 64; /* load scene */ - scene_init(options.width, options.height); + scene_init(); } CCL_NAMESPACE_END diff --git a/intern/cycles/app/cycles_xml.cpp b/intern/cycles/app/cycles_xml.cpp index 14fe43115d5..d5ef30e5c6f 100644 --- a/intern/cycles/app/cycles_xml.cpp +++ b/intern/cycles/app/cycles_xml.cpp @@ -105,7 +105,7 @@ static bool xml_read_float(float *value, pugi::xml_node node, const char *name) pugi::xml_attribute attr = node.attribute(name); if(attr) { - *value = atof(attr.value()); + *value = (float)atof(attr.value()); return true; } @@ -121,7 +121,7 @@ static bool xml_read_float_array(vector<float>& value, pugi::xml_node node, cons string_split(tokens, attr.value()); foreach(const string& token, tokens) - value.push_back(atof(token.c_str())); + value.push_back((float)atof(token.c_str())); return true; } @@ -219,6 +219,35 @@ static bool xml_read_enum(ustring *str, ShaderEnum& enm, pugi::xml_node node, co return false; } +static ShaderSocketType xml_read_socket_type(pugi::xml_node node, const char *name) +{ + pugi::xml_attribute attr = node.attribute(name); + + if(attr) 
{ + string value = attr.value(); + if (string_iequals(value, "float")) + return SHADER_SOCKET_FLOAT; + else if (string_iequals(value, "int")) + return SHADER_SOCKET_INT; + else if (string_iequals(value, "color")) + return SHADER_SOCKET_COLOR; + else if (string_iequals(value, "vector")) + return SHADER_SOCKET_VECTOR; + else if (string_iequals(value, "point")) + return SHADER_SOCKET_POINT; + else if (string_iequals(value, "normal")) + return SHADER_SOCKET_NORMAL; + else if (string_iequals(value, "closure color")) + return SHADER_SOCKET_CLOSURE; + else if (string_iequals(value, "string")) + return SHADER_SOCKET_STRING; + else + fprintf(stderr, "Unknown shader socket type \"%s\" for attribute \"%s\".\n", value.c_str(), name); + } + + return SHADER_SOCKET_UNDEFINED; +} + /* Film */ static void xml_read_film(const XMLReadState& state, pugi::xml_node node) @@ -251,6 +280,8 @@ static void xml_read_integrator(const XMLReadState& state, pugi::xml_node node) xml_read_int(&integrator->mesh_light_samples, node, "mesh_light_samples"); xml_read_int(&integrator->subsurface_samples, node, "subsurface_samples"); xml_read_int(&integrator->volume_samples, node, "volume_samples"); + xml_read_bool(&integrator->sample_all_lights_direct, node, "sample_all_lights_direct"); + xml_read_bool(&integrator->sample_all_lights_indirect, node, "sample_all_lights_indirect"); } /* Bounces */ @@ -268,6 +299,7 @@ static void xml_read_integrator(const XMLReadState& state, pugi::xml_node node) xml_read_bool(&integrator->transparent_shadows, node, "transparent_shadows"); /* Volume */ + xml_read_int(&integrator->volume_homogeneous_sampling, node, "volume_homogeneous_sampling"); xml_read_float(&integrator->volume_step_size, node, "volume_step_size"); xml_read_int(&integrator->volume_max_steps, node, "volume_max_steps"); @@ -289,23 +321,8 @@ static void xml_read_camera(const XMLReadState& state, pugi::xml_node node) xml_read_int(&cam->width, node, "width"); xml_read_int(&cam->height, node, "height"); - float 
aspect = (float)cam->width/(float)cam->height; - - if(cam->width >= cam->height) { - cam->viewplane.left = -aspect; - cam->viewplane.right = aspect; - cam->viewplane.bottom = -1.0f; - cam->viewplane.top = 1.0f; - } - else { - cam->viewplane.left = -1.0f; - cam->viewplane.right = 1.0f; - cam->viewplane.bottom = -1.0f/aspect; - cam->viewplane.top = 1.0f/aspect; - } - if(xml_read_float(&cam->fov, node, "fov")) - cam->fov *= M_PI/180.0f; + cam->fov = DEG2RADF(cam->fov); xml_read_float(&cam->nearclip, node, "nearclip"); xml_read_float(&cam->farclip, node, "farclip"); @@ -333,7 +350,6 @@ static void xml_read_camera(const XMLReadState& state, pugi::xml_node node) xml_read_float(&cam->sensorwidth, node, "sensorwidth"); xml_read_float(&cam->sensorheight, node, "sensorheight"); - cam->matrix = state.tfm; cam->need_update = true; @@ -392,24 +408,41 @@ static void xml_read_shader_graph(const XMLReadState& state, Shader *shader, pug /* Source */ xml_read_string(&osl->filepath, node, "src"); - osl->filepath = path_join(state.base, osl->filepath); - - /* Outputs */ - string output = "", output_type = ""; - ShaderSocketType type = SHADER_SOCKET_FLOAT; + if(path_is_relative(osl->filepath)) { + osl->filepath = path_join(state.base, osl->filepath); + } - xml_read_string(&output, node, "output"); - xml_read_string(&output_type, node, "output_type"); - - if(output_type == "float") - type = SHADER_SOCKET_FLOAT; - else if(output_type == "closure color") - type = SHADER_SOCKET_CLOSURE; - else if(output_type == "color") - type = SHADER_SOCKET_COLOR; - - osl->output_names.push_back(ustring(output)); - osl->add_output(osl->output_names.back().c_str(), type); + /* Generate inputs/outputs from node sockets + * + * Note: ShaderInput/ShaderOutput store shallow string copies only! + * Socket names must be stored in the extra lists instead. 
*/ + /* read input values */ + for(pugi::xml_node param = node.first_child(); param; param = param.next_sibling()) { + if (string_iequals(param.name(), "input")) { + string name; + if (!xml_read_string(&name, param, "name")) + continue; + + ShaderSocketType type = xml_read_socket_type(param, "type"); + if (type == SHADER_SOCKET_UNDEFINED) + continue; + + osl->input_names.push_back(ustring(name)); + osl->add_input(osl->input_names.back().c_str(), type); + } + else if (string_iequals(param.name(), "output")) { + string name; + if (!xml_read_string(&name, param, "name")) + continue; + + ShaderSocketType type = xml_read_socket_type(param, "type"); + if (type == SHADER_SOCKET_UNDEFINED) + continue; + + osl->output_names.push_back(ustring(name)); + osl->add_output(osl->output_names.back().c_str(), type); + } + } snode = osl; } @@ -616,6 +649,11 @@ static void xml_read_shader_graph(const XMLReadState& state, Shader *shader, pug xml_read_ustring(&attr->attribute, node, "attribute"); snode = attr; } + else if(string_iequals(node.name(), "uv_map")) { + UVMapNode *uvm = new UVMapNode(); + xml_read_ustring(&uvm->attribute, node, "uv_map"); + snode = uvm; + } else if(string_iequals(node.name(), "camera")) { snode = new CameraNode(); } @@ -734,6 +772,9 @@ static void xml_read_shader_graph(const XMLReadState& state, Shader *shader, pug case SHADER_SOCKET_NORMAL: xml_read_float3(&in->value, node, attr.name()); break; + case SHADER_SOCKET_STRING: + xml_read_ustring( &in->value_string, node, attr.name() ); + break; default: break; } @@ -765,6 +806,8 @@ static void xml_read_shader(const XMLReadState& state, pugi::xml_node node) static void xml_read_background(const XMLReadState& state, pugi::xml_node node) { Shader *shader = state.scene->shaders[state.scene->default_background]; + + xml_read_bool(&shader->heterogeneous_volume, node, "heterogeneous_volume"); xml_read_shader_graph(state, shader, node); } @@ -846,7 +889,7 @@ static void xml_read_mesh(const XMLReadState& state, 
pugi::xml_node node) SubdParams sdparams(mesh, shader, smooth); xml_read_float(&sdparams.dicing_rate, node, "dicing_rate"); - DiagSplit dsplit(sdparams);; + DiagSplit dsplit(sdparams); sdmesh.tessellate(&dsplit); } else { @@ -944,6 +987,26 @@ static void xml_read_light(const XMLReadState& state, pugi::xml_node node) { Light *light = new Light(); light->shader = state.shader; + + /* Light Type + * 0: Point, 1: Sun, 3: Area, 5: Spot */ + int type = 0; + xml_read_int(&type, node, "type"); + light->type = (LightType)type; + + /* Spot Light */ + xml_read_float(&light->spot_angle, node, "spot_angle"); + xml_read_float(&light->spot_smooth, node, "spot_smooth"); + + /* Area Light */ + xml_read_float(&light->sizeu, node, "sizeu"); + xml_read_float(&light->sizev, node, "sizev"); + xml_read_float3(&light->axisu, node, "axisu"); + xml_read_float3(&light->axisv, node, "axisv"); + + /* Generic */ + xml_read_float(&light->size, node, "size"); + xml_read_float3(&light->dir, node, "dir"); xml_read_float3(&light->co, node, "P"); light->co = transform_point(&state.tfm, light->co); @@ -969,7 +1032,7 @@ static void xml_read_transform(pugi::xml_node node, Transform& tfm) if(node.attribute("rotate")) { float4 rotate = make_float4(0.0f, 0.0f, 0.0f, 0.0f); xml_read_float4(&rotate, node, "rotate"); - tfm = tfm * transform_rotate(rotate.x*M_PI/180.0f, make_float3(rotate.y, rotate.z, rotate.w)); + tfm = tfm * transform_rotate(DEG2RADF(rotate.x), make_float3(rotate.y, rotate.z, rotate.w)); } if(node.attribute("scale")) { diff --git a/intern/cycles/app/cycles_xml.h b/intern/cycles/app/cycles_xml.h index 1e3ed411312..96bc79c35d8 100644 --- a/intern/cycles/app/cycles_xml.h +++ b/intern/cycles/app/cycles_xml.h @@ -14,8 +14,8 @@ * limitations under the License */ -#ifndef __CYCLES_XML__ -#define __CYCLES_XML__ +#ifndef __CYCLES_XML_H__ +#define __CYCLES_XML_H__ CCL_NAMESPACE_BEGIN @@ -23,7 +23,10 @@ class Scene; void xml_read_file(Scene *scene, const char *filepath); -CCL_NAMESPACE_END +/* macros 
for importing */ +#define RAD2DEGF(_rad) ((_rad) * (float)(180.0 / M_PI)) +#define DEG2RADF(_deg) ((_deg) * (float)(M_PI / 180.0)) -#endif /* __CYCLES_XML__ */ +CCL_NAMESPACE_END +#endif /* __CYCLES_XML_H__ */ diff --git a/intern/cycles/blender/CCL_api.h b/intern/cycles/blender/CCL_api.h index 6532315cf39..2772b9ac8a7 100644 --- a/intern/cycles/blender/CCL_api.h +++ b/intern/cycles/blender/CCL_api.h @@ -14,8 +14,8 @@ * limitations under the License */ -#ifndef CCL_API_H -#define CCL_API_H +#ifndef __CCL_API_H__ +#define __CCL_API_H__ #ifdef __cplusplus extern "C" { @@ -40,5 +40,4 @@ void *CCL_python_module_init(void); } #endif -#endif /* CCL_API_H */ - +#endif /* __CCL_API_H__ */ diff --git a/intern/cycles/blender/CMakeLists.txt b/intern/cycles/blender/CMakeLists.txt index 25f91a0caea..9a60152841e 100644 --- a/intern/cycles/blender/CMakeLists.txt +++ b/intern/cycles/blender/CMakeLists.txt @@ -49,6 +49,11 @@ add_definitions(-DGLEW_STATIC) blender_add_lib(bf_intern_cycles "${SRC}" "${INC}" "${INC_SYS}") +# avoid link failure with clang 3.4 debug +if(CMAKE_C_COMPILER_ID MATCHES "Clang" AND NOT ${CMAKE_C_COMPILER_VERSION} VERSION_LESS '3.4') + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -gline-tables-only") +endif() + add_dependencies(bf_intern_cycles bf_rna) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${ADDON_FILES}" ${CYCLES_INSTALL_PATH}) diff --git a/intern/cycles/blender/addon/__init__.py b/intern/cycles/blender/addon/__init__.py index afd26945d6c..27d986900c8 100644 --- a/intern/cycles/blender/addon/__init__.py +++ b/intern/cycles/blender/addon/__init__.py @@ -19,7 +19,7 @@ bl_info = { "name": "Cycles Render Engine", "author": "", - "blender": (2, 67, 0), + "blender": (2, 70, 0), "location": "Info header, render engine menu", "description": "Cycles Render Engine integration", "warning": "", @@ -67,6 +67,9 @@ class CyclesRender(bpy.types.RenderEngine): def render(self, scene): engine.render(self) + def bake(self, scene, obj, pass_type, pixel_array, 
num_pixels, depth, result): + engine.bake(self, obj, pass_type, pixel_array, num_pixels, depth, result) + # viewport render def view_update(self, context): if not self.session: diff --git a/intern/cycles/blender/addon/engine.py b/intern/cycles/blender/addon/engine.py index b9ce65588df..25a9e97a99b 100644 --- a/intern/cycles/blender/addon/engine.py +++ b/intern/cycles/blender/addon/engine.py @@ -59,6 +59,12 @@ def render(engine): _cycles.render(engine.session) +def bake(engine, obj, pass_type, pixel_array, num_pixels, depth, result): + import _cycles + session = getattr(engine, "session", None) + if session is not None: + _cycles.bake(engine.session, obj.as_pointer(), pass_type, pixel_array.as_pointer(), num_pixels, depth, result.as_pointer()) + def reset(engine, data, scene): import _cycles data = data.as_pointer() diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py index c80e8a3250c..7205a272395 100644 --- a/intern/cycles/blender/addon/properties.py +++ b/intern/cycles/blender/addon/properties.py @@ -108,6 +108,11 @@ enum_integrator = ( ('PATH', "Path Tracing", "Pure path tracing integrator"), ) +enum_volume_homogeneous_sampling = ( + ('DISTANCE', "Distance", "Use Distance Sampling"), + ('EQUI_ANGULAR', "Equi-angular", "Use Equi-angular Sampling"), + ) + class CyclesRenderSettings(bpy.types.PropertyGroup): @classmethod @@ -141,6 +146,13 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): default='PATH', ) + cls.volume_homogeneous_sampling = EnumProperty( + name="Homogeneous Sampling", + description="Sampling method to use for homogeneous volumes", + items=enum_volume_homogeneous_sampling, + default='DISTANCE', + ) + cls.use_square_samples = BoolProperty( name="Square Samples", description="Square sampling values for easier artist control", @@ -241,6 +253,18 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): default='USE', ) + cls.sample_all_lights_direct = BoolProperty( + name="Sample All Direct Lights", + 
description="Sample all lights (for direct samples), rather than randomly picking one", + default=True, + ) + + cls.sample_all_lights_indirect = BoolProperty( + name="Sample All Indirect Lights", + description="Sample all lights (for indirect samples), rather than randomly picking one", + default=True, + ) + cls.no_caustics = BoolProperty( name="No Caustics", description="Leave out caustics, resulting in a darker image with less noise", @@ -447,6 +471,33 @@ class CyclesRenderSettings(bpy.types.PropertyGroup): default=False, ) + cls.bake_type = EnumProperty( + name="Bake Type", + default='COMBINED', + description="Type of pass to bake", + items = ( + ('COMBINED', "Combined", ""), + ('AO', "Ambient Occlusion", ""), + ('SHADOW', "Shadow", ""), + ('NORMAL', "Normal", ""), + ('UV', "UV", ""), + ('EMIT', "Emit", ""), + ('ENVIRONMENT', "Environment", ""), + ('DIFFUSE_DIRECT', "Diffuse Direct", ""), + ('DIFFUSE_INDIRECT', "Diffuse Indirect", ""), + ('DIFFUSE_COLOR', "Diffuse Color", ""), + ('GLOSSY_DIRECT', "Glossy Direct", ""), + ('GLOSSY_INDIRECT', "Glossy Indirect", ""), + ('GLOSSY_COLOR', "Glossy Color", ""), + ('TRANSMISSION_DIRECT', "Transmission Direct", ""), + ('TRANSMISSION_INDIRECT', "Transmission Indirect", ""), + ('TRANSMISSION_COLOR', "Transmission Color", ""), + ('SUBSURFACE_DIRECT', "Subsurface Direct", ""), + ('SUBSURFACE_INDIRECT', "Subsurface Indirect", ""), + ('SUBSURFACE_COLOR', "Subsurface Color", ""), + ), + ) + @classmethod def unregister(cls): del bpy.types.Scene.cycles @@ -718,6 +769,41 @@ class CyclesMeshSettings(bpy.types.PropertyGroup): del bpy.types.MetaBall.cycles +class CyclesObjectBlurSettings(bpy.types.PropertyGroup): + + @classmethod + def register(cls): + + bpy.types.Object.cycles = PointerProperty( + name="Cycles Object Settings", + description="Cycles object settings", + type=cls, + ) + + cls.use_motion_blur = BoolProperty( + name="Use Motion Blur", + description="Use motion blur for this object", + default=True, + ) + + 
cls.use_deform_motion = BoolProperty( + name="Use Deformation Motion", + description="Use deformation motion blur for this object", + default=True, + ) + + cls.motion_steps = IntProperty( + name="Motion Steps", + description="Control accuracy of deformation motion blur, more steps gives more memory usage (actual number of steps is 2^(steps - 1))", + min=1, soft_max=8, + default=1, + ) + + @classmethod + def unregister(cls): + del bpy.types.Object.cycles + + class CyclesCurveRenderSettings(bpy.types.PropertyGroup): @classmethod def register(cls): diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py index c0ce80426c0..5c8115b6612 100644 --- a/intern/cycles/blender/addon/ui.py +++ b/intern/cycles/blender/addon/ui.py @@ -49,6 +49,13 @@ class CyclesButtonsPanel(): return rd.engine in cls.COMPAT_ENGINES +def use_cpu(context): + cscene = context.scene.cycles + device_type = context.user_preferences.system.compute_device_type + + return (device_type == 'NONE' or cscene.device == 'CPU') + + def draw_samples_info(layout, cscene): integrator = cscene.progressive @@ -103,7 +110,6 @@ class CyclesRender_PT_sampling(CyclesButtonsPanel, Panel): scene = context.scene cscene = scene.cycles - device_type = context.user_preferences.system.compute_device_type row = layout.row(align=True) row.menu("CYCLES_MT_sampling_presets", text=bpy.types.CYCLES_MT_sampling_presets.bl_label) @@ -133,6 +139,9 @@ class CyclesRender_PT_sampling(CyclesButtonsPanel, Panel): sub.label(text="AA Samples:") sub.prop(cscene, "aa_samples", text="Render") sub.prop(cscene, "preview_aa_samples", text="Preview") + sub.separator() + sub.prop(cscene, "sample_all_lights_direct") + sub.prop(cscene, "sample_all_lights_indirect") col = split.column() sub = col.column(align=True) @@ -145,7 +154,7 @@ class CyclesRender_PT_sampling(CyclesButtonsPanel, Panel): sub.prop(cscene, "subsurface_samples", text="Subsurface") sub.prop(cscene, "volume_samples", text="Volume") - if cscene.feature_set == 
'EXPERIMENTAL' and (device_type == 'NONE' or cscene.device == 'CPU'): + if cscene.feature_set == 'EXPERIMENTAL' and use_cpu(context): layout.row().prop(cscene, "sampling_pattern", text="Pattern") for rl in scene.render.layers: @@ -167,9 +176,16 @@ class CyclesRender_PT_volume_sampling(CyclesButtonsPanel, Panel): scene = context.scene cscene = scene.cycles - split = layout.split() - split.prop(cscene, "volume_step_size") - split.prop(cscene, "volume_max_steps") + split = layout.split(align=True) + + sub = split.column(align=True) + sub.label("Heterogeneous:") + sub.prop(cscene, "volume_step_size") + sub.prop(cscene, "volume_max_steps") + + sub = split.column(align=True) + sub.label("Homogeneous:") + sub.prop(cscene, "volume_homogeneous_sampling", text="") class CyclesRender_PT_light_paths(CyclesButtonsPanel, Panel): @@ -310,28 +326,6 @@ class CyclesRender_PT_performance(CyclesButtonsPanel, Panel): col.prop(cscene, "debug_use_spatial_splits") -class CyclesRender_PT_opengl(CyclesButtonsPanel, Panel): - bl_label = "OpenGL Render" - bl_options = {'DEFAULT_CLOSED'} - - def draw(self, context): - layout = self.layout - - rd = context.scene.render - - split = layout.split() - - col = split.column() - col.prop(rd, "use_antialiasing") - sub = col.row() - sub.active = rd.use_antialiasing - sub.prop(rd, "antialiasing_samples", expand=True) - - col = split.column() - col.label(text="Alpha:") - col.prop(rd, "alpha_mode", text="") - - class CyclesRender_PT_layer_options(CyclesButtonsPanel, Panel): bl_label = "Layer" bl_context = "render_layer" @@ -562,26 +556,48 @@ class Cycles_PT_mesh_displacement(CyclesButtonsPanel, Panel): layout.prop(cdata, "dicing_rate") -class Cycles_PT_mesh_normals(CyclesButtonsPanel, Panel): - bl_label = "Normals" - bl_context = "data" +class CyclesObject_PT_motion_blur(CyclesButtonsPanel, Panel): + bl_label = "Motion Blur" + bl_context = "object" + bl_options = {'DEFAULT_CLOSED'} @classmethod def poll(cls, context): - return 
CyclesButtonsPanel.poll(context) and context.mesh + ob = context.object + return CyclesButtonsPanel.poll(context) and ob and ob.type in {'MESH', 'CURVE', 'CURVE', 'SURFACE', 'FONT', 'META'} + + def draw_header(self, context): + layout = self.layout + + rd = context.scene.render + scene = context.scene + # cscene = scene.cycles + + layout.active = rd.use_motion_blur + + ob = context.object + cob = ob.cycles + + layout.prop(cob, "use_motion_blur", text="") def draw(self, context): layout = self.layout - mesh = context.mesh + rd = context.scene.render + scene = context.scene + # cscene = scene.cycles - split = layout.split() + ob = context.object + cob = ob.cycles - col = split.column() - col.prop(mesh, "show_double_sided") + layout.active = (rd.use_motion_blur and cob.use_motion_blur) - col = split.column() - col.label() + row = layout.row() + row.prop(cob, "use_deform_motion", text="Deformation") + + sub = row.row() + sub.active = cob.use_deform_motion + sub.prop(cob, "motion_steps", text="Steps") class CyclesObject_PT_ray_visibility(CyclesButtonsPanel, Panel): @@ -593,7 +609,8 @@ class CyclesObject_PT_ray_visibility(CyclesButtonsPanel, Panel): def poll(cls, context): ob = context.object return (CyclesButtonsPanel.poll(context) and - ob and ob.type in {'MESH', 'CURVE', 'CURVE', 'SURFACE', 'FONT', 'META', 'LAMP'}) + ob and ob.type in {'MESH', 'CURVE', 'SURFACE', 'FONT', 'META', 'LAMP'} or + ob and ob.dupli_type == 'GROUP' and ob.dupli_group) def draw(self, context): layout = self.layout @@ -847,9 +864,10 @@ class CyclesWorld_PT_mist(CyclesButtonsPanel, Panel): @classmethod def poll(cls, context): if CyclesButtonsPanel.poll(context): - for rl in context.scene.render.layers: - if rl.use_pass_mist: - return True + if context.world: + for rl in context.scene.render.layers: + if rl.use_pass_mist: + return True return False @@ -997,8 +1015,9 @@ class CyclesMaterial_PT_settings(CyclesButtonsPanel, Panel): split = layout.split() - col = split.column() + col = 
split.column(align=True) col.prop(mat, "diffuse_color", text="Viewport Color") + col.prop(mat, "alpha") col = split.column(align=True) col.label() @@ -1108,7 +1127,7 @@ class CyclesTexture_PT_colors(CyclesButtonsPanel, Panel): def poll(cls, context): # node = context.texture_node return False - #return node and CyclesButtonsPanel.poll(context) + # return node and CyclesButtonsPanel.poll(context) def draw(self, context): layout = self.layout @@ -1176,7 +1195,7 @@ class CyclesRender_PT_CurveRendering(CyclesButtonsPanel, Panel): @classmethod def poll(cls, context): scene = context.scene - cscene = scene.cycles + # cscene = scene.cycles psys = context.particle_system return CyclesButtonsPanel.poll(context) and psys and psys.settings.type == 'HAIR' @@ -1208,6 +1227,54 @@ class CyclesRender_PT_CurveRendering(CyclesButtonsPanel, Panel): row.prop(ccscene, "maximum_width", text="Max Ext.") +class CyclesRender_PT_bake(CyclesButtonsPanel, Panel): + bl_label = "Bake" + bl_context = "render" + bl_options = {'DEFAULT_CLOSED'} + COMPAT_ENGINES = {'CYCLES'} + + def draw(self, context): + layout = self.layout + + scene = context.scene + cscene = scene.cycles + + cbk = scene.render.bake + + layout.operator("object.bake", icon='RENDER_STILL').type = \ + cscene.bake_type + + col = layout.column() + col.prop(cscene, "bake_type") + + col.separator() + split = layout.split() + + sub = split.column() + sub.prop(cbk, "use_clear") + sub.prop(cbk, "margin") + + sub = split.column() + sub.prop(cbk, "use_selected_to_active") + sub = sub.column() + + sub.active = cbk.use_selected_to_active + sub.prop(cbk, "cage_extrusion", text="Distance") + sub.prop_search(cbk, "cage", scene, "objects") + + if cscene.bake_type == 'NORMAL': + col.separator() + box = col.box() + box.label(text="Normal Settings:") + box.prop(cbk, "normal_space", text="Space") + + row = box.row(align=True) + row.label(text = "Swizzle:") + row.prop(cbk, "normal_r", text="") + row.prop(cbk, "normal_g", text="") + row.prop(cbk, 
"normal_b", text="") + + class CyclesParticle_PT_CurveSettings(CyclesButtonsPanel, Panel): bl_label = "Cycles Hair Settings" bl_context = "particle" @@ -1215,7 +1282,7 @@ class CyclesParticle_PT_CurveSettings(CyclesButtonsPanel, Panel): @classmethod def poll(cls, context): scene = context.scene - cscene = scene.cycles + # cscene = scene.cycles ccscene = scene.cycles_curves psys = context.particle_system use_curves = ccscene.use_curves and psys @@ -1275,7 +1342,7 @@ def draw_device(self, context): if device_type in {'CUDA', 'OPENCL', 'NETWORK'}: layout.prop(cscene, "device") - if engine.with_osl() and (cscene.device == 'CPU' or device_type == 'NONE'): + if engine.with_osl() and use_cpu(context): layout.prop(cscene, "shading_system") @@ -1316,6 +1383,7 @@ def get_panels(): "DATA_PT_context_camera", "DATA_PT_context_lamp", "DATA_PT_context_speaker", + "DATA_PT_normals", "DATA_PT_texture_space", "DATA_PT_curve_texture_space", "DATA_PT_mball_texture_space", diff --git a/intern/cycles/blender/blender_camera.cpp b/intern/cycles/blender/blender_camera.cpp index 4c6b42a9cbc..1a85561c6d5 100644 --- a/intern/cycles/blender/blender_camera.cpp +++ b/intern/cycles/blender/blender_camera.cpp @@ -212,8 +212,8 @@ static void blender_camera_viewplane(BlenderCamera *bcam, int width, int height, BoundBox2D *viewplane, float *aspectratio, float *sensor_size) { /* dimensions */ - float xratio = width*bcam->pixelaspect.x; - float yratio = height*bcam->pixelaspect.y; + float xratio = (float)width*bcam->pixelaspect.x; + float yratio = (float)height*bcam->pixelaspect.y; /* compute x/y aspect and ratio */ float xaspect, yaspect; @@ -288,8 +288,8 @@ static void blender_camera_sync(Camera *cam, BlenderCamera *bcam, int width, int /* panorama sensor */ if (bcam->type == CAMERA_PANORAMA && bcam->panorama_type == PANORAMA_FISHEYE_EQUISOLID) { - float fit_xratio = bcam->full_width*bcam->pixelaspect.x; - float fit_yratio = bcam->full_height*bcam->pixelaspect.y; + float fit_xratio = 
(float)bcam->full_width*bcam->pixelaspect.x; + float fit_yratio = (float)bcam->full_height*bcam->pixelaspect.y; bool horizontal_fit; float sensor_size; @@ -386,7 +386,7 @@ void BlenderSync::sync_camera(BL::RenderSettings b_render, BL::Object b_override blender_camera_sync(cam, &bcam, width, height); } -void BlenderSync::sync_camera_motion(BL::Object b_ob, int motion) +void BlenderSync::sync_camera_motion(BL::Object b_ob, float motion_time) { Camera *cam = scene->camera; @@ -394,12 +394,14 @@ void BlenderSync::sync_camera_motion(BL::Object b_ob, int motion) tfm = blender_camera_matrix(tfm, cam->type); if(tfm != cam->matrix) { - if(motion == -1) + if(motion_time == -1.0f) { cam->motion.pre = tfm; - else + cam->use_motion = true; + } + else if(motion_time == 1.0f) { cam->motion.post = tfm; - - cam->use_motion = true; + cam->use_motion = true; + } } } @@ -563,10 +565,10 @@ BufferParams BlenderSync::get_buffer_params(BL::RenderSettings b_render, BL::Sce if(use_border) { /* border render */ - params.full_x = cam->border.left*width; - params.full_y = cam->border.bottom*height; - params.width = (int)(cam->border.right*width) - params.full_x; - params.height = (int)(cam->border.top*height) - params.full_y; + params.full_x = (int)(cam->border.left * (float)width); + params.full_y = (int)(cam->border.bottom * (float)height); + params.width = (int)(cam->border.right * (float)width) - params.full_x; + params.height = (int)(cam->border.top * (float)height) - params.full_y; /* survive in case border goes out of view or becomes too small */ params.width = max(params.width, 1); diff --git a/intern/cycles/blender/blender_curves.cpp b/intern/cycles/blender/blender_curves.cpp index 92c51b0aad3..22de7b64273 100644 --- a/intern/cycles/blender/blender_curves.cpp +++ b/intern/cycles/blender/blender_curves.cpp @@ -588,7 +588,7 @@ void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CData) float radius = shaperadius(CData->psys_shape[sys], CData->psys_rootradius[sys], 
CData->psys_tipradius[sys], time); if(CData->psys_closetip[sys] && (curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1)) - radius =0.0f; + radius = 0.0f; mesh->add_curve_key(ickey_loc, radius); if(attr_intercept) @@ -612,16 +612,23 @@ void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CData) } } -static void ExportCurveSegmentsMotion(Scene *scene, Mesh *mesh, ParticleCurveData *CData, int motion) +static void ExportCurveSegmentsMotion(Scene *scene, Mesh *mesh, ParticleCurveData *CData, int time_index) { + /* find attribute */ + Attribute *attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + bool new_attribute = false; + + /* add new attribute if it doesn't exist already */ + if(!attr_mP) { + attr_mP = mesh->curve_attributes.add(ATTR_STD_MOTION_VERTEX_POSITION); + new_attribute = true; + } + /* export motion vectors for curve keys */ - AttributeStandard std = (motion == -1)? ATTR_STD_MOTION_PRE: ATTR_STD_MOTION_POST; - Attribute *attr_motion = mesh->curve_attributes.add(std); - float3 *data_motion = attr_motion->data_float3(); - float3 *current_motion = data_motion; - size_t size = mesh->curve_keys.size(); - size_t i = 0; + size_t numkeys = mesh->curve_keys.size(); + float4 *mP = attr_mP->data_float4() + time_index*numkeys; bool have_motion = false; + int i = 0; for(int sys = 0; sys < CData->psys_firstcurve.size(); sys++) { if(CData->psys_curvenum[sys] == 0) @@ -633,15 +640,21 @@ static void ExportCurveSegmentsMotion(Scene *scene, Mesh *mesh, ParticleCurveDat for(int curvekey = CData->curve_firstkey[curve]; curvekey < CData->curve_firstkey[curve] + CData->curve_keynum[curve]; curvekey++) { if(i < mesh->curve_keys.size()) { - *current_motion = CData->curvekey_co[curvekey]; + float3 ickey_loc = CData->curvekey_co[curvekey]; + float time = CData->curvekey_time[curvekey]/CData->curve_length[curve]; + float radius = shaperadius(CData->psys_shape[sys], CData->psys_rootradius[sys], CData->psys_tipradius[sys], 
time); + + if(CData->psys_closetip[sys] && (curvekey == CData->curve_firstkey[curve] + CData->curve_keynum[curve] - 1)) + radius = 0.0f; + + mP[i] = float3_to_float4(ickey_loc); + mP[i].w = radius; /* unlike mesh coordinates, these tend to be slightly different * between frames due to particle transforms into/out of object * space, so we use an epsilon to detect actual changes */ - if(len_squared(*current_motion - mesh->curve_keys[i].co) > 1e-5f*1e-5f) + if(len_squared(mP[i] - mesh->curve_keys[i]) > 1e-5f*1e-5f) have_motion = true; - - current_motion++; } i++; @@ -649,8 +662,23 @@ static void ExportCurveSegmentsMotion(Scene *scene, Mesh *mesh, ParticleCurveDat } } - if(i != size || !have_motion) - mesh->curve_attributes.remove(std); + /* in case of new attribute, we verify if there really was any motion */ + if(new_attribute) { + if(i != numkeys || !have_motion) { + /* no motion, remove attributes again */ + mesh->curve_attributes.remove(ATTR_STD_MOTION_VERTEX_POSITION); + } + else if(time_index > 0) { + /* motion, fill up previous steps that we might have skipped because + * they had no motion, but we need them anyway now */ + for(int step = 0; step < time_index; step++) { + float4 *mP = attr_mP->data_float4() + step*numkeys; + + for(int key = 0; key < numkeys; key++) + mP[key] = mesh->curve_keys[key]; + } + } + } } void ExportCurveTriangleUV(Mesh *mesh, ParticleCurveData *CData, int vert_offset, int resol, float3 *uvdata) @@ -796,7 +824,7 @@ void BlenderSync::sync_curve_settings() curve_system_manager->tag_update(scene); } -void BlenderSync::sync_curves(Mesh *mesh, BL::Mesh b_mesh, BL::Object b_ob, int motion) +void BlenderSync::sync_curves(Mesh *mesh, BL::Mesh b_mesh, BL::Object b_ob, bool motion, int time_index) { if(!motion) { /* Clear stored curve data */ @@ -851,7 +879,7 @@ void BlenderSync::sync_curves(Mesh *mesh, BL::Mesh b_mesh, BL::Object b_ob, int } else { if(motion) - ExportCurveSegmentsMotion(scene, mesh, &CData, motion); + 
ExportCurveSegmentsMotion(scene, mesh, &CData, time_index); else ExportCurveSegments(scene, mesh, &CData); } @@ -876,7 +904,7 @@ void BlenderSync::sync_curves(Mesh *mesh, BL::Mesh b_mesh, BL::Object b_ob, int size_t i = 0; foreach(Mesh::Curve& curve, mesh->curves) { - float3 co = mesh->curve_keys[curve.first_key].co; + float3 co = float4_to_float3(mesh->curve_keys[curve.first_key]); generated[i++] = co*size - loc; } } diff --git a/intern/cycles/blender/blender_mesh.cpp b/intern/cycles/blender/blender_mesh.cpp index 61c6ef6af1b..83514879477 100644 --- a/intern/cycles/blender/blender_mesh.cpp +++ b/intern/cycles/blender/blender_mesh.cpp @@ -206,6 +206,40 @@ static void mikk_compute_tangents(BL::Mesh b_mesh, BL::MeshTextureFaceLayer b_la } } +/* Create Volume Attribute */ + +static void create_mesh_volume_attribute(BL::Object b_ob, Mesh *mesh, ImageManager *image_manager, AttributeStandard std) +{ + BL::SmokeDomainSettings b_domain = object_smoke_domain_find(b_ob); + + if(!b_domain) + return; + + Attribute *attr = mesh->attributes.add(std); + VoxelAttribute *volume_data = attr->data_voxel(); + bool is_float, is_linear; + bool animated = false; + + volume_data->manager = image_manager; + volume_data->slot = image_manager->add_image(Attribute::standard_name(std), + b_ob.ptr.data, animated, is_float, is_linear, INTERPOLATION_LINEAR, true); +} + +static void create_mesh_volume_attributes(Scene *scene, BL::Object b_ob, Mesh *mesh) +{ + /* for smoke volume rendering */ + if(mesh->need_attribute(scene, ATTR_STD_VOLUME_DENSITY)) + create_mesh_volume_attribute(b_ob, mesh, scene->image_manager, ATTR_STD_VOLUME_DENSITY); + if(mesh->need_attribute(scene, ATTR_STD_VOLUME_COLOR)) + create_mesh_volume_attribute(b_ob, mesh, scene->image_manager, ATTR_STD_VOLUME_COLOR); + if(mesh->need_attribute(scene, ATTR_STD_VOLUME_FLAME)) + create_mesh_volume_attribute(b_ob, mesh, scene->image_manager, ATTR_STD_VOLUME_FLAME); + if(mesh->need_attribute(scene, ATTR_STD_VOLUME_HEAT)) + 
create_mesh_volume_attribute(b_ob, mesh, scene->image_manager, ATTR_STD_VOLUME_HEAT); + if(mesh->need_attribute(scene, ATTR_STD_VOLUME_VELOCITY)) + create_mesh_volume_attribute(b_ob, mesh, scene->image_manager, ATTR_STD_VOLUME_VELOCITY); +} + /* Create Mesh */ static void create_mesh(Scene *scene, Mesh *mesh, BL::Mesh b_mesh, const vector<uint>& used_shaders) @@ -214,6 +248,7 @@ static void create_mesh(Scene *scene, Mesh *mesh, BL::Mesh b_mesh, const vector< int numverts = b_mesh.vertices.length(); int numfaces = b_mesh.tessfaces.length(); int numtris = 0; + bool use_loop_normals = b_mesh.use_auto_smooth(); BL::Mesh::vertices_iterator v; BL::Mesh::tessfaces_iterator f; @@ -236,6 +271,21 @@ static void create_mesh(Scene *scene, Mesh *mesh, BL::Mesh b_mesh, const vector< for(b_mesh.vertices.begin(v); v != b_mesh.vertices.end(); ++v, ++N) *N = get_float3(v->normal()); + N = attr_N->data_float3(); + + /* create generated coordinates from undeformed coordinates */ + if(mesh->need_attribute(scene, ATTR_STD_GENERATED)) { + Attribute *attr = mesh->attributes.add(ATTR_STD_GENERATED); + + float3 loc, size; + mesh_texture_space(b_mesh, loc, size); + + float3 *generated = attr->data_float3(); + size_t i = 0; + + for(b_mesh.vertices.begin(v); v != b_mesh.vertices.end(); ++v) + generated[i++] = get_float3(v->undeformed_co())*size - loc; + } /* create faces */ vector<int> nverts(numfaces); @@ -248,9 +298,32 @@ static void create_mesh(Scene *scene, Mesh *mesh, BL::Mesh b_mesh, const vector< int shader = used_shaders[mi]; bool smooth = f->use_smooth(); + /* split vertices if normal is different + * + * note all vertex attributes must have been set here so we can split + * and copy attributes in split_vertex without remapping later */ + if(use_loop_normals) { + BL::Array<float, 12> loop_normals = f->split_normals(); + + for(int i = 0; i < n; i++) { + float3 loop_N = make_float3(loop_normals[i * 3], loop_normals[i * 3 + 1], loop_normals[i * 3 + 2]); + + if(N[vi[i]] != loop_N) { + int 
new_vi = mesh->split_vertex(vi[i]); + + /* set new normal and vertex index */ + N = attr_N->data_float3(); + N[new_vi] = loop_N; + vi[i] = new_vi; + } + } + } + + /* create triangles */ if(n == 4) { if(is_zero(cross(mesh->verts[vi[1]] - mesh->verts[vi[0]], mesh->verts[vi[2]] - mesh->verts[vi[0]])) || - is_zero(cross(mesh->verts[vi[2]] - mesh->verts[vi[0]], mesh->verts[vi[3]] - mesh->verts[vi[0]]))) { + is_zero(cross(mesh->verts[vi[2]] - mesh->verts[vi[0]], mesh->verts[vi[3]] - mesh->verts[vi[0]]))) + { mesh->set_triangle(ti++, vi[0], vi[1], vi[3], shader, smooth); mesh->set_triangle(ti++, vi[2], vi[3], vi[1], shader, smooth); } @@ -348,20 +421,6 @@ static void create_mesh(Scene *scene, Mesh *mesh, BL::Mesh b_mesh, const vector< } } - /* create generated coordinates from undeformed coordinates */ - if(mesh->need_attribute(scene, ATTR_STD_GENERATED)) { - Attribute *attr = mesh->attributes.add(ATTR_STD_GENERATED); - - float3 loc, size; - mesh_texture_space(b_mesh, loc, size); - - float3 *generated = attr->data_float3(); - size_t i = 0; - - for(b_mesh.vertices.begin(v); v != b_mesh.vertices.end(); ++v) - generated[i++] = get_float3(v->undeformed_co())*size - loc; - } - /* for volume objects, create a matrix to transform from object space to * mesh texture space. 
this does not work with deformations but that can * probably only be done well with a volume grid mapping of coordinates */ @@ -414,7 +473,7 @@ static void create_subd_mesh(Scene *scene, Mesh *mesh, BL::Mesh b_mesh, PointerR //sdparams.camera = scene->camera; /* tesselate */ - DiagSplit dsplit(sdparams);; + DiagSplit dsplit(sdparams); sdmesh.tessellate(&dsplit); } @@ -449,6 +508,7 @@ Mesh *BlenderSync::sync_mesh(BL::Object b_ob, bool object_updated, bool hide_tri Mesh *mesh; if(!mesh_map.sync(&mesh, key)) { + /* if transform was applied to mesh, need full update */ if(object_updated && mesh->transform_applied); /* test if shaders changed, these can be object level so mesh @@ -481,7 +541,7 @@ Mesh *BlenderSync::sync_mesh(BL::Object b_ob, bool object_updated, bool hide_tri /* compares curve_keys rather than strands in order to handle quick hair * adjustsments in dynamic BVH - other methods could probably do this better*/ - vector<Mesh::CurveKey> oldcurve_keys = mesh->curve_keys; + vector<float4> oldcurve_keys = mesh->curve_keys; mesh->clear(); mesh->used_shaders = used_shaders; @@ -500,10 +560,12 @@ Mesh *BlenderSync::sync_mesh(BL::Object b_ob, bool object_updated, bool hide_tri create_subd_mesh(scene, mesh, b_mesh, &cmesh, used_shaders); else create_mesh(scene, mesh, b_mesh, used_shaders); + + create_mesh_volume_attributes(scene, b_ob, mesh); } if(render_layer.use_hair) - sync_curves(mesh, b_mesh, b_ob, 0); + sync_curves(mesh, b_mesh, b_ob, false); /* free derived mesh */ b_data.meshes.remove(b_mesh); @@ -535,7 +597,7 @@ Mesh *BlenderSync::sync_mesh(BL::Object b_ob, bool object_updated, bool hide_tri if(oldcurve_keys.size() != mesh->curve_keys.size()) rebuild = true; else if(oldcurve_keys.size()) { - if(memcmp(&oldcurve_keys[0], &mesh->curve_keys[0], sizeof(Mesh::CurveKey)*oldcurve_keys.size()) != 0) + if(memcmp(&oldcurve_keys[0], &mesh->curve_keys[0], sizeof(float4)*oldcurve_keys.size()) != 0) rebuild = true; } @@ -544,46 +606,153 @@ Mesh 
*BlenderSync::sync_mesh(BL::Object b_ob, bool object_updated, bool hide_tri return mesh; } -void BlenderSync::sync_mesh_motion(BL::Object b_ob, Mesh *mesh, int motion) +void BlenderSync::sync_mesh_motion(BL::Object b_ob, Object *object, float motion_time) { - /* todo: displacement, subdivision */ - size_t size = mesh->verts.size(); - - /* skip objects without deforming modifiers. this is not a totally reliable, - * would need a more extensive check to see which objects are animated */ - if(!size || !ccl::BKE_object_is_deform_modified(b_ob, b_scene, preview)) - return; - /* ensure we only sync instanced meshes once */ + Mesh *mesh = object->mesh; + if(mesh_motion_synced.find(mesh) != mesh_motion_synced.end()) return; mesh_motion_synced.insert(mesh); - /* get derived mesh */ - BL::Mesh b_mesh = object_to_mesh(b_data, b_ob, b_scene, true, !preview, false); + /* for motion pass always compute, for motion blur it can be disabled */ + int time_index = 0; + + if(scene->need_motion() == Scene::MOTION_BLUR) { + if(!mesh->use_motion_blur) + return; + + /* see if this mesh needs motion data at this time */ + vector<float> object_times = object->motion_times(); + bool found = false; + + foreach(float object_time, object_times) { + if(motion_time == object_time) { + found = true; + break; + } + else + time_index++; + } - if(b_mesh) { - BL::Mesh::vertices_iterator v; - AttributeStandard std = (motion == -1)? ATTR_STD_MOTION_PRE: ATTR_STD_MOTION_POST; - Attribute *attr_M = mesh->attributes.add(std); - float3 *M = attr_M->data_float3(), *cur_M; - size_t i = 0; + if(!found) + return; + } + else { + if(motion_time == -1.0f) + time_index = 0; + else if(motion_time == 1.0f) + time_index = 1; + else + return; + } + + /* skip empty meshes */ + size_t numverts = mesh->verts.size(); + size_t numkeys = mesh->curve_keys.size(); + + if(!numverts && !numkeys) + return; + + /* skip objects without deforming modifiers. 
this is not totally reliable, + * would need a more extensive check to see which objects are animated */ + BL::Mesh b_mesh(PointerRNA_NULL); + + if(ccl::BKE_object_is_deform_modified(b_ob, b_scene, preview)) { + /* get derived mesh */ + b_mesh = object_to_mesh(b_data, b_ob, b_scene, true, !preview, false); + } - for(b_mesh.vertices.begin(v), cur_M = M; v != b_mesh.vertices.end() && i < size; ++v, cur_M++, i++) - *cur_M = get_float3(v->co()); + if(!b_mesh) { + /* if we have no motion blur on this frame, but on other frames, copy */ + if(numverts) { + /* triangles */ + Attribute *attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + + if(attr_mP) { + Attribute *attr_mN = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_NORMAL); + Attribute *attr_N = mesh->attributes.find(ATTR_STD_VERTEX_NORMAL); + float3 *P = &mesh->verts[0]; + float3 *N = (attr_N)? attr_N->data_float3(): NULL; + + memcpy(attr_mP->data_float3() + time_index*numverts, P, sizeof(float3)*numverts); + if(attr_mN) + memcpy(attr_mN->data_float3() + time_index*numverts, N, sizeof(float3)*numverts); + } + } + + if(numkeys) { + /* curves */ + Attribute *attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + + if(attr_mP) { + float4 *keys = &mesh->curve_keys[0]; + memcpy(attr_mP->data_float4() + time_index*numkeys, keys, sizeof(float4)*numkeys); + } + } + + return; + } + + if(numverts) { + /* find attributes */ + Attribute *attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + Attribute *attr_mN = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_NORMAL); + Attribute *attr_N = mesh->attributes.find(ATTR_STD_VERTEX_NORMAL); + bool new_attribute = false; + + /* add new attributes if they don't exist already */ + if(!attr_mP) { + attr_mP = mesh->attributes.add(ATTR_STD_MOTION_VERTEX_POSITION); + if(attr_N) + attr_mN = mesh->attributes.add(ATTR_STD_MOTION_VERTEX_NORMAL); - /* if number of vertices changed, or if coordinates stayed the same, drop it */ - if(i != size || 
memcmp(M, &mesh->verts[0], sizeof(float3)*size) == 0) - mesh->attributes.remove(std); + new_attribute = true; + } + + /* load vertex data from mesh */ + float3 *mP = attr_mP->data_float3() + time_index*numverts; + float3 *mN = (attr_mN)? attr_mN->data_float3() + time_index*numverts: NULL; + + BL::Mesh::vertices_iterator v; + int i = 0; - /* hair motion */ - if(render_layer.use_hair) - sync_curves(mesh, b_mesh, b_ob, motion); + for(b_mesh.vertices.begin(v); v != b_mesh.vertices.end() && i < numverts; ++v, ++i) { + mP[i] = get_float3(v->co()); + if(mN) + mN[i] = get_float3(v->normal()); + } - /* free derived mesh */ - b_data.meshes.remove(b_mesh); + /* in case of new attribute, we verify if there really was any motion */ + if(new_attribute) { + if(i != numverts || memcmp(mP, &mesh->verts[0], sizeof(float3)*numverts) == 0) { + /* no motion, remove attributes again */ + mesh->attributes.remove(ATTR_STD_MOTION_VERTEX_POSITION); + if(attr_mN) + mesh->attributes.remove(ATTR_STD_MOTION_VERTEX_NORMAL); + } + else if(time_index > 0) { + /* motion, fill up previous steps that we might have skipped because + * they had no motion, but we need them anyway now */ + float3 *P = &mesh->verts[0]; + float3 *N = (attr_N)? 
attr_N->data_float3(): NULL; + + for(int step = 0; step < time_index; step++) { + memcpy(attr_mP->data_float3() + step*numverts, P, sizeof(float3)*numverts); + if(attr_mN) + memcpy(attr_mN->data_float3() + step*numverts, N, sizeof(float3)*numverts); + } + } + } } + + /* hair motion */ + if(numkeys) + sync_curves(mesh, b_mesh, b_ob, true, time_index); + + /* free derived mesh */ + b_data.meshes.remove(b_mesh); } CCL_NAMESPACE_END diff --git a/intern/cycles/blender/blender_object.cpp b/intern/cycles/blender/blender_object.cpp index cc52717fdb6..167647608a5 100644 --- a/intern/cycles/blender/blender_object.cpp +++ b/intern/cycles/blender/blender_object.cpp @@ -38,7 +38,11 @@ CCL_NAMESPACE_BEGIN bool BlenderSync::BKE_object_is_modified(BL::Object b_ob) { /* test if we can instance or if the object is modified */ - if(ccl::BKE_object_is_modified(b_ob, b_scene, preview)) { + if(b_ob.type() == BL::Object::type_META) { + /* multi-user and dupli metaballs are fused, can't instance */ + return true; + } + else if(ccl::BKE_object_is_modified(b_ob, b_scene, preview)) { /* modifiers */ return true; } @@ -213,9 +217,11 @@ void BlenderSync::sync_background_light() /* Object */ -Object *BlenderSync::sync_object(BL::Object b_parent, int persistent_id[OBJECT_PERSISTENT_ID_SIZE], BL::DupliObject b_dupli_ob, Transform& tfm, uint layer_flag, int motion, bool hide_tris) +Object *BlenderSync::sync_object(BL::Object b_parent, int persistent_id[OBJECT_PERSISTENT_ID_SIZE], BL::DupliObject b_dupli_ob, + Transform& tfm, uint layer_flag, float motion_time, bool hide_tris) { BL::Object b_ob = (b_dupli_ob ? 
b_dupli_ob.object() : b_parent); + bool motion = motion_time != 0.0f; /* light is handled separately */ if(object_is_light(b_ob)) { @@ -238,19 +244,22 @@ Object *BlenderSync::sync_object(BL::Object b_parent, int persistent_id[OBJECT_P if(motion) { object = object_map.find(key); - if(object) { + if(object && (scene->need_motion() == Scene::MOTION_PASS || object_use_motion(b_ob))) { + /* object transformation */ if(tfm != object->tfm) { - if(motion == -1) + if(motion_time == -1.0f) { object->motion.pre = tfm; - else + object->use_motion = true; + } + else if(motion_time == 1.0f) { object->motion.post = tfm; - - object->use_motion = true; + object->use_motion = true; + } } - /* mesh deformation blur not supported yet */ - if(!scene->integrator->motion_blur) - sync_mesh_motion(b_ob, object->mesh, motion); + /* mesh deformation */ + if(object->mesh) + sync_mesh_motion(b_ob, object, motion_time); } return object; @@ -310,6 +319,24 @@ Object *BlenderSync::sync_object(BL::Object b_parent, int persistent_id[OBJECT_P object->motion.post = tfm; object->use_motion = false; + /* motion blur */ + if(scene->need_motion() == Scene::MOTION_BLUR && object->mesh) { + Mesh *mesh = object->mesh; + + mesh->use_motion_blur = false; + + if(object_use_motion(b_ob)) { + if(object_use_deform_motion(b_ob)) { + mesh->motion_steps = object_motion_steps(b_ob); + mesh->use_motion_blur = true; + } + + vector<float> times = object->motion_times(); + foreach(float time, times) + motion_times.insert(time); + } + } + /* random number */ object->random_id = hash_string(object->name.c_str()); @@ -408,10 +435,11 @@ static bool object_render_hide_duplis(BL::Object b_ob) /* Object Loop */ -void BlenderSync::sync_objects(BL::SpaceView3D b_v3d, int motion) +void BlenderSync::sync_objects(BL::SpaceView3D b_v3d, float motion_time) { /* layer data */ uint scene_layer = render_layer.scene_layer; + bool motion = motion_time != 0.0f; if(!motion) { /* prepare for sync */ @@ -420,36 +448,40 @@ void 
BlenderSync::sync_objects(BL::SpaceView3D b_v3d, int motion) object_map.pre_sync(); mesh_synced.clear(); particle_system_map.pre_sync(); + motion_times.clear(); } else { mesh_motion_synced.clear(); } /* object loop */ - BL::Scene::objects_iterator b_ob; + BL::Scene::object_bases_iterator b_base; BL::Scene b_sce = b_scene; - - /* global particle index counter */ - int particle_id = 1; + /* modifier result type (not exposed as enum in C++ API) + * 1 : eModifierMode_Realtime + * 2 : eModifierMode_Render + */ + int dupli_settings = preview ? 1 : 2; bool cancel = false; for(; b_sce && !cancel; b_sce = b_sce.background_set()) { - for(b_sce.objects.begin(b_ob); b_ob != b_sce.objects.end() && !cancel; ++b_ob) { - bool hide = (render_layer.use_viewport_visibility)? b_ob->hide(): b_ob->hide_render(); - uint ob_layer = get_layer(b_ob->layers(), b_ob->layers_local_view(), render_layer.use_localview, object_is_light(*b_ob)); + for(b_sce.object_bases.begin(b_base); b_base != b_sce.object_bases.end() && !cancel; ++b_base) { + BL::Object b_ob = b_base->object(); + bool hide = (render_layer.use_viewport_visibility)? 
b_ob.hide(): b_ob.hide_render(); + uint ob_layer = get_layer(b_base->layers(), b_base->layers_local_view(), render_layer.use_localview, object_is_light(b_ob)); hide = hide || !(ob_layer & scene_layer); if(!hide) { - progress.set_sync_status("Synchronizing object", (*b_ob).name()); + progress.set_sync_status("Synchronizing object", b_ob.name()); - if(b_ob->is_duplicator() && !object_render_hide_duplis(*b_ob)) { + if(b_ob.is_duplicator() && !object_render_hide_duplis(b_ob)) { /* dupli objects */ - b_ob->dupli_list_create(b_scene, 2); + b_ob.dupli_list_create(b_scene, dupli_settings); BL::Object::dupli_list_iterator b_dup; - for(b_ob->dupli_list.begin(b_dup); b_dup != b_ob->dupli_list.end(); ++b_dup) { + for(b_ob.dupli_list.begin(b_dup); b_dup != b_ob.dupli_list.end(); ++b_dup) { Transform tfm = get_transform(b_dup->matrix()); BL::Object b_dup_ob = b_dup->object(); bool dup_hide = (b_v3d)? b_dup_ob.hide(): b_dup_ob.hide_render(); @@ -462,32 +494,27 @@ void BlenderSync::sync_objects(BL::SpaceView3D b_v3d, int motion) BL::Array<int, OBJECT_PERSISTENT_ID_SIZE> persistent_id = b_dup->persistent_id(); /* sync object and mesh or light data */ - Object *object = sync_object(*b_ob, persistent_id.data, *b_dup, tfm, ob_layer, motion, hide_tris); + Object *object = sync_object(b_ob, persistent_id.data, *b_dup, tfm, ob_layer, motion_time, hide_tris); /* sync possible particle data, note particle_id * starts counting at 1, first is dummy particle */ - if(!motion && object && sync_dupli_particle(*b_ob, *b_dup, object)) { - if(particle_id != object->particle_id) { - object->particle_id = particle_id; - scene->object_manager->tag_update(scene); - } - - particle_id++; + if(!motion && object) { + sync_dupli_particle(b_ob, *b_dup, object); } } } - b_ob->dupli_list_clear(); + b_ob.dupli_list_clear(); } /* test if object needs to be hidden */ bool hide_tris; - if(!object_render_hide(*b_ob, true, true, hide_tris)) { + if(!object_render_hide(b_ob, true, true, hide_tris)) { /* object itself 
*/ - Transform tfm = get_transform(b_ob->matrix_world()); - sync_object(*b_ob, NULL, PointerRNA_NULL, tfm, ob_layer, motion, hide_tris); + Transform tfm = get_transform(b_ob.matrix_world()); + sync_object(b_ob, NULL, PointerRNA_NULL, tfm, ob_layer, motion_time, hide_tris); } } @@ -527,31 +554,46 @@ void BlenderSync::sync_motion(BL::SpaceView3D b_v3d, BL::Object b_override, void b_cam = b_override; Camera prevcam = *(scene->camera); - - /* go back and forth one frame */ - int frame = b_scene.frame_current(); - for(int motion = -1; motion <= 1; motion += 2) { - /* we need to set the python thread state again because this - * function assumes it is being executed from python and will - * try to save the thread state */ + int frame_center = b_scene.frame_current(); + + /* always sample these times for camera motion */ + motion_times.insert(-1.0f); + motion_times.insert(1.0f); + + /* note iteration over motion_times set happens in sorted order */ + foreach(float relative_time, motion_times) { + /* fixed shutter time to get previous and next frame for motion pass */ + float shuttertime; + + if(scene->need_motion() == Scene::MOTION_PASS) + shuttertime = 2.0f; + else + shuttertime = scene->camera->shuttertime; + + /* compute frame and subframe time */ + float time = frame_center + relative_time * shuttertime * 0.5f; + int frame = (int)floorf(time); + float subframe = time - frame; + + /* change frame */ python_thread_state_restore(python_thread_state); - b_scene.frame_set(frame + motion, 0.0f); + b_scene.frame_set(frame, subframe); python_thread_state_save(python_thread_state); - /* camera object */ - if(b_cam) - sync_camera_motion(b_cam, motion); + /* sync camera, only supports two times at the moment */ + if(relative_time == -1.0f || relative_time == 1.0f) + sync_camera_motion(b_cam, relative_time); - /* mesh objects */ - sync_objects(b_v3d, motion); + /* sync object */ + sync_objects(b_v3d, relative_time); } /* we need to set the python thread state again because this * 
function assumes it is being executed from python and will * try to save the thread state */ python_thread_state_restore(python_thread_state); - b_scene.frame_set(frame, 0.0f); + b_scene.frame_set(frame_center, 0.0f); python_thread_state_save(python_thread_state); /* tag camera for motion update */ diff --git a/intern/cycles/blender/blender_particles.cpp b/intern/cycles/blender/blender_particles.cpp index ef832ed39c0..5b2782ec2ac 100644 --- a/intern/cycles/blender/blender_particles.cpp +++ b/intern/cycles/blender/blender_particles.cpp @@ -76,6 +76,11 @@ bool BlenderSync::sync_dupli_particle(BL::Object b_ob, BL::DupliObject b_dup, Ob psys->particles.push_back(pa); + if (object->particle_index != psys->particles.size() - 1) + scene->object_manager->tag_update(scene); + object->particle_system = psys; + object->particle_index = psys->particles.size() - 1; + /* return that this object has particle data */ return true; } diff --git a/intern/cycles/blender/blender_python.cpp b/intern/cycles/blender/blender_python.cpp index e08b7980e78..872f891cc2a 100644 --- a/intern/cycles/blender/blender_python.cpp +++ b/intern/cycles/blender/blender_python.cpp @@ -147,6 +147,38 @@ static PyObject *render_func(PyObject *self, PyObject *value) Py_RETURN_NONE; } +/* pixel_array and result passed as pointers */ +static PyObject *bake_func(PyObject *self, PyObject *args) +{ + PyObject *pysession, *pyobject; + PyObject *pypixel_array, *pyresult; + const char *pass_type; + int num_pixels, depth; + + if(!PyArg_ParseTuple(args, "OOsOiiO", &pysession, &pyobject, &pass_type, &pypixel_array, &num_pixels, &depth, &pyresult)) + return NULL; + + Py_BEGIN_ALLOW_THREADS + + BlenderSession *session = (BlenderSession*)PyLong_AsVoidPtr(pysession); + + PointerRNA objectptr; + RNA_id_pointer_create((ID*)PyLong_AsVoidPtr(pyobject), &objectptr); + BL::Object b_object(objectptr); + + void *b_result = PyLong_AsVoidPtr(pyresult); + + PointerRNA bakepixelptr; + 
RNA_id_pointer_create((ID*)PyLong_AsVoidPtr(pypixel_array), &bakepixelptr); + BL::BakePixel b_bake_pixel(bakepixelptr); + + session->bake(b_object, pass_type, b_bake_pixel, num_pixels, depth, (float *)b_result); + + Py_END_ALLOW_THREADS + + Py_RETURN_NONE; +} + static PyObject *draw_func(PyObject *self, PyObject *args) { PyObject *pysession, *pyv3d, *pyrv3d; @@ -285,7 +317,8 @@ static PyObject *osl_update_node_func(PyObject *self, PyObject *args) } else if(param->type.vecsemantics == TypeDesc::POINT || param->type.vecsemantics == TypeDesc::VECTOR || - param->type.vecsemantics == TypeDesc::NORMAL) { + param->type.vecsemantics == TypeDesc::NORMAL) + { socket_type = "NodeSocketVector"; data_type = BL::NodeSocket::type_VECTOR; @@ -418,6 +451,7 @@ static PyMethodDef methods[] = { {"create", create_func, METH_VARARGS, ""}, {"free", free_func, METH_O, ""}, {"render", render_func, METH_O, ""}, + {"bake", bake_func, METH_VARARGS, ""}, {"draw", draw_func, METH_VARARGS, ""}, {"sync", sync_func, METH_O, ""}, {"reset", reset_func, METH_VARARGS, ""}, @@ -493,7 +527,7 @@ void *CCL_python_module_init() /* TODO(sergey): This gives us library we've been linking against. * In theory with dynamic OSL library it might not be * accurate, but there's nothing in OSL API which we - * might use th get version in runtime. + * might use to get version in runtime. 
*/ int curversion = OSL_LIBRARY_VERSION_CODE; PyModule_AddObject(mod, "with_osl", Py_True); diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp index ef578493901..01a5acd8982 100644 --- a/intern/cycles/blender/blender_session.cpp +++ b/intern/cycles/blender/blender_session.cpp @@ -14,6 +14,8 @@ * limitations under the License */ +#include <stdlib.h> + #include "background.h" #include "buffers.h" #include "camera.h" @@ -21,6 +23,8 @@ #include "integrator.h" #include "film.h" #include "light.h" +#include "mesh.h" +#include "object.h" #include "scene.h" #include "session.h" #include "shader.h" @@ -93,6 +97,11 @@ void BlenderSession::create_session() /* create scene */ scene = new Scene(scene_params, session_params.device); + /* setup callbacks for builtin image support */ + scene->image_manager->builtin_image_info_cb = function_bind(&BlenderSession::builtin_image_info, this, _1, _2, _3, _4, _5, _6, _7); + scene->image_manager->builtin_image_pixels_cb = function_bind(&BlenderSession::builtin_image_pixels, this, _1, _2, _3); + scene->image_manager->builtin_image_float_pixels_cb = function_bind(&BlenderSession::builtin_image_float_pixels, this, _1, _2, _3); + /* create session */ session = new Session(session_params); session->scene = scene; @@ -121,11 +130,6 @@ void BlenderSession::create_session() session->reset(buffer_params, session_params.samples); b_engine.use_highlight_tiles(session_params.progressive_refine == false); - - /* setup callbacks for builtin image support */ - scene->image_manager->builtin_image_info_cb = function_bind(&BlenderSession::builtin_image_info, this, _1, _2, _3, _4, _5, _6); - scene->image_manager->builtin_image_pixels_cb = function_bind(&BlenderSession::builtin_image_pixels, this, _1, _2, _3); - scene->image_manager->builtin_image_float_pixels_cb = function_bind(&BlenderSession::builtin_image_float_pixels, this, _1, _2, _3); } void BlenderSession::reset_session(BL::BlendData b_data_, BL::Scene 
b_scene_) @@ -259,6 +263,58 @@ static PassType get_pass_type(BL::RenderPass b_pass) return PASS_NONE; } +static ShaderEvalType get_shader_type(const string& pass_type) +{ + const char *shader_type = pass_type.c_str(); + + /* data passes */ + if(strcmp(shader_type, "NORMAL")==0) + return SHADER_EVAL_NORMAL; + else if(strcmp(shader_type, "UV")==0) + return SHADER_EVAL_UV; + else if(strcmp(shader_type, "DIFFUSE_COLOR")==0) + return SHADER_EVAL_DIFFUSE_COLOR; + else if(strcmp(shader_type, "GLOSSY_COLOR")==0) + return SHADER_EVAL_GLOSSY_COLOR; + else if(strcmp(shader_type, "TRANSMISSION_COLOR")==0) + return SHADER_EVAL_TRANSMISSION_COLOR; + else if(strcmp(shader_type, "SUBSURFACE_COLOR")==0) + return SHADER_EVAL_SUBSURFACE_COLOR; + else if(strcmp(shader_type, "EMIT")==0) + return SHADER_EVAL_EMISSION; + + /* light passes */ + else if(strcmp(shader_type, "AO")==0) + return SHADER_EVAL_AO; + else if(strcmp(shader_type, "COMBINED")==0) + return SHADER_EVAL_COMBINED; + else if(strcmp(shader_type, "SHADOW")==0) + return SHADER_EVAL_SHADOW; + else if(strcmp(shader_type, "DIFFUSE_DIRECT")==0) + return SHADER_EVAL_DIFFUSE_DIRECT; + else if(strcmp(shader_type, "GLOSSY_DIRECT")==0) + return SHADER_EVAL_GLOSSY_DIRECT; + else if(strcmp(shader_type, "TRANSMISSION_DIRECT")==0) + return SHADER_EVAL_TRANSMISSION_DIRECT; + else if(strcmp(shader_type, "SUBSURFACE_DIRECT")==0) + return SHADER_EVAL_SUBSURFACE_DIRECT; + else if(strcmp(shader_type, "DIFFUSE_INDIRECT")==0) + return SHADER_EVAL_DIFFUSE_INDIRECT; + else if(strcmp(shader_type, "GLOSSY_INDIRECT")==0) + return SHADER_EVAL_GLOSSY_INDIRECT; + else if(strcmp(shader_type, "TRANSMISSION_INDIRECT")==0) + return SHADER_EVAL_TRANSMISSION_INDIRECT; + else if(strcmp(shader_type, "SUBSURFACE_INDIRECT")==0) + return SHADER_EVAL_SUBSURFACE_INDIRECT; + + /* extra */ + else if(strcmp(shader_type, "ENVIRONMENT")==0) + return SHADER_EVAL_ENVIRONMENT; + + else + return SHADER_EVAL_BAKE; +} + static BL::RenderResult 
begin_render_result(BL::RenderEngine b_engine, int x, int y, int w, int h, const char *layername) { return b_engine.begin_result(x, y, w, h, layername); @@ -425,6 +481,105 @@ void BlenderSession::render() sync = NULL; } +static void populate_bake_data(BakeData *data, BL::BakePixel pixel_array, const int num_pixels) +{ + BL::BakePixel bp = pixel_array; + + int i; + for(i=0; i < num_pixels; i++) { + data->set(i, bp.primitive_id(), bp.uv(), bp.du_dx(), bp.du_dy(), bp.dv_dx(), bp.dv_dy()); + bp = bp.next(); + } +} + +static bool is_light_pass(ShaderEvalType type) +{ + switch (type) { + case SHADER_EVAL_AO: + case SHADER_EVAL_COMBINED: + case SHADER_EVAL_SHADOW: + case SHADER_EVAL_DIFFUSE_DIRECT: + case SHADER_EVAL_GLOSSY_DIRECT: + case SHADER_EVAL_TRANSMISSION_DIRECT: + case SHADER_EVAL_SUBSURFACE_DIRECT: + case SHADER_EVAL_DIFFUSE_INDIRECT: + case SHADER_EVAL_GLOSSY_INDIRECT: + case SHADER_EVAL_TRANSMISSION_INDIRECT: + case SHADER_EVAL_SUBSURFACE_INDIRECT: + return true; + default: + return false; + } +} + +void BlenderSession::bake(BL::Object b_object, const string& pass_type, BL::BakePixel pixel_array, int num_pixels, int depth, float result[]) +{ + ShaderEvalType shader_type = get_shader_type(pass_type); + size_t object_index = OBJECT_NONE; + int tri_offset = 0; + + if(shader_type == SHADER_EVAL_UV) { + /* force UV to be available */ + Pass::add(PASS_UV, scene->film->passes); + } + + if(is_light_pass(shader_type)) { + /* force use_light_pass to be true */ + Pass::add(PASS_LIGHT, scene->film->passes); + } + + /* create device and update scene */ + scene->film->tag_update(scene); + scene->integrator->tag_update(scene); + + /* update scene */ + sync->sync_camera(b_render, b_engine.camera_override(), width, height); + sync->sync_data(b_v3d, b_engine.camera_override(), &python_thread_state); + + /* get buffer parameters */ + SessionParams session_params = BlenderSync::get_session_params(b_engine, b_userpref, b_scene, background); + BufferParams buffer_params = 
BlenderSync::get_buffer_params(b_render, b_scene, b_v3d, b_rv3d, scene->camera, width, height); + + scene->bake_manager->set_baking(true); + + /* set number of samples */ + session->tile_manager.set_samples(session_params.samples); + session->reset(buffer_params, session_params.samples); + session->update_scene(); + + /* find object index. todo: is arbitrary - copied from mesh_displace.cpp */ + for(size_t i = 0; i < scene->objects.size(); i++) { + if(strcmp(scene->objects[i]->name.c_str(), b_object.name().c_str()) == 0) { + object_index = i; + tri_offset = scene->objects[i]->mesh->tri_offset; + break; + } + } + + /* when used, non-instanced convention: object = ~object */ + int object = ~object_index; + + BakeData *bake_data = scene->bake_manager->init(object, tri_offset, num_pixels); + + populate_bake_data(bake_data, pixel_array, num_pixels); + + /* set number of samples */ + session->tile_manager.set_samples(session_params.samples); + session->reset(buffer_params, session_params.samples); + session->update_scene(); + + scene->bake_manager->bake(scene->device, &scene->dscene, scene, session->progress, shader_type, bake_data, result); + + /* free all memory used (host and device), so we wouldn't leave render + * engine with extra memory allocated + */ + + session->device_free(); + + delete sync; + sync = NULL; +} + void BlenderSession::do_write_update_render_result(BL::RenderResult b_rr, BL::RenderLayer b_rlay, RenderTile& rtile, bool do_update_only) { RenderBuffers *buffers = rtile.buffers; @@ -592,16 +747,14 @@ bool BlenderSession::draw(int w, int h) /* draw */ BufferParams buffer_params = BlenderSync::get_buffer_params(b_render, b_scene, b_v3d, b_rv3d, scene->camera, width, height); + DeviceDrawParams draw_params; - if(session->params.display_buffer_linear) - b_engine.bind_display_space_shader(b_scene); - - bool draw_ok = !session->draw(buffer_params); + if(session->params.display_buffer_linear) { + draw_params.bind_display_space_shader_cb = 
function_bind(&BL::RenderEngine::bind_display_space_shader, &b_engine, b_scene); + draw_params.unbind_display_space_shader_cb = function_bind(&BL::RenderEngine::unbind_display_space_shader, &b_engine); + } - if(session->params.display_buffer_linear) - b_engine.unbind_display_space_shader(); - - return draw_ok; + return !session->draw(buffer_params, draw_params); } void BlenderSession::get_status(string& status, string& substatus) @@ -726,85 +879,123 @@ int BlenderSession::builtin_image_frame(const string &builtin_name) return atoi(builtin_name.substr(last + 1, builtin_name.size() - last - 1).c_str()); } -void BlenderSession::builtin_image_info(const string &builtin_name, void *builtin_data, bool &is_float, int &width, int &height, int &channels) +void BlenderSession::builtin_image_info(const string &builtin_name, void *builtin_data, bool &is_float, int &width, int &height, int &depth, int &channels) { + /* empty image */ + is_float = false; + width = 0; + height = 0; + depth = 0; + channels = 0; + + if(!builtin_data) + return; + + /* recover ID pointer */ PointerRNA ptr; RNA_id_pointer_create((ID*)builtin_data, &ptr); - BL::Image b_image(ptr); + BL::ID b_id(ptr); + + if(b_id.is_a(&RNA_Image)) { + /* image data */ + BL::Image b_image(b_id); - if(b_image) { is_float = b_image.is_float(); width = b_image.size()[0]; height = b_image.size()[1]; + depth = 1; channels = b_image.channels(); } - else { - is_float = false; - width = 0; - height = 0; - channels = 0; + else if(b_id.is_a(&RNA_Object)) { + /* smoke volume data */ + BL::Object b_ob(b_id); + BL::SmokeDomainSettings b_domain = object_smoke_domain_find(b_ob); + + if(!b_domain) + return; + + if(builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_DENSITY) || + builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_FLAME)) + channels = 1; + else if(builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_COLOR)) + channels = 4; + else + return; + + int3 resolution = get_int3(b_domain.domain_resolution()); + int 
amplify = (b_domain.use_high_resolution())? b_domain.amplify() + 1: 1; + + width = resolution.x * amplify; + height = resolution.y * amplify; + depth = resolution.z * amplify; + + is_float = true; } } bool BlenderSession::builtin_image_pixels(const string &builtin_name, void *builtin_data, unsigned char *pixels) { + if(!builtin_data) + return false; + int frame = builtin_image_frame(builtin_name); PointerRNA ptr; RNA_id_pointer_create((ID*)builtin_data, &ptr); BL::Image b_image(ptr); - if(b_image) { - int width = b_image.size()[0]; - int height = b_image.size()[1]; - int channels = b_image.channels(); + int width = b_image.size()[0]; + int height = b_image.size()[1]; + int channels = b_image.channels(); - unsigned char *image_pixels; - image_pixels = image_get_pixels_for_frame(b_image, frame); + unsigned char *image_pixels; + image_pixels = image_get_pixels_for_frame(b_image, frame); - if(image_pixels) { - memcpy(pixels, image_pixels, width * height * channels * sizeof(unsigned char)); - MEM_freeN(image_pixels); + if(image_pixels) { + memcpy(pixels, image_pixels, width * height * channels * sizeof(unsigned char)); + MEM_freeN(image_pixels); + } + else { + if(channels == 1) { + memset(pixels, 0, width * height * sizeof(unsigned char)); } else { - if(channels == 1) { - memset(pixels, 0, width * height * sizeof(unsigned char)); - } - else { - unsigned char *cp = pixels; - for(int i = 0; i < width * height; i++, cp += channels) { - cp[0] = 255; - cp[1] = 0; - cp[2] = 255; - if(channels == 4) - cp[3] = 255; - } + unsigned char *cp = pixels; + for(int i = 0; i < width * height; i++, cp += channels) { + cp[0] = 255; + cp[1] = 0; + cp[2] = 255; + if(channels == 4) + cp[3] = 255; } } + } - /* premultiply, byte images are always straight for blender */ - unsigned char *cp = pixels; - for(int i = 0; i < width * height; i++, cp += channels) { - cp[0] = (cp[0] * cp[3]) >> 8; - cp[1] = (cp[1] * cp[3]) >> 8; - cp[2] = (cp[2] * cp[3]) >> 8; - } - - return true; + /* premultiply, 
byte images are always straight for blender */ + unsigned char *cp = pixels; + for(int i = 0; i < width * height; i++, cp += channels) { + cp[0] = (cp[0] * cp[3]) >> 8; + cp[1] = (cp[1] * cp[3]) >> 8; + cp[2] = (cp[2] * cp[3]) >> 8; } - return false; + return true; } bool BlenderSession::builtin_image_float_pixels(const string &builtin_name, void *builtin_data, float *pixels) { - int frame = builtin_image_frame(builtin_name); + if(!builtin_data) + return false; PointerRNA ptr; RNA_id_pointer_create((ID*)builtin_data, &ptr); - BL::Image b_image(ptr); + BL::ID b_id(ptr); + + if(b_id.is_a(&RNA_Image)) { + /* image data */ + BL::Image b_image(b_id); + int frame = builtin_image_frame(builtin_name); - if(b_image) { int width = b_image.size()[0]; int height = b_image.size()[1]; int channels = b_image.channels(); @@ -834,6 +1025,51 @@ bool BlenderSession::builtin_image_float_pixels(const string &builtin_name, void return true; } + else if(b_id.is_a(&RNA_Object)) { + /* smoke volume data */ + BL::Object b_ob(b_id); + BL::SmokeDomainSettings b_domain = object_smoke_domain_find(b_ob); + + if(!b_domain) + return false; + + int3 resolution = get_int3(b_domain.domain_resolution()); + int length, amplify = (b_domain.use_high_resolution())? 
b_domain.amplify() + 1: 1; + + int width = resolution.x * amplify; + int height = resolution.y * amplify; + int depth = resolution.z * amplify; + + if(builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_DENSITY)) { + SmokeDomainSettings_density_grid_get_length(&b_domain.ptr, &length); + + if(length == width*height*depth) { + SmokeDomainSettings_density_grid_get(&b_domain.ptr, pixels); + return true; + } + } + else if(builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_FLAME)) { + /* this is in range 0..1, and interpreted by the OpenGL smoke viewer + * as 1500..3000 K with the first part faded to zero density */ + SmokeDomainSettings_flame_grid_get_length(&b_domain.ptr, &length); + + if(length == width*height*depth) { + SmokeDomainSettings_flame_grid_get(&b_domain.ptr, pixels); + return true; + } + } + else if(builtin_name == Attribute::standard_name(ATTR_STD_VOLUME_COLOR)) { + /* the RGB is "premultiplied" by density for better interpolation results */ + SmokeDomainSettings_color_grid_get_length(&b_domain.ptr, &length); + + if(length == width*height*depth*4) { + SmokeDomainSettings_color_grid_get(&b_domain.ptr, pixels); + return true; + } + } + + fprintf(stderr, "Cycles error: unexpected smoke volume resolution, skipping\n"); + } return false; } diff --git a/intern/cycles/blender/blender_session.h b/intern/cycles/blender/blender_session.h index 0568fb291d0..0e44493d674 100644 --- a/intern/cycles/blender/blender_session.h +++ b/intern/cycles/blender/blender_session.h @@ -20,6 +20,7 @@ #include "device.h" #include "scene.h" #include "session.h" +#include "bake.h" #include "util_vector.h" @@ -51,6 +52,8 @@ public: /* offline render */ void render(); + void bake(BL::Object b_object, const string& pass_type, BL::BakePixel pixel_array, int num_pixels, int depth, float pixels[]); + void write_render_result(BL::RenderResult b_rr, BL::RenderLayer b_rlay, RenderTile& rtile); void write_render_tile(RenderTile& rtile); @@ -99,7 +102,7 @@ protected: void 
do_write_update_render_tile(RenderTile& rtile, bool do_update_only); int builtin_image_frame(const string &builtin_name); - void builtin_image_info(const string &builtin_name, void *builtin_data, bool &is_float, int &width, int &height, int &channels); + void builtin_image_info(const string &builtin_name, void *builtin_data, bool &is_float, int &width, int &height, int &depth, int &channels); bool builtin_image_pixels(const string &builtin_name, void *builtin_data, unsigned char *pixels); bool builtin_image_float_pixels(const string &builtin_name, void *builtin_data, float *pixels); }; diff --git a/intern/cycles/blender/blender_shader.cpp b/intern/cycles/blender/blender_shader.cpp index 6175c8ea399..ddbb40da7db 100644 --- a/intern/cycles/blender/blender_shader.cpp +++ b/intern/cycles/blender/blender_shader.cpp @@ -546,9 +546,11 @@ static ShaderNode *add_node(Scene *scene, BL::BlendData b_data, BL::Scene b_scen } image->animated = b_image_node.image_user().use_auto_refresh(); + image->use_alpha = b_image.use_alpha(); } image->color_space = ImageTextureNode::color_space_enum[(int)b_image_node.color_space()]; image->projection = ImageTextureNode::projection_enum[(int)b_image_node.projection()]; + image->interpolation = (InterpolationType)b_image_node.interpolation(); image->projection_blend = b_image_node.projection_blend(); get_tex_mapping(&image->tex_mapping, b_image_node.texture_mapping()); node = image; @@ -573,6 +575,8 @@ static ShaderNode *add_node(Scene *scene, BL::BlendData b_data, BL::Scene b_scen env->animated = b_env_node.image_user().use_auto_refresh(); env->builtin_data = NULL; } + + env->use_alpha = b_image.use_alpha(); } env->color_space = EnvironmentTextureNode::color_space_enum[(int)b_env_node.color_space()]; env->projection = EnvironmentTextureNode::projection_enum[(int)b_env_node.projection()]; @@ -667,6 +671,13 @@ static ShaderNode *add_node(Scene *scene, BL::BlendData b_data, BL::Scene b_scen tangent->attribute = b_tangent_node.uv_map(); node = 
tangent; } + else if (b_node.is_a(&RNA_ShaderNodeUVMap)) { + BL::ShaderNodeUVMap b_uvmap_node(b_node); + UVMapNode *uvm = new UVMapNode(); + uvm->attribute = b_uvmap_node.uv_map(); + uvm->from_dupli = b_uvmap_node.from_dupli(); + node = uvm; + } if(node) graph->add(node); diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp index 8e2197a2aa6..1f5e32a1123 100644 --- a/intern/cycles/blender/blender_sync.cpp +++ b/intern/cycles/blender/blender_sync.cpp @@ -172,6 +172,7 @@ void BlenderSync::sync_integrator() integrator->transparent_min_bounce = get_int(cscene, "transparent_min_bounces"); integrator->transparent_shadows = get_boolean(cscene, "use_transparent_shadows"); + integrator->volume_homogeneous_sampling = RNA_enum_get(&cscene, "volume_homogeneous_sampling"); integrator->volume_max_steps = get_int(cscene, "volume_max_steps"); integrator->volume_step_size = get_float(cscene, "volume_step_size"); @@ -197,6 +198,9 @@ void BlenderSync::sync_integrator() integrator->method = (Integrator::Method)get_enum(cscene, "progressive"); + integrator->sample_all_lights_direct = get_boolean(cscene, "sample_all_lights_direct"); + integrator->sample_all_lights_indirect = get_boolean(cscene, "sample_all_lights_indirect"); + int diffuse_samples = get_int(cscene, "diffuse_samples"); int glossy_samples = get_int(cscene, "glossy_samples"); int transmission_samples = get_int(cscene, "transmission_samples"); diff --git a/intern/cycles/blender/blender_sync.h b/intern/cycles/blender/blender_sync.h index 205761ad302..9c4175ef690 100644 --- a/intern/cycles/blender/blender_sync.h +++ b/intern/cycles/blender/blender_sync.h @@ -71,7 +71,7 @@ private: /* sync */ void sync_lamps(bool update_all); void sync_materials(bool update_all); - void sync_objects(BL::SpaceView3D b_v3d, int motion = 0); + void sync_objects(BL::SpaceView3D b_v3d, float motion_time = 0.0f); void sync_motion(BL::SpaceView3D b_v3d, BL::Object b_override, void **python_thread_state); void 
sync_film(); void sync_view(); @@ -81,12 +81,13 @@ private: void sync_nodes(Shader *shader, BL::ShaderNodeTree b_ntree); Mesh *sync_mesh(BL::Object b_ob, bool object_updated, bool hide_tris); - void sync_curves(Mesh *mesh, BL::Mesh b_mesh, BL::Object b_ob, int motion); - Object *sync_object(BL::Object b_parent, int persistent_id[OBJECT_PERSISTENT_ID_SIZE], BL::DupliObject b_dupli_object, Transform& tfm, uint layer_flag, int motion, bool hide_tris); + void sync_curves(Mesh *mesh, BL::Mesh b_mesh, BL::Object b_ob, bool motion, int time_index = 0); + Object *sync_object(BL::Object b_parent, int persistent_id[OBJECT_PERSISTENT_ID_SIZE], BL::DupliObject b_dupli_ob, + Transform& tfm, uint layer_flag, float motion_time, bool hide_tris); void sync_light(BL::Object b_parent, int persistent_id[OBJECT_PERSISTENT_ID_SIZE], BL::Object b_ob, Transform& tfm); void sync_background_light(); - void sync_mesh_motion(BL::Object b_ob, Mesh *mesh, int motion); - void sync_camera_motion(BL::Object b_ob, int motion); + void sync_mesh_motion(BL::Object b_ob, Object *object, float motion_time); + void sync_camera_motion(BL::Object b_ob, float motion_time); /* particles */ bool sync_dupli_particle(BL::Object b_ob, BL::DupliObject b_dup, Object *object); @@ -109,6 +110,7 @@ private: id_map<ParticleSystemKey, ParticleSystem> particle_system_map; set<Mesh*> mesh_synced; set<Mesh*> mesh_motion_synced; + std::set<float> motion_times; void *world_map; bool world_recalc; diff --git a/intern/cycles/blender/blender_util.h b/intern/cycles/blender/blender_util.h index 58e523d7fc2..35e417d8069 100644 --- a/intern/cycles/blender/blender_util.h +++ b/intern/cycles/blender/blender_util.h @@ -42,7 +42,14 @@ void python_thread_state_restore(void **python_thread_state); static inline BL::Mesh object_to_mesh(BL::BlendData data, BL::Object object, BL::Scene scene, bool apply_modifiers, bool render, bool calc_undeformed) { - return data.meshes.new_from_object(scene, object, apply_modifiers, (render)? 
2: 1, true, calc_undeformed); + BL::Mesh me = data.meshes.new_from_object(scene, object, apply_modifiers, (render)? 2: 1, false, calc_undeformed); + if ((bool)me) { + if (me.use_auto_smooth()) { + me.calc_normals_split(me.auto_smooth_angle()); + } + me.calc_tessface(); + } + return me; } static inline void colorramp_to_array(BL::ColorRamp ramp, float4 *data, int size) @@ -50,7 +57,7 @@ static inline void colorramp_to_array(BL::ColorRamp ramp, float4 *data, int size for(int i = 0; i < size; i++) { float color[4]; - ramp.evaluate(i/(float)(size-1), color); + ramp.evaluate((float)i/(float)(size-1), color); data[i] = make_float4(color[0], color[1], color[2], color[3]); } } @@ -67,7 +74,7 @@ static inline void curvemapping_color_to_array(BL::CurveMapping cumap, float4 *d BL::CurveMap mapI = cumap.curves[3]; for(int i = 0; i < size; i++) { - float t = i/(float)(size-1); + float t = (float)i/(float)(size-1); data[i][0] = mapR.evaluate(mapI.evaluate(t)); data[i][1] = mapG.evaluate(mapI.evaluate(t)); @@ -76,7 +83,7 @@ static inline void curvemapping_color_to_array(BL::CurveMapping cumap, float4 *d } else { for(int i = 0; i < size; i++) { - float t = i/(float)(size-1); + float t = (float)i/(float)(size-1); data[i][0] = mapR.evaluate(t); data[i][1] = mapG.evaluate(t); @@ -168,6 +175,11 @@ static inline float4 get_float4(BL::Array<float, 4> array) return make_float4(array[0], array[1], array[2], array[3]); } +static inline int3 get_int3(BL::Array<int, 3> array) +{ + return make_int3(array[0], array[1], array[2]); +} + static inline int4 get_int4(BL::Array<int, 4> array) { return make_int4(array[0], array[1], array[2], array[3]); @@ -341,6 +353,52 @@ static inline void mesh_texture_space(BL::Mesh b_mesh, float3& loc, float3& size loc = loc*size - make_float3(0.5f, 0.5f, 0.5f); } +/* object used for motion blur */ +static inline bool object_use_motion(BL::Object b_ob) +{ + PointerRNA cobject = RNA_pointer_get(&b_ob.ptr, "cycles"); + bool use_motion = get_boolean(cobject, 
"use_motion_blur"); + + return use_motion; +} + +/* object motion steps */ +static inline uint object_motion_steps(BL::Object b_ob) +{ + PointerRNA cobject = RNA_pointer_get(&b_ob.ptr, "cycles"); + uint steps = get_int(cobject, "motion_steps"); + + /* use uneven number of steps so we get one keyframe at the current frame, + * and ue 2^(steps - 1) so objects with more/fewer steps still have samples + * at the same times, to avoid sampling at many different times */ + return (2 << (steps - 1)) + 1; +} + +/* object uses deformation motion blur */ +static inline bool object_use_deform_motion(BL::Object b_ob) +{ + PointerRNA cobject = RNA_pointer_get(&b_ob.ptr, "cycles"); + bool use_deform_motion = get_boolean(cobject, "use_deform_motion"); + + return use_deform_motion; +} + +static inline BL::SmokeDomainSettings object_smoke_domain_find(BL::Object b_ob) +{ + BL::Object::modifiers_iterator b_mod; + + for(b_ob.modifiers.begin(b_mod); b_mod != b_ob.modifiers.end(); ++b_mod) { + if (b_mod->is_a(&RNA_SmokeModifier)) { + BL::SmokeModifier b_smd(*b_mod); + + if(b_smd.smoke_type() == BL::SmokeModifier::smoke_type_DOMAIN) + return b_smd.domain_settings(); + } + } + + return BL::SmokeDomainSettings(PointerRNA_NULL); +} + /* ID Map * * Utility class to keep in sync with blender data. 
diff --git a/intern/cycles/bvh/bvh.cpp b/intern/cycles/bvh/bvh.cpp index 6c636ac5c8d..3c0c5c021c8 100644 --- a/intern/cycles/bvh/bvh.cpp +++ b/intern/cycles/bvh/bvh.cpp @@ -77,13 +77,25 @@ bool BVH::cache_read(CacheData& key) key.add(&params, sizeof(params)); foreach(Object *ob, objects) { - key.add(ob->mesh->verts); - key.add(ob->mesh->triangles); - key.add(ob->mesh->curve_keys); - key.add(ob->mesh->curves); + Mesh *mesh = ob->mesh; + + key.add(mesh->verts); + key.add(mesh->triangles); + key.add(mesh->curve_keys); + key.add(mesh->curves); key.add(&ob->bounds, sizeof(ob->bounds)); key.add(&ob->visibility, sizeof(ob->visibility)); - key.add(&ob->mesh->transform_applied, sizeof(bool)); + key.add(&mesh->transform_applied, sizeof(bool)); + + if(mesh->use_motion_blur) { + Attribute *attr = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + if(attr) + key.add(attr->buffer); + + attr = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + if(attr) + key.add(attr->buffer); + } } CacheData value; @@ -97,7 +109,7 @@ bool BVH::cache_read(CacheData& key) value.read(pack.nodes); value.read(pack.object_node); value.read(pack.tri_woop); - value.read(pack.prim_segment); + value.read(pack.prim_type); value.read(pack.prim_visibility); value.read(pack.prim_index); value.read(pack.prim_object); @@ -119,7 +131,7 @@ void BVH::cache_write(CacheData& key) value.add(pack.nodes); value.add(pack.object_node); value.add(pack.tri_woop); - value.add(pack.prim_segment); + value.add(pack.prim_type); value.add(pack.prim_visibility); value.add(pack.prim_index); value.add(pack.prim_object); @@ -165,11 +177,11 @@ void BVH::build(Progress& progress) } /* build nodes */ - vector<int> prim_segment; + vector<int> prim_type; vector<int> prim_index; vector<int> prim_object; - BVHBuild bvh_build(objects, prim_segment, prim_index, prim_object, params, progress); + BVHBuild bvh_build(objects, prim_type, prim_index, prim_object, params, progress); BVHNode *root = bvh_build.run(); 
if(progress.get_cancel()) { @@ -178,7 +190,7 @@ void BVH::build(Progress& progress) } /* todo: get rid of this copy */ - pack.prim_segment = prim_segment; + pack.prim_type = prim_type; pack.prim_index = prim_index; pack.prim_object = prim_object; @@ -238,9 +250,12 @@ void BVH::refit(Progress& progress) void BVH::pack_triangle(int idx, float4 woop[3]) { - /* create Woop triangle */ int tob = pack.prim_object[idx]; const Mesh *mesh = objects[tob]->mesh; + + if(mesh->has_motion_blur()) + return; + int tidx = pack.prim_index[idx]; const int *vidx = mesh->triangles[tidx].v; const float3* vpos = &mesh->verts[0]; @@ -280,11 +295,11 @@ void BVH::pack_curve_segment(int idx, float4 woop[3]) int tob = pack.prim_object[idx]; const Mesh *mesh = objects[tob]->mesh; int tidx = pack.prim_index[idx]; - int segment = pack.prim_segment[idx]; + int segment = PRIMITIVE_UNPACK_SEGMENT(pack.prim_type[idx]); int k0 = mesh->curves[tidx].first_key + segment; int k1 = mesh->curves[tidx].first_key + segment + 1; - float3 v0 = mesh->curve_keys[k0].co; - float3 v1 = mesh->curve_keys[k1].co; + float3 v0 = float4_to_float3(mesh->curve_keys[k0]); + float3 v1 = float4_to_float3(mesh->curve_keys[k1]); float3 d0 = v1 - v0; float l = len(d0); @@ -324,7 +339,7 @@ void BVH::pack_primitives() if(pack.prim_index[i] != -1) { float4 woop[3]; - if(pack.prim_segment[i] != ~0) + if(pack.prim_type[i] & PRIMITIVE_ALL_CURVE) pack_curve_segment(i, woop); else pack_triangle(i, woop); @@ -335,7 +350,7 @@ void BVH::pack_primitives() Object *ob = objects[tob]; pack.prim_visibility[i] = ob->visibility; - if(pack.prim_segment[i] != ~0) + if(pack.prim_type[i] & PRIMITIVE_ALL_CURVE) pack.prim_visibility[i] |= PATH_RAY_CURVE; } else { @@ -359,7 +374,7 @@ void BVH::pack_instances(size_t nodes_size) * meshes with transform applied and already in the top level BVH */ for(size_t i = 0; i < pack.prim_index.size(); i++) if(pack.prim_index[i] != -1) { - if(pack.prim_segment[i] != ~0) + if(pack.prim_type[i] & PRIMITIVE_ALL_CURVE) 
pack.prim_index[i] += objects[pack.prim_object[i]]->mesh->curve_offset; else pack.prim_index[i] += objects[pack.prim_object[i]]->mesh->tri_offset; @@ -401,7 +416,7 @@ void BVH::pack_instances(size_t nodes_size) mesh_map.clear(); pack.prim_index.resize(prim_index_size); - pack.prim_segment.resize(prim_index_size); + pack.prim_type.resize(prim_index_size); pack.prim_object.resize(prim_index_size); pack.prim_visibility.resize(prim_index_size); pack.tri_woop.resize(tri_woop_size); @@ -409,7 +424,7 @@ void BVH::pack_instances(size_t nodes_size) pack.object_node.resize(objects.size()); int *pack_prim_index = (pack.prim_index.size())? &pack.prim_index[0]: NULL; - int *pack_prim_segment = (pack.prim_segment.size())? &pack.prim_segment[0]: NULL; + int *pack_prim_type = (pack.prim_type.size())? &pack.prim_type[0]: NULL; int *pack_prim_object = (pack.prim_object.size())? &pack.prim_object[0]: NULL; uint *pack_prim_visibility = (pack.prim_visibility.size())? &pack.prim_visibility[0]: NULL; float4 *pack_tri_woop = (pack.tri_woop.size())? 
&pack.tri_woop[0]: NULL; @@ -454,16 +469,16 @@ void BVH::pack_instances(size_t nodes_size) if(bvh->pack.prim_index.size()) { size_t bvh_prim_index_size = bvh->pack.prim_index.size(); int *bvh_prim_index = &bvh->pack.prim_index[0]; - int *bvh_prim_segment = &bvh->pack.prim_segment[0]; + int *bvh_prim_type = &bvh->pack.prim_type[0]; uint *bvh_prim_visibility = &bvh->pack.prim_visibility[0]; for(size_t i = 0; i < bvh_prim_index_size; i++) { - if(bvh->pack.prim_segment[i] != ~0) + if(bvh->pack.prim_type[i] & PRIMITIVE_ALL_CURVE) pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + mesh_curve_offset; else pack_prim_index[pack_prim_index_offset] = bvh_prim_index[i] + mesh_tri_offset; - pack_prim_segment[pack_prim_index_offset] = bvh_prim_segment[i]; + pack_prim_type[pack_prim_index_offset] = bvh_prim_type[i]; pack_prim_visibility[pack_prim_index_offset] = bvh_prim_visibility[i]; pack_prim_object[pack_prim_index_offset] = 0; // unused for instances pack_prim_index_offset++; @@ -629,37 +644,51 @@ void RegularBVH::refit_node(int idx, bool leaf, BoundBox& bbox, uint& visibility /* primitives */ const Mesh *mesh = ob->mesh; - if(pack.prim_segment[prim] != ~0) { + if(pack.prim_type[prim] & PRIMITIVE_ALL_CURVE) { /* curves */ int str_offset = (params.top_level)? mesh->curve_offset: 0; - int k0 = mesh->curves[pidx - str_offset].first_key + pack.prim_segment[prim]; // XXX! 
- int k1 = k0 + 1; - - float3 p[4]; - p[0] = mesh->curve_keys[max(k0 - 1,mesh->curves[pidx - str_offset].first_key)].co; - p[1] = mesh->curve_keys[k0].co; - p[2] = mesh->curve_keys[k1].co; - p[3] = mesh->curve_keys[min(k1 + 1,mesh->curves[pidx - str_offset].first_key + mesh->curves[pidx - str_offset].num_keys - 1)].co; - float3 lower; - float3 upper; - curvebounds(&lower.x, &upper.x, p, 0); - curvebounds(&lower.y, &upper.y, p, 1); - curvebounds(&lower.z, &upper.z, p, 2); - float mr = max(mesh->curve_keys[k0].radius,mesh->curve_keys[k1].radius); - bbox.grow(lower, mr); - bbox.grow(upper, mr); + const Mesh::Curve& curve = mesh->curves[pidx - str_offset]; + int k = PRIMITIVE_UNPACK_SEGMENT(pack.prim_type[prim]); + + curve.bounds_grow(k, &mesh->curve_keys[0], bbox); visibility |= PATH_RAY_CURVE; + + /* motion curves */ + if(mesh->use_motion_blur) { + Attribute *attr = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + + if(attr) { + size_t mesh_size = mesh->curve_keys.size(); + size_t steps = mesh->motion_steps - 1; + float4 *key_steps = attr->data_float4(); + + for (size_t i = 0; i < steps; i++) + curve.bounds_grow(k, key_steps + i*mesh_size, bbox); + } + } } else { /* triangles */ int tri_offset = (params.top_level)? 
mesh->tri_offset: 0; - const int *vidx = mesh->triangles[pidx - tri_offset].v; + const Mesh::Triangle& triangle = mesh->triangles[pidx - tri_offset]; const float3 *vpos = &mesh->verts[0]; - bbox.grow(vpos[vidx[0]]); - bbox.grow(vpos[vidx[1]]); - bbox.grow(vpos[vidx[2]]); + triangle.bounds_grow(vpos, bbox); + + /* motion triangles */ + if(mesh->use_motion_blur) { + Attribute *attr = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + + if(attr) { + size_t mesh_size = mesh->verts.size(); + size_t steps = mesh->motion_steps - 1; + float3 *vert_steps = attr->data_float3(); + + for (size_t i = 0; i < steps; i++) + triangle.bounds_grow(vert_steps + i*mesh_size, bbox); + } + } } } diff --git a/intern/cycles/bvh/bvh.h b/intern/cycles/bvh/bvh.h index f2c96638b84..5fcaaaa988c 100644 --- a/intern/cycles/bvh/bvh.h +++ b/intern/cycles/bvh/bvh.h @@ -52,8 +52,8 @@ struct PackedBVH { array<int> object_node; /* precomputed triangle intersection data, one triangle is 4x float4 */ array<float4> tri_woop; - /* primitive type - triangle or strand (should be moved to flag?) 
*/ - array<int> prim_segment; + /* primitive type - triangle or strand */ + array<int> prim_type; /* visibility visibilitys for primitives */ array<uint> prim_visibility; /* mapping from BVH primitive index to true primitive index, as primitives diff --git a/intern/cycles/bvh/bvh_binning.cpp b/intern/cycles/bvh/bvh_binning.cpp index 05a674a47a7..bd37ffbcf38 100644 --- a/intern/cycles/bvh/bvh_binning.cpp +++ b/intern/cycles/bvh/bvh_binning.cpp @@ -83,14 +83,14 @@ BVHObjectBinning::BVHObjectBinning(const BVHRange& job, BVHReference *prims) int4 bin1 = get_bin(prim1.bounds()); /* increase bounds for bins for even primitive */ - int b00 = extract<0>(bin0); bin_count[b00][0]++; bin_bounds[b00][0].grow(prim0.bounds()); - int b01 = extract<1>(bin0); bin_count[b01][1]++; bin_bounds[b01][1].grow(prim0.bounds()); - int b02 = extract<2>(bin0); bin_count[b02][2]++; bin_bounds[b02][2].grow(prim0.bounds()); + int b00 = (int)extract<0>(bin0); bin_count[b00][0]++; bin_bounds[b00][0].grow(prim0.bounds()); + int b01 = (int)extract<1>(bin0); bin_count[b01][1]++; bin_bounds[b01][1].grow(prim0.bounds()); + int b02 = (int)extract<2>(bin0); bin_count[b02][2]++; bin_bounds[b02][2].grow(prim0.bounds()); /* increase bounds of bins for odd primitive */ - int b10 = extract<0>(bin1); bin_count[b10][0]++; bin_bounds[b10][0].grow(prim1.bounds()); - int b11 = extract<1>(bin1); bin_count[b11][1]++; bin_bounds[b11][1].grow(prim1.bounds()); - int b12 = extract<2>(bin1); bin_count[b12][2]++; bin_bounds[b12][2].grow(prim1.bounds()); + int b10 = (int)extract<0>(bin1); bin_count[b10][0]++; bin_bounds[b10][0].grow(prim1.bounds()); + int b11 = (int)extract<1>(bin1); bin_count[b11][1]++; bin_bounds[b11][1].grow(prim1.bounds()); + int b12 = (int)extract<2>(bin1); bin_count[b12][2]++; bin_bounds[b12][2].grow(prim1.bounds()); } /* for uneven number of primitives */ @@ -100,9 +100,9 @@ BVHObjectBinning::BVHObjectBinning(const BVHRange& job, BVHReference *prims) int4 bin0 = get_bin(prim0.bounds()); /* increase 
bounds of bins */ - int b00 = extract<0>(bin0); bin_count[b00][0]++; bin_bounds[b00][0].grow(prim0.bounds()); - int b01 = extract<1>(bin0); bin_count[b01][1]++; bin_bounds[b01][1].grow(prim0.bounds()); - int b02 = extract<2>(bin0); bin_count[b02][2]++; bin_bounds[b02][2].grow(prim0.bounds()); + int b00 = (int)extract<0>(bin0); bin_count[b00][0]++; bin_bounds[b00][0].grow(prim0.bounds()); + int b01 = (int)extract<1>(bin0); bin_count[b01][1]++; bin_bounds[b01][1].grow(prim0.bounds()); + int b02 = (int)extract<2>(bin0); bin_count[b02][2]++; bin_bounds[b02][2].grow(prim0.bounds()); } } diff --git a/intern/cycles/bvh/bvh_build.cpp b/intern/cycles/bvh/bvh_build.cpp index b21b20a87e5..eb4cca92b6b 100644 --- a/intern/cycles/bvh/bvh_build.cpp +++ b/intern/cycles/bvh/bvh_build.cpp @@ -49,10 +49,10 @@ public: /* Constructor / Destructor */ BVHBuild::BVHBuild(const vector<Object*>& objects_, - vector<int>& prim_segment_, vector<int>& prim_index_, vector<int>& prim_object_, + vector<int>& prim_type_, vector<int>& prim_index_, vector<int>& prim_object_, const BVHParams& params_, Progress& progress_) : objects(objects_), - prim_segment(prim_segment_), + prim_type(prim_type_), prim_index(prim_index_), prim_object(prim_object_), params(params_), @@ -70,45 +70,66 @@ BVHBuild::~BVHBuild() void BVHBuild::add_reference_mesh(BoundBox& root, BoundBox& center, Mesh *mesh, int i) { + Attribute *attr_mP = NULL; + + if(mesh->has_motion_blur()) + attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + for(uint j = 0; j < mesh->triangles.size(); j++) { Mesh::Triangle t = mesh->triangles[j]; BoundBox bounds = BoundBox::empty; + PrimitiveType type = PRIMITIVE_TRIANGLE; + + t.bounds_grow(&mesh->verts[0], bounds); - for(int k = 0; k < 3; k++) { - float3 co = mesh->verts[t.v[k]]; - bounds.grow(co); + /* motion triangles */ + if(attr_mP) { + size_t mesh_size = mesh->verts.size(); + size_t steps = mesh->motion_steps - 1; + float3 *vert_steps = attr_mP->data_float3(); + + for(size_t i = 0; 
i < steps; i++) + t.bounds_grow(vert_steps + i*mesh_size, bounds); + + type = PRIMITIVE_MOTION_TRIANGLE; } if(bounds.valid()) { - references.push_back(BVHReference(bounds, j, i, ~0)); + references.push_back(BVHReference(bounds, j, i, type)); root.grow(bounds); center.grow(bounds.center2()); } } + Attribute *curve_attr_mP = NULL; + + if(mesh->has_motion_blur()) + curve_attr_mP = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + for(uint j = 0; j < mesh->curves.size(); j++) { Mesh::Curve curve = mesh->curves[j]; + PrimitiveType type = PRIMITIVE_CURVE; for(int k = 0; k < curve.num_keys - 1; k++) { BoundBox bounds = BoundBox::empty; + curve.bounds_grow(k, &mesh->curve_keys[0], bounds); + + /* motion curve */ + if(curve_attr_mP) { + size_t mesh_size = mesh->curve_keys.size(); + size_t steps = mesh->motion_steps - 1; + float4 *key_steps = curve_attr_mP->data_float4(); - float3 co[4]; - co[0] = mesh->curve_keys[max(curve.first_key + k - 1,curve.first_key)].co; - co[1] = mesh->curve_keys[curve.first_key + k].co; - co[2] = mesh->curve_keys[curve.first_key + k + 1].co; - co[3] = mesh->curve_keys[min(curve.first_key + k + 2, curve.first_key + curve.num_keys - 1)].co; - - float3 lower; - float3 upper; - curvebounds(&lower.x, &upper.x, co, 0); - curvebounds(&lower.y, &upper.y, co, 1); - curvebounds(&lower.z, &upper.z, co, 2); - float mr = max(mesh->curve_keys[curve.first_key + k].radius, mesh->curve_keys[curve.first_key + k + 1].radius); - bounds.grow(lower, mr); - bounds.grow(upper, mr); + for (size_t i = 0; i < steps; i++) + curve.bounds_grow(k, key_steps + i*mesh_size, bounds); + + type = PRIMITIVE_MOTION_CURVE; + } if(bounds.valid()) { - references.push_back(BVHReference(bounds, j, i, k)); + int packed_type = PRIMITIVE_PACK_SEGMENT(type, k); + + references.push_back(BVHReference(bounds, j, i, packed_type)); root.grow(bounds); center.grow(bounds.center2()); } @@ -118,7 +139,7 @@ void BVHBuild::add_reference_mesh(BoundBox& root, BoundBox& center, Mesh *mesh, 
void BVHBuild::add_reference_object(BoundBox& root, BoundBox& center, Object *ob, int i) { - references.push_back(BVHReference(ob->bounds, -1, i, false)); + references.push_back(BVHReference(ob->bounds, -1, i, 0)); root.grow(ob->bounds); center.grow(ob->bounds.center2()); } @@ -207,7 +228,7 @@ BVHNode* BVHBuild::run() progress_total = references.size(); progress_original_total = progress_total; - prim_segment.resize(references.size()); + prim_type.resize(references.size()); prim_index.resize(references.size()); prim_object.resize(references.size()); @@ -277,18 +298,41 @@ void BVHBuild::thread_build_node(InnerNode *inner, int child, BVHObjectBinning * } } +bool BVHBuild::range_within_max_leaf_size(const BVHRange& range) +{ + size_t size = range.size(); + size_t max_leaf_size = max(params.max_triangle_leaf_size, params.max_curve_leaf_size); + + if(size > max_leaf_size) + return false; + + size_t num_triangles = 0; + size_t num_curves = 0; + + for(int i = 0; i < size; i++) { + BVHReference& ref = references[range.start() + i]; + + if(ref.prim_type() & PRIMITIVE_ALL_CURVE) + num_curves++; + else if(ref.prim_type() & PRIMITIVE_ALL_TRIANGLE) + num_triangles++; + } + + return (num_triangles < params.max_triangle_leaf_size) && (num_curves < params.max_curve_leaf_size); +} + /* multithreaded binning builder */ BVHNode* BVHBuild::build_node(const BVHObjectBinning& range, int level) { size_t size = range.size(); - float leafSAH = params.sah_triangle_cost * range.leafSAH; - float splitSAH = params.sah_node_cost * range.bounds().half_area() + params.sah_triangle_cost * range.splitSAH; + float leafSAH = params.sah_primitive_cost * range.leafSAH; + float splitSAH = params.sah_node_cost * range.bounds().half_area() + params.sah_primitive_cost * range.splitSAH; /* have at least one inner node on top level, for performance and correct * visibility tests, since object instances do not check visibility flag */ if(!(range.size() > 0 && params.top_level && level == 0)) { /* make leaf 
node when threshold reached or SAH tells us */ - if(params.small_enough_for_leaf(size, level) || (size <= params.max_leaf_size && leafSAH < splitSAH)) + if(params.small_enough_for_leaf(size, level) || (range_within_max_leaf_size(range) && leafSAH < splitSAH)) return create_leaf_node(range); } @@ -373,12 +417,12 @@ BVHNode *BVHBuild::create_object_leaf_nodes(const BVHReference *ref, int start, if(start == prim_index.size()) { assert(params.use_spatial_split); - prim_segment.push_back(ref->prim_segment()); + prim_type.push_back(ref->prim_type()); prim_index.push_back(ref->prim_index()); prim_object.push_back(ref->prim_object()); } else { - prim_segment[start] = ref->prim_segment(); + prim_type[start] = ref->prim_type(); prim_index[start] = ref->prim_index(); prim_object[start] = ref->prim_object(); } @@ -401,7 +445,7 @@ BVHNode *BVHBuild::create_object_leaf_nodes(const BVHReference *ref, int start, BVHNode* BVHBuild::create_leaf_node(const BVHRange& range) { - vector<int>& p_segment = prim_segment; + vector<int>& p_type = prim_type; vector<int>& p_index = prim_index; vector<int>& p_object = prim_object; BoundBox bounds = BoundBox::empty; @@ -415,12 +459,12 @@ BVHNode* BVHBuild::create_leaf_node(const BVHRange& range) if(range.start() + num == prim_index.size()) { assert(params.use_spatial_split); - p_segment.push_back(ref.prim_segment()); + p_type.push_back(ref.prim_type()); p_index.push_back(ref.prim_index()); p_object.push_back(ref.prim_object()); } else { - p_segment[range.start() + num] = ref.prim_segment(); + p_type[range.start() + num] = ref.prim_type(); p_index[range.start() + num] = ref.prim_index(); p_object[range.start() + num] = ref.prim_object(); } @@ -490,7 +534,7 @@ void BVHBuild::rotate(BVHNode *node, int max_depth) /* find best rotation. we pick a target child of a first child, and swap * this with an other child. we perform the best such swap. 
*/ float best_cost = FLT_MAX; - int best_child = -1, bets_target = -1, best_other = -1; + int best_child = -1, best_target = -1, best_other = -1; for(size_t c = 0; c < 2; c++) { /* ignore leaf nodes as we cannot descent into */ @@ -514,11 +558,11 @@ void BVHBuild::rotate(BVHNode *node, int max_depth) if(cost0 < cost1) { best_cost = cost0; - bets_target = 0; + best_target = 0; } else { best_cost = cost0; - bets_target = 1; + best_target = 1; } } } @@ -527,10 +571,13 @@ void BVHBuild::rotate(BVHNode *node, int max_depth) if(best_cost >= 0) return; + assert(best_child == 0 || best_child == 1); + assert(best_target != -1); + /* perform the best found tree rotation */ InnerNode *child = (InnerNode*)parent->children[best_child]; - swap(parent->children[best_other], child->children[bets_target]); + swap(parent->children[best_other], child->children[best_target]); child->m_bounds = merge(child->children[0]->m_bounds, child->children[1]->m_bounds); } diff --git a/intern/cycles/bvh/bvh_build.h b/intern/cycles/bvh/bvh_build.h index 3df4da1739a..a6b9916de9b 100644 --- a/intern/cycles/bvh/bvh_build.h +++ b/intern/cycles/bvh/bvh_build.h @@ -44,7 +44,7 @@ public: /* Constructor/Destructor */ BVHBuild( const vector<Object*>& objects, - vector<int>& prim_segment, + vector<int>& prim_type, vector<int>& prim_index, vector<int>& prim_object, const BVHParams& params, @@ -70,6 +70,8 @@ protected: BVHNode *create_leaf_node(const BVHRange& range); BVHNode *create_object_leaf_nodes(const BVHReference *ref, int start, int num); + bool range_within_max_leaf_size(const BVHRange& range); + /* threads */ enum { THREAD_TASK_SIZE = 4096 }; void thread_build_node(InnerNode *node, int child, BVHObjectBinning *range, int level); @@ -88,7 +90,7 @@ protected: int num_original_references; /* output primitive indexes and objects */ - vector<int>& prim_segment; + vector<int>& prim_type; vector<int>& prim_index; vector<int>& prim_object; diff --git a/intern/cycles/bvh/bvh_params.h 
b/intern/cycles/bvh/bvh_params.h index ad36bdfa326..ed67690a07f 100644 --- a/intern/cycles/bvh/bvh_params.h +++ b/intern/cycles/bvh/bvh_params.h @@ -33,11 +33,12 @@ public: /* SAH costs */ float sah_node_cost; - float sah_triangle_cost; + float sah_primitive_cost; - /* number of triangles in leaf */ + /* number of primitives in leaf */ int min_leaf_size; - int max_leaf_size; + int max_triangle_leaf_size; + int max_curve_leaf_size; /* object or mesh level bvh */ int top_level; @@ -62,11 +63,14 @@ public: use_spatial_split = true; spatial_split_alpha = 1e-5f; + /* todo: see if splitting up primitive cost to be separate for triangles + * and curves can help. so far in tests it doesn't help, but why? */ sah_node_cost = 1.0f; - sah_triangle_cost = 1.0f; + sah_primitive_cost = 1.0f; min_leaf_size = 1; - max_leaf_size = 8; + max_triangle_leaf_size = 8; + max_curve_leaf_size = 2; top_level = false; use_cache = false; @@ -75,11 +79,11 @@ public: } /* SAH costs */ - __forceinline float cost(int num_nodes, int num_tris) const - { return node_cost(num_nodes) + triangle_cost(num_tris); } + __forceinline float cost(int num_nodes, int num_primitives) const + { return node_cost(num_nodes) + primitive_cost(num_primitives); } - __forceinline float triangle_cost(int n) const - { return n*sah_triangle_cost; } + __forceinline float primitive_cost(int n) const + { return n*sah_primitive_cost; } __forceinline float node_cost(int n) const { return n*sah_node_cost; } @@ -98,22 +102,22 @@ class BVHReference public: __forceinline BVHReference() {} - __forceinline BVHReference(const BoundBox& bounds_, int prim_index_, int prim_object_, int prim_segment) + __forceinline BVHReference(const BoundBox& bounds_, int prim_index_, int prim_object_, int prim_type) : rbounds(bounds_) { rbounds.min.w = __int_as_float(prim_index_); rbounds.max.w = __int_as_float(prim_object_); - segment = prim_segment; + type = prim_type; } __forceinline const BoundBox& bounds() const { return rbounds; } __forceinline 
int prim_index() const { return __float_as_int(rbounds.min.w); } __forceinline int prim_object() const { return __float_as_int(rbounds.max.w); } - __forceinline int prim_segment() const { return segment; } + __forceinline int prim_type() const { return type; } protected: BoundBox rbounds; - uint segment; + uint type; }; /* BVH Range diff --git a/intern/cycles/bvh/bvh_sort.cpp b/intern/cycles/bvh/bvh_sort.cpp index d7dbae36336..3140bf23376 100644 --- a/intern/cycles/bvh/bvh_sort.cpp +++ b/intern/cycles/bvh/bvh_sort.cpp @@ -52,8 +52,8 @@ public: else if(ra.prim_object() > rb.prim_object()) return false; else if(ra.prim_index() < rb.prim_index()) return true; else if(ra.prim_index() > rb.prim_index()) return false; - else if(ra.prim_segment() < rb.prim_segment()) return true; - else if(ra.prim_segment() > rb.prim_segment()) return false; + else if(ra.prim_type() < rb.prim_type()) return true; + else if(ra.prim_type() > rb.prim_type()) return false; return false; } diff --git a/intern/cycles/bvh/bvh_split.cpp b/intern/cycles/bvh/bvh_split.cpp index 03ff69d7b6d..07c35c08c18 100644 --- a/intern/cycles/bvh/bvh_split.cpp +++ b/intern/cycles/bvh/bvh_split.cpp @@ -54,8 +54,8 @@ BVHObjectSplit::BVHObjectSplit(BVHBuild *builder, const BVHRange& range, float n right_bounds = builder->spatial_right_bounds[i - 1]; float sah = nodeSAH + - left_bounds.safe_area() * builder->params.triangle_cost(i) + - right_bounds.safe_area() * builder->params.triangle_cost(range.size() - i); + left_bounds.safe_area() * builder->params.primitive_cost(i) + + right_bounds.safe_area() * builder->params.primitive_cost(range.size() - i); if(sah < min_sah) { min_sah = sah; @@ -150,8 +150,8 @@ BVHSpatialSplit::BVHSpatialSplit(BVHBuild *builder, const BVHRange& range, float rightNum -= builder->spatial_bins[dim][i - 1].exit; float sah = nodeSAH + - left_bounds.safe_area() * builder->params.triangle_cost(leftNum) + - builder->spatial_right_bounds[i - 1].safe_area() * builder->params.triangle_cost(rightNum); 
+ left_bounds.safe_area() * builder->params.primitive_cost(leftNum) + + builder->spatial_right_bounds[i - 1].safe_area() * builder->params.primitive_cost(rightNum); if(sah < this->sah) { this->sah = sah; @@ -209,10 +209,10 @@ void BVHSpatialSplit::split(BVHBuild *builder, BVHRange& left, BVHRange& right, ldb.grow(lref.bounds()); rdb.grow(rref.bounds()); - float lac = builder->params.triangle_cost(left_end - left_start); - float rac = builder->params.triangle_cost(right_end - right_start); - float lbc = builder->params.triangle_cost(left_end - left_start + 1); - float rbc = builder->params.triangle_cost(right_end - right_start + 1); + float lac = builder->params.primitive_cost(left_end - left_start); + float rac = builder->params.primitive_cost(right_end - right_start); + float lbc = builder->params.primitive_cost(left_end - left_start + 1); + float rbc = builder->params.primitive_cost(right_end - right_start + 1); float unsplitLeftSAH = lub.safe_area() * lbc + right_bounds.safe_area() * rac; float unsplitRightSAH = left_bounds.safe_area() * lac + rub.safe_area() * rbc; @@ -253,7 +253,7 @@ void BVHSpatialSplit::split_reference(BVHBuild *builder, BVHReference& left, BVH Object *ob = builder->objects[ref.prim_object()]; const Mesh *mesh = ob->mesh; - if (ref.prim_segment() == ~0) { + if (ref.prim_type() & PRIMITIVE_ALL_TRIANGLE) { const int *inds = mesh->triangles[ref.prim_index()].v; const float3 *verts = &mesh->verts[0]; const float3* v1 = &verts[inds[2]]; @@ -282,30 +282,32 @@ void BVHSpatialSplit::split_reference(BVHBuild *builder, BVHReference& left, BVH } else { /* curve split: NOTE - Currently ignores curve width and needs to be fixed.*/ - const int k0 = mesh->curves[ref.prim_index()].first_key + ref.prim_segment(); + const int k0 = mesh->curves[ref.prim_index()].first_key + PRIMITIVE_UNPACK_SEGMENT(ref.prim_type()); const int k1 = k0 + 1; - const float3* v0 = &mesh->curve_keys[k0].co; - const float3* v1 = &mesh->curve_keys[k1].co; + const float4 key0 = 
mesh->curve_keys[k0]; + const float4 key1 = mesh->curve_keys[k1]; + const float3 v0 = float4_to_float3(key0); + const float3 v1 = float4_to_float3(key1); - float v0p = (*v0)[dim]; - float v1p = (*v1)[dim]; + float v0p = v0[dim]; + float v1p = v1[dim]; /* insert vertex to the boxes it belongs to. */ if(v0p <= pos) - left_bounds.grow(*v0); + left_bounds.grow(v0); if(v0p >= pos) - right_bounds.grow(*v0); + right_bounds.grow(v0); if(v1p <= pos) - left_bounds.grow(*v1); + left_bounds.grow(v1); if(v1p >= pos) - right_bounds.grow(*v1); + right_bounds.grow(v1); /* edge intersects the plane => insert intersection to both boxes. */ if((v0p < pos && v1p > pos) || (v0p > pos && v1p < pos)) { - float3 t = lerp(*v0, *v1, clamp((pos - v0p) / (v1p - v0p), 0.0f, 1.0f)); + float3 t = lerp(v0, v1, clamp((pos - v0p) / (v1p - v0p), 0.0f, 1.0f)); left_bounds.grow(t); right_bounds.grow(t); } @@ -318,8 +320,8 @@ void BVHSpatialSplit::split_reference(BVHBuild *builder, BVHReference& left, BVH right_bounds.intersect(ref.bounds()); /* set references */ - left = BVHReference(left_bounds, ref.prim_index(), ref.prim_object(), ref.prim_segment()); - right = BVHReference(right_bounds, ref.prim_index(), ref.prim_object(), ref.prim_segment()); + left = BVHReference(left_bounds, ref.prim_index(), ref.prim_object(), ref.prim_type()); + right = BVHReference(right_bounds, ref.prim_index(), ref.prim_object(), ref.prim_type()); } CCL_NAMESPACE_END diff --git a/intern/cycles/bvh/bvh_split.h b/intern/cycles/bvh/bvh_split.h index 1f4befbe8e2..5b739311e5f 100644 --- a/intern/cycles/bvh/bvh_split.h +++ b/intern/cycles/bvh/bvh_split.h @@ -77,7 +77,7 @@ public: /* find split candidates. 
*/ float area = range.bounds().safe_area(); - leafSAH = area * builder->params.triangle_cost(range.size()); + leafSAH = area * builder->params.primitive_cost(range.size()); nodeSAH = area * builder->params.node_cost(2); object = BVHObjectSplit(builder, range, nodeSAH); @@ -92,7 +92,7 @@ public: /* leaf SAH is the lowest => create leaf. */ minSAH = min(min(leafSAH, object.sah), spatial.sah); - no_split = (minSAH == leafSAH && range.size() <= builder->params.max_leaf_size); + no_split = (minSAH == leafSAH && builder->range_within_max_leaf_size(range)); } __forceinline void split(BVHBuild *builder, BVHRange& left, BVHRange& right, const BVHRange& range) diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp index 9d60d062b8e..d9e68742c53 100644 --- a/intern/cycles/device/device.cpp +++ b/intern/cycles/device/device.cpp @@ -53,7 +53,8 @@ void Device::pixels_free(device_memory& mem) mem_free(mem); } -void Device::draw_pixels(device_memory& rgba, int y, int w, int h, int dy, int width, int height, bool transparent) +void Device::draw_pixels(device_memory& rgba, int y, int w, int h, int dy, int width, int height, bool transparent, + const DeviceDrawParams &draw_params) { pixels_copy_from(rgba, y, w, h); @@ -80,6 +81,10 @@ void Device::draw_pixels(device_memory& rgba, int y, int w, int h, int dy, int w glEnable(GL_TEXTURE_2D); + if(draw_params.bind_display_space_shader_cb) { + draw_params.bind_display_space_shader_cb(); + } + glPushMatrix(); glTranslatef(0.0f, (float)dy, 0.0f); @@ -98,6 +103,10 @@ void Device::draw_pixels(device_memory& rgba, int y, int w, int h, int dy, int w glPopMatrix(); + if(draw_params.unbind_display_space_shader_cb) { + draw_params.unbind_display_space_shader_cb(); + } + glBindTexture(GL_TEXTURE_2D, 0); glDisable(GL_TEXTURE_2D); glDeleteTextures(1, &texid); diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index bd309e35788..bcddd4f73e2 100644 --- a/intern/cycles/device/device.h +++ 
b/intern/cycles/device/device.h @@ -54,6 +54,7 @@ public: bool display_device; bool advanced_shading; bool pack_images; + bool extended_images; /* flag for GPU and Multi device */ vector<DeviceInfo> multi_devices; DeviceInfo() @@ -64,11 +65,17 @@ public: display_device = false; advanced_shading = true; pack_images = false; + extended_images = false; } }; /* Device */ +struct DeviceDrawParams { + boost::function<void(void)> bind_display_space_shader_cb; + boost::function<void(void)> unbind_display_space_shader_cb; +}; + class Device { protected: Device(DeviceInfo& info_, Stats &stats_, bool background) : background(background), info(info_), stats(stats_) {} @@ -100,7 +107,7 @@ public: /* texture memory */ virtual void tex_alloc(const char *name, device_memory& mem, - bool interpolation = false, bool periodic = false) {}; + InterpolationType interpolation = INTERPOLATION_NONE, bool periodic = false) {}; virtual void tex_free(device_memory& mem) {}; /* pixel memory */ @@ -121,7 +128,8 @@ public: /* opengl drawing */ virtual void draw_pixels(device_memory& mem, int y, int w, int h, - int dy, int width, int height, bool transparent); + int dy, int width, int height, bool transparent, + const DeviceDrawParams &draw_params); #ifdef WITH_NETWORK /* networking */ diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index 76123fe44d2..c9cc7592028 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -103,9 +103,9 @@ public: kernel_const_copy(&kernel_globals, name, host, size); } - void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic) + void tex_alloc(const char *name, device_memory& mem, InterpolationType interpolation, bool periodic) { - kernel_tex_copy(&kernel_globals, name, mem.data_pointer, mem.data_width, mem.data_height); + kernel_tex_copy(&kernel_globals, name, mem.data_pointer, mem.data_width, mem.data_height, mem.data_depth, interpolation); mem.device_pointer 
= mem.data_pointer; stats.mem_alloc(mem.memory_size()); @@ -395,7 +395,7 @@ public: for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) { kernel_cpu_avx_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x); - if(task_pool.canceled()) + if(task.get_cancel() || task_pool.canceled()) break; } } @@ -406,7 +406,7 @@ public: for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) { kernel_cpu_sse41_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x); - if(task_pool.canceled()) + if(task.get_cancel() || task_pool.canceled()) break; } } @@ -417,7 +417,7 @@ public: for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) { kernel_cpu_sse3_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x); - if(task_pool.canceled()) + if(task.get_cancel() || task_pool.canceled()) break; } } @@ -428,7 +428,7 @@ public: for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) { kernel_cpu_sse2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x); - if(task_pool.canceled()) + if(task.get_cancel() || task_pool.canceled()) break; } } @@ -438,7 +438,7 @@ public: for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) { kernel_cpu_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x); - if(task_pool.canceled()) + if(task.get_cancel() || task_pool.canceled()) break; } } diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 107ca16c4d2..93b89dc38d9 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -48,6 +48,7 @@ public: int cuDevArchitecture; bool first_error; bool use_texture_storage; + unsigned int target_update_frequency; struct PixelMem { GLuint cuPBO; @@ -138,7 +139,7 @@ public: /*cuda_abort();*/ \ cuda_error_documentation(); \ } \ - } + } (void)0 
bool cuda_error_(CUresult result, const string& stmt) { @@ -165,7 +166,7 @@ public: void cuda_push_context() { - cuda_assert(cuCtxSetCurrent(cuContext)) + cuda_assert(cuCtxSetCurrent(cuContext)); } void cuda_pop_context() @@ -173,12 +174,14 @@ public: cuda_assert(cuCtxSetCurrent(NULL)); } - CUDADevice(DeviceInfo& info, Stats &stats, bool background_) + CUDADevice(DeviceInfo& info, Stats &stats, bool background_) : Device(info, stats, background_) { first_error = true; background = background_; use_texture_storage = true; + /* we try an update / sync every 1000 ms */ + target_update_frequency = 1000; cuDevId = info.num; cuDevice = 0; @@ -209,8 +212,8 @@ public: if(cuda_error_(result, "cuCtxCreate")) return; - cuda_assert(cuStreamCreate(&cuStream, 0)) - cuda_assert(cuEventCreate(&tileDone, 0x1)) + cuda_assert(cuStreamCreate(&cuStream, 0)); + cuda_assert(cuEventCreate(&tileDone, 0x1)); int major, minor; cuDeviceComputeCapability(&major, &minor, cuDevId); @@ -219,7 +222,7 @@ public: /* In order to use full 6GB of memory on Titan cards, use arrays instead * of textures. On earlier cards this seems slower, but on Titan it is * actually slightly faster in tests. 
*/ - use_texture_storage = (cuDevArchitecture < 350); + use_texture_storage = (cuDevArchitecture < 300); cuda_pop_context(); } @@ -228,21 +231,22 @@ public: { task_pool.stop(); - cuda_assert(cuEventDestroy(tileDone)) - cuda_assert(cuStreamDestroy(cuStream)) - cuda_assert(cuCtxDestroy(cuContext)) + cuda_assert(cuEventDestroy(tileDone)); + cuda_assert(cuStreamDestroy(cuStream)); + cuda_assert(cuCtxDestroy(cuContext)); } - bool support_device(bool experimental) + bool support_device(bool experimental, bool branched) { int major, minor; cuDeviceComputeCapability(&major, &minor, cuDevId); - + + /* We only support sm_20 and above */ if(major < 2) { cuda_error_message(string_printf("CUDA device supported only with compute capability 2.0 or up, found %d.%d.", major, minor)); return false; } - + return true; } @@ -293,28 +297,16 @@ public: return ""; } if(cuda_version < 50) { - printf("Unsupported CUDA version %d.%d detected, you need CUDA 5.0.\n", cuda_version/10, cuda_version%10); + printf("Unsupported CUDA version %d.%d detected, you need CUDA 6.0.\n", cuda_version/10, cuda_version%10); return ""; } - - else if(cuda_version > 50) - printf("CUDA version %d.%d detected, build may succeed but only CUDA 5.0 is officially supported.\n", cuda_version/10, cuda_version%10); + else if(cuda_version != 60) + printf("CUDA version %d.%d detected, build may succeed but only CUDA 6.0 is officially supported.\n", cuda_version/10, cuda_version%10); /* compile */ string kernel = path_join(kernel_path, "kernel.cu"); string include = kernel_path; const int machine = system_cpu_bits(); - string arch_flags; - - /* CUDA 5.x build flags for different archs */ - if(major == 2) { - /* sm_2x */ - arch_flags = "--maxrregcount=32 --use_fast_math"; - } - else if(major == 3) { - /* sm_3x */ - arch_flags = "--maxrregcount=32 --use_fast_math"; - } double starttime = time_dt(); printf("Compiling CUDA kernel ...\n"); @@ -322,8 +314,8 @@ public: path_create_directories(cubin); string command = 
string_printf("\"%s\" -arch=sm_%d%d -m%d --cubin \"%s\" " - "-o \"%s\" --ptxas-options=\"-v\" %s -I\"%s\" -DNVCC -D__KERNEL_CUDA_VERSION__=%d", - nvcc.c_str(), major, minor, machine, kernel.c_str(), cubin.c_str(), arch_flags.c_str(), include.c_str(), cuda_version); + "-o \"%s\" --ptxas-options=\"-v\" -I\"%s\" -DNVCC -D__KERNEL_CUDA_VERSION__=%d", + nvcc.c_str(), major, minor, machine, kernel.c_str(), cubin.c_str(), include.c_str(), cuda_version); printf("%s\n", command.c_str()); @@ -349,8 +341,8 @@ public: if(cuContext == 0) return false; - /* check if GPU is supported with current feature set */ - if(!support_device(experimental)) + /* check if GPU is supported */ + if(!support_device(experimental, false)) return false; /* get kernel */ @@ -383,7 +375,7 @@ public: cuda_push_context(); CUdeviceptr device_pointer; size_t size = mem.memory_size(); - cuda_assert(cuMemAlloc(&device_pointer, size)) + cuda_assert(cuMemAlloc(&device_pointer, size)); mem.device_pointer = (device_ptr)device_pointer; stats.mem_alloc(size); cuda_pop_context(); @@ -393,7 +385,7 @@ public: { cuda_push_context(); if(mem.device_pointer) - cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), (void*)mem.data_pointer, mem.memory_size())) + cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), (void*)mem.data_pointer, mem.memory_size())); cuda_pop_context(); } @@ -405,7 +397,7 @@ public: cuda_push_context(); if(mem.device_pointer) { cuda_assert(cuMemcpyDtoH((uchar*)mem.data_pointer + offset, - (CUdeviceptr)((uchar*)mem.device_pointer + offset), size)) + (CUdeviceptr)((uchar*)mem.device_pointer + offset), size)); } else { memset((char*)mem.data_pointer + offset, 0, size); @@ -419,7 +411,7 @@ public: cuda_push_context(); if(mem.device_pointer) - cuda_assert(cuMemsetD8(cuda_device_ptr(mem.device_pointer), 0, mem.memory_size())) + cuda_assert(cuMemsetD8(cuda_device_ptr(mem.device_pointer), 0, mem.memory_size())); cuda_pop_context(); } @@ -427,7 +419,7 @@ public: { if(mem.device_pointer) 
{ cuda_push_context(); - cuda_assert(cuMemFree(cuda_device_ptr(mem.device_pointer))) + cuda_assert(cuMemFree(cuda_device_ptr(mem.device_pointer))); cuda_pop_context(); mem.device_pointer = 0; @@ -442,19 +434,21 @@ public: size_t bytes; cuda_push_context(); - cuda_assert(cuModuleGetGlobal(&mem, &bytes, cuModule, name)) + cuda_assert(cuModuleGetGlobal(&mem, &bytes, cuModule, name)); //assert(bytes == size); - cuda_assert(cuMemcpyHtoD(mem, host, size)) + cuda_assert(cuMemcpyHtoD(mem, host, size)); cuda_pop_context(); } - void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic) + void tex_alloc(const char *name, device_memory& mem, InterpolationType interpolation, bool periodic) { + /* todo: support 3D textures, only CPU for now */ + /* determine format */ CUarray_format_enum format; size_t dsize = datatype_size(mem.data_type); size_t size = mem.memory_size(); - bool use_texture = interpolation || use_texture_storage; + bool use_texture = (interpolation != INTERPOLATION_NONE) || use_texture_storage; if(use_texture) { @@ -469,14 +463,14 @@ public: CUtexref texref = NULL; cuda_push_context(); - cuda_assert(cuModuleGetTexRef(&texref, cuModule, name)) + cuda_assert(cuModuleGetTexRef(&texref, cuModule, name)); if(!texref) { cuda_pop_context(); return; } - if(interpolation) { + if(interpolation != INTERPOLATION_NONE) { CUarray handle = NULL; CUDA_ARRAY_DESCRIPTOR desc; @@ -485,7 +479,7 @@ public: desc.Format = format; desc.NumChannels = mem.data_elements; - cuda_assert(cuArrayCreate(&handle, &desc)) + cuda_assert(cuArrayCreate(&handle, &desc)); if(!handle) { cuda_pop_context(); @@ -503,15 +497,23 @@ public: param.WidthInBytes = param.srcPitch; param.Height = mem.data_height; - cuda_assert(cuMemcpy2D(¶m)) + cuda_assert(cuMemcpy2D(¶m)); } else - cuda_assert(cuMemcpyHtoA(handle, 0, (void*)mem.data_pointer, size)) + cuda_assert(cuMemcpyHtoA(handle, 0, (void*)mem.data_pointer, size)); - cuda_assert(cuTexRefSetArray(texref, handle, 
CU_TRSA_OVERRIDE_FORMAT)) + cuda_assert(cuTexRefSetArray(texref, handle, CU_TRSA_OVERRIDE_FORMAT)); - cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_LINEAR)) - cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_NORMALIZED_COORDINATES)) + if(interpolation == INTERPOLATION_CLOSEST) { + cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_POINT)); + } + else if (interpolation == INTERPOLATION_LINEAR) { + cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_LINEAR)); + } + else {/* CUBIC and SMART are unsupported for CUDA */ + cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_LINEAR)); + } + cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_NORMALIZED_COORDINATES)); mem.device_pointer = (device_ptr)handle; @@ -525,20 +527,20 @@ public: cuda_push_context(); - cuda_assert(cuTexRefSetAddress(NULL, texref, cuda_device_ptr(mem.device_pointer), size)) - cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_POINT)) - cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_READ_AS_INTEGER)) + cuda_assert(cuTexRefSetAddress(NULL, texref, cuda_device_ptr(mem.device_pointer), size)); + cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_POINT)); + cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_READ_AS_INTEGER)); } if(periodic) { - cuda_assert(cuTexRefSetAddressMode(texref, 0, CU_TR_ADDRESS_MODE_WRAP)) - cuda_assert(cuTexRefSetAddressMode(texref, 1, CU_TR_ADDRESS_MODE_WRAP)) + cuda_assert(cuTexRefSetAddressMode(texref, 0, CU_TR_ADDRESS_MODE_WRAP)); + cuda_assert(cuTexRefSetAddressMode(texref, 1, CU_TR_ADDRESS_MODE_WRAP)); } else { - cuda_assert(cuTexRefSetAddressMode(texref, 0, CU_TR_ADDRESS_MODE_CLAMP)) - cuda_assert(cuTexRefSetAddressMode(texref, 1, CU_TR_ADDRESS_MODE_CLAMP)) + cuda_assert(cuTexRefSetAddressMode(texref, 0, CU_TR_ADDRESS_MODE_CLAMP)); + cuda_assert(cuTexRefSetAddressMode(texref, 1, CU_TR_ADDRESS_MODE_CLAMP)); } - cuda_assert(cuTexRefSetFormat(texref, format, mem.data_elements)) + cuda_assert(cuTexRefSetFormat(texref, format, 
mem.data_elements)); cuda_pop_context(); } @@ -551,23 +553,23 @@ public: CUdeviceptr cumem; size_t cubytes; - cuda_assert(cuModuleGetGlobal(&cumem, &cubytes, cuModule, name)) + cuda_assert(cuModuleGetGlobal(&cumem, &cubytes, cuModule, name)); if(cubytes == 8) { /* 64 bit device pointer */ uint64_t ptr = mem.device_pointer; - cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes)) + cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes)); } else { /* 32 bit device pointer */ uint32_t ptr = (uint32_t)mem.device_pointer; - cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes)) + cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes)); } cuda_pop_context(); } - tex_interp_map[mem.device_pointer] = interpolation; + tex_interp_map[mem.device_pointer] = (interpolation != INTERPOLATION_NONE); } void tex_free(device_memory& mem) @@ -602,10 +604,12 @@ public: CUdeviceptr d_rng_state = cuda_device_ptr(rtile.rng_state); /* get kernel function */ - if(branched) - cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_branched_path_trace")) - else - cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace")) + if(branched && support_device(true, branched)) { + cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_branched_path_trace")); + } + else { + cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace")); + } if(have_error()) return; @@ -613,49 +617,63 @@ public: /* pass in parameters */ int offset = 0; - cuda_assert(cuParamSetv(cuPathTrace, offset, &d_buffer, sizeof(d_buffer))) + cuda_assert(cuParamSetv(cuPathTrace, offset, &d_buffer, sizeof(d_buffer))); offset += sizeof(d_buffer); - cuda_assert(cuParamSetv(cuPathTrace, offset, &d_rng_state, sizeof(d_rng_state))) + cuda_assert(cuParamSetv(cuPathTrace, offset, &d_rng_state, sizeof(d_rng_state))); offset += sizeof(d_rng_state); offset = align_up(offset, __alignof(sample)); - cuda_assert(cuParamSeti(cuPathTrace, offset, sample)) + 
cuda_assert(cuParamSeti(cuPathTrace, offset, sample)); offset += sizeof(sample); - cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.x)) + cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.x)); offset += sizeof(rtile.x); - cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.y)) + cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.y)); offset += sizeof(rtile.y); - cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.w)) + cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.w)); offset += sizeof(rtile.w); - cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.h)) + cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.h)); offset += sizeof(rtile.h); - cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.offset)) + cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.offset)); offset += sizeof(rtile.offset); - cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.stride)) + cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.stride)); offset += sizeof(rtile.stride); - cuda_assert(cuParamSetSize(cuPathTrace, offset)) + cuda_assert(cuParamSetSize(cuPathTrace, offset)); + + /* launch kernel */ + int threads_per_block; + cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuPathTrace)); + + /*int num_registers; + cuda_assert(cuFuncGetAttribute(&num_registers, CU_FUNC_ATTRIBUTE_NUM_REGS, cuPathTrace)); + + printf("threads_per_block %d\n", threads_per_block); + printf("num_registers %d\n", num_registers);*/ - /* launch kernel: todo find optimal size, cache config for fermi */ - int xthreads = 16; - int ythreads = 16; + int xthreads = (int)sqrt((float)threads_per_block); + int ythreads = (int)sqrt((float)threads_per_block); int xblocks = (rtile.w + xthreads - 1)/xthreads; int yblocks = (rtile.h + ythreads - 1)/ythreads; - cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1)) - cuda_assert(cuFuncSetBlockShape(cuPathTrace, xthreads, ythreads, 1)) - cuda_assert(cuLaunchGridAsync(cuPathTrace, xblocks, yblocks, cuStream)) + 
cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1)); + cuda_assert(cuFuncSetBlockShape(cuPathTrace, xthreads, ythreads, 1)); - cuda_assert(cuEventRecord(tileDone, cuStream )) - cuda_assert(cuEventSynchronize(tileDone)) + if(info.display_device) { + /* don't use async for device used for display, locks up UI too much */ + cuda_assert(cuLaunchGrid(cuPathTrace, xblocks, yblocks)); + cuda_assert(cuCtxSynchronize()); + } + else { + cuda_assert(cuLaunchGridAsync(cuPathTrace, xblocks, yblocks, cuStream)); + } cuda_pop_context(); } @@ -672,55 +690,60 @@ public: CUdeviceptr d_buffer = cuda_device_ptr(buffer); /* get kernel function */ - if(rgba_half) - cuda_assert(cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_convert_to_half_float")) - else - cuda_assert(cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_convert_to_byte")) + if(rgba_half) { + cuda_assert(cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_convert_to_half_float")); + } + else { + cuda_assert(cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_convert_to_byte")); + } /* pass in parameters */ int offset = 0; - cuda_assert(cuParamSetv(cuFilmConvert, offset, &d_rgba, sizeof(d_rgba))) + cuda_assert(cuParamSetv(cuFilmConvert, offset, &d_rgba, sizeof(d_rgba))); offset += sizeof(d_rgba); - cuda_assert(cuParamSetv(cuFilmConvert, offset, &d_buffer, sizeof(d_buffer))) + cuda_assert(cuParamSetv(cuFilmConvert, offset, &d_buffer, sizeof(d_buffer))); offset += sizeof(d_buffer); float sample_scale = 1.0f/(task.sample + 1); offset = align_up(offset, __alignof(sample_scale)); - cuda_assert(cuParamSetf(cuFilmConvert, offset, sample_scale)) + cuda_assert(cuParamSetf(cuFilmConvert, offset, sample_scale)); offset += sizeof(sample_scale); - cuda_assert(cuParamSeti(cuFilmConvert, offset, task.x)) + cuda_assert(cuParamSeti(cuFilmConvert, offset, task.x)); offset += sizeof(task.x); - cuda_assert(cuParamSeti(cuFilmConvert, offset, task.y)) + cuda_assert(cuParamSeti(cuFilmConvert, 
offset, task.y)); offset += sizeof(task.y); - cuda_assert(cuParamSeti(cuFilmConvert, offset, task.w)) + cuda_assert(cuParamSeti(cuFilmConvert, offset, task.w)); offset += sizeof(task.w); - cuda_assert(cuParamSeti(cuFilmConvert, offset, task.h)) + cuda_assert(cuParamSeti(cuFilmConvert, offset, task.h)); offset += sizeof(task.h); - cuda_assert(cuParamSeti(cuFilmConvert, offset, task.offset)) + cuda_assert(cuParamSeti(cuFilmConvert, offset, task.offset)); offset += sizeof(task.offset); - cuda_assert(cuParamSeti(cuFilmConvert, offset, task.stride)) + cuda_assert(cuParamSeti(cuFilmConvert, offset, task.stride)); offset += sizeof(task.stride); - cuda_assert(cuParamSetSize(cuFilmConvert, offset)) + cuda_assert(cuParamSetSize(cuFilmConvert, offset)); + + /* launch kernel */ + int threads_per_block; + cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuFilmConvert)); - /* launch kernel: todo find optimal size, cache config for fermi */ - int xthreads = 16; - int ythreads = 16; + int xthreads = (int)sqrt((float)threads_per_block); + int ythreads = (int)sqrt((float)threads_per_block); int xblocks = (task.w + xthreads - 1)/xthreads; int yblocks = (task.h + ythreads - 1)/ythreads; - cuda_assert(cuFuncSetCacheConfig(cuFilmConvert, CU_FUNC_CACHE_PREFER_L1)) - cuda_assert(cuFuncSetBlockShape(cuFilmConvert, xthreads, ythreads, 1)) - cuda_assert(cuLaunchGrid(cuFilmConvert, xblocks, yblocks)) + cuda_assert(cuFuncSetCacheConfig(cuFilmConvert, CU_FUNC_CACHE_PREFER_L1)); + cuda_assert(cuFuncSetBlockShape(cuFilmConvert, xthreads, ythreads, 1)); + cuda_assert(cuLaunchGrid(cuFilmConvert, xblocks, yblocks)); unmap_pixels((rgba_byte)? 
rgba_byte: rgba_half); @@ -734,40 +757,55 @@ public: cuda_push_context(); - CUfunction cuDisplace; + CUfunction cuShader; CUdeviceptr d_input = cuda_device_ptr(task.shader_input); CUdeviceptr d_output = cuda_device_ptr(task.shader_output); /* get kernel function */ - cuda_assert(cuModuleGetFunction(&cuDisplace, cuModule, "kernel_cuda_shader")) - - /* pass in parameters */ - int offset = 0; - - cuda_assert(cuParamSetv(cuDisplace, offset, &d_input, sizeof(d_input))) - offset += sizeof(d_input); + cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_shader")); + + /* do tasks in smaller chunks, so we can cancel it */ + const int shader_chunk_size = 65536; + const int start = task.shader_x; + const int end = task.shader_x + task.shader_w; - cuda_assert(cuParamSetv(cuDisplace, offset, &d_output, sizeof(d_output))) - offset += sizeof(d_output); + for(int shader_x = start; shader_x < end; shader_x += shader_chunk_size) { + if(task.get_cancel()) + break; - int shader_eval_type = task.shader_eval_type; - offset = align_up(offset, __alignof(shader_eval_type)); + /* pass in parameters */ + int offset = 0; - cuda_assert(cuParamSeti(cuDisplace, offset, task.shader_eval_type)) - offset += sizeof(task.shader_eval_type); + cuda_assert(cuParamSetv(cuShader, offset, &d_input, sizeof(d_input))); + offset += sizeof(d_input); - cuda_assert(cuParamSeti(cuDisplace, offset, task.shader_x)) - offset += sizeof(task.shader_x); + cuda_assert(cuParamSetv(cuShader, offset, &d_output, sizeof(d_output))); + offset += sizeof(d_output); - cuda_assert(cuParamSetSize(cuDisplace, offset)) + int shader_eval_type = task.shader_eval_type; + offset = align_up(offset, __alignof(shader_eval_type)); - /* launch kernel: todo find optimal size, cache config for fermi */ - int xthreads = 16; - int xblocks = (task.shader_w + xthreads - 1)/xthreads; + cuda_assert(cuParamSeti(cuShader, offset, task.shader_eval_type)); + offset += sizeof(task.shader_eval_type); - 
cuda_assert(cuFuncSetCacheConfig(cuDisplace, CU_FUNC_CACHE_PREFER_L1)) - cuda_assert(cuFuncSetBlockShape(cuDisplace, xthreads, 1, 1)) - cuda_assert(cuLaunchGrid(cuDisplace, xblocks, 1)) + cuda_assert(cuParamSeti(cuShader, offset, shader_x)); + offset += sizeof(shader_x); + + cuda_assert(cuParamSetSize(cuShader, offset)); + + /* launch kernel */ + int threads_per_block; + cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuShader)); + + int shader_w = min(shader_chunk_size, end - shader_x); + int xblocks = (shader_w + threads_per_block - 1)/threads_per_block; + + cuda_assert(cuFuncSetCacheConfig(cuShader, CU_FUNC_CACHE_PREFER_L1)); + cuda_assert(cuFuncSetBlockShape(cuShader, threads_per_block, 1, 1)); + cuda_assert(cuLaunchGrid(cuShader, xblocks, 1)); + + cuda_assert(cuCtxSynchronize()); + } cuda_pop_context(); } @@ -779,8 +817,8 @@ public: CUdeviceptr buffer; size_t bytes; - cuda_assert(cuGraphicsMapResources(1, &pmem.cuPBOresource, 0)) - cuda_assert(cuGraphicsResourceGetMappedPointer(&buffer, &bytes, pmem.cuPBOresource)) + cuda_assert(cuGraphicsMapResources(1, &pmem.cuPBOresource, 0)); + cuda_assert(cuGraphicsResourceGetMappedPointer(&buffer, &bytes, pmem.cuPBOresource)); return buffer; } @@ -793,7 +831,7 @@ public: if(!background) { PixelMem pmem = pixel_mem_map[mem]; - cuda_assert(cuGraphicsUnmapResources(1, &pmem.cuPBOresource, 0)) + cuda_assert(cuGraphicsUnmapResources(1, &pmem.cuPBOresource, 0)); } } @@ -882,7 +920,7 @@ public: cuda_push_context(); - cuda_assert(cuGraphicsUnregisterResource(pmem.cuPBOresource)) + cuda_assert(cuGraphicsUnregisterResource(pmem.cuPBOresource)); glDeleteBuffers(1, &pmem.cuPBO); glDeleteTextures(1, &pmem.cuTexId); @@ -900,7 +938,8 @@ public: } } - void draw_pixels(device_memory& mem, int y, int w, int h, int dy, int width, int height, bool transparent) + void draw_pixels(device_memory& mem, int y, int w, int h, int dy, int width, int height, bool transparent, + const DeviceDrawParams 
&draw_params) { if(!background) { PixelMem pmem = pixel_mem_map[mem.device_pointer]; @@ -933,6 +972,10 @@ public: glColor3f(1.0f, 1.0f, 1.0f); + if(draw_params.bind_display_space_shader_cb) { + draw_params.bind_display_space_shader_cb(); + } + glPushMatrix(); glTranslatef(0.0f, (float)dy, 0.0f); @@ -951,6 +994,10 @@ public: glPopMatrix(); + if(draw_params.unbind_display_space_shader_cb) { + draw_params.unbind_display_space_shader_cb(); + } + if(transparent) { glDisable(GL_BLEND); glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); /* reset blender default */ @@ -964,7 +1011,7 @@ public: return; } - Device::draw_pixels(mem, y, w, h, dy, width, height, transparent); + Device::draw_pixels(mem, y, w, h, dy, width, height, transparent, draw_params); } void thread_run(DeviceTask *task) @@ -979,6 +1026,10 @@ public: int start_sample = tile.start_sample; int end_sample = tile.start_sample + tile.num_samples; + boost::posix_time::ptime start_time(boost::posix_time::microsec_clock::local_time()); + boost::posix_time::ptime last_time = start_time; + int sync_sample = 10; + for(int sample = start_sample; sample < end_sample; sample++) { if (task->get_cancel()) { if(task->need_finish_queue == false) @@ -988,8 +1039,28 @@ public: path_trace(tile, sample, branched); tile.sample = sample + 1; - task->update_progress(tile); + + if(!info.display_device && sample == sync_sample) { + cuda_push_context(); + cuda_assert(cuEventRecord(tileDone, cuStream)); + cuda_assert(cuEventSynchronize(tileDone)); + + /* Do some time keeping to find out if we need to sync less */ + boost::posix_time::ptime current_time(boost::posix_time::microsec_clock::local_time()); + boost::posix_time::time_duration sample_duration = current_time - last_time; + + long msec = sample_duration.total_milliseconds(); + float scaling_factor = (float)target_update_frequency / (float)msec; + + /* sync at earliest next sample and probably later */ + sync_sample = (sample + 1) + sync_sample * (int)ceil(scaling_factor); + + 
sync_sample = min(end_sample - 1, sync_sample); // make sure we sync the last sample always + + last_time = current_time; + cuda_pop_context(); + } } task->release_tile(tile); @@ -999,7 +1070,7 @@ public: shader(*task); cuda_push_context(); - cuda_assert(cuCtxSynchronize()) + cuda_assert(cuCtxSynchronize()); cuda_pop_context(); } } @@ -1020,7 +1091,7 @@ public: film_convert(task, task.buffer, task.rgba_byte, task.rgba_half); cuda_push_context(); - cuda_assert(cuCtxSynchronize()) + cuda_assert(cuCtxSynchronize()); cuda_pop_context(); } else { @@ -1081,6 +1152,7 @@ void device_cuda_info(vector<DeviceInfo>& devices) int major, minor; cuDeviceComputeCapability(&major, &minor, num); info.advanced_shading = (major >= 2); + info.extended_images = (major >= 3); info.pack_images = false; /* if device has a kernel timeout, assume it is used for display */ diff --git a/intern/cycles/device/device_memory.h b/intern/cycles/device/device_memory.h index 1427d12cba2..8d6f4a49a9c 100644 --- a/intern/cycles/device/device_memory.h +++ b/intern/cycles/device/device_memory.h @@ -169,6 +169,7 @@ public: size_t data_size; size_t data_width; size_t data_height; + size_t data_depth; /* device pointer */ device_ptr device_pointer; @@ -195,6 +196,7 @@ public: data_size = 0; data_width = 0; data_height = 0; + data_depth = 0; assert(data_elements > 0); @@ -204,20 +206,21 @@ public: virtual ~device_vector() {} /* vector functions */ - T *resize(size_t width, size_t height = 0) + T *resize(size_t width, size_t height = 0, size_t depth = 0) { - data_size = (height == 0)? width: width*height; + data_size = width * ((height == 0)? 1: height) * ((depth == 0)? 
1: depth); data.resize(data_size); data_pointer = (device_ptr)&data[0]; data_width = width; data_height = height; + data_depth = depth; return &data[0]; } - T *copy(T *ptr, size_t width, size_t height = 0) + T *copy(T *ptr, size_t width, size_t height = 0, size_t depth = 0) { - T *mem = resize(width, height); + T *mem = resize(width, height, depth); memcpy(mem, ptr, memory_size()); return mem; } @@ -230,13 +233,14 @@ public: } } - void reference(T *ptr, size_t width, size_t height = 0) + void reference(T *ptr, size_t width, size_t height = 0, size_t depth = 0) { data.clear(); - data_size = (height == 0)? width: width*height; + data_size = width * ((height == 0)? 1: height) * ((depth == 0)? 1: depth); data_pointer = (device_ptr)ptr; data_width = width; data_height = height; + data_depth = depth; } void clear() @@ -245,6 +249,7 @@ public: data_pointer = 0; data_width = 0; data_height = 0; + data_depth = 0; data_size = 0; } diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp index 27b9de0769e..c866ebaaea2 100644 --- a/intern/cycles/device/device_multi.cpp +++ b/intern/cycles/device/device_multi.cpp @@ -168,7 +168,7 @@ public: sub.device->const_copy_to(name, host, size); } - void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic) + void tex_alloc(const char *name, device_memory& mem, InterpolationType interpolation, bool periodic) { foreach(SubDevice& sub, devices) { mem.device_pointer = 0; @@ -233,7 +233,8 @@ public: mem.device_pointer = tmp; } - void draw_pixels(device_memory& rgba, int y, int w, int h, int dy, int width, int height, bool transparent) + void draw_pixels(device_memory& rgba, int y, int w, int h, int dy, int width, int height, bool transparent, + const DeviceDrawParams &draw_params) { device_ptr tmp = rgba.device_pointer; int i = 0, sub_h = h/devices.size(); @@ -247,7 +248,7 @@ public: /* adjust math for w/width */ rgba.device_pointer = sub.ptr_map[tmp]; - 
sub.device->draw_pixels(rgba, sy, w, sh, sdy, width, sheight, transparent); + sub.device->draw_pixels(rgba, sy, w, sh, sdy, width, sheight, transparent, draw_params); i++; } @@ -327,6 +328,7 @@ static bool device_multi_add(vector<DeviceInfo>& devices, DeviceType type, bool info.advanced_shading = with_advanced_shading; info.pack_images = false; + info.extended_images = true; foreach(DeviceInfo& subinfo, devices) { if(subinfo.type == type) { @@ -350,6 +352,7 @@ static bool device_multi_add(vector<DeviceInfo>& devices, DeviceType type, bool if(subinfo.display_device) info.display_device = true; info.pack_images = info.pack_images || subinfo.pack_images; + info.extended_images = info.extended_images && subinfo.extended_images; num_added++; } } diff --git a/intern/cycles/device/device_network.cpp b/intern/cycles/device/device_network.cpp index bffd993818f..af051076009 100644 --- a/intern/cycles/device/device_network.cpp +++ b/intern/cycles/device/device_network.cpp @@ -162,7 +162,7 @@ public: snd.write_buffer(host, size); } - void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic) + void tex_alloc(const char *name, device_memory& mem, InterpolationType interpolation, bool periodic) { thread_scoped_lock lock(rpc_lock); @@ -326,7 +326,7 @@ class DeviceServer { public: thread_mutex rpc_lock; - void network_error(const string &message){ + void network_error(const string &message) { error_func.network_error(message); } @@ -366,7 +366,7 @@ protected: { /* create a new DataVector and insert it into mem_data */ pair<DataMap::iterator,bool> data_ins = mem_data.insert( - DataMap::value_type(client_pointer, DataVector())); + DataMap::value_type(client_pointer, DataVector())); /* make sure it was a unique insertion */ assert(data_ins.second); @@ -559,7 +559,7 @@ protected: else if(rcv.name == "tex_alloc") { network_device_memory mem; string name; - bool interpolation; + InterpolationType interpolation; bool periodic; device_ptr client_pointer; diff 
--git a/intern/cycles/device/device_network.h b/intern/cycles/device/device_network.h index bf8f3c70c49..893841d1da7 100644 --- a/intern/cycles/device/device_network.h +++ b/intern/cycles/device/device_network.h @@ -118,7 +118,7 @@ public: void add(const device_memory& mem) { archive & mem.data_type & mem.data_elements & mem.data_size; - archive & mem.data_width & mem.data_height & mem.device_pointer; + archive & mem.data_width & mem.data_height & mem.data_depth & mem.device_pointer; } template<typename T> void add(const T& data) @@ -209,7 +209,7 @@ public: boost::system::error_code error; size_t len = boost::asio::read(socket, boost::asio::buffer(header), error); - if(error.value()){ + if(error.value()) { error_func->network_error(error.message()); } @@ -261,7 +261,7 @@ public: void read(network_device_memory& mem) { *archive & mem.data_type & mem.data_elements & mem.data_size; - *archive & mem.data_width & mem.data_height & mem.device_pointer; + *archive & mem.data_width & mem.data_height & mem.data_depth & mem.device_pointer; mem.data_pointer = 0; } @@ -276,7 +276,7 @@ public: boost::system::error_code error; size_t len = boost::asio::read(socket, boost::asio::buffer(buffer, size), error); - if(error.value()){ + if(error.value()) { error_func->network_error(error.message()); } @@ -391,7 +391,7 @@ private: /* add address if it's not already in the list */ bool found = std::find(servers.begin(), servers.end(), - address) != servers.end(); + address) != servers.end(); if(!found) servers.push_back(address); diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp index 9117b70d749..694ec9db036 100644 --- a/intern/cycles/device/device_opencl.cpp +++ b/intern/cycles/device/device_opencl.cpp @@ -101,9 +101,6 @@ static string opencl_kernel_build_options(const string& platform, const string * if(opencl_kernel_use_debug()) build_options += "-D__KERNEL_OPENCL_DEBUG__ "; - - if(opencl_kernel_use_advanced_shading(platform)) - build_options 
+= "-D__KERNEL_OPENCL_NEED_ADVANCED_SHADING__ "; return build_options; } @@ -409,10 +406,22 @@ public: fprintf(stderr, "%s\n", message.c_str()); } - void opencl_assert(cl_int err) +#define opencl_assert(stmt) \ + { \ + cl_int err = stmt; \ + \ + if(err != CL_SUCCESS) { \ + string message = string_printf("OpenCL error: %s in %s", opencl_error_string(err), #stmt); \ + if(error_msg == "") \ + error_msg = message; \ + fprintf(stderr, "%s\n", message.c_str()); \ + } \ + } (void)0 + + void opencl_assert_err(cl_int err, const char* where) { if(err != CL_SUCCESS) { - string message = string_printf("OpenCL error (%d): %s", err, opencl_error_string(err)); + string message = string_printf("OpenCL error (%d): %s in %s", err, opencl_error_string(err), where); if(error_msg == "") error_msg = message; fprintf(stderr, "%s\n", message.c_str()); @@ -452,8 +461,10 @@ public: vector<cl_platform_id> platforms(num_platforms, NULL); ciErr = clGetPlatformIDs(num_platforms, &platforms[0], NULL); - if(opencl_error(ciErr)) + if(opencl_error(ciErr)) { + fprintf(stderr, "clGetPlatformIDs failed \n"); return; + } int num_base = 0; int total_devices = 0; @@ -478,8 +489,10 @@ public: /* get devices */ vector<cl_device_id> device_ids(num_devices, NULL); - if(opencl_error(clGetDeviceIDs(cpPlatform, opencl_device_type(), num_devices, &device_ids[0], NULL))) + if(opencl_error(clGetDeviceIDs(cpPlatform, opencl_device_type(), num_devices, &device_ids[0], NULL))) { + fprintf(stderr, "clGetDeviceIDs failed \n"); return; + } cdDevice = device_ids[info.num - num_base]; @@ -515,8 +528,10 @@ public: cxContext = clCreateContext(context_props, 1, &cdDevice, context_notify_callback, cdDevice, &ciErr); - if(opencl_error(ciErr)) + if(opencl_error(ciErr)) { + opencl_error("OpenCL: clCreateContext failed"); return; + } /* cache it */ OpenCLCache::store_context(cpPlatform, cdDevice, cxContext, cache_locker); @@ -531,6 +546,7 @@ public: if(opencl_error(ciErr)) return; + fprintf(stderr,"Device init succes\n"); 
device_initialized = true; } @@ -821,7 +837,7 @@ public: mem.device_pointer = (device_ptr)clCreateBuffer(cxContext, mem_flag, size, mem_ptr, &ciErr); - opencl_assert(ciErr); + opencl_assert_err(ciErr, "clCreateBuffer"); stats.mem_alloc(size); } @@ -830,8 +846,7 @@ public: { /* this is blocking */ size_t size = mem.memory_size(); - ciErr = clEnqueueWriteBuffer(cqCommandQueue, CL_MEM_PTR(mem.device_pointer), CL_TRUE, 0, size, (void*)mem.data_pointer, 0, NULL, NULL); - opencl_assert(ciErr); + opencl_assert(clEnqueueWriteBuffer(cqCommandQueue, CL_MEM_PTR(mem.device_pointer), CL_TRUE, 0, size, (void*)mem.data_pointer, 0, NULL, NULL)); } void mem_copy_from(device_memory& mem, int y, int w, int h, int elem) @@ -839,8 +854,7 @@ public: size_t offset = elem*y*w; size_t size = elem*w*h; - ciErr = clEnqueueReadBuffer(cqCommandQueue, CL_MEM_PTR(mem.device_pointer), CL_TRUE, offset, size, (uchar*)mem.data_pointer + offset, 0, NULL, NULL); - opencl_assert(ciErr); + opencl_assert(clEnqueueReadBuffer(cqCommandQueue, CL_MEM_PTR(mem.device_pointer), CL_TRUE, offset, size, (uchar*)mem.data_pointer + offset, 0, NULL, NULL)); } void mem_zero(device_memory& mem) @@ -854,9 +868,8 @@ public: void mem_free(device_memory& mem) { if(mem.device_pointer) { - ciErr = clReleaseMemObject(CL_MEM_PTR(mem.device_pointer)); + opencl_assert(clReleaseMemObject(CL_MEM_PTR(mem.device_pointer))); mem.device_pointer = 0; - opencl_assert(ciErr); stats.mem_free(mem.memory_size()); } @@ -881,7 +894,7 @@ public: mem_copy_to(*i->second); } - void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic) + void tex_alloc(const char *name, device_memory& mem, InterpolationType interpolation, bool periodic) { mem_alloc(mem, MEM_READ_ONLY); mem_copy_to(mem); @@ -919,7 +932,7 @@ public: CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t)*3, max_work_items, NULL); /* try to divide evenly over 2 dimensions */ - size_t sqrt_workgroup_size = max(sqrt((double)workgroup_size), 1.0); + size_t 
sqrt_workgroup_size = max((size_t)sqrt((double)workgroup_size), 1); size_t local_size[2] = {sqrt_workgroup_size, sqrt_workgroup_size}; /* some implementations have max size 1 on 2nd dimension */ @@ -931,8 +944,7 @@ public: size_t global_size[2] = {global_size_round_up(local_size[0], w), global_size_round_up(local_size[1], h)}; /* run kernel */ - ciErr = clEnqueueNDRangeKernel(cqCommandQueue, kernel, 2, NULL, global_size, NULL, 0, NULL, NULL); - opencl_assert(ciErr); + opencl_assert(clEnqueueNDRangeKernel(cqCommandQueue, kernel, 2, NULL, global_size, NULL, 0, NULL, NULL)); opencl_assert(clFlush(cqCommandQueue)); } @@ -952,33 +964,29 @@ public: /* sample arguments */ cl_uint narg = 0; - ciErr = 0; - ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_data), (void*)&d_data); - ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_buffer), (void*)&d_buffer); - ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_rng_state), (void*)&d_rng_state); + opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_data), (void*)&d_data)); + opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_buffer), (void*)&d_buffer)); + opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_rng_state), (void*)&d_rng_state)); #define KERNEL_TEX(type, ttype, name) \ - ciErr |= set_kernel_arg_mem(ckPathTraceKernel, &narg, #name); + set_kernel_arg_mem(ckPathTraceKernel, &narg, #name); #include "kernel_textures.h" - ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_sample), (void*)&d_sample); - ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_x), (void*)&d_x); - ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_y), (void*)&d_y); - ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_w), (void*)&d_w); - ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_h), (void*)&d_h); - ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_offset), (void*)&d_offset); - ciErr |= clSetKernelArg(ckPathTraceKernel, 
narg++, sizeof(d_stride), (void*)&d_stride); - - opencl_assert(ciErr); + opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_sample), (void*)&d_sample)); + opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_x), (void*)&d_x)); + opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_y), (void*)&d_y)); + opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_w), (void*)&d_w)); + opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_h), (void*)&d_h)); + opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_offset), (void*)&d_offset)); + opencl_assert(clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_stride), (void*)&d_stride)); enqueue_kernel(ckPathTraceKernel, d_w, d_h); } - cl_int set_kernel_arg_mem(cl_kernel kernel, cl_uint *narg, const char *name) + void set_kernel_arg_mem(cl_kernel kernel, cl_uint *narg, const char *name) { cl_mem ptr; - cl_int err = 0; MemMap::iterator i = mem_map.find(name); if(i != mem_map.end()) { @@ -989,10 +997,7 @@ public: ptr = CL_MEM_PTR(null_mem); } - err |= clSetKernelArg(kernel, (*narg)++, sizeof(ptr), (void*)&ptr); - opencl_assert(err); - - return err; + opencl_assert(clSetKernelArg(kernel, (*narg)++, sizeof(ptr), (void*)&ptr)); } void film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half) @@ -1011,27 +1016,27 @@ public: /* sample arguments */ cl_uint narg = 0; - ciErr = 0; + cl_kernel ckFilmConvertKernel = (rgba_byte)? 
ckFilmConvertByteKernel: ckFilmConvertHalfFloatKernel; - ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_data), (void*)&d_data); - ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_rgba), (void*)&d_rgba); - ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_buffer), (void*)&d_buffer); + opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_data), (void*)&d_data)); + opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_rgba), (void*)&d_rgba)); + opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_buffer), (void*)&d_buffer)); #define KERNEL_TEX(type, ttype, name) \ - ciErr |= set_kernel_arg_mem(ckFilmConvertKernel, &narg, #name); + set_kernel_arg_mem(ckFilmConvertKernel, &narg, #name); #include "kernel_textures.h" - ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_sample_scale), (void*)&d_sample_scale); - ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_x), (void*)&d_x); - ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_y), (void*)&d_y); - ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_w), (void*)&d_w); - ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_h), (void*)&d_h); - ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_offset), (void*)&d_offset); - ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_stride), (void*)&d_stride); + opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_sample_scale), (void*)&d_sample_scale)); + opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_x), (void*)&d_x)); + opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_y), (void*)&d_y)); + opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_w), (void*)&d_w)); + opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_h), (void*)&d_h)); + opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_offset), (void*)&d_offset)); + 
opencl_assert(clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_stride), (void*)&d_stride)); + - opencl_assert(ciErr); enqueue_kernel(ckFilmConvertKernel, d_w, d_h); } @@ -1048,21 +1053,18 @@ public: /* sample arguments */ cl_uint narg = 0; - ciErr = 0; - ciErr |= clSetKernelArg(ckShaderKernel, narg++, sizeof(d_data), (void*)&d_data); - ciErr |= clSetKernelArg(ckShaderKernel, narg++, sizeof(d_input), (void*)&d_input); - ciErr |= clSetKernelArg(ckShaderKernel, narg++, sizeof(d_output), (void*)&d_output); + opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_data), (void*)&d_data)); + opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_input), (void*)&d_input)); + opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_output), (void*)&d_output)); #define KERNEL_TEX(type, ttype, name) \ - ciErr |= set_kernel_arg_mem(ckShaderKernel, &narg, #name); + set_kernel_arg_mem(ckShaderKernel, &narg, #name); #include "kernel_textures.h" - ciErr |= clSetKernelArg(ckShaderKernel, narg++, sizeof(d_shader_eval_type), (void*)&d_shader_eval_type); - ciErr |= clSetKernelArg(ckShaderKernel, narg++, sizeof(d_shader_x), (void*)&d_shader_x); - ciErr |= clSetKernelArg(ckShaderKernel, narg++, sizeof(d_shader_w), (void*)&d_shader_w); - - opencl_assert(ciErr); + opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_shader_eval_type), (void*)&d_shader_eval_type)); + opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_shader_x), (void*)&d_shader_x)); + opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_shader_w), (void*)&d_shader_w)); enqueue_kernel(ckShaderKernel, task.shader_w, 1); } diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index cbe0d4b5d10..d18f4fa2998 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -12,10 +12,6 @@ set(INC_SYS set(SRC kernel.cpp - kernel_sse2.cpp - kernel_sse3.cpp - kernel_sse41.cpp - kernel_avx.cpp kernel.cl kernel.cu ) @@ 
-23,14 +19,10 @@ set(SRC set(SRC_HEADERS kernel.h kernel_accumulate.h - kernel_bvh.h - kernel_bvh_subsurface.h - kernel_bvh_traversal.h kernel_camera.h kernel_compat_cpu.h kernel_compat_cuda.h kernel_compat_opencl.h - kernel_curve.h kernel_differential.h kernel_displace.h kernel_emission.h @@ -40,18 +32,15 @@ set(SRC_HEADERS kernel_light.h kernel_math.h kernel_montecarlo.h - kernel_object.h kernel_passes.h kernel_path.h kernel_path_state.h - kernel_primitive.h kernel_projection.h kernel_random.h kernel_shader.h kernel_shadow.h kernel_subsurface.h kernel_textures.h - kernel_triangle.h kernel_types.h kernel_volume.h ) @@ -118,6 +107,21 @@ set(SRC_SVM_HEADERS svm/svm_wave.h ) +set(SRC_GEOM_HEADERS + geom/geom.h + geom/geom_attribute.h + geom/geom_bvh.h + geom/geom_bvh_subsurface.h + geom/geom_bvh_traversal.h + geom/geom_curve.h + geom/geom_motion_curve.h + geom/geom_motion_triangle.h + geom/geom_object.h + geom/geom_primitive.h + geom/geom_triangle.h + geom/geom_volume.h +) + set(SRC_UTIL_HEADERS ../util/util_color.h ../util/util_half.h @@ -142,37 +146,45 @@ if(WITH_CYCLES_CUDA_BINARIES) set(CUDA_VERSION "${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}") # warn for other versions - if(CUDA_VERSION MATCHES "50") + if(CUDA_VERSION MATCHES "60") else() - message(WARNING "CUDA version ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} detected, build may succeed but only CUDA 5.0 is officially supported") + message(WARNING + "CUDA version ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} detected, " + "build may succeed but only CUDA 6.0 is officially supported") endif() # build for each arch - set(cuda_sources kernel.cu ${SRC_HEADERS} ${SRC_SVM_HEADERS} ${SRC_CLOSURE_HEADERS} ${SRC_UTIL_HEADERS}) + set(cuda_sources kernel.cu ${SRC_HEADERS} ${SRC_SVM_HEADERS} ${SRC_GEOM_HEADERS} ${SRC_CLOSURE_HEADERS} ${SRC_UTIL_HEADERS}) set(cuda_cubins) foreach(arch ${CYCLES_CUDA_BINARIES_ARCH}) set(cuda_cubin kernel_${arch}.cubin) set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${CUDA_VERSION}") - - # 
CUDA 5.x build flags for different archs - if(${arch} MATCHES "sm_2[0-9]") - # sm_2x - set(cuda_arch_flags "--maxrregcount=32") - elseif(${arch} MATCHES "sm_3[0-9]") - # sm_3x - set(cuda_arch_flags "--maxrregcount=32") - endif() - set(cuda_math_flags "--use_fast_math") - - if(CUDA_VERSION LESS 50 AND ${arch} MATCHES "sm_35") + + if(CUDA_VERSION LESS 60 AND ${arch} MATCHES "sm_50") + message(WARNING "Can't build kernel for CUDA sm_50 architecture, skipping") + elseif(CUDA_VERSION LESS 50 AND ${arch} MATCHES "sm_35") message(WARNING "Can't build kernel for CUDA sm_35 architecture, skipping") else() add_custom_command( OUTPUT ${cuda_cubin} - COMMAND ${CUDA_NVCC_EXECUTABLE} -arch=${arch} -m${CUDA_BITS} --cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cu -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin} --ptxas-options="-v" ${cuda_arch_flags} ${cuda_version_flags} ${cuda_math_flags} -I${CMAKE_CURRENT_SOURCE_DIR}/../util -I${CMAKE_CURRENT_SOURCE_DIR}/svm -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -DNVCC + COMMAND ${CUDA_NVCC_EXECUTABLE} + -arch=${arch} + -m${CUDA_BITS} + --cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cu + -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin} + --ptxas-options="-v" + ${cuda_arch_flags} + ${cuda_version_flags} + ${cuda_math_flags} + -I${CMAKE_CURRENT_SOURCE_DIR}/../util + -I${CMAKE_CURRENT_SOURCE_DIR}/svm + -DCCL_NAMESPACE_BEGIN= + -DCCL_NAMESPACE_END= + -DNVCC + DEPENDS ${cuda_sources}) delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib) @@ -195,12 +207,22 @@ endif() include_directories(${INC}) include_directories(SYSTEM ${INC_SYS}) -set_source_files_properties(kernel_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}") -set_source_files_properties(kernel_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}") -set_source_files_properties(kernel_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}") -set_source_files_properties(kernel_avx.cpp PROPERTIES COMPILE_FLAGS 
"${CYCLES_AVX_KERNEL_FLAGS}") +if(CXX_HAS_SSE) + list(APPEND SRC + kernel_sse2.cpp + kernel_sse3.cpp + kernel_sse41.cpp + kernel_avx.cpp + ) + + set_source_files_properties(kernel_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}") + set_source_files_properties(kernel_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}") + set_source_files_properties(kernel_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}") + set_source_files_properties(kernel_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}") +endif() + -add_library(cycles_kernel ${SRC} ${SRC_HEADERS} ${SRC_CLOSURE_HEADERS} ${SRC_SVM_HEADERS}) +add_library(cycles_kernel ${SRC} ${SRC_HEADERS} ${SRC_CLOSURE_HEADERS} ${SRC_SVM_HEADERS} ${SRC_GEOM_HEADERS}) if(WITH_CYCLES_CUDA) add_dependencies(cycles_kernel cycles_kernel_cuda) @@ -221,5 +243,6 @@ delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernel.cu" ${CYCLES_INSTALL_PATH}/k delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_CLOSURE_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/closure) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_SVM_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/svm) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_GEOM_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/geom) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_UTIL_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel) diff --git a/intern/cycles/kernel/SConscript b/intern/cycles/kernel/SConscript index 5077d8c96b0..04e1bad7538 100644 --- a/intern/cycles/kernel/SConscript +++ b/intern/cycles/kernel/SConscript @@ -60,6 +60,7 @@ if env['WITH_BF_CYCLES_CUDA_BINARIES']: kernel_file = os.path.join(source_dir, "kernel.cu") util_dir = os.path.join(source_dir, "../util") svm_dir = os.path.join(source_dir, "../svm") + geom_dir = os.path.join(source_dir, "../geom") closure_dir = os.path.join(source_dir, "../closure") # get CUDA version @@ -68,37 +69,33 @@ if 
env['WITH_BF_CYCLES_CUDA_BINARIES']: cuda_major_minor = re.findall(r'release (\d+).(\d+)', output)[0] cuda_version = int(cuda_major_minor[0])*10 + int(cuda_major_minor[1]) - if cuda_version != 50: - print("CUDA version %d.%d detected, build may succeed but only CUDA 5.0 is officially supported." % (cuda_version/10, cuda_version%10)) + if cuda_version != 60: + print("CUDA version %d.%d detected, build may succeed but only CUDA 6.0 is officially supported." % (cuda_version/10, cuda_version%10)) # nvcc flags nvcc_flags = "-m%s" % (bits) nvcc_flags += " --cubin --ptxas-options=\"-v\"" nvcc_flags += " -D__KERNEL_CUDA_VERSION__=%d" % (cuda_version) nvcc_flags += " -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -DNVCC" - nvcc_flags += " -I \"%s\" -I \"%s\" -I \"%s\"" % (util_dir, svm_dir, closure_dir) + nvcc_flags += " -I \"%s\" -I \"%s\" -I \"%s\" -I \"%s\"" % (util_dir, svm_dir, geom_dir, closure_dir) # dependencies - dependencies = ['kernel.cu'] + kernel.Glob('*.h') + kernel.Glob('../util/*.h') + kernel.Glob('svm/*.h') + kernel.Glob('closure/*.h') + dependencies = ['kernel.cu'] + kernel.Glob('*.h') + kernel.Glob('../util/*.h') + kernel.Glob('svm/*.h') + kernel.Glob('geom/*.h') + kernel.Glob('closure/*.h') last_cubin_file = None # add command for each cuda architecture for arch in cuda_archs: - cubin_file = os.path.join(build_dir, "kernel_%s.cubin" % arch) + if cuda_version < 60 and arch == "sm_50": + print("Can't build kernel for CUDA sm_50 architecture, skipping") + continue - # CUDA 5.x build flags for different archs - if arch.startswith("sm_2"): - # sm_2x - cuda_arch_flags = "--maxrregcount=32 --use_fast_math" - elif arch.startswith("sm_3"): - # sm_3x - cuda_arch_flags = "--maxrregcount=32 --use_fast_math" + cubin_file = os.path.join(build_dir, "kernel_%s.cubin" % arch) if env['BF_CYCLES_CUDA_ENV']: MS_SDK = "C:\\Program Files\\Microsoft SDKs\\Windows\\v7.1\\Bin\\SetEnv.cmd" - command = "\"%s\" & \"%s\" -arch=%s %s %s \"%s\" -o \"%s\"" % (MS_SDK, nvcc, arch, 
nvcc_flags, cuda_arch_flags, kernel_file, cubin_file) + command = "\"%s\" & \"%s\" -arch=%s %s \"%s\" -o \"%s\"" % (MS_SDK, nvcc, arch, nvcc_flags, kernel_file, cubin_file) else: - command = "\"%s\" -arch=%s %s %s \"%s\" -o \"%s\"" % (nvcc, arch, nvcc_flags, cuda_arch_flags, kernel_file, cubin_file) + command = "\"%s\" -arch=%s %s \"%s\" -o \"%s\"" % (nvcc, arch, nvcc_flags, kernel_file, cubin_file) kernel.Command(cubin_file, 'kernel.cu', command) kernel.Depends(cubin_file, dependencies) diff --git a/intern/cycles/kernel/closure/bsdf_hair.h b/intern/cycles/kernel/closure/bsdf_hair.h index 163e7cc5ee2..19cdb773255 100644 --- a/intern/cycles/kernel/closure/bsdf_hair.h +++ b/intern/cycles/kernel/closure/bsdf_hair.h @@ -84,7 +84,7 @@ ccl_device float3 bsdf_hair_reflection_eval_reflect(const ShaderClosure *sc, con float theta_i = M_PI_2_F - safe_acosf(omega_in_z); float cosphi_i = dot(omega_in_y, locy); - if(M_PI_2_F - fabsf(theta_i) < 0.001f || cosphi_i < 0.0f){ + if(M_PI_2_F - fabsf(theta_i) < 0.001f || cosphi_i < 0.0f) { *pdf = 0.0f; return make_float3(*pdf, *pdf, *pdf); } @@ -99,7 +99,7 @@ ccl_device float3 bsdf_hair_reflection_eval_reflect(const ShaderClosure *sc, con float theta_h = (theta_i + theta_r) * 0.5f; float t = theta_h - offset; - float phi_pdf = cos(phi_i * 0.5f) * 0.25f / roughness2; + float phi_pdf = cosf(phi_i * 0.5f) * 0.25f / roughness2; float theta_pdf = roughness1 / (2 * (t*t + roughness1*roughness1) * (a_R - b_R)* costheta_i); *pdf = phi_pdf * theta_pdf; @@ -140,7 +140,7 @@ ccl_device float3 bsdf_hair_transmission_eval_transmit(const ShaderClosure *sc, float theta_i = M_PI_2_F - safe_acosf(omega_in_z); float phi_i = safe_acosf(dot(omega_in_y, locy)); - if(M_PI_2_F - fabsf(theta_i) < 0.001f){ + if(M_PI_2_F - fabsf(theta_i) < 0.001f) { *pdf = 0.0f; return make_float3(*pdf, *pdf, *pdf); } @@ -191,7 +191,7 @@ ccl_device int bsdf_hair_reflection_sample(const ShaderClosure *sc, float3 Ng, f float phi = 2 * safe_asinf(1 - 2 * randv) * roughness2; - 
float phi_pdf = cos(phi * 0.5f) * 0.25f / roughness2; + float phi_pdf = cosf(phi * 0.5f) * 0.25f / roughness2; float theta_pdf = roughness1 / (2 * (t*t + roughness1*roughness1) * (a_R - b_R)*costheta_i); @@ -251,8 +251,8 @@ ccl_device int bsdf_hair_transmission_sample(const ShaderClosure *sc, float3 Ng, float phi_pdf = roughness2 / (c_TT * (p * p + roughness2 * roughness2)); *omega_in =(cosf(phi) * costheta_i) * locy - - (sinf(phi) * costheta_i) * locx + - ( sintheta_i) * Tg; + (sinf(phi) * costheta_i) * locx + + ( sintheta_i) * Tg; //differentials - TODO: find a better approximation for the transmission bounce #ifdef __RAY_DIFFERENTIALS__ @@ -261,7 +261,7 @@ ccl_device int bsdf_hair_transmission_sample(const ShaderClosure *sc, float3 Ng, #endif *pdf = fabsf(phi_pdf * theta_pdf); - if(M_PI_2_F - fabsf(theta_i) < 0.001f){ + if(M_PI_2_F - fabsf(theta_i) < 0.001f) { *pdf = 0.0f; } diff --git a/intern/cycles/kernel/closure/bsdf_microfacet.h b/intern/cycles/kernel/closure/bsdf_microfacet.h index dfa8886c113..1ec35e444fe 100644 --- a/intern/cycles/kernel/closure/bsdf_microfacet.h +++ b/intern/cycles/kernel/closure/bsdf_microfacet.h @@ -154,8 +154,8 @@ ccl_device int bsdf_microfacet_ggx_sample(const ShaderClosure *sc, float3 Ng, fl float sinThetaM = cosThetaM * safe_sqrtf(tanThetaM2); float phiM = M_2PI_F * randv; float3 m = (cosf(phiM) * sinThetaM) * X + - (sinf(phiM) * sinThetaM) * Y + - cosThetaM * Z; + (sinf(phiM) * sinThetaM) * Y + + ( cosThetaM) * Z; if(!m_refractive) { float cosMO = dot(m, I); if(cosMO > 0) { @@ -383,8 +383,8 @@ ccl_device int bsdf_microfacet_beckmann_sample(const ShaderClosure *sc, float3 N float sinThetaM = cosThetaM * tanThetaM; float phiM = M_2PI_F * randv; float3 m = (cosf(phiM) * sinThetaM) * X + - (sinf(phiM) * sinThetaM) * Y + - cosThetaM * Z; + (sinf(phiM) * sinThetaM) * Y + + ( cosThetaM) * Z; if(!m_refractive) { float cosMO = dot(m, I); diff --git a/intern/cycles/kernel/closure/bsdf_phong_ramp.h 
b/intern/cycles/kernel/closure/bsdf_phong_ramp.h index 219c5aea159..2b4e1c68640 100644 --- a/intern/cycles/kernel/closure/bsdf_phong_ramp.h +++ b/intern/cycles/kernel/closure/bsdf_phong_ramp.h @@ -109,8 +109,8 @@ ccl_device int bsdf_phong_ramp_sample(const ShaderClosure *sc, const float3 colo float sinTheta2 = 1 - cosTheta * cosTheta; float sinTheta = sinTheta2 > 0 ? sqrtf(sinTheta2) : 0; *omega_in = (cosf(phi) * sinTheta) * T + - (sinf(phi) * sinTheta) * B + - ( cosTheta) * R; + (sinf(phi) * sinTheta) * B + + ( cosTheta) * R; if (dot(Ng, *omega_in) > 0.0f) { // common terms for pdf and eval diff --git a/intern/cycles/kernel/closure/bsdf_util.h b/intern/cycles/kernel/closure/bsdf_util.h index f6dceb3ca82..b3dcb9dcc38 100644 --- a/intern/cycles/kernel/closure/bsdf_util.h +++ b/intern/cycles/kernel/closure/bsdf_util.h @@ -35,14 +35,15 @@ CCL_NAMESPACE_BEGIN -ccl_device float fresnel_dielectric(float eta, const float3 N, - const float3 I, float3 *R, float3 *T, +ccl_device float fresnel_dielectric( + float eta, const float3 N, + const float3 I, float3 *R, float3 *T, #ifdef __RAY_DIFFERENTIALS__ - const float3 dIdx, const float3 dIdy, - float3 *dRdx, float3 *dRdy, - float3 *dTdx, float3 *dTdy, + const float3 dIdx, const float3 dIdy, + float3 *dRdx, float3 *dRdy, + float3 *dTdx, float3 *dTdy, #endif - bool *is_inside) + bool *is_inside) { float cos = dot(N, I), neta; float3 Nn; diff --git a/intern/cycles/kernel/closure/bsdf_westin.h b/intern/cycles/kernel/closure/bsdf_westin.h index ca4c05e91fe..9dc1c00bb3d 100644 --- a/intern/cycles/kernel/closure/bsdf_westin.h +++ b/intern/cycles/kernel/closure/bsdf_westin.h @@ -96,10 +96,9 @@ ccl_device int bsdf_westin_backscatter_sample(const ShaderClosure *sc, float3 Ng float sinTheta2 = 1 - cosTheta * cosTheta; float sinTheta = sinTheta2 > 0 ? 
sqrtf(sinTheta2) : 0; *omega_in = (cosf(phi) * sinTheta) * T + - (sinf(phi) * sinTheta) * B + - (cosTheta) * I; - if(dot(Ng, *omega_in) > 0) - { + (sinf(phi) * sinTheta) * B + + (cosTheta) * I; + if(dot(Ng, *omega_in) > 0) { // common terms for pdf and eval float cosNI = dot(N, *omega_in); // make sure the direction we chose is still in the right hemisphere diff --git a/intern/cycles/kernel/geom/geom.h b/intern/cycles/kernel/geom/geom.h new file mode 100644 index 00000000000..9495a2541f9 --- /dev/null +++ b/intern/cycles/kernel/geom/geom.h @@ -0,0 +1,44 @@ +/* + * Adapted from code Copyright 2009-2010 NVIDIA Corporation + * Modifications Copyright 2011, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* bottom-most stack entry, indicating the end of traversal */ +#define ENTRYPOINT_SENTINEL 0x76543210 + +/* 64 object BVH + 64 mesh BVH + 64 object node splitting */ +#define BVH_STACK_SIZE 192 +#define BVH_NODE_SIZE 4 +#define TRI_NODE_SIZE 3 + +/* silly workaround for float extended precision that happens when compiling + * without sse support on x86, it results in different results for float ops + * that you would otherwise expect to compare correctly */ +#if !defined(__i386__) || defined(__SSE__) +#define NO_EXTENDED_PRECISION +#else +#define NO_EXTENDED_PRECISION volatile +#endif + +#include "geom_attribute.h" +#include "geom_object.h" +#include "geom_triangle.h" +#include "geom_motion_triangle.h" +#include "geom_motion_curve.h" +#include "geom_curve.h" +#include "geom_volume.h" +#include "geom_primitive.h" +#include "geom_bvh.h" + diff --git a/intern/cycles/kernel/geom/geom_attribute.h b/intern/cycles/kernel/geom/geom_attribute.h new file mode 100644 index 00000000000..63ce31c492f --- /dev/null +++ b/intern/cycles/kernel/geom/geom_attribute.h @@ -0,0 +1,71 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License + */ + +CCL_NAMESPACE_BEGIN + +/* Attributes + * + * We support an arbitrary number of attributes on various mesh elements. + * On vertices, triangles, curve keys, curves, meshes and volume grids. + * Most of the code for attribute reading is in the primitive files. 
+ * + * Lookup of attributes is different between OSL and SVM, as OSL is ustring + * based while for SVM we use integer ids. */ + +/* Find attribute based on ID */ + +ccl_device_inline int find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id, AttributeElement *elem) +{ + if(sd->object == PRIM_NONE) + return (int)ATTR_STD_NOT_FOUND; + + /* for SVM, find attribute by unique id */ + uint attr_offset = sd->object*kernel_data.bvh.attributes_map_stride; +#ifdef __HAIR__ + attr_offset = (sd->type & PRIMITIVE_ALL_CURVE)? attr_offset + ATTR_PRIM_CURVE: attr_offset; +#endif + uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset); + + while(attr_map.x != id) { + attr_offset += ATTR_PRIM_TYPES; + attr_map = kernel_tex_fetch(__attributes_map, attr_offset); + } + + *elem = (AttributeElement)attr_map.y; + + if(sd->prim == PRIM_NONE && (AttributeElement)attr_map.y != ATTR_ELEMENT_MESH) + return ATTR_STD_NOT_FOUND; + + /* return result */ + return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z; +} + +/* Transform matrix attribute on meshes */ + +ccl_device Transform primitive_attribute_matrix(KernelGlobals *kg, const ShaderData *sd, int offset) +{ + Transform tfm; + + tfm.x = kernel_tex_fetch(__attributes_float3, offset + 0); + tfm.y = kernel_tex_fetch(__attributes_float3, offset + 1); + tfm.z = kernel_tex_fetch(__attributes_float3, offset + 2); + tfm.w = kernel_tex_fetch(__attributes_float3, offset + 3); + + return tfm; +} + +CCL_NAMESPACE_END + diff --git a/intern/cycles/kernel/geom/geom_bvh.h b/intern/cycles/kernel/geom/geom_bvh.h new file mode 100644 index 00000000000..dd7c25d581d --- /dev/null +++ b/intern/cycles/kernel/geom/geom_bvh.h @@ -0,0 +1,318 @@ +/* + * Adapted from code Copyright 2009-2010 NVIDIA Corporation + * Modifications Copyright 2011, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* BVH + * + * Bounding volume hierarchy for ray tracing. We compile different variations + * of the same BVH traversal function for faster rendering when some types of + * primitives are not needed, using #includes to work around the lack of + * C++ templates in OpenCL. + * + * Originally based on "Understanding the Efficiency of Ray Traversal on GPUs", + * the code has been extended and modified to support more primitives and work + * with CPU/CUDA/OpenCL. */ + +CCL_NAMESPACE_BEGIN + +/* BVH intersection function variations */ + +#define BVH_INSTANCING 1 +#define BVH_MOTION 2 +#define BVH_HAIR 4 +#define BVH_HAIR_MINIMUM_WIDTH 8 + +#define BVH_FUNCTION_NAME bvh_intersect +#define BVH_FUNCTION_FEATURES 0 +#include "geom_bvh_traversal.h" + +#if defined(__INSTANCING__) +#define BVH_FUNCTION_NAME bvh_intersect_instancing +#define BVH_FUNCTION_FEATURES BVH_INSTANCING +#include "geom_bvh_traversal.h" +#endif + +#if defined(__HAIR__) +#define BVH_FUNCTION_NAME bvh_intersect_hair +#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH +#include "geom_bvh_traversal.h" +#endif + +#if defined(__OBJECT_MOTION__) +#define BVH_FUNCTION_NAME bvh_intersect_motion +#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION +#include "geom_bvh_traversal.h" +#endif + +#if defined(__HAIR__) && defined(__OBJECT_MOTION__) +#define BVH_FUNCTION_NAME bvh_intersect_hair_motion +#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH|BVH_MOTION +#include "geom_bvh_traversal.h" +#endif + +#if defined(__SUBSURFACE__) +#define 
BVH_FUNCTION_NAME bvh_intersect_subsurface +#define BVH_FUNCTION_FEATURES 0 +#include "geom_bvh_subsurface.h" +#endif + +#if defined(__SUBSURFACE__) && defined(__INSTANCING__) +#define BVH_FUNCTION_NAME bvh_intersect_subsurface_instancing +#define BVH_FUNCTION_FEATURES BVH_INSTANCING +#include "geom_bvh_subsurface.h" +#endif + +#if defined(__SUBSURFACE__) && defined(__HAIR__) +#define BVH_FUNCTION_NAME bvh_intersect_subsurface_hair +#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR +#include "geom_bvh_subsurface.h" +#endif + +#if defined(__SUBSURFACE__) && defined(__OBJECT_MOTION__) +#define BVH_FUNCTION_NAME bvh_intersect_subsurface_motion +#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION +#include "geom_bvh_subsurface.h" +#endif + +#if defined(__SUBSURFACE__) && defined(__HAIR__) && defined(__OBJECT_MOTION__) +#define BVH_FUNCTION_NAME bvh_intersect_subsurface_hair_motion +#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_MOTION +#include "geom_bvh_subsurface.h" +#endif + +#if defined(__SHADOW_RECORD_ALL__) +#define BVH_FUNCTION_NAME bvh_intersect_shadow_all +#define BVH_FUNCTION_FEATURES 0 +#include "geom_bvh_shadow.h" +#endif + +#if defined(__SUBSURFACE__) && defined(__INSTANCING__) +#define BVH_FUNCTION_NAME bvh_intersect_shadow_all_instancing +#define BVH_FUNCTION_FEATURES BVH_INSTANCING +#include "geom_bvh_shadow.h" +#endif + +#if defined(__SUBSURFACE__) && defined(__HAIR__) +#define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair +#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR +#include "geom_bvh_shadow.h" +#endif + +#if defined(__SUBSURFACE__) && defined(__OBJECT_MOTION__) +#define BVH_FUNCTION_NAME bvh_intersect_shadow_all_motion +#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION +#include "geom_bvh_shadow.h" +#endif + +#if defined(__SUBSURFACE__) && defined(__HAIR__) && defined(__OBJECT_MOTION__) +#define BVH_FUNCTION_NAME bvh_intersect_shadow_all_hair_motion +#define BVH_FUNCTION_FEATURES 
BVH_INSTANCING|BVH_HAIR|BVH_MOTION +#include "geom_bvh_shadow.h" +#endif + +/* to work around titan bug when using arrays instead of textures */ +#if !defined(__KERNEL_CUDA__) || defined(__KERNEL_CUDA_TEX_STORAGE__) +ccl_device_inline +#else +ccl_device_noinline +#endif +#ifdef __HAIR__ +bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect, uint *lcg_state, float difl, float extmax) +#else +bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect) +#endif +{ +#ifdef __OBJECT_MOTION__ + if(kernel_data.bvh.have_motion) { +#ifdef __HAIR__ + if(kernel_data.bvh.have_curves) + return bvh_intersect_hair_motion(kg, ray, isect, visibility, lcg_state, difl, extmax); +#endif /* __HAIR__ */ + + return bvh_intersect_motion(kg, ray, isect, visibility); + } +#endif /* __OBJECT_MOTION__ */ + +#ifdef __HAIR__ + if(kernel_data.bvh.have_curves) + return bvh_intersect_hair(kg, ray, isect, visibility, lcg_state, difl, extmax); +#endif /* __HAIR__ */ + +#ifdef __KERNEL_CPU__ + +#ifdef __INSTANCING__ + if(kernel_data.bvh.have_instancing) + return bvh_intersect_instancing(kg, ray, isect, visibility); +#endif /* __INSTANCING__ */ + + return bvh_intersect(kg, ray, isect, visibility); +#else /* __KERNEL_CPU__ */ + +#ifdef __INSTANCING__ + return bvh_intersect_instancing(kg, ray, isect, visibility); +#else + return bvh_intersect(kg, ray, isect, visibility); +#endif /* __INSTANCING__ */ + +#endif /* __KERNEL_CPU__ */ +} + +/* to work around titan bug when using arrays instead of textures */ +#ifdef __SUBSURFACE__ +#if !defined(__KERNEL_CUDA__) || defined(__KERNEL_CUDA_TEX_STORAGE__) +ccl_device_inline +#else +ccl_device_noinline +#endif +uint scene_intersect_subsurface(KernelGlobals *kg, const Ray *ray, Intersection *isect, int subsurface_object, uint *lcg_state, int max_hits) +{ +#ifdef __OBJECT_MOTION__ + if(kernel_data.bvh.have_motion) { +#ifdef __HAIR__ + if(kernel_data.bvh.have_curves) + return 
bvh_intersect_subsurface_hair_motion(kg, ray, isect, subsurface_object, lcg_state, max_hits); +#endif /* __HAIR__ */ + + return bvh_intersect_subsurface_motion(kg, ray, isect, subsurface_object, lcg_state, max_hits); + } +#endif /* __OBJECT_MOTION__ */ + +#ifdef __HAIR__ + if(kernel_data.bvh.have_curves) + return bvh_intersect_subsurface_hair(kg, ray, isect, subsurface_object, lcg_state, max_hits); +#endif /* __HAIR__ */ + +#ifdef __KERNEL_CPU__ + +#ifdef __INSTANCING__ + if(kernel_data.bvh.have_instancing) + return bvh_intersect_subsurface_instancing(kg, ray, isect, subsurface_object, lcg_state, max_hits); +#endif /* __INSTANCING__ */ + + return bvh_intersect_subsurface(kg, ray, isect, subsurface_object, lcg_state, max_hits); +#else /* __KERNEL_CPU__ */ + +#ifdef __INSTANCING__ + return bvh_intersect_subsurface_instancing(kg, ray, isect, subsurface_object, lcg_state, max_hits); +#else + return bvh_intersect_subsurface(kg, ray, isect, subsurface_object, lcg_state, max_hits); +#endif /* __INSTANCING__ */ + +#endif /* __KERNEL_CPU__ */ +} +#endif + +/* to work around titan bug when using arrays instead of textures */ +#ifdef __SHADOW_RECORD_ALL__ +#if !defined(__KERNEL_CUDA__) || defined(__KERNEL_CUDA_TEX_STORAGE__) +ccl_device_inline +#else +ccl_device_noinline +#endif +uint scene_intersect_shadow_all(KernelGlobals *kg, const Ray *ray, Intersection *isect, uint max_hits, uint *num_hits) +{ +#ifdef __OBJECT_MOTION__ + if(kernel_data.bvh.have_motion) { +#ifdef __HAIR__ + if(kernel_data.bvh.have_curves) + return bvh_intersect_shadow_all_hair_motion(kg, ray, isect, max_hits, num_hits); +#endif /* __HAIR__ */ + + return bvh_intersect_shadow_all_motion(kg, ray, isect, max_hits, num_hits); + } +#endif /* __OBJECT_MOTION__ */ + +#ifdef __HAIR__ + if(kernel_data.bvh.have_curves) + return bvh_intersect_shadow_all_hair(kg, ray, isect, max_hits, num_hits); +#endif /* __HAIR__ */ + +#ifdef __KERNEL_CPU__ + +#ifdef __INSTANCING__ + if(kernel_data.bvh.have_instancing) + return 
bvh_intersect_shadow_all_instancing(kg, ray, isect, max_hits, num_hits); +#endif /* __INSTANCING__ */ + + return bvh_intersect_shadow_all(kg, ray, isect, max_hits, num_hits); +#else /* __KERNEL_CPU__ */ + +#ifdef __INSTANCING__ + return bvh_intersect_shadow_all_instancing(kg, ray, isect, max_hits, num_hits); +#else + return bvh_intersect_shadow_all(kg, ray, isect, max_hits, num_hits); +#endif /* __INSTANCING__ */ + +#endif /* __KERNEL_CPU__ */ +} +#endif + + +/* Ray offset to avoid self intersection. + * + * This function should be used to compute a modified ray start position for + * rays leaving from a surface. */ + +ccl_device_inline float3 ray_offset(float3 P, float3 Ng) +{ +#ifdef __INTERSECTION_REFINE__ + const float epsilon_f = 1e-5f; + /* ideally this should match epsilon_f, but instancing and motion blur + * precision makes it problematic */ + const float epsilon_test = 1.0f; + const int epsilon_i = 32; + + float3 res; + + /* x component */ + if(fabsf(P.x) < epsilon_test) { + res.x = P.x + Ng.x*epsilon_f; + } + else { + uint ix = __float_as_uint(P.x); + ix += ((ix ^ __float_as_uint(Ng.x)) >> 31)? -epsilon_i: epsilon_i; + res.x = __uint_as_float(ix); + } + + /* y component */ + if(fabsf(P.y) < epsilon_test) { + res.y = P.y + Ng.y*epsilon_f; + } + else { + uint iy = __float_as_uint(P.y); + iy += ((iy ^ __float_as_uint(Ng.y)) >> 31)? -epsilon_i: epsilon_i; + res.y = __uint_as_float(iy); + } + + /* z component */ + if(fabsf(P.z) < epsilon_test) { + res.z = P.z + Ng.z*epsilon_f; + } + else { + uint iz = __float_as_uint(P.z); + iz += ((iz ^ __float_as_uint(Ng.z)) >> 31)? 
-epsilon_i: epsilon_i; + res.z = __uint_as_float(iz); + } + + return res; +#else + const float epsilon_f = 1e-4f; + return P + epsilon_f*Ng; +#endif +} + +CCL_NAMESPACE_END + diff --git a/intern/cycles/kernel/geom/geom_bvh_shadow.h b/intern/cycles/kernel/geom/geom_bvh_shadow.h new file mode 100644 index 00000000000..98bf82b3b2d --- /dev/null +++ b/intern/cycles/kernel/geom/geom_bvh_shadow.h @@ -0,0 +1,375 @@ +/* + * Adapted from code Copyright 2009-2010 NVIDIA Corporation, + * and code copyright 2009-2012 Intel Corporation + * + * Modifications Copyright 2011-2013, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* This is a template BVH traversal function, where various features can be + * enabled/disabled. This way we can compile optimized versions for each case + * without new features slowing things down. 
+ * + * BVH_INSTANCING: object instancing + * BVH_HAIR: hair curve rendering + * BVH_MOTION: motion blur rendering + * + */ + +#define FEATURE(f) (((BVH_FUNCTION_FEATURES) & (f)) != 0) + +ccl_device bool BVH_FUNCTION_NAME +(KernelGlobals *kg, const Ray *ray, Intersection *isect_array, const uint max_hits, uint *num_hits) +{ + /* todo: + * - likely and unlikely for if() statements + * - test restrict attribute for pointers + */ + + /* traversal stack in CUDA thread-local memory */ + int traversalStack[BVH_STACK_SIZE]; + traversalStack[0] = ENTRYPOINT_SENTINEL; + + /* traversal variables in registers */ + int stackPtr = 0; + int nodeAddr = kernel_data.bvh.root; + + /* ray parameters in registers */ + const float tmax = ray->t; + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; + float isect_t = tmax; + +#if FEATURE(BVH_MOTION) + Transform ob_tfm; +#endif + +#if FEATURE(BVH_INSTANCING) + int num_hits_in_instance = 0; +#endif + + *num_hits = 0; + isect_array->t = tmax; + +#if defined(__KERNEL_SSE2__) + const shuffle_swap_t shuf_identity = shuffle_swap_identity(); + const shuffle_swap_t shuf_swap = shuffle_swap_swap(); + + const __m128 pn = _mm_castsi128_ps(_mm_set_epi32(0x80000000, 0x80000000, 0, 0)); + __m128 Psplat[3], idirsplat[3]; + shuffle_swap_t shufflexyz[3]; + + Psplat[0] = _mm_set_ps1(P.x); + Psplat[1] = _mm_set_ps1(P.y); + Psplat[2] = _mm_set_ps1(P.z); + + __m128 tsplat = _mm_set_ps(-isect_t, -isect_t, 0.0f, 0.0f); + + gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); +#endif + + /* traversal loop */ + do { + do { + /* traverse internal nodes */ + while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { + bool traverseChild0, traverseChild1; + int nodeAddrChild1; + +#if !defined(__KERNEL_SSE2__) + /* Intersect two child bounding boxes, non-SSE version */ + float t = isect_t; + + /* fetch node data */ + float4 node0 = 
kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+0); + float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+1); + float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+2); + float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+3); + + /* intersect ray against child nodes */ + NO_EXTENDED_PRECISION float c0lox = (node0.x - P.x) * idir.x; + NO_EXTENDED_PRECISION float c0hix = (node0.z - P.x) * idir.x; + NO_EXTENDED_PRECISION float c0loy = (node1.x - P.y) * idir.y; + NO_EXTENDED_PRECISION float c0hiy = (node1.z - P.y) * idir.y; + NO_EXTENDED_PRECISION float c0loz = (node2.x - P.z) * idir.z; + NO_EXTENDED_PRECISION float c0hiz = (node2.z - P.z) * idir.z; + NO_EXTENDED_PRECISION float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f); + NO_EXTENDED_PRECISION float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t); + + NO_EXTENDED_PRECISION float c1lox = (node0.y - P.x) * idir.x; + NO_EXTENDED_PRECISION float c1hix = (node0.w - P.x) * idir.x; + NO_EXTENDED_PRECISION float c1loy = (node1.y - P.y) * idir.y; + NO_EXTENDED_PRECISION float c1hiy = (node1.w - P.y) * idir.y; + NO_EXTENDED_PRECISION float c1loz = (node2.y - P.z) * idir.z; + NO_EXTENDED_PRECISION float c1hiz = (node2.w - P.z) * idir.z; + NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f); + NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t); + + /* decide which nodes to traverse next */ +#ifdef __VISIBILITY_FLAG__ + /* this visibility test gives a 5% performance hit, how to solve? 
*/ + traverseChild0 = (c0max >= c0min) && (__float_as_uint(cnodes.z) & PATH_RAY_SHADOW); + traverseChild1 = (c1max >= c1min) && (__float_as_uint(cnodes.w) & PATH_RAY_SHADOW); +#else + traverseChild0 = (c0max >= c0min); + traverseChild1 = (c1max >= c1min); +#endif + +#else // __KERNEL_SSE2__ + /* Intersect two child bounding boxes, SSE3 version adapted from Embree */ + + /* fetch node data */ + const __m128 *bvh_nodes = (__m128*)kg->__bvh_nodes.data + nodeAddr*BVH_NODE_SIZE; + const float4 cnodes = ((float4*)bvh_nodes)[3]; + + /* intersect ray against child nodes */ + const __m128 tminmaxx = _mm_mul_ps(_mm_sub_ps(shuffle_swap(bvh_nodes[0], shufflexyz[0]), Psplat[0]), idirsplat[0]); + const __m128 tminmaxy = _mm_mul_ps(_mm_sub_ps(shuffle_swap(bvh_nodes[1], shufflexyz[1]), Psplat[1]), idirsplat[1]); + const __m128 tminmaxz = _mm_mul_ps(_mm_sub_ps(shuffle_swap(bvh_nodes[2], shufflexyz[2]), Psplat[2]), idirsplat[2]); + + /* calculate { c0min, c1min, -c0max, -c1max} */ + __m128 minmax = _mm_max_ps(_mm_max_ps(tminmaxx, tminmaxy), _mm_max_ps(tminmaxz, tsplat)); + const __m128 tminmax = _mm_xor_ps(minmax, pn); + const __m128 lrhit = _mm_cmple_ps(tminmax, shuffle<2, 3, 0, 1>(tminmax)); + + /* decide which nodes to traverse next */ +#ifdef __VISIBILITY_FLAG__ + /* this visibility test gives a 5% performance hit, how to solve? 
*/ + traverseChild0 = (_mm_movemask_ps(lrhit) & 1) && (__float_as_uint(cnodes.z) & PATH_RAY_SHADOW); + traverseChild1 = (_mm_movemask_ps(lrhit) & 2) && (__float_as_uint(cnodes.w) & PATH_RAY_SHADOW); +#else + traverseChild0 = (_mm_movemask_ps(lrhit) & 1); + traverseChild1 = (_mm_movemask_ps(lrhit) & 2); +#endif +#endif // __KERNEL_SSE2__ + + nodeAddr = __float_as_int(cnodes.x); + nodeAddrChild1 = __float_as_int(cnodes.y); + + if(traverseChild0 && traverseChild1) { + /* both children were intersected, push the farther one */ +#if !defined(__KERNEL_SSE2__) + bool closestChild1 = (c1min < c0min); +#else + union { __m128 m128; float v[4]; } uminmax; + uminmax.m128 = tminmax; + bool closestChild1 = uminmax.v[1] < uminmax.v[0]; +#endif + + if(closestChild1) { + int tmp = nodeAddr; + nodeAddr = nodeAddrChild1; + nodeAddrChild1 = tmp; + } + + ++stackPtr; + traversalStack[stackPtr] = nodeAddrChild1; + } + else { + /* one child was intersected */ + if(traverseChild1) { + nodeAddr = nodeAddrChild1; + } + else if(!traverseChild0) { + /* neither child was intersected */ + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + } + } + } + + /* if node is leaf, fetch triangle list */ + if(nodeAddr < 0) { + float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_NODE_SIZE+(BVH_NODE_SIZE-1)); + int primAddr = __float_as_int(leaf.x); + +#if FEATURE(BVH_INSTANCING) + if(primAddr >= 0) { +#endif + int primAddr2 = __float_as_int(leaf.y); + + /* pop */ + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + + /* primitive intersection */ + while(primAddr < primAddr2) { + bool hit; + uint type = kernel_tex_fetch(__prim_type, primAddr); + + /* todo: specialized intersect functions which don't fill in + * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW? 
+ * might give a few % performance improvement */ + + switch(type & PRIMITIVE_ALL) { + case PRIMITIVE_TRIANGLE: { + hit = triangle_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr); + break; + } +#if FEATURE(BVH_MOTION) + case PRIMITIVE_MOTION_TRIANGLE: { + hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, PATH_RAY_SHADOW, object, primAddr); + break; + } +#endif +#if FEATURE(BVH_HAIR) + case PRIMITIVE_CURVE: + case PRIMITIVE_MOTION_CURVE: { + if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) + hit = bvh_cardinal_curve_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr, ray->time, type, NULL, 0, 0); + else + hit = bvh_curve_intersect(kg, isect_array, P, dir, PATH_RAY_SHADOW, object, primAddr, ray->time, type, NULL, 0, 0); + break; + } +#endif + default: { + hit = false; + break; + } + } + + /* shadow ray early termination */ + if(hit) { + /* detect if this surface has a shader with transparent shadows */ + + /* todo: optimize so primitive visibility flag indicates if + * the primitive has a transparent shadow shader? 
*/ + int prim = kernel_tex_fetch(__prim_index, isect_array->prim); + int shader = 0; + +#ifdef __HAIR__ + if(kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE) +#endif + { + float4 Ns = kernel_tex_fetch(__tri_normal, prim); + shader = __float_as_int(Ns.w); + } +#ifdef __HAIR__ + else { + float4 str = kernel_tex_fetch(__curves, prim); + shader = __float_as_int(str.z); + } +#endif + int flag = kernel_tex_fetch(__shader_flag, (shader & SHADER_MASK)*2); + + /* if no transparent shadows, all light is blocked */ + if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) { + return true; + } + /* if maximum number of hits reached, block all light */ + else if(*num_hits == max_hits) { + return true; + } + + /* move on to next entry in intersections array */ + isect_array++; + (*num_hits)++; +#if FEATURE(BVH_INSTANCING) + num_hits_in_instance++; +#endif + + isect_array->t = isect_t; + } + + primAddr++; + } + } +#if FEATURE(BVH_INSTANCING) + else { + /* instance push */ + object = kernel_tex_fetch(__prim_object, -primAddr-1); + +#if FEATURE(BVH_MOTION) + bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_tfm); +#else + bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t); +#endif + + num_hits_in_instance = 0; + +#if defined(__KERNEL_SSE2__) + Psplat[0] = _mm_set_ps1(P.x); + Psplat[1] = _mm_set_ps1(P.y); + Psplat[2] = _mm_set_ps1(P.z); + + isect_array->t = isect_t; + tsplat = _mm_set_ps(-isect_t, -isect_t, 0.0f, 0.0f); + + gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); +#endif + + ++stackPtr; + traversalStack[stackPtr] = ENTRYPOINT_SENTINEL; + + nodeAddr = kernel_tex_fetch(__object_node, object); + } + } +#endif + } while(nodeAddr != ENTRYPOINT_SENTINEL); + +#if FEATURE(BVH_INSTANCING) + if(stackPtr >= 0) { + kernel_assert(object != OBJECT_NONE); + + if(num_hits_in_instance) { + float t_fac; + +#if FEATURE(BVH_MOTION) + bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_tfm); 
+#else + bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac); +#endif + + /* scale isect->t to adjust for instancing */ + for(int i = 0; i < num_hits_in_instance; i++) + (isect_array-i-1)->t *= t_fac; + } + else { + float ignore_t = FLT_MAX; + +#if FEATURE(BVH_MOTION) + bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_tfm); +#else + bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t); +#endif + } + +#if defined(__KERNEL_SSE2__) + Psplat[0] = _mm_set_ps1(P.x); + Psplat[1] = _mm_set_ps1(P.y); + Psplat[2] = _mm_set_ps1(P.z); + + isect_t = tmax; + isect_array->t = isect_t; + tsplat = _mm_set_ps(-isect_t, -isect_t, 0.0f, 0.0f); + + gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); +#endif + + object = OBJECT_NONE; + nodeAddr = traversalStack[stackPtr]; + --stackPtr; + } +#endif + } while(nodeAddr != ENTRYPOINT_SENTINEL); + + return false; +} + +#undef FEATURE +#undef BVH_FUNCTION_NAME +#undef BVH_FUNCTION_FEATURES + diff --git a/intern/cycles/kernel/kernel_bvh_subsurface.h b/intern/cycles/kernel/geom/geom_bvh_subsurface.h index df82dda2435..a19f05dd371 100644 --- a/intern/cycles/kernel/kernel_bvh_subsurface.h +++ b/intern/cycles/kernel/geom/geom_bvh_subsurface.h @@ -48,12 +48,13 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio int nodeAddr = kernel_data.bvh.root; /* ray parameters in registers */ - const float tmax = ray->t; float3 P = ray->P; - float3 idir = bvh_inverse_direction(ray->D); - int object = ~0; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; + float isect_t = ray->t; - const uint visibility = ~0; + const uint visibility = PATH_RAY_ALL_VISIBILITY; uint num_hits = 0; #if FEATURE(BVH_MOTION) @@ -72,7 +73,7 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio Psplat[1] = _mm_set_ps1(P.y); Psplat[2] = _mm_set_ps1(P.z); - __m128 tsplat = _mm_set_ps(-tmax, 
-tmax, 0.0f, 0.0f); + __m128 tsplat = _mm_set_ps(-isect_t, -isect_t, 0.0f, 0.0f); gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); #endif @@ -89,7 +90,7 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio #if !defined(__KERNEL_SSE2__) /* Intersect two child bounding boxes, non-SSE version */ - float t = tmax; + float t = isect_t; /* fetch node data */ float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+0); @@ -130,8 +131,8 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio /* Intersect two child bounding boxes, SSE3 version adapted from Embree */ /* fetch node data */ - __m128 *bvh_nodes = (__m128*)kg->__bvh_nodes.data + nodeAddr*BVH_NODE_SIZE; - float4 cnodes = ((float4*)bvh_nodes)[3]; + const __m128 *bvh_nodes = (__m128*)kg->__bvh_nodes.data + nodeAddr*BVH_NODE_SIZE; + const float4 cnodes = ((float4*)bvh_nodes)[3]; /* intersect ray against child nodes */ const __m128 tminmaxx = _mm_mul_ps(_mm_sub_ps(shuffle_swap(bvh_nodes[0], shufflexyz[0]), Psplat[0]), idirsplat[0]); @@ -203,19 +204,29 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio /* primitive intersection */ for(; primAddr < primAddr2; primAddr++) { -#if FEATURE(BVH_HAIR) - uint segment = kernel_tex_fetch(__prim_segment, primAddr); - if(segment != ~0) - continue; -#endif - /* only primitives from the same object */ - uint tri_object = (object == ~0)? kernel_tex_fetch(__prim_object, primAddr): object; + uint tri_object = (object == OBJECT_NONE)? 
kernel_tex_fetch(__prim_object, primAddr): object; - if(tri_object == subsurface_object) { + if(tri_object != subsurface_object) + continue; - /* intersect ray against primitive */ - bvh_triangle_intersect_subsurface(kg, isect_array, P, idir, object, primAddr, tmax, &num_hits, lcg_state, max_hits); + /* intersect ray against primitive */ + uint type = kernel_tex_fetch(__prim_type, primAddr); + + switch(type & PRIMITIVE_ALL) { + case PRIMITIVE_TRIANGLE: { + triangle_intersect_subsurface(kg, isect_array, P, dir, object, primAddr, isect_t, &num_hits, lcg_state, max_hits); + break; + } +#if FEATURE(BVH_MOTION) + case PRIMITIVE_MOTION_TRIANGLE: { + motion_triangle_intersect_subsurface(kg, isect_array, P, dir, ray->time, object, primAddr, isect_t, &num_hits, lcg_state, max_hits); + break; + } +#endif + default: { + break; + } } } } @@ -225,11 +236,10 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio if(subsurface_object == kernel_tex_fetch(__prim_object, -primAddr-1)) { object = subsurface_object; - float t_ignore = FLT_MAX; #if FEATURE(BVH_MOTION) - bvh_instance_motion_push(kg, object, ray, &P, &idir, &t_ignore, &ob_tfm, tmax); + bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_tfm); #else - bvh_instance_push(kg, object, ray, &P, &idir, &t_ignore, tmax); + bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t); #endif #if defined(__KERNEL_SSE2__) @@ -237,7 +247,7 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio Psplat[1] = _mm_set_ps1(P.y); Psplat[2] = _mm_set_ps1(P.z); - tsplat = _mm_set_ps(-tmax, -tmax, 0.0f, 0.0f); + tsplat = _mm_set_ps(-isect_t, -isect_t, 0.0f, 0.0f); gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); #endif @@ -259,14 +269,13 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio #if FEATURE(BVH_INSTANCING) if(stackPtr >= 0) { - kernel_assert(object != ~0); + kernel_assert(object != 
OBJECT_NONE); /* instance pop */ - float t_ignore = FLT_MAX; #if FEATURE(BVH_MOTION) - bvh_instance_motion_pop(kg, object, ray, &P, &idir, &t_ignore, &ob_tfm, tmax); + bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_tfm); #else - bvh_instance_pop(kg, object, ray, &P, &idir, &t_ignore, tmax); + bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect_t); #endif #if defined(__KERNEL_SSE2__) @@ -274,12 +283,12 @@ ccl_device uint BVH_FUNCTION_NAME(KernelGlobals *kg, const Ray *ray, Intersectio Psplat[1] = _mm_set_ps1(P.y); Psplat[2] = _mm_set_ps1(P.z); - tsplat = _mm_set_ps(-tmax, -tmax, 0.0f, 0.0f); + tsplat = _mm_set_ps(-isect_t, -isect_t, 0.0f, 0.0f); gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); #endif - object = ~0; + object = OBJECT_NONE; nodeAddr = traversalStack[stackPtr]; --stackPtr; } diff --git a/intern/cycles/kernel/kernel_bvh_traversal.h b/intern/cycles/kernel/geom/geom_bvh_traversal.h index bfd72b0aa16..9fd40f91471 100644 --- a/intern/cycles/kernel/kernel_bvh_traversal.h +++ b/intern/cycles/kernel/geom/geom_bvh_traversal.h @@ -41,7 +41,6 @@ ccl_device bool BVH_FUNCTION_NAME * - test if pushing distance on the stack helps (for non shadow rays) * - separate version for shadow rays * - likely and unlikely for if() statements - * - SSE for hair * - test restrict attribute for pointers */ @@ -54,18 +53,18 @@ ccl_device bool BVH_FUNCTION_NAME int nodeAddr = kernel_data.bvh.root; /* ray parameters in registers */ - const float tmax = ray->t; - ccl_align(16) float3 P = ray->P; - ccl_align(16) float3 idir = bvh_inverse_direction(ray->D); - int object = ~0; + float3 P = ray->P; + float3 dir = bvh_clamp_direction(ray->D); + float3 idir = bvh_inverse_direction(dir); + int object = OBJECT_NONE; #if FEATURE(BVH_MOTION) Transform ob_tfm; #endif - isect->t = tmax; - isect->object = ~0; - isect->prim = ~0; + isect->t = ray->t; + isect->object = OBJECT_NONE; + isect->prim = PRIM_NONE; isect->u = 0.0f; isect->v = 
0.0f; @@ -88,11 +87,9 @@ ccl_device bool BVH_FUNCTION_NAME /* traversal loop */ do { - do - { + do { /* traverse internal nodes */ - while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) - { + while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { bool traverseChild0, traverseChild1; int nodeAddrChild1; @@ -250,26 +247,34 @@ ccl_device bool BVH_FUNCTION_NAME /* primitive intersection */ while(primAddr < primAddr2) { bool hit; + uint type = kernel_tex_fetch(__prim_type, primAddr); - /* intersect ray against primitive */ + switch(type & PRIMITIVE_ALL) { + case PRIMITIVE_TRIANGLE: { + hit = triangle_intersect(kg, isect, P, dir, visibility, object, primAddr); + break; + } +#if FEATURE(BVH_MOTION) + case PRIMITIVE_MOTION_TRIANGLE: { + hit = motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr); + break; + } +#endif #if FEATURE(BVH_HAIR) - uint segment = kernel_tex_fetch(__prim_segment, primAddr); - if(segment != ~0) { - - if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) -#if FEATURE(BVH_HAIR_MINIMUM_WIDTH) - hit = bvh_cardinal_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, segment, lcg_state, difl, extmax); - else - hit = bvh_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, segment, lcg_state, difl, extmax); -#else - hit = bvh_cardinal_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, segment); - else - hit = bvh_curve_intersect(kg, isect, P, idir, visibility, object, primAddr, segment); + case PRIMITIVE_CURVE: + case PRIMITIVE_MOTION_CURVE: { + if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) + hit = bvh_cardinal_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax); + else + hit = bvh_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax); + break; + } #endif + default: { + hit = false; + break; + } } - else -#endif - hit = bvh_triangle_intersect(kg, isect, P, 
idir, visibility, object, primAddr); /* shadow ray early termination */ #if defined(__KERNEL_SSE2__) @@ -293,9 +298,9 @@ ccl_device bool BVH_FUNCTION_NAME object = kernel_tex_fetch(__prim_object, -primAddr-1); #if FEATURE(BVH_MOTION) - bvh_instance_motion_push(kg, object, ray, &P, &idir, &isect->t, &ob_tfm, tmax); + bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_tfm); #else - bvh_instance_push(kg, object, ray, &P, &idir, &isect->t, tmax); + bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t); #endif #if defined(__KERNEL_SSE2__) @@ -319,13 +324,13 @@ ccl_device bool BVH_FUNCTION_NAME #if FEATURE(BVH_INSTANCING) if(stackPtr >= 0) { - kernel_assert(object != ~0); + kernel_assert(object != OBJECT_NONE); /* instance pop */ #if FEATURE(BVH_MOTION) - bvh_instance_motion_pop(kg, object, ray, &P, &idir, &isect->t, &ob_tfm, tmax); + bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_tfm); #else - bvh_instance_pop(kg, object, ray, &P, &idir, &isect->t, tmax); + bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t); #endif #if defined(__KERNEL_SSE2__) @@ -338,14 +343,14 @@ ccl_device bool BVH_FUNCTION_NAME gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz); #endif - object = ~0; + object = OBJECT_NONE; nodeAddr = traversalStack[stackPtr]; --stackPtr; } #endif } while(nodeAddr != ENTRYPOINT_SENTINEL); - return (isect->prim != ~0); + return (isect->prim != PRIM_NONE); } #undef FEATURE diff --git a/intern/cycles/kernel/geom/geom_curve.h b/intern/cycles/kernel/geom/geom_curve.h new file mode 100644 index 00000000000..e1d225436a6 --- /dev/null +++ b/intern/cycles/kernel/geom/geom_curve.h @@ -0,0 +1,1035 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CCL_NAMESPACE_BEGIN + +/* Curve Primitive + * + * Curve primitive for rendering hair and fur. These can be render as flat ribbons + * or curves with actual thickness. The curve can also be rendered as line segments + * rather than curves for better performance */ + +#ifdef __HAIR__ + +/* Reading attributes on various curve elements */ + +ccl_device float curve_attribute_float(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float *dx, float *dy) +{ + if(elem == ATTR_ELEMENT_CURVE) { +#ifdef __RAY_DIFFERENTIALS__ + if(dx) *dx = 0.0f; + if(dy) *dy = 0.0f; +#endif + + return kernel_tex_fetch(__attributes_float, offset + sd->prim); + } + else if(elem == ATTR_ELEMENT_CURVE_KEY || elem == ATTR_ELEMENT_CURVE_KEY_MOTION) { + float4 curvedata = kernel_tex_fetch(__curves, sd->prim); + int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + int k1 = k0 + 1; + + float f0 = kernel_tex_fetch(__attributes_float, offset + k0); + float f1 = kernel_tex_fetch(__attributes_float, offset + k1); + +#ifdef __RAY_DIFFERENTIALS__ + if(dx) *dx = sd->du.dx*(f1 - f0); + if(dy) *dy = 0.0f; +#endif + + return (1.0f - sd->u)*f0 + sd->u*f1; + } + else { +#ifdef __RAY_DIFFERENTIALS__ + if(dx) *dx = 0.0f; + if(dy) *dy = 0.0f; +#endif + + return 0.0f; + } +} + +ccl_device float3 curve_attribute_float3(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float3 *dx, float3 *dy) +{ + if(elem == ATTR_ELEMENT_CURVE) { + /* idea: we can't derive any useful differentials here, but for tiled + * mipmap image 
caching it would be useful to avoid reading the highest + * detail level always. maybe a derivative based on the hair density + * could be computed somehow? */ +#ifdef __RAY_DIFFERENTIALS__ + if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); + if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); +#endif + + return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + sd->prim)); + } + else if(elem == ATTR_ELEMENT_CURVE_KEY || elem == ATTR_ELEMENT_CURVE_KEY_MOTION) { + float4 curvedata = kernel_tex_fetch(__curves, sd->prim); + int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + int k1 = k0 + 1; + + float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + k0)); + float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + k1)); + +#ifdef __RAY_DIFFERENTIALS__ + if(dx) *dx = sd->du.dx*(f1 - f0); + if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); +#endif + + return (1.0f - sd->u)*f0 + sd->u*f1; + } + else { +#ifdef __RAY_DIFFERENTIALS__ + if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); + if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); +#endif + + return make_float3(0.0f, 0.0f, 0.0f); + } +} + +/* Curve thickness */ + +ccl_device float curve_thickness(KernelGlobals *kg, ShaderData *sd) +{ + float r = 0.0f; + + if(sd->type & PRIMITIVE_ALL_CURVE) { + float4 curvedata = kernel_tex_fetch(__curves, sd->prim); + int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + int k1 = k0 + 1; + + float4 P_curve[2]; + + if(sd->type & PRIMITIVE_CURVE) { + P_curve[0]= kernel_tex_fetch(__curve_keys, k0); + P_curve[1]= kernel_tex_fetch(__curve_keys, k1); + } + else { + motion_curve_keys(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve); + } + + r = (P_curve[1].w - P_curve[0].w) * sd->u + P_curve[0].w; + } + + return r*2.0f; +} + +/* Curve location for motion pass, linear interpolation between keys and + * ignoring radius because we do the same for the motion keys */ + +ccl_device float3 
curve_motion_center_location(KernelGlobals *kg, ShaderData *sd) +{ + float4 curvedata = kernel_tex_fetch(__curves, sd->prim); + int k0 = __float_as_int(curvedata.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + int k1 = k0 + 1; + + float4 P_curve[2]; + + P_curve[0]= kernel_tex_fetch(__curve_keys, k0); + P_curve[1]= kernel_tex_fetch(__curve_keys, k1); + + return float4_to_float3(P_curve[1]) * sd->u + float4_to_float3(P_curve[0]) * (1.0f - sd->u); +} + +/* Curve tangent normal */ + +ccl_device float3 curve_tangent_normal(KernelGlobals *kg, ShaderData *sd) +{ + float3 tgN = make_float3(0.0f,0.0f,0.0f); + + if(sd->type & PRIMITIVE_ALL_CURVE) { + + tgN = -(-sd->I - sd->dPdu * (dot(sd->dPdu,-sd->I) / len_squared(sd->dPdu))); + tgN = normalize(tgN); + + /* need to find suitable scaled gd for corrected normal */ +#if 0 + tgN = normalize(tgN - gd * sd->dPdu); +#endif + } + + return tgN; +} + +/* Curve bounds utility function */ + +ccl_device_inline void curvebounds(float *lower, float *upper, float *extremta, float *extrema, float *extremtb, float *extremb, float p0, float p1, float p2, float p3) +{ + float halfdiscroot = (p2 * p2 - 3 * p3 * p1); + float ta = -1.0f; + float tb = -1.0f; + + *extremta = -1.0f; + *extremtb = -1.0f; + *upper = p0; + *lower = (p0 + p1) + (p2 + p3); + *extrema = *upper; + *extremb = *lower; + + if(*lower >= *upper) { + *upper = *lower; + *lower = p0; + } + + if(halfdiscroot >= 0) { + float inv3p3 = (1.0f/3.0f)/p3; + halfdiscroot = sqrtf(halfdiscroot); + ta = (-p2 - halfdiscroot) * inv3p3; + tb = (-p2 + halfdiscroot) * inv3p3; + } + + float t2; + float t3; + + if(ta > 0.0f && ta < 1.0f) { + t2 = ta * ta; + t3 = t2 * ta; + *extremta = ta; + *extrema = p3 * t3 + p2 * t2 + p1 * ta + p0; + + *upper = fmaxf(*extrema, *upper); + *lower = fminf(*extrema, *lower); + } + + if(tb > 0.0f && tb < 1.0f) { + t2 = tb * tb; + t3 = t2 * tb; + *extremtb = tb; + *extremb = p3 * t3 + p2 * t2 + p1 * tb + p0; + + *upper = fmaxf(*extremb, *upper); + *lower = fminf(*extremb, 
*lower); + } +} + +#ifdef __KERNEL_SSE2__ +ccl_device_inline __m128 transform_point_T3(const __m128 t[3], const __m128 &a) +{ + return fma(broadcast<0>(a), t[0], fma(broadcast<1>(a), t[1], _mm_mul_ps(broadcast<2>(a), t[2]))); +} +#endif + +#ifdef __KERNEL_SSE2__ +/* Pass P and dir by reference to aligned vector */ +ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersection *isect, + const float3 &P, const float3 &dir, uint visibility, int object, int curveAddr, float time, int type, uint *lcg_state, float difl, float extmax) +#else +ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersection *isect, + float3 P, float3 dir, uint visibility, int object, int curveAddr, float time,int type, uint *lcg_state, float difl, float extmax) +#endif +{ + int segment = PRIMITIVE_UNPACK_SEGMENT(type); + float epsilon = 0.0f; + float r_st, r_en; + + int depth = kernel_data.curve.subdivisions; + int flags = kernel_data.curve.curveflags; + int prim = kernel_tex_fetch(__prim_index, curveAddr); + +#ifdef __KERNEL_SSE2__ + __m128 vdir = load_m128(dir); + __m128 vcurve_coef[4]; + const float3 *curve_coef = (float3 *)vcurve_coef; + + { + __m128 dtmp = _mm_mul_ps(vdir, vdir); + __m128 d_ss = _mm_sqrt_ss(_mm_add_ss(dtmp, broadcast<2>(dtmp))); + __m128 rd_ss = _mm_div_ss(_mm_set_ss(1.0f), d_ss); + + __m128i v00vec = _mm_load_si128((__m128i *)&kg->__curves.data[prim]); + int2 &v00 = (int2 &)v00vec; + + int k0 = v00.x + segment; + int k1 = k0 + 1; + int ka = max(k0 - 1, v00.x); + int kb = min(k1 + 1, v00.x + v00.y - 1); + + __m128 P_curve[4]; + + if(type & PRIMITIVE_CURVE) { + P_curve[0] = _mm_load_ps(&kg->__curve_keys.data[ka].x); + P_curve[1] = _mm_load_ps(&kg->__curve_keys.data[k0].x); + P_curve[2] = _mm_load_ps(&kg->__curve_keys.data[k1].x); + P_curve[3] = _mm_load_ps(&kg->__curve_keys.data[kb].x); + } + else { + int fobject = (object == OBJECT_NONE)? 
kernel_tex_fetch(__prim_object, curveAddr): object; + motion_cardinal_curve_keys(kg, fobject, prim, time, ka, k0, k1, kb, (float4*)&P_curve); + } + + __m128 rd_sgn = set_sign_bit<0, 1, 1, 1>(broadcast<0>(rd_ss)); + __m128 mul_zxxy = _mm_mul_ps(shuffle<2, 0, 0, 1>(vdir), rd_sgn); + __m128 mul_yz = _mm_mul_ps(shuffle<1, 2, 1, 2>(vdir), mul_zxxy); + __m128 mul_shuf = shuffle<0, 1, 2, 3>(mul_zxxy, mul_yz); + __m128 vdir0 = _mm_and_ps(vdir, _mm_castsi128_ps(_mm_setr_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0))); + + __m128 htfm0 = shuffle<0, 2, 0, 3>(mul_shuf, vdir0); + __m128 htfm1 = shuffle<1, 0, 1, 3>(_mm_set_ss(_mm_cvtss_f32(d_ss)), vdir0); + __m128 htfm2 = shuffle<1, 3, 2, 3>(mul_shuf, vdir0); + + __m128 htfm[] = { htfm0, htfm1, htfm2 }; + __m128 vP = load_m128(P); + __m128 p0 = transform_point_T3(htfm, _mm_sub_ps(P_curve[0], vP)); + __m128 p1 = transform_point_T3(htfm, _mm_sub_ps(P_curve[1], vP)); + __m128 p2 = transform_point_T3(htfm, _mm_sub_ps(P_curve[2], vP)); + __m128 p3 = transform_point_T3(htfm, _mm_sub_ps(P_curve[3], vP)); + + float fc = 0.71f; + __m128 vfc = _mm_set1_ps(fc); + __m128 vfcxp3 = _mm_mul_ps(vfc, p3); + + vcurve_coef[0] = p1; + vcurve_coef[1] = _mm_mul_ps(vfc, _mm_sub_ps(p2, p0)); + vcurve_coef[2] = fma(_mm_set1_ps(fc * 2.0f), p0, fma(_mm_set1_ps(fc - 3.0f), p1, fms(_mm_set1_ps(3.0f - 2.0f * fc), p2, vfcxp3))); + vcurve_coef[3] = fms(_mm_set1_ps(fc - 2.0f), _mm_sub_ps(p2, p1), fms(vfc, p0, vfcxp3)); + + r_st = ((float4 &)P_curve[1]).w; + r_en = ((float4 &)P_curve[2]).w; + } +#else + float3 curve_coef[4]; + + /* curve Intersection check */ + /* obtain curve parameters */ + { + /* ray transform created - this should be created at beginning of intersection loop */ + Transform htfm; + float d = sqrtf(dir.x * dir.x + dir.z * dir.z); + htfm = make_transform( + dir.z / d, 0, -dir.x /d, 0, + -dir.x * dir.y /d, d, -dir.y * dir.z /d, 0, + dir.x, dir.y, dir.z, 0, + 0, 0, 0, 1); + + float4 v00 = kernel_tex_fetch(__curves, prim); + + int k0 = 
__float_as_int(v00.x) + segment; + int k1 = k0 + 1; + + int ka = max(k0 - 1,__float_as_int(v00.x)); + int kb = min(k1 + 1,__float_as_int(v00.x) + __float_as_int(v00.y) - 1); + + float4 P_curve[4]; + + if(type & PRIMITIVE_CURVE) { + P_curve[0] = kernel_tex_fetch(__curve_keys, ka); + P_curve[1] = kernel_tex_fetch(__curve_keys, k0); + P_curve[2] = kernel_tex_fetch(__curve_keys, k1); + P_curve[3] = kernel_tex_fetch(__curve_keys, kb); + } + else { + int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, curveAddr): object; + motion_cardinal_curve_keys(kg, fobject, prim, time, ka, k0, k1, kb, P_curve); + } + + float3 p0 = transform_point(&htfm, float4_to_float3(P_curve[0]) - P); + float3 p1 = transform_point(&htfm, float4_to_float3(P_curve[1]) - P); + float3 p2 = transform_point(&htfm, float4_to_float3(P_curve[2]) - P); + float3 p3 = transform_point(&htfm, float4_to_float3(P_curve[3]) - P); + + float fc = 0.71f; + curve_coef[0] = p1; + curve_coef[1] = -fc*p0 + fc*p2; + curve_coef[2] = 2.0f * fc * p0 + (fc - 3.0f) * p1 + (3.0f - 2.0f * fc) * p2 - fc * p3; + curve_coef[3] = -fc * p0 + (2.0f - fc) * p1 + (fc - 2.0f) * p2 + fc * p3; + r_st = P_curve[1].w; + r_en = P_curve[2].w; + } +#endif + + float r_curr = max(r_st, r_en); + + if((flags & CURVE_KN_RIBBONS) || !(flags & CURVE_KN_BACKFACING)) + epsilon = 2 * r_curr; + + /* find bounds - this is slow for cubic curves */ + float upper, lower; + + float zextrem[4]; + curvebounds(&lower, &upper, &zextrem[0], &zextrem[1], &zextrem[2], &zextrem[3], curve_coef[0].z, curve_coef[1].z, curve_coef[2].z, curve_coef[3].z); + if(lower - r_curr > isect->t || upper + r_curr < epsilon) + return false; + + /* minimum width extension */ + float mw_extension = min(difl * fabsf(upper), extmax); + float r_ext = mw_extension + r_curr; + + float xextrem[4]; + curvebounds(&lower, &upper, &xextrem[0], &xextrem[1], &xextrem[2], &xextrem[3], curve_coef[0].x, curve_coef[1].x, curve_coef[2].x, curve_coef[3].x); + if(lower > r_ext || upper 
< -r_ext) + return false; + + float yextrem[4]; + curvebounds(&lower, &upper, &yextrem[0], &yextrem[1], &yextrem[2], &yextrem[3], curve_coef[0].y, curve_coef[1].y, curve_coef[2].y, curve_coef[3].y); + if(lower > r_ext || upper < -r_ext) + return false; + + /* setup recurrent loop */ + int level = 1 << depth; + int tree = 0; + float resol = 1.0f / (float)level; + bool hit = false; + + /* begin loop */ + while(!(tree >> (depth))) { + float i_st = tree * resol; + float i_en = i_st + (level * resol); +#ifdef __KERNEL_SSE2__ + __m128 vi_st = _mm_set1_ps(i_st), vi_en = _mm_set1_ps(i_en); + __m128 vp_st = fma(fma(fma(vcurve_coef[3], vi_st, vcurve_coef[2]), vi_st, vcurve_coef[1]), vi_st, vcurve_coef[0]); + __m128 vp_en = fma(fma(fma(vcurve_coef[3], vi_en, vcurve_coef[2]), vi_en, vcurve_coef[1]), vi_en, vcurve_coef[0]); + + __m128 vbmin = _mm_min_ps(vp_st, vp_en); + __m128 vbmax = _mm_max_ps(vp_st, vp_en); + + float3 &bmin = (float3 &)vbmin, &bmax = (float3 &)vbmax; + float &bminx = bmin.x, &bminy = bmin.y, &bminz = bmin.z; + float &bmaxx = bmax.x, &bmaxy = bmax.y, &bmaxz = bmax.z; + float3 &p_st = (float3 &)vp_st, &p_en = (float3 &)vp_en; +#else + float3 p_st = ((curve_coef[3] * i_st + curve_coef[2]) * i_st + curve_coef[1]) * i_st + curve_coef[0]; + float3 p_en = ((curve_coef[3] * i_en + curve_coef[2]) * i_en + curve_coef[1]) * i_en + curve_coef[0]; + + float bminx = min(p_st.x, p_en.x); + float bmaxx = max(p_st.x, p_en.x); + float bminy = min(p_st.y, p_en.y); + float bmaxy = max(p_st.y, p_en.y); + float bminz = min(p_st.z, p_en.z); + float bmaxz = max(p_st.z, p_en.z); +#endif + + if(xextrem[0] >= i_st && xextrem[0] <= i_en) { + bminx = min(bminx,xextrem[1]); + bmaxx = max(bmaxx,xextrem[1]); + } + if(xextrem[2] >= i_st && xextrem[2] <= i_en) { + bminx = min(bminx,xextrem[3]); + bmaxx = max(bmaxx,xextrem[3]); + } + if(yextrem[0] >= i_st && yextrem[0] <= i_en) { + bminy = min(bminy,yextrem[1]); + bmaxy = max(bmaxy,yextrem[1]); + } + if(yextrem[2] >= i_st && yextrem[2] <= 
i_en) { + bminy = min(bminy,yextrem[3]); + bmaxy = max(bmaxy,yextrem[3]); + } + if(zextrem[0] >= i_st && zextrem[0] <= i_en) { + bminz = min(bminz,zextrem[1]); + bmaxz = max(bmaxz,zextrem[1]); + } + if(zextrem[2] >= i_st && zextrem[2] <= i_en) { + bminz = min(bminz,zextrem[3]); + bmaxz = max(bmaxz,zextrem[3]); + } + + float r1 = r_st + (r_en - r_st) * i_st; + float r2 = r_st + (r_en - r_st) * i_en; + r_curr = max(r1, r2); + + mw_extension = min(difl * fabsf(bmaxz), extmax); + float r_ext = mw_extension + r_curr; + float coverage = 1.0f; + + if (bminz - r_curr > isect->t || bmaxz + r_curr < epsilon || bminx > r_ext|| bmaxx < -r_ext|| bminy > r_ext|| bmaxy < -r_ext) { + /* the bounding box does not overlap the square centered at O */ + tree += level; + level = tree & -tree; + } + else if (level == 1) { + + /* the maximum recursion depth is reached. + * check if dP0.(Q-P0)>=0 and dPn.(Pn-Q)>=0. + * dP* is reversed if necessary.*/ + float t = isect->t; + float u = 0.0f; + float gd = 0.0f; + + if(flags & CURVE_KN_RIBBONS) { + float3 tg = (p_en - p_st); + float w = tg.x * tg.x + tg.y * tg.y; + if (w == 0) { + tree++; + level = tree & -tree; + continue; + } + w = -(p_st.x * tg.x + p_st.y * tg.y) / w; + w = clamp((float)w, 0.0f, 1.0f); + + /* compute u on the curve segment */ + u = i_st * (1 - w) + i_en * w; + r_curr = r_st + (r_en - r_st) * u; + /* compare x-y distances */ + float3 p_curr = ((curve_coef[3] * u + curve_coef[2]) * u + curve_coef[1]) * u + curve_coef[0]; + + float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1]; + if (dot(tg, dp_st)< 0) + dp_st *= -1; + if (dot(dp_st, -p_st) + p_curr.z * dp_st.z < 0) { + tree++; + level = tree & -tree; + continue; + } + float3 dp_en = (3 * curve_coef[3] * i_en + 2 * curve_coef[2]) * i_en + curve_coef[1]; + if (dot(tg, dp_en) < 0) + dp_en *= -1; + if (dot(dp_en, p_en) - p_curr.z * dp_en.z < 0) { + tree++; + level = tree & -tree; + continue; + } + + /* compute coverage */ + float r_ext = r_curr; 
+ coverage = 1.0f; + if(difl != 0.0f) { + mw_extension = min(difl * fabsf(bmaxz), extmax); + r_ext = mw_extension + r_curr; + float d = sqrtf(p_curr.x * p_curr.x + p_curr.y * p_curr.y); + float d0 = d - r_curr; + float d1 = d + r_curr; + float inv_mw_extension = 1.0f/mw_extension; + if (d0 >= 0) + coverage = (min(d1 * inv_mw_extension, 1.0f) - min(d0 * inv_mw_extension, 1.0f)) * 0.5f; + else // inside + coverage = (min(d1 * inv_mw_extension, 1.0f) + min(-d0 * inv_mw_extension, 1.0f)) * 0.5f; + } + + if (p_curr.x * p_curr.x + p_curr.y * p_curr.y >= r_ext * r_ext || p_curr.z <= epsilon || isect->t < p_curr.z) { + tree++; + level = tree & -tree; + continue; + } + + t = p_curr.z; + + /* stochastic fade from minimum width */ + if(difl != 0.0f && lcg_state) { + if(coverage != 1.0f && (lcg_step_float(lcg_state) > coverage)) + return hit; + } + } + else { + float l = len(p_en - p_st); + /* minimum width extension */ + float or1 = r1; + float or2 = r2; + + if(difl != 0.0f) { + mw_extension = min(len(p_st - P) * difl, extmax); + or1 = r1 < mw_extension ? mw_extension : r1; + mw_extension = min(len(p_en - P) * difl, extmax); + or2 = r2 < mw_extension ? 
mw_extension : r2; + } + /* --- */ + float invl = 1.0f/l; + float3 tg = (p_en - p_st) * invl; + gd = (or2 - or1) * invl; + float difz = -dot(p_st,tg); + float cyla = 1.0f - (tg.z * tg.z * (1 + gd*gd)); + float invcyla = 1.0f/cyla; + float halfb = (-p_st.z - tg.z*(difz + gd*(difz*gd + or1))); + float tcentre = -halfb*invcyla; + float zcentre = difz + (tg.z * tcentre); + float3 tdif = - p_st; + tdif.z += tcentre; + float tdifz = dot(tdif,tg); + float tb = 2*(tdif.z - tg.z*(tdifz + gd*(tdifz*gd + or1))); + float tc = dot(tdif,tdif) - tdifz * tdifz * (1 + gd*gd) - or1*or1 - 2*or1*tdifz*gd; + float td = tb*tb - 4*cyla*tc; + if (td < 0.0f) { + tree++; + level = tree & -tree; + continue; + } + + float rootd = sqrtf(td); + float correction = (-tb - rootd) * 0.5f * invcyla; + t = tcentre + correction; + + float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1]; + if (dot(tg, dp_st)< 0) + dp_st *= -1; + float3 dp_en = (3 * curve_coef[3] * i_en + 2 * curve_coef[2]) * i_en + curve_coef[1]; + if (dot(tg, dp_en) < 0) + dp_en *= -1; + + if(flags & CURVE_KN_BACKFACING && (dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 || isect->t < t || t <= 0.0f)) { + correction = (-tb + rootd) * 0.5f * invcyla; + t = tcentre + correction; + } + + if (dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 || isect->t < t || t <= 0.0f) { + tree++; + level = tree & -tree; + continue; + } + + float w = (zcentre + (tg.z * correction)) * invl; + w = clamp((float)w, 0.0f, 1.0f); + /* compute u on the curve segment */ + u = i_st * (1 - w) + i_en * w; + + /* stochastic fade from minimum width */ + if(difl != 0.0f && lcg_state) { + r_curr = r1 + (r2 - r1) * w; + r_ext = or1 + (or2 - or1) * w; + coverage = r_curr/r_ext; + + if(coverage != 1.0f && (lcg_step_float(lcg_state) > coverage)) + return hit; + } + } + /* we found a new intersection */ + +#ifdef __VISIBILITY_FLAG__ + /* visibility flag test. 
we do it here under the assumption + * that most triangles are culled by node flags */ + if(kernel_tex_fetch(__prim_visibility, curveAddr) & visibility) +#endif + { + /* record intersection */ + isect->prim = curveAddr; + isect->object = object; + isect->type = type; + isect->u = u; + isect->v = gd; + /*isect->transparency = 1.0f - coverage; */ + isect->t = t; + hit = true; + } + + tree++; + level = tree & -tree; + } + else { + /* split the curve into two curves and process */ + level = level >> 1; + } + } + + return hit; +} + +ccl_device_inline bool bvh_curve_intersect(KernelGlobals *kg, Intersection *isect, + float3 P, float3 direction, uint visibility, int object, int curveAddr, float time, int type, uint *lcg_state, float difl, float extmax) +{ + /* define few macros to minimize code duplication for SSE */ +#ifndef __KERNEL_SSE2__ +#define len3_squared(x) len_squared(x) +#define len3(x) len(x) +#define dot3(x, y) dot(x, y) +#endif + + int segment = PRIMITIVE_UNPACK_SEGMENT(type); + /* curve Intersection check */ + int flags = kernel_data.curve.curveflags; + + int prim = kernel_tex_fetch(__prim_index, curveAddr); + float4 v00 = kernel_tex_fetch(__curves, prim); + + int cnum = __float_as_int(v00.x); + int k0 = cnum + segment; + int k1 = k0 + 1; + +#ifndef __KERNEL_SSE2__ + float4 P_curve[2]; + + if(type & PRIMITIVE_CURVE) { + P_curve[0]= kernel_tex_fetch(__curve_keys, k0); + P_curve[1]= kernel_tex_fetch(__curve_keys, k1); + } + else { + int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, curveAddr): object; + motion_curve_keys(kg, fobject, prim, time, k0, k1, P_curve); + } + + float or1 = P_curve[0].w; + float or2 = P_curve[1].w; + float3 p1 = float4_to_float3(P_curve[0]); + float3 p2 = float4_to_float3(P_curve[1]); + + /* minimum width extension */ + float r1 = or1; + float r2 = or2; + float3 dif = P - p1; + float3 dif_second = P - p2; + if(difl != 0.0f) { + float pixelsize = min(len3(dif) * difl, extmax); + r1 = or1 < pixelsize ? 
pixelsize : or1; + pixelsize = min(len3(dif_second) * difl, extmax); + r2 = or2 < pixelsize ? pixelsize : or2; + } + /* --- */ + + float3 p21_diff = p2 - p1; + float3 sphere_dif1 = (dif + dif_second) * 0.5f; + float3 dir = direction; + float sphere_b_tmp = dot3(dir, sphere_dif1); + float3 sphere_dif2 = sphere_dif1 - sphere_b_tmp * dir; +#else + __m128 P_curve[2]; + + if(type & PRIMITIVE_CURVE) { + P_curve[0] = _mm_load_ps(&kg->__curve_keys.data[k0].x); + P_curve[1] = _mm_load_ps(&kg->__curve_keys.data[k1].x); + } + else { + int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, curveAddr): object; + motion_curve_keys(kg, fobject, prim, time, k0, k1, (float4*)&P_curve); + } + + const __m128 or12 = shuffle<3, 3, 3, 3>(P_curve[0], P_curve[1]); + + __m128 r12 = or12; + const __m128 vP = load_m128(P); + const __m128 dif = _mm_sub_ps(vP, P_curve[0]); + const __m128 dif_second = _mm_sub_ps(vP, P_curve[1]); + if(difl != 0.0f) { + const __m128 len1_sq = len3_squared_splat(dif); + const __m128 len2_sq = len3_squared_splat(dif_second); + const __m128 len12 = _mm_sqrt_ps(shuffle<0, 0, 0, 0>(len1_sq, len2_sq)); + const __m128 pixelsize12 = _mm_min_ps(_mm_mul_ps(len12, _mm_set1_ps(difl)), _mm_set1_ps(extmax)); + r12 = _mm_max_ps(or12, pixelsize12); + } + float or1 = _mm_cvtss_f32(or12), or2 = _mm_cvtss_f32(broadcast<2>(or12)); + float r1 = _mm_cvtss_f32(r12), r2 = _mm_cvtss_f32(broadcast<2>(r12)); + + const __m128 p21_diff = _mm_sub_ps(P_curve[1], P_curve[0]); + const __m128 sphere_dif1 = _mm_mul_ps(_mm_add_ps(dif, dif_second), _mm_set1_ps(0.5f)); + const __m128 dir = load_m128(direction); + const __m128 sphere_b_tmp = dot3_splat(dir, sphere_dif1); + const __m128 sphere_dif2 = fnma(sphere_b_tmp, dir, sphere_dif1); +#endif + + float mr = max(r1, r2); + float l = len3(p21_diff); + float invl = 1.0f / l; + float sp_r = mr + 0.5f * l; + + float sphere_b = dot3(dir, sphere_dif2); + float sdisc = sphere_b * sphere_b - len3_squared(sphere_dif2) + sp_r * sp_r; + + 
if(sdisc < 0.0f) + return false; + + /* obtain parameters and test midpoint distance for suitable modes */ +#ifndef __KERNEL_SSE2__ + float3 tg = p21_diff * invl; +#else + const __m128 tg = _mm_mul_ps(p21_diff, _mm_set1_ps(invl)); +#endif + float gd = (r2 - r1) * invl; + + float dirz = dot3(dir, tg); + float difz = dot3(dif, tg); + + float a = 1.0f - (dirz*dirz*(1 + gd*gd)); + + float halfb = dot3(dir, dif) - dirz*(difz + gd*(difz*gd + r1)); + + float tcentre = -halfb/a; + float zcentre = difz + (dirz * tcentre); + + if((tcentre > isect->t) && !(flags & CURVE_KN_ACCURATE)) + return false; + if((zcentre < 0 || zcentre > l) && !(flags & CURVE_KN_ACCURATE) && !(flags & CURVE_KN_INTERSECTCORRECTION)) + return false; + + /* test minimum separation */ +#ifndef __KERNEL_SSE2__ + float3 cprod = cross(tg, dir); + float cprod2sq = len3_squared(cross(tg, dif)); +#else + const __m128 cprod = cross(tg, dir); + float cprod2sq = len3_squared(cross_zxy(tg, dif)); +#endif + float cprodsq = len3_squared(cprod); + float distscaled = dot3(cprod, dif); + + if(cprodsq == 0) + distscaled = cprod2sq; + else + distscaled = (distscaled*distscaled)/cprodsq; + + if(distscaled > mr*mr) + return false; + + /* calculate true intersection */ +#ifndef __KERNEL_SSE2__ + float3 tdif = dif + tcentre * dir; +#else + const __m128 tdif = fma(_mm_set1_ps(tcentre), dir, dif); +#endif + float tdifz = dot3(tdif, tg); + float tdifma = tdifz*gd + r1; + float tb = 2*(dot3(dir, tdif) - dirz*(tdifz + gd*tdifma)); + float tc = dot3(tdif, tdif) - tdifz*tdifz - tdifma*tdifma; + float td = tb*tb - 4*a*tc; + + if (td < 0.0f) + return false; + + float rootd = 0.0f; + float correction = 0.0f; + if(flags & CURVE_KN_ACCURATE) { + rootd = sqrtf(td); + correction = ((-tb - rootd)/(2*a)); + } + + float t = tcentre + correction; + + if(t < isect->t) { + + if(flags & CURVE_KN_INTERSECTCORRECTION) { + rootd = sqrtf(td); + correction = ((-tb - rootd)/(2*a)); + t = tcentre + correction; + } + + float z = zcentre + (dirz * 
correction); + // bool backface = false; + + if(flags & CURVE_KN_BACKFACING && (t < 0.0f || z < 0 || z > l)) { + // backface = true; + correction = ((-tb + rootd)/(2*a)); + t = tcentre + correction; + z = zcentre + (dirz * correction); + } + + /* stochastic fade from minimum width */ + float adjradius = or1 + z * (or2 - or1) * invl; + adjradius = adjradius / (r1 + z * gd); + if(lcg_state && adjradius != 1.0f) { + if(lcg_step_float(lcg_state) > adjradius) + return false; + } + /* --- */ + + if(t > 0.0f && t < isect->t && z >= 0 && z <= l) { + + if (flags & CURVE_KN_ENCLOSEFILTER) { + float enc_ratio = 1.01f; + if((difz > -r1 * enc_ratio) && (dot3(dif_second, tg) < r2 * enc_ratio)) { + float a2 = 1.0f - (dirz*dirz*(1 + gd*gd*enc_ratio*enc_ratio)); + float c2 = dot3(dif, dif) - difz * difz * (1 + gd*gd*enc_ratio*enc_ratio) - r1*r1*enc_ratio*enc_ratio - 2*r1*difz*gd*enc_ratio; + if(a2*c2 < 0.0f) + return false; + } + } + +#ifdef __VISIBILITY_FLAG__ + /* visibility flag test. we do it here under the assumption + * that most triangles are culled by node flags */ + if(kernel_tex_fetch(__prim_visibility, curveAddr) & visibility) +#endif + { + /* record intersection */ + isect->prim = curveAddr; + isect->object = object; + isect->type = type; + isect->u = z*invl; + isect->v = gd; + /*isect->transparency = 1.0f - adjradius;*/ + isect->t = t; + + return true; + } + } + } + + return false; + +#ifndef __KERNEL_SSE2__ +#undef len3_squared +#undef len3 +#undef dot3 +#endif +} + +ccl_device_inline float3 curvetangent(float t, float3 p0, float3 p1, float3 p2, float3 p3) +{ + float fc = 0.71f; + float data[4]; + float t2 = t * t; + data[0] = -3.0f * fc * t2 + 4.0f * fc * t - fc; + data[1] = 3.0f * (2.0f - fc) * t2 + 2.0f * (fc - 3.0f) * t; + data[2] = 3.0f * (fc - 2.0f) * t2 + 2.0f * (3.0f - 2.0f * fc) * t + fc; + data[3] = 3.0f * fc * t2 - 2.0f * fc * t; + return data[0] * p0 + data[1] * p1 + data[2] * p2 + data[3] * p3; +} + +ccl_device_inline float3 curvepoint(float t, float3 p0, 
float3 p1, float3 p2, float3 p3) +{ + float data[4]; + float fc = 0.71f; + float t2 = t * t; + float t3 = t2 * t; + data[0] = -fc * t3 + 2.0f * fc * t2 - fc * t; + data[1] = (2.0f - fc) * t3 + (fc - 3.0f) * t2 + 1.0f; + data[2] = (fc - 2.0f) * t3 + (3.0f - 2.0f * fc) * t2 + fc * t; + data[3] = fc * t3 - fc * t2; + return data[0] * p0 + data[1] * p1 + data[2] * p2 + data[3] * p3; +} + +ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray) +{ + int flag = kernel_data.curve.curveflags; + float t = isect->t; + float3 P = ray->P; + float3 D = ray->D; + + if(isect->object != OBJECT_NONE) { +#ifdef __OBJECT_MOTION__ + Transform tfm = sd->ob_itfm; +#else + Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); +#endif + + P = transform_point(&tfm, P); + D = transform_direction(&tfm, D*t); + D = normalize_len(D, &t); + } + + int prim = kernel_tex_fetch(__prim_index, isect->prim); + float4 v00 = kernel_tex_fetch(__curves, prim); + + int k0 = __float_as_int(v00.x) + PRIMITIVE_UNPACK_SEGMENT(sd->type); + int k1 = k0 + 1; + + float3 tg; + + if(flag & CURVE_KN_INTERPOLATE) { + int ka = max(k0 - 1,__float_as_int(v00.x)); + int kb = min(k1 + 1,__float_as_int(v00.x) + __float_as_int(v00.y) - 1); + + float4 P_curve[4]; + + if(sd->type & PRIMITIVE_CURVE) { + P_curve[0] = kernel_tex_fetch(__curve_keys, ka); + P_curve[1] = kernel_tex_fetch(__curve_keys, k0); + P_curve[2] = kernel_tex_fetch(__curve_keys, k1); + P_curve[3] = kernel_tex_fetch(__curve_keys, kb); + } + else { + motion_cardinal_curve_keys(kg, sd->object, sd->prim, sd->time, ka, k0, k1, kb, P_curve); + } + + float3 p[4]; + p[0] = float4_to_float3(P_curve[0]); + p[1] = float4_to_float3(P_curve[1]); + p[2] = float4_to_float3(P_curve[2]); + p[3] = float4_to_float3(P_curve[3]); + + P = P + D*t; + +#ifdef __UV__ + sd->u = isect->u; + sd->v = 0.0f; +#endif + + if(kernel_data.curve.curveflags & CURVE_KN_RIBBONS) { + tg = 
normalize(curvetangent(isect->u, p[0], p[1], p[2], p[3])); + sd->Ng = normalize(-(D - tg * (dot(tg, D)))); + } + else { + /* direction from inside to surface of curve */ + float3 p_curr = curvepoint(isect->u, p[0], p[1], p[2], p[3]); + sd->Ng = normalize(P - p_curr); + + /* adjustment for changing radius */ + float gd = isect->v; + + if(gd != 0.0f) { + tg = normalize(curvetangent(isect->u, p[0], p[1], p[2], p[3])); + sd->Ng = sd->Ng - gd * tg; + sd->Ng = normalize(sd->Ng); + } + } + + /* todo: sometimes the normal is still so that this is detected as + * backfacing even if cull backfaces is enabled */ + + sd->N = sd->Ng; + } + else { + float4 P_curve[2]; + + if(sd->type & PRIMITIVE_CURVE) { + P_curve[0]= kernel_tex_fetch(__curve_keys, k0); + P_curve[1]= kernel_tex_fetch(__curve_keys, k1); + } + else { + motion_curve_keys(kg, sd->object, sd->prim, sd->time, k0, k1, P_curve); + } + + float l = 1.0f; + tg = normalize_len(float4_to_float3(P_curve[1] - P_curve[0]), &l); + + P = P + D*t; + + float3 dif = P - float4_to_float3(P_curve[0]); + +#ifdef __UV__ + sd->u = dot(dif,tg)/l; + sd->v = 0.0f; +#endif + + if (flag & CURVE_KN_TRUETANGENTGNORMAL) { + sd->Ng = -(D - tg * dot(tg, D)); + sd->Ng = normalize(sd->Ng); + } + else { + float gd = isect->v; + + /* direction from inside to surface of curve */ + sd->Ng = (dif - tg * sd->u * l) / (P_curve[0].w + sd->u * l * gd); + + /* adjustment for changing radius */ + if (gd != 0.0f) { + sd->Ng = sd->Ng - gd * tg; + sd->Ng = normalize(sd->Ng); + } + } + + sd->N = sd->Ng; + } + +#ifdef __DPDU__ + /* dPdu/dPdv */ + sd->dPdu = tg; + sd->dPdv = cross(tg, sd->Ng); +#endif + + /*add fading parameter for minimum pixel width with transparency bsdf*/ + /*sd->curve_transparency = isect->transparency;*/ + /*sd->curve_radius = sd->u * gd * l + r1;*/ + + if(isect->object != OBJECT_NONE) { +#ifdef __OBJECT_MOTION__ + Transform tfm = sd->ob_tfm; +#else + Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); +#endif + + P = 
transform_point(&tfm, P); + } + + return P; +} + +#endif + +CCL_NAMESPACE_END + diff --git a/intern/cycles/kernel/geom/geom_motion_curve.h b/intern/cycles/kernel/geom/geom_motion_curve.h new file mode 100644 index 00000000000..1022a957b05 --- /dev/null +++ b/intern/cycles/kernel/geom/geom_motion_curve.h @@ -0,0 +1,148 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CCL_NAMESPACE_BEGIN + +/* Motion Curve Primitive + * + * These are stored as regular curves, plus extra positions and radii at times + * other than the frame center. Computing the curve keys at a given ray time is + * a matter of interpolation of the two steps between which the ray time lies. + * + * The extra curve keys are stored as ATTR_STD_MOTION_VERTEX_POSITION. + */ + +#ifdef __HAIR__ + +ccl_device_inline int find_attribute_curve_motion(KernelGlobals *kg, int object, uint id, AttributeElement *elem) +{ + /* todo: find a better (faster) solution for this, maybe store offset per object */ + uint attr_offset = object*kernel_data.bvh.attributes_map_stride + ATTR_PRIM_CURVE; + uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset); + + while(attr_map.x != id) { + attr_offset += ATTR_PRIM_TYPES; + attr_map = kernel_tex_fetch(__attributes_map, attr_offset); + } + + *elem = (AttributeElement)attr_map.y; + + /* return result */ + return (attr_map.y == ATTR_ELEMENT_NONE) ? 
(int)ATTR_STD_NOT_FOUND : (int)attr_map.z; +} + +ccl_device_inline void motion_curve_keys_for_step(KernelGlobals *kg, int offset, int numkeys, int numsteps, int step, int k0, int k1, float4 keys[2]) +{ + if(step == numsteps) { + /* center step: regular vertex location */ + keys[0] = kernel_tex_fetch(__curve_keys, k0); + keys[1] = kernel_tex_fetch(__curve_keys, k1); + } + else { + /* center step not stored in this array */ + if(step > numsteps) + step--; + + offset += step*numkeys; + + keys[0] = kernel_tex_fetch(__attributes_float3, offset + k0); + keys[1] = kernel_tex_fetch(__attributes_float3, offset + k1); + } +} + +/* return 2 curve key locations */ +ccl_device_inline void motion_curve_keys(KernelGlobals *kg, int object, int prim, float time, int k0, int k1, float4 keys[2]) +{ + /* get motion info */ + int numsteps, numkeys; + object_motion_info(kg, object, &numsteps, NULL, &numkeys); + + /* figure out which steps we need to fetch and their interpolation factor */ + int maxstep = numsteps*2; + int step = min((int)(time*maxstep), maxstep-1); + float t = time*maxstep - step; + + /* find attribute */ + AttributeElement elem; + int offset = find_attribute_curve_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem); + kernel_assert(offset != ATTR_STD_NOT_FOUND); + + /* fetch key coordinates */ + float4 next_keys[2]; + + motion_curve_keys_for_step(kg, offset, numkeys, numsteps, step, k0, k1, keys); + motion_curve_keys_for_step(kg, offset, numkeys, numsteps, step+1, k0, k1, next_keys); + + /* interpolate between steps */ + keys[0] = (1.0f - t)*keys[0] + t*next_keys[0]; + keys[1] = (1.0f - t)*keys[1] + t*next_keys[1]; +} + +ccl_device_inline void motion_cardinal_curve_keys_for_step(KernelGlobals *kg, int offset, int numkeys, int numsteps, int step, int k0, int k1, int k2, int k3, float4 keys[4]) +{ + if(step == numsteps) { + /* center step: regular vertex location */ + keys[0] = kernel_tex_fetch(__curve_keys, k0); + keys[1] = kernel_tex_fetch(__curve_keys, k1); + 
keys[2] = kernel_tex_fetch(__curve_keys, k2); + keys[3] = kernel_tex_fetch(__curve_keys, k3); + } + else { + /* center step not stored in this array */ + if(step > numsteps) + step--; + + offset += step*numkeys; + + keys[0] = kernel_tex_fetch(__attributes_float3, offset + k0); + keys[1] = kernel_tex_fetch(__attributes_float3, offset + k1); + keys[2] = kernel_tex_fetch(__attributes_float3, offset + k2); + keys[3] = kernel_tex_fetch(__attributes_float3, offset + k3); + } +} + +/* return 4 curve key locations (k0..k3), interpolated between the two motion steps surrounding the given time */ +ccl_device_inline void motion_cardinal_curve_keys(KernelGlobals *kg, int object, int prim, float time, int k0, int k1, int k2, int k3, float4 keys[4]) +{ + /* get motion info */ + int numsteps, numkeys; + object_motion_info(kg, object, &numsteps, NULL, &numkeys); + + /* figure out which steps we need to fetch and their interpolation factor */ + int maxstep = numsteps*2; + int step = min((int)(time*maxstep), maxstep-1); + float t = time*maxstep - step; + + /* find attribute */ + AttributeElement elem; + int offset = find_attribute_curve_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem); + kernel_assert(offset != ATTR_STD_NOT_FOUND); + + /* fetch key coordinates */ + float4 next_keys[4]; + + motion_cardinal_curve_keys_for_step(kg, offset, numkeys, numsteps, step, k0, k1, k2, k3, keys); + motion_cardinal_curve_keys_for_step(kg, offset, numkeys, numsteps, step+1, k0, k1, k2, k3, next_keys); + + /* interpolate between steps */ + keys[0] = (1.0f - t)*keys[0] + t*next_keys[0]; + keys[1] = (1.0f - t)*keys[1] + t*next_keys[1]; + keys[2] = (1.0f - t)*keys[2] + t*next_keys[2]; + keys[3] = (1.0f - t)*keys[3] + t*next_keys[3]; +} + +#endif + +CCL_NAMESPACE_END + diff --git a/intern/cycles/kernel/geom/geom_motion_triangle.h b/intern/cycles/kernel/geom/geom_motion_triangle.h new file mode 100644 index 00000000000..73338bb6b3b --- /dev/null +++ b/intern/cycles/kernel/geom/geom_motion_triangle.h @@ -0,0 +1,392 @@ +/* + * Adapted from code Copyright 2009-2010 NVIDIA 
Corporation + * Modifications Copyright 2011, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Motion Triangle Primitive + * + * These are stored as regular triangles, plus extra positions and normals at + * times other than the frame center. Computing the triangle vertex positions + * or normals at a given ray time is a matter of interpolation of the two steps + * between which the ray time lies. + * + * The extra positions and normals are stored as ATTR_STD_MOTION_VERTEX_POSITION + * and ATTR_STD_MOTION_VERTEX_NORMAL mesh attributes. + */ + +CCL_NAMESPACE_BEGIN + +/* Time interpolation of vertex positions and normals */ + +ccl_device_inline int find_attribute_motion(KernelGlobals *kg, int object, uint id, AttributeElement *elem) +{ + /* todo: find a better (faster) solution for this, maybe store offset per object */ + uint attr_offset = object*kernel_data.bvh.attributes_map_stride; + uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset); + + while(attr_map.x != id) { + attr_offset += ATTR_PRIM_TYPES; + attr_map = kernel_tex_fetch(__attributes_map, attr_offset); + } + + *elem = (AttributeElement)attr_map.y; + + /* return result */ + return (attr_map.y == ATTR_ELEMENT_NONE) ? 
(int)ATTR_STD_NOT_FOUND : (int)attr_map.z; +} + +ccl_device_inline void motion_triangle_verts_for_step(KernelGlobals *kg, float3 tri_vindex, int offset, int numverts, int numsteps, int step, float3 verts[3]) +{ + if(step == numsteps) { + /* center step: regular vertex location */ + verts[0] = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x))); + verts[1] = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y))); + verts[2] = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z))); + } + else { + /* center step not stored in this array */ + if(step > numsteps) + step--; + + offset += step*numverts; + + verts[0] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.x))); + verts[1] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.y))); + verts[2] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.z))); + } +} + +ccl_device_inline void motion_triangle_normals_for_step(KernelGlobals *kg, float3 tri_vindex, int offset, int numverts, int numsteps, int step, float3 normals[3]) +{ + if(step == numsteps) { + /* center step: regular vertex normal */ + normals[0] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.x))); + normals[1] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.y))); + normals[2] = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.z))); + } + else { + /* center step not stored in this array */ + if(step > numsteps) + step--; + + offset += step*numverts; + + normals[0] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.x))); + normals[1] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.y))); + normals[2] = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.z))); + } +} 
+ +ccl_device_inline void motion_triangle_vertices(KernelGlobals *kg, int object, int prim, float time, float3 verts[3]) +{ + /* get motion info */ + int numsteps, numverts; + object_motion_info(kg, object, &numsteps, &numverts, NULL); + + /* figure out which steps we need to fetch and their interpolation factor */ + int maxstep = numsteps*2; + int step = min((int)(time*maxstep), maxstep-1); + float t = time*maxstep - step; + + /* find attribute */ + AttributeElement elem; + int offset = find_attribute_motion(kg, object, ATTR_STD_MOTION_VERTEX_POSITION, &elem); + kernel_assert(offset != ATTR_STD_NOT_FOUND); + + /* fetch vertex coordinates */ + float3 next_verts[3]; + float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, prim)); + + motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts); + motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_verts); + + /* interpolate between steps */ + verts[0] = (1.0f - t)*verts[0] + t*next_verts[0]; + verts[1] = (1.0f - t)*verts[1] + t*next_verts[1]; + verts[2] = (1.0f - t)*verts[2] + t*next_verts[2]; +} + +/* Refine triangle intersection to more precise hit point. For rays that travel + * far the precision is often not so good, this reintersects the primitive from + * a closer distance. 
*/ + +ccl_device_inline float3 motion_triangle_refine(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray, float3 verts[3]) +{ + float3 P = ray->P; + float3 D = ray->D; + float t = isect->t; + +#ifdef __INTERSECTION_REFINE__ + if(isect->object != OBJECT_NONE) { +#ifdef __OBJECT_MOTION__ + Transform tfm = sd->ob_itfm; +#else + Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); +#endif + + P = transform_point(&tfm, P); + D = transform_direction(&tfm, D*t); + D = normalize_len(D, &t); + } + + P = P + D*t; + + /* compute refined intersection distance */ + const float3 e1 = verts[0] - verts[2]; + const float3 e2 = verts[1] - verts[2]; + const float3 s1 = cross(D, e2); + + const float invdivisor = 1.0f/dot(s1, e1); + const float3 d = P - verts[2]; + const float3 s2 = cross(d, e1); + float rt = dot(e2, s2)*invdivisor; + + /* compute refined position */ + P = P + D*rt; + + if(isect->object != OBJECT_NONE) { +#ifdef __OBJECT_MOTION__ + Transform tfm = sd->ob_tfm; +#else + Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); +#endif + + P = transform_point(&tfm, P); + } + + return P; +#else + return P + D*t; +#endif +} + +/* Same as above, except that isect->t is assumed to be in object space for instancing */ + +#ifdef __SUBSURFACE__ +ccl_device_inline float3 motion_triangle_refine_subsurface(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray, float3 verts[3]) +{ + float3 P = ray->P; + float3 D = ray->D; + float t = isect->t; + +#ifdef __INTERSECTION_REFINE__ + if(isect->object != OBJECT_NONE) { +#ifdef __OBJECT_MOTION__ + Transform tfm = sd->ob_itfm; +#else + Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); +#endif + + P = transform_point(&tfm, P); + D = transform_direction(&tfm, D); + D = normalize(D); + } + + P = P + D*t; + + /* compute refined intersection distance */ + const float3 e1 = verts[0] - verts[2]; + const float3 e2 = 
verts[1] - verts[2]; + const float3 s1 = cross(D, e2); + + const float invdivisor = 1.0f/dot(s1, e1); + const float3 d = P - verts[2]; + const float3 s2 = cross(d, e1); + float rt = dot(e2, s2)*invdivisor; + + P = P + D*rt; + + if(isect->object != OBJECT_NONE) { +#ifdef __OBJECT_MOTION__ + Transform tfm = sd->ob_tfm; +#else + Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); +#endif + + P = transform_point(&tfm, P); + } + + return P; +#else + return P + D*t; +#endif +} +#endif + +/* Setup of motion triangle specific parts of ShaderData, moved into this one + * function to more easily share computation of interpolated positions and + * normals */ + +/* fills in sd->shader, sd->P, sd->Ng, sd->N and (under __DPDU__) sd->dPdu/sd->dPdv from the time-interpolated triangle */ +ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray, bool subsurface) +{ + /* get shader */ + float4 Ns = kernel_tex_fetch(__tri_normal, sd->prim); + sd->shader = __float_as_int(Ns.w); + + /* get motion info */ + int numsteps, numverts; + object_motion_info(kg, sd->object, &numsteps, &numverts, NULL); + + /* figure out which steps we need to fetch and their interpolation factor */ + int maxstep = numsteps*2; + int step = min((int)(sd->time*maxstep), maxstep-1); + float t = sd->time*maxstep - step; + + /* find attribute */ + AttributeElement elem; + int offset = find_attribute_motion(kg, sd->object, ATTR_STD_MOTION_VERTEX_POSITION, &elem); + kernel_assert(offset != ATTR_STD_NOT_FOUND); + + /* fetch vertex coordinates */ + float3 verts[3], next_verts[3]; + float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, sd->prim)); + + motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step, verts); + motion_triangle_verts_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_verts); + + /* interpolate between steps */ + verts[0] = (1.0f - t)*verts[0] + t*next_verts[0]; + verts[1] = (1.0f - t)*verts[1] + t*next_verts[1]; + verts[2] = (1.0f - 
t)*verts[2] + t*next_verts[2]; + + /* compute refined position */ +#ifdef __SUBSURFACE__ + if(!subsurface) +#endif + sd->P = motion_triangle_refine(kg, sd, isect, ray, verts); +#ifdef __SUBSURFACE__ + else + sd->P = motion_triangle_refine_subsurface(kg, sd, isect, ray, verts); +#endif + + /* compute face normal */ + float3 Ng = normalize(cross(verts[1] - verts[0], verts[2] - verts[0])); + + sd->Ng = Ng; + sd->N = Ng; + + /* compute derivatives of P w.r.t. uv */ +#ifdef __DPDU__ + sd->dPdu = (verts[0] - verts[2]); + sd->dPdv = (verts[1] - verts[2]); +#endif + + /* compute smooth normal */ + if(sd->shader & SHADER_SMOOTH_NORMAL) { + /* find attribute */ + AttributeElement elem; + int offset = find_attribute_motion(kg, sd->object, ATTR_STD_MOTION_VERTEX_NORMAL, &elem); + kernel_assert(offset != ATTR_STD_NOT_FOUND); + + /* fetch vertex normals */ + float3 normals[3], next_normals[3]; + motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step, normals); + motion_triangle_normals_for_step(kg, tri_vindex, offset, numverts, numsteps, step+1, next_normals); + + /* interpolate between steps */ + normals[0] = (1.0f - t)*normals[0] + t*next_normals[0]; + normals[1] = (1.0f - t)*normals[1] + t*next_normals[1]; + normals[2] = (1.0f - t)*normals[2] + t*next_normals[2]; + + /* interpolate between vertices; NOTE(review): the blended normal is not renormalized here -- confirm callers expect that */ + float u = sd->u; + float v = sd->v; + float w = 1.0f - u - v; + sd->N = (u*normals[0] + v*normals[1] + w*normals[2]); + } +} + +/* Ray intersection. We simply compute the vertex positions at the given ray + * time and do a ray intersection with the resulting triangle */ + +ccl_device_inline bool motion_triangle_intersect(KernelGlobals *kg, Intersection *isect, + float3 P, float3 dir, float time, uint visibility, int object, int triAddr) +{ + /* primitive index for vertex location lookup */ + int prim = kernel_tex_fetch(__prim_index, triAddr); + int fobject = (object == OBJECT_NONE)? 
kernel_tex_fetch(__prim_object, triAddr): object; + + /* get vertex locations for intersection */ + float3 verts[3]; + motion_triangle_vertices(kg, fobject, prim, time, verts); + + /* ray-triangle intersection, unoptimized; NOTE(review): the visibility parameter is not tested in this routine -- confirm that is intended */ + float t, u, v; + + if(ray_triangle_intersect_uv(P, dir, isect->t, verts[2], verts[0], verts[1], &u, &v, &t)) { + isect->prim = triAddr; + isect->object = object; + isect->type = PRIMITIVE_MOTION_TRIANGLE; + isect->u = u; + isect->v = v; + isect->t = t; + + return true; + } + + return false; +} + +/* Special ray intersection routines for subsurface scattering. In that case we + * only want to intersect with primitives in the same object, and in case of + * multiple hits we pick a single random primitive as the intersection point. */ + +#ifdef __SUBSURFACE__ +ccl_device_inline void motion_triangle_intersect_subsurface(KernelGlobals *kg, Intersection *isect_array, + float3 P, float3 dir, float time, int object, int triAddr, float tmax, uint *num_hits, uint *lcg_state, int max_hits) +{ + /* primitive index for vertex location lookup */ + int prim = kernel_tex_fetch(__prim_index, triAddr); + int fobject = (object == OBJECT_NONE)? 
kernel_tex_fetch(__prim_object, triAddr): object; + + /* get vertex locations for intersection */ + float3 verts[3]; + motion_triangle_vertices(kg, fobject, prim, time, verts); + + /* ray-triangle intersection, unoptimized */ + float t, u, v; + + if(ray_triangle_intersect_uv(P, dir, tmax, verts[2], verts[0], verts[1], &u, &v, &t)) { + (*num_hits)++; + + int hit; + + if(*num_hits <= max_hits) { + hit = *num_hits - 1; + } + else { + /* reservoir sampling: if we are at the maximum number of + * hits, randomly replace element or skip it */ + hit = lcg_step_uint(lcg_state) % *num_hits; + + if(hit >= max_hits) + return; + } + + /* record intersection */ + Intersection *isect = &isect_array[hit]; + isect->prim = triAddr; + isect->object = object; + isect->type = PRIMITIVE_MOTION_TRIANGLE; + isect->u = u; + isect->v = v; + isect->t = t; + } +} +#endif + +CCL_NAMESPACE_END + diff --git a/intern/cycles/kernel/kernel_object.h b/intern/cycles/kernel/geom/geom_object.h index a66277e10cd..91edd5863ac 100644 --- a/intern/cycles/kernel/kernel_object.h +++ b/intern/cycles/kernel/geom/geom_object.h @@ -1,6 +1,4 @@ /* - * Copyright 2011-2013 Blender Foundation - * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -11,11 +9,23 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ +/* Object Primitive + * + * All mesh and curve primitives are part of an object. The same mesh and curves + * may be instanced multiple times by different objects. 
+ * + * If the mesh is not instanced multiple times, the object will not be explicitly + * stored as a primitive in the BVH, rather the bare triangles and curves are + * directly primitives in the BVH with world space locations applied, and the object + * ID is looked up afterwards. */ + CCL_NAMESPACE_BEGIN +/* Object attributes, for now a fixed size and contents */ + enum ObjectTransform { OBJECT_TRANSFORM = 0, OBJECT_TRANSFORM_MOTION_PRE = 0, @@ -30,6 +40,8 @@ enum ObjectVectorTransform { OBJECT_VECTOR_MOTION_POST = 3 }; +/* Object to world space transformation */ + ccl_device_inline Transform object_fetch_transform(KernelGlobals *kg, int object, enum ObjectTransform type) { int offset = object*OBJECT_SIZE + (int)type; @@ -43,6 +55,8 @@ ccl_device_inline Transform object_fetch_transform(KernelGlobals *kg, int object return tfm; } +/* Object to world space transformation for motion vectors */ + ccl_device_inline Transform object_fetch_vector_transform(KernelGlobals *kg, int object, enum ObjectVectorTransform type) { int offset = object*OBJECT_VECTOR_SIZE + (int)type; @@ -56,6 +70,8 @@ ccl_device_inline Transform object_fetch_vector_transform(KernelGlobals *kg, int return tfm; } +/* Motion blurred object transformations */ + #ifdef __OBJECT_MOTION__ ccl_device_inline Transform object_fetch_transform_motion(KernelGlobals *kg, int object, float time) { @@ -102,7 +118,9 @@ ccl_device_inline Transform object_fetch_transform_motion_test(KernelGlobals *kg } #endif -ccl_device_inline void object_position_transform(KernelGlobals *kg, ShaderData *sd, float3 *P) +/* Transform position from object to world space */ + +ccl_device_inline void object_position_transform(KernelGlobals *kg, const ShaderData *sd, float3 *P) { #ifdef __OBJECT_MOTION__ *P = transform_point(&sd->ob_tfm, *P); @@ -112,7 +130,9 @@ ccl_device_inline void object_position_transform(KernelGlobals *kg, ShaderData * #endif } -ccl_device_inline void object_inverse_position_transform(KernelGlobals *kg, ShaderData 
*sd, float3 *P) +/* Transform position from world to object space */ + +ccl_device_inline void object_inverse_position_transform(KernelGlobals *kg, const ShaderData *sd, float3 *P) { #ifdef __OBJECT_MOTION__ *P = transform_point(&sd->ob_itfm, *P); @@ -122,7 +142,9 @@ ccl_device_inline void object_inverse_position_transform(KernelGlobals *kg, Shad #endif } -ccl_device_inline void object_inverse_normal_transform(KernelGlobals *kg, ShaderData *sd, float3 *N) +/* Transform normal from world to object space */ + +ccl_device_inline void object_inverse_normal_transform(KernelGlobals *kg, const ShaderData *sd, float3 *N) { #ifdef __OBJECT_MOTION__ *N = normalize(transform_direction_transposed(&sd->ob_tfm, *N)); @@ -132,7 +154,9 @@ ccl_device_inline void object_inverse_normal_transform(KernelGlobals *kg, Shader #endif } -ccl_device_inline void object_normal_transform(KernelGlobals *kg, ShaderData *sd, float3 *N) +/* Transform normal from object to world space */ + +ccl_device_inline void object_normal_transform(KernelGlobals *kg, const ShaderData *sd, float3 *N) { #ifdef __OBJECT_MOTION__ *N = normalize(transform_direction_transposed(&sd->ob_itfm, *N)); @@ -142,7 +166,9 @@ ccl_device_inline void object_normal_transform(KernelGlobals *kg, ShaderData *sd #endif } -ccl_device_inline void object_dir_transform(KernelGlobals *kg, ShaderData *sd, float3 *D) +/* Transform direction vector from object to world space */ + +ccl_device_inline void object_dir_transform(KernelGlobals *kg, const ShaderData *sd, float3 *D) { #ifdef __OBJECT_MOTION__ *D = transform_direction(&sd->ob_tfm, *D); @@ -152,7 +178,9 @@ ccl_device_inline void object_dir_transform(KernelGlobals *kg, ShaderData *sd, f #endif } -ccl_device_inline void object_inverse_dir_transform(KernelGlobals *kg, ShaderData *sd, float3 *D) +/* Transform direction vector from world to object space */ + +ccl_device_inline void object_inverse_dir_transform(KernelGlobals *kg, const ShaderData *sd, float3 *D) { #ifdef __OBJECT_MOTION__ 
*D = transform_direction(&sd->ob_itfm, *D); @@ -162,9 +190,11 @@ ccl_device_inline void object_inverse_dir_transform(KernelGlobals *kg, ShaderDat #endif } -ccl_device_inline float3 object_location(KernelGlobals *kg, ShaderData *sd) +/* Object center position */ + +ccl_device_inline float3 object_location(KernelGlobals *kg, const ShaderData *sd) { - if(sd->object == ~0) + if(sd->object == OBJECT_NONE) return make_float3(0.0f, 0.0f, 0.0f); #ifdef __OBJECT_MOTION__ @@ -175,6 +205,8 @@ ccl_device_inline float3 object_location(KernelGlobals *kg, ShaderData *sd) #endif } +/* Total surface area of object */ + ccl_device_inline float object_surface_area(KernelGlobals *kg, int object) { int offset = object*OBJECT_SIZE + OBJECT_PROPERTIES; @@ -182,9 +214,11 @@ ccl_device_inline float object_surface_area(KernelGlobals *kg, int object) return f.x; } +/* Pass ID number of object */ + ccl_device_inline float object_pass_id(KernelGlobals *kg, int object) { - if(object == ~0) + if(object == OBJECT_NONE) return 0.0f; int offset = object*OBJECT_SIZE + OBJECT_PROPERTIES; @@ -192,9 +226,11 @@ ccl_device_inline float object_pass_id(KernelGlobals *kg, int object) return f.y; } +/* Per object random number for shader variation */ + ccl_device_inline float object_random_number(KernelGlobals *kg, int object) { - if(object == ~0) + if(object == OBJECT_NONE) return 0.0f; int offset = object*OBJECT_SIZE + OBJECT_PROPERTIES; @@ -202,9 +238,11 @@ ccl_device_inline float object_random_number(KernelGlobals *kg, int object) return f.z; } -ccl_device_inline uint object_particle_id(KernelGlobals *kg, int object) +/* Particle ID from which this object was generated */ + +ccl_device_inline int object_particle_id(KernelGlobals *kg, int object) { - if(object == ~0) + if(object == OBJECT_NONE) return 0.0f; int offset = object*OBJECT_SIZE + OBJECT_PROPERTIES; @@ -212,9 +250,11 @@ ccl_device_inline uint object_particle_id(KernelGlobals *kg, int object) return __float_as_uint(f.w); } +/* Generated texture 
coordinate on surface from where object was instanced */ + ccl_device_inline float3 object_dupli_generated(KernelGlobals *kg, int object) { - if(object == ~0) + if(object == OBJECT_NONE) return make_float3(0.0f, 0.0f, 0.0f); int offset = object*OBJECT_SIZE + OBJECT_DUPLI; @@ -222,9 +262,11 @@ ccl_device_inline float3 object_dupli_generated(KernelGlobals *kg, int object) return make_float3(f.x, f.y, f.z); } +/* UV texture coordinate on surface from where object was instanced */ + ccl_device_inline float3 object_dupli_uv(KernelGlobals *kg, int object) { - if(object == ~0) + if(object == OBJECT_NONE) return make_float3(0.0f, 0.0f, 0.0f); int offset = object*OBJECT_SIZE + OBJECT_DUPLI; @@ -232,12 +274,33 @@ ccl_device_inline float3 object_dupli_uv(KernelGlobals *kg, int object) return make_float3(f.x, f.y, 0.0f); } +/* Information about mesh for motion blurred triangles and curves */ + +ccl_device_inline void object_motion_info(KernelGlobals *kg, int object, int *numsteps, int *numverts, int *numkeys) +{ + int offset = object*OBJECT_SIZE + OBJECT_DUPLI; + + if(numkeys) { + float4 f = kernel_tex_fetch(__objects, offset); + *numkeys = __float_as_int(f.w); + } + + float4 f = kernel_tex_fetch(__objects, offset + 1); + if(numsteps) + *numsteps = __float_as_int(f.z); + if(numverts) + *numverts = __float_as_int(f.w); +} + +/* Pass ID for shader */ -ccl_device int shader_pass_id(KernelGlobals *kg, ShaderData *sd) +ccl_device int shader_pass_id(KernelGlobals *kg, const ShaderData *sd) { return kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2 + 1); } +/* Particle data from which object was instanced */ + ccl_device_inline float particle_index(KernelGlobals *kg, int particle) { int offset = particle*PARTICLE_SIZE; @@ -296,5 +359,107 @@ ccl_device float3 particle_angular_velocity(KernelGlobals *kg, int particle) return make_float3(f3.z, f3.w, f4.x); } +/* Object intersection in BVH */ + +ccl_device_inline float3 bvh_clamp_direction(float3 dir) +{ + /* clamp absolute 
values by exp2f(-80.0f) to avoid division by zero when calculating inverse direction */ + float ooeps = 8.271806E-25f; + return make_float3((fabsf(dir.x) > ooeps)? dir.x: copysignf(ooeps, dir.x), + (fabsf(dir.y) > ooeps)? dir.y: copysignf(ooeps, dir.y), + (fabsf(dir.z) > ooeps)? dir.z: copysignf(ooeps, dir.z)); +} + +ccl_device_inline float3 bvh_inverse_direction(float3 dir) +{ + return 1.0f / dir; +} + +/* Transform ray into object space to enter static object in BVH */ + +ccl_device_inline void bvh_instance_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t) +{ + Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); + + *P = transform_point(&tfm, ray->P); + + float len; + *dir = bvh_clamp_direction(normalize_len(transform_direction(&tfm, ray->D), &len)); + *idir = bvh_inverse_direction(*dir); + + if(*t != FLT_MAX) + *t *= len; +} + +/* Transorm ray to exit static object in BVH */ + +ccl_device_inline void bvh_instance_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t) +{ + if(*t != FLT_MAX) { + Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); + *t *= len(transform_direction(&tfm, 1.0f/(*idir))); + } + + *P = ray->P; + *dir = bvh_clamp_direction(ray->D); + *idir = bvh_inverse_direction(*dir); +} + +/* Same as above, but returns scale factor to apply to multiple intersection distances */ + +ccl_device_inline void bvh_instance_pop_factor(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t_fac) +{ + Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); + *t_fac = len(transform_direction(&tfm, 1.0f/(*idir))); + + *P = ray->P; + *dir = bvh_clamp_direction(ray->D); + *idir = bvh_inverse_direction(*dir); +} + + +#ifdef __OBJECT_MOTION__ +/* Transform ray into object space to enter motion blurred object in BVH */ + +ccl_device_inline void bvh_instance_motion_push(KernelGlobals 
*kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t, Transform *tfm) +{ + Transform itfm; + *tfm = object_fetch_transform_motion_test(kg, object, ray->time, &itfm); + + *P = transform_point(&itfm, ray->P); + + float len; + *dir = bvh_clamp_direction(normalize_len(transform_direction(&itfm, ray->D), &len)); + *idir = bvh_inverse_direction(*dir); + + if(*t != FLT_MAX) + *t *= len; +} + +/* Transorm ray to exit motion blurred object in BVH */ + +ccl_device_inline void bvh_instance_motion_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t, Transform *tfm) +{ + if(*t != FLT_MAX) + *t *= len(transform_direction(tfm, 1.0f/(*idir))); + + *P = ray->P; + *dir = bvh_clamp_direction(ray->D); + *idir = bvh_inverse_direction(*dir); +} + +/* Same as above, but returns scale factor to apply to multiple intersection distances */ + +ccl_device_inline void bvh_instance_motion_pop_factor(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t_fac, Transform *tfm) +{ + *t_fac = len(transform_direction(tfm, 1.0f/(*idir))); + + *P = ray->P; + *dir = bvh_clamp_direction(ray->D); + *idir = bvh_inverse_direction(*dir); +} + +#endif + CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_primitive.h b/intern/cycles/kernel/geom/geom_primitive.h index fa450c97cbf..533973621d7 100644 --- a/intern/cycles/kernel/kernel_primitive.h +++ b/intern/cycles/kernel/geom/geom_primitive.h @@ -14,82 +14,60 @@ * limitations under the License */ -#ifndef __KERNEL_ATTRIBUTE_CL__ -#define __KERNEL_ATTRIBUTE_CL__ +/* Primitive Utilities + * + * Generic functions to look up mesh, curve and volume primitive attributes for + * shading and render passes. 
*/ CCL_NAMESPACE_BEGIN -/* attribute lookup */ - -ccl_device_inline int find_attribute(KernelGlobals *kg, ShaderData *sd, uint id, AttributeElement *elem) -{ - if(sd->object == ~0) - return (int)ATTR_STD_NOT_FOUND; - -#ifdef __OSL__ - if (kg->osl) { - return OSLShader::find_attribute(kg, sd, id, elem); - } - else -#endif - { - /* for SVM, find attribute by unique id */ - uint attr_offset = sd->object*kernel_data.bvh.attributes_map_stride; -#ifdef __HAIR__ - attr_offset = (sd->segment == ~0)? attr_offset: attr_offset + ATTR_PRIM_CURVE; -#endif - uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset); - - while(attr_map.x != id) { - attr_offset += ATTR_PRIM_TYPES; - attr_map = kernel_tex_fetch(__attributes_map, attr_offset); - } - - *elem = (AttributeElement)attr_map.y; - - if(sd->prim == ~0 && (AttributeElement)attr_map.y != ATTR_ELEMENT_MESH) - return ATTR_STD_NOT_FOUND; - - /* return result */ - return (attr_map.y == ATTR_ELEMENT_NONE) ? (int)ATTR_STD_NOT_FOUND : (int)attr_map.z; - } -} +/* Generic primitive attribute reading functions */ ccl_device float primitive_attribute_float(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float *dx, float *dy) { -#ifdef __HAIR__ - if(sd->segment == ~0) -#endif + if(sd->type & PRIMITIVE_ALL_TRIANGLE) { return triangle_attribute_float(kg, sd, elem, offset, dx, dy); + } #ifdef __HAIR__ - else + else if(sd->type & PRIMITIVE_ALL_CURVE) { return curve_attribute_float(kg, sd, elem, offset, dx, dy); + } +#endif +#ifdef __VOLUME__ + else if(sd->object != OBJECT_NONE && elem == ATTR_ELEMENT_VOXEL) { + return volume_attribute_float(kg, sd, elem, offset, dx, dy); + } #endif + else { + if(dx) *dx = 0.0f; + if(dy) *dy = 0.0f; + return 0.0f; + } } ccl_device float3 primitive_attribute_float3(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float3 *dx, float3 *dy) { -#ifdef __HAIR__ - if(sd->segment == ~0) -#endif + if(sd->type & PRIMITIVE_ALL_TRIANGLE) { return 
triangle_attribute_float3(kg, sd, elem, offset, dx, dy); + } #ifdef __HAIR__ - else + else if(sd->type & PRIMITIVE_ALL_CURVE) { return curve_attribute_float3(kg, sd, elem, offset, dx, dy); + } +#endif +#ifdef __VOLUME__ + else if(sd->object != OBJECT_NONE && elem == ATTR_ELEMENT_VOXEL) { + return volume_attribute_float3(kg, sd, elem, offset, dx, dy); + } #endif + else { + if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); + if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); + return make_float3(0.0f, 0.0f, 0.0f); + } } -ccl_device Transform primitive_attribute_matrix(KernelGlobals *kg, const ShaderData *sd, int offset) -{ - Transform tfm; - - tfm.x = kernel_tex_fetch(__attributes_float3, offset + 0); - tfm.y = kernel_tex_fetch(__attributes_float3, offset + 1); - tfm.z = kernel_tex_fetch(__attributes_float3, offset + 2); - tfm.w = kernel_tex_fetch(__attributes_float3, offset + 3); - - return tfm; -} +/* Default UV coordinate */ ccl_device float3 primitive_uv(KernelGlobals *kg, ShaderData *sd) { @@ -104,6 +82,8 @@ ccl_device float3 primitive_uv(KernelGlobals *kg, ShaderData *sd) return uv; } +/* Ptex coordinates */ + ccl_device bool primitive_ptex(KernelGlobals *kg, ShaderData *sd, float2 *uv, int *face_id) { /* storing ptex data as attributes is not memory efficient but simple for tests */ @@ -123,10 +103,12 @@ ccl_device bool primitive_ptex(KernelGlobals *kg, ShaderData *sd, float2 *uv, in return true; } +/* Surface tangent */ + ccl_device float3 primitive_tangent(KernelGlobals *kg, ShaderData *sd) { #ifdef __HAIR__ - if(sd->segment != ~0) + if(sd->type & PRIMITIVE_ALL_CURVE) #ifdef __DPDU__ return normalize(sd->dPdu); #else @@ -154,21 +136,39 @@ ccl_device float3 primitive_tangent(KernelGlobals *kg, ShaderData *sd) } } -/* motion */ +/* Motion vector for motion pass */ ccl_device float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *sd) { - float3 motion_pre = sd->P, motion_post = sd->P; + /* center position */ + float3 center; + + if(sd->type & PRIMITIVE_ALL_CURVE) { + 
center = curve_motion_center_location(kg, sd); + + if(!(sd->flag & SD_TRANSFORM_APPLIED)) + object_position_transform(kg, sd, &center); + } + else + center = sd->P; + + float3 motion_pre = center, motion_post = center; /* deformation motion */ - AttributeElement elem_pre, elem_post; - int offset_pre = find_attribute(kg, sd, ATTR_STD_MOTION_PRE, &elem_pre); - int offset_post = find_attribute(kg, sd, ATTR_STD_MOTION_POST, &elem_post); + AttributeElement elem; + int offset = find_attribute(kg, sd, ATTR_STD_MOTION_VERTEX_POSITION, &elem); + + if(offset != ATTR_STD_NOT_FOUND) { + /* get motion info */ + int numverts, numkeys; + object_motion_info(kg, sd->object, NULL, &numverts, &numkeys); - if(offset_pre != ATTR_STD_NOT_FOUND) - motion_pre = primitive_attribute_float3(kg, sd, elem_pre, offset_pre, NULL, NULL); - if(offset_post != ATTR_STD_NOT_FOUND) - motion_post = primitive_attribute_float3(kg, sd, elem_post, offset_post, NULL, NULL); + /* lookup attributes */ + int offset_next = (sd->type & PRIMITIVE_ALL_TRIANGLE)? offset + numverts: offset + numkeys; + + motion_pre = primitive_attribute_float3(kg, sd, elem, offset, NULL, NULL); + motion_post = primitive_attribute_float3(kg, sd, elem, offset_next, NULL, NULL); + } /* object motion. 
note that depending on the mesh having motion vectors, this * transformation was set match the world/object space of motion_pre/post */ @@ -180,13 +180,13 @@ ccl_device float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *sd) tfm = object_fetch_vector_transform(kg, sd->object, OBJECT_VECTOR_MOTION_POST); motion_post = transform_point(&tfm, motion_post); - float3 P; + float3 motion_center; /* camera motion, for perspective/orthographic motion.pre/post will be a * world-to-raster matrix, for panorama it's world-to-camera */ if (kernel_data.cam.type != CAMERA_PANORAMA) { tfm = kernel_data.cam.worldtoraster; - P = transform_perspective(&tfm, sd->P); + motion_center = transform_perspective(&tfm, center); tfm = kernel_data.cam.motion.pre; motion_pre = transform_perspective(&tfm, motion_pre); @@ -196,10 +196,10 @@ ccl_device float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *sd) } else { tfm = kernel_data.cam.worldtocamera; - P = normalize(transform_point(&tfm, sd->P)); - P = float2_to_float3(direction_to_panorama(kg, P)); - P.x *= kernel_data.cam.width; - P.y *= kernel_data.cam.height; + motion_center = normalize(transform_point(&tfm, center)); + motion_center = float2_to_float3(direction_to_panorama(kg, motion_center)); + motion_center.x *= kernel_data.cam.width; + motion_center.y *= kernel_data.cam.height; tfm = kernel_data.cam.motion.pre; motion_pre = normalize(transform_point(&tfm, motion_pre)); @@ -214,12 +214,11 @@ ccl_device float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *sd) motion_post.y *= kernel_data.cam.height; } - motion_pre = motion_pre - P; - motion_post = P - motion_post; + motion_pre = motion_pre - motion_center; + motion_post = motion_center - motion_post; return make_float4(motion_pre.x, motion_pre.y, motion_post.x, motion_post.y); } CCL_NAMESPACE_END -#endif /* __KERNEL_ATTRIBUTE_CL__ */ diff --git a/intern/cycles/kernel/geom/geom_triangle.h b/intern/cycles/kernel/geom/geom_triangle.h new file mode 100644 index 
00000000000..355e36fef0c --- /dev/null +++ b/intern/cycles/kernel/geom/geom_triangle.h @@ -0,0 +1,379 @@ +/* + * Adapted from code Copyright 2009-2010 NVIDIA Corporation + * Modifications Copyright 2011, Blender Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Triangle Primitive + * + * Basic triangle with 3 vertices is used to represent mesh surfaces. For BVH + * ray intersection we use a precomputed triangle storage to accelarate + * intersection at the cost of more memory usage */ + +CCL_NAMESPACE_BEGIN + +/* Refine triangle intersection to more precise hit point. For rays that travel + * far the precision is often not so good, this reintersects the primitive from + * a closer distance. 
*/ + +ccl_device_inline float3 triangle_refine(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray) +{ + float3 P = ray->P; + float3 D = ray->D; + float t = isect->t; + +#ifdef __INTERSECTION_REFINE__ + if(isect->object != OBJECT_NONE) { +#ifdef __OBJECT_MOTION__ + Transform tfm = sd->ob_itfm; +#else + Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); +#endif + + P = transform_point(&tfm, P); + D = transform_direction(&tfm, D*t); + D = normalize_len(D, &t); + } + + P = P + D*t; + + float4 v00 = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0); + float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z; + float invDz = 1.0f/(D.x*v00.x + D.y*v00.y + D.z*v00.z); + float rt = Oz * invDz; + + P = P + D*rt; + + if(isect->object != OBJECT_NONE) { +#ifdef __OBJECT_MOTION__ + Transform tfm = sd->ob_tfm; +#else + Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); +#endif + + P = transform_point(&tfm, P); + } + + return P; +#else + return P + D*t; +#endif +} + +/* same as above, except that isect->t is assumed to be in object space for instancing */ +ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray) +{ + float3 P = ray->P; + float3 D = ray->D; + float t = isect->t; + +#ifdef __INTERSECTION_REFINE__ + if(isect->object != OBJECT_NONE) { +#ifdef __OBJECT_MOTION__ + Transform tfm = sd->ob_itfm; +#else + Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); +#endif + + P = transform_point(&tfm, P); + D = transform_direction(&tfm, D); + D = normalize(D); + } + + P = P + D*t; + + float4 v00 = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0); + float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z; + float invDz = 1.0f/(D.x*v00.x + D.y*v00.y + D.z*v00.z); + float rt = Oz * invDz; + + P = P + D*rt; + + if(isect->object != OBJECT_NONE) { +#ifdef __OBJECT_MOTION__ + Transform tfm 
= sd->ob_tfm; +#else + Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); +#endif + + P = transform_point(&tfm, P); + } + + return P; +#else + return P + D*t; +#endif +} + +/* point and normal on triangle */ +ccl_device_inline void triangle_point_normal(KernelGlobals *kg, int prim, float u, float v, float3 *P, float3 *Ng, int *shader) +{ + /* load triangle vertices */ + float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, prim)); + + float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x))); + float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y))); + float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z))); + + /* compute point */ + float t = 1.0f - u - v; + *P = (u*v0 + v*v1 + t*v2); + + float4 Nm = kernel_tex_fetch(__tri_normal, prim); + *Ng = make_float3(Nm.x, Nm.y, Nm.z); + *shader = __float_as_int(Nm.w); +} + +/* Triangle vertex locations */ + +ccl_device_inline void triangle_vertices(KernelGlobals *kg, int prim, float3 P[3]) +{ + float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, prim)); + + P[0] = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x))); + P[1] = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y))); + P[2] = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z))); +} + +/* Interpolate smooth vertex normal from vertices */ + +ccl_device_inline float3 triangle_smooth_normal(KernelGlobals *kg, int prim, float u, float v) +{ + /* load triangle vertices */ + float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, prim)); + + float3 n0 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.x))); + float3 n1 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.y))); + float3 n2 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.z))); + + return 
normalize((1.0f - u - v)*n2 + u*n0 + v*n1); +} + +/* Ray differentials on triangle */ + +ccl_device_inline void triangle_dPdudv(KernelGlobals *kg, int prim, float3 *dPdu, float3 *dPdv) +{ + /* fetch triangle vertex coordinates */ + float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, prim)); + + float3 p0 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x))); + float3 p1 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y))); + float3 p2 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z))); + + /* compute derivatives of P w.r.t. uv */ + *dPdu = (p0 - p2); + *dPdv = (p1 - p2); +} + +/* Reading attributes on various triangle elements */ + +ccl_device float triangle_attribute_float(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float *dx, float *dy) +{ + if(elem == ATTR_ELEMENT_FACE) { + if(dx) *dx = 0.0f; + if(dy) *dy = 0.0f; + + return kernel_tex_fetch(__attributes_float, offset + sd->prim); + } + else if(elem == ATTR_ELEMENT_VERTEX || elem == ATTR_ELEMENT_VERTEX_MOTION) { + float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, sd->prim)); + + float f0 = kernel_tex_fetch(__attributes_float, offset + __float_as_int(tri_vindex.x)); + float f1 = kernel_tex_fetch(__attributes_float, offset + __float_as_int(tri_vindex.y)); + float f2 = kernel_tex_fetch(__attributes_float, offset + __float_as_int(tri_vindex.z)); + +#ifdef __RAY_DIFFERENTIALS__ + if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2; + if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2; +#endif + + return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2; + } + else if(elem == ATTR_ELEMENT_CORNER) { + int tri = offset + sd->prim*3; + float f0 = kernel_tex_fetch(__attributes_float, tri + 0); + float f1 = kernel_tex_fetch(__attributes_float, tri + 1); + float f2 = kernel_tex_fetch(__attributes_float, tri + 2); + +#ifdef __RAY_DIFFERENTIALS__ 
+ if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2; + if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2; +#endif + + return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2; + } + else { + if(dx) *dx = 0.0f; + if(dy) *dy = 0.0f; + + return 0.0f; + } +} + +ccl_device float3 triangle_attribute_float3(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float3 *dx, float3 *dy) +{ + if(elem == ATTR_ELEMENT_FACE) { + if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); + if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); + + return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + sd->prim)); + } + else if(elem == ATTR_ELEMENT_VERTEX || elem == ATTR_ELEMENT_VERTEX_MOTION) { + float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, sd->prim)); + + float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.x))); + float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.y))); + float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.z))); + +#ifdef __RAY_DIFFERENTIALS__ + if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2; + if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2; +#endif + + return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2; + } + else if(elem == ATTR_ELEMENT_CORNER) { + int tri = offset + sd->prim*3; + float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 0)); + float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 1)); + float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 2)); + +#ifdef __RAY_DIFFERENTIALS__ + if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2; + if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2; +#endif + + return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2; + } + else { + if(dx) *dx = 
make_float3(0.0f, 0.0f, 0.0f); + if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); + + return make_float3(0.0f, 0.0f, 0.0f); + } +} + +/* Ray-Triangle intersection for BVH traversal + * + * Based on Sven Woop's algorithm with precomputed triangle storage */ + +ccl_device_inline bool triangle_intersect(KernelGlobals *kg, Intersection *isect, + float3 P, float3 dir, uint visibility, int object, int triAddr) +{ + /* compute and check intersection t-value */ + float4 v00 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0); + float4 v11 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1); + + float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z; + float invDz = 1.0f/(dir.x*v00.x + dir.y*v00.y + dir.z*v00.z); + float t = Oz * invDz; + + if(t > 0.0f && t < isect->t) { + /* compute and check barycentric u */ + float Ox = v11.w + P.x*v11.x + P.y*v11.y + P.z*v11.z; + float Dx = dir.x*v11.x + dir.y*v11.y + dir.z*v11.z; + float u = Ox + t*Dx; + + if(u >= 0.0f) { + /* compute and check barycentric v */ + float4 v22 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2); + float Oy = v22.w + P.x*v22.x + P.y*v22.y + P.z*v22.z; + float Dy = dir.x*v22.x + dir.y*v22.y + dir.z*v22.z; + float v = Oy + t*Dy; + + if(v >= 0.0f && u + v <= 1.0f) { +#ifdef __VISIBILITY_FLAG__ + /* visibility flag test. we do it here under the assumption + * that most triangles are culled by node flags */ + if(kernel_tex_fetch(__prim_visibility, triAddr) & visibility) +#endif + { + /* record intersection */ + isect->prim = triAddr; + isect->object = object; + isect->type = PRIMITIVE_TRIANGLE; + isect->u = u; + isect->v = v; + isect->t = t; + return true; + } + } + } + } + + return false; +} + +/* Special ray intersection routines for subsurface scattering. In that case we + * only want to intersect with primitives in the same object, and if case of + * multiple hits we pick a single random primitive as the intersection point. 
*/ + +#ifdef __SUBSURFACE__ +ccl_device_inline void triangle_intersect_subsurface(KernelGlobals *kg, Intersection *isect_array, + float3 P, float3 dir, int object, int triAddr, float tmax, uint *num_hits, uint *lcg_state, int max_hits) +{ + /* compute and check intersection t-value */ + float4 v00 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0); + float4 v11 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1); + + float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z; + float invDz = 1.0f/(dir.x*v00.x + dir.y*v00.y + dir.z*v00.z); + float t = Oz * invDz; + + if(t > 0.0f && t < tmax) { + /* compute and check barycentric u */ + float Ox = v11.w + P.x*v11.x + P.y*v11.y + P.z*v11.z; + float Dx = dir.x*v11.x + dir.y*v11.y + dir.z*v11.z; + float u = Ox + t*Dx; + + if(u >= 0.0f) { + /* compute and check barycentric v */ + float4 v22 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2); + float Oy = v22.w + P.x*v22.x + P.y*v22.y + P.z*v22.z; + float Dy = dir.x*v22.x + dir.y*v22.y + dir.z*v22.z; + float v = Oy + t*Dy; + + if(v >= 0.0f && u + v <= 1.0f) { + (*num_hits)++; + + int hit; + + if(*num_hits <= max_hits) { + hit = *num_hits - 1; + } + else { + /* reservoir sampling: if we are at the maximum number of + * hits, randomly replace element or skip it */ + hit = lcg_step_uint(lcg_state) % *num_hits; + + if(hit >= max_hits) + return; + } + + /* record intersection */ + Intersection *isect = &isect_array[hit]; + isect->prim = triAddr; + isect->object = object; + isect->type = PRIMITIVE_TRIANGLE; + isect->u = u; + isect->v = v; + isect->t = t; + } + } + } +} +#endif + +CCL_NAMESPACE_END + diff --git a/intern/cycles/kernel/geom/geom_volume.h b/intern/cycles/kernel/geom/geom_volume.h new file mode 100644 index 00000000000..963d6cbee9c --- /dev/null +++ b/intern/cycles/kernel/geom/geom_volume.h @@ -0,0 +1,75 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file 
except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License + */ + +/* Volume Primitive + * + * Volumes are just regions inside meshes with the mesh surface as boundaries. + * There isn't as much data to access as for surfaces, there is only a position + * to do lookups in 3D voxel or procedural textures. + * + * 3D voxel textures can be assigned as attributes per mesh, which means the + * same shader can be used for volume objects with different densities, etc. */ + +CCL_NAMESPACE_BEGIN + +#ifdef __VOLUME__ + +/* Return position normalized to 0..1 in mesh bounds */ + +ccl_device float3 volume_normalized_position(KernelGlobals *kg, const ShaderData *sd, float3 P) +{ + /* todo: optimize this so it's just a single matrix multiplication when + * possible (not motion blur), or perhaps even just translation + scale */ + AttributeElement attr_elem; + int attr_offset = find_attribute(kg, sd, ATTR_STD_GENERATED_TRANSFORM, &attr_elem); + + object_inverse_position_transform(kg, sd, &P); + + if(attr_offset != ATTR_STD_NOT_FOUND) { + Transform tfm = primitive_attribute_matrix(kg, sd, attr_offset); + P = transform_point(&tfm, P); + } + + return P; +} + +ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int id, float *dx, float *dy) +{ + float3 P = volume_normalized_position(kg, sd, sd->P); + float4 r = kernel_tex_image_interp_3d(id, P.x, P.y, P.z); + + if(dx) *dx = 0.0f; + if(dx) *dy = 0.0f; + + /* todo: support float textures to lower memory usage for single floats */ + return average(float4_to_float3(r)); +} + +ccl_device 
float3 volume_attribute_float3(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int id, float3 *dx, float3 *dy) +{ + float3 P = volume_normalized_position(kg, sd, sd->P); + float4 r = kernel_tex_image_interp_3d(id, P.x, P.y, P.z); + + if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); + if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); + + return float4_to_float3(r); +} + +#endif + +CCL_NAMESPACE_END + diff --git a/intern/cycles/kernel/kernel.cpp b/intern/cycles/kernel/kernel.cpp index 6cd14d3c51c..173028d50c8 100644 --- a/intern/cycles/kernel/kernel.cpp +++ b/intern/cycles/kernel/kernel.cpp @@ -37,7 +37,7 @@ void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t s assert(0); } -void kernel_tex_copy(KernelGlobals *kg, const char *name, device_ptr mem, size_t width, size_t height) +void kernel_tex_copy(KernelGlobals *kg, const char *name, device_ptr mem, size_t width, size_t height, size_t depth, InterpolationType interpolation) { if(0) { } @@ -61,8 +61,8 @@ void kernel_tex_copy(KernelGlobals *kg, const char *name, device_ptr mem, size_t if(tex) { tex->data = (float4*)mem; - tex->width = width; - tex->height = height; + tex->dimensions_set(width, height, depth); + tex->interpolation = interpolation; } } else if(strstr(name, "__tex_image")) { @@ -76,8 +76,8 @@ void kernel_tex_copy(KernelGlobals *kg, const char *name, device_ptr mem, size_t if(tex) { tex->data = (uchar4*)mem; - tex->width = width; - tex->height = height; + tex->dimensions_set(width, height, depth); + tex->interpolation = interpolation; } } else diff --git a/intern/cycles/kernel/kernel.cu b/intern/cycles/kernel/kernel.cu index 5e6748c66fc..636e48b5456 100644 --- a/intern/cycles/kernel/kernel.cu +++ b/intern/cycles/kernel/kernel.cu @@ -24,7 +24,83 @@ #include "kernel_path.h" #include "kernel_displace.h" -extern "C" __global__ void kernel_cuda_path_trace(float *buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh, int offset, int stride) +/* device data taken from 
CUDA occupancy calculator */ + +#ifdef __CUDA_ARCH__ + +/* 2.0 and 2.1 */ +#if __CUDA_ARCH__ == 200 || __CUDA_ARCH__ == 210 +#define CUDA_MULTIPRESSOR_MAX_REGISTERS 32768 +#define CUDA_MULTIPROCESSOR_MAX_BLOCKS 8 +#define CUDA_BLOCK_MAX_THREADS 1024 +#define CUDA_THREAD_MAX_REGISTERS 63 + +/* tunable parameters */ +#define CUDA_THREADS_BLOCK_WIDTH 16 +#define CUDA_KERNEL_MAX_REGISTERS 32 +#define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 40 + +/* 3.0 and 3.5 */ +#elif __CUDA_ARCH__ == 300 || __CUDA_ARCH__ == 350 +#define CUDA_MULTIPRESSOR_MAX_REGISTERS 65536 +#define CUDA_MULTIPROCESSOR_MAX_BLOCKS 16 +#define CUDA_BLOCK_MAX_THREADS 1024 +#define CUDA_THREAD_MAX_REGISTERS 63 + +/* tunable parameters */ +#define CUDA_THREADS_BLOCK_WIDTH 16 +#define CUDA_KERNEL_MAX_REGISTERS 63 +#define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63 + +/* 5.0 */ +#elif __CUDA_ARCH__ == 500 +#define CUDA_MULTIPRESSOR_MAX_REGISTERS 65536 +#define CUDA_MULTIPROCESSOR_MAX_BLOCKS 32 +#define CUDA_BLOCK_MAX_THREADS 1024 +#define CUDA_THREAD_MAX_REGISTERS 255 + +/* tunable parameters */ +#define CUDA_THREADS_BLOCK_WIDTH 16 +#define CUDA_KERNEL_MAX_REGISTERS 63 +#define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63 + +/* unknown architecture */ +#else +#error "Unknown or unuspported CUDA architecture, can't determine launch bounds" +#endif + +/* compute number of threads per block and minimum blocks per multiprocessor + * given the maximum number of registers per thread */ + +#define CUDA_LAUNCH_BOUNDS(threads_block_width, thread_num_registers) \ + __launch_bounds__( \ + threads_block_width*threads_block_width, \ + CUDA_MULTIPRESSOR_MAX_REGISTERS/(threads_block_width*threads_block_width*thread_num_registers) \ + ) + +/* sanity checks */ + +#if CUDA_THREADS_BLOCK_WIDTH*CUDA_THREADS_BLOCK_WIDTH > CUDA_BLOCK_MAX_THREADS +#error "Maximum number of threads per block exceeded" +#endif + +#if CUDA_MULTIPRESSOR_MAX_REGISTERS/(CUDA_THREADS_BLOCK_WIDTH*CUDA_THREADS_BLOCK_WIDTH*CUDA_KERNEL_MAX_REGISTERS) > 
CUDA_MULTIPROCESSOR_MAX_BLOCKS +#error "Maximum number of blocks per multiprocessor exceeded" +#endif + +#if CUDA_KERNEL_MAX_REGISTERS > CUDA_THREAD_MAX_REGISTERS +#error "Maximum number of registers per thread exceeded" +#endif + +#if CUDA_KERNEL_BRANCHED_MAX_REGISTERS > CUDA_THREAD_MAX_REGISTERS +#error "Maximum number of registers per thread exceeded" +#endif + +/* kernels */ + +extern "C" __global__ void +CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) +kernel_cuda_path_trace(float *buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh, int offset, int stride) { int x = sx + blockDim.x*blockIdx.x + threadIdx.x; int y = sy + blockDim.y*blockIdx.y + threadIdx.y; @@ -34,7 +110,9 @@ extern "C" __global__ void kernel_cuda_path_trace(float *buffer, uint *rng_state } #ifdef __BRANCHED_PATH__ -extern "C" __global__ void kernel_cuda_branched_path_trace(float *buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh, int offset, int stride) +extern "C" __global__ void +CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_BRANCHED_MAX_REGISTERS) +kernel_cuda_branched_path_trace(float *buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh, int offset, int stride) { int x = sx + blockDim.x*blockIdx.x + threadIdx.x; int y = sy + blockDim.y*blockIdx.y + threadIdx.y; @@ -44,7 +122,9 @@ extern "C" __global__ void kernel_cuda_branched_path_trace(float *buffer, uint * } #endif -extern "C" __global__ void kernel_cuda_convert_to_byte(uchar4 *rgba, float *buffer, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride) +extern "C" __global__ void +CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) +kernel_cuda_convert_to_byte(uchar4 *rgba, float *buffer, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride) { int x = sx + blockDim.x*blockIdx.x + threadIdx.x; int y = sy + blockDim.y*blockIdx.y + threadIdx.y; @@ -53,7 +133,9 @@ extern "C" __global__ void 
kernel_cuda_convert_to_byte(uchar4 *rgba, float *buff kernel_film_convert_to_byte(NULL, rgba, buffer, sample_scale, x, y, offset, stride); } -extern "C" __global__ void kernel_cuda_convert_to_half_float(uchar4 *rgba, float *buffer, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride) +extern "C" __global__ void +CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) +kernel_cuda_convert_to_half_float(uchar4 *rgba, float *buffer, float sample_scale, int sx, int sy, int sw, int sh, int offset, int stride) { int x = sx + blockDim.x*blockIdx.x + threadIdx.x; int y = sy + blockDim.y*blockIdx.y + threadIdx.y; @@ -62,10 +144,14 @@ extern "C" __global__ void kernel_cuda_convert_to_half_float(uchar4 *rgba, float kernel_film_convert_to_half_float(NULL, rgba, buffer, sample_scale, x, y, offset, stride); } -extern "C" __global__ void kernel_cuda_shader(uint4 *input, float4 *output, int type, int sx) +extern "C" __global__ void +CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS) +kernel_cuda_shader(uint4 *input, float4 *output, int type, int sx) { int x = sx + blockDim.x*blockIdx.x + threadIdx.x; kernel_shader_evaluate(NULL, input, output, (ShaderEvalType)type, x); } +#endif + diff --git a/intern/cycles/kernel/kernel.h b/intern/cycles/kernel/kernel.h index 039dc791b08..c4a08646bab 100644 --- a/intern/cycles/kernel/kernel.h +++ b/intern/cycles/kernel/kernel.h @@ -32,7 +32,7 @@ void *kernel_osl_memory(KernelGlobals *kg); bool kernel_osl_use(KernelGlobals *kg); void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t size); -void kernel_tex_copy(KernelGlobals *kg, const char *name, device_ptr mem, size_t width, size_t height); +void kernel_tex_copy(KernelGlobals *kg, const char *name, device_ptr mem, size_t width, size_t height, size_t depth, InterpolationType interpolation=INTERPOLATION_LINEAR); void kernel_cpu_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, 
int y, int offset, int stride); diff --git a/intern/cycles/kernel/kernel_accumulate.h b/intern/cycles/kernel/kernel_accumulate.h index 582a220ab3c..b4f6dcdace9 100644 --- a/intern/cycles/kernel/kernel_accumulate.h +++ b/intern/cycles/kernel/kernel_accumulate.h @@ -407,5 +407,30 @@ ccl_device_inline float3 path_radiance_clamp_and_sum(KernelGlobals *kg, PathRadi return L_sum; } +ccl_device_inline void path_radiance_accum_sample(PathRadiance *L, PathRadiance *L_sample, int num_samples) +{ + float fac = 1.0f/num_samples; + +#ifdef __PASSES__ + L->direct_diffuse += L_sample->direct_diffuse*fac; + L->direct_glossy += L_sample->direct_glossy*fac; + L->direct_transmission += L_sample->direct_transmission*fac; + L->direct_subsurface += L_sample->direct_subsurface*fac; + + L->indirect_diffuse += L_sample->indirect_diffuse*fac; + L->indirect_glossy += L_sample->indirect_glossy*fac; + L->indirect_transmission += L_sample->indirect_transmission*fac; + L->indirect_subsurface += L_sample->indirect_subsurface*fac; + + L->emission += L_sample->emission*fac; + L->background += L_sample->background*fac; + L->ao += L_sample->ao*fac; + L->shadow += L_sample->shadow*fac; + L->mist += L_sample->mist*fac; +#else + *L += *L_sample * fac; +#endif +} + CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_avx.cpp b/intern/cycles/kernel/kernel_avx.cpp index d2a7142c551..354214c406e 100644 --- a/intern/cycles/kernel/kernel_avx.cpp +++ b/intern/cycles/kernel/kernel_avx.cpp @@ -77,6 +77,6 @@ CCL_NAMESPACE_END /* needed for some linkers in combination with scons making empty compilation unit in a library */ void __dummy_function_cycles_avx(void); -void __dummy_function_cycles_avx(void){} +void __dummy_function_cycles_avx(void) {} #endif diff --git a/intern/cycles/kernel/kernel_bvh.h b/intern/cycles/kernel/kernel_bvh.h deleted file mode 100644 index 93e546eaece..00000000000 --- a/intern/cycles/kernel/kernel_bvh.h +++ /dev/null @@ -1,1258 +0,0 @@ -/* - * Adapted from code Copyright 2009-2010 
NVIDIA Corporation - * Modifications Copyright 2011, Blender Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -CCL_NAMESPACE_BEGIN - -/* - * "Persistent while-while kernel" used in: - * - * "Understanding the Efficiency of Ray Traversal on GPUs", - * Timo Aila and Samuli Laine, - * Proc. High-Performance Graphics 2009 - */ - -/* bottom-most stack entry, indicating the end of traversal */ -#define ENTRYPOINT_SENTINEL 0x76543210 - -/* 64 object BVH + 64 mesh BVH + 64 object node splitting */ -#define BVH_STACK_SIZE 192 -#define BVH_NODE_SIZE 4 -#define TRI_NODE_SIZE 3 - -/* silly workaround for float extended precision that happens when compiling - * without sse support on x86, it results in different results for float ops - * that you would otherwise expect to compare correctly */ -#if !defined(__i386__) || defined(__SSE__) -#define NO_EXTENDED_PRECISION -#else -#define NO_EXTENDED_PRECISION volatile -#endif - -ccl_device_inline float3 bvh_inverse_direction(float3 dir) -{ - /* avoid divide by zero (ooeps = exp2f(-80.0f)) */ - float ooeps = 0.00000000000000000000000082718061255302767487140869206996285356581211090087890625f; - float3 idir; - - idir.x = 1.0f/((fabsf(dir.x) > ooeps)? dir.x: copysignf(ooeps, dir.x)); - idir.y = 1.0f/((fabsf(dir.y) > ooeps)? dir.y: copysignf(ooeps, dir.y)); - idir.z = 1.0f/((fabsf(dir.z) > ooeps)? 
dir.z: copysignf(ooeps, dir.z)); - - return idir; -} - -ccl_device_inline void bvh_instance_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *idir, float *t, const float tmax) -{ - Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); - - *P = transform_point(&tfm, ray->P); - - float3 dir = transform_direction(&tfm, ray->D); - - float len; - dir = normalize_len(dir, &len); - - *idir = bvh_inverse_direction(dir); - - if(*t != FLT_MAX) - *t *= len; -} - -ccl_device_inline void bvh_instance_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *idir, float *t, const float tmax) -{ - if(*t != FLT_MAX) { - Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM); - *t *= len(transform_direction(&tfm, 1.0f/(*idir))); - } - - *P = ray->P; - *idir = bvh_inverse_direction(ray->D); -} - -#ifdef __OBJECT_MOTION__ -ccl_device_inline void bvh_instance_motion_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *idir, float *t, Transform *tfm, const float tmax) -{ - Transform itfm; - *tfm = object_fetch_transform_motion_test(kg, object, ray->time, &itfm); - - *P = transform_point(&itfm, ray->P); - - float3 dir = transform_direction(&itfm, ray->D); - - float len; - dir = normalize_len(dir, &len); - - *idir = bvh_inverse_direction(dir); - - if(*t != FLT_MAX) - *t *= len; -} - -ccl_device_inline void bvh_instance_motion_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *idir, float *t, Transform *tfm, const float tmax) -{ - if(*t != FLT_MAX) - *t *= len(transform_direction(tfm, 1.0f/(*idir))); - - *P = ray->P; - *idir = bvh_inverse_direction(ray->D); -} -#endif - -/* Sven Woop's algorithm */ -ccl_device_inline bool bvh_triangle_intersect(KernelGlobals *kg, Intersection *isect, - float3 P, float3 idir, uint visibility, int object, int triAddr) -{ - /* compute and check intersection t-value */ - float4 v00 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0); - float4 v11 = 
kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1); - float3 dir = 1.0f/idir; - - float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z; - float invDz = 1.0f/(dir.x*v00.x + dir.y*v00.y + dir.z*v00.z); - float t = Oz * invDz; - - if(t > 0.0f && t < isect->t) { - /* compute and check barycentric u */ - float Ox = v11.w + P.x*v11.x + P.y*v11.y + P.z*v11.z; - float Dx = dir.x*v11.x + dir.y*v11.y + dir.z*v11.z; - float u = Ox + t*Dx; - - if(u >= 0.0f) { - /* compute and check barycentric v */ - float4 v22 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2); - float Oy = v22.w + P.x*v22.x + P.y*v22.y + P.z*v22.z; - float Dy = dir.x*v22.x + dir.y*v22.y + dir.z*v22.z; - float v = Oy + t*Dy; - - if(v >= 0.0f && u + v <= 1.0f) { -#ifdef __VISIBILITY_FLAG__ - /* visibility flag test. we do it here under the assumption - * that most triangles are culled by node flags */ - if(kernel_tex_fetch(__prim_visibility, triAddr) & visibility) -#endif - { - /* record intersection */ - isect->prim = triAddr; - isect->object = object; - isect->u = u; - isect->v = v; - isect->t = t; - return true; - } - } - } - } - - return false; -} - -#ifdef __HAIR__ -ccl_device_inline void curvebounds(float *lower, float *upper, float *extremta, float *extrema, float *extremtb, float *extremb, float p0, float p1, float p2, float p3) -{ - float halfdiscroot = (p2 * p2 - 3 * p3 * p1); - float ta = -1.0f; - float tb = -1.0f; - *extremta = -1.0f; - *extremtb = -1.0f; - *upper = p0; - *lower = p0 + p1 + p2 + p3; - *extrema = *upper; - *extremb = *lower; - if(*lower >= *upper) { - *upper = *lower; - *lower = p0; - } - - if(halfdiscroot >= 0) { - halfdiscroot = sqrt(halfdiscroot); - ta = (-p2 - halfdiscroot) / (3 * p3); - tb = (-p2 + halfdiscroot) / (3 * p3); - } - - float t2; - float t3; - if(ta > 0.0f && ta < 1.0f) { - t2 = ta * ta; - t3 = t2 * ta; - *extremta = ta; - *extrema = p3 * t3 + p2 * t2 + p1 * ta + p0; - if(*extrema > *upper) { - *upper = *extrema; - } - if(*extrema < *lower) { - *lower = 
*extrema; - } - } - if(tb > 0.0f && tb < 1.0f) { - t2 = tb * tb; - t3 = t2 * tb; - *extremtb = tb; - *extremb = p3 * t3 + p2 * t2 + p1 * tb + p0; - if(*extremb >= *upper) { - *upper = *extremb; - } - if(*extremb <= *lower) { - *lower = *extremb; - } - } -} - -#ifdef __KERNEL_SSE2__ -ccl_device_inline __m128 transform_point_T3(const __m128 t[3], const __m128 &a) -{ - return fma(broadcast<0>(a), t[0], fma(broadcast<1>(a), t[1], _mm_mul_ps(broadcast<2>(a), t[2]))); -} -#endif - -#ifdef __KERNEL_SSE2__ -/* Pass P and idir by reference to aligned vector */ -ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersection *isect, - const float3 &P, const float3 &idir, uint visibility, int object, int curveAddr, int segment, uint *lcg_state, float difl, float extmax) -#else -ccl_device_inline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Intersection *isect, - float3 P, float3 idir, uint visibility, int object, int curveAddr, int segment, uint *lcg_state, float difl, float extmax) -#endif -{ - float epsilon = 0.0f; - float r_st, r_en; - - int depth = kernel_data.curve.subdivisions; - int flags = kernel_data.curve.curveflags; - int prim = kernel_tex_fetch(__prim_index, curveAddr); - -#ifdef __KERNEL_SSE2__ - __m128 vdir = _mm_div_ps(_mm_set1_ps(1.0f), (__m128 &)idir); - __m128 vcurve_coef[4]; - const float3 *curve_coef = (float3 *)vcurve_coef; - - { - __m128 dtmp = _mm_mul_ps(vdir, vdir); - __m128 d_ss = _mm_sqrt_ss(_mm_add_ss(dtmp, broadcast<2>(dtmp))); - __m128 rd_ss = _mm_div_ss(_mm_set_ss(1.0f), d_ss); - - __m128i v00vec = _mm_load_si128((__m128i *)&kg->__curves.data[prim]); - int2 &v00 = (int2 &)v00vec; - - int k0 = v00.x + segment; - int k1 = k0 + 1; - int ka = max(k0 - 1, v00.x); - int kb = min(k1 + 1, v00.x + v00.y - 1); - - __m128 P0 = _mm_load_ps(&kg->__curve_keys.data[ka].x); - __m128 P1 = _mm_load_ps(&kg->__curve_keys.data[k0].x); - __m128 P2 = _mm_load_ps(&kg->__curve_keys.data[k1].x); - __m128 P3 = 
_mm_load_ps(&kg->__curve_keys.data[kb].x); - - __m128 rd_sgn = set_sign_bit<0, 1, 1, 1>(broadcast<0>(rd_ss)); - __m128 mul_zxxy = _mm_mul_ps(shuffle<2, 0, 0, 1>(vdir), rd_sgn); - __m128 mul_yz = _mm_mul_ps(shuffle<1, 2, 1, 2>(vdir), mul_zxxy); - __m128 mul_shuf = shuffle<0, 1, 2, 3>(mul_zxxy, mul_yz); - __m128 vdir0 = _mm_and_ps(vdir, _mm_castsi128_ps(_mm_setr_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0))); - - __m128 htfm0 = shuffle<0, 2, 0, 3>(mul_shuf, vdir0); - __m128 htfm1 = shuffle<1, 0, 1, 3>(_mm_set_ss(_mm_cvtss_f32(d_ss)), vdir0); - __m128 htfm2 = shuffle<1, 3, 2, 3>(mul_shuf, vdir0); - - __m128 htfm[] = { htfm0, htfm1, htfm2 }; - __m128 p0 = transform_point_T3(htfm, _mm_sub_ps(P0, (__m128 &)P)); - __m128 p1 = transform_point_T3(htfm, _mm_sub_ps(P1, (__m128 &)P)); - __m128 p2 = transform_point_T3(htfm, _mm_sub_ps(P2, (__m128 &)P)); - __m128 p3 = transform_point_T3(htfm, _mm_sub_ps(P3, (__m128 &)P)); - - float fc = 0.71f; - __m128 vfc = _mm_set1_ps(fc); - __m128 vfcxp3 = _mm_mul_ps(vfc, p3); - - vcurve_coef[0] = p1; - vcurve_coef[1] = _mm_mul_ps(vfc, _mm_sub_ps(p2, p0)); - vcurve_coef[2] = fma(_mm_set1_ps(fc * 2.0f), p0, fma(_mm_set1_ps(fc - 3.0f), p1, fms(_mm_set1_ps(3.0f - 2.0f * fc), p2, vfcxp3))); - vcurve_coef[3] = fms(_mm_set1_ps(fc - 2.0f), _mm_sub_ps(p2, p1), fms(vfc, p0, vfcxp3)); - - r_st = ((float4 &)P1).w; - r_en = ((float4 &)P2).w; - } -#else - float3 curve_coef[4]; - - /* curve Intersection check */ - float3 dir = 1.0f/idir; - - /* obtain curve parameters */ - { - /* ray transform created - this should be created at beginning of intersection loop */ - Transform htfm; - float d = sqrtf(dir.x * dir.x + dir.z * dir.z); - htfm = make_transform( - dir.z / d, 0, -dir.x /d, 0, - -dir.x * dir.y /d, d, -dir.y * dir.z /d, 0, - dir.x, dir.y, dir.z, 0, - 0, 0, 0, 1); - - float4 v00 = kernel_tex_fetch(__curves, prim); - - int k0 = __float_as_int(v00.x) + segment; - int k1 = k0 + 1; - - int ka = max(k0 - 1,__float_as_int(v00.x)); - int kb = min(k1 + 
1,__float_as_int(v00.x) + __float_as_int(v00.y) - 1); - - float4 P0 = kernel_tex_fetch(__curve_keys, ka); - float4 P1 = kernel_tex_fetch(__curve_keys, k0); - float4 P2 = kernel_tex_fetch(__curve_keys, k1); - float4 P3 = kernel_tex_fetch(__curve_keys, kb); - - float3 p0 = transform_point(&htfm, float4_to_float3(P0) - P); - float3 p1 = transform_point(&htfm, float4_to_float3(P1) - P); - float3 p2 = transform_point(&htfm, float4_to_float3(P2) - P); - float3 p3 = transform_point(&htfm, float4_to_float3(P3) - P); - - float fc = 0.71f; - curve_coef[0] = p1; - curve_coef[1] = -fc*p0 + fc*p2; - curve_coef[2] = 2.0f * fc * p0 + (fc - 3.0f) * p1 + (3.0f - 2.0f * fc) * p2 - fc * p3; - curve_coef[3] = -fc * p0 + (2.0f - fc) * p1 + (fc - 2.0f) * p2 + fc * p3; - r_st = P1.w; - r_en = P2.w; - } -#endif - - float r_curr = max(r_st, r_en); - - if((flags & CURVE_KN_RIBBONS) || !(flags & CURVE_KN_BACKFACING)) - epsilon = 2 * r_curr; - - /* find bounds - this is slow for cubic curves */ - float upper, lower; - - float zextrem[4]; - curvebounds(&lower, &upper, &zextrem[0], &zextrem[1], &zextrem[2], &zextrem[3], curve_coef[0].z, curve_coef[1].z, curve_coef[2].z, curve_coef[3].z); - if(lower - r_curr > isect->t || upper + r_curr < epsilon) - return false; - - /* minimum width extension */ - float mw_extension = min(difl * fabsf(upper), extmax); - float r_ext = mw_extension + r_curr; - - float xextrem[4]; - curvebounds(&lower, &upper, &xextrem[0], &xextrem[1], &xextrem[2], &xextrem[3], curve_coef[0].x, curve_coef[1].x, curve_coef[2].x, curve_coef[3].x); - if(lower > r_ext || upper < -r_ext) - return false; - - float yextrem[4]; - curvebounds(&lower, &upper, &yextrem[0], &yextrem[1], &yextrem[2], &yextrem[3], curve_coef[0].y, curve_coef[1].y, curve_coef[2].y, curve_coef[3].y); - if(lower > r_ext || upper < -r_ext) - return false; - - /* setup recurrent loop */ - int level = 1 << depth; - int tree = 0; - float resol = 1.0f / (float)level; - bool hit = false; - - /* begin loop */ - 
while(!(tree >> (depth))) { - float i_st = tree * resol; - float i_en = i_st + (level * resol); -#ifdef __KERNEL_SSE2__ - __m128 vi_st = _mm_set1_ps(i_st), vi_en = _mm_set1_ps(i_en); - __m128 vp_st = fma(fma(fma(vcurve_coef[3], vi_st, vcurve_coef[2]), vi_st, vcurve_coef[1]), vi_st, vcurve_coef[0]); - __m128 vp_en = fma(fma(fma(vcurve_coef[3], vi_en, vcurve_coef[2]), vi_en, vcurve_coef[1]), vi_en, vcurve_coef[0]); - - __m128 vbmin = _mm_min_ps(vp_st, vp_en); - __m128 vbmax = _mm_max_ps(vp_st, vp_en); - - float3 &bmin = (float3 &)vbmin, &bmax = (float3 &)vbmax; - float &bminx = bmin.x, &bminy = bmin.y, &bminz = bmin.z; - float &bmaxx = bmax.x, &bmaxy = bmax.y, &bmaxz = bmax.z; - float3 &p_st = (float3 &)vp_st, &p_en = (float3 &)vp_en; -#else - float3 p_st = ((curve_coef[3] * i_st + curve_coef[2]) * i_st + curve_coef[1]) * i_st + curve_coef[0]; - float3 p_en = ((curve_coef[3] * i_en + curve_coef[2]) * i_en + curve_coef[1]) * i_en + curve_coef[0]; - - float bminx = min(p_st.x, p_en.x); - float bmaxx = max(p_st.x, p_en.x); - float bminy = min(p_st.y, p_en.y); - float bmaxy = max(p_st.y, p_en.y); - float bminz = min(p_st.z, p_en.z); - float bmaxz = max(p_st.z, p_en.z); -#endif - - if(xextrem[0] >= i_st && xextrem[0] <= i_en) { - bminx = min(bminx,xextrem[1]); - bmaxx = max(bmaxx,xextrem[1]); - } - if(xextrem[2] >= i_st && xextrem[2] <= i_en) { - bminx = min(bminx,xextrem[3]); - bmaxx = max(bmaxx,xextrem[3]); - } - if(yextrem[0] >= i_st && yextrem[0] <= i_en) { - bminy = min(bminy,yextrem[1]); - bmaxy = max(bmaxy,yextrem[1]); - } - if(yextrem[2] >= i_st && yextrem[2] <= i_en) { - bminy = min(bminy,yextrem[3]); - bmaxy = max(bmaxy,yextrem[3]); - } - if(zextrem[0] >= i_st && zextrem[0] <= i_en) { - bminz = min(bminz,zextrem[1]); - bmaxz = max(bmaxz,zextrem[1]); - } - if(zextrem[2] >= i_st && zextrem[2] <= i_en) { - bminz = min(bminz,zextrem[3]); - bmaxz = max(bmaxz,zextrem[3]); - } - - float r1 = r_st + (r_en - r_st) * i_st; - float r2 = r_st + (r_en - r_st) * i_en; - 
r_curr = max(r1, r2); - - mw_extension = min(difl * fabsf(bmaxz), extmax); - float r_ext = mw_extension + r_curr; - float coverage = 1.0f; - - if (bminz - r_curr > isect->t || bmaxz + r_curr < epsilon || bminx > r_ext|| bmaxx < -r_ext|| bminy > r_ext|| bmaxy < -r_ext) { - /* the bounding box does not overlap the square centered at O */ - tree += level; - level = tree & -tree; - } - else if (level == 1) { - - /* the maximum recursion depth is reached. - * check if dP0.(Q-P0)>=0 and dPn.(Pn-Q)>=0. - * dP* is reversed if necessary.*/ - float t = isect->t; - float u = 0.0f; - if(flags & CURVE_KN_RIBBONS) { - float3 tg = (p_en - p_st); - float w = tg.x * tg.x + tg.y * tg.y; - if (w == 0) { - tree++; - level = tree & -tree; - continue; - } - w = -(p_st.x * tg.x + p_st.y * tg.y) / w; - w = clamp((float)w, 0.0f, 1.0f); - - /* compute u on the curve segment */ - u = i_st * (1 - w) + i_en * w; - r_curr = r_st + (r_en - r_st) * u; - /* compare x-y distances */ - float3 p_curr = ((curve_coef[3] * u + curve_coef[2]) * u + curve_coef[1]) * u + curve_coef[0]; - - float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1]; - if (dot(tg, dp_st)< 0) - dp_st *= -1; - if (dot(dp_st, -p_st) + p_curr.z * dp_st.z < 0) { - tree++; - level = tree & -tree; - continue; - } - float3 dp_en = (3 * curve_coef[3] * i_en + 2 * curve_coef[2]) * i_en + curve_coef[1]; - if (dot(tg, dp_en) < 0) - dp_en *= -1; - if (dot(dp_en, p_en) - p_curr.z * dp_en.z < 0) { - tree++; - level = tree & -tree; - continue; - } - - /* compute coverage */ - float r_ext = r_curr; - coverage = 1.0f; - if(difl != 0.0f) { - mw_extension = min(difl * fabsf(bmaxz), extmax); - r_ext = mw_extension + r_curr; - float d = sqrtf(p_curr.x * p_curr.x + p_curr.y * p_curr.y); - float d0 = d - r_curr; - float d1 = d + r_curr; - if (d0 >= 0) - coverage = (min(d1 / mw_extension, 1.0f) - min(d0 / mw_extension, 1.0f)) * 0.5f; - else // inside - coverage = (min(d1 / mw_extension, 1.0f) + min(-d0 / mw_extension, 
1.0f)) * 0.5f; - } - - if (p_curr.x * p_curr.x + p_curr.y * p_curr.y >= r_ext * r_ext || p_curr.z <= epsilon || isect->t < p_curr.z) { - tree++; - level = tree & -tree; - continue; - } - - t = p_curr.z; - } - else { - float l = len(p_en - p_st); - /* minimum width extension */ - float or1 = r1; - float or2 = r2; - if(difl != 0.0f) { - mw_extension = min(len(p_st - P) * difl, extmax); - or1 = r1 < mw_extension ? mw_extension : r1; - mw_extension = min(len(p_en - P) * difl, extmax); - or2 = r2 < mw_extension ? mw_extension : r2; - } - /* --- */ - float3 tg = (p_en - p_st) / l; - float gd = (or2 - or1) / l; - float difz = -dot(p_st,tg); - float cyla = 1.0f - (tg.z * tg.z * (1 + gd*gd)); - float halfb = (-p_st.z - tg.z*(difz + gd*(difz*gd + or1))); - float tcentre = -halfb/cyla; - float zcentre = difz + (tg.z * tcentre); - float3 tdif = - p_st; - tdif.z += tcentre; - float tdifz = dot(tdif,tg); - float tb = 2*(tdif.z - tg.z*(tdifz + gd*(tdifz*gd + or1))); - float tc = dot(tdif,tdif) - tdifz * tdifz * (1 + gd*gd) - or1*or1 - 2*or1*tdifz*gd; - float td = tb*tb - 4*cyla*tc; - if (td < 0.0f) { - tree++; - level = tree & -tree; - continue; - } - - float rootd = sqrtf(td); - float correction = ((-tb - rootd)/(2*cyla)); - t = tcentre + correction; - - float3 dp_st = (3 * curve_coef[3] * i_st + 2 * curve_coef[2]) * i_st + curve_coef[1]; - if (dot(tg, dp_st)< 0) - dp_st *= -1; - float3 dp_en = (3 * curve_coef[3] * i_en + 2 * curve_coef[2]) * i_en + curve_coef[1]; - if (dot(tg, dp_en) < 0) - dp_en *= -1; - - if(flags & CURVE_KN_BACKFACING && (dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 || isect->t < t || t <= 0.0f)) { - correction = ((-tb + rootd)/(2*cyla)); - t = tcentre + correction; - } - - if (dot(dp_st, -p_st) + t * dp_st.z < 0 || dot(dp_en, p_en) - t * dp_en.z < 0 || isect->t < t || t <= 0.0f) { - tree++; - level = tree & -tree; - continue; - } - - float w = (zcentre + (tg.z * correction))/l; - w = clamp((float)w, 0.0f, 1.0f); - /* compute u 
on the curve segment */ - u = i_st * (1 - w) + i_en * w; - r_curr = r1 + (r2 - r1) * w; - r_ext = or1 + (or2 - or1) * w; - coverage = r_curr/r_ext; - - } - /* we found a new intersection */ - - /* stochastic fade from minimum width */ - if(lcg_state && coverage != 1.0f) { - if(lcg_step_float(lcg_state) > coverage) - return hit; - } - -#ifdef __VISIBILITY_FLAG__ - /* visibility flag test. we do it here under the assumption - * that most triangles are culled by node flags */ - if(kernel_tex_fetch(__prim_visibility, curveAddr) & visibility) -#endif - { - /* record intersection */ - isect->prim = curveAddr; - isect->segment = segment; - isect->object = object; - isect->u = u; - isect->v = 0.0f; - /*isect->v = 1.0f - coverage; */ - isect->t = t; - hit = true; - } - - tree++; - level = tree & -tree; - } - else { - /* split the curve into two curves and process */ - level = level >> 1; - } - } - - return hit; -} - -ccl_device_inline bool bvh_curve_intersect(KernelGlobals *kg, Intersection *isect, - float3 P, float3 idir, uint visibility, int object, int curveAddr, int segment, uint *lcg_state, float difl, float extmax) -{ - /* curve Intersection check */ - int flags = kernel_data.curve.curveflags; - - int prim = kernel_tex_fetch(__prim_index, curveAddr); - float4 v00 = kernel_tex_fetch(__curves, prim); - - int cnum = __float_as_int(v00.x); - int k0 = cnum + segment; - int k1 = k0 + 1; - - float4 P1 = kernel_tex_fetch(__curve_keys, k0); - float4 P2 = kernel_tex_fetch(__curve_keys, k1); - - float or1 = P1.w; - float or2 = P2.w; - float3 p1 = float4_to_float3(P1); - float3 p2 = float4_to_float3(P2); - - /* minimum width extension */ - float r1 = or1; - float r2 = or2; - if(difl != 0.0f) { - float pixelsize = min(len(p1 - P) * difl, extmax); - r1 = or1 < pixelsize ? pixelsize : or1; - pixelsize = min(len(p2 - P) * difl, extmax); - r2 = or2 < pixelsize ? 
pixelsize : or2; - } - /* --- */ - - float mr = max(r1,r2); - float3 dif = P - p1; - float3 dir = 1.0f/idir; - float l = len(p2 - p1); - - float sp_r = mr + 0.5f * l; - float3 sphere_dif = P - ((p1 + p2) * 0.5f); - float sphere_b = dot(dir,sphere_dif); - sphere_dif = sphere_dif - sphere_b * dir; - sphere_b = dot(dir,sphere_dif); - float sdisc = sphere_b * sphere_b - len_squared(sphere_dif) + sp_r * sp_r; - if(sdisc < 0.0f) - return false; - - /* obtain parameters and test midpoint distance for suitable modes */ - float3 tg = (p2 - p1) / l; - float gd = (r2 - r1) / l; - float dirz = dot(dir,tg); - float difz = dot(dif,tg); - - float a = 1.0f - (dirz*dirz*(1 + gd*gd)); - float halfb = dot(dir,dif) - dirz*(difz + gd*(difz*gd + r1)); - - float tcentre = -halfb/a; - float zcentre = difz + (dirz * tcentre); - - if((tcentre > isect->t) && !(flags & CURVE_KN_ACCURATE)) - return false; - if((zcentre < 0 || zcentre > l) && !(flags & CURVE_KN_ACCURATE) && !(flags & CURVE_KN_INTERSECTCORRECTION)) - return false; - - /* test minimum separation */ - float3 cprod = cross(tg, dir); - float3 cprod2 = cross(tg, dif); - float cprodsq = len_squared(cprod); - float cprod2sq = len_squared(cprod2); - float distscaled = dot(cprod,dif); - - if(cprodsq == 0) - distscaled = cprod2sq; - else - distscaled = (distscaled*distscaled)/cprodsq; - - if(distscaled > mr*mr) - return false; - - /* calculate true intersection */ - float3 tdif = P - p1 + tcentre * dir; - float tdifz = dot(tdif,tg); - float tb = 2*(dot(dir,tdif) - dirz*(tdifz + gd*(tdifz*gd + r1))); - float tc = dot(tdif,tdif) - tdifz * tdifz * (1 + gd*gd) - r1*r1 - 2*r1*tdifz*gd; - float td = tb*tb - 4*a*tc; - - if (td < 0.0f) - return false; - - float rootd = 0.0f; - float correction = 0.0f; - if(flags & CURVE_KN_ACCURATE) { - rootd = sqrtf(td); - correction = ((-tb - rootd)/(2*a)); - } - - float t = tcentre + correction; - - if(t < isect->t) { - - if(flags & CURVE_KN_INTERSECTCORRECTION) { - rootd = sqrtf(td); - correction = ((-tb - 
rootd)/(2*a)); - t = tcentre + correction; - } - - float z = zcentre + (dirz * correction); - bool backface = false; - - if(flags & CURVE_KN_BACKFACING && (t < 0.0f || z < 0 || z > l)) { - backface = true; - correction = ((-tb + rootd)/(2*a)); - t = tcentre + correction; - z = zcentre + (dirz * correction); - } - - /* stochastic fade from minimum width */ - float adjradius = or1 + z * (or2 - or1) / l; - adjradius = adjradius / (r1 + z * gd); - if(lcg_state && adjradius != 1.0f) { - if(lcg_step_float(lcg_state) > adjradius) - return false; - } - /* --- */ - - if(t > 0.0f && t < isect->t && z >= 0 && z <= l) { - - if (flags & CURVE_KN_ENCLOSEFILTER) { - float enc_ratio = 1.01f; - if((dot(P - p1, tg) > -r1 * enc_ratio) && (dot(P - p2, tg) < r2 * enc_ratio)) { - float a2 = 1.0f - (dirz*dirz*(1 + gd*gd*enc_ratio*enc_ratio)); - float c2 = dot(dif,dif) - difz * difz * (1 + gd*gd*enc_ratio*enc_ratio) - r1*r1*enc_ratio*enc_ratio - 2*r1*difz*gd*enc_ratio; - if(a2*c2 < 0.0f) - return false; - } - } - -#ifdef __VISIBILITY_FLAG__ - /* visibility flag test. we do it here under the assumption - * that most triangles are culled by node flags */ - if(kernel_tex_fetch(__prim_visibility, curveAddr) & visibility) -#endif - { - /* record intersection */ - isect->prim = curveAddr; - isect->segment = segment; - isect->object = object; - isect->u = z/l; - isect->v = td/(4*a*a); - /*isect->v = 1.0f - adjradius;*/ - isect->t = t; - - if(backface) - isect->u = -isect->u; - - return true; - } - } - } - - return false; -} -#endif - -#ifdef __SUBSURFACE__ -/* Special ray intersection routines for subsurface scattering. In that case we - * only want to intersect with primitives in the same object, and if case of - * multiple hits we pick a single random primitive as the intersection point. 
*/ - -ccl_device_inline void bvh_triangle_intersect_subsurface(KernelGlobals *kg, Intersection *isect_array, - float3 P, float3 idir, int object, int triAddr, float tmax, uint *num_hits, uint *lcg_state, int max_hits) -{ - /* compute and check intersection t-value */ - float4 v00 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0); - float4 v11 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1); - float3 dir = 1.0f/idir; - - float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z; - float invDz = 1.0f/(dir.x*v00.x + dir.y*v00.y + dir.z*v00.z); - float t = Oz * invDz; - - if(t > 0.0f && t < tmax) { - /* compute and check barycentric u */ - float Ox = v11.w + P.x*v11.x + P.y*v11.y + P.z*v11.z; - float Dx = dir.x*v11.x + dir.y*v11.y + dir.z*v11.z; - float u = Ox + t*Dx; - - if(u >= 0.0f) { - /* compute and check barycentric v */ - float4 v22 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2); - float Oy = v22.w + P.x*v22.x + P.y*v22.y + P.z*v22.z; - float Dy = dir.x*v22.x + dir.y*v22.y + dir.z*v22.z; - float v = Oy + t*Dy; - - if(v >= 0.0f && u + v <= 1.0f) { - (*num_hits)++; - - int hit; - - if(*num_hits <= max_hits) { - hit = *num_hits - 1; - } - else { - /* reservoir sampling: if we are at the maximum number of - * hits, randomly replace element or skip it */ - hit = lcg_step_uint(lcg_state) % *num_hits; - - if(hit >= max_hits) - return; - } - - /* record intersection */ - Intersection *isect = &isect_array[hit]; - isect->prim = triAddr; - isect->object = object; - isect->u = u; - isect->v = v; - isect->t = t; - } - } - } -} -#endif - -/* BVH intersection function variations */ - -#define BVH_INSTANCING 1 -#define BVH_MOTION 2 -#define BVH_HAIR 4 -#define BVH_HAIR_MINIMUM_WIDTH 8 - -#define BVH_FUNCTION_NAME bvh_intersect -#define BVH_FUNCTION_FEATURES 0 -#include "kernel_bvh_traversal.h" - -#if defined(__INSTANCING__) -#define BVH_FUNCTION_NAME bvh_intersect_instancing -#define BVH_FUNCTION_FEATURES BVH_INSTANCING -#include "kernel_bvh_traversal.h" 
-#endif - -#if defined(__HAIR__) -#define BVH_FUNCTION_NAME bvh_intersect_hair -#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH -#include "kernel_bvh_traversal.h" -#endif - -#if defined(__OBJECT_MOTION__) -#define BVH_FUNCTION_NAME bvh_intersect_motion -#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION -#include "kernel_bvh_traversal.h" -#endif - -#if defined(__HAIR__) && defined(__OBJECT_MOTION__) -#define BVH_FUNCTION_NAME bvh_intersect_hair_motion -#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_HAIR_MINIMUM_WIDTH|BVH_MOTION -#include "kernel_bvh_traversal.h" -#endif - -#if defined(__SUBSURFACE__) -#define BVH_FUNCTION_NAME bvh_intersect_subsurface -#define BVH_FUNCTION_FEATURES 0 -#include "kernel_bvh_subsurface.h" -#endif - -#if defined(__SUBSURFACE__) && defined(__INSTANCING__) -#define BVH_FUNCTION_NAME bvh_intersect_subsurface_instancing -#define BVH_FUNCTION_FEATURES BVH_INSTANCING -#include "kernel_bvh_subsurface.h" -#endif - -#if defined(__SUBSURFACE__) && defined(__HAIR__) -#define BVH_FUNCTION_NAME bvh_intersect_subsurface_hair -#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR -#include "kernel_bvh_subsurface.h" -#endif - -#if defined(__SUBSURFACE__) && defined(__OBJECT_MOTION__) -#define BVH_FUNCTION_NAME bvh_intersect_subsurface_motion -#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION -#include "kernel_bvh_subsurface.h" -#endif - -#if defined(__SUBSURFACE__) && defined(__HAIR__) && defined(__OBJECT_MOTION__) -#define BVH_FUNCTION_NAME bvh_intersect_subsurface_hair_motion -#define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR|BVH_MOTION -#include "kernel_bvh_subsurface.h" -#endif - -/* to work around titan bug when using arrays instead of textures */ -#if !defined(__KERNEL_CUDA__) || defined(__KERNEL_CUDA_TEX_STORAGE__) -ccl_device_inline -#else -ccl_device_noinline -#endif -#ifdef __HAIR__ -bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect, uint 
*lcg_state, float difl, float extmax) -#else -bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect) -#endif -{ -#ifdef __OBJECT_MOTION__ - if(kernel_data.bvh.have_motion) { -#ifdef __HAIR__ - if(kernel_data.bvh.have_curves) - return bvh_intersect_hair_motion(kg, ray, isect, visibility, lcg_state, difl, extmax); -#endif /* __HAIR__ */ - - return bvh_intersect_motion(kg, ray, isect, visibility); - } -#endif /* __OBJECT_MOTION__ */ - -#ifdef __HAIR__ - if(kernel_data.bvh.have_curves) - return bvh_intersect_hair(kg, ray, isect, visibility, lcg_state, difl, extmax); -#endif /* __HAIR__ */ - -#ifdef __KERNEL_CPU__ - -#ifdef __INSTANCING__ - if(kernel_data.bvh.have_instancing) - return bvh_intersect_instancing(kg, ray, isect, visibility); -#endif /* __INSTANCING__ */ - - return bvh_intersect(kg, ray, isect, visibility); -#else /* __KERNEL_CPU__ */ - -#ifdef __INSTANCING__ - return bvh_intersect_instancing(kg, ray, isect, visibility); -#else - return bvh_intersect(kg, ray, isect, visibility); -#endif /* __INSTANCING__ */ - -#endif /* __KERNEL_CPU__ */ -} - -/* to work around titan bug when using arrays instead of textures */ -#ifdef __SUBSURFACE__ -#if !defined(__KERNEL_CUDA__) || defined(__KERNEL_CUDA_TEX_STORAGE__) -ccl_device_inline -#else -ccl_device_noinline -#endif -uint scene_intersect_subsurface(KernelGlobals *kg, const Ray *ray, Intersection *isect, int subsurface_object, uint *lcg_state, int max_hits) -{ -#ifdef __OBJECT_MOTION__ - if(kernel_data.bvh.have_motion) { -#ifdef __HAIR__ - if(kernel_data.bvh.have_curves) - return bvh_intersect_subsurface_hair_motion(kg, ray, isect, subsurface_object, lcg_state, max_hits); -#endif /* __HAIR__ */ - - return bvh_intersect_subsurface_motion(kg, ray, isect, subsurface_object, lcg_state, max_hits); - } -#endif /* __OBJECT_MOTION__ */ - -#ifdef __HAIR__ - if(kernel_data.bvh.have_curves) - return bvh_intersect_subsurface_hair(kg, ray, isect, subsurface_object, lcg_state, max_hits); 
-#endif /* __HAIR__ */ - -#ifdef __KERNEL_CPU__ - -#ifdef __INSTANCING__ - if(kernel_data.bvh.have_instancing) - return bvh_intersect_subsurface_instancing(kg, ray, isect, subsurface_object, lcg_state, max_hits); -#endif /* __INSTANCING__ */ - - return bvh_intersect_subsurface(kg, ray, isect, subsurface_object, lcg_state, max_hits); -#else /* __KERNEL_CPU__ */ - -#ifdef __INSTANCING__ - return bvh_intersect_subsurface_instancing(kg, ray, isect, subsurface_object, lcg_state, max_hits); -#else - return bvh_intersect_subsurface(kg, ray, isect, subsurface_object, lcg_state, max_hits); -#endif /* __INSTANCING__ */ - -#endif /* __KERNEL_CPU__ */ -} -#endif - -/* Ray offset to avoid self intersection */ - -ccl_device_inline float3 ray_offset(float3 P, float3 Ng) -{ -#ifdef __INTERSECTION_REFINE__ - const float epsilon_f = 1e-5f; - /* ideally this should match epsilon_f, but instancing/mblur - * precision makes it problematic */ - const float epsilon_test = 1.0f; - const int epsilon_i = 32; - - float3 res; - - /* x component */ - if(fabsf(P.x) < epsilon_test) { - res.x = P.x + Ng.x*epsilon_f; - } - else { - uint ix = __float_as_uint(P.x); - ix += ((ix ^ __float_as_uint(Ng.x)) >> 31)? -epsilon_i: epsilon_i; - res.x = __uint_as_float(ix); - } - - /* y component */ - if(fabsf(P.y) < epsilon_test) { - res.y = P.y + Ng.y*epsilon_f; - } - else { - uint iy = __float_as_uint(P.y); - iy += ((iy ^ __float_as_uint(Ng.y)) >> 31)? -epsilon_i: epsilon_i; - res.y = __uint_as_float(iy); - } - - /* z component */ - if(fabsf(P.z) < epsilon_test) { - res.z = P.z + Ng.z*epsilon_f; - } - else { - uint iz = __float_as_uint(P.z); - iz += ((iz ^ __float_as_uint(Ng.z)) >> 31)? -epsilon_i: epsilon_i; - res.z = __uint_as_float(iz); - } - - return res; -#else - const float epsilon_f = 1e-4f; - return P + epsilon_f*Ng; -#endif -} - -/* Refine triangle intersection to more precise hit point. 
For rays that travel - * far the precision is often not so good, this reintersects the primitive from - * a closer distance. */ - -ccl_device_inline float3 bvh_triangle_refine(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray) -{ - float3 P = ray->P; - float3 D = ray->D; - float t = isect->t; - -#ifdef __INTERSECTION_REFINE__ - if(isect->object != ~0) { -#ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_itfm; -#else - Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); -#endif - - P = transform_point(&tfm, P); - D = transform_direction(&tfm, D*t); - D = normalize_len(D, &t); - } - - P = P + D*t; - - float4 v00 = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0); - float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z; - float invDz = 1.0f/(D.x*v00.x + D.y*v00.y + D.z*v00.z); - float rt = Oz * invDz; - - P = P + D*rt; - - if(isect->object != ~0) { -#ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_tfm; -#else - Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); -#endif - - P = transform_point(&tfm, P); - } - - return P; -#else - return P + D*t; -#endif -} - -/* same as above, except that isect->t is assumed to be in object space for instancing */ -ccl_device_inline float3 bvh_triangle_refine_subsurface(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray) -{ - float3 P = ray->P; - float3 D = ray->D; - float t = isect->t; - -#ifdef __INTERSECTION_REFINE__ - if(isect->object != ~0) { -#ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_itfm; -#else - Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); -#endif - - P = transform_point(&tfm, P); - D = transform_direction(&tfm, D); - D = normalize(D); - } - - P = P + D*t; - - float4 v00 = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0); - float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z; - float invDz = 1.0f/(D.x*v00.x + D.y*v00.y + D.z*v00.z); - float rt = Oz * 
invDz; - - P = P + D*rt; - - if(isect->object != ~0) { -#ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_tfm; -#else - Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); -#endif - - P = transform_point(&tfm, P); - } - - return P; -#else - return P + D*t; -#endif -} - -#ifdef __HAIR__ - -ccl_device_inline float3 curvetangent(float t, float3 p0, float3 p1, float3 p2, float3 p3) -{ - float fc = 0.71f; - float data[4]; - float t2 = t * t; - data[0] = -3.0f * fc * t2 + 4.0f * fc * t - fc; - data[1] = 3.0f * (2.0f - fc) * t2 + 2.0f * (fc - 3.0f) * t; - data[2] = 3.0f * (fc - 2.0f) * t2 + 2.0f * (3.0f - 2.0f * fc) * t + fc; - data[3] = 3.0f * fc * t2 - 2.0f * fc * t; - return data[0] * p0 + data[1] * p1 + data[2] * p2 + data[3] * p3; -} - -ccl_device_inline float3 curvepoint(float t, float3 p0, float3 p1, float3 p2, float3 p3) -{ - float data[4]; - float fc = 0.71f; - float t2 = t * t; - float t3 = t2 * t; - data[0] = -fc * t3 + 2.0f * fc * t2 - fc * t; - data[1] = (2.0f - fc) * t3 + (fc - 3.0f) * t2 + 1.0f; - data[2] = (fc - 2.0f) * t3 + (3.0f - 2.0f * fc) * t2 + fc * t; - data[3] = fc * t3 - fc * t2; - return data[0] * p0 + data[1] * p1 + data[2] * p2 + data[3] * p3; -} - -ccl_device_inline float3 bvh_curve_refine(KernelGlobals *kg, ShaderData *sd, const Intersection *isect, const Ray *ray) -{ - int flag = kernel_data.curve.curveflags; - float t = isect->t; - float3 P = ray->P; - float3 D = ray->D; - - if(isect->object != ~0) { -#ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_itfm; -#else - Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM); -#endif - - P = transform_point(&tfm, P); - D = transform_direction(&tfm, D*t); - D = normalize_len(D, &t); - } - - int prim = kernel_tex_fetch(__prim_index, isect->prim); - float4 v00 = kernel_tex_fetch(__curves, prim); - - int k0 = __float_as_int(v00.x) + isect->segment; - int k1 = k0 + 1; - - float4 P1 = kernel_tex_fetch(__curve_keys, k0); - float4 P2 = 
kernel_tex_fetch(__curve_keys, k1); - float l = 1.0f; - float3 tg = normalize_len(float4_to_float3(P2 - P1), &l); - float r1 = P1.w; - float r2 = P2.w; - float gd = ((r2 - r1)/l); - - P = P + D*t; - - if(flag & CURVE_KN_INTERPOLATE) { - int ka = max(k0 - 1,__float_as_int(v00.x)); - int kb = min(k1 + 1,__float_as_int(v00.x) + __float_as_int(v00.y) - 1); - - float4 P0 = kernel_tex_fetch(__curve_keys, ka); - float4 P3 = kernel_tex_fetch(__curve_keys, kb); - - float3 p[4]; - p[0] = float4_to_float3(P0); - p[1] = float4_to_float3(P1); - p[2] = float4_to_float3(P2); - p[3] = float4_to_float3(P3); - -#ifdef __UV__ - sd->u = isect->u; - sd->v = 0.0f; -#endif - - tg = normalize(curvetangent(isect->u, p[0], p[1], p[2], p[3])); - - if(kernel_data.curve.curveflags & CURVE_KN_RIBBONS) - sd->Ng = normalize(-(D - tg * (dot(tg, D)))); - else { - float3 p_curr = curvepoint(isect->u, p[0], p[1], p[2], p[3]); - sd->Ng = normalize(P - p_curr); - sd->Ng = sd->Ng - gd * tg; - sd->Ng = normalize(sd->Ng); - } - sd->N = sd->Ng; - } - else { - float3 dif = P - float4_to_float3(P1); - -#ifdef __UV__ - sd->u = dot(dif,tg)/l; - sd->v = 0.0f; -#endif - - if (flag & CURVE_KN_TRUETANGENTGNORMAL) { - sd->Ng = -(D - tg * dot(tg, D)); - sd->Ng = normalize(sd->Ng); - } - else { - sd->Ng = (dif - tg * sd->u * l) / (P1.w + sd->u * l * gd); - if (gd != 0.0f) { - sd->Ng = sd->Ng - gd * tg ; - sd->Ng = normalize(sd->Ng); - } - } - - sd->N = sd->Ng; - } - -#ifdef __DPDU__ - /* dPdu/dPdv */ - sd->dPdu = tg; - sd->dPdv = cross(tg, sd->Ng); -#endif - - /*add fading parameter for minimum pixel width with transparency bsdf*/ - /*sd->curve_transparency = isect->v;*/ - /*sd->curve_radius = sd->u * gd * l + r1;*/ - - if(isect->object != ~0) { -#ifdef __OBJECT_MOTION__ - Transform tfm = sd->ob_tfm; -#else - Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM); -#endif - - P = transform_point(&tfm, P); - } - - return P; -} -#endif - -CCL_NAMESPACE_END - diff --git 
a/intern/cycles/kernel/kernel_camera.h b/intern/cycles/kernel/kernel_camera.h index 887b1afddd4..7fc66a9fdee 100644 --- a/intern/cycles/kernel/kernel_camera.h +++ b/intern/cycles/kernel/kernel_camera.h @@ -229,7 +229,7 @@ ccl_device void camera_sample(KernelGlobals *kg, int x, int y, float filter_u, f if(kernel_data.cam.shuttertime == -1.0f) ray->time = TIME_INVALID; else - ray->time = 0.5f + 0.5f*(time - 0.5f)*kernel_data.cam.shuttertime; + ray->time = time; #endif /* sample */ @@ -266,7 +266,7 @@ ccl_device_inline float3 camera_world_to_ndc(KernelGlobals *kg, ShaderData *sd, { if(kernel_data.cam.type != CAMERA_PANORAMA) { /* perspective / ortho */ - if(sd->object == ~0 && kernel_data.cam.type == CAMERA_PERSPECTIVE) + if(sd->object == PRIM_NONE && kernel_data.cam.type == CAMERA_PERSPECTIVE) P += camera_position(kg); Transform tfm = kernel_data.cam.worldtondc; @@ -276,7 +276,7 @@ ccl_device_inline float3 camera_world_to_ndc(KernelGlobals *kg, ShaderData *sd, /* panorama */ Transform tfm = kernel_data.cam.worldtocamera; - if(sd->object != ~0) + if(sd->object != OBJECT_NONE) P = normalize(transform_point(&tfm, P)); else P = normalize(transform_direction(&tfm, P)); diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h index b213e91274d..d027bb62ebe 100644 --- a/intern/cycles/kernel/kernel_compat_cpu.h +++ b/intern/cycles/kernel/kernel_compat_cpu.h @@ -20,9 +20,9 @@ #define __KERNEL_CPU__ #include "util_debug.h" -#include "util_half.h" #include "util_math.h" #include "util_simd.h" +#include "util_half.h" #include "util_types.h" CCL_NAMESPACE_BEGIN @@ -95,38 +95,128 @@ template<typename T> struct texture_image { ccl_always_inline float4 interp(float x, float y, bool periodic = true) { - if(!data) + if(UNLIKELY(!data)) return make_float4(0.0f, 0.0f, 0.0f, 0.0f); int ix, iy, nix, niy; - float tx = frac(x*width - 0.5f, &ix); - float ty = frac(y*height - 0.5f, &iy); - if(periodic) { - ix = wrap_periodic(ix, width); - iy = 
wrap_periodic(iy, height); - - nix = wrap_periodic(ix+1, width); - niy = wrap_periodic(iy+1, height); + if(interpolation == INTERPOLATION_CLOSEST) { + frac(x*(float)width, &ix); + frac(y*(float)height, &iy); + if(periodic) { + ix = wrap_periodic(ix, width); + iy = wrap_periodic(iy, height); + + } + else { + ix = wrap_clamp(ix, width); + iy = wrap_clamp(iy, height); + } + return read(data[ix + iy*width]); } else { - ix = wrap_clamp(ix, width); - iy = wrap_clamp(iy, height); - - nix = wrap_clamp(ix+1, width); - niy = wrap_clamp(iy+1, height); + float tx = frac(x*(float)width - 0.5f, &ix); + float ty = frac(y*(float)height - 0.5f, &iy); + + if(periodic) { + ix = wrap_periodic(ix, width); + iy = wrap_periodic(iy, height); + + nix = wrap_periodic(ix+1, width); + niy = wrap_periodic(iy+1, height); + } + else { + ix = wrap_clamp(ix, width); + iy = wrap_clamp(iy, height); + + nix = wrap_clamp(ix+1, width); + niy = wrap_clamp(iy+1, height); + } + + float4 r = (1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width]); + r += (1.0f - ty)*tx*read(data[nix + iy*width]); + r += ty*(1.0f - tx)*read(data[ix + niy*width]); + r += ty*tx*read(data[nix + niy*width]); + + return r; } + } + + ccl_always_inline float4 interp_3d(float x, float y, float z, bool periodic = false) + { + if(UNLIKELY(!data)) + return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - float4 r = (1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width]); - r += (1.0f - ty)*tx*read(data[nix + iy*width]); - r += ty*(1.0f - tx)*read(data[ix + niy*width]); - r += ty*tx*read(data[nix + niy*width]); + int ix, iy, iz, nix, niy, niz; + + if(interpolation == INTERPOLATION_CLOSEST) { + frac(x*(float)width, &ix); + frac(y*(float)height, &iy); + frac(z*(float)depth, &iz); + + if(periodic) { + ix = wrap_periodic(ix, width); + iy = wrap_periodic(iy, height); + iz = wrap_periodic(iz, depth); + } + else { + ix = wrap_clamp(ix, width); + iy = wrap_clamp(iy, height); + iz = wrap_clamp(iz, depth); + } + + return read(data[ix + iy*width + iz*width*height]); + 
} + else { + float tx = frac(x*(float)width - 0.5f, &ix); + float ty = frac(y*(float)height - 0.5f, &iy); + float tz = frac(z*(float)depth - 0.5f, &iz); + + if(periodic) { + ix = wrap_periodic(ix, width); + iy = wrap_periodic(iy, height); + iz = wrap_periodic(iz, depth); + + nix = wrap_periodic(ix+1, width); + niy = wrap_periodic(iy+1, height); + niz = wrap_periodic(iz+1, depth); + } + else { + ix = wrap_clamp(ix, width); + iy = wrap_clamp(iy, height); + iz = wrap_clamp(iz, depth); + + nix = wrap_clamp(ix+1, width); + niy = wrap_clamp(iy+1, height); + niz = wrap_clamp(iz+1, depth); + } + + float4 r; + + r = (1.0f - tz)*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + iz*width*height]); + r += (1.0f - tz)*(1.0f - ty)*tx*read(data[nix + iy*width + iz*width*height]); + r += (1.0f - tz)*ty*(1.0f - tx)*read(data[ix + niy*width + iz*width*height]); + r += (1.0f - tz)*ty*tx*read(data[nix + niy*width + iz*width*height]); + + r += tz*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + niz*width*height]); + r += tz*(1.0f - ty)*tx*read(data[nix + iy*width + niz*width*height]); + r += tz*ty*(1.0f - tx)*read(data[ix + niy*width + niz*width*height]); + r += tz*ty*tx*read(data[nix + niy*width + niz*width*height]); + + return r; + } + } - return r; + ccl_always_inline void dimensions_set(int width_, int height_, int depth_) + { + width = width_; + height = height_; + depth = depth_; } T *data; - int width, height; + int interpolation; + int width, height, depth; }; typedef texture<float4> texture_float4; @@ -146,6 +236,7 @@ typedef texture_image<uchar4> texture_image_uchar4; #define kernel_tex_fetch_m128i(tex, index) (kg->tex.fetch_m128i(index)) #define kernel_tex_lookup(tex, t, offset, size) (kg->tex.lookup(t, offset, size)) #define kernel_tex_image_interp(tex, x, y) ((tex < MAX_FLOAT_IMAGES) ? kg->texture_float_images[tex].interp(x, y) : kg->texture_byte_images[tex - MAX_FLOAT_IMAGES].interp(x, y)) +#define kernel_tex_image_interp_3d(tex, x, y, z) ((tex < MAX_FLOAT_IMAGES) ? 
kg->texture_float_images[tex].interp_3d(x, y, z) : kg->texture_byte_images[tex - MAX_FLOAT_IMAGES].interp_3d(x, y, z)) #define kernel_data (kg->__data) diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h index 15e7353ec38..e4c20d26ff1 100644 --- a/intern/cycles/kernel/kernel_compat_cuda.h +++ b/intern/cycles/kernel/kernel_compat_cuda.h @@ -60,7 +60,7 @@ typedef texture<uchar4, 2, cudaReadModeNormalizedFloat> texture_image_uchar4; /* In order to use full 6GB of memory on Titan cards, use arrays instead * of textures. On earlier cards this seems slower, but on Titan it is * actually slightly faster in tests. */ -#if __CUDA_ARCH__ < 350 +#if __CUDA_ARCH__ < 300 #define __KERNEL_CUDA_TEX_STORAGE__ #endif diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h index 4f4414cc298..8346b09619e 100644 --- a/intern/cycles/kernel/kernel_compat_opencl.h +++ b/intern/cycles/kernel/kernel_compat_opencl.h @@ -85,27 +85,36 @@ #define __float_as_uint(x) as_uint(x) #define __int_as_float(x) as_float(x) #define __float_as_int(x) as_int(x) -#define sqrtf(x) sqrt(((float)x)) -#define cosf(x) cos(((float)x)) -#define sinf(x) sin(((float)x)) #define powf(x, y) pow(((float)x), ((float)y)) #define fabsf(x) fabs(((float)x)) #define copysignf(x, y) copysign(((float)x), ((float)y)) -#define cosf(x) cos(((float)x)) #define asinf(x) asin(((float)x)) #define acosf(x) acos(((float)x)) #define atanf(x) atan(((float)x)) -#define tanf(x) tan(((float)x)) -#define logf(x) log(((float)x)) #define floorf(x) floor(((float)x)) #define ceilf(x) ceil(((float)x)) -#define expf(x) exp(((float)x)) #define hypotf(x, y) hypot(((float)x), ((float)y)) #define atan2f(x, y) atan2(((float)x), ((float)y)) #define fmaxf(x, y) fmax(((float)x), ((float)y)) #define fminf(x, y) fmin(((float)x), ((float)y)) #define fmodf(x, y) fmod((float)x, (float)y) +#ifndef __CL_USE_NATIVE__ +#define sinf(x) native_sin(((float)x)) +#define 
cosf(x) native_cos(((float)x)) +#define tanf(x) native_tan(((float)x)) +#define expf(x) native_exp(((float)x)) +#define sqrtf(x) native_sqrt(((float)x)) +#define logf(x) native_log(((float)x)) +#else +#define sinf(x) sin(((float)x)) +#define cosf(x) cos(((float)x)) +#define tanf(x) tan(((float)x)) +#define expf(x) exp(((float)x)) +#define sqrtf(x) sqrt(((float)x)) +#define logf(x) log(((float)x)) +#endif + /* data lookup defines */ #define kernel_data (*kg->data) #define kernel_tex_fetch(t, index) kg->t[index] diff --git a/intern/cycles/kernel/kernel_curve.h b/intern/cycles/kernel/kernel_curve.h deleted file mode 100644 index 821ac50eaa9..00000000000 --- a/intern/cycles/kernel/kernel_curve.h +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License - */ - -CCL_NAMESPACE_BEGIN - -#ifdef __HAIR__ - -/* curve attributes */ - -ccl_device float curve_attribute_float(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float *dx, float *dy) -{ - if(elem == ATTR_ELEMENT_CURVE) { -#ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = 0.0f; - if(dy) *dy = 0.0f; -#endif - - return kernel_tex_fetch(__attributes_float, offset + sd->prim); - } - else if(elem == ATTR_ELEMENT_CURVE_KEY) { - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - int k0 = __float_as_int(curvedata.x) + sd->segment; - int k1 = k0 + 1; - - float f0 = kernel_tex_fetch(__attributes_float, offset + k0); - float f1 = kernel_tex_fetch(__attributes_float, offset + k1); - -#ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*(f1 - f0); - if(dy) *dy = 0.0f; -#endif - - return (1.0f - sd->u)*f0 + sd->u*f1; - } - else { -#ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = 0.0f; - if(dy) *dy = 0.0f; -#endif - - return 0.0f; - } -} - -ccl_device float3 curve_attribute_float3(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float3 *dx, float3 *dy) -{ - if(elem == ATTR_ELEMENT_CURVE) { - /* idea: we can't derive any useful differentials here, but for tiled - * mipmap image caching it would be useful to avoid reading the highest - * detail level always. maybe a derivative based on the hair density - * could be computed somehow? 
*/ -#ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); - if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); -#endif - - return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + sd->prim)); - } - else if(elem == ATTR_ELEMENT_CURVE_KEY) { - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - int k0 = __float_as_int(curvedata.x) + sd->segment; - int k1 = k0 + 1; - - float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + k0)); - float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + k1)); - -#ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*(f1 - f0); - if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); -#endif - - return (1.0f - sd->u)*f0 + sd->u*f1; - } - else { -#ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); - if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); -#endif - - return make_float3(0.0f, 0.0f, 0.0f); - } -} - -/* hair info node functions */ - -ccl_device float curve_thickness(KernelGlobals *kg, ShaderData *sd) -{ - float r = 0.0f; - - if(sd->segment != ~0) { - float4 curvedata = kernel_tex_fetch(__curves, sd->prim); - int k0 = __float_as_int(curvedata.x) + sd->segment; - int k1 = k0 + 1; - - float4 P1 = kernel_tex_fetch(__curve_keys, k0); - float4 P2 = kernel_tex_fetch(__curve_keys, k1); - r = (P2.w - P1.w) * sd->u + P1.w; - } - - return r*2.0f; -} - -ccl_device float3 curve_tangent_normal(KernelGlobals *kg, ShaderData *sd) -{ - float3 tgN = make_float3(0.0f,0.0f,0.0f); - - if(sd->segment != ~0) { - - tgN = -(-sd->I - sd->dPdu * (dot(sd->dPdu,-sd->I) / len_squared(sd->dPdu))); - tgN = normalize(tgN); - - /* need to find suitable scaled gd for corrected normal */ -#if 0 - tgN = normalize(tgN - gd * sd->dPdu); -#endif - } - - return tgN; -} - -#endif - -CCL_NAMESPACE_END - diff --git a/intern/cycles/kernel/kernel_displace.h b/intern/cycles/kernel/kernel_displace.h index c50e2166660..b8c64af658f 100644 --- a/intern/cycles/kernel/kernel_displace.h +++ 
b/intern/cycles/kernel/kernel_displace.h @@ -16,8 +16,308 @@ CCL_NAMESPACE_BEGIN +ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadiance *L, RNG rng, + bool is_combined, bool is_ao, bool is_sss) +{ + int samples = kernel_data.integrator.aa_samples; + + /* initialize master radiance accumulator */ + kernel_assert(kernel_data.film.use_light_pass); + path_radiance_init(L, kernel_data.film.use_light_pass); + + /* take multiple samples */ + for(int sample = 0; sample < samples; sample++) { + PathRadiance L_sample; + PathState state; + Ray ray; + float3 throughput = make_float3(1.0f, 1.0f, 1.0f); + + /* init radiance */ + path_radiance_init(&L_sample, kernel_data.film.use_light_pass); + + /* init path state */ + path_state_init(kg, &state, &rng, sample); + state.num_samples = samples; + + /* evaluate surface shader */ + float rbsdf = path_state_rng_1D(kg, &rng, &state, PRNG_BSDF); + shader_eval_surface(kg, sd, rbsdf, state.flag, SHADER_CONTEXT_MAIN); + + /* TODO, disable the closures we won't need */ + + /* sample ambient occlusion */ + if(is_combined || is_ao) { + kernel_path_ao(kg, sd, &L_sample, &state, &rng, throughput); + } + + /* sample subsurface scattering */ + if((is_combined || is_sss) && (sd->flag & SD_BSSRDF)) { +#ifdef __SUBSURFACE__ + /* when mixing BSSRDF and BSDF closures we should skip BSDF lighting if scattering was successful */ + if (kernel_path_subsurface_scatter(kg, sd, &L_sample, &state, &rng, &ray, &throughput)) + is_sss = true; +#endif + } + + /* sample light and BSDF */ + if((!is_sss) && (!is_ao)) { + if(kernel_path_integrate_lighting(kg, &rng, sd, &throughput, &state, &L_sample, &ray)) { +#ifdef __LAMP_MIS__ + state.ray_t = 0.0f; +#endif + /* compute indirect light */ + kernel_path_indirect(kg, &rng, ray, throughput, state.num_samples, state, &L_sample); + + /* sum and reset indirect light pass variables for the next samples */ + path_radiance_sum_indirect(&L_sample); + path_radiance_reset_indirect(&L_sample); + } + 
} + + /* accumulate into master L */ + path_radiance_accum_sample(L, &L_sample, samples); + } +} + +ccl_device bool is_light_pass(ShaderEvalType type) +{ + switch (type) { + case SHADER_EVAL_AO: + case SHADER_EVAL_COMBINED: + case SHADER_EVAL_SHADOW: + case SHADER_EVAL_DIFFUSE_DIRECT: + case SHADER_EVAL_GLOSSY_DIRECT: + case SHADER_EVAL_TRANSMISSION_DIRECT: + case SHADER_EVAL_SUBSURFACE_DIRECT: + case SHADER_EVAL_DIFFUSE_INDIRECT: + case SHADER_EVAL_GLOSSY_INDIRECT: + case SHADER_EVAL_TRANSMISSION_INDIRECT: + case SHADER_EVAL_SUBSURFACE_INDIRECT: + return true; + default: + return false; + } +} + +ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output, ShaderEvalType type, int i) +{ + ShaderData sd; + uint4 in = input[i * 2]; + uint4 diff = input[i * 2 + 1]; + + float3 out; + + int object = in.x; + int prim = in.y; + + if(prim == -1) + return; + + float u = __uint_as_float(in.z); + float v = __uint_as_float(in.w); + + float dudx = __uint_as_float(diff.x); + float dudy = __uint_as_float(diff.y); + float dvdx = __uint_as_float(diff.z); + float dvdy = __uint_as_float(diff.w); + + int shader; + float3 P, Ng; + + triangle_point_normal(kg, prim, u, v, &P, &Ng, &shader); + + /* dummy initilizations copied from SHADER_EVAL_DISPLACE */ + float3 I = Ng; + float t = 0.0f; + float time = TIME_INVALID; + int bounce = 0; + int transparent_bounce = 0; + + /* light passes */ + PathRadiance L; + + shader_setup_from_sample(kg, &sd, P, Ng, I, shader, object, prim, u, v, t, time, bounce, transparent_bounce); + sd.I = sd.N; + + /* update differentials */ + sd.dP.dx = sd.dPdu * dudx + sd.dPdv * dvdx; + sd.dP.dy = sd.dPdu * dudy + sd.dPdv * dvdy; + sd.du.dx = dudx; + sd.du.dy = dudy; + sd.dv.dx = dvdx; + sd.dv.dy = dvdy; + + if(is_light_pass(type)) { + RNG rng = cmj_hash(i, 0); + compute_light_pass(kg, &sd, &L, rng, (type == SHADER_EVAL_COMBINED), + (type == SHADER_EVAL_AO), + (type == SHADER_EVAL_SUBSURFACE_DIRECT || + type == 
SHADER_EVAL_SUBSURFACE_INDIRECT)); + } + + switch (type) { + /* data passes */ + case SHADER_EVAL_NORMAL: + { + /* compression: normal = (2 * color) - 1 */ + out = sd.N * 0.5f + make_float3(0.5f, 0.5f, 0.5f); + break; + } + case SHADER_EVAL_UV: + { + out = primitive_uv(kg, &sd); + break; + } + case SHADER_EVAL_DIFFUSE_COLOR: + { + shader_eval_surface(kg, &sd, 0.f, 0, SHADER_CONTEXT_MAIN); + out = shader_bsdf_diffuse(kg, &sd); + break; + } + case SHADER_EVAL_GLOSSY_COLOR: + { + shader_eval_surface(kg, &sd, 0.f, 0, SHADER_CONTEXT_MAIN); + out = shader_bsdf_glossy(kg, &sd); + break; + } + case SHADER_EVAL_TRANSMISSION_COLOR: + { + shader_eval_surface(kg, &sd, 0.f, 0, SHADER_CONTEXT_MAIN); + out = shader_bsdf_transmission(kg, &sd); + break; + } + case SHADER_EVAL_SUBSURFACE_COLOR: + { +#ifdef __SUBSURFACE__ + shader_eval_surface(kg, &sd, 0.f, 0, SHADER_CONTEXT_MAIN); + out = shader_bsdf_subsurface(kg, &sd); +#endif + break; + } + case SHADER_EVAL_EMISSION: + { + shader_eval_surface(kg, &sd, 0.f, 0, SHADER_CONTEXT_EMISSION); + out = shader_emissive_eval(kg, &sd); + break; + } + +#ifdef __PASSES__ + /* light passes */ + case SHADER_EVAL_AO: + { + out = L.ao; + break; + } + case SHADER_EVAL_COMBINED: + { + out = path_radiance_clamp_and_sum(kg, &L); + break; + } + case SHADER_EVAL_SHADOW: + { + out = make_float3(L.shadow.x, L.shadow.y, L.shadow.z); + break; + } + case SHADER_EVAL_DIFFUSE_DIRECT: + { + shader_eval_surface(kg, &sd, 0.f, 0, SHADER_CONTEXT_MAIN); + out = safe_divide_color(L.direct_diffuse, shader_bsdf_diffuse(kg, &sd)); + break; + } + case SHADER_EVAL_GLOSSY_DIRECT: + { + shader_eval_surface(kg, &sd, 0.f, 0, SHADER_CONTEXT_MAIN); + out = safe_divide_color(L.direct_glossy, shader_bsdf_glossy(kg, &sd)); + break; + } + case SHADER_EVAL_TRANSMISSION_DIRECT: + { + shader_eval_surface(kg, &sd, 0.f, 0, SHADER_CONTEXT_MAIN); + out = safe_divide_color(L.direct_transmission, shader_bsdf_transmission(kg, &sd)); + break; + } + case SHADER_EVAL_SUBSURFACE_DIRECT: + { 
+#ifdef __SUBSURFACE__ + shader_eval_surface(kg, &sd, 0.f, 0, SHADER_CONTEXT_MAIN); + out = safe_divide_color(L.direct_subsurface, shader_bsdf_subsurface(kg, &sd)); +#endif + break; + } + case SHADER_EVAL_DIFFUSE_INDIRECT: + { + shader_eval_surface(kg, &sd, 0.f, 0, SHADER_CONTEXT_MAIN); + out = safe_divide_color(L.indirect_diffuse, shader_bsdf_diffuse(kg, &sd)); + break; + } + case SHADER_EVAL_GLOSSY_INDIRECT: + { + shader_eval_surface(kg, &sd, 0.f, 0, SHADER_CONTEXT_MAIN); + out = safe_divide_color(L.indirect_glossy, shader_bsdf_glossy(kg, &sd)); + break; + } + case SHADER_EVAL_TRANSMISSION_INDIRECT: + { + shader_eval_surface(kg, &sd, 0.f, 0, SHADER_CONTEXT_MAIN); + out = safe_divide_color(L.indirect_transmission, shader_bsdf_transmission(kg, &sd)); + break; + } + case SHADER_EVAL_SUBSURFACE_INDIRECT: + { +#ifdef __SUBSURFACE__ + shader_eval_surface(kg, &sd, 0.f, 0, SHADER_CONTEXT_MAIN); + out = safe_divide_color(L.indirect_subsurface, shader_bsdf_subsurface(kg, &sd)); +#endif + break; + } +#endif + + /* extra */ + case SHADER_EVAL_ENVIRONMENT: + { + /* setup ray */ + Ray ray; + + ray.P = make_float3(0.0f, 0.0f, 0.0f); + ray.D = normalize(P); + ray.t = 0.0f; +#ifdef __CAMERA_MOTION__ + ray.time = 0.5f; +#endif + +#ifdef __RAY_DIFFERENTIALS__ + ray.dD = differential3_zero(); + ray.dP = differential3_zero(); +#endif + + /* setup shader data */ + shader_setup_from_background(kg, &sd, &ray, 0, 0); + + /* evaluate */ + int flag = 0; /* we can't know which type of BSDF this is for */ + out = shader_eval_background(kg, &sd, flag, SHADER_CONTEXT_MAIN); + break; + } + default: + { + /* no real shader, returning the position of the verts for debugging */ + out = normalize(P); + break; + } + } + + /* write output */ + output[i] = make_float4(out.x, out.y, out.z, 1.0f); + return; +} + ccl_device void kernel_shader_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output, ShaderEvalType type, int i) { + if(type >= SHADER_EVAL_BAKE) { + 
kernel_bake_evaluate(kg, input, output, type, i); + return; + } + ShaderData sd; uint4 in = input[i]; float3 out; @@ -55,7 +355,7 @@ ccl_device void kernel_shader_evaluate(KernelGlobals *kg, ccl_global uint4 *inpu #endif /* setup shader data */ - shader_setup_from_background(kg, &sd, &ray, 0); + shader_setup_from_background(kg, &sd, &ray, 0, 0); /* evaluate */ int flag = 0; /* we can't know which type of BSDF this is for */ diff --git a/intern/cycles/kernel/kernel_emission.h b/intern/cycles/kernel/kernel_emission.h index 58bdc2b70ca..deffa7f2ba2 100644 --- a/intern/cycles/kernel/kernel_emission.h +++ b/intern/cycles/kernel/kernel_emission.h @@ -18,8 +18,8 @@ CCL_NAMESPACE_BEGIN /* Direction Emission */ -ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg, float rando, - LightSample *ls, float3 I, differential3 dI, float t, float time, int bounce) +ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg, + LightSample *ls, float3 I, differential3 dI, float t, float time, int bounce, int transparent_bounce) { /* setup shading at emitter */ ShaderData sd; @@ -36,27 +36,20 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg, float rando, #endif ray.dP = differential3_zero(); ray.dD = dI; -#ifdef __CAMERA_MOTION__ - ray.time = time; -#endif - shader_setup_from_background(kg, &sd, &ray, bounce+1); + + shader_setup_from_background(kg, &sd, &ray, bounce+1, transparent_bounce); eval = shader_eval_background(kg, &sd, 0, SHADER_CONTEXT_EMISSION); } else #endif { -#ifdef __HAIR__ - if(ls->type == LIGHT_STRAND) - shader_setup_from_sample(kg, &sd, ls->P, ls->Ng, I, ls->shader, ls->object, ls->prim, ls->u, ls->v, t, time, bounce+1, ls->prim); - else -#endif - shader_setup_from_sample(kg, &sd, ls->P, ls->Ng, I, ls->shader, ls->object, ls->prim, ls->u, ls->v, t, time, bounce+1, ~0); + shader_setup_from_sample(kg, &sd, ls->P, ls->Ng, I, ls->shader, ls->object, ls->prim, ls->u, ls->v, t, time, bounce+1, transparent_bounce); ls->Ng = sd.Ng; /* 
no path flag, we're evaluating this for all closures. that's weak but * we'd have to do multiple evaluations otherwise */ - shader_eval_surface(kg, &sd, rando, 0, SHADER_CONTEXT_EMISSION); + shader_eval_surface(kg, &sd, 0.0f, 0, SHADER_CONTEXT_EMISSION); /* evaluate emissive closure */ if(sd.flag & SD_EMISSION) @@ -71,13 +64,13 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg, float rando, } ccl_device_noinline bool direct_emission(KernelGlobals *kg, ShaderData *sd, int lindex, - float randt, float rando, float randu, float randv, Ray *ray, BsdfEval *eval, - bool *is_lamp, int bounce) + float randt, float randu, float randv, Ray *ray, BsdfEval *eval, + bool *is_lamp, int bounce, int transparent_bounce) { LightSample ls; #ifdef __BRANCHED_PATH__ - if(lindex != -1) { + if(lindex != LAMP_NONE) { /* sample position on a specified light */ light_select(kg, lindex, randu, randv, sd->P, &ls); } @@ -95,7 +88,7 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg, ShaderData *sd, int differential3 dD = differential3_zero(); /* evaluate closure */ - float3 light_eval = direct_emissive_eval(kg, rando, &ls, -ls.D, dD, ls.t, sd->time, bounce); + float3 light_eval = direct_emissive_eval(kg, &ls, -ls.D, dD, ls.t, sd->time, bounce, transparent_bounce); if(is_zero(light_eval)) return false; @@ -104,7 +97,7 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg, ShaderData *sd, int float bsdf_pdf; #ifdef __VOLUME__ - if(sd->prim != ~0) + if(sd->prim != PRIM_NONE) shader_bsdf_eval(kg, sd, ls.D, eval, &bsdf_pdf); else shader_volume_phase_eval(kg, sd, ls.D, eval, &bsdf_pdf); @@ -160,7 +153,7 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg, ShaderData *sd, int } /* return if it's a lamp for shadow pass */ - *is_lamp = (ls.prim == ~0 && ls.type != LIGHT_BACKGROUND); + *is_lamp = (ls.prim == PRIM_NONE && ls.type != LIGHT_BACKGROUND); return true; } @@ -173,10 +166,11 @@ ccl_device_noinline float3 indirect_primitive_emission(KernelGlobals 
*kg, Shader float3 L = shader_emissive_eval(kg, sd); #ifdef __HAIR__ - if(!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS) && (sd->segment == ~0)) { + if(!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS) && (sd->type & PRIMITIVE_ALL_TRIANGLE)) #else - if(!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS)) { + if(!(path_flag & PATH_RAY_MIS_SKIP) && (sd->flag & SD_USE_MIS)) #endif + { /* multiple importance sampling, get triangle light pdf, * and compute weight with respect to BSDF pdf */ float pdf = triangle_light_pdf(kg, sd->Ng, sd->I, t); @@ -190,71 +184,75 @@ ccl_device_noinline float3 indirect_primitive_emission(KernelGlobals *kg, Shader /* Indirect Lamp Emission */ -ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg, Ray *ray, int path_flag, float bsdf_pdf, float randt, float3 *emission, int bounce) +ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg, PathState *state, Ray *ray, float3 *emission) { - LightSample ls; - int lamp = lamp_light_eval_sample(kg, randt); + bool hit_lamp = false; - if(lamp == ~0) - return false; + *emission = make_float3(0.0f, 0.0f, 0.0f); - if(!lamp_light_eval(kg, lamp, ray->P, ray->D, ray->t, &ls)) - return false; + for(int lamp = 0; lamp < kernel_data.integrator.num_all_lights; lamp++) { + LightSample ls; + + if(!lamp_light_eval(kg, lamp, ray->P, ray->D, ray->t, &ls)) + continue; #ifdef __PASSES__ - /* use visibility flag to skip lights */ - if(ls.shader & SHADER_EXCLUDE_ANY) { - if(((ls.shader & SHADER_EXCLUDE_DIFFUSE) && (path_flag & PATH_RAY_DIFFUSE)) || - ((ls.shader & SHADER_EXCLUDE_GLOSSY) && (path_flag & PATH_RAY_GLOSSY)) || - ((ls.shader & SHADER_EXCLUDE_TRANSMIT) && (path_flag & PATH_RAY_TRANSMIT))) - return false; - } + /* use visibility flag to skip lights */ + if(ls.shader & SHADER_EXCLUDE_ANY) { + if(((ls.shader & SHADER_EXCLUDE_DIFFUSE) && (state->flag & PATH_RAY_DIFFUSE)) || + ((ls.shader & SHADER_EXCLUDE_GLOSSY) && (state->flag & PATH_RAY_GLOSSY)) || + 
((ls.shader & SHADER_EXCLUDE_TRANSMIT) && (state->flag & PATH_RAY_TRANSMIT))) + continue; + } #endif - float3 L = direct_emissive_eval(kg, 0.0f, &ls, -ray->D, ray->dD, ls.t, ray->time, bounce); + float3 L = direct_emissive_eval(kg, &ls, -ray->D, ray->dD, ls.t, ray->time, state->bounce, state->transparent_bounce); - if(!(path_flag & PATH_RAY_MIS_SKIP)) { - /* multiple importance sampling, get regular light pdf, - * and compute weight with respect to BSDF pdf */ - float mis_weight = power_heuristic(bsdf_pdf, ls.pdf); - L *= mis_weight; + if(!(state->flag & PATH_RAY_MIS_SKIP)) { + /* multiple importance sampling, get regular light pdf, + * and compute weight with respect to BSDF pdf */ + float mis_weight = power_heuristic(state->ray_pdf, ls.pdf); + L *= mis_weight; + } + + *emission += L; + hit_lamp = true; } - *emission = L; - return true; + return hit_lamp; } /* Indirect Background */ -ccl_device_noinline float3 indirect_background(KernelGlobals *kg, Ray *ray, int path_flag, float bsdf_pdf, int bounce) +ccl_device_noinline float3 indirect_background(KernelGlobals *kg, PathState *state, Ray *ray) { #ifdef __BACKGROUND__ int shader = kernel_data.background.surface_shader; /* use visibility flag to skip lights */ if(shader & SHADER_EXCLUDE_ANY) { - if(((shader & SHADER_EXCLUDE_DIFFUSE) && (path_flag & PATH_RAY_DIFFUSE)) || - ((shader & SHADER_EXCLUDE_GLOSSY) && (path_flag & PATH_RAY_GLOSSY)) || - ((shader & SHADER_EXCLUDE_TRANSMIT) && (path_flag & PATH_RAY_TRANSMIT)) || - ((shader & SHADER_EXCLUDE_CAMERA) && (path_flag & PATH_RAY_CAMERA))) + if(((shader & SHADER_EXCLUDE_DIFFUSE) && (state->flag & PATH_RAY_DIFFUSE)) || + ((shader & SHADER_EXCLUDE_GLOSSY) && (state->flag & PATH_RAY_GLOSSY)) || + ((shader & SHADER_EXCLUDE_TRANSMIT) && (state->flag & PATH_RAY_TRANSMIT)) || + ((shader & SHADER_EXCLUDE_CAMERA) && (state->flag & PATH_RAY_CAMERA))) return make_float3(0.0f, 0.0f, 0.0f); } /* evaluate background closure */ ShaderData sd; - shader_setup_from_background(kg, &sd, 
ray, bounce+1); + shader_setup_from_background(kg, &sd, ray, state->bounce+1, state->transparent_bounce); - float3 L = shader_eval_background(kg, &sd, path_flag, SHADER_CONTEXT_EMISSION); + float3 L = shader_eval_background(kg, &sd, state->flag, SHADER_CONTEXT_EMISSION); #ifdef __BACKGROUND_MIS__ /* check if background light exists or if we should skip pdf */ int res = kernel_data.integrator.pdf_background_res; - if(!(path_flag & PATH_RAY_MIS_SKIP) && res) { + if(!(state->flag & PATH_RAY_MIS_SKIP) && res) { /* multiple importance sampling, get background light pdf for ray * direction, and compute weight with respect to BSDF pdf */ float pdf = background_light_pdf(kg, ray->D); - float mis_weight = power_heuristic(bsdf_pdf, pdf); + float mis_weight = power_heuristic(state->ray_pdf, pdf); return L*mis_weight; } diff --git a/intern/cycles/kernel/kernel_film.h b/intern/cycles/kernel/kernel_film.h index cbd875e994c..dc5f6e7ce38 100644 --- a/intern/cycles/kernel/kernel_film.h +++ b/intern/cycles/kernel/kernel_film.h @@ -75,7 +75,7 @@ ccl_device void kernel_film_convert_to_half_float(KernelGlobals *kg, float exposure = kernel_data.film.exposure; - ccl_align(16) float4 rgba_in = *in; + float4 rgba_in = *in; if(exposure != 1.0f) { rgba_in.x *= exposure; @@ -83,7 +83,7 @@ ccl_device void kernel_film_convert_to_half_float(KernelGlobals *kg, rgba_in.z *= exposure; } - float4_store_half(out, &rgba_in, sample_scale); + float4_store_half(out, rgba_in, sample_scale); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h index c32f0395744..ac432d3fe04 100644 --- a/intern/cycles/kernel/kernel_light.h +++ b/intern/cycles/kernel/kernel_light.h @@ -217,8 +217,8 @@ ccl_device void lamp_light_sample(KernelGlobals *kg, int lamp, LightType type = (LightType)__float_as_int(data0.x); ls->type = type; ls->shader = __float_as_int(data1.x); - ls->object = ~0; - ls->prim = ~0; + ls->object = PRIM_NONE; + ls->prim = PRIM_NONE; ls->lamp = lamp; 
ls->u = randu; ls->v = randv; @@ -309,8 +309,8 @@ ccl_device bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D, LightType type = (LightType)__float_as_int(data0.x); ls->type = type; ls->shader = __float_as_int(data1.x); - ls->object = ~0; - ls->prim = ~0; + ls->object = PRIM_NONE; + ls->prim = PRIM_NONE; ls->lamp = lamp; /* todo: missing texture coordinates */ ls->u = 0.0f; @@ -421,7 +421,6 @@ ccl_device bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D, /* compute pdf */ if(ls->t != FLT_MAX) ls->pdf *= lamp_light_pdf(kg, ls->Ng, -ls->D, ls->t); - ls->eval_fac *= kernel_data.integrator.inv_pdf_lights; return true; } @@ -458,11 +457,10 @@ ccl_device void triangle_light_sample(KernelGlobals *kg, int prim, int object, v = randv*randu; /* triangle, so get position, normal, shader */ - ls->P = triangle_point_MT(kg, prim, u, v); - ls->Ng = triangle_normal_MT(kg, prim, &ls->shader); + triangle_point_normal(kg, prim, u, v, &ls->P, &ls->Ng, &ls->shader); ls->object = object; ls->prim = prim; - ls->lamp = ~0; + ls->lamp = LAMP_NONE; ls->shader |= SHADER_USE_MIS; ls->t = 0.0f; ls->u = u; @@ -485,52 +483,6 @@ ccl_device float triangle_light_pdf(KernelGlobals *kg, return t*t*pdf/cos_pi; } -/* Curve Light */ - -#ifdef __HAIR__ - -ccl_device void curve_segment_light_sample(KernelGlobals *kg, int prim, int object, - int segment, float randu, float randv, float time, LightSample *ls) -{ - /* this strand code needs completion */ - float4 v00 = kernel_tex_fetch(__curves, prim); - - int k0 = __float_as_int(v00.x) + segment; - int k1 = k0 + 1; - - float4 P1 = kernel_tex_fetch(__curve_keys, k0); - float4 P2 = kernel_tex_fetch(__curve_keys, k1); - - float l = len(float4_to_float3(P2) - float4_to_float3(P1)); - - float r1 = P1.w; - float r2 = P2.w; - float3 tg = (float4_to_float3(P2) - float4_to_float3(P1)) / l; - float3 xc = make_float3(tg.x * tg.z, tg.y * tg.z, -(tg.x * tg.x + tg.y * tg.y)); - if (is_zero(xc)) - xc = make_float3(tg.x * tg.y, -(tg.x 
* tg.x + tg.z * tg.z), tg.z * tg.y); - xc = normalize(xc); - float3 yc = cross(tg, xc); - float gd = ((r2 - r1)/l); - - /* normal currently ignores gradient */ - ls->Ng = sinf(M_2PI_F * randv) * xc + cosf(M_2PI_F * randv) * yc; - ls->P = randu * l * tg + (gd * l + r1) * ls->Ng; - ls->object = object; - ls->prim = prim; - ls->lamp = ~0; - ls->t = 0.0f; - ls->u = randu; - ls->v = randv; - ls->type = LIGHT_STRAND; - ls->eval_fac = 1.0f; - ls->shader = __float_as_int(v00.z) | SHADER_USE_MIS; - - object_transform_light_sample(kg, ls, object, time); -} - -#endif - /* Light Distribution */ ccl_device int light_distribution_sample(KernelGlobals *kg, float randt) @@ -573,21 +525,14 @@ ccl_device void light_sample(KernelGlobals *kg, float randt, float randu, float if(prim >= 0) { int object = __float_as_int(l.w); -#ifdef __HAIR__ - int segment = __float_as_int(l.z) & SHADER_MASK; -#endif + int shader_flag = __float_as_int(l.z); -#ifdef __HAIR__ - if (segment != SHADER_MASK) - curve_segment_light_sample(kg, prim, object, segment, randu, randv, time, ls); - else -#endif - triangle_light_sample(kg, prim, object, randu, randv, time, ls); + triangle_light_sample(kg, prim, object, randu, randv, time, ls); /* compute incoming direction, distance and pdf */ ls->D = normalize_len(ls->P - P, &ls->t); ls->pdf = triangle_light_pdf(kg, ls->Ng, -ls->D, ls->t); - ls->shader |= __float_as_int(l.z) & (~SHADER_MASK); + ls->shader |= shader_flag; } else { int lamp = -prim-1; @@ -620,7 +565,7 @@ ccl_device int lamp_light_eval_sample(KernelGlobals *kg, float randt) return lamp; } else - return ~0; + return LAMP_NONE; } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_montecarlo.h b/intern/cycles/kernel/kernel_montecarlo.h index 92f3420a218..af7b727c1ba 100644 --- a/intern/cycles/kernel/kernel_montecarlo.h +++ b/intern/cycles/kernel/kernel_montecarlo.h @@ -131,6 +131,11 @@ ccl_device float power_heuristic_3(float a, float b, float c) return (a*a)/(a*a + b*b + c*c); } +ccl_device float 
max_heuristic(float a, float b) +{ + return (a > b)? 1.0f: 0.0f; +} + /* distribute uniform xy on [0,1] over unit disk [-1,1], with concentric mapping * to better preserve stratification for some RNG sequences */ ccl_device float2 concentric_sample_disk(float u1, float u2) diff --git a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h index 9cdcb8c5229..b3b6fc02894 100644 --- a/intern/cycles/kernel/kernel_passes.h +++ b/intern/cycles/kernel/kernel_passes.h @@ -51,7 +51,8 @@ ccl_device_inline void kernel_write_data_passes(KernelGlobals *kg, ccl_global fl if(!(path_flag & PATH_RAY_SINGLE_PASS_DONE)) { if(!(sd->flag & SD_TRANSPARENT) || kernel_data.film.pass_alpha_threshold == 0.0f || - average(shader_bsdf_alpha(kg, sd)) >= kernel_data.film.pass_alpha_threshold) { + average(shader_bsdf_alpha(kg, sd)) >= kernel_data.film.pass_alpha_threshold) + { if(sample == 0) { if(flag & PASS_DEPTH) { diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index 635201471e1..a80a0033712 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -18,18 +18,15 @@ #include "osl_shader.h" #endif -#include "kernel_differential.h" -#include "kernel_montecarlo.h" -#include "kernel_projection.h" -#include "kernel_object.h" -#include "kernel_triangle.h" -#include "kernel_curve.h" -#include "kernel_primitive.h" -#include "kernel_projection.h" #include "kernel_random.h" -#include "kernel_bvh.h" -#include "kernel_accumulate.h" +#include "kernel_projection.h" +#include "kernel_montecarlo.h" +#include "kernel_differential.h" #include "kernel_camera.h" + +#include "geom/geom.h" + +#include "kernel_accumulate.h" #include "kernel_shader.h" #include "kernel_light.h" #include "kernel_emission.h" @@ -59,11 +56,6 @@ ccl_device_inline bool kernel_path_integrate_scatter_lighting(KernelGlobals *kg, /* sample illumination from lights to find path contribution */ if(sd->flag & SD_BSDF_HAS_EVAL) { float light_t = 
path_state_rng_1D(kg, rng, state, PRNG_LIGHT); -#ifdef __MULTI_CLOSURE__ - float light_o = 0.0f; -#else - float light_o = path_state_rng_1D(kg, rng, state, PRNG_LIGHT_F); -#endif float light_u, light_v; path_state_rng_2D(kg, rng, state, PRNG_LIGHT_U, &light_u, &light_v); @@ -75,7 +67,7 @@ ccl_device_inline bool kernel_path_integrate_scatter_lighting(KernelGlobals *kg, light_ray.time = sd->time; #endif - if(direct_emission(kg, sd, -1, light_t, light_o, light_u, light_v, &light_ray, &L_light, &is_lamp, state->bounce)) { + if(direct_emission(kg, sd, LAMP_NONE, light_t, light_u, light_v, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) { /* trace shadow ray */ float3 shadow; @@ -133,7 +125,96 @@ ccl_device_inline bool kernel_path_integrate_scatter_lighting(KernelGlobals *kg, #if defined(__BRANCHED_PATH__) || defined(__SUBSURFACE__) -ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray, ccl_global float *buffer, +ccl_device void kernel_branched_path_integrate_direct_lighting(KernelGlobals *kg, RNG *rng, + ShaderData *sd, PathState *state, float3 throughput, float num_samples_adjust, PathRadiance *L, bool sample_all_lights) +{ + /* sample illumination from lights to find path contribution */ + if(sd->flag & SD_BSDF_HAS_EVAL) { + Ray light_ray; + BsdfEval L_light; + bool is_lamp; + +#ifdef __OBJECT_MOTION__ + light_ray.time = sd->time; +#endif + + if(sample_all_lights) { + /* lamp sampling */ + for(int i = 0; i < kernel_data.integrator.num_all_lights; i++) { + int num_samples = ceil_to_int(num_samples_adjust*light_select_num_samples(kg, i)); + float num_samples_inv = num_samples_adjust/(num_samples*kernel_data.integrator.num_all_lights); + RNG lamp_rng = cmj_hash(*rng, i); + + if(kernel_data.integrator.pdf_triangles != 0.0f) + num_samples_inv *= 0.5f; + + for(int j = 0; j < num_samples; j++) { + float light_u, light_v; + path_branched_rng_2D(kg, &lamp_rng, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v); + + 
if(direct_emission(kg, sd, i, 0.0f, light_u, light_v, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) { + /* trace shadow ray */ + float3 shadow; + + if(!shadow_blocked(kg, state, &light_ray, &shadow)) { + /* accumulate */ + path_radiance_accum_light(L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp); + } + } + } + } + + /* mesh light sampling */ + if(kernel_data.integrator.pdf_triangles != 0.0f) { + int num_samples = ceil_to_int(num_samples_adjust*kernel_data.integrator.mesh_light_samples); + float num_samples_inv = num_samples_adjust/num_samples; + + if(kernel_data.integrator.num_all_lights) + num_samples_inv *= 0.5f; + + for(int j = 0; j < num_samples; j++) { + float light_t = path_branched_rng_1D(kg, rng, state, j, num_samples, PRNG_LIGHT); + float light_u, light_v; + path_branched_rng_2D(kg, rng, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v); + + /* only sample triangle lights */ + if(kernel_data.integrator.num_all_lights) + light_t = 0.5f*light_t; + + if(direct_emission(kg, sd, LAMP_NONE, light_t, light_u, light_v, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) { + /* trace shadow ray */ + float3 shadow; + + if(!shadow_blocked(kg, state, &light_ray, &shadow)) { + /* accumulate */ + path_radiance_accum_light(L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp); + } + } + } + } + } + else { + float light_t = path_state_rng_1D(kg, rng, state, PRNG_LIGHT); + float light_u, light_v; + path_state_rng_2D(kg, rng, state, PRNG_LIGHT_U, &light_u, &light_v); + + /* sample random light */ + if(direct_emission(kg, sd, LAMP_NONE, light_t, light_u, light_v, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) { + /* trace shadow ray */ + float3 shadow; + + if(!shadow_blocked(kg, state, &light_ray, &shadow)) { + /* accumulate */ + path_radiance_accum_light(L, throughput, &L_light, shadow, 1.0f, state->bounce, 
is_lamp); + } + } + } + } +} + +#endif + +ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray, float3 throughput, int num_samples, PathState state, PathRadiance *L) { /* path iteration */ @@ -161,17 +242,16 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray, ccl_g light_ray.dP = ray.dP; /* intersect with lamp */ - float light_t = path_state_rng_1D(kg, rng, &state, PRNG_LIGHT); float3 emission; - if(indirect_lamp_emission(kg, &light_ray, state.flag, state.ray_pdf, light_t, &emission, state.bounce)) + if(indirect_lamp_emission(kg, &state, &light_ray, &emission)) path_radiance_accum_emission(L, throughput, emission, state.bounce); } #endif #ifdef __VOLUME__ /* volume attenuation, emission, scatter */ - if(state.volume_stack[0].shader != SHADER_NO_ID) { + if(state.volume_stack[0].shader != SHADER_NONE) { Ray volume_ray = ray; volume_ray.t = (hit)? isect.t: FLT_MAX; @@ -191,7 +271,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray, ccl_g if(!hit) { #ifdef __BACKGROUND__ /* sample background shader */ - float3 L_background = indirect_background(kg, &ray, state.flag, state.ray_pdf, state.bounce); + float3 L_background = indirect_background(kg, &state, &ray); path_radiance_accum_background(L, throughput, L_background, state.bounce); #endif @@ -200,7 +280,7 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray, ccl_g /* setup shading */ ShaderData sd; - shader_setup_from_ray(kg, &sd, &isect, &ray, state.bounce); + shader_setup_from_ray(kg, &sd, &isect, &ray, state.bounce, state.transparent_bounce); float rbsdf = path_state_rng_1D(kg, rng, &state, PRNG_BSDF); shader_eval_surface(kg, &sd, rbsdf, state.flag, SHADER_CONTEXT_INDIRECT); #ifdef __BRANCHED_PATH__ @@ -300,38 +380,10 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray, ccl_g } #endif -#ifdef __EMISSION__ +#if defined(__EMISSION__) && defined(__BRANCHED_PATH__) 
if(kernel_data.integrator.use_direct_light) { - /* sample illumination from lights to find path contribution */ - if(sd.flag & SD_BSDF_HAS_EVAL) { - float light_t = path_state_rng_1D(kg, rng, &state, PRNG_LIGHT); -#ifdef __MULTI_CLOSURE__ - float light_o = 0.0f; -#else - float light_o = path_state_rng_1D(kg, rng, &state, PRNG_LIGHT_F); -#endif - float light_u, light_v; - path_state_rng_2D(kg, rng, &state, PRNG_LIGHT_U, &light_u, &light_v); - - Ray light_ray; - BsdfEval L_light; - bool is_lamp; - -#ifdef __OBJECT_MOTION__ - light_ray.time = sd.time; -#endif - - /* sample random light */ - if(direct_emission(kg, &sd, -1, light_t, light_o, light_u, light_v, &light_ray, &L_light, &is_lamp, state.bounce)) { - /* trace shadow ray */ - float3 shadow; - - if(!shadow_blocked(kg, &state, &light_ray, &shadow)) { - /* accumulate */ - path_radiance_accum_light(L, throughput, &L_light, shadow, 1.0f, state.bounce, is_lamp); - } - } - } + bool all = kernel_data.integrator.sample_all_lights_indirect; + kernel_branched_path_integrate_direct_lighting(kg, rng, &sd, &state, throughput, 1.0f, L, all); } #endif @@ -406,10 +458,6 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, Ray ray, ccl_g } } -#endif - -#ifdef __SUBSURFACE__ - ccl_device_inline bool kernel_path_integrate_lighting(KernelGlobals *kg, RNG *rng, ShaderData *sd, float3 *throughput, PathState *state, PathRadiance *L, Ray *ray) { @@ -418,11 +466,6 @@ ccl_device_inline bool kernel_path_integrate_lighting(KernelGlobals *kg, RNG *rn /* sample illumination from lights to find path contribution */ if(sd->flag & SD_BSDF_HAS_EVAL) { float light_t = path_state_rng_1D(kg, rng, state, PRNG_LIGHT); -#ifdef __MULTI_CLOSURE__ - float light_o = 0.0f; -#else - float light_o = path_state_rng_1D(kg, rng, state, PRNG_LIGHT_F); -#endif float light_u, light_v; path_state_rng_2D(kg, rng, state, PRNG_LIGHT_U, &light_u, &light_v); @@ -434,7 +477,7 @@ ccl_device_inline bool kernel_path_integrate_lighting(KernelGlobals *kg, RNG 
*rn light_ray.time = sd->time; #endif - if(direct_emission(kg, sd, -1, light_t, light_o, light_u, light_v, &light_ray, &L_light, &is_lamp, state->bounce)) { + if(direct_emission(kg, sd, LAMP_NONE, light_t, light_u, light_v, &light_ray, &L_light, &is_lamp, state->bounce, state->transparent_bounce)) { /* trace shadow ray */ float3 shadow; @@ -524,6 +567,84 @@ ccl_device_inline bool kernel_path_integrate_lighting(KernelGlobals *kg, RNG *rn } } +ccl_device void kernel_path_ao(KernelGlobals *kg, ShaderData *sd, PathRadiance *L, PathState *state, RNG *rng, float3 throughput) +{ + /* todo: solve correlation */ + float bsdf_u, bsdf_v; + + path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v); + + float ao_factor = kernel_data.background.ao_factor; + float3 ao_N; + float3 ao_bsdf = shader_bsdf_ao(kg, sd, ao_factor, &ao_N); + float3 ao_D; + float ao_pdf; + float3 ao_alpha = shader_bsdf_alpha(kg, sd); + + sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf); + + if(dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f) { + Ray light_ray; + float3 ao_shadow; + + light_ray.P = ray_offset(sd->P, sd->Ng); + light_ray.D = ao_D; + light_ray.t = kernel_data.background.ao_distance; +#ifdef __OBJECT_MOTION__ + light_ray.time = sd->time; +#endif + light_ray.dP = sd->dP; + light_ray.dD = differential3_zero(); + + if(!shadow_blocked(kg, state, &light_ray, &ao_shadow)) + path_radiance_accum_ao(L, throughput, ao_alpha, ao_bsdf, ao_shadow, state->bounce); + } +} + +#ifdef __SUBSURFACE__ +ccl_device bool kernel_path_subsurface_scatter(KernelGlobals *kg, ShaderData *sd, PathRadiance *L, PathState *state, RNG *rng, Ray *ray, float3 *throughput) +{ + float bssrdf_probability; + ShaderClosure *sc = subsurface_scatter_pick_closure(kg, sd, &bssrdf_probability); + + /* modify throughput for picking bssrdf or bsdf */ + *throughput *= bssrdf_probability; + + /* do bssrdf scatter step if we picked a bssrdf closure */ + if(sc) { + uint lcg_state = lcg_state_init(rng, state, 0x68bc21eb); + + 
ShaderData bssrdf_sd[BSSRDF_MAX_HITS]; + float bssrdf_u, bssrdf_v; + path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v); + int num_hits = subsurface_scatter_multi_step(kg, sd, bssrdf_sd, state->flag, sc, &lcg_state, bssrdf_u, bssrdf_v, false); + + /* compute lighting with the BSDF closure */ + for(int hit = 0; hit < num_hits; hit++) { + float3 tp = *throughput; + PathState hit_state = *state; + Ray hit_ray = *ray; + + hit_state.flag |= PATH_RAY_BSSRDF_ANCESTOR; + hit_state.rng_offset += PRNG_BOUNCE_NUM; + + if(kernel_path_integrate_lighting(kg, rng, &bssrdf_sd[hit], &tp, &hit_state, L, &hit_ray)) { +#ifdef __LAMP_MIS__ + hit_state.ray_t = 0.0f; +#endif + + kernel_path_indirect(kg, rng, hit_ray, tp, state->num_samples, hit_state, L); + + /* for render passes, sum and reset indirect light pass variables + * for the next samples */ + path_radiance_sum_indirect(L); + path_radiance_reset_indirect(L); + } + } + return true; + } + return false; +} #endif ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, Ray ray, ccl_global float *buffer) @@ -578,17 +699,16 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, light_ray.dP = ray.dP; /* intersect with lamp */ - float light_t = path_state_rng_1D(kg, rng, &state, PRNG_LIGHT); float3 emission; - if(indirect_lamp_emission(kg, &light_ray, state.flag, state.ray_pdf, light_t, &emission, state.bounce)) + if(indirect_lamp_emission(kg, &state, &light_ray, &emission)) path_radiance_accum_emission(&L, throughput, emission, state.bounce); } #endif #ifdef __VOLUME__ /* volume attenuation, emission, scatter */ - if(state.volume_stack[0].shader != SHADER_NO_ID) { + if(state.volume_stack[0].shader != SHADER_NONE) { Ray volume_ray = ray; volume_ray.t = (hit)? 
isect.t: FLT_MAX; @@ -618,7 +738,7 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, #ifdef __BACKGROUND__ /* sample background shader */ - float3 L_background = indirect_background(kg, &ray, state.flag, state.ray_pdf, state.bounce); + float3 L_background = indirect_background(kg, &state, &ray); path_radiance_accum_background(&L, throughput, L_background, state.bounce); #endif @@ -627,7 +747,7 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, /* setup shading */ ShaderData sd; - shader_setup_from_ray(kg, &sd, &isect, &ray, state.bounce); + shader_setup_from_ray(kg, &sd, &isect, &ray, state.bounce, state.transparent_bounce); float rbsdf = path_state_rng_1D(kg, rng, &state, PRNG_BSDF); shader_eval_surface(kg, &sd, rbsdf, state.flag, SHADER_CONTEXT_MAIN); @@ -694,35 +814,7 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, #ifdef __AO__ /* ambient occlusion */ if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) { - /* todo: solve correlation */ - float bsdf_u, bsdf_v; - path_state_rng_2D(kg, rng, &state, PRNG_BSDF_U, &bsdf_u, &bsdf_v); - - float ao_factor = kernel_data.background.ao_factor; - float3 ao_N; - float3 ao_bsdf = shader_bsdf_ao(kg, &sd, ao_factor, &ao_N); - float3 ao_D; - float ao_pdf; - float3 ao_alpha = shader_bsdf_alpha(kg, &sd); - - sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf); - - if(dot(sd.Ng, ao_D) > 0.0f && ao_pdf != 0.0f) { - Ray light_ray; - float3 ao_shadow; - - light_ray.P = ray_offset(sd.P, sd.Ng); - light_ray.D = ao_D; - light_ray.t = kernel_data.background.ao_distance; -#ifdef __OBJECT_MOTION__ - light_ray.time = sd.time; -#endif - light_ray.dP = sd.dP; - light_ray.dD = differential3_zero(); - - if(!shadow_blocked(kg, &state, &light_ray, &ao_shadow)) - path_radiance_accum_ao(&L, throughput, ao_alpha, ao_bsdf, ao_shadow, state.bounce); - } + kernel_path_ao(kg, &sd, &L, &state, rng, throughput); } #endif @@ 
-730,60 +822,18 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, /* bssrdf scatter to a different location on the same object, replacing * the closures with a diffuse BSDF */ if(sd.flag & SD_BSSRDF) { - float bssrdf_probability; - ShaderClosure *sc = subsurface_scatter_pick_closure(kg, &sd, &bssrdf_probability); - - /* modify throughput for picking bssrdf or bsdf */ - throughput *= bssrdf_probability; - - /* do bssrdf scatter step if we picked a bssrdf closure */ - if(sc) { - uint lcg_state = lcg_state_init(rng, &state, 0x68bc21eb); - - ShaderData bssrdf_sd[BSSRDF_MAX_HITS]; - float bssrdf_u, bssrdf_v; - path_state_rng_2D(kg, rng, &state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v); - int num_hits = subsurface_scatter_multi_step(kg, &sd, bssrdf_sd, state.flag, sc, &lcg_state, bssrdf_u, bssrdf_v, false); - - /* compute lighting with the BSDF closure */ - for(int hit = 0; hit < num_hits; hit++) { - float3 tp = throughput; - PathState hit_state = state; - Ray hit_ray = ray; - - hit_state.flag |= PATH_RAY_BSSRDF_ANCESTOR; - hit_state.rng_offset += PRNG_BOUNCE_NUM; - - if(kernel_path_integrate_lighting(kg, rng, &bssrdf_sd[hit], &tp, &hit_state, &L, &hit_ray)) { -#ifdef __LAMP_MIS__ - hit_state.ray_t = 0.0f; -#endif - - kernel_path_indirect(kg, rng, hit_ray, buffer, tp, state.num_samples, hit_state, &L); - - /* for render passes, sum and reset indirect light pass variables - * for the next samples */ - path_radiance_sum_indirect(&L); - path_radiance_reset_indirect(&L); - } - } + if(kernel_path_subsurface_scatter(kg, &sd, &L, &state, rng, &ray, &throughput)) break; - } } #endif - /* The following code is the same as in kernel_path_integrate_lighting(), + /* Same as kernel_path_integrate_lighting(kg, rng, &sd, &throughput, &state, &L, &ray), but for CUDA the function call is slower. 
*/ #ifdef __EMISSION__ if(kernel_data.integrator.use_direct_light) { /* sample illumination from lights to find path contribution */ if(sd.flag & SD_BSDF_HAS_EVAL) { float light_t = path_state_rng_1D(kg, rng, &state, PRNG_LIGHT); -#ifdef __MULTI_CLOSURE__ - float light_o = 0.0f; -#else - float light_o = path_state_rng_1D(kg, rng, &state, PRNG_LIGHT_F); -#endif float light_u, light_v; path_state_rng_2D(kg, rng, &state, PRNG_LIGHT_U, &light_u, &light_v); @@ -795,7 +845,7 @@ ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, light_ray.time = sd.time; #endif - if(direct_emission(kg, &sd, -1, light_t, light_o, light_u, light_v, &light_ray, &L_light, &is_lamp, state.bounce)) { + if(direct_emission(kg, &sd, LAMP_NONE, light_t, light_u, light_v, &light_ray, &L_light, &is_lamp, state.bounce, state.transparent_bounce)) { /* trace shadow ray */ float3 shadow; @@ -898,69 +948,9 @@ ccl_device_noinline void kernel_branched_path_integrate_lighting(KernelGlobals * PathState *state, PathRadiance *L, ccl_global float *buffer) { #ifdef __EMISSION__ - /* sample illumination from lights to find path contribution */ - if(sd->flag & SD_BSDF_HAS_EVAL) { - Ray light_ray; - BsdfEval L_light; - bool is_lamp; - -#ifdef __OBJECT_MOTION__ - light_ray.time = sd->time; -#endif - - /* lamp sampling */ - for(int i = 0; i < kernel_data.integrator.num_all_lights; i++) { - int num_samples = ceil_to_int(num_samples_adjust*light_select_num_samples(kg, i)); - float num_samples_inv = num_samples_adjust/(num_samples*kernel_data.integrator.num_all_lights); - RNG lamp_rng = cmj_hash(*rng, i); - - if(kernel_data.integrator.pdf_triangles != 0.0f) - num_samples_inv *= 0.5f; - - for(int j = 0; j < num_samples; j++) { - float light_u, light_v; - path_branched_rng_2D(kg, &lamp_rng, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v); - - if(direct_emission(kg, sd, i, 0.0f, 0.0f, light_u, light_v, &light_ray, &L_light, &is_lamp, state->bounce)) { - /* trace shadow ray */ - float3 
shadow; - - if(!shadow_blocked(kg, state, &light_ray, &shadow)) { - /* accumulate */ - path_radiance_accum_light(L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp); - } - } - } - } - - /* mesh light sampling */ - if(kernel_data.integrator.pdf_triangles != 0.0f) { - int num_samples = ceil_to_int(num_samples_adjust*kernel_data.integrator.mesh_light_samples); - float num_samples_inv = num_samples_adjust/num_samples; - - if(kernel_data.integrator.num_all_lights) - num_samples_inv *= 0.5f; - - for(int j = 0; j < num_samples; j++) { - float light_t = path_branched_rng_1D(kg, rng, state, j, num_samples, PRNG_LIGHT); - float light_u, light_v; - path_branched_rng_2D(kg, rng, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v); - - /* only sample triangle lights */ - if(kernel_data.integrator.num_all_lights) - light_t = 0.5f*light_t; - - if(direct_emission(kg, sd, -1, light_t, 0.0f, light_u, light_v, &light_ray, &L_light, &is_lamp, state->bounce)) { - /* trace shadow ray */ - float3 shadow; - - if(!shadow_blocked(kg, state, &light_ray, &shadow)) { - /* accumulate */ - path_radiance_accum_light(L, throughput*num_samples_inv, &L_light, shadow, num_samples_inv, state->bounce, is_lamp); - } - } - } - } + if(kernel_data.integrator.use_direct_light) { + bool all = kernel_data.integrator.sample_all_lights_direct; + kernel_branched_path_integrate_direct_lighting(kg, rng, sd, state, throughput, num_samples_adjust, L, all); } #endif @@ -1043,7 +1033,7 @@ ccl_device_noinline void kernel_branched_path_integrate_lighting(KernelGlobals * ps.ray_t = 0.0f; #endif - kernel_path_indirect(kg, rng, bsdf_ray, buffer, tp*num_samples_inv, num_samples, ps, L); + kernel_path_indirect(kg, rng, bsdf_ray, tp*num_samples_inv, num_samples, ps, L); /* for render passes, sum and reset indirect light pass variables * for the next samples */ @@ -1092,13 +1082,66 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in #ifdef __VOLUME__ /* 
volume attenuation, emission, scatter */ - if(state.volume_stack[0].shader != SHADER_NO_ID) { + if(state.volume_stack[0].shader != SHADER_NONE) { Ray volume_ray = ray; volume_ray.t = (hit)? isect.t: FLT_MAX; +#ifdef __KERNEL_CPU__ + /* decoupled ray marching only supported on CPU */ + bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack); + + /* cache steps along volume for repeated sampling */ + VolumeSegment volume_segment; + ShaderData volume_sd; + + shader_setup_from_volume(kg, &volume_sd, &volume_ray, state.bounce, state.transparent_bounce); + kernel_volume_decoupled_record(kg, &state, + &volume_ray, &volume_sd, &volume_segment, heterogeneous); + + /* sample scattering */ + int num_samples = kernel_data.integrator.volume_samples; + float num_samples_inv = 1.0f/num_samples; + + for(int j = 0; j < num_samples; j++) { + /* workaround to fix correlation bug in T38710, can find better solution + * in random number generator later, for now this is done here to not impact + * performance of rendering without volumes */ + RNG tmp_rng = cmj_hash(*rng, state.rng_offset); + + PathState ps = state; + Ray pray = ray; + float3 tp = throughput; + + /* branch RNG state */ + path_state_branch(&ps, j, num_samples); + + VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg, + &ps, &volume_ray, &volume_sd, &tp, &tmp_rng, &volume_segment); + + if(result == VOLUME_PATH_SCATTERED) { + /* todo: use all-light sampling */ + if(kernel_path_integrate_scatter_lighting(kg, rng, &volume_sd, &tp, &ps, &L, &pray, num_samples_inv)) { + kernel_path_indirect(kg, rng, pray, tp*num_samples_inv, num_samples, ps, &L); + + /* for render passes, sum and reset indirect light pass variables + * for the next samples */ + path_radiance_sum_indirect(&L); + path_radiance_reset_indirect(&L); + } + } + } + + /* emission and transmittance */ + if(volume_segment.closure_flag & SD_EMISSION) + path_radiance_accum_emission(&L, throughput, volume_segment.accum_emission, 
state.bounce); + throughput *= volume_segment.accum_transmittance; + + /* free cached steps */ + kernel_volume_decoupled_free(kg, &volume_segment); +#else + /* GPU: no decoupled ray marching, scatter probalistically */ int num_samples = kernel_data.integrator.volume_samples; float num_samples_inv = 1.0f/num_samples; - float3 avg_tp = make_float3(0.0f, 0.0f, 0.0f); /* todo: we should cache the shader evaluations from stepping * through the volume, for now we redo them multiple times */ @@ -1118,7 +1161,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in if(result == VOLUME_PATH_SCATTERED) { /* todo: use all-light sampling */ if(kernel_path_integrate_scatter_lighting(kg, rng, &volume_sd, &tp, &ps, &L, &pray, num_samples_inv)) { - kernel_path_indirect(kg, rng, pray, buffer, tp*num_samples_inv, num_samples, ps, &L); + kernel_path_indirect(kg, rng, pray, tp*num_samples_inv, num_samples, ps, &L); /* for render passes, sum and reset indirect light pass variables * for the next samples */ @@ -1126,11 +1169,11 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in path_radiance_reset_indirect(&L); } } - else - avg_tp += tp; } - throughput = avg_tp * num_samples_inv; + /* todo: avoid this calculation using decoupled ray marching */ + kernel_volume_shadow(kg, &state, &volume_ray, &throughput); +#endif } #endif @@ -1147,7 +1190,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in #ifdef __BACKGROUND__ /* sample background shader */ - float3 L_background = indirect_background(kg, &ray, state.flag, state.ray_pdf, state.bounce); + float3 L_background = indirect_background(kg, &state, &ray); path_radiance_accum_background(&L, throughput, L_background, state.bounce); #endif @@ -1156,7 +1199,7 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in /* setup shading */ ShaderData sd; - shader_setup_from_ray(kg, &sd, &isect, &ray, state.bounce); + 
shader_setup_from_ray(kg, &sd, &isect, &ray, state.bounce, state.transparent_bounce); shader_eval_surface(kg, &sd, 0.0f, state.flag, SHADER_CONTEXT_MAIN); shader_merge_closures(&sd); @@ -1270,21 +1313,21 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in /* do subsurface scatter step with copy of shader data, this will * replace the BSSRDF with a diffuse BSDF closure */ for(int j = 0; j < num_samples; j++) { - ShaderData bssrdf_sd[BSSRDF_MAX_HITS]; - float bssrdf_u, bssrdf_v; - path_branched_rng_2D(kg, &bssrdf_rng, &state, j, num_samples, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v); - int num_hits = subsurface_scatter_multi_step(kg, &sd, bssrdf_sd, state.flag, sc, &lcg_state, bssrdf_u, bssrdf_v, true); + ShaderData bssrdf_sd[BSSRDF_MAX_HITS]; + float bssrdf_u, bssrdf_v; + path_branched_rng_2D(kg, &bssrdf_rng, &state, j, num_samples, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v); + int num_hits = subsurface_scatter_multi_step(kg, &sd, bssrdf_sd, state.flag, sc, &lcg_state, bssrdf_u, bssrdf_v, true); - /* compute lighting with the BSDF closure */ - for(int hit = 0; hit < num_hits; hit++) { - PathState hit_state = state; + /* compute lighting with the BSDF closure */ + for(int hit = 0; hit < num_hits; hit++) { + PathState hit_state = state; - path_state_branch(&hit_state, j, num_samples); + path_state_branch(&hit_state, j, num_samples); - kernel_branched_path_integrate_lighting(kg, rng, - &bssrdf_sd[hit], throughput, num_samples_inv, - &hit_state, &L, buffer); - } + kernel_branched_path_integrate_lighting(kg, rng, + &bssrdf_sd[hit], throughput, num_samples_inv, + &hit_state, &L, buffer); + } } state.flag &= ~PATH_RAY_BSSRDF_ANCESTOR; diff --git a/intern/cycles/kernel/kernel_path_state.h b/intern/cycles/kernel/kernel_path_state.h index c3f617542a6..406654c1741 100644 --- a/intern/cycles/kernel/kernel_path_state.h +++ b/intern/cycles/kernel/kernel_path_state.h @@ -50,7 +50,7 @@ ccl_device_inline void path_state_init(KernelGlobals *kg, PathState *state, RNG 
state->rng_congruential = lcg_init(*rng + sample*0x51633e2d); } else { - state->volume_stack[0].shader = SHADER_NO_ID; + state->volume_stack[0].shader = SHADER_NONE; } #endif } @@ -132,6 +132,9 @@ ccl_device_inline uint path_state_ray_visibility(KernelGlobals *kg, PathState *s /* for visibility, diffuse/glossy are for reflection only */ if(flag & PATH_RAY_TRANSMIT) flag &= ~(PATH_RAY_DIFFUSE|PATH_RAY_GLOSSY); + /* todo: this is not supported as its own ray visibility yet */ + if(state->flag & PATH_RAY_VOLUME_SCATTER) + flag |= PATH_RAY_DIFFUSE; /* for camera visibility, use render layer flags */ if(flag & PATH_RAY_CAMERA) flag |= kernel_data.integrator.layer_flag; diff --git a/intern/cycles/kernel/kernel_projection.h b/intern/cycles/kernel/kernel_projection.h index e2108604bc8..6744471d659 100644 --- a/intern/cycles/kernel/kernel_projection.h +++ b/intern/cycles/kernel/kernel_projection.h @@ -39,7 +39,7 @@ CCL_NAMESPACE_BEGIN ccl_device float2 direction_to_spherical(float3 dir) { - float theta = acosf(dir.z); + float theta = safe_acosf(dir.z); float phi = atan2f(dir.x, dir.y); return make_float2(theta, phi); @@ -97,7 +97,7 @@ ccl_device float3 fisheye_to_direction(float u, float v, float fov) if(r > 1.0f) return make_float3(0.0f, 0.0f, 0.0f); - float phi = acosf((r != 0.0f)? u/r: 0.0f); + float phi = safe_acosf((r != 0.0f)? u/r: 0.0f); float theta = r * fov * 0.5f; if(v < 0.0f) phi = -phi; @@ -111,7 +111,7 @@ ccl_device float3 fisheye_to_direction(float u, float v, float fov) ccl_device float2 direction_to_fisheye_equisolid(float3 dir, float lens, float width, float height) { - float theta = acosf(dir.x); + float theta = safe_acosf(dir.x); float r = 2.0f * lens * sinf(theta * 0.5f); float phi = atan2f(dir.z, dir.y); @@ -132,7 +132,7 @@ ccl_device float3 fisheye_equisolid_to_direction(float u, float v, float lens, f if(r > rmax) return make_float3(0.0f, 0.0f, 0.0f); - float phi = acosf((r != 0.0f)? u/r: 0.0f); + float phi = safe_acosf((r != 0.0f)? 
u/r: 0.0f); float theta = 2.0f * asinf(r/(2.0f * lens)); if(v < 0.0f) phi = -phi; diff --git a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h index ef397269ec2..31cb6ff6abd 100644 --- a/intern/cycles/kernel/kernel_random.h +++ b/intern/cycles/kernel/kernel_random.h @@ -120,6 +120,9 @@ ccl_device_inline float path_rng_1D(KernelGlobals *kg, RNG *rng, int sample, int /* Cranly-Patterson rotation using rng seed */ float shift; + /* using the same *rng value to offset seems to give correlation issues, + * we could hash it with the dimension but this has a performance impact, + * we need to find a solution for this */ if(dimension & 1) shift = (*rng >> 16) * (1.0f/(float)0xFFFF); else diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h index b113e906e9d..58cec090410 100644 --- a/intern/cycles/kernel/kernel_shader.h +++ b/intern/cycles/kernel/kernel_shader.h @@ -39,7 +39,7 @@ ccl_device void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd { if(sd->flag & SD_OBJECT_MOTION) { sd->ob_tfm = object_fetch_transform_motion(kg, sd->object, time); - sd->ob_itfm= transform_quick_inverse(sd->ob_tfm); + sd->ob_itfm = transform_quick_inverse(sd->ob_tfm); } else { sd->ob_tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM); @@ -49,12 +49,13 @@ ccl_device void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd #endif ccl_device void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd, - const Intersection *isect, const Ray *ray, int bounce) + const Intersection *isect, const Ray *ray, int bounce, int transparent_bounce) { #ifdef __INSTANCING__ - sd->object = (isect->object == ~0)? kernel_tex_fetch(__prim_object, isect->prim): isect->object; + sd->object = (isect->object == PRIM_NONE)? 
kernel_tex_fetch(__prim_object, isect->prim): isect->object; #endif + sd->type = isect->type; sd->flag = kernel_tex_fetch(__object_flag, sd->object); /* matrices and time */ @@ -66,37 +67,31 @@ ccl_device void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd, sd->prim = kernel_tex_fetch(__prim_index, isect->prim); sd->ray_length = isect->t; sd->ray_depth = bounce; + sd->transparent_depth = transparent_bounce; + +#ifdef __UV__ + sd->u = isect->u; + sd->v = isect->v; +#endif #ifdef __HAIR__ - if(kernel_tex_fetch(__prim_segment, isect->prim) != ~0) { - /* Strand Shader setting*/ + if(sd->type & PRIMITIVE_ALL_CURVE) { + /* curve */ float4 curvedata = kernel_tex_fetch(__curves, sd->prim); sd->shader = __float_as_int(curvedata.z); - sd->segment = isect->segment; sd->P = bvh_curve_refine(kg, sd, isect, ray); } - else { + else #endif - /* fetch triangle data */ + if(sd->type & PRIMITIVE_TRIANGLE) { + /* static triangle */ float4 Ns = kernel_tex_fetch(__tri_normal, sd->prim); float3 Ng = make_float3(Ns.x, Ns.y, Ns.z); sd->shader = __float_as_int(Ns.w); -#ifdef __HAIR__ - sd->segment = ~0; - /*elements for minimum hair width using transparency bsdf*/ - /*sd->curve_transparency = 0.0f;*/ - /*sd->curve_radius = 0.0f;*/ -#endif - -#ifdef __UV__ - sd->u = isect->u; - sd->v = isect->v; -#endif - /* vectors */ - sd->P = bvh_triangle_refine(kg, sd, isect, ray); + sd->P = triangle_refine(kg, sd, isect, ray); sd->Ng = Ng; sd->N = Ng; @@ -106,19 +101,20 @@ ccl_device void shader_setup_from_ray(KernelGlobals *kg, ShaderData *sd, #ifdef __DPDU__ /* dPdu/dPdv */ - triangle_dPdudv(kg, &sd->dPdu, &sd->dPdv, sd->prim); + triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv); #endif - -#ifdef __HAIR__ } -#endif + else { + /* motion triangle */ + motion_triangle_shader_setup(kg, sd, isect, ray, false); + } sd->I = -ray->D; sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2); #ifdef __INSTANCING__ - if(isect->object != ~0) { + if(isect->object != OBJECT_NONE) { /* 
instance transform */ object_normal_transform(kg, sd, &sd->N); object_normal_transform(kg, sd, &sd->Ng); @@ -161,39 +157,41 @@ ccl_device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderDat /* object, matrices, time, ray_length stay the same */ sd->flag = kernel_tex_fetch(__object_flag, sd->object); sd->prim = kernel_tex_fetch(__prim_index, isect->prim); - - /* fetch triangle data */ - float4 Ns = kernel_tex_fetch(__tri_normal, sd->prim); - float3 Ng = make_float3(Ns.x, Ns.y, Ns.z); - sd->shader = __float_as_int(Ns.w); - -#ifdef __HAIR__ - sd->segment = ~0; -#endif + sd->type = isect->type; #ifdef __UV__ sd->u = isect->u; sd->v = isect->v; #endif - /* vectors */ - sd->P = bvh_triangle_refine_subsurface(kg, sd, isect, ray); - sd->Ng = Ng; - sd->N = Ng; - - /* smooth normal */ - if(sd->shader & SHADER_SMOOTH_NORMAL) - sd->N = triangle_smooth_normal(kg, sd->prim, sd->u, sd->v); + /* fetch triangle data */ + if(sd->type == PRIMITIVE_TRIANGLE) { + float4 Ns = kernel_tex_fetch(__tri_normal, sd->prim); + float3 Ng = make_float3(Ns.x, Ns.y, Ns.z); + sd->shader = __float_as_int(Ns.w); + + /* static triangle */ + sd->P = triangle_refine_subsurface(kg, sd, isect, ray); + sd->Ng = Ng; + sd->N = Ng; + + if(sd->shader & SHADER_SMOOTH_NORMAL) + sd->N = triangle_smooth_normal(kg, sd->prim, sd->u, sd->v); #ifdef __DPDU__ - /* dPdu/dPdv */ - triangle_dPdudv(kg, &sd->dPdu, &sd->dPdv, sd->prim); + /* dPdu/dPdv */ + triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv); #endif + } + else { + /* motion triangle */ + motion_triangle_shader_setup(kg, sd, isect, ray, true); + } sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2); #ifdef __INSTANCING__ - if(isect->object != ~0) { + if(isect->object != OBJECT_NONE) { /* instance transform */ object_normal_transform(kg, sd, &sd->N); object_normal_transform(kg, sd, &sd->Ng); @@ -231,7 +229,7 @@ ccl_device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderDat ccl_device void 
shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd, const float3 P, const float3 Ng, const float3 I, - int shader, int object, int prim, float u, float v, float t, float time, int bounce, int segment) + int shader, int object, int prim, float u, float v, float t, float time, int bounce, int transparent_bounce) { /* vectors */ sd->P = P; @@ -239,9 +237,7 @@ ccl_device void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd, sd->Ng = Ng; sd->I = I; sd->shader = shader; -#ifdef __HAIR__ - sd->segment = segment; -#endif + sd->type = (prim == PRIM_NONE)? PRIMITIVE_NONE: PRIMITIVE_TRIANGLE; /* primitive */ #ifdef __INSTANCING__ @@ -255,12 +251,13 @@ ccl_device void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd, #endif sd->ray_length = t; sd->ray_depth = bounce; + sd->transparent_depth = transparent_bounce; /* detect instancing, for non-instanced the object index is -object-1 */ #ifdef __INSTANCING__ bool instanced = false; - if(sd->prim != ~0) { + if(sd->prim != PRIM_NONE) { if(sd->object >= 0) instanced = true; else @@ -271,7 +268,7 @@ ccl_device void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd, #endif sd->flag = kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2); - if(sd->object != -1) { + if(sd->object != OBJECT_NONE) { sd->flag |= kernel_tex_fetch(__object_flag, sd->object); #ifdef __OBJECT_MOTION__ @@ -283,36 +280,20 @@ ccl_device void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd, } #endif - /* smooth normal */ -#ifdef __HAIR__ - if(sd->shader & SHADER_SMOOTH_NORMAL && sd->segment == ~0) { - sd->N = triangle_smooth_normal(kg, sd->prim, sd->u, sd->v); -#else - if(sd->shader & SHADER_SMOOTH_NORMAL) { - sd->N = triangle_smooth_normal(kg, sd->prim, sd->u, sd->v); -#endif + if(sd->type & PRIMITIVE_TRIANGLE) { + /* smooth normal */ + if(sd->shader & SHADER_SMOOTH_NORMAL) { + sd->N = triangle_smooth_normal(kg, sd->prim, sd->u, sd->v); #ifdef __INSTANCING__ - if(instanced) - object_normal_transform(kg, sd, 
&sd->N); + if(instanced) + object_normal_transform(kg, sd, &sd->N); #endif - } + } + /* dPdu/dPdv */ #ifdef __DPDU__ - /* dPdu/dPdv */ -#ifdef __HAIR__ - if(sd->prim == ~0 || sd->segment != ~0) { - sd->dPdu = make_float3(0.0f, 0.0f, 0.0f); - sd->dPdv = make_float3(0.0f, 0.0f, 0.0f); - } -#else - if(sd->prim == ~0) { - sd->dPdu = make_float3(0.0f, 0.0f, 0.0f); - sd->dPdv = make_float3(0.0f, 0.0f, 0.0f); - } -#endif - else { - triangle_dPdudv(kg, &sd->dPdu, &sd->dPdv, sd->prim); + triangle_dPdudv(kg, sd->prim, &sd->dPdu, &sd->dPdv); #ifdef __INSTANCING__ if(instanced) { @@ -320,11 +301,17 @@ ccl_device void shader_setup_from_sample(KernelGlobals *kg, ShaderData *sd, object_dir_transform(kg, sd, &sd->dPdv); } #endif +#endif } + else { +#ifdef __DPDU__ + sd->dPdu = make_float3(0.0f, 0.0f, 0.0f); + sd->dPdv = make_float3(0.0f, 0.0f, 0.0f); #endif + } /* backfacing test */ - if(sd->prim != ~0) { + if(sd->prim != PRIM_NONE) { bool backfacing = (dot(sd->Ng, sd->I) < 0.0f); if(backfacing) { @@ -355,20 +342,19 @@ ccl_device void shader_setup_from_displace(KernelGlobals *kg, ShaderData *sd, float3 P, Ng, I = make_float3(0.0f, 0.0f, 0.0f); int shader; - P = triangle_point_MT(kg, prim, u, v); - Ng = triangle_normal_MT(kg, prim, &shader); + triangle_point_normal(kg, prim, u, v, &P, &Ng, &shader); /* force smooth shading for displacement */ shader |= SHADER_SMOOTH_NORMAL; /* watch out: no instance transform currently */ - shader_setup_from_sample(kg, sd, P, Ng, I, shader, object, prim, u, v, 0.0f, TIME_INVALID, 0, ~0); + shader_setup_from_sample(kg, sd, P, Ng, I, shader, object, prim, u, v, 0.0f, TIME_INVALID, 0, 0); } /* ShaderData setup from ray into background */ -ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderData *sd, const Ray *ray, int bounce) +ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderData *sd, const Ray *ray, int bounce, int transparent_bounce) { /* vectors */ sd->P = ray->D; @@ -382,11 +368,12 @@ 
ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderDat #endif sd->ray_length = 0.0f; sd->ray_depth = bounce; + sd->transparent_depth = transparent_bounce; #ifdef __INSTANCING__ - sd->object = ~0; + sd->object = PRIM_NONE; #endif - sd->prim = ~0; + sd->prim = PRIM_NONE; #ifdef __UV__ sd->u = 0.0f; sd->v = 0.0f; @@ -411,28 +398,27 @@ ccl_device_inline void shader_setup_from_background(KernelGlobals *kg, ShaderDat /* ShaderData setup from point inside volume */ -ccl_device_inline void shader_setup_from_volume(KernelGlobals *kg, ShaderData *sd, const Ray *ray, int bounce) +ccl_device_inline void shader_setup_from_volume(KernelGlobals *kg, ShaderData *sd, const Ray *ray, int bounce, int transparent_bounce) { /* vectors */ sd->P = ray->P; sd->N = -ray->D; sd->Ng = -ray->D; sd->I = -ray->D; - sd->shader = SHADER_NO_ID; + sd->shader = SHADER_NONE; sd->flag = 0; #ifdef __OBJECT_MOTION__ sd->time = ray->time; #endif sd->ray_length = 0.0f; /* todo: can we set this to some useful value? 
*/ sd->ray_depth = bounce; + sd->transparent_depth = transparent_bounce; #ifdef __INSTANCING__ - sd->object = ~0; /* todo: fill this for texture coordinates */ -#endif - sd->prim = ~0; -#ifdef __HAIR__ - sd->segment = ~0; + sd->object = PRIM_NONE; /* todo: fill this for texture coordinates */ #endif + sd->prim = PRIM_NONE; + sd->type = PRIMITIVE_NONE; #ifdef __UV__ sd->u = 0.0f; @@ -471,23 +457,32 @@ ccl_device void shader_merge_closures(ShaderData *sd) ShaderClosure *scj = &sd->closure[j]; #ifdef __OSL__ - if(!sci->prim && !scj->prim && sci->type == scj->type && sci->data0 == scj->data0 && sci->data1 == scj->data1) { -#else - if(sci->type == scj->type && sci->data0 == scj->data0 && sci->data1 == scj->data1) { + if(sci->prim || scj->prim) + continue; #endif - sci->weight += scj->weight; - sci->sample_weight += scj->sample_weight; - - int size = sd->num_closure - (j+1); - if(size > 0) { - for(int k = 0; k < size; k++) { - scj[k] = scj[k+1]; - } - } - sd->num_closure--; - j--; + if(!(sci->type == scj->type && sci->data0 == scj->data0 && sci->data1 == scj->data1)) + continue; + + if(CLOSURE_IS_BSDF_OR_BSSRDF(sci->type)) { + if(sci->N != scj->N) + continue; + else if(CLOSURE_IS_BSDF_ANISOTROPIC(sci->type) && sci->T != scj->T) + continue; } + + sci->weight += scj->weight; + sci->sample_weight += scj->sample_weight; + + int size = sd->num_closure - (j+1); + if(size > 0) { + for(int k = 0; k < size; k++) { + scj[k] = scj[k+1]; + } + } + + sd->num_closure--; + j--; } } } @@ -495,8 +490,6 @@ ccl_device void shader_merge_closures(ShaderData *sd) /* BSDF */ -#ifdef __MULTI_CLOSURE__ - ccl_device_inline void _shader_bsdf_multi_eval(KernelGlobals *kg, const ShaderData *sd, const float3 omega_in, float *pdf, int skip_bsdf, BsdfEval *result_eval, float sum_pdf, float sum_sample_weight) { @@ -524,28 +517,18 @@ ccl_device_inline void _shader_bsdf_multi_eval(KernelGlobals *kg, const ShaderDa *pdf = (sum_sample_weight > 0.0f)? 
sum_pdf/sum_sample_weight: 0.0f; } -#endif - ccl_device void shader_bsdf_eval(KernelGlobals *kg, const ShaderData *sd, const float3 omega_in, BsdfEval *eval, float *pdf) { -#ifdef __MULTI_CLOSURE__ bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass); _shader_bsdf_multi_eval(kg, sd, omega_in, pdf, -1, eval, 0.0f, 0.0f); -#else - const ShaderClosure *sc = &sd->closure; - - *pdf = 0.0f; - *eval = bsdf_eval(kg, sd, sc, omega_in, pdf)*sc->weight; -#endif } ccl_device int shader_bsdf_sample(KernelGlobals *kg, const ShaderData *sd, float randu, float randv, BsdfEval *bsdf_eval, float3 *omega_in, differential3 *domega_in, float *pdf) { -#ifdef __MULTI_CLOSURE__ int sampled = 0; if(sd->num_closure > 1) { @@ -596,13 +579,6 @@ ccl_device int shader_bsdf_sample(KernelGlobals *kg, const ShaderData *sd, } return label; -#else - /* sample the single closure that we picked */ - *pdf = 0.0f; - int label = bsdf_sample(kg, sd, &sd->closure, randu, randv, bsdf_eval, omega_in, domega_in, pdf); - *bsdf_eval *= sd->closure.weight; - return label; -#endif } ccl_device int shader_bsdf_sample_closure(KernelGlobals *kg, const ShaderData *sd, @@ -623,21 +599,16 @@ ccl_device int shader_bsdf_sample_closure(KernelGlobals *kg, const ShaderData *s ccl_device void shader_bsdf_blur(KernelGlobals *kg, ShaderData *sd, float roughness) { -#ifdef __MULTI_CLOSURE__ for(int i = 0; i< sd->num_closure; i++) { ShaderClosure *sc = &sd->closure[i]; if(CLOSURE_IS_BSDF(sc->type)) bsdf_blur(kg, sc, roughness); } -#else - bsdf_blur(kg, &sd->closure, roughness); -#endif } ccl_device float3 shader_bsdf_transparency(KernelGlobals *kg, ShaderData *sd) { -#ifdef __MULTI_CLOSURE__ float3 eval = make_float3(0.0f, 0.0f, 0.0f); for(int i = 0; i< sd->num_closure; i++) { @@ -648,12 +619,6 @@ ccl_device float3 shader_bsdf_transparency(KernelGlobals *kg, ShaderData *sd) } return eval; -#else - if(sd->closure.type == CLOSURE_BSDF_TRANSPARENT_ID) - return sd->closure.weight; - 
else - return make_float3(0.0f, 0.0f, 0.0f); -#endif } ccl_device float3 shader_bsdf_alpha(KernelGlobals *kg, ShaderData *sd) @@ -668,7 +633,6 @@ ccl_device float3 shader_bsdf_alpha(KernelGlobals *kg, ShaderData *sd) ccl_device float3 shader_bsdf_diffuse(KernelGlobals *kg, ShaderData *sd) { -#ifdef __MULTI_CLOSURE__ float3 eval = make_float3(0.0f, 0.0f, 0.0f); for(int i = 0; i< sd->num_closure; i++) { @@ -679,17 +643,10 @@ ccl_device float3 shader_bsdf_diffuse(KernelGlobals *kg, ShaderData *sd) } return eval; -#else - if(CLOSURE_IS_BSDF_DIFFUSE(sd->closure.type)) - return sd->closure.weight; - else - return make_float3(0.0f, 0.0f, 0.0f); -#endif } ccl_device float3 shader_bsdf_glossy(KernelGlobals *kg, ShaderData *sd) { -#ifdef __MULTI_CLOSURE__ float3 eval = make_float3(0.0f, 0.0f, 0.0f); for(int i = 0; i< sd->num_closure; i++) { @@ -700,17 +657,10 @@ ccl_device float3 shader_bsdf_glossy(KernelGlobals *kg, ShaderData *sd) } return eval; -#else - if(CLOSURE_IS_BSDF_GLOSSY(sd->closure.type)) - return sd->closure.weight; - else - return make_float3(0.0f, 0.0f, 0.0f); -#endif } ccl_device float3 shader_bsdf_transmission(KernelGlobals *kg, ShaderData *sd) { -#ifdef __MULTI_CLOSURE__ float3 eval = make_float3(0.0f, 0.0f, 0.0f); for(int i = 0; i< sd->num_closure; i++) { @@ -721,17 +671,10 @@ ccl_device float3 shader_bsdf_transmission(KernelGlobals *kg, ShaderData *sd) } return eval; -#else - if(CLOSURE_IS_BSDF_TRANSMISSION(sd->closure.type)) - return sd->closure.weight; - else - return make_float3(0.0f, 0.0f, 0.0f); -#endif } ccl_device float3 shader_bsdf_subsurface(KernelGlobals *kg, ShaderData *sd) { -#ifdef __MULTI_CLOSURE__ float3 eval = make_float3(0.0f, 0.0f, 0.0f); for(int i = 0; i< sd->num_closure; i++) { @@ -742,17 +685,10 @@ ccl_device float3 shader_bsdf_subsurface(KernelGlobals *kg, ShaderData *sd) } return eval; -#else - if(CLOSURE_IS_BSSRDF(sd->closure.type)) - return sd->closure.weight; - else - return make_float3(0.0f, 0.0f, 0.0f); -#endif } ccl_device 
float3 shader_bsdf_ao(KernelGlobals *kg, ShaderData *sd, float ao_factor, float3 *N_) { -#ifdef __MULTI_CLOSURE__ float3 eval = make_float3(0.0f, 0.0f, 0.0f); float3 N = make_float3(0.0f, 0.0f, 0.0f); @@ -776,21 +712,10 @@ ccl_device float3 shader_bsdf_ao(KernelGlobals *kg, ShaderData *sd, float ao_fac *N_ = N; return eval; -#else - *N_ = sd->N; - - if(CLOSURE_IS_BSDF_DIFFUSE(sd->closure.type)) - return sd->closure.weight*ao_factor; - else if(CLOSURE_IS_AMBIENT_OCCLUSION(sd->closure.type)) - return sd->closure.weight; - else - return make_float3(0.0f, 0.0f, 0.0f); -#endif } ccl_device float3 shader_bssrdf_sum(ShaderData *sd, float3 *N_, float *texture_blur_) { -#ifdef __MULTI_CLOSURE__ float3 eval = make_float3(0.0f, 0.0f, 0.0f); float3 N = make_float3(0.0f, 0.0f, 0.0f); float texture_blur = 0.0f, weight_sum = 0.0f; @@ -815,20 +740,6 @@ ccl_device float3 shader_bssrdf_sum(ShaderData *sd, float3 *N_, float *texture_b *texture_blur_ = texture_blur/weight_sum; return eval; -#else - if(CLOSURE_IS_BSSRDF(sd->closure.type)) { - if(N_) *N_ = sd->closure.N; - if(texture_blur_) *texture_blur_ = sd->closure.data1; - - return sd->closure.weight; - } - else { - if(N_) *N_ = sd->N; - if(texture_blur_) *texture_blur_ = 0.0f; - - return make_float3(0.0f, 0.0f, 0.0f); - } -#endif } /* Emission */ @@ -841,7 +752,6 @@ ccl_device float3 emissive_eval(KernelGlobals *kg, ShaderData *sd, ShaderClosure ccl_device float3 shader_emissive_eval(KernelGlobals *kg, ShaderData *sd) { float3 eval; -#ifdef __MULTI_CLOSURE__ eval = make_float3(0.0f, 0.0f, 0.0f); for(int i = 0; i < sd->num_closure; i++) { @@ -850,9 +760,6 @@ ccl_device float3 shader_emissive_eval(KernelGlobals *kg, ShaderData *sd) if(CLOSURE_IS_EMISSION(sc->type)) eval += emissive_eval(kg, sd, sc)*sc->weight; } -#else - eval = emissive_eval(kg, sd, &sd->closure)*sd->closure.weight; -#endif return eval; } @@ -861,7 +768,6 @@ ccl_device float3 shader_emissive_eval(KernelGlobals *kg, ShaderData *sd) ccl_device float3 
shader_holdout_eval(KernelGlobals *kg, ShaderData *sd) { -#ifdef __MULTI_CLOSURE__ float3 weight = make_float3(0.0f, 0.0f, 0.0f); for(int i = 0; i < sd->num_closure; i++) { @@ -872,12 +778,6 @@ ccl_device float3 shader_holdout_eval(KernelGlobals *kg, ShaderData *sd) } return weight; -#else - if(sd->closure.type == CLOSURE_HOLDOUT_ID) - return make_float3(1.0f, 1.0f, 1.0f); - - return make_float3(0.0f, 0.0f, 0.0f); -#endif } /* Surface Evaluation */ @@ -885,12 +785,8 @@ ccl_device float3 shader_holdout_eval(KernelGlobals *kg, ShaderData *sd) ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd, float randb, int path_flag, ShaderContext ctx) { -#ifdef __MULTI_CLOSURE__ sd->num_closure = 0; sd->randb_closure = randb; -#else - sd->closure.type = NBUILTIN_CLOSURES; -#endif #ifdef __OSL__ if(kg->osl) @@ -899,7 +795,7 @@ ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd, #endif { #ifdef __SVM__ - svm_eval_nodes(kg, sd, SHADER_TYPE_SURFACE, randb, path_flag); + svm_eval_nodes(kg, sd, SHADER_TYPE_SURFACE, path_flag); #else sd->closure.weight = make_float3(0.8f, 0.8f, 0.8f); sd->closure.N = sd->N; @@ -912,12 +808,8 @@ ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd, ccl_device float3 shader_eval_background(KernelGlobals *kg, ShaderData *sd, int path_flag, ShaderContext ctx) { -#ifdef __MULTI_CLOSURE__ sd->num_closure = 0; sd->randb_closure = 0.0f; -#else - sd->closure.type = NBUILTIN_CLOSURES; -#endif #ifdef __OSL__ if(kg->osl) { @@ -928,9 +820,8 @@ ccl_device float3 shader_eval_background(KernelGlobals *kg, ShaderData *sd, int { #ifdef __SVM__ - svm_eval_nodes(kg, sd, SHADER_TYPE_SURFACE, 0.0f, path_flag); + svm_eval_nodes(kg, sd, SHADER_TYPE_SURFACE, path_flag); -#ifdef __MULTI_CLOSURE__ float3 eval = make_float3(0.0f, 0.0f, 0.0f); for(int i = 0; i< sd->num_closure; i++) { @@ -942,13 +833,6 @@ ccl_device float3 shader_eval_background(KernelGlobals *kg, ShaderData *sd, int return eval; #else - if(sd->closure.type == 
CLOSURE_BACKGROUND_ID) - return sd->closure.weight; - else - return make_float3(0.0f, 0.0f, 0.0f); -#endif - -#else return make_float3(0.8f, 0.8f, 0.8f); #endif } @@ -1067,14 +951,10 @@ ccl_device void shader_eval_volume(KernelGlobals *kg, ShaderData *sd, { /* reset closures once at the start, we will be accumulating the closures * for all volumes in the stack into a single array of closures */ -#ifdef __MULTI_CLOSURE__ sd->num_closure = 0; -#else - sd->closure.type = NBUILTIN_CLOSURES; -#endif sd->flag = 0; - for(int i = 0; stack[i].shader != SHADER_NO_ID; i++) { + for(int i = 0; stack[i].shader != SHADER_NONE; i++) { /* setup shaderdata from stack. it's mostly setup already in * shader_setup_from_volume, this switching should be quick */ sd->object = stack[i].object; @@ -1083,7 +963,7 @@ ccl_device void shader_eval_volume(KernelGlobals *kg, ShaderData *sd, sd->flag &= ~(SD_SHADER_FLAGS|SD_OBJECT_FLAGS); sd->flag |= kernel_tex_fetch(__shader_flag, (sd->shader & SHADER_MASK)*2); - if(sd->object != ~0) { + if(sd->object != OBJECT_NONE) { sd->flag |= kernel_tex_fetch(__object_flag, sd->object); #ifdef __OBJECT_MOTION__ @@ -1102,7 +982,7 @@ ccl_device void shader_eval_volume(KernelGlobals *kg, ShaderData *sd, else #endif { - svm_eval_nodes(kg, sd, SHADER_TYPE_VOLUME, 0.0f, path_flag); + svm_eval_nodes(kg, sd, SHADER_TYPE_VOLUME, path_flag); } #endif @@ -1118,12 +998,8 @@ ccl_device void shader_eval_volume(KernelGlobals *kg, ShaderData *sd, ccl_device void shader_eval_displacement(KernelGlobals *kg, ShaderData *sd, ShaderContext ctx) { -#ifdef __MULTI_CLOSURE__ sd->num_closure = 0; sd->randb_closure = 0.0f; -#else - sd->closure.type = NBUILTIN_CLOSURES; -#endif /* this will modify sd->P */ #ifdef __SVM__ @@ -1133,7 +1009,7 @@ ccl_device void shader_eval_displacement(KernelGlobals *kg, ShaderData *sd, Shad else #endif { - svm_eval_nodes(kg, sd, SHADER_TYPE_DISPLACEMENT, 0.0f, 0); + svm_eval_nodes(kg, sd, SHADER_TYPE_DISPLACEMENT, 0); } #endif } @@ -1147,7 +1023,7 @@ 
ccl_device bool shader_transparent_shadow(KernelGlobals *kg, Intersection *isect int shader = 0; #ifdef __HAIR__ - if(kernel_tex_fetch(__prim_segment, isect->prim) == ~0) { + if(kernel_tex_fetch(__prim_type, isect->prim) & PRIMITIVE_ALL_TRIANGLE) { #endif float4 Ns = kernel_tex_fetch(__tri_normal, prim); shader = __float_as_int(Ns.w); diff --git a/intern/cycles/kernel/kernel_shadow.h b/intern/cycles/kernel/kernel_shadow.h index 9b015c98c40..ab7524c411a 100644 --- a/intern/cycles/kernel/kernel_shadow.h +++ b/intern/cycles/kernel/kernel_shadow.h @@ -16,6 +16,178 @@ CCL_NAMESPACE_BEGIN +#ifdef __SHADOW_RECORD_ALL__ + +/* Shadow function to compute how much light is blocked, CPU variation. + * + * We trace a single ray. If it hits any opaque surface, or more than a given + * number of transparent surfaces is hit, then we consider the geometry to be + * entirely blocked. If not, all transparent surfaces will be recorded and we + * will shade them one by one to determine how much light is blocked. This all + * happens in one scene intersection function. + * + * Recording all hits works well in some cases but may be slower in others. If + * we have many semi-transparent hairs, one intersection may be faster because + * you'd be reinteresecting the same hairs a lot with each step otherwise. If + * however there is mostly binary transparency then we may be recording many + * unnecessary intersections when one of the first surfaces blocks all light. + * + * From tests in real scenes it seems the performance loss is either minimal, + * or there is a performance increase anyway due to avoiding the need to send + * two rays with transparent shadows. + * + * This is CPU only because of qsort, and malloc or high stack space usage to + * record all these intersections. 
*/ + +ccl_device_noinline int shadow_intersections_compare(const void *a, const void *b) +{ + const Intersection *isect_a = (const Intersection*)a; + const Intersection *isect_b = (const Intersection*)b; + + if(isect_a->t < isect_b->t) + return -1; + else if(isect_a->t > isect_b->t) + return 1; + else + return 0; +} + +#define STACK_MAX_HITS 64 + +ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *ray, float3 *shadow) +{ + *shadow = make_float3(1.0f, 1.0f, 1.0f); + + if(ray->t == 0.0f) + return false; + + bool blocked; + + if(kernel_data.integrator.transparent_shadows) { + /* intersect to find an opaque surface, or record all transparent surface hits */ + Intersection hits_stack[STACK_MAX_HITS]; + Intersection *hits; + uint max_hits = kernel_data.integrator.transparent_max_bounce - state->transparent_bounce - 1; + + /* prefer to use stack but use dynamic allocation if too deep max hits + * we need max_hits + 1 storage space due to the logic in + * scene_intersect_shadow_all which will first store and then check if + * the limit is exceeded */ + if(max_hits + 1 <= STACK_MAX_HITS) + hits = hits_stack; + else + hits = (Intersection*)malloc(sizeof(Intersection)*(max_hits + 1)); + + uint num_hits; + blocked = scene_intersect_shadow_all(kg, ray, hits, max_hits, &num_hits); + + /* if no opaque surface found but we did find transparent hits, shade them */ + if(!blocked && num_hits > 0) { + float3 throughput = make_float3(1.0f, 1.0f, 1.0f); + float3 Pend = ray->P + ray->D*ray->t; + float last_t = 0.0f; + int bounce = state->transparent_bounce; + Intersection *isect = hits; +#ifdef __VOLUME__ + PathState ps = *state; +#endif + + qsort(hits, num_hits, sizeof(Intersection), shadow_intersections_compare); + + for(int hit = 0; hit < num_hits; hit++, isect++) { + /* adjust intersection distance for moving ray forward */ + float new_t = isect->t; + isect->t -= last_t; + + /* skip hit if we did not move forward, step by step raytracing + * would have 
skipped it as well then */ + if(last_t == new_t) + continue; + + last_t = new_t; + +#ifdef __VOLUME__ + /* attenuation between last surface and next surface */ + if(ps.volume_stack[0].shader != SHADER_NONE) { + Ray segment_ray = *ray; + segment_ray.t = isect->t; + kernel_volume_shadow(kg, &ps, &segment_ray, &throughput); + } +#endif + + /* setup shader data at surface */ + ShaderData sd; + shader_setup_from_ray(kg, &sd, isect, ray, state->bounce+1, bounce); + + /* attenuation from transparent surface */ + if(!(sd.flag & SD_HAS_ONLY_VOLUME)) { + shader_eval_surface(kg, &sd, 0.0f, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW); + throughput *= shader_bsdf_transparency(kg, &sd); + } + + /* stop if all light is blocked */ + if(is_zero(throughput)) { + /* free dynamic storage */ + if(hits != hits_stack) + free(hits); + return true; + } + + /* move ray forward */ + ray->P = sd.P; + if(ray->t != FLT_MAX) + ray->D = normalize_len(Pend - ray->P, &ray->t); + +#ifdef __VOLUME__ + /* exit/enter volume */ + kernel_volume_stack_enter_exit(kg, &sd, ps.volume_stack); +#endif + + bounce++; + } + +#ifdef __VOLUME__ + /* attenuation for last line segment towards light */ + if(ps.volume_stack[0].shader != SHADER_NONE) + kernel_volume_shadow(kg, &ps, ray, &throughput); +#endif + + *shadow *= throughput; + } + + /* free dynamic storage */ + if(hits != hits_stack) + free(hits); + } + else { + Intersection isect; +#ifdef __HAIR__ + blocked = scene_intersect(kg, ray, PATH_RAY_SHADOW_OPAQUE, &isect, NULL, 0.0f, 0.0f); +#else + blocked = scene_intersect(kg, ray, PATH_RAY_SHADOW_OPAQUE, &isect); +#endif + } + +#ifdef __VOLUME__ + if(!blocked && state->volume_stack[0].shader != SHADER_NONE) { + /* apply attenuation from current volume shader */ + kernel_volume_shadow(kg, state, ray, shadow); + } +#endif + + return blocked; +} + +#else + +/* Shadow function to compute how much light is blocked, GPU variation. + * + * Here we raytrace from one transparent surface to the next step by step. 
+ * To minimize overhead in cases where we don't need transparent shadows, we + * first trace a regular shadow ray. We check if the hit primitive was + * potentially transparent, and only in that case start marching. this gives + * one extra ray cast for the cases were we do want transparency. */ + ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray *ray, float3 *shadow) { *shadow = make_float3(1.0f, 1.0f, 1.0f); @@ -25,21 +197,13 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray * Intersection isect; #ifdef __HAIR__ - bool result = scene_intersect(kg, ray, PATH_RAY_SHADOW_OPAQUE, &isect, NULL, 0.0f, 0.0f); + bool blocked = scene_intersect(kg, ray, PATH_RAY_SHADOW_OPAQUE, &isect, NULL, 0.0f, 0.0f); #else - bool result = scene_intersect(kg, ray, PATH_RAY_SHADOW_OPAQUE, &isect); + bool blocked = scene_intersect(kg, ray, PATH_RAY_SHADOW_OPAQUE, &isect); #endif #ifdef __TRANSPARENT_SHADOWS__ - if(result && kernel_data.integrator.transparent_shadows) { - /* transparent shadows work in such a way to try to minimize overhead - * in cases where we don't need them. after a regular shadow ray is - * cast we check if the hit primitive was potentially transparent, and - * only in that case start marching. this gives on extra ray cast for - * the cases were we do want transparency. 
- * - * also note that for this to work correct, multi close sampling must - * be used, since we don't pass a random number to shader_eval_surface */ + if(blocked && kernel_data.integrator.transparent_shadows) { if(shader_transparent_shadow(kg, &isect)) { float3 throughput = make_float3(1.0f, 1.0f, 1.0f); float3 Pend = ray->P + ray->D*ray->t; @@ -49,35 +213,24 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray * #endif for(;;) { - if(bounce >= kernel_data.integrator.transparent_max_bounce) { + if(bounce >= kernel_data.integrator.transparent_max_bounce) return true; - } - else if(bounce >= kernel_data.integrator.transparent_min_bounce) { - /* todo: get random number somewhere for probabilistic terminate */ -#if 0 - float probability = average(throughput); - float terminate = 0.0f; - - if(terminate >= probability) - return true; - - throughput /= probability; -#endif - } #ifdef __HAIR__ - if(!scene_intersect(kg, ray, PATH_RAY_SHADOW_TRANSPARENT, &isect, NULL, 0.0f, 0.0f)) { + if(!scene_intersect(kg, ray, PATH_RAY_SHADOW_TRANSPARENT, &isect, NULL, 0.0f, 0.0f)) #else - if(!scene_intersect(kg, ray, PATH_RAY_SHADOW_TRANSPARENT, &isect)) { + if(!scene_intersect(kg, ray, PATH_RAY_SHADOW_TRANSPARENT, &isect)) #endif + { #ifdef __VOLUME__ /* attenuation for last line segment towards light */ - if(ps.volume_stack[0].shader != SHADER_NO_ID) + if(ps.volume_stack[0].shader != SHADER_NONE) kernel_volume_shadow(kg, &ps, ray, &throughput); #endif *shadow *= throughput; + return false; } @@ -86,7 +239,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray * #ifdef __VOLUME__ /* attenuation between last surface and next surface */ - if(ps.volume_stack[0].shader != SHADER_NO_ID) { + if(ps.volume_stack[0].shader != SHADER_NONE) { Ray segment_ray = *ray; segment_ray.t = isect.t; kernel_volume_shadow(kg, &ps, &segment_ray, &throughput); @@ -95,7 +248,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, 
Ray * /* setup shader data at surface */ ShaderData sd; - shader_setup_from_ray(kg, &sd, &isect, ray, state->bounce+1); + shader_setup_from_ray(kg, &sd, &isect, ray, state->bounce+1, bounce); /* attenuation from transparent surface */ if(!(sd.flag & SD_HAS_ONLY_VOLUME)) { @@ -103,6 +256,9 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray * throughput *= shader_bsdf_transparency(kg, &sd); } + if(is_zero(throughput)) + return true; + /* move ray forward */ ray->P = ray_offset(sd.P, -sd.Ng); if(ray->t != FLT_MAX) @@ -118,15 +274,17 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, PathState *state, Ray * } } #ifdef __VOLUME__ - else if(!result && state->volume_stack[0].shader != SHADER_NO_ID) { + else if(!blocked && state->volume_stack[0].shader != SHADER_NONE) { /* apply attenuation from current volume shader */ kernel_volume_shadow(kg, state, ray, shadow); } #endif #endif - return result; + return blocked; } +#endif + CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_sse2.cpp b/intern/cycles/kernel/kernel_sse2.cpp index 6a2a7804146..2d5f6091908 100644 --- a/intern/cycles/kernel/kernel_sse2.cpp +++ b/intern/cycles/kernel/kernel_sse2.cpp @@ -75,6 +75,6 @@ CCL_NAMESPACE_END /* needed for some linkers in combination with scons making empty compilation unit in a library */ void __dummy_function_cycles_sse2(void); -void __dummy_function_cycles_sse2(void){} +void __dummy_function_cycles_sse2(void) {} #endif diff --git a/intern/cycles/kernel/kernel_sse3.cpp b/intern/cycles/kernel/kernel_sse3.cpp index 9d0abb93cc6..1062fd0c990 100644 --- a/intern/cycles/kernel/kernel_sse3.cpp +++ b/intern/cycles/kernel/kernel_sse3.cpp @@ -76,6 +76,6 @@ CCL_NAMESPACE_END /* needed for some linkers in combination with scons making empty compilation unit in a library */ void __dummy_function_cycles_sse3(void); -void __dummy_function_cycles_sse3(void){} +void __dummy_function_cycles_sse3(void) {} #endif diff --git 
a/intern/cycles/kernel/kernel_sse41.cpp b/intern/cycles/kernel/kernel_sse41.cpp index bc20de0ec20..ba3b4887650 100644 --- a/intern/cycles/kernel/kernel_sse41.cpp +++ b/intern/cycles/kernel/kernel_sse41.cpp @@ -77,6 +77,6 @@ CCL_NAMESPACE_END /* needed for some linkers in combination with scons making empty compilation unit in a library */ void __dummy_function_cycles_sse41(void); -void __dummy_function_cycles_sse41(void){} +void __dummy_function_cycles_sse41(void) {} #endif diff --git a/intern/cycles/kernel/kernel_textures.h b/intern/cycles/kernel/kernel_textures.h index f06fa119cfc..b07075c6c95 100644 --- a/intern/cycles/kernel/kernel_textures.h +++ b/intern/cycles/kernel/kernel_textures.h @@ -25,7 +25,7 @@ /* bvh */ KERNEL_TEX(float4, texture_float4, __bvh_nodes) KERNEL_TEX(float4, texture_float4, __tri_woop) -KERNEL_TEX(uint, texture_uint, __prim_segment) +KERNEL_TEX(uint, texture_uint, __prim_type) KERNEL_TEX(uint, texture_uint, __prim_visibility) KERNEL_TEX(uint, texture_uint, __prim_index) KERNEL_TEX(uint, texture_uint, __prim_object) @@ -174,6 +174,61 @@ KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_097) KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_098) KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_099) +/* Kepler and above */ +#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 300) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_100) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_101) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_102) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_103) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_104) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_105) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_106) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_107) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_108) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_109) 
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_110) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_111) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_112) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_113) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_114) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_115) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_116) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_117) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_118) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_119) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_120) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_121) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_122) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_123) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_124) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_125) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_126) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_127) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_128) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_129) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_130) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_131) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_132) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_133) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_134) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_135) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_136) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_137) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_138) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_139) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, 
__tex_image_140) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_141) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_142) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_143) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_144) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_145) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_146) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_147) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_148) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_149) +KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_150) +#endif + /* packed image (opencl) */ KERNEL_TEX(uchar4, texture_uchar4, __tex_image_packed) KERNEL_TEX(uint4, texture_uint4, __tex_image_packed_info) diff --git a/intern/cycles/kernel/kernel_triangle.h b/intern/cycles/kernel/kernel_triangle.h deleted file mode 100644 index 0455df85961..00000000000 --- a/intern/cycles/kernel/kernel_triangle.h +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License - */ - -CCL_NAMESPACE_BEGIN - -/* Point on triangle for Moller-Trumbore triangles */ -ccl_device_inline float3 triangle_point_MT(KernelGlobals *kg, int tri_index, float u, float v) -{ - /* load triangle vertices */ - float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, tri_index)); - - float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x))); - float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y))); - float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z))); - - /* compute point */ - float t = 1.0f - u - v; - return (u*v0 + v*v1 + t*v2); -} - -/* Normal for Moller-Trumbore triangles */ -ccl_device_inline float3 triangle_normal_MT(KernelGlobals *kg, int tri_index, int *shader) -{ -#if 0 - /* load triangle vertices */ - float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, tri_index)); - - float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x))); - float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y))); - float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z))); - - /* compute normal */ - return normalize(cross(v2 - v0, v1 - v0)); -#else - float4 Nm = kernel_tex_fetch(__tri_normal, tri_index); - *shader = __float_as_int(Nm.w); - return make_float3(Nm.x, Nm.y, Nm.z); -#endif -} - -/* Return 3 triangle vertex locations */ -ccl_device_inline void triangle_vertices(KernelGlobals *kg, int tri_index, float3 P[3]) -{ - /* load triangle vertices */ - float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, tri_index)); - - P[0] = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x))); - P[1] = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y))); - P[2] = 
float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z))); -} - -ccl_device_inline float3 triangle_smooth_normal(KernelGlobals *kg, int tri_index, float u, float v) -{ - /* load triangle vertices */ - float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, tri_index)); - - float3 n0 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.x))); - float3 n1 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.y))); - float3 n2 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.z))); - - return normalize((1.0f - u - v)*n2 + u*n0 + v*n1); -} - -ccl_device_inline void triangle_dPdudv(KernelGlobals *kg, float3 *dPdu, float3 *dPdv, int tri) -{ - /* fetch triangle vertex coordinates */ - float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, tri)); - - float3 p0 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x))); - float3 p1 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y))); - float3 p2 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z))); - - /* compute derivatives of P w.r.t. 
uv */ - *dPdu = (p0 - p2); - *dPdv = (p1 - p2); -} - -/* attributes */ - -ccl_device float triangle_attribute_float(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float *dx, float *dy) -{ - if(elem == ATTR_ELEMENT_FACE) { - if(dx) *dx = 0.0f; - if(dy) *dy = 0.0f; - - return kernel_tex_fetch(__attributes_float, offset + sd->prim); - } - else if(elem == ATTR_ELEMENT_VERTEX) { - float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, sd->prim)); - - float f0 = kernel_tex_fetch(__attributes_float, offset + __float_as_int(tri_vindex.x)); - float f1 = kernel_tex_fetch(__attributes_float, offset + __float_as_int(tri_vindex.y)); - float f2 = kernel_tex_fetch(__attributes_float, offset + __float_as_int(tri_vindex.z)); - -#ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2; - if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2; -#endif - - return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2; - } - else if(elem == ATTR_ELEMENT_CORNER) { - int tri = offset + sd->prim*3; - float f0 = kernel_tex_fetch(__attributes_float, tri + 0); - float f1 = kernel_tex_fetch(__attributes_float, tri + 1); - float f2 = kernel_tex_fetch(__attributes_float, tri + 2); - -#ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2; - if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2; -#endif - - return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2; - } - else { - if(dx) *dx = 0.0f; - if(dy) *dy = 0.0f; - - return 0.0f; - } -} - -ccl_device float3 triangle_attribute_float3(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float3 *dx, float3 *dy) -{ - if(elem == ATTR_ELEMENT_FACE) { - if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); - if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); - - return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + sd->prim)); - } - else if(elem == ATTR_ELEMENT_VERTEX) { - 
float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, sd->prim)); - - float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.x))); - float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.y))); - float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.z))); - -#ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2; - if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2; -#endif - - return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2; - } - else if(elem == ATTR_ELEMENT_CORNER) { - int tri = offset + sd->prim*3; - float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 0)); - float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 1)); - float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 2)); - -#ifdef __RAY_DIFFERENTIALS__ - if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2; - if(dy) *dy = sd->du.dy*f0 + sd->dv.dy*f1 - (sd->du.dy + sd->dv.dy)*f2; -#endif - - return sd->u*f0 + sd->v*f1 + (1.0f - sd->u - sd->v)*f2; - } - else { - if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f); - if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f); - - return make_float3(0.0f, 0.0f, 0.0f); - } -} - -CCL_NAMESPACE_END - diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index 5ee25a6cb98..11445aa1c93 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -46,7 +46,10 @@ CCL_NAMESPACE_BEGIN #define TEX_NUM_FLOAT_IMAGES 5 -#define SHADER_NO_ID -1 +#define SHADER_NONE (~0) +#define OBJECT_NONE (~0) +#define PRIM_NONE (~0) +#define LAMP_NONE (~0) #define VOLUME_STACK_SIZE 16 @@ -61,13 +64,17 @@ CCL_NAMESPACE_BEGIN #define __SUBSURFACE__ #define __CMJ__ #define __VOLUME__ +#define __SHADOW_RECORD_ALL__ #endif #ifdef __KERNEL_CUDA__ #define 
__KERNEL_SHADING__ #define __KERNEL_ADV_SHADING__ #define __BRANCHED_PATH__ + +/* Experimental on GPU */ //#define __VOLUME__ +//#define __SUBSURFACE__ #endif #ifdef __KERNEL_OPENCL__ @@ -85,26 +92,24 @@ CCL_NAMESPACE_BEGIN #endif #ifdef __KERNEL_OPENCL_AMD__ -#define __SVM__ -#define __EMISSION__ -#define __IMAGE_TEXTURES__ -#define __PROCEDURAL_TEXTURES__ -#define __EXTRA_NODES__ -#define __HOLDOUT__ -#define __NORMAL_MAP__ -//#define __BACKGROUND_MIS__ -//#define __LAMP_MIS__ -//#define __AO__ -//#define __ANISOTROPIC__ +#define __CL_USE_NATIVE__ +#define __KERNEL_SHADING__ +//__KERNEL_ADV_SHADING__ +#define __MULTI_CLOSURE__ +#define __TRANSPARENT_SHADOWS__ +#define __PASSES__ +#define __BACKGROUND_MIS__ +#define __LAMP_MIS__ +#define __AO__ +#define __ANISOTROPIC__ //#define __CAMERA_MOTION__ //#define __OBJECT_MOTION__ //#define __HAIR__ -//#define __MULTI_CLOSURE__ -//#define __TRANSPARENT_SHADOWS__ -//#define __PASSES__ +//end __KERNEL_ADV_SHADING__ #endif #ifdef __KERNEL_OPENCL_INTEL_CPU__ +#define __CL_USE_NATIVE__ #define __KERNEL_SHADING__ #define __KERNEL_ADV_SHADING__ #endif @@ -147,12 +152,6 @@ CCL_NAMESPACE_BEGIN #define __HAIR__ #endif -/* Sanity check */ - -#if defined(__KERNEL_OPENCL_NEED_ADVANCED_SHADING__) && !defined(__MULTI_CLOSURE__) -#error "OpenCL: mismatch between advanced shading flags in device_opencl.cpp and kernel_types.h" -#endif - /* Random Numbers */ typedef uint RNG; @@ -161,7 +160,35 @@ typedef uint RNG; typedef enum ShaderEvalType { SHADER_EVAL_DISPLACE, - SHADER_EVAL_BACKGROUND + SHADER_EVAL_BACKGROUND, + /* bake types */ + SHADER_EVAL_BAKE, /* no real shade, it's used in the code to + * differentiate the type of shader eval from the above + */ + /* data passes */ + SHADER_EVAL_NORMAL, + SHADER_EVAL_UV, + SHADER_EVAL_DIFFUSE_COLOR, + SHADER_EVAL_GLOSSY_COLOR, + SHADER_EVAL_TRANSMISSION_COLOR, + SHADER_EVAL_SUBSURFACE_COLOR, + SHADER_EVAL_EMISSION, + + /* light passes */ + SHADER_EVAL_AO, + SHADER_EVAL_COMBINED, + 
SHADER_EVAL_SHADOW, + SHADER_EVAL_DIFFUSE_DIRECT, + SHADER_EVAL_GLOSSY_DIRECT, + SHADER_EVAL_TRANSMISSION_DIRECT, + SHADER_EVAL_SUBSURFACE_DIRECT, + SHADER_EVAL_DIFFUSE_INDIRECT, + SHADER_EVAL_GLOSSY_INDIRECT, + SHADER_EVAL_TRANSMISSION_INDIRECT, + SHADER_EVAL_SUBSURFACE_INDIRECT, + + /* extra */ + SHADER_EVAL_ENVIRONMENT, } ShaderEvalType; /* Path Tracing @@ -177,10 +204,8 @@ enum PathTraceDimension { PRNG_UNUSED_0 = 5, PRNG_UNUSED_1 = 6, /* for some reason (6, 7) is a bad sobol pattern */ PRNG_UNUSED_2 = 7, /* with a low number of samples (< 64) */ - PRNG_BASE_NUM = 8, -#else - PRNG_BASE_NUM = 4, #endif + PRNG_BASE_NUM = 8, PRNG_BSDF_U = 0, PRNG_BSDF_V = 1, @@ -188,7 +213,7 @@ enum PathTraceDimension { PRNG_LIGHT = 3, PRNG_LIGHT_U = 4, PRNG_LIGHT_V = 5, - PRNG_LIGHT_F = 6, + PRNG_UNUSED_3 = 6, PRNG_TERMINATE = 7, #ifdef __VOLUME__ @@ -220,7 +245,6 @@ enum PathRayFlag { PATH_RAY_GLOSSY = 16, PATH_RAY_SINGULAR = 32, PATH_RAY_TRANSPARENT = 64, - PATH_RAY_VOLUME_SCATTER = 128, PATH_RAY_SHADOW_OPAQUE = 128, PATH_RAY_SHADOW_TRANSPARENT = 256, @@ -228,16 +252,17 @@ enum PathRayFlag { PATH_RAY_CURVE = 512, /* visibility flag to define curve segments*/ + /* note that these can use maximum 12 bits, the other are for layers */ PATH_RAY_ALL_VISIBILITY = (1|2|4|8|16|32|64|128|256|512), PATH_RAY_MIS_SKIP = 1024, PATH_RAY_DIFFUSE_ANCESTOR = 2048, PATH_RAY_GLOSSY_ANCESTOR = 4096, PATH_RAY_BSSRDF_ANCESTOR = 8192, - PATH_RAY_SINGLE_PASS_DONE = 8192, + PATH_RAY_SINGLE_PASS_DONE = 16384, + PATH_RAY_VOLUME_SCATTER = 32768, - /* this gives collisions with localview bits - * see: blender_util.h, grr - Campbell */ + /* we need layer member flags to be the 20 upper bits */ PATH_RAY_LAYER_SHIFT = (32-20) }; @@ -282,7 +307,8 @@ typedef enum PassType { PASS_MIST = 2097152, PASS_SUBSURFACE_DIRECT = 4194304, PASS_SUBSURFACE_INDIRECT = 8388608, - PASS_SUBSURFACE_COLOR = 16777216 + PASS_SUBSURFACE_COLOR = 16777216, + PASS_LIGHT = 33554432, /* no real pass, used to force use_light_pass */ } 
PassType; #define PASS_ALL (~0) @@ -418,9 +444,27 @@ typedef struct Intersection { float t, u, v; int prim; int object; - int segment; + int type; } Intersection; +/* Primitives */ + +typedef enum PrimitiveType { + PRIMITIVE_NONE = 0, + PRIMITIVE_TRIANGLE = 1, + PRIMITIVE_MOTION_TRIANGLE = 2, + PRIMITIVE_CURVE = 4, + PRIMITIVE_MOTION_CURVE = 8, + + PRIMITIVE_ALL_TRIANGLE = (PRIMITIVE_TRIANGLE|PRIMITIVE_MOTION_TRIANGLE), + PRIMITIVE_ALL_CURVE = (PRIMITIVE_CURVE|PRIMITIVE_MOTION_CURVE), + PRIMITIVE_ALL_MOTION = (PRIMITIVE_MOTION_TRIANGLE|PRIMITIVE_MOTION_CURVE), + PRIMITIVE_ALL = (PRIMITIVE_ALL_TRIANGLE|PRIMITIVE_ALL_CURVE) +} PrimitiveType; + +#define PRIMITIVE_PACK_SEGMENT(type, segment) ((segment << 16) | type) +#define PRIMITIVE_UNPACK_SEGMENT(type) (type >> 16) + /* Attributes */ #define ATTR_PRIM_TYPES 2 @@ -432,9 +476,12 @@ typedef enum AttributeElement { ATTR_ELEMENT_MESH, ATTR_ELEMENT_FACE, ATTR_ELEMENT_VERTEX, + ATTR_ELEMENT_VERTEX_MOTION, ATTR_ELEMENT_CORNER, ATTR_ELEMENT_CURVE, - ATTR_ELEMENT_CURVE_KEY + ATTR_ELEMENT_CURVE_KEY, + ATTR_ELEMENT_CURVE_KEY_MOTION, + ATTR_ELEMENT_VOXEL } AttributeElement; typedef enum AttributeStandard { @@ -448,12 +495,17 @@ typedef enum AttributeStandard { ATTR_STD_GENERATED_TRANSFORM, ATTR_STD_POSITION_UNDEFORMED, ATTR_STD_POSITION_UNDISPLACED, - ATTR_STD_MOTION_PRE, - ATTR_STD_MOTION_POST, + ATTR_STD_MOTION_VERTEX_POSITION, + ATTR_STD_MOTION_VERTEX_NORMAL, ATTR_STD_PARTICLE, ATTR_STD_CURVE_INTERCEPT, ATTR_STD_PTEX_FACE_ID, ATTR_STD_PTEX_UV, + ATTR_STD_VOLUME_DENSITY, + ATTR_STD_VOLUME_COLOR, + ATTR_STD_VOLUME_FLAME, + ATTR_STD_VOLUME_HEAT, + ATTR_STD_VOLUME_VELOCITY, ATTR_STD_NUM, ATTR_STD_NOT_FOUND = ~0 @@ -461,15 +513,17 @@ typedef enum AttributeStandard { /* Closure data */ +#ifdef __MULTI_CLOSURE__ #define MAX_CLOSURE 64 +#else +#define MAX_CLOSURE 1 +#endif typedef struct ShaderClosure { ClosureType type; float3 weight; -#ifdef __MULTI_CLOSURE__ float sample_weight; -#endif float data0; float data1; @@ -561,13 +615,9 
@@ typedef struct ShaderData { /* primitive id if there is one, ~0 otherwise */ int prim; -#ifdef __HAIR__ - /* for curves, segment number in curve, ~0 for triangles */ - int segment; - /* variables for minimum hair width using transparency bsdf */ - /*float curve_transparency; */ - /*float curve_radius; */ -#endif + /* combined type and curve segment for hair */ + int type; + /* parametric coordinates * - barycentric weights for triangles */ float u, v; @@ -583,6 +633,9 @@ typedef struct ShaderData { /* ray bounce depth */ int ray_depth; + /* ray transparent depth */ + int transparent_depth; + #ifdef __RAY_DIFFERENTIALS__ /* differential of P. these are orthogonal to Ng, not N */ differential3 dP; @@ -605,15 +658,10 @@ typedef struct ShaderData { Transform ob_itfm; #endif -#ifdef __MULTI_CLOSURE__ /* Closure data, we store a fixed array of closures */ ShaderClosure closure[MAX_CLOSURE]; int num_closure; float randb_closure; -#else - /* Closure data, with a single sampled closure for low memory usage */ - ShaderClosure closure; -#endif /* ray start position, only set for backgrounds */ float3 ray_P; @@ -824,25 +872,27 @@ typedef struct KernelIntegrator { /* clamp */ float sample_clamp_direct; float sample_clamp_indirect; - float pad1, pad2, pad3; /* branched path */ int branched; - int aa_samples; int diffuse_samples; int glossy_samples; int transmission_samples; int ao_samples; int mesh_light_samples; int subsurface_samples; - + int sample_all_lights_direct; + int sample_all_lights_indirect; + /* mis */ int use_lamp_mis; /* sampler */ int sampling_pattern; + int aa_samples; /* volume render */ + int volume_homogeneous_sampling; int use_volumes; int volume_max_steps; float volume_step_size; diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h index dc2ddf1098e..faaa68e3309 100644 --- a/intern/cycles/kernel/kernel_volume.h +++ b/intern/cycles/kernel/kernel_volume.h @@ -16,6 +16,8 @@ CCL_NAMESPACE_BEGIN +/* Events for probalistic 
scattering */ + typedef enum VolumeIntegrateResult { VOLUME_PATH_SCATTERED = 0, VOLUME_PATH_ATTENUATED = 1, @@ -92,14 +94,19 @@ ccl_device bool volume_shader_sample(KernelGlobals *kg, ShaderData *sd, PathStat return true; } -ccl_device float3 volume_color_attenuation(float3 sigma, float t) +ccl_device float3 volume_color_transmittance(float3 sigma, float t) { return make_float3(expf(-sigma.x * t), expf(-sigma.y * t), expf(-sigma.z * t)); } +ccl_device float kernel_volume_channel_get(float3 value, int channel) +{ + return (channel == 0)? value.x: ((channel == 1)? value.y: value.z); +} + ccl_device bool volume_stack_is_heterogeneous(KernelGlobals *kg, VolumeStack *stack) { - for(int i = 0; stack[i].shader != SHADER_NO_ID; i++) { + for(int i = 0; stack[i].shader != SHADER_NONE; i++) { int shader_flag = kernel_tex_fetch(__shader_flag, (stack[i].shader & SHADER_MASK)*2); if(shader_flag & SD_HETEROGENEOUS_VOLUME) @@ -114,14 +121,14 @@ ccl_device bool volume_stack_is_heterogeneous(KernelGlobals *kg, VolumeStack *st * These functions are used to attenuate shadow rays to lights. Both absorption * and scattering will block light, represented by the extinction coefficient. 
*/ -/* homogenous volume: assume shader evaluation at the starts gives +/* homogeneous volume: assume shader evaluation at the starts gives * the extinction coefficient for the entire line segment */ ccl_device void kernel_volume_shadow_homogeneous(KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd, float3 *throughput) { float3 sigma_t; if(volume_shader_extinction_sample(kg, sd, state, ray->P, &sigma_t)) - *throughput *= volume_color_attenuation(sigma_t, ray->t); + *throughput *= volume_color_transmittance(sigma_t, ray->t); } /* heterogeneous volume: integrate stepping through the volume until we @@ -138,34 +145,29 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState /* compute extinction at the start */ float t = 0.0f; - float3 P = ray->P; - float3 sigma_t; - - if(!volume_shader_extinction_sample(kg, sd, state, P, &sigma_t)) - sigma_t = make_float3(0.0f, 0.0f, 0.0f); for(int i = 0; i < max_steps; i++) { /* advance to new position */ - float new_t = min(ray->t, t + random_jitter_offset + i * step); - float3 new_P = ray->P + ray->D * new_t; - float3 new_sigma_t; + float new_t = min(ray->t, (i+1) * step); + float dt = new_t - t; + + /* use random position inside this segment to sample shader */ + if(new_t == ray->t) + random_jitter_offset = lcg_step_float(&state->rng_congruential) * dt; + + float3 new_P = ray->P + ray->D * (t + random_jitter_offset); + float3 sigma_t; /* compute attenuation over segment */ - if(volume_shader_extinction_sample(kg, sd, state, new_P, &new_sigma_t)) { + if(volume_shader_extinction_sample(kg, sd, state, new_P, &sigma_t)) { /* todo: we could avoid computing expf() for each step by summing, * because exp(a)*exp(b) = exp(a+b), but we still want a quick * tp_eps check too */ - tp *= volume_color_attenuation(0.5f*(sigma_t + new_sigma_t), new_t - t); + tp *= volume_color_transmittance(sigma_t, new_t - t); /* stop if nearly all light blocked */ if(tp.x < tp_eps && tp.y < tp_eps && tp.z < tp_eps) break; - 
- sigma_t = new_sigma_t; - } - else { - /* skip empty space */ - sigma_t = make_float3(0.0f, 0.0f, 0.0f); } /* stop if at the end of the volume */ @@ -182,7 +184,7 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg, PathState *state, Ray *ray, float3 *throughput) { ShaderData sd; - shader_setup_from_volume(kg, &sd, ray, state->bounce); + shader_setup_from_volume(kg, &sd, ray, state->bounce, state->transparent_bounce); if(volume_stack_is_heterogeneous(kg, state->volume_stack)) kernel_volume_shadow_heterogeneous(kg, state, ray, &sd, throughput); @@ -190,9 +192,123 @@ ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg, PathState *stat kernel_volume_shadow_homogeneous(kg, state, ray, &sd, throughput); } +/* Equi-angular sampling as in: + * "Importance Sampling Techniques for Path Tracing in Participating Media" */ + +ccl_device float kernel_volume_equiangular_sample(Ray *ray, float3 light_P, float xi, float *pdf) +{ + float t = ray->t; + + float delta = dot((light_P - ray->P) , ray->D); + float D = sqrtf(len_squared(light_P - ray->P) - delta * delta); + float theta_a = -atan2f(delta, D); + float theta_b = atan2f(t - delta, D); + float t_ = D * tanf((xi * theta_b) + (1 - xi) * theta_a); + + *pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_)); + + return min(t, delta + t_); /* min is only for float precision errors */ +} + +ccl_device float kernel_volume_equiangular_pdf(Ray *ray, float3 light_P, float sample_t) +{ + float delta = dot((light_P - ray->P) , ray->D); + float D = sqrtf(len_squared(light_P - ray->P) - delta * delta); + + float t = ray->t; + float t_ = sample_t - delta; + + float theta_a = -atan2f(delta, D); + float theta_b = atan2f(t - delta, D); + + float pdf = D / ((theta_b - theta_a) * (D * D + t_ * t_)); + + return pdf; +} + +ccl_device bool kernel_volume_equiangular_light_position(KernelGlobals *kg, PathState *state, Ray *ray, RNG *rng, float3 
*light_P) +{ + /* light RNGs */ + float light_t = path_state_rng_1D(kg, rng, state, PRNG_LIGHT); + float light_u, light_v; + path_state_rng_2D(kg, rng, state, PRNG_LIGHT_U, &light_u, &light_v); + + /* light sample */ + LightSample ls; + light_sample(kg, light_t, light_u, light_v, ray->time, ray->P, &ls); + if(ls.pdf == 0.0f) + return false; + + *light_P = ls.P; + return true; +} + +ccl_device float kernel_volume_decoupled_equiangular_pdf(KernelGlobals *kg, PathState *state, Ray *ray, RNG *rng, float sample_t) +{ + float3 light_P; + + if(!kernel_volume_equiangular_light_position(kg, state, ray, rng, &light_P)) + return 0.0f; + + return kernel_volume_equiangular_pdf(ray, light_P, sample_t); +} + +/* Distance sampling */ + +ccl_device float kernel_volume_distance_sample(float max_t, float3 sigma_t, int channel, float xi, float3 *transmittance, float3 *pdf) +{ + /* xi is [0, 1[ so log(0) should never happen, division by zero is + * avoided because sample_sigma_t > 0 when SD_SCATTER is set */ + float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel); + float3 full_transmittance = volume_color_transmittance(sigma_t, max_t); + float sample_transmittance = kernel_volume_channel_get(full_transmittance, channel); + + float sample_t = min(max_t, -logf(1.0f - xi*(1.0f - sample_transmittance))/sample_sigma_t); + + *transmittance = volume_color_transmittance(sigma_t, sample_t); + *pdf = (sigma_t * *transmittance)/(make_float3(1.0f, 1.0f, 1.0f) - full_transmittance); + + /* todo: optimization: when taken together with hit/miss decision, + * the full_transmittance cancels out drops out and xi does not + * need to be remapped */ + + return sample_t; +} + +ccl_device float3 kernel_volume_distance_pdf(float max_t, float3 sigma_t, float sample_t) +{ + float3 full_transmittance = volume_color_transmittance(sigma_t, max_t); + float3 transmittance = volume_color_transmittance(sigma_t, sample_t); + + return (sigma_t * transmittance)/(make_float3(1.0f, 1.0f, 1.0f) - 
full_transmittance); +} + +/* Emission */ + +ccl_device float3 kernel_volume_emission_integrate(VolumeShaderCoefficients *coeff, int closure_flag, float3 transmittance, float t) +{ + /* integral E * exp(-sigma_t * t) from 0 to t = E * (1 - exp(-sigma_t * t))/sigma_t + * this goes to E * t as sigma_t goes to zero + * + * todo: we should use an epsilon to avoid precision issues near zero sigma_t */ + float3 emission = coeff->emission; + + if(closure_flag & SD_ABSORPTION) { + float3 sigma_t = coeff->sigma_a + coeff->sigma_s; + + emission.x *= (sigma_t.x > 0.0f)? (1.0f - transmittance.x)/sigma_t.x: t; + emission.y *= (sigma_t.y > 0.0f)? (1.0f - transmittance.y)/sigma_t.y: t; + emission.z *= (sigma_t.z > 0.0f)? (1.0f - transmittance.z)/sigma_t.z: t; + } + else + emission *= t; + + return emission; +} + /* Volume Path */ -/* homogenous volume: assume shader evaluation at the starts gives +/* homogeneous volume: assume shader evaluation at the start gives * the volume shading coefficient for the entire line segment */ ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd, PathRadiance *L, float3 *throughput, @@ -206,69 +322,73 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(KernelGloba int closure_flag = sd->flag; float t = ray->t; float3 new_tp; - float3 transmittance; /* randomly scatter, and if we do t is shortened */ if(closure_flag & SD_SCATTER) { + /* extinction coefficient */ float3 sigma_t = coeff.sigma_a + coeff.sigma_s; - /* set up variables for sampling */ + /* pick random color channel, we use the Veach one-sample + * model with balance heuristic for the channels */ float rphase = path_state_rng_1D(kg, rng, state, PRNG_PHASE); int channel = (int)(rphase*3.0f); sd->randb_closure = rphase*3.0f - channel; - /* pick random color channel, we use the Veach one-sample - * model with balance heuristic for the channels */ - float sample_sigma_t; + float xi = 
path_state_rng_1D(kg, rng, state, PRNG_SCATTER_DISTANCE); - if(channel == 0) - sample_sigma_t = sigma_t.x; - else if(channel == 1) - sample_sigma_t = sigma_t.y; - else - sample_sigma_t = sigma_t.z; + /* decide if we will hit or miss */ + float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel); + float sample_transmittance = expf(-sample_sigma_t * t); - /* xi is [0, 1[ so log(0) should never happen, division by zero is - * avoided because sample_sigma_t > 0 when SD_SCATTER is set */ - float xi = path_state_rng_1D(kg, rng, state, PRNG_SCATTER_DISTANCE); - float sample_t = min(t, -logf(1.0f - xi)/sample_sigma_t); + if(xi >= sample_transmittance) { + /* scattering */ + float3 pdf; + float3 transmittance; + float sample_t; - transmittance = volume_color_attenuation(sigma_t, sample_t); + /* rescale random number so we can reuse it */ + xi = (xi - sample_transmittance)/(1.0f - sample_transmittance); - if(sample_t < t) { - float pdf = dot(sigma_t, transmittance); - new_tp = *throughput * coeff.sigma_s * transmittance * (3.0f / pdf); + if(kernel_data.integrator.volume_homogeneous_sampling == 0 || !kernel_data.integrator.num_all_lights) { + /* distance sampling */ + sample_t = kernel_volume_distance_sample(ray->t, sigma_t, channel, xi, &transmittance, &pdf); + } + else { + /* equiangular sampling */ + float3 light_P; + float equi_pdf; + if(!kernel_volume_equiangular_light_position(kg, state, ray, rng, &light_P)) + return VOLUME_PATH_MISSED; + + sample_t = kernel_volume_equiangular_sample(ray, light_P, xi, &equi_pdf); + transmittance = volume_color_transmittance(sigma_t, sample_t); + pdf = make_float3(equi_pdf, equi_pdf, equi_pdf); + } + + /* modifiy pdf for hit/miss decision */ + pdf *= make_float3(1.0f, 1.0f, 1.0f) - volume_color_transmittance(sigma_t, t); + + new_tp = *throughput * coeff.sigma_s * transmittance / average(pdf); t = sample_t; } else { - float pdf = (transmittance.x + transmittance.y + transmittance.z); - new_tp = *throughput * transmittance * 
(3.0f / pdf); + /* no scattering */ + float3 transmittance = volume_color_transmittance(sigma_t, t); + float pdf = average(transmittance); + new_tp = *throughput * transmittance / pdf; } } else if(closure_flag & SD_ABSORPTION) { /* absorption only, no sampling needed */ - transmittance = volume_color_attenuation(coeff.sigma_a, t); + float3 transmittance = volume_color_transmittance(coeff.sigma_a, t); new_tp = *throughput * transmittance; } - /* integrate emission attenuated by extinction - * integral E * exp(-sigma_t * t) from 0 to t = E * (1 - exp(-sigma_t * t))/sigma_t - * this goes to E * t as sigma_t goes to zero - * - * todo: we should use an epsilon to avoid precision issues near zero sigma_t */ + /* integrate emission attenuated by extinction */ if(closure_flag & SD_EMISSION) { - float3 emission = coeff.emission; - - if(closure_flag & SD_ABSORPTION) { - float3 sigma_t = coeff.sigma_a + coeff.sigma_s; - - emission.x *= (sigma_t.x > 0.0f)? (1.0f - transmittance.x)/sigma_t.x: t; - emission.y *= (sigma_t.y > 0.0f)? (1.0f - transmittance.y)/sigma_t.y: t; - emission.z *= (sigma_t.z > 0.0f)? 
(1.0f - transmittance.z)/sigma_t.z: t; - } - else - emission *= t; - + float3 sigma_t = coeff.sigma_a + coeff.sigma_s; + float3 transmittance = volume_color_transmittance(sigma_t, ray->t); + float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, ray->t); path_radiance_accum_emission(L, *throughput, emission, state->bounce); } @@ -293,45 +413,38 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(KernelGloba ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous(KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd, PathRadiance *L, float3 *throughput, RNG *rng) { - VolumeShaderCoefficients coeff; float3 tp = *throughput; const float tp_eps = 1e-10f; /* todo: this is likely not the right value */ /* prepare for stepping */ int max_steps = kernel_data.integrator.volume_max_steps; - float step = kernel_data.integrator.volume_step_size; - float random_jitter_offset = lcg_step_float(&state->rng_congruential) * step; + float step_size = kernel_data.integrator.volume_step_size; + float random_jitter_offset = lcg_step_float(&state->rng_congruential) * step_size; /* compute coefficients at the start */ float t = 0.0f; - float3 P = ray->P; - - if(!volume_shader_sample(kg, sd, state, P, &coeff)) { - coeff.sigma_a = make_float3(0.0f, 0.0f, 0.0f); - coeff.sigma_s = make_float3(0.0f, 0.0f, 0.0f); - coeff.emission = make_float3(0.0f, 0.0f, 0.0f); - } - - /* accumulate these values so we can use a single stratified number to sample */ float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f); - float3 accum_sigma_t = make_float3(0.0f, 0.0f, 0.0f); - float3 accum_sigma_s = make_float3(0.0f, 0.0f, 0.0f); /* cache some constant variables */ - float nlogxi; + float xi; int channel = -1; bool has_scatter = false; for(int i = 0; i < max_steps; i++) { /* advance to new position */ - float new_t = min(ray->t, t + random_jitter_offset + i * step); - float3 new_P = ray->P + ray->D * new_t; - 
VolumeShaderCoefficients new_coeff; + float new_t = min(ray->t, (i+1) * step_size); + float dt = new_t - t; + + /* use random position inside this segment to sample shader */ + if(new_t == ray->t) + random_jitter_offset = lcg_step_float(&state->rng_congruential) * dt; + + float3 new_P = ray->P + ray->D * (t + random_jitter_offset); + VolumeShaderCoefficients coeff; /* compute segment */ - if(volume_shader_sample(kg, sd, state, new_P, &new_coeff)) { + if(volume_shader_sample(kg, sd, state, new_P, &coeff)) { int closure_flag = sd->flag; - float dt = new_t - t; float3 new_tp; float3 transmittance; bool scatter = false; @@ -341,94 +454,58 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous(KernelGlo has_scatter = true; /* average sigma_t and sigma_s over segment */ - float3 last_sigma_t = coeff.sigma_a + coeff.sigma_s; - float3 new_sigma_t = new_coeff.sigma_a + new_coeff.sigma_s; - float3 sigma_t = 0.5f*(last_sigma_t + new_sigma_t); - float3 sigma_s = 0.5f*(coeff.sigma_s + new_coeff.sigma_s); + float3 sigma_t = coeff.sigma_a + coeff.sigma_s; + float3 sigma_s = coeff.sigma_s; /* lazily set up variables for sampling */ if(channel == -1) { - float xi = path_state_rng_1D(kg, rng, state, PRNG_SCATTER_DISTANCE); - nlogxi = -logf(1.0f - xi); + /* pick random color channel, we use the Veach one-sample + * model with balance heuristic for the channels */ + xi = path_state_rng_1D(kg, rng, state, PRNG_SCATTER_DISTANCE); float rphase = path_state_rng_1D(kg, rng, state, PRNG_PHASE); channel = (int)(rphase*3.0f); sd->randb_closure = rphase*3.0f - channel; } - /* pick random color channel, we use the Veach one-sample - * model with balance heuristic for the channels */ - float sample_sigma_t; + /* compute transmittance over full step */ + transmittance = volume_color_transmittance(sigma_t, dt); - if(channel == 0) - sample_sigma_t = accum_sigma_t.x + dt*sigma_t.x; - else if(channel == 1) - sample_sigma_t = accum_sigma_t.y + dt*sigma_t.y; - else - sample_sigma_t = 
accum_sigma_t.z + dt*sigma_t.z; + /* decide if we will scatter or continue */ + float sample_transmittance = kernel_volume_channel_get(transmittance, channel); - if(nlogxi < sample_sigma_t) { + if(1.0f - xi >= sample_transmittance) { /* compute sampling distance */ - sample_sigma_t /= new_t; - new_t = nlogxi/sample_sigma_t; - dt = new_t - t; - - transmittance = volume_color_attenuation(sigma_t, dt); - - accum_transmittance *= transmittance; - accum_sigma_t = (accum_sigma_t + dt*sigma_t)/new_t; - accum_sigma_s = (accum_sigma_s + dt*sigma_s)/new_t; - - /* todo: it's not clear to me that this is correct if we move - * through a color volumed, needs verification */ - float pdf = dot(accum_sigma_t, accum_transmittance); - new_tp = tp * accum_sigma_s * transmittance * (3.0f / pdf); - + float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel); + float new_dt = -logf(1.0f - xi)/sample_sigma_t; + new_t = t + new_dt; + + /* transmittance, throughput */ + float3 new_transmittance = volume_color_transmittance(sigma_t, new_dt); + float pdf = average(sigma_t * new_transmittance); + new_tp = tp * sigma_s * new_transmittance / pdf; scatter = true; } else { - transmittance = volume_color_attenuation(sigma_t, dt); - - accum_transmittance *= transmittance; - accum_sigma_t += dt*sigma_t; - accum_sigma_s += dt*sigma_s; + /* throughput */ + float pdf = average(transmittance); + new_tp = tp * transmittance / pdf; - new_tp = tp * transmittance; + /* remap xi so we can reuse it and keep thing stratified */ + xi = 1.0f - (1.0f - xi)/sample_transmittance; } } else if(closure_flag & SD_ABSORPTION) { /* absorption only, no sampling needed */ - float3 sigma_a = 0.5f*(coeff.sigma_a + new_coeff.sigma_a); - transmittance = volume_color_attenuation(sigma_a, dt); - - accum_transmittance *= transmittance; - accum_sigma_t += dt*sigma_a; + float3 sigma_a = coeff.sigma_a; + transmittance = volume_color_transmittance(sigma_a, dt); new_tp = tp * transmittance; - - /* todo: we could avoid 
computing expf() for each step by summing, - * because exp(a)*exp(b) = exp(a+b), but we still want a quick - * tp_eps check too */ } - /* integrate emission attenuated by absorption - * integral E * exp(-sigma_t * t) from 0 to t = E * (1 - exp(-sigma_t * t))/sigma_t - * this goes to E * t as sigma_t goes to zero - * - * todo: we should use an epsilon to avoid precision issues near zero sigma_t */ + /* integrate emission attenuated by absorption */ if(closure_flag & SD_EMISSION) { - float3 emission = 0.5f*(coeff.emission + new_coeff.emission); - - if(closure_flag & SD_ABSORPTION) { - float3 sigma_t = 0.5f*(coeff.sigma_a + coeff.sigma_s + new_coeff.sigma_a + new_coeff.sigma_s); - - emission.x *= (sigma_t.x > 0.0f)? (1.0f - transmittance.x)/sigma_t.x: dt; - emission.y *= (sigma_t.y > 0.0f)? (1.0f - transmittance.y)/sigma_t.y: dt; - emission.z *= (sigma_t.z > 0.0f)? (1.0f - transmittance.z)/sigma_t.z: dt; - } - else - emission *= dt; - + float3 emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, dt); path_radiance_accum_emission(L, tp, emission, state->bounce); } @@ -450,47 +527,323 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous(KernelGlo return VOLUME_PATH_SCATTERED; } + else { + /* accumulate transmittance */ + accum_transmittance *= transmittance; + } } + } + + /* stop if at the end of the volume */ + t = new_t; + if(t == ray->t) + break; + } + + *throughput = tp; - coeff = new_coeff; + return VOLUME_PATH_ATTENUATED; +} + +/* Decoupled Volume Sampling + * + * VolumeSegment is list of coefficients and transmittance stored at all steps + * through a volume. 
This can then latter be used for decoupled sampling as in: + * "Importance Sampling Techniques for Path Tracing in Participating Media" */ + +/* CPU only because of malloc/free */ +#ifdef __KERNEL_CPU__ + +typedef struct VolumeStep { + float3 sigma_s; /* scatter coefficient */ + float3 sigma_t; /* extinction coefficient */ + float3 accum_transmittance; /* accumulated transmittance including this step */ + float3 cdf_distance; /* cumulative density function for distance sampling */ + float t; /* distance at end of this step */ + float shade_t; /* jittered distance where shading was done in step */ + int closure_flag; /* shader evaluation closure flags */ +} VolumeStep; + +typedef struct VolumeSegment { + VolumeStep *steps; /* recorded steps */ + int numsteps; /* number of steps */ + int closure_flag; /* accumulated closure flags from all steps */ + + float3 accum_emission; /* accumulated emission at end of segment */ + float3 accum_transmittance; /* accumulated transmittance at end of segment */ +} VolumeSegment; + +/* record volume steps to the end of the volume. + * + * it would be nice if we could only record up to the point that we need to scatter, + * but the entire segment is needed to do always scattering, rather than probalistically + * hitting or missing the volume. 
if we don't know the transmittance at the end of the + * volume we can't generate stratitied distance samples up to that transmittance */ +ccl_device void kernel_volume_decoupled_record(KernelGlobals *kg, PathState *state, + Ray *ray, ShaderData *sd, VolumeSegment *segment, bool heterogeneous) +{ + /* prepare for volume stepping */ + int max_steps; + float step_size, random_jitter_offset; + + if(heterogeneous) { + max_steps = kernel_data.integrator.volume_max_steps; + step_size = kernel_data.integrator.volume_step_size; + random_jitter_offset = lcg_step_float(&state->rng_congruential) * step_size; + + /* compute exact steps in advance for malloc */ + max_steps = max((int)ceilf(ray->t/step_size), 1); + } + else { + max_steps = 1; + step_size = ray->t; + random_jitter_offset = 0.0f; + } + + /* init accumulation variables */ + float3 accum_emission = make_float3(0.0f, 0.0f, 0.0f); + float3 accum_transmittance = make_float3(1.0f, 1.0f, 1.0f); + float3 cdf_distance = make_float3(0.0f, 0.0f, 0.0f); + float t = 0.0f; + + segment->closure_flag = 0; + segment->numsteps = 0; + segment->steps = (VolumeStep*)malloc(sizeof(VolumeStep)*max_steps); + + VolumeStep *step = segment->steps; + + for(int i = 0; i < max_steps; i++, step++) { + /* advance to new position */ + float new_t = min(ray->t, (i+1) * step_size); + float dt = new_t - t; + + /* use random position inside this segment to sample shader */ + if(heterogeneous && new_t == ray->t) + random_jitter_offset = lcg_step_float(&state->rng_congruential) * dt; + + float3 new_P = ray->P + ray->D * (t + random_jitter_offset); + VolumeShaderCoefficients coeff; + + /* compute segment */ + if(volume_shader_sample(kg, sd, state, new_P, &coeff)) { + int closure_flag = sd->flag; + float3 sigma_t = coeff.sigma_a + coeff.sigma_s; + + /* compute accumulated transmittance */ + float3 transmittance = volume_color_transmittance(sigma_t, dt); + + /* compute emission attenuated by absorption */ + if(closure_flag & SD_EMISSION) { + float3 
emission = kernel_volume_emission_integrate(&coeff, closure_flag, transmittance, dt); + accum_emission += accum_transmittance * emission; + } + + accum_transmittance *= transmittance; + + /* compute pdf for distance sampling */ + float3 pdf_distance = dt * accum_transmittance * coeff.sigma_s; + cdf_distance = cdf_distance + pdf_distance; + + /* write step data */ + step->sigma_t = sigma_t; + step->sigma_s = coeff.sigma_s; + step->closure_flag = closure_flag; + + segment->closure_flag |= closure_flag; } else { - /* skip empty space */ - coeff.sigma_a = make_float3(0.0f, 0.0f, 0.0f); - coeff.sigma_s = make_float3(0.0f, 0.0f, 0.0f); - coeff.emission = make_float3(0.0f, 0.0f, 0.0f); + /* store empty step (todo: skip consecutive empty steps) */ + step->sigma_t = make_float3(0.0f, 0.0f, 0.0f); + step->sigma_s = make_float3(0.0f, 0.0f, 0.0f); + step->closure_flag = 0; } + step->accum_transmittance = accum_transmittance; + step->cdf_distance = cdf_distance; + step->t = new_t; + step->shade_t = t + random_jitter_offset; + + segment->numsteps++; + /* stop if at the end of the volume */ t = new_t; if(t == ray->t) break; } - /* include pdf for volumes with scattering */ - if(has_scatter) { - float pdf = (accum_transmittance.x + accum_transmittance.y + accum_transmittance.z); - if(pdf > 0.0f) - tp *= (3.0f/pdf); + /* store total emission and transmittance */ + segment->accum_emission = accum_emission; + segment->accum_transmittance = accum_transmittance; + + /* normalize cumulative density function for distance sampling */ + VolumeStep *last_step = segment->steps + segment->numsteps - 1; + + if(!is_zero(last_step->cdf_distance)) { + VolumeStep *step = &segment->steps[0]; + int numsteps = segment->numsteps; + float3 inv_cdf_distance_sum = safe_invert_color(last_step->cdf_distance); + + for(int i = 0; i < numsteps; i++, step++) + step->cdf_distance *= inv_cdf_distance_sum; + } +} + +ccl_device void kernel_volume_decoupled_free(KernelGlobals *kg, VolumeSegment *segment) +{ + 
free(segment->steps); +} + +/* scattering for homogeneous and heterogeneous volumes, using decoupled ray + * marching. unlike the non-decoupled functions, these do not do probalistic + * scattering, they always scatter if there is any non-zero scattering + * coefficient. + * + * these also do not do emission or modify throughput. */ +ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter( + KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd, + float3 *throughput, RNG *rng, VolumeSegment *segment) +{ + int closure_flag = segment->closure_flag; + + if(!(closure_flag & SD_SCATTER)) + return VOLUME_PATH_MISSED; + + /* pick random color channel, we use the Veach one-sample + * model with balance heuristic for the channels */ + float rphase = path_state_rng_1D(kg, rng, state, PRNG_PHASE); + int channel = (int)(rphase*3.0f); + sd->randb_closure = rphase*3.0f - channel; + + float xi = path_state_rng_1D(kg, rng, state, PRNG_SCATTER_DISTANCE); + + VolumeStep *step; + float3 transmittance; + float pdf, sample_t; + + /* distance sampling */ + if(kernel_data.integrator.volume_homogeneous_sampling == 0 || !kernel_data.integrator.num_all_lights) { + /* find step in cdf */ + step = segment->steps; + + float prev_t = 0.0f; + float3 step_pdf = make_float3(1.0f, 1.0f, 1.0f); + + if(segment->numsteps > 1) { + float prev_cdf = 0.0f; + float step_cdf = 1.0f; + float3 prev_cdf_distance = make_float3(0.0f, 0.0f, 0.0f); + + for(int i = 0; ; i++, step++) { + /* todo: optimize using binary search */ + step_cdf = kernel_volume_channel_get(step->cdf_distance, channel); + + if(xi < step_cdf || i == segment->numsteps-1) + break; + + prev_cdf = step_cdf; + prev_t = step->t; + prev_cdf_distance = step->cdf_distance; + } + + /* remap xi so we can reuse it */ + xi = (xi - prev_cdf)/(step_cdf - prev_cdf); + + /* pdf for picking step */ + step_pdf = step->cdf_distance - prev_cdf_distance; + } + + /* determine range in which we will sample */ + float step_t = step->t - prev_t; + + 
/* sample distance and compute transmittance */ + float3 distance_pdf; + sample_t = prev_t + kernel_volume_distance_sample(step_t, step->sigma_t, channel, xi, &transmittance, &distance_pdf); + pdf = average(distance_pdf * step_pdf); } + /* equi-angular sampling */ + else { + /* pick position on light */ + float3 light_P; + if(!kernel_volume_equiangular_light_position(kg, state, ray, rng, &light_P)) + return VOLUME_PATH_MISSED; - *throughput = tp; + /* sample distance */ + sample_t = kernel_volume_equiangular_sample(ray, light_P, xi, &pdf); - return VOLUME_PATH_ATTENUATED; + /* find step in which sampled distance is located */ + step = segment->steps; + + float prev_t = 0.0f; + + if(segment->numsteps > 1) { + /* todo: optimize using binary search */ + for(int i = 0; i < segment->numsteps-1; i++, step++) { + if(sample_t < step->t) + break; + + prev_t = step->t; + } + } + + /* compute transmittance */ + transmittance = volume_color_transmittance(step->sigma_t, sample_t - prev_t); + } + + /* compute transmittance up to this step */ + if(step != segment->steps) + transmittance *= (step-1)->accum_transmittance; + + /* modify throughput */ + *throughput *= step->sigma_s * transmittance / pdf; + + /* evaluate shader to create closures at shading point */ + if(segment->numsteps > 1) { + sd->P = ray->P + step->shade_t*ray->D; + + VolumeShaderCoefficients coeff; + volume_shader_sample(kg, sd, state, sd->P, &coeff); + } + + /* move to new position */ + sd->P = ray->P + sample_t*ray->D; + + return VOLUME_PATH_SCATTERED; } +#endif + /* get the volume attenuation and emission over line segment defined by * ray, with the assumption that there are no surfaces blocking light * between the endpoints */ ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(KernelGlobals *kg, PathState *state, ShaderData *sd, Ray *ray, PathRadiance *L, float3 *throughput, RNG *rng) { - shader_setup_from_volume(kg, sd, ray, state->bounce); + /* workaround to fix correlation bug in T38710, 
can find better solution + * in random number generator later, for now this is done here to not impact + * performance of rendering without volumes */ + RNG tmp_rng = cmj_hash(*rng, state->rng_offset); + bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack); - if(volume_stack_is_heterogeneous(kg, state->volume_stack)) - return kernel_volume_integrate_heterogeneous(kg, state, ray, sd, L, throughput, rng); +#if 0 + /* debugging code to compare decoupled ray marching */ + VolumeSegment segment; + + shader_setup_from_volume(kg, sd, ray, state->bounce, state->transparent_bounce); + kernel_volume_decoupled_record(kg, state, ray, sd, &segment, heterogeneous); + + VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg, state, ray, sd, throughput, &tmp_rng, &segment); + + kernel_volume_decoupled_free(kg, &segment); + + return result; +#else + shader_setup_from_volume(kg, sd, ray, state->bounce, state->transparent_bounce); + + if(heterogeneous) + return kernel_volume_integrate_heterogeneous(kg, state, ray, sd, L, throughput, &tmp_rng); else - return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, rng); + return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, &tmp_rng); +#endif } /* Volume Stack @@ -501,13 +854,13 @@ ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(KernelGlobals ccl_device void kernel_volume_stack_init(KernelGlobals *kg, VolumeStack *stack) { /* todo: this assumes camera is always in air, need to detect when it isn't */ - if(kernel_data.background.volume_shader == SHADER_NO_ID) { - stack[0].shader = SHADER_NO_ID; + if(kernel_data.background.volume_shader == SHADER_NONE) { + stack[0].shader = SHADER_NONE; } else { stack[0].shader = kernel_data.background.volume_shader; - stack[0].object = ~0; - stack[1].shader = SHADER_NO_ID; + stack[0].object = PRIM_NONE; + stack[1].shader = SHADER_NONE; } } @@ -522,14 +875,14 @@ ccl_device void 
kernel_volume_stack_enter_exit(KernelGlobals *kg, ShaderData *sd if(sd->flag & SD_BACKFACING) { /* exit volume object: remove from stack */ - for(int i = 0; stack[i].shader != SHADER_NO_ID; i++) { + for(int i = 0; stack[i].shader != SHADER_NONE; i++) { if(stack[i].object == sd->object) { /* shift back next stack entries */ do { stack[i] = stack[i+1]; i++; } - while(stack[i].shader != SHADER_NO_ID); + while(stack[i].shader != SHADER_NONE); return; } @@ -539,7 +892,7 @@ ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg, ShaderData *sd /* enter volume object: add to stack */ int i; - for(i = 0; stack[i].shader != SHADER_NO_ID; i++) { + for(i = 0; stack[i].shader != SHADER_NONE; i++) { /* already in the stack? then we have nothing to do */ if(stack[i].object == sd->object) return; @@ -552,7 +905,7 @@ ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg, ShaderData *sd /* add to the end of the stack */ stack[i].shader = sd->shader; stack[i].object = sd->object; - stack[i+1].shader = SHADER_NO_ID; + stack[i+1].shader = SHADER_NONE; } } diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp index 4fad66be6e1..54894ea19eb 100644 --- a/intern/cycles/kernel/osl/osl_services.cpp +++ b/intern/cycles/kernel/osl/osl_services.cpp @@ -30,18 +30,16 @@ #include "kernel_compat_cpu.h" #include "kernel_globals.h" -#include "kernel_montecarlo.h" +#include "kernel_random.h" #include "kernel_projection.h" #include "kernel_differential.h" -#include "kernel_object.h" -#include "kernel_random.h" -#include "kernel_bvh.h" -#include "kernel_triangle.h" -#include "kernel_curve.h" -#include "kernel_primitive.h" +#include "kernel_montecarlo.h" +#include "kernel_camera.h" + +#include "geom/geom.h" + #include "kernel_projection.h" #include "kernel_accumulate.h" -#include "kernel_camera.h" #include "kernel_shader.h" #ifdef WITH_PTEX @@ -52,11 +50,16 @@ CCL_NAMESPACE_BEGIN /* RenderServices implementation */ -#define 
COPY_MATRIX44(m1, m2) memcpy(m1, m2, sizeof(*m2)) +#define COPY_MATRIX44(m1, m2) { \ + CHECK_TYPE(m1, OSL::Matrix44*); \ + CHECK_TYPE(m2, Transform*); \ + memcpy(m1, m2, sizeof(*m2)); \ +} (void)0 /* static ustrings */ ustring OSLRenderServices::u_distance("distance"); ustring OSLRenderServices::u_index("index"); +ustring OSLRenderServices::u_world("world"); ustring OSLRenderServices::u_camera("camera"); ustring OSLRenderServices::u_screen("screen"); ustring OSLRenderServices::u_raster("raster"); @@ -87,6 +90,7 @@ ustring OSLRenderServices::u_curve_tangent_normal("geom:curve_tangent_normal"); #endif ustring OSLRenderServices::u_path_ray_length("path:ray_length"); ustring OSLRenderServices::u_path_ray_depth("path:ray_depth"); +ustring OSLRenderServices::u_path_transparent_depth("path:transparent_depth"); ustring OSLRenderServices::u_trace("trace"); ustring OSLRenderServices::u_hit("hit"); ustring OSLRenderServices::u_hitdist("hitdist"); @@ -131,7 +135,7 @@ bool OSLRenderServices::get_matrix(OSL::Matrix44 &result, OSL::TransformationPtr KernelGlobals *kg = sd->osl_globals; int object = sd->object; - if (object != ~0) { + if (object != OBJECT_NONE) { #ifdef __OBJECT_MOTION__ Transform tfm; @@ -161,7 +165,7 @@ bool OSLRenderServices::get_inverse_matrix(OSL::Matrix44 &result, OSL::Transform KernelGlobals *kg = sd->osl_globals; int object = sd->object; - if (object != ~0) { + if (object != OBJECT_NONE) { #ifdef __OBJECT_MOTION__ Transform itfm; @@ -206,6 +210,10 @@ bool OSLRenderServices::get_matrix(OSL::Matrix44 &result, ustring from, float ti COPY_MATRIX44(&result, &tfm); return true; } + else if (from == u_world) { + result.makeIdentity(); + return true; + } return false; } @@ -234,6 +242,10 @@ bool OSLRenderServices::get_inverse_matrix(OSL::Matrix44 &result, ustring to, fl COPY_MATRIX44(&result, &tfm); return true; } + else if (to == u_world) { + result.makeIdentity(); + return true; + } return false; } @@ -246,7 +258,7 @@ bool 
OSLRenderServices::get_matrix(OSL::Matrix44 &result, OSL::TransformationPtr const ShaderData *sd = (const ShaderData *)xform; int object = sd->object; - if (object != ~0) { + if (object != OBJECT_NONE) { #ifdef __OBJECT_MOTION__ Transform tfm = sd->ob_tfm; #else @@ -271,7 +283,7 @@ bool OSLRenderServices::get_inverse_matrix(OSL::Matrix44 &result, OSL::Transform const ShaderData *sd = (const ShaderData *)xform; int object = sd->object; - if (object != ~0) { + if (object != OBJECT_NONE) { #ifdef __OBJECT_MOTION__ Transform tfm = sd->ob_itfm; #else @@ -525,7 +537,8 @@ static bool get_mesh_element_attribute(KernelGlobals *kg, const ShaderData *sd, const TypeDesc& type, bool derivatives, void *val) { if (attr.type == TypeDesc::TypePoint || attr.type == TypeDesc::TypeVector || - attr.type == TypeDesc::TypeNormal || attr.type == TypeDesc::TypeColor) { + attr.type == TypeDesc::TypeNormal || attr.type == TypeDesc::TypeColor) + { float3 fval[3]; fval[0] = primitive_attribute_float3(kg, sd, attr.elem, attr.offset, (derivatives) ? &fval[1] : NULL, (derivatives) ? 
&fval[2] : NULL); @@ -596,44 +609,44 @@ bool OSLRenderServices::get_object_standard_attribute(KernelGlobals *kg, ShaderD /* Particle Attributes */ else if (name == u_particle_index) { - uint particle_id = object_particle_id(kg, sd->object); + int particle_id = object_particle_id(kg, sd->object); float f = particle_index(kg, particle_id); return set_attribute_float(f, type, derivatives, val); } else if (name == u_particle_age) { - uint particle_id = object_particle_id(kg, sd->object); + int particle_id = object_particle_id(kg, sd->object); float f = particle_age(kg, particle_id); return set_attribute_float(f, type, derivatives, val); } else if (name == u_particle_lifetime) { - uint particle_id = object_particle_id(kg, sd->object); - float f= particle_lifetime(kg, particle_id); + int particle_id = object_particle_id(kg, sd->object); + float f = particle_lifetime(kg, particle_id); return set_attribute_float(f, type, derivatives, val); } else if (name == u_particle_location) { - uint particle_id = object_particle_id(kg, sd->object); + int particle_id = object_particle_id(kg, sd->object); float3 f = particle_location(kg, particle_id); return set_attribute_float3(f, type, derivatives, val); } #if 0 /* unsupported */ else if (name == u_particle_rotation) { - uint particle_id = object_particle_id(kg, sd->object); + int particle_id = object_particle_id(kg, sd->object); float4 f = particle_rotation(kg, particle_id); return set_attribute_float4(f, type, derivatives, val); } #endif else if (name == u_particle_size) { - uint particle_id = object_particle_id(kg, sd->object); + int particle_id = object_particle_id(kg, sd->object); float f = particle_size(kg, particle_id); return set_attribute_float(f, type, derivatives, val); } else if (name == u_particle_velocity) { - uint particle_id = object_particle_id(kg, sd->object); + int particle_id = object_particle_id(kg, sd->object); float3 f = particle_velocity(kg, particle_id); return set_attribute_float3(f, type, derivatives, val); 
} else if (name == u_particle_angular_velocity) { - uint particle_id = object_particle_id(kg, sd->object); + int particle_id = object_particle_id(kg, sd->object); float3 f = particle_angular_velocity(kg, particle_id); return set_attribute_float3(f, type, derivatives, val); } @@ -644,12 +657,17 @@ bool OSLRenderServices::get_object_standard_attribute(KernelGlobals *kg, ShaderD } else if ((name == u_geom_trianglevertices || name == u_geom_polyvertices) #ifdef __HAIR__ - && sd->segment == ~0) { + && sd->type & PRIMITIVE_ALL_TRIANGLE) #else - ) { + ) #endif + { float3 P[3]; - triangle_vertices(kg, sd->prim, P); + + if(sd->type & PRIMITIVE_TRIANGLE) + triangle_vertices(kg, sd->prim, P); + else + motion_triangle_vertices(kg, sd->object, sd->prim, sd->time, P); if(!(sd->flag & SD_TRANSFORM_APPLIED)) { object_position_transform(kg, sd, &P[0]); @@ -670,7 +688,7 @@ bool OSLRenderServices::get_object_standard_attribute(KernelGlobals *kg, ShaderD #ifdef __HAIR__ /* Hair Attributes */ else if (name == u_is_curve) { - float f = (sd->segment != ~0); + float f = (sd->type & PRIMITIVE_ALL_CURVE) != 0; return set_attribute_float(f, type, derivatives, val); } else if (name == u_curve_thickness) { @@ -699,13 +717,18 @@ bool OSLRenderServices::get_background_attribute(KernelGlobals *kg, ShaderData * int f = sd->ray_depth; return set_attribute_int(f, type, derivatives, val); } + else if (name == u_path_transparent_depth) { + /* Ray Depth */ + int f = sd->transparent_depth; + return set_attribute_int(f, type, derivatives, val); + } else if (name == u_ndc) { /* NDC coordinates with special exception for otho */ OSLThreadData *tdata = kg->osl_tdata; OSL::ShaderGlobals *globals = &tdata->globals; float3 ndc[3]; - if((globals->raytype & PATH_RAY_CAMERA) && sd->object == ~0 && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) { + if((globals->raytype & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) { ndc[0] = camera_world_to_ndc(kg, sd, sd->ray_P); 
if(derivatives) { @@ -733,7 +756,9 @@ bool OSLRenderServices::get_attribute(void *renderstate, bool derivatives, ustri { ShaderData *sd = (ShaderData *)renderstate; KernelGlobals *kg = sd->osl_globals; - int object, prim, segment; + bool is_curve; + int object; + // int prim; /* lookup of attribute on another object */ if (object_name != u_empty) { @@ -743,24 +768,20 @@ bool OSLRenderServices::get_attribute(void *renderstate, bool derivatives, ustri return false; object = it->second; - prim = ~0; - segment = ~0; + // prim = PRIM_NONE; + is_curve = false; } else { object = sd->object; - prim = sd->prim; -#ifdef __HAIR__ - segment = sd->segment; -#else - segment = ~0; -#endif + // prim = sd->prim; + is_curve = (sd->type & PRIMITIVE_ALL_CURVE) != 0; - if (object == ~0) + if (object == OBJECT_NONE) return get_background_attribute(kg, sd, name, type, derivatives, val); } /* find attribute on object */ - object = object*ATTR_PRIM_TYPES + (segment != ~0); + object = object*ATTR_PRIM_TYPES + (is_curve == true); OSLGlobals::AttributeMap& attribute_map = kg->osl->attribute_map[object]; OSLGlobals::AttributeMap::iterator it = attribute_map.find(name); @@ -769,8 +790,8 @@ bool OSLRenderServices::get_attribute(void *renderstate, bool derivatives, ustri if (attr.elem != ATTR_ELEMENT_OBJECT) { /* triangle and vertex attributes */ - if (prim != ~0) - return get_mesh_element_attribute(kg, sd, attr, type, derivatives, val); + if(get_mesh_element_attribute(kg, sd, attr, type, derivatives, val)) + return true; else return get_mesh_attribute(kg, sd, attr, type, derivatives, val); } @@ -1001,12 +1022,13 @@ bool OSLRenderServices::trace(TraceOpt &options, OSL::ShaderGlobals *sg, tracedata->ray = ray; tracedata->setup = false; tracedata->init = true; + tracedata->sd.osl_globals = sd->osl_globals; /* raytrace */ #ifdef __HAIR__ - return scene_intersect(sd->osl_globals, &ray, ~0, &tracedata->isect, NULL, 0.0f, 0.0f); + return scene_intersect(sd->osl_globals, &ray, PATH_RAY_ALL_VISIBILITY, 
&tracedata->isect, NULL, 0.0f, 0.0f); #else - return scene_intersect(sd->osl_globals, &ray, ~0, &tracedata->isect); + return scene_intersect(sd->osl_globals, &ray, PATH_RAY_ALL_VISIBILITY, &tracedata->isect); #endif } @@ -1018,9 +1040,9 @@ bool OSLRenderServices::getmessage(OSL::ShaderGlobals *sg, ustring source, ustri if(source == u_trace && tracedata->init) { if(name == u_hit) { - return set_attribute_int((tracedata->isect.prim != ~0), type, derivatives, val); + return set_attribute_int((tracedata->isect.prim != PRIM_NONE), type, derivatives, val); } - else if(tracedata->isect.prim != ~0) { + else if(tracedata->isect.prim != PRIM_NONE) { if(name == u_hitdist) { float f[3] = {tracedata->isect.t, 0.0f, 0.0f}; return set_attribute_float(f, type, derivatives, val); @@ -1033,8 +1055,9 @@ bool OSLRenderServices::getmessage(OSL::ShaderGlobals *sg, ustring source, ustri /* lazy shader data setup */ ShaderData *original_sd = (ShaderData *)(sg->renderstate); int bounce = original_sd->ray_depth + 1; + int transparent_bounce = original_sd->transparent_depth; - shader_setup_from_ray(kg, sd, &tracedata->isect, &tracedata->ray, bounce); + shader_setup_from_ray(kg, sd, &tracedata->isect, &tracedata->ray, bounce, transparent_bounce); tracedata->setup = true; } diff --git a/intern/cycles/kernel/osl/osl_services.h b/intern/cycles/kernel/osl/osl_services.h index 479b6da1afb..069722d81b6 100644 --- a/intern/cycles/kernel/osl/osl_services.h +++ b/intern/cycles/kernel/osl/osl_services.h @@ -110,12 +110,13 @@ public: ustring dataname, TypeDesc datatype, void *data); static bool get_background_attribute(KernelGlobals *kg, ShaderData *sd, ustring name, - TypeDesc type, bool derivatives, void *val); + TypeDesc type, bool derivatives, void *val); static bool get_object_standard_attribute(KernelGlobals *kg, ShaderData *sd, ustring name, - TypeDesc type, bool derivatives, void *val); + TypeDesc type, bool derivatives, void *val); static ustring u_distance; static ustring u_index; + static 
ustring u_world; static ustring u_camera; static ustring u_screen; static ustring u_raster; @@ -144,6 +145,7 @@ public: static ustring u_curve_tangent_normal; static ustring u_path_ray_length; static ustring u_path_ray_depth; + static ustring u_path_transparent_depth; static ustring u_trace; static ustring u_hit; static ustring u_hitdist; diff --git a/intern/cycles/kernel/osl/osl_shader.cpp b/intern/cycles/kernel/osl/osl_shader.cpp index 554f647df7c..843dcdd0985 100644 --- a/intern/cycles/kernel/osl/osl_shader.cpp +++ b/intern/cycles/kernel/osl/osl_shader.cpp @@ -18,7 +18,8 @@ #include "kernel_montecarlo.h" #include "kernel_types.h" #include "kernel_globals.h" -#include "kernel_object.h" + +#include "geom/geom_object.h" #include "closure/bsdf_diffuse.h" #include "closure/bssrdf.h" @@ -112,7 +113,7 @@ static void shaderdata_to_shaderglobals(KernelGlobals *kg, ShaderData *sd, globals->dvdy = sd->dv.dy; globals->dPdu = TO_VEC3(sd->dPdu); globals->dPdv = TO_VEC3(sd->dPdv); - globals->surfacearea = (sd->object == ~0) ? 1.0f : object_surface_area(kg, sd->object); + globals->surfacearea = (sd->object == OBJECT_NONE) ? 1.0f : object_surface_area(kg, sd->object); globals->time = sd->time; /* booleans */ @@ -408,8 +409,9 @@ static void flatten_volume_closure_tree(ShaderData *sd, sc.data1 = volume->sc.data1; /* add */ - if(sc.sample_weight > CLOSURE_WEIGHT_CUTOFF && - sd->num_closure < MAX_CLOSURE) { + if((sc.sample_weight > CLOSURE_WEIGHT_CUTOFF) && + (sd->num_closure < MAX_CLOSURE)) + { sd->closure[sd->num_closure++] = sc; sd->flag |= volume->shaderdata_flag(); } @@ -535,7 +537,7 @@ int OSLShader::find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id, /* for OSL, a hash map is used to lookup the attribute by name. 
*/ int object = sd->object*ATTR_PRIM_TYPES; #ifdef __HAIR__ - if(sd->segment != ~0) object += ATTR_PRIM_CURVE; + if(sd->type & PRIMITIVE_ALL_CURVE) object += ATTR_PRIM_CURVE; #endif OSLGlobals::AttributeMap &attr_map = kg->osl->attribute_map[object]; @@ -546,7 +548,7 @@ int OSLShader::find_attribute(KernelGlobals *kg, const ShaderData *sd, uint id, const OSLGlobals::Attribute &osl_attr = it->second; *elem = osl_attr.elem; - if(sd->prim == ~0 && (AttributeElement)osl_attr.elem != ATTR_ELEMENT_MESH) + if(sd->prim == PRIM_NONE && (AttributeElement)osl_attr.elem != ATTR_ELEMENT_MESH) return ATTR_STD_NOT_FOUND; /* return result */ diff --git a/intern/cycles/kernel/shaders/CMakeLists.txt b/intern/cycles/kernel/shaders/CMakeLists.txt index 045abdb80af..5518d652bf9 100644 --- a/intern/cycles/kernel/shaders/CMakeLists.txt +++ b/intern/cycles/kernel/shaders/CMakeLists.txt @@ -77,6 +77,7 @@ set(SRC_OSL node_wave_texture.osl node_wireframe.osl node_hair_bsdf.osl + node_uv_map.osl ) set(SRC_OSL_HEADERS diff --git a/intern/cycles/kernel/shaders/node_absorption_volume.osl b/intern/cycles/kernel/shaders/node_absorption_volume.osl index 69c4c0ef7af..6bac83ba4f5 100644 --- a/intern/cycles/kernel/shaders/node_absorption_volume.osl +++ b/intern/cycles/kernel/shaders/node_absorption_volume.osl @@ -21,6 +21,6 @@ shader node_absorption_volume( float Density = 1.0, output closure color Volume = 0) { - Volume = ((color(1.0, 1.0, 1.0) - Color) * Density) * absorption(); + Volume = ((color(1.0, 1.0, 1.0) - Color) * max(Density, 0.0)) * absorption(); } diff --git a/intern/cycles/kernel/shaders/node_fresnel.osl b/intern/cycles/kernel/shaders/node_fresnel.osl index 8c59d5bb512..7ef553c0f39 100644 --- a/intern/cycles/kernel/shaders/node_fresnel.osl +++ b/intern/cycles/kernel/shaders/node_fresnel.osl @@ -23,7 +23,7 @@ shader node_fresnel( output float Fac = 0.0) { float f = max(IOR, 1e-5); - float eta = backfacing() ? 1.0 / f: f; + float eta = backfacing() ? 
1.0 / f : f; float cosi = dot(I, Normal); Fac = fresnel_dielectric_cos(cosi, eta); } diff --git a/intern/cycles/kernel/shaders/node_glass_bsdf.osl b/intern/cycles/kernel/shaders/node_glass_bsdf.osl index 96934199621..b3d6133553b 100644 --- a/intern/cycles/kernel/shaders/node_glass_bsdf.osl +++ b/intern/cycles/kernel/shaders/node_glass_bsdf.osl @@ -26,7 +26,7 @@ shader node_glass_bsdf( output closure color BSDF = 0) { float f = max(IOR, 1e-5); - float eta = backfacing() ? 1.0 / f: f; + float eta = backfacing() ? 1.0 / f : f; float cosi = dot(I, Normal); float Fr = fresnel_dielectric_cos(cosi, eta); diff --git a/intern/cycles/kernel/shaders/node_image_texture.osl b/intern/cycles/kernel/shaders/node_image_texture.osl index caa755636b9..7238a1e8862 100644 --- a/intern/cycles/kernel/shaders/node_image_texture.osl +++ b/intern/cycles/kernel/shaders/node_image_texture.osl @@ -17,9 +17,9 @@ #include "stdosl.h" #include "node_color.h" -color image_texture_lookup(string filename, string color_space, float u, float v, output float Alpha, int use_alpha, int is_float) +color image_texture_lookup(string filename, string color_space, float u, float v, output float Alpha, int use_alpha, int is_float, string interpolation) { - color rgb = (color)texture(filename, u, 1.0 - v, "wrap", "periodic", "alpha", Alpha); + color rgb = (color)texture(filename, u, 1.0 - v, "wrap", "periodic", "interp", interpolation, "alpha", Alpha); if (use_alpha) { rgb = color_unpremultiply(rgb, Alpha); @@ -42,6 +42,7 @@ shader node_image_texture( string filename = "", string color_space = "sRGB", string projection = "Flat", + string interpolation = "smartcubic", float projection_blend = 0.0, int is_float = 1, int use_alpha = 1, @@ -54,7 +55,7 @@ shader node_image_texture( p = transform(mapping, p); if (projection == "Flat") { - Color = image_texture_lookup(filename, color_space, p[0], p[1], Alpha, use_alpha, is_float); + Color = image_texture_lookup(filename, color_space, p[0], p[1], Alpha, use_alpha, 
is_float, interpolation); } else if (projection == "Box") { /* object space normal */ @@ -119,15 +120,15 @@ shader node_image_texture( float tmp_alpha; if (weight[0] > 0.0) { - Color += weight[0] * image_texture_lookup(filename, color_space, p[1], p[2], tmp_alpha, use_alpha, is_float); + Color += weight[0] * image_texture_lookup(filename, color_space, p[1], p[2], tmp_alpha, use_alpha, is_float, interpolation); Alpha += weight[0] * tmp_alpha; } if (weight[1] > 0.0) { - Color += weight[1] * image_texture_lookup(filename, color_space, p[0], p[2], tmp_alpha, use_alpha, is_float); + Color += weight[1] * image_texture_lookup(filename, color_space, p[0], p[2], tmp_alpha, use_alpha, is_float, interpolation); Alpha += weight[1] * tmp_alpha; } if (weight[2] > 0.0) { - Color += weight[2] * image_texture_lookup(filename, color_space, p[1], p[0], tmp_alpha, use_alpha, is_float); + Color += weight[2] * image_texture_lookup(filename, color_space, p[1], p[0], tmp_alpha, use_alpha, is_float, interpolation); Alpha += weight[2] * tmp_alpha; } } diff --git a/intern/cycles/kernel/shaders/node_light_path.osl b/intern/cycles/kernel/shaders/node_light_path.osl index 599c7f5a262..95fbcabf917 100644 --- a/intern/cycles/kernel/shaders/node_light_path.osl +++ b/intern/cycles/kernel/shaders/node_light_path.osl @@ -26,7 +26,8 @@ shader node_light_path( output float IsTransmissionRay = 0.0, output float IsVolumeScatterRay = 0.0, output float RayLength = 0.0, - output float RayDepth = 0.0) + output float RayDepth = 0.0, + output float TransparentDepth = 0.0) { IsCameraRay = raytype("camera"); IsShadowRay = raytype("shadow"); @@ -42,5 +43,9 @@ shader node_light_path( int ray_depth; getattribute("path:ray_depth", ray_depth); RayDepth = (float)ray_depth; + + int transparent_depth; + getattribute("path:transparent_depth", transparent_depth); + TransparentDepth = (float)transparent_depth; } diff --git a/intern/cycles/kernel/shaders/node_math.osl b/intern/cycles/kernel/shaders/node_math.osl index 
066e5f8dbe1..abb6a359e75 100644 --- a/intern/cycles/kernel/shaders/node_math.osl +++ b/intern/cycles/kernel/shaders/node_math.osl @@ -93,6 +93,8 @@ shader node_math( Value = Value1 > Value2; else if (type == "Modulo") Value = safe_modulo(Value1, Value2); + else if (type == "Absolute") + Value = fabs(Value1); if (Clamp) Value = clamp(Value, 0.0, 1.0); diff --git a/intern/cycles/kernel/shaders/node_mix.osl b/intern/cycles/kernel/shaders/node_mix.osl index c2c397c6446..dd54fd814de 100644 --- a/intern/cycles/kernel/shaders/node_mix.osl +++ b/intern/cycles/kernel/shaders/node_mix.osl @@ -88,7 +88,7 @@ color node_mix_diff(float t, color col1, color col2) color node_mix_dark(float t, color col1, color col2) { - return min(col1, col2 * t); + return min(col1, col2) * t + col1 * (1.0 - t); } color node_mix_light(float t, color col1, color col2) diff --git a/intern/cycles/kernel/shaders/node_refraction_bsdf.osl b/intern/cycles/kernel/shaders/node_refraction_bsdf.osl index f87b3a5dd86..4a32415b482 100644 --- a/intern/cycles/kernel/shaders/node_refraction_bsdf.osl +++ b/intern/cycles/kernel/shaders/node_refraction_bsdf.osl @@ -25,7 +25,7 @@ shader node_refraction_bsdf( output closure color BSDF = 0) { float f = max(IOR, 1e-5); - float eta = backfacing() ? 1.0 / f: f; + float eta = backfacing() ? 
1.0 / f : f; if (distribution == "Sharp") BSDF = Color * refraction(Normal, eta); diff --git a/intern/cycles/kernel/shaders/node_scatter_volume.osl b/intern/cycles/kernel/shaders/node_scatter_volume.osl index bf23abbf933..77c157bd92b 100644 --- a/intern/cycles/kernel/shaders/node_scatter_volume.osl +++ b/intern/cycles/kernel/shaders/node_scatter_volume.osl @@ -22,6 +22,6 @@ shader node_scatter_volume( float Anisotropy = 0.0, output closure color Volume = 0) { - Volume = (Color * Density) * henyey_greenstein(Anisotropy); + Volume = (Color * max(Density, 0.0)) * henyey_greenstein(Anisotropy); } diff --git a/intern/cycles/kernel/shaders/node_uv_map.osl b/intern/cycles/kernel/shaders/node_uv_map.osl new file mode 100644 index 00000000000..01c984aff4c --- /dev/null +++ b/intern/cycles/kernel/shaders/node_uv_map.osl @@ -0,0 +1,45 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License + */ + +#include "stdosl.h" + +shader node_uv_map( + int from_dupli = 0, + string name = "", + string bump_offset = "center", + output point UV = point(0.0, 0.0, 0.0)) +{ + if (from_dupli) { + getattribute("geom:dupli_uv", UV); + } + else { + if (name == "") + getattribute("geom:uv", UV); + else + getattribute(name, UV); + } + + if (bump_offset == "dx") { + if (!from_dupli) { + UV += Dx(UV); + } + } + else if (bump_offset == "dy") { + if (!from_dupli) { + UV += Dy(UV); + } + } +} diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h index 96c7cefbcb2..dbf59c60cb0 100644 --- a/intern/cycles/kernel/svm/svm.h +++ b/intern/cycles/kernel/svm/svm.h @@ -182,10 +182,9 @@ CCL_NAMESPACE_BEGIN /* Main Interpreter Loop */ -ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ShaderType type, float randb, int path_flag) +ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, ShaderType type, int path_flag) { float stack[SVM_STACK_SIZE]; - float closure_weight = 1.0f; int offset = sd->shader & SHADER_MASK; while(1) { @@ -200,7 +199,7 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, Shade break; } case NODE_CLOSURE_BSDF: - svm_node_closure_bsdf(kg, sd, stack, node, randb, path_flag, &offset); + svm_node_closure_bsdf(kg, sd, stack, node, path_flag, &offset); break; case NODE_CLOSURE_EMISSION: svm_node_closure_emission(sd, stack, node); @@ -227,13 +226,15 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, Shade svm_node_emission_weight(kg, sd, stack, node); break; case NODE_MIX_CLOSURE: - svm_node_mix_closure(sd, stack, node, &offset, &randb); + svm_node_mix_closure(sd, stack, node); break; - case NODE_ADD_CLOSURE: - svm_node_add_closure(sd, stack, node.y, node.z, &offset, &randb, &closure_weight); + case NODE_JUMP_IF_ZERO: + if(stack_load_float(stack, 
node.z) == 0.0f) + offset += node.y; break; - case NODE_JUMP: - offset = node.y; + case NODE_JUMP_IF_ONE: + if(stack_load_float(stack, node.z) == 1.0f) + offset += node.y; break; #ifdef __IMAGE_TEXTURES__ case NODE_TEX_IMAGE: @@ -437,9 +438,6 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg, ShaderData *sd, Shade #endif case NODE_END: default: -#ifndef __MULTI_CLOSURE__ - sd->closure.weight *= closure_weight; -#endif return; } } diff --git a/intern/cycles/kernel/svm/svm_attribute.h b/intern/cycles/kernel/svm/svm_attribute.h index 4c53bfd74fa..fd0ea7fef31 100644 --- a/intern/cycles/kernel/svm/svm_attribute.h +++ b/intern/cycles/kernel/svm/svm_attribute.h @@ -22,12 +22,12 @@ ccl_device void svm_node_attr_init(KernelGlobals *kg, ShaderData *sd, uint4 node, NodeAttributeType *type, NodeAttributeType *mesh_type, AttributeElement *elem, int *offset, uint *out_offset) { - if(sd->object != ~0 && sd->prim != ~0) { + if(sd->object != OBJECT_NONE) { /* find attribute by unique id */ uint id = node.y; uint attr_offset = sd->object*kernel_data.bvh.attributes_map_stride; #ifdef __HAIR__ - attr_offset = (sd->segment == ~0)? attr_offset: attr_offset + ATTR_PRIM_CURVE; + attr_offset = (sd->type & PRIMITIVE_ALL_CURVE)? 
attr_offset + ATTR_PRIM_CURVE: attr_offset; #endif uint4 attr_map = kernel_tex_fetch(__attributes_map, attr_offset); diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h index 2813e38d8f7..a3770877544 100644 --- a/intern/cycles/kernel/svm/svm_closure.h +++ b/intern/cycles/kernel/svm/svm_closure.h @@ -51,7 +51,6 @@ ccl_device void svm_node_glass_setup(ShaderData *sd, ShaderClosure *sc, int type ccl_device_inline ShaderClosure *svm_node_closure_get_non_bsdf(ShaderData *sd, ClosureType type, float mix_weight) { -#ifdef __MULTI_CLOSURE__ ShaderClosure *sc = &sd->closure[sd->num_closure]; if(sd->num_closure < MAX_CLOSURE) { @@ -65,14 +64,10 @@ ccl_device_inline ShaderClosure *svm_node_closure_get_non_bsdf(ShaderData *sd, C } return NULL; -#else - return &sd->closure; -#endif } ccl_device_inline ShaderClosure *svm_node_closure_get_bsdf(ShaderData *sd, float mix_weight) { -#ifdef __MULTI_CLOSURE__ ShaderClosure *sc = &sd->closure[sd->num_closure]; float3 weight = sc->weight * mix_weight; float sample_weight = fabsf(average(weight)); @@ -88,14 +83,10 @@ ccl_device_inline ShaderClosure *svm_node_closure_get_bsdf(ShaderData *sd, float } return NULL; -#else - return &sd->closure; -#endif } ccl_device_inline ShaderClosure *svm_node_closure_get_absorption(ShaderData *sd, float mix_weight) { -#ifdef __MULTI_CLOSURE__ ShaderClosure *sc = &sd->closure[sd->num_closure]; float3 weight = (make_float3(1.0f, 1.0f, 1.0f) - sc->weight) * mix_weight; float sample_weight = fabsf(average(weight)); @@ -111,16 +102,12 @@ ccl_device_inline ShaderClosure *svm_node_closure_get_absorption(ShaderData *sd, } return NULL; -#else - return &sd->closure; -#endif } -ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, float randb, int path_flag, int *offset) +ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int path_flag, int *offset) { uint type, param1_offset, 
param2_offset; -#ifdef __MULTI_CLOSURE__ uint mix_weight_offset; decode_node_uchar4(node.y, &type, &param1_offset, &param2_offset, &mix_weight_offset); float mix_weight = (stack_valid(mix_weight_offset)? stack_load_float(stack, mix_weight_offset): 1.0f); @@ -132,13 +119,6 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * return; float3 N = stack_valid(data_node.x)? stack_load_float3(stack, data_node.x): sd->N; -#else - decode_node_uchar4(node.y, &type, &param1_offset, &param2_offset, NULL); - float mix_weight = 1.0f; - - uint4 data_node = read_node(kg, offset); - float3 N = stack_valid(data_node.x)? stack_load_float3(stack, data_node.x): sd->N; -#endif float param1 = (stack_valid(param1_offset))? stack_load_float(stack, param1_offset): __uint_as_float(node.z); float param2 = (stack_valid(param2_offset))? stack_load_float(stack, param2_offset): __uint_as_float(node.w); @@ -255,7 +235,6 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * float fresnel = fresnel_dielectric_cos(cosNO, eta); float roughness = param1; -#ifdef __MULTI_CLOSURE__ /* reflection */ ShaderClosure *sc = &sd->closure[sd->num_closure]; float3 weight = sc->weight; @@ -279,15 +258,6 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * sc->N = N; svm_node_glass_setup(sd, sc, type, eta, roughness, true); } -#else - ShaderClosure *sc = svm_node_closure_get_bsdf(sd, mix_weight); - - if(sc) { - sc->N = N; - bool refract = (randb > fresnel); - svm_node_glass_setup(sd, sc, type, eta, roughness, refract); - } -#endif break; } @@ -364,10 +334,16 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * case CLOSURE_BSDF_HAIR_REFLECTION_ID: case CLOSURE_BSDF_HAIR_TRANSMISSION_ID: { - if(sd->flag & SD_BACKFACING && sd->segment != ~0) { + if(sd->flag & SD_BACKFACING && sd->type & PRIMITIVE_ALL_CURVE) { ShaderClosure *sc = svm_node_closure_get_bsdf(sd, mix_weight); + if(sc) { - sc->weight = 
make_float3(1.0f,1.0f,1.0f); + /* todo: giving a fixed weight here will cause issues when + * mixing multiple BSDFS. energey will not be conserved and + * the throughput can blow up after multiple bounces. we + * better figure out a way to skip backfaces from rays + * spawned by transmission from the front */ + sc->weight = make_float3(1.0f, 1.0f, 1.0f); sc->N = N; sd->flag |= bsdf_transparent_setup(sc); } @@ -381,12 +357,14 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float * sc->data0 = param1; sc->data1 = param2; sc->offset = -stack_load_float(stack, data_node.z); - if(sd->segment == ~0) { + + if(!(sd->type & PRIMITIVE_ALL_CURVE)) { sc->T = normalize(sd->dPdv); sc->offset = 0.0f; } else sc->T = sd->dPdu; + if(type == CLOSURE_BSDF_HAIR_REFLECTION_ID) { sd->flag |= bsdf_hair_reflection_setup(sc); } @@ -484,21 +462,16 @@ ccl_device void svm_node_closure_volume(KernelGlobals *kg, ShaderData *sd, float #ifdef __VOLUME__ uint type, param1_offset, param2_offset; -#ifdef __MULTI_CLOSURE__ uint mix_weight_offset; decode_node_uchar4(node.y, &type, &param1_offset, &param2_offset, &mix_weight_offset); float mix_weight = (stack_valid(mix_weight_offset)? stack_load_float(stack, mix_weight_offset): 1.0f); if(mix_weight == 0.0f) return; -#else - decode_node_uchar4(node.y, &type, &param1_offset, &param2_offset, NULL); - float mix_weight = 1.0f; -#endif float param1 = (stack_valid(param1_offset))? stack_load_float(stack, param1_offset): __uint_as_float(node.z); float param2 = (stack_valid(param2_offset))? 
stack_load_float(stack, param2_offset): __uint_as_float(node.w); - float density = param1; + float density = fmaxf(param1, 0.0f); switch(type) { case CLOSURE_VOLUME_ABSORPTION_ID: { @@ -527,7 +500,6 @@ ccl_device void svm_node_closure_volume(KernelGlobals *kg, ShaderData *sd, float ccl_device void svm_node_closure_emission(ShaderData *sd, float *stack, uint4 node) { -#ifdef __MULTI_CLOSURE__ uint mix_weight_offset = node.y; if(stack_valid(mix_weight_offset)) { @@ -540,17 +512,12 @@ ccl_device void svm_node_closure_emission(ShaderData *sd, float *stack, uint4 no } else svm_node_closure_get_non_bsdf(sd, CLOSURE_EMISSION_ID, 1.0f); -#else - ShaderClosure *sc = &sd->closure; - sc->type = CLOSURE_EMISSION_ID; -#endif sd->flag |= SD_EMISSION; } ccl_device void svm_node_closure_background(ShaderData *sd, float *stack, uint4 node) { -#ifdef __MULTI_CLOSURE__ uint mix_weight_offset = node.y; if(stack_valid(mix_weight_offset)) { @@ -563,15 +530,10 @@ ccl_device void svm_node_closure_background(ShaderData *sd, float *stack, uint4 } else svm_node_closure_get_non_bsdf(sd, CLOSURE_BACKGROUND_ID, 1.0f); -#else - ShaderClosure *sc = &sd->closure; - sc->type = CLOSURE_BACKGROUND_ID; -#endif } ccl_device void svm_node_closure_holdout(ShaderData *sd, float *stack, uint4 node) { -#ifdef __MULTI_CLOSURE__ uint mix_weight_offset = node.y; if(stack_valid(mix_weight_offset)) { @@ -584,17 +546,12 @@ ccl_device void svm_node_closure_holdout(ShaderData *sd, float *stack, uint4 nod } else svm_node_closure_get_non_bsdf(sd, CLOSURE_HOLDOUT_ID, 1.0f); -#else - ShaderClosure *sc = &sd->closure; - sc->type = CLOSURE_HOLDOUT_ID; -#endif sd->flag |= SD_HOLDOUT; } ccl_device void svm_node_closure_ambient_occlusion(ShaderData *sd, float *stack, uint4 node) { -#ifdef __MULTI_CLOSURE__ uint mix_weight_offset = node.y; if(stack_valid(mix_weight_offset)) { @@ -607,10 +564,6 @@ ccl_device void svm_node_closure_ambient_occlusion(ShaderData *sd, float *stack, } else svm_node_closure_get_non_bsdf(sd, 
CLOSURE_AMBIENT_OCCLUSION_ID, 1.0f); -#else - ShaderClosure *sc = &sd->closure; - sc->type = CLOSURE_AMBIENT_OCCLUSION_ID; -#endif sd->flag |= SD_AO; } @@ -619,12 +572,8 @@ ccl_device void svm_node_closure_ambient_occlusion(ShaderData *sd, float *stack, ccl_device_inline void svm_node_closure_store_weight(ShaderData *sd, float3 weight) { -#ifdef __MULTI_CLOSURE__ if(sd->num_closure < MAX_CLOSURE) sd->closure[sd->num_closure].weight = weight; -#else - sd->closure.weight = weight; -#endif } ccl_device void svm_node_closure_set_weight(ShaderData *sd, uint r, uint g, uint b) @@ -637,7 +586,7 @@ ccl_device void svm_node_emission_set_weight_total(KernelGlobals *kg, ShaderData { float3 weight = make_float3(__uint_as_float(r), __uint_as_float(g), __uint_as_float(b)); - if(sd->object != ~0) + if(sd->object != OBJECT_NONE) weight /= object_surface_area(kg, sd->object); svm_node_closure_store_weight(sd, weight); @@ -659,16 +608,14 @@ ccl_device void svm_node_emission_weight(KernelGlobals *kg, ShaderData *sd, floa float strength = stack_load_float(stack, strength_offset); float3 weight = stack_load_float3(stack, color_offset)*strength; - if(total_power && sd->object != ~0) + if(total_power && sd->object != OBJECT_NONE) weight /= object_surface_area(kg, sd->object); svm_node_closure_store_weight(sd, weight); } -ccl_device void svm_node_mix_closure(ShaderData *sd, float *stack, - uint4 node, int *offset, float *randb) +ccl_device void svm_node_mix_closure(ShaderData *sd, float *stack, uint4 node) { -#ifdef __MULTI_CLOSURE__ /* fetch weight from blend input, previous mix closures, * and write to stack to be used by closure nodes later */ uint weight_offset, in_weight_offset, weight1_offset, weight2_offset; @@ -683,44 +630,6 @@ ccl_device void svm_node_mix_closure(ShaderData *sd, float *stack, stack_store_float(stack, weight1_offset, in_weight*(1.0f - weight)); if(stack_valid(weight2_offset)) stack_store_float(stack, weight2_offset, in_weight*weight); -#else - /* pick a closure 
and make the random number uniform over 0..1 again. - * closure 1 starts on the next node, for closure 2 the start is at an - * offset from the current node, so we jump */ - uint weight_offset = node.y; - uint node_jump = node.z; - float weight = stack_load_float(stack, weight_offset); - weight = clamp(weight, 0.0f, 1.0f); - - if(*randb < weight) { - *offset += node_jump; - *randb = *randb/weight; - } - else - *randb = (*randb - weight)/(1.0f - weight); -#endif -} - -ccl_device void svm_node_add_closure(ShaderData *sd, float *stack, uint unused, - uint node_jump, int *offset, float *randb, float *closure_weight) -{ -#ifdef __MULTI_CLOSURE__ - /* nothing to do, handled in compiler */ -#else - /* pick one of the two closures with probability 0.5. sampling quality - * is not going to be great, for that we'd need to evaluate the weights - * of the two closures being added */ - float weight = 0.5f; - - if(*randb < weight) { - *offset += node_jump; - *randb = *randb/weight; - } - else - *randb = (*randb - weight)/(1.0f - weight); - - *closure_weight *= 2.0f; -#endif } /* (Bump) normal */ diff --git a/intern/cycles/kernel/svm/svm_geometry.h b/intern/cycles/kernel/svm/svm_geometry.h index ad0cacb027a..fe681ec92af 100644 --- a/intern/cycles/kernel/svm/svm_geometry.h +++ b/intern/cycles/kernel/svm/svm_geometry.h @@ -98,44 +98,44 @@ ccl_device void svm_node_particle_info(KernelGlobals *kg, ShaderData *sd, float { switch(type) { case NODE_INFO_PAR_INDEX: { - uint particle_id = object_particle_id(kg, sd->object); + int particle_id = object_particle_id(kg, sd->object); stack_store_float(stack, out_offset, particle_index(kg, particle_id)); break; } case NODE_INFO_PAR_AGE: { - uint particle_id = object_particle_id(kg, sd->object); + int particle_id = object_particle_id(kg, sd->object); stack_store_float(stack, out_offset, particle_age(kg, particle_id)); break; } case NODE_INFO_PAR_LIFETIME: { - uint particle_id = object_particle_id(kg, sd->object); + int particle_id = 
object_particle_id(kg, sd->object); stack_store_float(stack, out_offset, particle_lifetime(kg, particle_id)); break; } case NODE_INFO_PAR_LOCATION: { - uint particle_id = object_particle_id(kg, sd->object); + int particle_id = object_particle_id(kg, sd->object); stack_store_float3(stack, out_offset, particle_location(kg, particle_id)); break; } - #if 0 /* XXX float4 currently not supported in SVM stack */ +#if 0 /* XXX float4 currently not supported in SVM stack */ case NODE_INFO_PAR_ROTATION: { - uint particle_id = object_particle_id(kg, sd->object); + int particle_id = object_particle_id(kg, sd->object); stack_store_float4(stack, out_offset, particle_rotation(kg, particle_id)); break; } - #endif +#endif case NODE_INFO_PAR_SIZE: { - uint particle_id = object_particle_id(kg, sd->object); + int particle_id = object_particle_id(kg, sd->object); stack_store_float(stack, out_offset, particle_size(kg, particle_id)); break; } case NODE_INFO_PAR_VELOCITY: { - uint particle_id = object_particle_id(kg, sd->object); + int particle_id = object_particle_id(kg, sd->object); stack_store_float3(stack, out_offset, particle_velocity(kg, particle_id)); break; } case NODE_INFO_PAR_ANGULAR_VELOCITY: { - uint particle_id = object_particle_id(kg, sd->object); + int particle_id = object_particle_id(kg, sd->object); stack_store_float3(stack, out_offset, particle_angular_velocity(kg, particle_id)); break; } @@ -153,7 +153,7 @@ ccl_device void svm_node_hair_info(KernelGlobals *kg, ShaderData *sd, float *sta switch(type) { case NODE_INFO_CURVE_IS_STRAND: { - data = (sd->segment != ~0); + data = (sd->type & PRIMITIVE_ALL_CURVE) != 0; stack_store_float(stack, out_offset, data); break; } diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h index bc76ea1e662..daf7c6652d2 100644 --- a/intern/cycles/kernel/svm/svm_image.h +++ b/intern/cycles/kernel/svm/svm_image.h @@ -60,31 +60,51 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float 
y, uint width = info.x; uint height = info.y; uint offset = info.z; - uint periodic = info.w; + uint periodic = (info.w & 0x1); + uint interpolation = info.w >> 1; + float4 r; int ix, iy, nix, niy; - float tx = svm_image_texture_frac(x*width, &ix); - float ty = svm_image_texture_frac(y*height, &iy); + if (interpolation == INTERPOLATION_CLOSEST) { + svm_image_texture_frac(x*width, &ix); + svm_image_texture_frac(y*height, &iy); - if(periodic) { - ix = svm_image_texture_wrap_periodic(ix, width); - iy = svm_image_texture_wrap_periodic(iy, height); + if(periodic) { + ix = svm_image_texture_wrap_periodic(ix, width); + iy = svm_image_texture_wrap_periodic(iy, height); + } + else { + ix = svm_image_texture_wrap_clamp(ix, width); + iy = svm_image_texture_wrap_clamp(iy, height); - nix = svm_image_texture_wrap_periodic(ix+1, width); - niy = svm_image_texture_wrap_periodic(iy+1, height); + } + r = svm_image_texture_read(kg, offset + ix + iy*width); } - else { - ix = svm_image_texture_wrap_clamp(ix, width); - iy = svm_image_texture_wrap_clamp(iy, height); + else { /* We default to linear interpolation if it is not closest */ + float tx = svm_image_texture_frac(x*width, &ix); + float ty = svm_image_texture_frac(y*height, &iy); - nix = svm_image_texture_wrap_clamp(ix+1, width); - niy = svm_image_texture_wrap_clamp(iy+1, height); - } + if(periodic) { + ix = svm_image_texture_wrap_periodic(ix, width); + iy = svm_image_texture_wrap_periodic(iy, height); - float4 r = (1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, offset + ix + iy*width); - r += (1.0f - ty)*tx*svm_image_texture_read(kg, offset + nix + iy*width); - r += ty*(1.0f - tx)*svm_image_texture_read(kg, offset + ix + niy*width); - r += ty*tx*svm_image_texture_read(kg, offset + nix + niy*width); + nix = svm_image_texture_wrap_periodic(ix+1, width); + niy = svm_image_texture_wrap_periodic(iy+1, height); + } + else { + ix = svm_image_texture_wrap_clamp(ix, width); + iy = svm_image_texture_wrap_clamp(iy, height); + + nix = 
svm_image_texture_wrap_clamp(ix+1, width); + niy = svm_image_texture_wrap_clamp(iy+1, height); + } + + + r = (1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, offset + ix + iy*width); + r += (1.0f - ty)*tx*svm_image_texture_read(kg, offset + nix + iy*width); + r += ty*(1.0f - tx)*svm_image_texture_read(kg, offset + ix + niy*width); + r += ty*tx*svm_image_texture_read(kg, offset + nix + niy*width); + } if(use_alpha && r.w != 1.0f && r.w != 0.0f) { float invw = 1.0f/r.w; @@ -129,8 +149,8 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, * - group by size and use a 3d texture, performance impact * - group into larger texture with some padding for correct lerp * - * also note that cuda has 128 textures limit, we use 100 now, since - * we still need some for other storage */ + * also note that cuda has a textures limit (128 for Fermi, 256 for Kepler), + * and we cannot use all since we still need some for other storage */ switch(id) { case 0: r = kernel_tex_image_interp(__tex_image_float_000, x, y); break; @@ -233,7 +253,62 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, case 97: r = kernel_tex_image_interp(__tex_image_097, x, y); break; case 98: r = kernel_tex_image_interp(__tex_image_098, x, y); break; case 99: r = kernel_tex_image_interp(__tex_image_099, x, y); break; - default: + +#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 300) + case 100: r = kernel_tex_image_interp(__tex_image_100, x, y); break; + case 101: r = kernel_tex_image_interp(__tex_image_101, x, y); break; + case 102: r = kernel_tex_image_interp(__tex_image_102, x, y); break; + case 103: r = kernel_tex_image_interp(__tex_image_103, x, y); break; + case 104: r = kernel_tex_image_interp(__tex_image_104, x, y); break; + case 105: r = kernel_tex_image_interp(__tex_image_105, x, y); break; + case 106: r = kernel_tex_image_interp(__tex_image_106, x, y); break; + case 107: r = kernel_tex_image_interp(__tex_image_107, x, y); break; + case 
108: r = kernel_tex_image_interp(__tex_image_108, x, y); break; + case 109: r = kernel_tex_image_interp(__tex_image_109, x, y); break; + case 110: r = kernel_tex_image_interp(__tex_image_110, x, y); break; + case 111: r = kernel_tex_image_interp(__tex_image_111, x, y); break; + case 112: r = kernel_tex_image_interp(__tex_image_112, x, y); break; + case 113: r = kernel_tex_image_interp(__tex_image_113, x, y); break; + case 114: r = kernel_tex_image_interp(__tex_image_114, x, y); break; + case 115: r = kernel_tex_image_interp(__tex_image_115, x, y); break; + case 116: r = kernel_tex_image_interp(__tex_image_116, x, y); break; + case 117: r = kernel_tex_image_interp(__tex_image_117, x, y); break; + case 118: r = kernel_tex_image_interp(__tex_image_118, x, y); break; + case 119: r = kernel_tex_image_interp(__tex_image_119, x, y); break; + case 120: r = kernel_tex_image_interp(__tex_image_120, x, y); break; + case 121: r = kernel_tex_image_interp(__tex_image_121, x, y); break; + case 122: r = kernel_tex_image_interp(__tex_image_122, x, y); break; + case 123: r = kernel_tex_image_interp(__tex_image_123, x, y); break; + case 124: r = kernel_tex_image_interp(__tex_image_124, x, y); break; + case 125: r = kernel_tex_image_interp(__tex_image_125, x, y); break; + case 126: r = kernel_tex_image_interp(__tex_image_126, x, y); break; + case 127: r = kernel_tex_image_interp(__tex_image_127, x, y); break; + case 128: r = kernel_tex_image_interp(__tex_image_128, x, y); break; + case 129: r = kernel_tex_image_interp(__tex_image_129, x, y); break; + case 130: r = kernel_tex_image_interp(__tex_image_130, x, y); break; + case 131: r = kernel_tex_image_interp(__tex_image_131, x, y); break; + case 132: r = kernel_tex_image_interp(__tex_image_132, x, y); break; + case 133: r = kernel_tex_image_interp(__tex_image_133, x, y); break; + case 134: r = kernel_tex_image_interp(__tex_image_134, x, y); break; + case 135: r = kernel_tex_image_interp(__tex_image_135, x, y); break; + case 136: r = 
kernel_tex_image_interp(__tex_image_136, x, y); break; + case 137: r = kernel_tex_image_interp(__tex_image_137, x, y); break; + case 138: r = kernel_tex_image_interp(__tex_image_138, x, y); break; + case 139: r = kernel_tex_image_interp(__tex_image_139, x, y); break; + case 140: r = kernel_tex_image_interp(__tex_image_140, x, y); break; + case 141: r = kernel_tex_image_interp(__tex_image_141, x, y); break; + case 142: r = kernel_tex_image_interp(__tex_image_142, x, y); break; + case 143: r = kernel_tex_image_interp(__tex_image_143, x, y); break; + case 144: r = kernel_tex_image_interp(__tex_image_144, x, y); break; + case 145: r = kernel_tex_image_interp(__tex_image_145, x, y); break; + case 146: r = kernel_tex_image_interp(__tex_image_146, x, y); break; + case 147: r = kernel_tex_image_interp(__tex_image_147, x, y); break; + case 148: r = kernel_tex_image_interp(__tex_image_148, x, y); break; + case 149: r = kernel_tex_image_interp(__tex_image_149, x, y); break; + case 150: r = kernel_tex_image_interp(__tex_image_150, x, y); break; +#endif + + default: kernel_assert(0); return make_float4(0.0f, 0.0f, 0.0f, 0.0f); } @@ -302,7 +377,7 @@ ccl_device void svm_node_tex_image_box(KernelGlobals *kg, ShaderData *sd, float float3 N = sd->N; N = sd->N; - if(sd->object != ~0) + if(sd->object != OBJECT_NONE) object_inverse_normal_transform(kg, sd, &N); /* project from direction vector to barycentric coordinates in triangles */ diff --git a/intern/cycles/kernel/svm/svm_light_path.h b/intern/cycles/kernel/svm/svm_light_path.h index 8968146c5e2..da544c63ae0 100644 --- a/intern/cycles/kernel/svm/svm_light_path.h +++ b/intern/cycles/kernel/svm/svm_light_path.h @@ -34,6 +34,7 @@ ccl_device void svm_node_light_path(ShaderData *sd, float *stack, uint type, uin case NODE_LP_backfacing: info = (sd->flag & SD_BACKFACING)? 
1.0f: 0.0f; break; case NODE_LP_ray_length: info = sd->ray_length; break; case NODE_LP_ray_depth: info = (float)sd->ray_depth; break; + case NODE_LP_ray_transparent: info = sd->transparent_depth; break; } stack_store_float(stack, out_offset, info); diff --git a/intern/cycles/kernel/svm/svm_math.h b/intern/cycles/kernel/svm/svm_math.h index bb46d443a6b..1ce9386e40e 100644 --- a/intern/cycles/kernel/svm/svm_math.h +++ b/intern/cycles/kernel/svm/svm_math.h @@ -56,6 +56,8 @@ ccl_device float svm_math(NodeMath type, float Fac1, float Fac2) Fac = Fac1 > Fac2; else if(type == NODE_MATH_MODULO) Fac = safe_modulo(Fac1, Fac2); + else if(type == NODE_MATH_ABSOLUTE) + Fac = fabsf(Fac1); else if(type == NODE_MATH_CLAMP) Fac = clamp(Fac1, 0.0f, 1.0f); else diff --git a/intern/cycles/kernel/svm/svm_mix.h b/intern/cycles/kernel/svm/svm_mix.h index 4e834b7c500..edc3903865e 100644 --- a/intern/cycles/kernel/svm/svm_mix.h +++ b/intern/cycles/kernel/svm/svm_mix.h @@ -89,7 +89,7 @@ ccl_device float3 svm_mix_diff(float t, float3 col1, float3 col2) ccl_device float3 svm_mix_dark(float t, float3 col1, float3 col2) { - return min(col1, col2*t); + return min(col1, col2)*t + col1*(1.0f - t); } ccl_device float3 svm_mix_light(float t, float3 col1, float3 col2) diff --git a/intern/cycles/kernel/svm/svm_noise.h b/intern/cycles/kernel/svm/svm_noise.h index 282ad191470..91dda8972f9 100644 --- a/intern/cycles/kernel/svm/svm_noise.h +++ b/intern/cycles/kernel/svm/svm_noise.h @@ -357,15 +357,13 @@ ccl_device float3 cellnoise_color(float3 p) return make_float3(r, g, b); } #else -ccl_device float3 cellnoise_color(const float3& p) +ccl_device __m128 cellnoise_color(const __m128& p) { - __m128i v_yxz = quick_floor_sse(_mm_setr_ps(p.y, p.x, p.z, 0.0f)); - __m128i v_xyy = shuffle<1, 0, 0, 3>(v_yxz); - __m128i v_zzx = shuffle<2, 2, 1, 3>(v_yxz); - __m128 rgb = bits_to_01_sse(hash_sse(v_xyy, v_yxz, v_zzx)); - - float3 result = *(float3*)&rgb; - return result; + __m128i ip = quick_floor_sse(p); + __m128i 
ip_yxz = shuffle<1, 0, 2, 3>(ip); + __m128i ip_xyy = shuffle<0, 1, 1, 3>(ip); + __m128i ip_zzx = shuffle<2, 2, 0, 3>(ip); + return bits_to_01_sse(hash_sse(ip_xyy, ip_yxz, ip_zzx)); } #endif diff --git a/intern/cycles/kernel/svm/svm_sepcomb_hsv.h b/intern/cycles/kernel/svm/svm_sepcomb_hsv.h index 0f68ecbea03..111d5d47988 100644 --- a/intern/cycles/kernel/svm/svm_sepcomb_hsv.h +++ b/intern/cycles/kernel/svm/svm_sepcomb_hsv.h @@ -42,12 +42,12 @@ ccl_device void svm_node_separate_hsv(KernelGlobals *kg, ShaderData *sd, float * /* Convert to HSV */ color = rgb_to_hsv(color); - if (stack_valid(hue_out)) - stack_store_float(stack, hue_out, color.x); - if (stack_valid(saturation_out)) - stack_store_float(stack, saturation_out, color.y); - if (stack_valid(value_out)) - stack_store_float(stack, value_out, color.z); + if (stack_valid(hue_out)) + stack_store_float(stack, hue_out, color.x); + if (stack_valid(saturation_out)) + stack_store_float(stack, saturation_out, color.y); + if (stack_valid(value_out)) + stack_store_float(stack, value_out, color.z); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_sky.h b/intern/cycles/kernel/svm/svm_sky.h index 1e3552647bd..500b5146931 100644 --- a/intern/cycles/kernel/svm/svm_sky.h +++ b/intern/cycles/kernel/svm/svm_sky.h @@ -74,7 +74,7 @@ ccl_device float sky_radiance_internal(float *configuration, float theta, float float expM = expf(configuration[4] * gamma); float rayM = cgamma * cgamma; float mieM = (1.0f + rayM) / powf((1.0f + configuration[8]*configuration[8] - 2.0f*configuration[8]*cgamma), 1.5f); - float zenith = sqrt(ctheta); + float zenith = sqrtf(ctheta); return (1.0f + configuration[0] * expf(configuration[1] / (ctheta + 0.01f))) * (configuration[2] + configuration[3] * expM + configuration[5] * rayM + configuration[6] * mieM + configuration[7] * zenith); diff --git a/intern/cycles/kernel/svm/svm_tex_coord.h b/intern/cycles/kernel/svm/svm_tex_coord.h index 4b1f30e55bb..a17e4a25efe 100644 --- 
a/intern/cycles/kernel/svm/svm_tex_coord.h +++ b/intern/cycles/kernel/svm/svm_tex_coord.h @@ -25,27 +25,27 @@ ccl_device void svm_node_tex_coord(KernelGlobals *kg, ShaderData *sd, int path_f switch(type) { case NODE_TEXCO_OBJECT: { data = sd->P; - if(sd->object != ~0) + if(sd->object != OBJECT_NONE) object_inverse_position_transform(kg, sd, &data); break; } case NODE_TEXCO_NORMAL: { data = sd->N; - if(sd->object != ~0) + if(sd->object != OBJECT_NONE) object_inverse_normal_transform(kg, sd, &data); break; } case NODE_TEXCO_CAMERA: { Transform tfm = kernel_data.cam.worldtocamera; - if(sd->object != ~0) + if(sd->object != OBJECT_NONE) data = transform_point(&tfm, sd->P); else data = transform_point(&tfm, sd->P + camera_position(kg)); break; } case NODE_TEXCO_WINDOW: { - if((path_flag & PATH_RAY_CAMERA) && sd->object == ~0 && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) + if((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) data = camera_world_to_ndc(kg, sd, sd->ray_P); else data = camera_world_to_ndc(kg, sd, sd->P); @@ -53,7 +53,7 @@ ccl_device void svm_node_tex_coord(KernelGlobals *kg, ShaderData *sd, int path_f break; } case NODE_TEXCO_REFLECTION: { - if(sd->object != ~0) + if(sd->object != OBJECT_NONE) data = 2.0f*dot(sd->N, sd->I)*sd->N - sd->I; else data = sd->I; @@ -70,17 +70,10 @@ ccl_device void svm_node_tex_coord(KernelGlobals *kg, ShaderData *sd, int path_f case NODE_TEXCO_VOLUME_GENERATED: { data = sd->P; - if(sd->object != ~0) { - AttributeElement attr_elem; - int attr_offset = find_attribute(kg, sd, ATTR_STD_GENERATED_TRANSFORM, &attr_elem); - - object_inverse_position_transform(kg, sd, &data); - - if(attr_offset != ATTR_STD_NOT_FOUND) { - Transform tfm = primitive_attribute_matrix(kg, sd, attr_offset); - data = transform_point(&tfm, data); - } - } +#ifdef __VOLUME__ + if(sd->object != OBJECT_NONE) + data = volume_normalized_position(kg, sd, data); +#endif break; } } @@ -96,27 +89,27 @@ ccl_device 
void svm_node_tex_coord_bump_dx(KernelGlobals *kg, ShaderData *sd, in switch(type) { case NODE_TEXCO_OBJECT: { data = sd->P + sd->dP.dx; - if(sd->object != ~0) + if(sd->object != OBJECT_NONE) object_inverse_position_transform(kg, sd, &data); break; } case NODE_TEXCO_NORMAL: { data = sd->N; - if(sd->object != ~0) + if(sd->object != OBJECT_NONE) object_inverse_normal_transform(kg, sd, &data); break; } case NODE_TEXCO_CAMERA: { Transform tfm = kernel_data.cam.worldtocamera; - if(sd->object != ~0) + if(sd->object != OBJECT_NONE) data = transform_point(&tfm, sd->P + sd->dP.dx); else data = transform_point(&tfm, sd->P + sd->dP.dx + camera_position(kg)); break; } case NODE_TEXCO_WINDOW: { - if((path_flag & PATH_RAY_CAMERA) && sd->object == ~0 && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) + if((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) data = camera_world_to_ndc(kg, sd, sd->ray_P + sd->ray_dP.dx); else data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dx); @@ -124,7 +117,7 @@ ccl_device void svm_node_tex_coord_bump_dx(KernelGlobals *kg, ShaderData *sd, in break; } case NODE_TEXCO_REFLECTION: { - if(sd->object != ~0) + if(sd->object != OBJECT_NONE) data = 2.0f*dot(sd->N, sd->I)*sd->N - sd->I; else data = sd->I; @@ -141,17 +134,10 @@ ccl_device void svm_node_tex_coord_bump_dx(KernelGlobals *kg, ShaderData *sd, in case NODE_TEXCO_VOLUME_GENERATED: { data = sd->P + sd->dP.dx; - if(sd->object != ~0) { - AttributeElement attr_elem; - int attr_offset = find_attribute(kg, sd, ATTR_STD_GENERATED_TRANSFORM, &attr_elem); - - object_inverse_position_transform(kg, sd, &data); - - if(attr_offset != ATTR_STD_NOT_FOUND) { - Transform tfm = primitive_attribute_matrix(kg, sd, attr_offset); - data = transform_point(&tfm, data); - } - } +#ifdef __VOLUME__ + if(sd->object != OBJECT_NONE) + data = volume_normalized_position(kg, sd, data); +#endif break; } } @@ -170,27 +156,27 @@ ccl_device void 
svm_node_tex_coord_bump_dy(KernelGlobals *kg, ShaderData *sd, in switch(type) { case NODE_TEXCO_OBJECT: { data = sd->P + sd->dP.dy; - if(sd->object != ~0) + if(sd->object != OBJECT_NONE) object_inverse_position_transform(kg, sd, &data); break; } case NODE_TEXCO_NORMAL: { data = sd->N; - if(sd->object != ~0) + if(sd->object != OBJECT_NONE) object_inverse_normal_transform(kg, sd, &data); break; } case NODE_TEXCO_CAMERA: { Transform tfm = kernel_data.cam.worldtocamera; - if(sd->object != ~0) + if(sd->object != OBJECT_NONE) data = transform_point(&tfm, sd->P + sd->dP.dy); else data = transform_point(&tfm, sd->P + sd->dP.dy + camera_position(kg)); break; } case NODE_TEXCO_WINDOW: { - if((path_flag & PATH_RAY_CAMERA) && sd->object == ~0 && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) + if((path_flag & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) data = camera_world_to_ndc(kg, sd, sd->ray_P + sd->ray_dP.dy); else data = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dy); @@ -198,7 +184,7 @@ ccl_device void svm_node_tex_coord_bump_dy(KernelGlobals *kg, ShaderData *sd, in break; } case NODE_TEXCO_REFLECTION: { - if(sd->object != ~0) + if(sd->object != OBJECT_NONE) data = 2.0f*dot(sd->N, sd->I)*sd->N - sd->I; else data = sd->I; @@ -215,17 +201,10 @@ ccl_device void svm_node_tex_coord_bump_dy(KernelGlobals *kg, ShaderData *sd, in case NODE_TEXCO_VOLUME_GENERATED: { data = sd->P + sd->dP.dy; - if(sd->object != ~0) { - AttributeElement attr_elem; - int attr_offset = find_attribute(kg, sd, ATTR_STD_GENERATED_TRANSFORM, &attr_elem); - - object_inverse_position_transform(kg, sd, &data); - - if(attr_offset != ATTR_STD_NOT_FOUND) { - Transform tfm = primitive_attribute_matrix(kg, sd, attr_offset); - data = transform_point(&tfm, data); - } - } +#ifdef __VOLUME__ + if(sd->object != OBJECT_NONE) + data = volume_normalized_position(kg, sd, data); +#endif break; } } @@ -248,7 +227,7 @@ ccl_device void svm_node_normal_map(KernelGlobals *kg, 
ShaderData *sd, float *st if(space == NODE_NORMAL_MAP_TANGENT) { /* tangent space */ - if(sd->object == ~0) { + if(sd->object == OBJECT_NONE) { stack_store_float3(stack, normal_offset, make_float3(0.0f, 0.0f, 0.0f)); return; } diff --git a/intern/cycles/kernel/svm/svm_texture.h b/intern/cycles/kernel/svm/svm_texture.h index 8ced8390b0b..5fd9204cbf6 100644 --- a/intern/cycles/kernel/svm/svm_texture.h +++ b/intern/cycles/kernel/svm/svm_texture.h @@ -18,6 +18,7 @@ CCL_NAMESPACE_BEGIN /* Voronoi Distances */ +#if 0 ccl_device float voronoi_distance(NodeDistanceMetric distance_metric, float3 d, float e) { #if 0 @@ -43,8 +44,7 @@ ccl_device float voronoi_distance(NodeDistanceMetric distance_metric, float3 d, } /* Voronoi / Worley like */ - -ccl_device_noinline float4 voronoi_Fn(float3 p, float e, int n1, int n2) +ccl_device_inline float4 voronoi_Fn(float3 p, float e, int n1, int n2) { float da[4]; float3 pa[4]; @@ -119,7 +119,95 @@ ccl_device_noinline float4 voronoi_Fn(float3 p, float e, int n1, int n2) return result; } +#endif + +ccl_device float voronoi_F1_distance(float3 p) +{ + /* returns squared distance in da */ + float da = 1e10f; + +#ifndef __KERNEL_SSE2__ + int ix = floor_to_int(p.x), iy = floor_to_int(p.y), iz = floor_to_int(p.z); + + for (int xx = -1; xx <= 1; xx++) { + for (int yy = -1; yy <= 1; yy++) { + for (int zz = -1; zz <= 1; zz++) { + float3 ip = make_float3(ix + xx, iy + yy, iz + zz); + float3 vp = ip + cellnoise_color(ip); + float d = len_squared(p - vp); + da = min(d, da); + } + } + } +#else + __m128 vec_p = load_m128(p); + __m128i xyzi = quick_floor_sse(vec_p); + + for (int xx = -1; xx <= 1; xx++) { + for (int yy = -1; yy <= 1; yy++) { + for (int zz = -1; zz <= 1; zz++) { + __m128 ip = _mm_cvtepi32_ps(_mm_add_epi32(xyzi, _mm_setr_epi32(xx, yy, zz, 0))); + __m128 vp = _mm_add_ps(ip, cellnoise_color(ip)); + float d = len_squared<1, 1, 1, 0>(_mm_sub_ps(vec_p, vp)); + da = min(d, da); + } + } + } +#endif + + return da; +} + +ccl_device float3 
voronoi_F1_color(float3 p) +{ + /* returns color of the nearest point */ + float da = 1e10f; + +#ifndef __KERNEL_SSE2__ + float3 pa; + int ix = floor_to_int(p.x), iy = floor_to_int(p.y), iz = floor_to_int(p.z); + + for (int xx = -1; xx <= 1; xx++) { + for (int yy = -1; yy <= 1; yy++) { + for (int zz = -1; zz <= 1; zz++) { + float3 ip = make_float3(ix + xx, iy + yy, iz + zz); + float3 vp = ip + cellnoise_color(ip); + float d = len_squared(p - vp); + + if(d < da) { + da = d; + pa = vp; + } + } + } + } + + return cellnoise_color(pa); +#else + __m128 pa, vec_p = load_m128(p); + __m128i xyzi = quick_floor_sse(vec_p); + + for (int xx = -1; xx <= 1; xx++) { + for (int yy = -1; yy <= 1; yy++) { + for (int zz = -1; zz <= 1; zz++) { + __m128 ip = _mm_cvtepi32_ps(_mm_add_epi32(xyzi, _mm_setr_epi32(xx, yy, zz, 0))); + __m128 vp = _mm_add_ps(ip, cellnoise_color(ip)); + float d = len_squared<1, 1, 1, 0>(_mm_sub_ps(vec_p, vp)); + + if(d < da) { + da = d; + pa = vp; + } + } + } + } + + __m128 color = cellnoise_color(pa); + return (float3 &)color; +#endif +} +#if 0 ccl_device float voronoi_F1(float3 p) { return voronoi_Fn(p, 0.0f, 0, -1).w; } ccl_device float voronoi_F2(float3 p) { return voronoi_Fn(p, 0.0f, 1, -1).w; } ccl_device float voronoi_F3(float3 p) { return voronoi_Fn(p, 0.0f, 2, -1).w; } @@ -139,6 +227,7 @@ ccl_device float voronoi_F3S(float3 p) { return 2.0f*voronoi_F3(p) - 1.0f; } ccl_device float voronoi_F4S(float3 p) { return 2.0f*voronoi_F4(p) - 1.0f; } ccl_device float voronoi_F1F2S(float3 p) { return 2.0f*voronoi_F1F2(p) - 1.0f; } ccl_device float voronoi_CrS(float3 p) { return 2.0f*voronoi_Cr(p) - 1.0f; } +#endif /* Noise Bases */ diff --git a/intern/cycles/kernel/svm/svm_types.h b/intern/cycles/kernel/svm/svm_types.h index ad5e1ea6d2e..80972ec82bc 100644 --- a/intern/cycles/kernel/svm/svm_types.h +++ b/intern/cycles/kernel/svm/svm_types.h @@ -36,7 +36,8 @@ typedef enum NodeType { NODE_CLOSURE_SET_WEIGHT, NODE_CLOSURE_WEIGHT, NODE_MIX_CLOSURE, - NODE_JUMP, + 
NODE_JUMP_IF_ZERO, + NODE_JUMP_IF_ONE, NODE_TEX_IMAGE, NODE_TEX_IMAGE_BOX, NODE_TEX_SKY, @@ -71,7 +72,6 @@ typedef enum NodeType { NODE_TEX_COORD, NODE_TEX_COORD_BUMP_DX, NODE_TEX_COORD_BUMP_DY, - NODE_ADD_CLOSURE, NODE_EMISSION_SET_WEIGHT_TOTAL, NODE_ATTR_BUMP_DX, NODE_ATTR_BUMP_DY, @@ -102,7 +102,8 @@ typedef enum NodeType { NODE_CLOSURE_AMBIENT_OCCLUSION, NODE_TANGENT, NODE_NORMAL_MAP, - NODE_HAIR_INFO + NODE_HAIR_INFO, + NODE_UVMAP } NodeType; typedef enum NodeAttributeType { @@ -158,7 +159,8 @@ typedef enum NodeLightPath { NODE_LP_volume_scatter, NODE_LP_backfacing, NODE_LP_ray_length, - NODE_LP_ray_depth + NODE_LP_ray_depth, + NODE_LP_ray_transparent } NodeLightPath; typedef enum NodeLightFalloff { @@ -219,6 +221,7 @@ typedef enum NodeMath { NODE_MATH_LESS_THAN, NODE_MATH_GREATER_THAN, NODE_MATH_MODULO, + NODE_MATH_ABSOLUTE, NODE_MATH_CLAMP /* used for the clamp UI option */ } NodeMath; @@ -401,6 +404,8 @@ typedef enum ClosureType { #define CLOSURE_IS_BSDF_GLOSSY(type) (type >= CLOSURE_BSDF_GLOSSY_ID && type <= CLOSURE_BSDF_HAIR_REFLECTION_ID) #define CLOSURE_IS_BSDF_TRANSMISSION(type) (type >= CLOSURE_BSDF_TRANSMISSION_ID && type <= CLOSURE_BSDF_HAIR_TRANSMISSION_ID) #define CLOSURE_IS_BSDF_BSSRDF(type) (type == CLOSURE_BSDF_BSSRDF_ID) +#define CLOSURE_IS_BSDF_ANISOTROPIC(type) (type == CLOSURE_BSDF_WARD_ID) +#define CLOSURE_IS_BSDF_OR_BSSRDF(type) (type <= CLOSURE_BSSRDF_GAUSSIAN_ID) #define CLOSURE_IS_BSSRDF(type) (type >= CLOSURE_BSSRDF_CUBIC_ID && type <= CLOSURE_BSSRDF_GAUSSIAN_ID) #define CLOSURE_IS_VOLUME(type) (type >= CLOSURE_VOLUME_ID && type <= CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID) #define CLOSURE_IS_EMISSION(type) (type == CLOSURE_EMISSION_ID) diff --git a/intern/cycles/kernel/svm/svm_vector_transform.h b/intern/cycles/kernel/svm/svm_vector_transform.h index 1e3fc2fa03b..61d33aeb8cf 100644 --- a/intern/cycles/kernel/svm/svm_vector_transform.h +++ b/intern/cycles/kernel/svm/svm_vector_transform.h @@ -33,7 +33,7 @@ ccl_device void 
svm_node_vector_transform(KernelGlobals *kg, ShaderData *sd, flo NodeVectorTransformConvertSpace to = (NodeVectorTransformConvertSpace)ito; Transform tfm; - bool is_object = (sd->object != ~0); + bool is_object = (sd->object != OBJECT_NONE); bool is_direction = (type == NODE_VECTOR_TRANSFORM_TYPE_VECTOR || type == NODE_VECTOR_TRANSFORM_TYPE_NORMAL); /* From world */ @@ -91,9 +91,9 @@ ccl_device void svm_node_vector_transform(KernelGlobals *kg, ShaderData *sd, flo if(type == NODE_VECTOR_TRANSFORM_TYPE_NORMAL) in = normalize(in); - /* Output */ + /* Output */ if(stack_valid(vector_out)) { - stack_store_float3(stack, vector_out, in); + stack_store_float3(stack, vector_out, in); } } diff --git a/intern/cycles/kernel/svm/svm_voronoi.h b/intern/cycles/kernel/svm/svm_voronoi.h index 7f597dc8bff..083a2f30e06 100644 --- a/intern/cycles/kernel/svm/svm_voronoi.h +++ b/intern/cycles/kernel/svm/svm_voronoi.h @@ -20,23 +20,16 @@ CCL_NAMESPACE_BEGIN ccl_device_noinline float4 svm_voronoi(NodeVoronoiColoring coloring, float3 p) { - /* compute distance and point coordinate of 4 nearest neighbours */ - float4 dpa0 = voronoi_Fn(p, 1.0f, 0, -1); - - /* output */ - float fac; - float3 color; - if(coloring == NODE_VORONOI_INTENSITY) { - fac = fabsf(dpa0.w); - color = make_float3(fac, fac, fac); + /* compute squared distance to the nearest neighbour */ + float fac = voronoi_F1_distance(p); + return make_float4(fac, fac, fac, fac); } else { - color = cellnoise_color(float4_to_float3(dpa0)); - fac = average(color); + /* compute color of the nearest neighbour */ + float3 color = voronoi_F1_color(p); + return make_float4(color.x, color.y, color.z, average(color)); } - - return make_float4(color.x, color.y, color.z, fac); } ccl_device void svm_node_tex_voronoi(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset) diff --git a/intern/cycles/kernel/svm/svm_wavelength.h b/intern/cycles/kernel/svm/svm_wavelength.h index dca4003b89a..9e57c470c0f 100644 --- 
a/intern/cycles/kernel/svm/svm_wavelength.h +++ b/intern/cycles/kernel/svm/svm_wavelength.h @@ -43,33 +43,33 @@ ccl_device void svm_node_wavelength(ShaderData *sd, float *stack, uint wavelengt // cie_colour_match[(lambda - 380) / 5][1] = yBar // cie_colour_match[(lambda - 380) / 5][2] = zBar const float cie_colour_match[81][3] = { - {0.0014,0.0000,0.0065}, {0.0022,0.0001,0.0105}, {0.0042,0.0001,0.0201}, - {0.0076,0.0002,0.0362}, {0.0143,0.0004,0.0679}, {0.0232,0.0006,0.1102}, - {0.0435,0.0012,0.2074}, {0.0776,0.0022,0.3713}, {0.1344,0.0040,0.6456}, - {0.2148,0.0073,1.0391}, {0.2839,0.0116,1.3856}, {0.3285,0.0168,1.6230}, - {0.3483,0.0230,1.7471}, {0.3481,0.0298,1.7826}, {0.3362,0.0380,1.7721}, - {0.3187,0.0480,1.7441}, {0.2908,0.0600,1.6692}, {0.2511,0.0739,1.5281}, - {0.1954,0.0910,1.2876}, {0.1421,0.1126,1.0419}, {0.0956,0.1390,0.8130}, - {0.0580,0.1693,0.6162}, {0.0320,0.2080,0.4652}, {0.0147,0.2586,0.3533}, - {0.0049,0.3230,0.2720}, {0.0024,0.4073,0.2123}, {0.0093,0.5030,0.1582}, - {0.0291,0.6082,0.1117}, {0.0633,0.7100,0.0782}, {0.1096,0.7932,0.0573}, - {0.1655,0.8620,0.0422}, {0.2257,0.9149,0.0298}, {0.2904,0.9540,0.0203}, - {0.3597,0.9803,0.0134}, {0.4334,0.9950,0.0087}, {0.5121,1.0000,0.0057}, - {0.5945,0.9950,0.0039}, {0.6784,0.9786,0.0027}, {0.7621,0.9520,0.0021}, - {0.8425,0.9154,0.0018}, {0.9163,0.8700,0.0017}, {0.9786,0.8163,0.0014}, - {1.0263,0.7570,0.0011}, {1.0567,0.6949,0.0010}, {1.0622,0.6310,0.0008}, - {1.0456,0.5668,0.0006}, {1.0026,0.5030,0.0003}, {0.9384,0.4412,0.0002}, - {0.8544,0.3810,0.0002}, {0.7514,0.3210,0.0001}, {0.6424,0.2650,0.0000}, - {0.5419,0.2170,0.0000}, {0.4479,0.1750,0.0000}, {0.3608,0.1382,0.0000}, - {0.2835,0.1070,0.0000}, {0.2187,0.0816,0.0000}, {0.1649,0.0610,0.0000}, - {0.1212,0.0446,0.0000}, {0.0874,0.0320,0.0000}, {0.0636,0.0232,0.0000}, - {0.0468,0.0170,0.0000}, {0.0329,0.0119,0.0000}, {0.0227,0.0082,0.0000}, - {0.0158,0.0057,0.0000}, {0.0114,0.0041,0.0000}, {0.0081,0.0029,0.0000}, - {0.0058,0.0021,0.0000}, 
{0.0041,0.0015,0.0000}, {0.0029,0.0010,0.0000}, - {0.0020,0.0007,0.0000}, {0.0014,0.0005,0.0000}, {0.0010,0.0004,0.0000}, - {0.0007,0.0002,0.0000}, {0.0005,0.0002,0.0000}, {0.0003,0.0001,0.0000}, - {0.0002,0.0001,0.0000}, {0.0002,0.0001,0.0000}, {0.0001,0.0000,0.0000}, - {0.0001,0.0000,0.0000}, {0.0001,0.0000,0.0000}, {0.0000,0.0000,0.0000} + {0.0014f,0.0000f,0.0065f}, {0.0022f,0.0001f,0.0105f}, {0.0042f,0.0001f,0.0201f}, + {0.0076f,0.0002f,0.0362f}, {0.0143f,0.0004f,0.0679f}, {0.0232f,0.0006f,0.1102f}, + {0.0435f,0.0012f,0.2074f}, {0.0776f,0.0022f,0.3713f}, {0.1344f,0.0040f,0.6456f}, + {0.2148f,0.0073f,1.0391f}, {0.2839f,0.0116f,1.3856f}, {0.3285f,0.0168f,1.6230f}, + {0.3483f,0.0230f,1.7471f}, {0.3481f,0.0298f,1.7826f}, {0.3362f,0.0380f,1.7721f}, + {0.3187f,0.0480f,1.7441f}, {0.2908f,0.0600f,1.6692f}, {0.2511f,0.0739f,1.5281f}, + {0.1954f,0.0910f,1.2876f}, {0.1421f,0.1126f,1.0419f}, {0.0956f,0.1390f,0.8130f}, + {0.0580f,0.1693f,0.6162f}, {0.0320f,0.2080f,0.4652f}, {0.0147f,0.2586f,0.3533f}, + {0.0049f,0.3230f,0.2720f}, {0.0024f,0.4073f,0.2123f}, {0.0093f,0.5030f,0.1582f}, + {0.0291f,0.6082f,0.1117f}, {0.0633f,0.7100f,0.0782f}, {0.1096f,0.7932f,0.0573f}, + {0.1655f,0.8620f,0.0422f}, {0.2257f,0.9149f,0.0298f}, {0.2904f,0.9540f,0.0203f}, + {0.3597f,0.9803f,0.0134f}, {0.4334f,0.9950f,0.0087f}, {0.5121f,1.0000f,0.0057f}, + {0.5945f,0.9950f,0.0039f}, {0.6784f,0.9786f,0.0027f}, {0.7621f,0.9520f,0.0021f}, + {0.8425f,0.9154f,0.0018f}, {0.9163f,0.8700f,0.0017f}, {0.9786f,0.8163f,0.0014f}, + {1.0263f,0.7570f,0.0011f}, {1.0567f,0.6949f,0.0010f}, {1.0622f,0.6310f,0.0008f}, + {1.0456f,0.5668f,0.0006f}, {1.0026f,0.5030f,0.0003f}, {0.9384f,0.4412f,0.0002f}, + {0.8544f,0.3810f,0.0002f}, {0.7514f,0.3210f,0.0001f}, {0.6424f,0.2650f,0.0000f}, + {0.5419f,0.2170f,0.0000f}, {0.4479f,0.1750f,0.0000f}, {0.3608f,0.1382f,0.0000f}, + {0.2835f,0.1070f,0.0000f}, {0.2187f,0.0816f,0.0000f}, {0.1649f,0.0610f,0.0000f}, + {0.1212f,0.0446f,0.0000f}, {0.0874f,0.0320f,0.0000f}, 
{0.0636f,0.0232f,0.0000f}, + {0.0468f,0.0170f,0.0000f}, {0.0329f,0.0119f,0.0000f}, {0.0227f,0.0082f,0.0000f}, + {0.0158f,0.0057f,0.0000f}, {0.0114f,0.0041f,0.0000f}, {0.0081f,0.0029f,0.0000f}, + {0.0058f,0.0021f,0.0000f}, {0.0041f,0.0015f,0.0000f}, {0.0029f,0.0010f,0.0000f}, + {0.0020f,0.0007f,0.0000f}, {0.0014f,0.0005f,0.0000f}, {0.0010f,0.0004f,0.0000f}, + {0.0007f,0.0002f,0.0000f}, {0.0005f,0.0002f,0.0000f}, {0.0003f,0.0001f,0.0000f}, + {0.0002f,0.0001f,0.0000f}, {0.0002f,0.0001f,0.0000f}, {0.0001f,0.0000f,0.0000f}, + {0.0001f,0.0000f,0.0000f}, {0.0001f,0.0000f,0.0000f}, {0.0000f,0.0000f,0.0000f} }; float lambda_nm = stack_load_float(stack, wavelength); diff --git a/intern/cycles/kernel/svm/svm_wireframe.h b/intern/cycles/kernel/svm/svm_wireframe.h index e560e6303cc..660e6e2ca47 100644 --- a/intern/cycles/kernel/svm/svm_wireframe.h +++ b/intern/cycles/kernel/svm/svm_wireframe.h @@ -45,17 +45,21 @@ ccl_device void svm_node_wireframe(KernelGlobals *kg, ShaderData *sd, float *sta /* Calculate wireframe */ #ifdef __HAIR__ - if (sd->prim != ~0 && sd->segment == ~0) { + if (sd->prim != PRIM_NONE && sd->type & PRIMITIVE_ALL_TRIANGLE) #else - if (sd->prim != ~0) { + if (sd->prim != PRIM_NONE) #endif + { float3 Co[3]; float pixelwidth = 1.0f; /* Triangles */ - float np = 3; + int np = 3; - triangle_vertices(kg, sd->prim, Co); + if(sd->type & PRIMITIVE_TRIANGLE) + triangle_vertices(kg, sd->prim, Co); + else + motion_triangle_vertices(kg, sd->object, sd->prim, sd->time, Co); if(!(sd->flag & SD_TRANSFORM_APPLIED)) { object_position_transform(kg, sd, &Co[0]); diff --git a/intern/cycles/render/CMakeLists.txt b/intern/cycles/render/CMakeLists.txt index 7d00ed92164..449c1391980 100644 --- a/intern/cycles/render/CMakeLists.txt +++ b/intern/cycles/render/CMakeLists.txt @@ -16,6 +16,7 @@ set(INC_SYS set(SRC attribute.cpp background.cpp + bake.cpp blackbody.cpp buffers.cpp camera.cpp @@ -43,6 +44,7 @@ set(SRC set(SRC_HEADERS attribute.h + bake.h background.h blackbody.h buffers.h 
diff --git a/intern/cycles/render/attribute.cpp b/intern/cycles/render/attribute.cpp index 61b9cf2f3bc..14805b6f11a 100644 --- a/intern/cycles/render/attribute.cpp +++ b/intern/cycles/render/attribute.cpp @@ -14,6 +14,7 @@ * limitations under the License */ +#include "image.h" #include "mesh.h" #include "attribute.h" @@ -25,6 +26,17 @@ CCL_NAMESPACE_BEGIN /* Attribute */ +Attribute::~Attribute() +{ + /* for voxel data, we need to remove the image from the image manager */ + if(element == ATTR_ELEMENT_VOXEL) { + VoxelAttribute *voxel_data = data_voxel(); + + if(voxel_data) + voxel_data->manager->remove_image(voxel_data->slot); + } +} + void Attribute::set(ustring name_, TypeDesc type_, AttributeElement element_) { name = name_; @@ -38,9 +50,14 @@ void Attribute::set(ustring name_, TypeDesc type_, AttributeElement element_) type == TypeDesc::TypeNormal || type == TypeDesc::TypeMatrix); } -void Attribute::reserve(int numverts, int numtris, int numcurves, int numkeys) +void Attribute::reserve(int numverts, int numtris, int numsteps, int numcurves, int numkeys, bool resize) { - buffer.resize(buffer_size(numverts, numtris, numcurves, numkeys), 0); + if (resize) { + buffer.resize(buffer_size(numverts, numtris, numsteps, numcurves, numkeys), 0); + } + else { + buffer.reserve(buffer_size(numverts, numtris, numsteps, numcurves, numkeys)); + } } void Attribute::add(const float& f) @@ -70,9 +87,28 @@ void Attribute::add(const Transform& f) buffer.push_back(data[i]); } +void Attribute::add(const VoxelAttribute& f) +{ + char *data = (char*)&f; + size_t size = sizeof(f); + + for(size_t i = 0; i < size; i++) + buffer.push_back(data[i]); +} + +void Attribute::add(const char *data) +{ + size_t size = data_sizeof(); + + for(size_t i = 0; i < size; i++) + buffer.push_back(data[i]); +} + size_t Attribute::data_sizeof() const { - if(type == TypeDesc::TypeFloat) + if(element == ATTR_ELEMENT_VOXEL) + return sizeof(VoxelAttribute); + else if(type == TypeDesc::TypeFloat) return 
sizeof(float); else if(type == TypeDesc::TypeMatrix) return sizeof(Transform); @@ -80,18 +116,22 @@ size_t Attribute::data_sizeof() const return sizeof(float3); } -size_t Attribute::element_size(int numverts, int numtris, int numcurves, int numkeys) const +size_t Attribute::element_size(int numverts, int numtris, int numsteps, int numcurves, int numkeys) const { size_t size; switch(element) { case ATTR_ELEMENT_OBJECT: case ATTR_ELEMENT_MESH: + case ATTR_ELEMENT_VOXEL: size = 1; break; case ATTR_ELEMENT_VERTEX: size = numverts; break; + case ATTR_ELEMENT_VERTEX_MOTION: + size = numverts * (numsteps - 1); + break; case ATTR_ELEMENT_FACE: size = numtris; break; @@ -104,6 +144,9 @@ size_t Attribute::element_size(int numverts, int numtris, int numcurves, int num case ATTR_ELEMENT_CURVE_KEY: size = numkeys; break; + case ATTR_ELEMENT_CURVE_KEY_MOTION: + size = numkeys * (numsteps - 1); + break; default: size = 0; break; @@ -112,9 +155,9 @@ size_t Attribute::element_size(int numverts, int numtris, int numcurves, int num return size; } -size_t Attribute::buffer_size(int numverts, int numtris, int numcurves, int numkeys) const +size_t Attribute::buffer_size(int numverts, int numtris, int numsteps, int numcurves, int numkeys) const { - return element_size(numverts, numtris, numcurves, numkeys)*data_sizeof(); + return element_size(numverts, numtris, numsteps, numcurves, numkeys)*data_sizeof(); } bool Attribute::same_storage(TypeDesc a, TypeDesc b) @@ -136,40 +179,65 @@ bool Attribute::same_storage(TypeDesc a, TypeDesc b) const char *Attribute::standard_name(AttributeStandard std) { - if(std == ATTR_STD_VERTEX_NORMAL) - return "N"; - else if(std == ATTR_STD_FACE_NORMAL) - return "Ng"; - else if(std == ATTR_STD_UV) - return "uv"; - else if(std == ATTR_STD_GENERATED) - return "generated"; - else if(std == ATTR_STD_UV_TANGENT) - return "tangent"; - else if(std == ATTR_STD_UV_TANGENT_SIGN) - return "tangent_sign"; - else if(std == ATTR_STD_POSITION_UNDEFORMED) - return 
"undeformed"; - else if(std == ATTR_STD_POSITION_UNDISPLACED) - return "undisplaced"; - else if(std == ATTR_STD_MOTION_PRE) - return "motion_pre"; - else if(std == ATTR_STD_MOTION_POST) - return "motion_post"; - else if(std == ATTR_STD_PARTICLE) - return "particle"; - else if(std == ATTR_STD_CURVE_INTERCEPT) - return "curve_intercept"; - else if(std == ATTR_STD_PTEX_FACE_ID) - return "ptex_face_id"; - else if(std == ATTR_STD_PTEX_UV) - return "ptex_uv"; - else if(std == ATTR_STD_GENERATED_TRANSFORM) - return "generated_transform"; + switch(std) { + case ATTR_STD_VERTEX_NORMAL: + return "N"; + case ATTR_STD_FACE_NORMAL: + return "Ng"; + case ATTR_STD_UV: + return "uv"; + case ATTR_STD_GENERATED: + return "generated"; + case ATTR_STD_GENERATED_TRANSFORM: + return "generated_transform"; + case ATTR_STD_UV_TANGENT: + return "tangent"; + case ATTR_STD_UV_TANGENT_SIGN: + return "tangent_sign"; + case ATTR_STD_POSITION_UNDEFORMED: + return "undeformed"; + case ATTR_STD_POSITION_UNDISPLACED: + return "undisplaced"; + case ATTR_STD_MOTION_VERTEX_POSITION: + return "motion_P"; + case ATTR_STD_MOTION_VERTEX_NORMAL: + return "motion_N"; + case ATTR_STD_PARTICLE: + return "particle"; + case ATTR_STD_CURVE_INTERCEPT: + return "curve_intercept"; + case ATTR_STD_PTEX_FACE_ID: + return "ptex_face_id"; + case ATTR_STD_PTEX_UV: + return "ptex_uv"; + case ATTR_STD_VOLUME_DENSITY: + return "density"; + case ATTR_STD_VOLUME_COLOR: + return "color"; + case ATTR_STD_VOLUME_FLAME: + return "flame"; + case ATTR_STD_VOLUME_HEAT: + return "heat"; + case ATTR_STD_VOLUME_VELOCITY: + return "velocity"; + case ATTR_STD_NOT_FOUND: + case ATTR_STD_NONE: + case ATTR_STD_NUM: + return ""; + } return ""; } +AttributeStandard Attribute::name_standard(const char *name) +{ + for(int std = ATTR_STD_NONE; std < ATTR_STD_NUM; std++) + if(strcmp(name, Attribute::standard_name((AttributeStandard)std)) == 0) + return (AttributeStandard)std; + + return ATTR_STD_NONE; +} + /* Attribute Set */ 
AttributeSet::AttributeSet() @@ -182,7 +250,7 @@ AttributeSet::~AttributeSet() { } -Attribute *AttributeSet::add(ustring name, TypeDesc type, AttributeElement element) +Attribute *AttributeSet::add(ustring name, TypeDesc type, AttributeElement element, bool resize) { Attribute *attr = find(name); @@ -202,9 +270,9 @@ Attribute *AttributeSet::add(ustring name, TypeDesc type, AttributeElement eleme /* this is weak .. */ if(triangle_mesh) - attr->reserve(triangle_mesh->verts.size(), triangle_mesh->triangles.size(), 0, 0); + attr->reserve(triangle_mesh->verts.size(), triangle_mesh->triangles.size(), triangle_mesh->motion_steps, 0, 0, resize); if(curve_mesh) - attr->reserve(0, 0, curve_mesh->curves.size(), curve_mesh->curve_keys.size()); + attr->reserve(0, 0, curve_mesh->motion_steps, curve_mesh->curves.size(), curve_mesh->curve_keys.size(), resize); return attr; } @@ -261,10 +329,14 @@ Attribute *AttributeSet::add(AttributeStandard std, ustring name) case ATTR_STD_GENERATED: case ATTR_STD_POSITION_UNDEFORMED: case ATTR_STD_POSITION_UNDISPLACED: - case ATTR_STD_MOTION_PRE: - case ATTR_STD_MOTION_POST: attr = add(name, TypeDesc::TypePoint, ATTR_ELEMENT_VERTEX); break; + case ATTR_STD_MOTION_VERTEX_POSITION: + attr = add(name, TypeDesc::TypePoint, ATTR_ELEMENT_VERTEX_MOTION); + break; + case ATTR_STD_MOTION_VERTEX_NORMAL: + attr = add(name, TypeDesc::TypeNormal, ATTR_ELEMENT_VERTEX_MOTION); + break; case ATTR_STD_PTEX_FACE_ID: attr = add(name, TypeDesc::TypeFloat, ATTR_ELEMENT_FACE); break; @@ -274,6 +346,17 @@ Attribute *AttributeSet::add(AttributeStandard std, ustring name) case ATTR_STD_GENERATED_TRANSFORM: attr = add(name, TypeDesc::TypeMatrix, ATTR_ELEMENT_MESH); break; + case ATTR_STD_VOLUME_DENSITY: + case ATTR_STD_VOLUME_FLAME: + case ATTR_STD_VOLUME_HEAT: + attr = add(name, TypeDesc::TypeFloat, ATTR_ELEMENT_VOXEL); + break; + case ATTR_STD_VOLUME_COLOR: + attr = add(name, TypeDesc::TypeColor, ATTR_ELEMENT_VOXEL); + break; + case ATTR_STD_VOLUME_VELOCITY: + attr = 
add(name, TypeDesc::TypeVector, ATTR_ELEMENT_VOXEL); + break; default: assert(0); break; @@ -285,9 +368,8 @@ Attribute *AttributeSet::add(AttributeStandard std, ustring name) case ATTR_STD_GENERATED: attr = add(name, TypeDesc::TypePoint, ATTR_ELEMENT_CURVE); break; - case ATTR_STD_MOTION_PRE: - case ATTR_STD_MOTION_POST: - attr = add(name, TypeDesc::TypePoint, ATTR_ELEMENT_CURVE_KEY); + case ATTR_STD_MOTION_VERTEX_POSITION: + attr = add(name, TypeDesc::TypePoint, ATTR_ELEMENT_CURVE_KEY_MOTION); break; case ATTR_STD_CURVE_INTERCEPT: attr = add(name, TypeDesc::TypeFloat, ATTR_ELEMENT_CURVE_KEY); @@ -343,9 +425,9 @@ void AttributeSet::reserve() { foreach(Attribute& attr, attributes) { if(triangle_mesh) - attr.reserve(triangle_mesh->verts.size(), triangle_mesh->triangles.size(), 0, 0); + attr.reserve(triangle_mesh->verts.size(), triangle_mesh->triangles.size(), triangle_mesh->motion_steps, 0, 0, true); if(curve_mesh) - attr.reserve(0, 0, curve_mesh->curves.size(), curve_mesh->curve_keys.size()); + attr.reserve(0, 0, 0, curve_mesh->curves.size(), curve_mesh->curve_keys.size(), true); } } diff --git a/intern/cycles/render/attribute.h b/intern/cycles/render/attribute.h index 0b8905ae5a3..9fc32db8444 100644 --- a/intern/cycles/render/attribute.h +++ b/intern/cycles/render/attribute.h @@ -27,12 +27,20 @@ CCL_NAMESPACE_BEGIN class Attribute; -class AttributeSet; class AttributeRequest; class AttributeRequestSet; +class AttributeSet; +class ImageManager; class Mesh; struct Transform; +/* Attributes for voxels are images */ + +struct VoxelAttribute { + ImageManager *manager; + int slot; +}; + /* Attribute * * Arbitrary data layers on meshes. 
@@ -48,29 +56,37 @@ public: AttributeElement element; Attribute() {} + ~Attribute(); void set(ustring name, TypeDesc type, AttributeElement element); - void reserve(int numverts, int numfaces, int numcurves, int numkeys); + void reserve(int numverts, int numfaces, int numsteps, int numcurves, int numkeys, bool resize); size_t data_sizeof() const; - size_t element_size(int numverts, int numfaces, int numcurves, int numkeys) const; - size_t buffer_size(int numverts, int numfaces, int numcurves, int numkeys) const; + size_t element_size(int numverts, int numfaces, int numsteps, int numcurves, int numkeys) const; + size_t buffer_size(int numverts, int numfaces, int numsteps, int numcurves, int numkeys) const; char *data() { return (buffer.size())? &buffer[0]: NULL; }; float3 *data_float3() { return (float3*)data(); } + float4 *data_float4() { return (float4*)data(); } float *data_float() { return (float*)data(); } Transform *data_transform() { return (Transform*)data(); } + VoxelAttribute *data_voxel() { return ( VoxelAttribute*)data(); } const char *data() const { return (buffer.size())? 
&buffer[0]: NULL; } const float3 *data_float3() const { return (const float3*)data(); } + const float4 *data_float4() const { return (const float4*)data(); } const float *data_float() const { return (const float*)data(); } const Transform *data_transform() const { return (const Transform*)data(); } + const VoxelAttribute *data_voxel() const { return (const VoxelAttribute*)data(); } void add(const float& f); void add(const float3& f); void add(const Transform& f); + void add(const VoxelAttribute& f); + void add(const char *data); static bool same_storage(TypeDesc a, TypeDesc b); static const char *standard_name(AttributeStandard std); + static AttributeStandard name_standard(const char *name); }; /* Attribute Set @@ -86,7 +102,7 @@ public: AttributeSet(); ~AttributeSet(); - Attribute *add(ustring name, TypeDesc type, AttributeElement element); + Attribute *add(ustring name, TypeDesc type, AttributeElement element, bool resize = true); Attribute *find(ustring name) const; void remove(ustring name); diff --git a/intern/cycles/render/background.cpp b/intern/cycles/render/background.cpp index c9c66dad3fe..a877c52fbed 100644 --- a/intern/cycles/render/background.cpp +++ b/intern/cycles/render/background.cpp @@ -35,7 +35,7 @@ Background::Background() use = true; - visibility = ~0; + visibility = PATH_RAY_ALL_VISIBILITY; shader = 0; transparent = false; @@ -70,7 +70,7 @@ void Background::device_update(Device *device, DeviceScene *dscene, Scene *scene if(scene->shaders[shader]->has_volume) kbackground->volume_shader = kbackground->surface_shader; else - kbackground->volume_shader = SHADER_NO_ID; + kbackground->volume_shader = SHADER_NONE; if(!(visibility & PATH_RAY_DIFFUSE)) kbackground->surface_shader |= SHADER_EXCLUDE_DIFFUSE; diff --git a/intern/cycles/render/bake.cpp b/intern/cycles/render/bake.cpp new file mode 100644 index 00000000000..aa317ab672f --- /dev/null +++ b/intern/cycles/render/bake.cpp @@ -0,0 +1,206 @@ +/* + * Copyright 2011-2014 Blender Foundation + * + * 
Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License + */ + +#include "bake.h" + +CCL_NAMESPACE_BEGIN + +BakeData::BakeData(const int object, const int tri_offset, const int num_pixels): +m_object(object), +m_tri_offset(tri_offset), +m_num_pixels(num_pixels) +{ + m_primitive.resize(num_pixels); + m_u.resize(num_pixels); + m_v.resize(num_pixels); + m_dudx.resize(num_pixels); + m_dudy.resize(num_pixels); + m_dvdx.resize(num_pixels); + m_dvdy.resize(num_pixels); +} + +BakeData::~BakeData() +{ + m_primitive.clear(); + m_u.clear(); + m_v.clear(); + m_dudx.clear(); + m_dudy.clear(); + m_dvdx.clear(); + m_dvdy.clear(); +} + +void BakeData::set(int i, int prim, float uv[2], float dudx, float dudy, float dvdx, float dvdy) +{ + m_primitive[i] = (prim == -1 ? 
-1 : m_tri_offset + prim); + m_u[i] = uv[0]; + m_v[i] = uv[1]; + m_dudx[i] = dudx; + m_dudy[i] = dudy; + m_dvdx[i] = dvdx; + m_dvdy[i] = dvdy; +} + +int BakeData::object() +{ + return m_object; +} + +int BakeData::size() +{ + return m_num_pixels; +} + +bool BakeData::is_valid(int i) +{ + return m_primitive[i] != -1; +} + +uint4 BakeData::data(int i) +{ + return make_uint4( + m_object, + m_primitive[i], + __float_as_int(m_u[i]), + __float_as_int(m_v[i]) + ); +} + +uint4 BakeData::differentials(int i) +{ + return make_uint4( + __float_as_int(m_dudx[i]), + __float_as_int(m_dudy[i]), + __float_as_int(m_dvdx[i]), + __float_as_int(m_dvdy[i]) + ); +} + +BakeManager::BakeManager() +{ + m_bake_data = NULL; + m_is_baking = false; + need_update = true; +} + +BakeManager::~BakeManager() +{ + if(m_bake_data) + delete m_bake_data; +} + +bool BakeManager::get_baking() +{ + return m_is_baking; +} + +void BakeManager::set_baking(const bool value) +{ + m_is_baking = value; +} + +BakeData *BakeManager::init(const int object, const int tri_offset, const int num_pixels) +{ + m_bake_data = new BakeData(object, tri_offset, num_pixels); + return m_bake_data; +} + +bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress, ShaderEvalType shader_type, BakeData *bake_data, float result[]) +{ + size_t limit = bake_data->size(); + + /* setup input for device task */ + device_vector<uint4> d_input; + uint4 *d_input_data = d_input.resize(limit * 2); + size_t d_input_size = 0; + + for(size_t i = 0; i < limit; i++) { + d_input_data[d_input_size++] = bake_data->data(i); + d_input_data[d_input_size++] = bake_data->differentials(i); + } + + if(d_input_size == 0) + return false; + + /* run device task */ + device_vector<float4> d_output; + d_output.resize(limit); + + /* needs to be up to data for attribute access */ + device->const_copy_to("__data", &dscene->data, sizeof(dscene->data)); + + device->mem_alloc(d_input, MEM_READ_ONLY); + device->mem_copy_to(d_input); + 
device->mem_alloc(d_output, MEM_WRITE_ONLY); + + DeviceTask task(DeviceTask::SHADER); + task.shader_input = d_input.device_pointer; + task.shader_output = d_output.device_pointer; + task.shader_eval_type = shader_type; + task.shader_x = 0; + task.shader_w = d_output.size(); + task.get_cancel = function_bind(&Progress::get_cancel, &progress); + + device->task_add(task); + device->task_wait(); + + if(progress.get_cancel()) { + device->mem_free(d_input); + device->mem_free(d_output); + m_is_baking = false; + return false; + } + + device->mem_copy_from(d_output, 0, 1, d_output.size(), sizeof(float4)); + device->mem_free(d_input); + device->mem_free(d_output); + + /* read result */ + int k = 0; + + float4 *offset = (float4*)d_output.data_pointer; + + size_t depth = 4; + for(size_t i = 0; i < limit; i++) { + size_t index = i * depth; + float4 out = offset[k++]; + + if(bake_data->is_valid(i)) { + for(size_t j=0; j < 4; j++) { + result[index + j] = out[j]; + } + } + } + + m_is_baking = false; + return true; +} + +void BakeManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress) +{ + if(!need_update) + return; + + if(progress.get_cancel()) return; + + need_update = false; +} + +void BakeManager::device_free(Device *device, DeviceScene *dscene) +{ +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/render/bake.h b/intern/cycles/render/bake.h new file mode 100644 index 00000000000..ea403f7d39a --- /dev/null +++ b/intern/cycles/render/bake.h @@ -0,0 +1,77 @@ +/* + * Copyright 2011-2014 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License + */ + +#ifndef __BAKE_H__ +#define __BAKE_H__ + +#include "util_vector.h" +#include "device.h" +#include "scene.h" +#include "session.h" + +CCL_NAMESPACE_BEGIN + +class BakeData { +public: + BakeData(const int object, const int tri_offset, const int num_pixels); + ~BakeData(); + + void set(int i, int prim, float uv[2], float dudx, float dudy, float dvdx, float dvdy); + int object(); + int size(); + uint4 data(int i); + uint4 differentials(int i); + bool is_valid(int i); + +private: + int m_object; + int m_tri_offset; + int m_num_pixels; + vector<int>m_primitive; + vector<float>m_u; + vector<float>m_v; + vector<float>m_dudx; + vector<float>m_dudy; + vector<float>m_dvdx; + vector<float>m_dvdy; +}; + +class BakeManager { +public: + BakeManager(); + ~BakeManager(); + + bool get_baking(); + void set_baking(const bool value); + + BakeData *init(const int object, const int tri_offset, const int num_pixels); + + bool bake(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress, ShaderEvalType shader_type, BakeData *bake_data, float result[]); + + void device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress); + void device_free(Device *device, DeviceScene *dscene); + + bool need_update; + +private: + BakeData *m_bake_data; + bool m_is_baking; +}; + +CCL_NAMESPACE_END + +#endif /* __BAKE_H__ */ + diff --git a/intern/cycles/render/blackbody.cpp b/intern/cycles/render/blackbody.cpp index ab61886e262..89af714e8ec 100644 --- a/intern/cycles/render/blackbody.cpp +++ b/intern/cycles/render/blackbody.cpp @@ -59,33 +59,33 @@ 
vector<float> blackbody_table() */ const float cie_colour_match[81][3] = { - {0.0014,0.0000,0.0065}, {0.0022,0.0001,0.0105}, {0.0042,0.0001,0.0201}, - {0.0076,0.0002,0.0362}, {0.0143,0.0004,0.0679}, {0.0232,0.0006,0.1102}, - {0.0435,0.0012,0.2074}, {0.0776,0.0022,0.3713}, {0.1344,0.0040,0.6456}, - {0.2148,0.0073,1.0391}, {0.2839,0.0116,1.3856}, {0.3285,0.0168,1.6230}, - {0.3483,0.0230,1.7471}, {0.3481,0.0298,1.7826}, {0.3362,0.0380,1.7721}, - {0.3187,0.0480,1.7441}, {0.2908,0.0600,1.6692}, {0.2511,0.0739,1.5281}, - {0.1954,0.0910,1.2876}, {0.1421,0.1126,1.0419}, {0.0956,0.1390,0.8130}, - {0.0580,0.1693,0.6162}, {0.0320,0.2080,0.4652}, {0.0147,0.2586,0.3533}, - {0.0049,0.3230,0.2720}, {0.0024,0.4073,0.2123}, {0.0093,0.5030,0.1582}, - {0.0291,0.6082,0.1117}, {0.0633,0.7100,0.0782}, {0.1096,0.7932,0.0573}, - {0.1655,0.8620,0.0422}, {0.2257,0.9149,0.0298}, {0.2904,0.9540,0.0203}, - {0.3597,0.9803,0.0134}, {0.4334,0.9950,0.0087}, {0.5121,1.0000,0.0057}, - {0.5945,0.9950,0.0039}, {0.6784,0.9786,0.0027}, {0.7621,0.9520,0.0021}, - {0.8425,0.9154,0.0018}, {0.9163,0.8700,0.0017}, {0.9786,0.8163,0.0014}, - {1.0263,0.7570,0.0011}, {1.0567,0.6949,0.0010}, {1.0622,0.6310,0.0008}, - {1.0456,0.5668,0.0006}, {1.0026,0.5030,0.0003}, {0.9384,0.4412,0.0002}, - {0.8544,0.3810,0.0002}, {0.7514,0.3210,0.0001}, {0.6424,0.2650,0.0000}, - {0.5419,0.2170,0.0000}, {0.4479,0.1750,0.0000}, {0.3608,0.1382,0.0000}, - {0.2835,0.1070,0.0000}, {0.2187,0.0816,0.0000}, {0.1649,0.0610,0.0000}, - {0.1212,0.0446,0.0000}, {0.0874,0.0320,0.0000}, {0.0636,0.0232,0.0000}, - {0.0468,0.0170,0.0000}, {0.0329,0.0119,0.0000}, {0.0227,0.0082,0.0000}, - {0.0158,0.0057,0.0000}, {0.0114,0.0041,0.0000}, {0.0081,0.0029,0.0000}, - {0.0058,0.0021,0.0000}, {0.0041,0.0015,0.0000}, {0.0029,0.0010,0.0000}, - {0.0020,0.0007,0.0000}, {0.0014,0.0005,0.0000}, {0.0010,0.0004,0.0000}, - {0.0007,0.0002,0.0000}, {0.0005,0.0002,0.0000}, {0.0003,0.0001,0.0000}, - {0.0002,0.0001,0.0000}, {0.0002,0.0001,0.0000}, {0.0001,0.0000,0.0000}, 
- {0.0001,0.0000,0.0000}, {0.0001,0.0000,0.0000}, {0.0000,0.0000,0.0000} + {0.0014f,0.0000f,0.0065f}, {0.0022f,0.0001f,0.0105f}, {0.0042f,0.0001f,0.0201f}, + {0.0076f,0.0002f,0.0362f}, {0.0143f,0.0004f,0.0679f}, {0.0232f,0.0006f,0.1102f}, + {0.0435f,0.0012f,0.2074f}, {0.0776f,0.0022f,0.3713f}, {0.1344f,0.0040f,0.6456f}, + {0.2148f,0.0073f,1.0391f}, {0.2839f,0.0116f,1.3856f}, {0.3285f,0.0168f,1.6230f}, + {0.3483f,0.0230f,1.7471f}, {0.3481f,0.0298f,1.7826f}, {0.3362f,0.0380f,1.7721f}, + {0.3187f,0.0480f,1.7441f}, {0.2908f,0.0600f,1.6692f}, {0.2511f,0.0739f,1.5281f}, + {0.1954f,0.0910f,1.2876f}, {0.1421f,0.1126f,1.0419f}, {0.0956f,0.1390f,0.8130f}, + {0.0580f,0.1693f,0.6162f}, {0.0320f,0.2080f,0.4652f}, {0.0147f,0.2586f,0.3533f}, + {0.0049f,0.3230f,0.2720f}, {0.0024f,0.4073f,0.2123f}, {0.0093f,0.5030f,0.1582f}, + {0.0291f,0.6082f,0.1117f}, {0.0633f,0.7100f,0.0782f}, {0.1096f,0.7932f,0.0573f}, + {0.1655f,0.8620f,0.0422f}, {0.2257f,0.9149f,0.0298f}, {0.2904f,0.9540f,0.0203f}, + {0.3597f,0.9803f,0.0134f}, {0.4334f,0.9950f,0.0087f}, {0.5121f,1.0000f,0.0057f}, + {0.5945f,0.9950f,0.0039f}, {0.6784f,0.9786f,0.0027f}, {0.7621f,0.9520f,0.0021f}, + {0.8425f,0.9154f,0.0018f}, {0.9163f,0.8700f,0.0017f}, {0.9786f,0.8163f,0.0014f}, + {1.0263f,0.7570f,0.0011f}, {1.0567f,0.6949f,0.0010f}, {1.0622f,0.6310f,0.0008f}, + {1.0456f,0.5668f,0.0006f}, {1.0026f,0.5030f,0.0003f}, {0.9384f,0.4412f,0.0002f}, + {0.8544f,0.3810f,0.0002f}, {0.7514f,0.3210f,0.0001f}, {0.6424f,0.2650f,0.0000f}, + {0.5419f,0.2170f,0.0000f}, {0.4479f,0.1750f,0.0000f}, {0.3608f,0.1382f,0.0000f}, + {0.2835f,0.1070f,0.0000f}, {0.2187f,0.0816f,0.0000f}, {0.1649f,0.0610f,0.0000f}, + {0.1212f,0.0446f,0.0000f}, {0.0874f,0.0320f,0.0000f}, {0.0636f,0.0232f,0.0000f}, + {0.0468f,0.0170f,0.0000f}, {0.0329f,0.0119f,0.0000f}, {0.0227f,0.0082f,0.0000f}, + {0.0158f,0.0057f,0.0000f}, {0.0114f,0.0041f,0.0000f}, {0.0081f,0.0029f,0.0000f}, + {0.0058f,0.0021f,0.0000f}, {0.0041f,0.0015f,0.0000f}, {0.0029f,0.0010f,0.0000f}, + 
{0.0020f,0.0007f,0.0000f}, {0.0014f,0.0005f,0.0000f}, {0.0010f,0.0004f,0.0000f}, + {0.0007f,0.0002f,0.0000f}, {0.0005f,0.0002f,0.0000f}, {0.0003f,0.0001f,0.0000f}, + {0.0002f,0.0001f,0.0000f}, {0.0002f,0.0001f,0.0000f}, {0.0001f,0.0000f,0.0000f}, + {0.0001f,0.0000f,0.0000f}, {0.0001f,0.0000f,0.0000f}, {0.0000f,0.0000f,0.0000f} }; const double c1 = 3.74183e-16; // 2*pi*h*c^2, W*m^2 diff --git a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp index da1b7484b77..fc65922fc87 100644 --- a/intern/cycles/render/buffers.cpp +++ b/intern/cycles/render/buffers.cpp @@ -358,14 +358,14 @@ void DisplayBuffer::draw_set(int width, int height) draw_height = height; } -void DisplayBuffer::draw(Device *device) +void DisplayBuffer::draw(Device *device, const DeviceDrawParams& draw_params) { if(draw_width != 0 && draw_height != 0) { glPushMatrix(); glTranslatef(params.full_x, params.full_y, 0.0f); device_memory& rgba = rgba_data(); - device->draw_pixels(rgba, 0, draw_width, draw_height, 0, params.width, params.height, transparent); + device->draw_pixels(rgba, 0, draw_width, draw_height, 0, params.width, params.height, transparent, draw_params); glPopMatrix(); } diff --git a/intern/cycles/render/buffers.h b/intern/cycles/render/buffers.h index 81eaf41077f..27ab20bbafd 100644 --- a/intern/cycles/render/buffers.h +++ b/intern/cycles/render/buffers.h @@ -31,6 +31,7 @@ CCL_NAMESPACE_BEGIN class Device; +struct DeviceDrawParams; struct float4; /* Buffer Parameters @@ -114,7 +115,7 @@ public: void write(Device *device, const string& filename); void draw_set(int width, int height); - void draw(Device *device); + void draw(Device *device, const DeviceDrawParams& draw_params); bool draw_ready(); device_memory& rgba_data(); diff --git a/intern/cycles/render/camera.cpp b/intern/cycles/render/camera.cpp index edf7f7fb09d..8659fe4f7a3 100644 --- a/intern/cycles/render/camera.cpp +++ b/intern/cycles/render/camera.cpp @@ -44,8 +44,8 @@ Camera::Camera() fisheye_lens = 10.5f; fov = 
M_PI_4_F; - sensorwidth = 0.036; - sensorheight = 0.024; + sensorwidth = 0.036f; + sensorheight = 0.024f; nearclip = 1e-5f; farclip = 1e5f; @@ -78,6 +78,24 @@ Camera::~Camera() { } +void Camera::compute_auto_viewplane() +{ + float aspect = (float)width/(float)height; + + if(width >= height) { + viewplane.left = -aspect; + viewplane.right = aspect; + viewplane.bottom = -1.0f; + viewplane.top = 1.0f; + } + else { + viewplane.left = -1.0f; + viewplane.right = 1.0f; + viewplane.bottom = -1.0f/aspect; + viewplane.top = 1.0f/aspect; + } +} + void Camera::update() { if(!need_update) diff --git a/intern/cycles/render/camera.h b/intern/cycles/render/camera.h index 4e8f3d72111..c28670bc55f 100644 --- a/intern/cycles/render/camera.h +++ b/intern/cycles/render/camera.h @@ -102,6 +102,8 @@ public: /* functions */ Camera(); ~Camera(); + + void compute_auto_viewplane(); void update(); diff --git a/intern/cycles/render/curves.cpp b/intern/cycles/render/curves.cpp index 6e6b11ca92f..2c96ffa655e 100644 --- a/intern/cycles/render/curves.cpp +++ b/intern/cycles/render/curves.cpp @@ -110,7 +110,7 @@ void CurveSystemManager::device_update(Device *device, DeviceScene *dscene, Scen progress.set_status("Updating Hair settings", "Copying Hair settings to device"); - KernelCurves *kcurve= &dscene->data.curve; + KernelCurves *kcurve = &dscene->data.curve; kcurve->curveflags = 0; diff --git a/intern/cycles/render/film.cpp b/intern/cycles/render/film.cpp index 30ad86a8d4c..c1aefbcfbbc 100644 --- a/intern/cycles/render/film.cpp +++ b/intern/cycles/render/film.cpp @@ -155,6 +155,9 @@ void Pass::add(PassType type, vector<Pass>& passes) pass.components = 4; pass.exposure = false; break; + case PASS_LIGHT: + /* ignores */ + break; } passes.push_back(pass); @@ -393,6 +396,10 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) kfilm->pass_shadow = kfilm->pass_stride; kfilm->use_light_pass = 1; break; + + case PASS_LIGHT: + kfilm->use_light_pass = 1; + break; case PASS_NONE: 
break; } diff --git a/intern/cycles/render/graph.cpp b/intern/cycles/render/graph.cpp index 9142eb5308c..0ff904d06e7 100644 --- a/intern/cycles/render/graph.cpp +++ b/intern/cycles/render/graph.cpp @@ -227,7 +227,7 @@ void ShaderGraph::disconnect(ShaderInput *to) from->links.erase(remove(from->links.begin(), from->links.end(), to), from->links.end()); } -void ShaderGraph::finalize(bool do_bump, bool do_osl, bool do_multi_transform) +void ShaderGraph::finalize(bool do_bump, bool do_osl) { /* before compiling, the shader graph may undergo a number of modifications. * currently we set default geometry shader inputs, and create automatic bump @@ -242,17 +242,15 @@ void ShaderGraph::finalize(bool do_bump, bool do_osl, bool do_multi_transform) if(do_bump) bump_from_displacement(); - if(do_multi_transform) { - ShaderInput *surface_in = output()->input("Surface"); - ShaderInput *volume_in = output()->input("Volume"); + ShaderInput *surface_in = output()->input("Surface"); + ShaderInput *volume_in = output()->input("Volume"); - /* todo: make this work when surface and volume closures are tangled up */ + /* todo: make this work when surface and volume closures are tangled up */ - if(surface_in->link) - transform_multi_closure(surface_in->link->parent, NULL, false); - if(volume_in->link) - transform_multi_closure(volume_in->link->parent, NULL, true); - } + if(surface_in->link) + transform_multi_closure(surface_in->link->parent, NULL, false); + if(volume_in->link) + transform_multi_closure(volume_in->link->parent, NULL, true); finalized = true; } diff --git a/intern/cycles/render/graph.h b/intern/cycles/render/graph.h index f31e2103229..89a066195d6 100644 --- a/intern/cycles/render/graph.h +++ b/intern/cycles/render/graph.h @@ -193,6 +193,7 @@ public: virtual bool has_surface_bssrdf() { return false; } virtual bool has_converter_blackbody() { return false; } virtual bool has_bssrdf_bump() { return false; } + virtual bool has_spatial_varying() { return false; } 
vector<ShaderInput*> inputs; vector<ShaderOutput*> outputs; @@ -246,7 +247,7 @@ public: void disconnect(ShaderInput *to); void remove_unneeded_nodes(); - void finalize(bool do_bump = false, bool do_osl = false, bool do_multi_closure = false); + void finalize(bool do_bump = false, bool do_osl = false); protected: typedef pair<ShaderNode* const, ShaderNode*> NodePair; diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp index 91aae6f3ec3..86755badc42 100644 --- a/intern/cycles/render/image.cpp +++ b/intern/cycles/render/image.cpp @@ -59,11 +59,16 @@ void ImageManager::set_osl_texture_system(void *texture_system) osl_texture_system = texture_system; } -void ImageManager::set_extended_image_limits(void) +void ImageManager::set_extended_image_limits(const DeviceInfo& info) { - tex_num_images = TEX_EXTENDED_NUM_IMAGES; - tex_num_float_images = TEX_EXTENDED_NUM_FLOAT_IMAGES; - tex_image_byte_start = TEX_EXTENDED_IMAGE_BYTE_START; + if(info.type == DEVICE_CPU) { + tex_num_images = TEX_EXTENDED_NUM_IMAGES_CPU; + tex_num_float_images = TEX_EXTENDED_NUM_FLOAT_IMAGES; + tex_image_byte_start = TEX_EXTENDED_IMAGE_BYTE_START; + } + else if((info.type == DEVICE_CUDA || info.type == DEVICE_MULTI) && info.extended_images) { + tex_num_images = TEX_EXTENDED_NUM_IMAGES_GPU; + } } bool ImageManager::set_animation_frame_update(int frame) @@ -90,8 +95,8 @@ bool ImageManager::is_float_image(const string& filename, void *builtin_data, bo if(builtin_data) { if(builtin_image_info_cb) { - int width, height, channels; - builtin_image_info_cb(filename, builtin_data, is_float, width, height, channels); + int width, height, depth, channels; + builtin_image_info_cb(filename, builtin_data, is_float, width, height, depth, channels); } if(is_float) @@ -145,7 +150,14 @@ bool ImageManager::is_float_image(const string& filename, void *builtin_data, bo return is_float; } -int ImageManager::add_image(const string& filename, void *builtin_data, bool animated, bool& is_float, bool& 
is_linear) +static bool image_equals(ImageManager::Image *image, const string& filename, void *builtin_data, InterpolationType interpolation) +{ + return image->filename == filename && + image->builtin_data == builtin_data && + image->interpolation == interpolation; +} + +int ImageManager::add_image(const string& filename, void *builtin_data, bool animated, bool& is_float, bool& is_linear, InterpolationType interpolation, bool use_alpha) { Image *img; size_t slot; @@ -156,7 +168,7 @@ int ImageManager::add_image(const string& filename, void *builtin_data, bool ani if(is_float) { /* find existing image */ for(slot = 0; slot < float_images.size(); slot++) { - if(float_images[slot] && float_images[slot]->filename == filename) { + if(float_images[slot] && image_equals(float_images[slot], filename, builtin_data, interpolation)) { float_images[slot]->users++; return slot; } @@ -185,13 +197,15 @@ int ImageManager::add_image(const string& filename, void *builtin_data, bool ani img->builtin_data = builtin_data; img->need_load = true; img->animated = animated; + img->interpolation = interpolation; img->users = 1; + img->use_alpha = use_alpha; float_images[slot] = img; } else { for(slot = 0; slot < images.size(); slot++) { - if(images[slot] && images[slot]->filename == filename) { + if(images[slot] && image_equals(images[slot], filename, builtin_data, interpolation)) { images[slot]->users++; return slot+tex_image_byte_start; } @@ -220,7 +234,9 @@ int ImageManager::add_image(const string& filename, void *builtin_data, bool ani img->builtin_data = builtin_data; img->need_load = true; img->animated = animated; + img->interpolation = interpolation; img->users = 1; + img->use_alpha = use_alpha; images[slot] = img; @@ -231,22 +247,43 @@ int ImageManager::add_image(const string& filename, void *builtin_data, bool ani return slot; } -void ImageManager::remove_image(const string& filename, void *builtin_data) +void ImageManager::remove_image(int slot) { - size_t slot; + if(slot >= 
tex_image_byte_start) { + slot -= tex_image_byte_start; - for(slot = 0; slot < images.size(); slot++) { - if(images[slot] && images[slot]->filename == filename && images[slot]->builtin_data == builtin_data) { - /* decrement user count */ - images[slot]->users--; - assert(images[slot]->users >= 0); + assert(images[slot] != NULL); + + /* decrement user count */ + images[slot]->users--; + assert(images[slot]->users >= 0); + + /* don't remove immediately, rather do it all together later on. one of + * the reasons for this is that on shader changes we add and remove nodes + * that use them, but we do not want to reload the image all the time. */ + if(images[slot]->users == 0) + need_update = true; + } + else { + /* decrement user count */ + float_images[slot]->users--; + assert(float_images[slot]->users >= 0); + + /* don't remove immediately, rather do it all together later on. one of + * the reasons for this is that on shader changes we add and remove nodes + * that use them, but we do not want to reload the image all the time. */ + if(float_images[slot]->users == 0) + need_update = true; + } +} - /* don't remove immediately, rather do it all together later on. one of - * the reasons for this is that on shader changes we add and remove nodes - * that use them, but we do not want to reload the image all the time. 
*/ - if(images[slot]->users == 0) - need_update = true; +void ImageManager::remove_image(const string& filename, void *builtin_data, InterpolationType interpolation) +{ + size_t slot; + for(slot = 0; slot < images.size(); slot++) { + if(images[slot] && image_equals(images[slot], filename, builtin_data, interpolation)) { + remove_image(slot+tex_image_byte_start); break; } } @@ -254,17 +291,8 @@ void ImageManager::remove_image(const string& filename, void *builtin_data) if(slot == images.size()) { /* see if it's in a float texture slot */ for(slot = 0; slot < float_images.size(); slot++) { - if(float_images[slot] && float_images[slot]->filename == filename && float_images[slot]->builtin_data == builtin_data) { - /* decrement user count */ - float_images[slot]->users--; - assert(float_images[slot]->users >= 0); - - /* don't remove immediately, rather do it all together later on. one of - * the reasons for this is that on shader changes we add and remove nodes - * that use them, but we do not want to reload the image all the time. 
*/ - if(float_images[slot]->users == 0) - need_update = true; - + if(float_images[slot] && image_equals(float_images[slot], filename, builtin_data, interpolation)) { + remove_image(slot); break; } } @@ -277,7 +305,7 @@ bool ImageManager::file_load_image(Image *img, device_vector<uchar4>& tex_img) return false; ImageInput *in = NULL; - int width, height, components; + int width, height, depth, components; if(!img->builtin_data) { /* load image from file through OIIO */ @@ -286,15 +314,20 @@ bool ImageManager::file_load_image(Image *img, device_vector<uchar4>& tex_img) if(!in) return false; - ImageSpec spec; + ImageSpec spec = ImageSpec(); + ImageSpec config = ImageSpec(); + + if(img->use_alpha == false) + config.attribute("oiio:UnassociatedAlpha", 1); - if(!in->open(img->filename, spec)) { + if(!in->open(img->filename, spec, config)) { delete in; return false; } width = spec.width; height = spec.height; + depth = spec.depth; components = spec.nchannels; } else { @@ -303,7 +336,7 @@ bool ImageManager::file_load_image(Image *img, device_vector<uchar4>& tex_img) return false; bool is_float; - builtin_image_info_cb(img->filename, img->builtin_data, is_float, width, height, components); + builtin_image_info_cb(img->filename, img->builtin_data, is_float, width, height, depth, components); } /* we only handle certain number of components */ @@ -317,15 +350,21 @@ bool ImageManager::file_load_image(Image *img, device_vector<uchar4>& tex_img) } /* read RGBA pixels */ - uchar *pixels = (uchar*)tex_img.resize(width, height); - int scanlinesize = width*components*sizeof(uchar); + uchar *pixels = (uchar*)tex_img.resize(width, height, depth); if(in) { - in->read_image(TypeDesc::UINT8, - (uchar*)pixels + (height-1)*scanlinesize, - AutoStride, - -scanlinesize, - AutoStride); + if(depth <= 1) { + int scanlinesize = width*components*sizeof(uchar); + + in->read_image(TypeDesc::UINT8, + (uchar*)pixels + (height-1)*scanlinesize, + AutoStride, + -scanlinesize, + AutoStride); + } + else { 
+ in->read_image(TypeDesc::UINT8, (uchar*)pixels); + } in->close(); delete in; @@ -335,7 +374,7 @@ bool ImageManager::file_load_image(Image *img, device_vector<uchar4>& tex_img) } if(components == 2) { - for(int i = width*height-1; i >= 0; i--) { + for(int i = width*height*depth-1; i >= 0; i--) { pixels[i*4+3] = pixels[i*2+1]; pixels[i*4+2] = pixels[i*2+0]; pixels[i*4+1] = pixels[i*2+0]; @@ -343,7 +382,7 @@ bool ImageManager::file_load_image(Image *img, device_vector<uchar4>& tex_img) } } else if(components == 3) { - for(int i = width*height-1; i >= 0; i--) { + for(int i = width*height*depth-1; i >= 0; i--) { pixels[i*4+3] = 255; pixels[i*4+2] = pixels[i*3+2]; pixels[i*4+1] = pixels[i*3+1]; @@ -351,7 +390,7 @@ bool ImageManager::file_load_image(Image *img, device_vector<uchar4>& tex_img) } } else if(components == 1) { - for(int i = width*height-1; i >= 0; i--) { + for(int i = width*height*depth-1; i >= 0; i--) { pixels[i*4+3] = 255; pixels[i*4+2] = pixels[i]; pixels[i*4+1] = pixels[i]; @@ -359,6 +398,12 @@ bool ImageManager::file_load_image(Image *img, device_vector<uchar4>& tex_img) } } + if(img->use_alpha == false) { + for(int i = width*height*depth-1; i >= 0; i--) { + pixels[i*4+3] = 255; + } + } + return true; } @@ -368,7 +413,7 @@ bool ImageManager::file_load_float_image(Image *img, device_vector<float4>& tex_ return false; ImageInput *in = NULL; - int width, height, components; + int width, height, depth, components; if(!img->builtin_data) { /* load image from file through OIIO */ @@ -377,9 +422,13 @@ bool ImageManager::file_load_float_image(Image *img, device_vector<float4>& tex_ if(!in) return false; - ImageSpec spec; + ImageSpec spec = ImageSpec(); + ImageSpec config = ImageSpec(); + + if(img->use_alpha == false) + config.attribute("oiio:UnassociatedAlpha",1); - if(!in->open(img->filename, spec)) { + if(!in->open(img->filename, spec, config)) { delete in; return false; } @@ -387,6 +436,7 @@ bool ImageManager::file_load_float_image(Image *img, 
device_vector<float4>& tex_ /* we only handle certain number of components */ width = spec.width; height = spec.height; + depth = spec.depth; components = spec.nchannels; } else { @@ -395,7 +445,7 @@ bool ImageManager::file_load_float_image(Image *img, device_vector<float4>& tex_ return false; bool is_float; - builtin_image_info_cb(img->filename, img->builtin_data, is_float, width, height, components); + builtin_image_info_cb(img->filename, img->builtin_data, is_float, width, height, depth, components); } if(!(components >= 1 && components <= 4)) { @@ -407,15 +457,21 @@ bool ImageManager::file_load_float_image(Image *img, device_vector<float4>& tex_ } /* read RGBA pixels */ - float *pixels = (float*)tex_img.resize(width, height); - int scanlinesize = width*components*sizeof(float); + float *pixels = (float*)tex_img.resize(width, height, depth); if(in) { - in->read_image(TypeDesc::FLOAT, - (uchar*)pixels + (height-1)*scanlinesize, - AutoStride, - -scanlinesize, - AutoStride); + if(depth <= 1) { + int scanlinesize = width*components*sizeof(float); + + in->read_image(TypeDesc::FLOAT, + (uchar*)pixels + (height-1)*scanlinesize, + AutoStride, + -scanlinesize, + AutoStride); + } + else { + in->read_image(TypeDesc::FLOAT, (uchar*)pixels); + } in->close(); delete in; @@ -425,7 +481,7 @@ bool ImageManager::file_load_float_image(Image *img, device_vector<float4>& tex_ } if(components == 2) { - for(int i = width*height-1; i >= 0; i--) { + for(int i = width*height*depth-1; i >= 0; i--) { pixels[i*4+3] = pixels[i*2+1]; pixels[i*4+2] = pixels[i*2+0]; pixels[i*4+1] = pixels[i*2+0]; @@ -433,7 +489,7 @@ bool ImageManager::file_load_float_image(Image *img, device_vector<float4>& tex_ } } else if(components == 3) { - for(int i = width*height-1; i >= 0; i--) { + for(int i = width*height*depth-1; i >= 0; i--) { pixels[i*4+3] = 1.0f; pixels[i*4+2] = pixels[i*3+2]; pixels[i*4+1] = pixels[i*3+1]; @@ -441,7 +497,7 @@ bool ImageManager::file_load_float_image(Image *img, 
device_vector<float4>& tex_ } } else if(components == 1) { - for(int i = width*height-1; i >= 0; i--) { + for(int i = width*height*depth-1; i >= 0; i--) { pixels[i*4+3] = 1.0f; pixels[i*4+2] = pixels[i]; pixels[i*4+1] = pixels[i]; @@ -449,6 +505,12 @@ bool ImageManager::file_load_float_image(Image *img, device_vector<float4>& tex_ } } + if(img->use_alpha == false) { + for(int i = width*height*depth-1; i >= 0; i--) { + pixels[i*4+3] = 1.0f; + } + } + return true; } @@ -456,9 +518,7 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, int sl { if(progress->get_cancel()) return; - if(osl_texture_system) - return; - + Image *img; bool is_float; @@ -471,6 +531,9 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, int sl is_float = true; } + if(osl_texture_system && !img->builtin_data) + return; + if(is_float) { string filename = path_filename(float_images[slot]->filename); progress->set_status("Updating Images", "Loading " + filename); @@ -499,7 +562,7 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, int sl if(!pack_images) { thread_scoped_lock device_lock(device_mutex); - device->tex_alloc(name.c_str(), tex_img, true, true); + device->tex_alloc(name.c_str(), tex_img, img->interpolation, true); } } else { @@ -530,7 +593,7 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, int sl if(!pack_images) { thread_scoped_lock device_lock(device_mutex); - device->tex_alloc(name.c_str(), tex_img, true, true); + device->tex_alloc(name.c_str(), tex_img, img->interpolation, true); } } @@ -552,7 +615,7 @@ void ImageManager::device_free_image(Device *device, DeviceScene *dscene, int sl } if(img) { - if(osl_texture_system) { + if(osl_texture_system && !img->builtin_data) { #ifdef WITH_OSL ustring filename(images[slot]->filename); ((OSL::TextureSystem*)osl_texture_system)->invalidate(filename); @@ -602,7 +665,7 @@ void ImageManager::device_update(Device *device, DeviceScene *dscene, 
Progress& device_free_image(device, dscene, slot + tex_image_byte_start); } else if(images[slot]->need_load) { - if(!osl_texture_system) + if(!osl_texture_system || images[slot]->builtin_data) pool.push(function_bind(&ImageManager::device_load_image, this, device, dscene, slot + tex_image_byte_start, &progress)); } } @@ -615,7 +678,7 @@ void ImageManager::device_update(Device *device, DeviceScene *dscene, Progress& device_free_image(device, dscene, slot); } else if(float_images[slot]->need_load) { - if(!osl_texture_system) + if(!osl_texture_system || float_images[slot]->builtin_data) pool.push(function_bind(&ImageManager::device_load_image, this, device, dscene, slot, &progress)); } } @@ -653,16 +716,32 @@ void ImageManager::device_pack_images(Device *device, DeviceScene *dscene, Progr device_vector<uchar4>& tex_img = dscene->tex_image[slot]; - info[slot] = make_uint4(tex_img.data_width, tex_img.data_height, offset, 1); + /* todo: support 3D textures, only CPU for now */ + + /* The image options are packed + bit 0 -> periodic + bit 1 + 2 -> interpolation type */ + uint8_t interpolation = (images[slot]->interpolation << 1) + 1; + info[slot] = make_uint4(tex_img.data_width, tex_img.data_height, offset, interpolation); memcpy(pixels+offset, (void*)tex_img.data_pointer, tex_img.memory_size()); offset += tex_img.size(); } - if(dscene->tex_image_packed.size()) + if(dscene->tex_image_packed.size()) { + if(dscene->tex_image_packed.device_pointer) { + thread_scoped_lock device_lock(device_mutex); + device->tex_free(dscene->tex_image_packed); + } device->tex_alloc("__tex_image_packed", dscene->tex_image_packed); - if(dscene->tex_image_packed_info.size()) + } + if(dscene->tex_image_packed_info.size()) { + if(dscene->tex_image_packed_info.device_pointer) { + thread_scoped_lock device_lock(device_mutex); + device->tex_free(dscene->tex_image_packed_info); + } device->tex_alloc("__tex_image_packed_info", dscene->tex_image_packed_info); + } } void ImageManager::device_free(Device 
*device, DeviceScene *dscene) diff --git a/intern/cycles/render/image.h b/intern/cycles/render/image.h index 187c5fd0f02..561550fe0d2 100644 --- a/intern/cycles/render/image.h +++ b/intern/cycles/render/image.h @@ -17,6 +17,7 @@ #ifndef __IMAGE_H__ #define __IMAGE_H__ +#include "device.h" #include "device_memory.h" #include "util_string.h" @@ -27,11 +28,16 @@ CCL_NAMESPACE_BEGIN +/* generic */ #define TEX_NUM_IMAGES 95 #define TEX_IMAGE_BYTE_START TEX_NUM_FLOAT_IMAGES +/* extended gpu */ +#define TEX_EXTENDED_NUM_IMAGES_GPU 145 + +/* extended cpu */ #define TEX_EXTENDED_NUM_FLOAT_IMAGES 1024 -#define TEX_EXTENDED_NUM_IMAGES 1024 +#define TEX_EXTENDED_NUM_IMAGES_CPU 1024 #define TEX_EXTENDED_IMAGE_BYTE_START TEX_EXTENDED_NUM_FLOAT_IMAGES /* color to use when textures are not found */ @@ -49,8 +55,9 @@ public: ImageManager(); ~ImageManager(); - int add_image(const string& filename, void *builtin_data, bool animated, bool& is_float, bool& is_linear); - void remove_image(const string& filename, void *builtin_data); + int add_image(const string& filename, void *builtin_data, bool animated, bool& is_float, bool& is_linear, InterpolationType interpolation, bool use_alpha); + void remove_image(int slot); + void remove_image(const string& filename, void *builtin_data, InterpolationType interpolation); bool is_float_image(const string& filename, void *builtin_data, bool& is_linear); void device_update(Device *device, DeviceScene *dscene, Progress& progress); @@ -58,30 +65,34 @@ public: void set_osl_texture_system(void *texture_system); void set_pack_images(bool pack_images_); - void set_extended_image_limits(void); + void set_extended_image_limits(const DeviceInfo& info); bool set_animation_frame_update(int frame); bool need_update; - boost::function<void(const string &filename, void *data, bool &is_float, int &width, int &height, int &channels)> builtin_image_info_cb; + boost::function<void(const string &filename, void *data, bool &is_float, int &width, int &height, int 
&depth, int &channels)> builtin_image_info_cb; boost::function<bool(const string &filename, void *data, unsigned char *pixels)> builtin_image_pixels_cb; boost::function<bool(const string &filename, void *data, float *pixels)> builtin_image_float_pixels_cb; -private: - int tex_num_images; - int tex_num_float_images; - int tex_image_byte_start; - thread_mutex device_mutex; - int animation_frame; struct Image { string filename; void *builtin_data; + bool use_alpha; bool need_load; bool animated; + InterpolationType interpolation; + int users; }; +private: + int tex_num_images; + int tex_num_float_images; + int tex_image_byte_start; + thread_mutex device_mutex; + int animation_frame; + vector<Image*> images; vector<Image*> float_images; void *osl_texture_system; diff --git a/intern/cycles/render/integrator.cpp b/intern/cycles/render/integrator.cpp index f48e04f31e1..59a0de07e5a 100644 --- a/intern/cycles/render/integrator.cpp +++ b/intern/cycles/render/integrator.cpp @@ -34,15 +34,14 @@ Integrator::Integrator() max_glossy_bounce = max_bounce; max_transmission_bounce = max_bounce; max_volume_bounce = max_bounce; - probalistic_termination = true; transparent_min_bounce = min_bounce; transparent_max_bounce = max_bounce; - transparent_probalistic = true; transparent_shadows = false; + volume_homogeneous_sampling = 0; volume_max_steps = 1024; - volume_step_size = 0.1; + volume_step_size = 0.1f; no_caustics = false; filter_glossy = 0.0f; @@ -82,10 +81,7 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene /* integrator parameters */ kintegrator->max_bounce = max_bounce + 1; - if(probalistic_termination) - kintegrator->min_bounce = min_bounce + 1; - else - kintegrator->min_bounce = kintegrator->max_bounce; + kintegrator->min_bounce = min_bounce + 1; kintegrator->max_diffuse_bounce = max_diffuse_bounce + 1; kintegrator->max_glossy_bounce = max_glossy_bounce + 1; @@ -97,13 +93,11 @@ void Integrator::device_update(Device *device, DeviceScene 
*dscene, Scene *scene kintegrator->max_volume_bounce = 1; kintegrator->transparent_max_bounce = transparent_max_bounce + 1; - if(transparent_probalistic) - kintegrator->transparent_min_bounce = transparent_min_bounce + 1; - else - kintegrator->transparent_min_bounce = kintegrator->transparent_max_bounce; + kintegrator->transparent_min_bounce = transparent_min_bounce + 1; kintegrator->transparent_shadows = transparent_shadows; + kintegrator->volume_homogeneous_sampling = volume_homogeneous_sampling; kintegrator->volume_max_steps = volume_max_steps; kintegrator->volume_step_size = volume_step_size; @@ -120,7 +114,6 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene kintegrator->sample_clamp_indirect = (sample_clamp_indirect == 0.0f)? FLT_MAX: sample_clamp_indirect*3.0f; kintegrator->branched = (method == BRANCHED_PATH); - kintegrator->aa_samples = aa_samples; kintegrator->diffuse_samples = diffuse_samples; kintegrator->glossy_samples = glossy_samples; kintegrator->transmission_samples = transmission_samples; @@ -128,8 +121,11 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene kintegrator->mesh_light_samples = mesh_light_samples; kintegrator->subsurface_samples = subsurface_samples; kintegrator->volume_samples = volume_samples; + kintegrator->sample_all_lights_direct = sample_all_lights_direct; + kintegrator->sample_all_lights_indirect = sample_all_lights_indirect; kintegrator->sampling_pattern = sampling_pattern; + kintegrator->aa_samples = aa_samples; /* sobol directions table */ int max_samples = 1; @@ -171,11 +167,10 @@ bool Integrator::modified(const Integrator& integrator) max_glossy_bounce == integrator.max_glossy_bounce && max_transmission_bounce == integrator.max_transmission_bounce && max_volume_bounce == integrator.max_volume_bounce && - probalistic_termination == integrator.probalistic_termination && transparent_min_bounce == integrator.transparent_min_bounce && transparent_max_bounce == 
integrator.transparent_max_bounce && - transparent_probalistic == integrator.transparent_probalistic && transparent_shadows == integrator.transparent_shadows && + volume_homogeneous_sampling == integrator.volume_homogeneous_sampling && volume_max_steps == integrator.volume_max_steps && volume_step_size == integrator.volume_step_size && no_caustics == integrator.no_caustics && @@ -194,7 +189,9 @@ bool Integrator::modified(const Integrator& integrator) subsurface_samples == integrator.subsurface_samples && volume_samples == integrator.volume_samples && motion_blur == integrator.motion_blur && - sampling_pattern == integrator.sampling_pattern); + sampling_pattern == integrator.sampling_pattern && + sample_all_lights_direct == integrator.sample_all_lights_direct && + sample_all_lights_indirect == integrator.sample_all_lights_indirect); } void Integrator::tag_update(Scene *scene) diff --git a/intern/cycles/render/integrator.h b/intern/cycles/render/integrator.h index 573b258af60..380c1a65722 100644 --- a/intern/cycles/render/integrator.h +++ b/intern/cycles/render/integrator.h @@ -34,13 +34,12 @@ public: int max_glossy_bounce; int max_transmission_bounce; int max_volume_bounce; - bool probalistic_termination; int transparent_min_bounce; int transparent_max_bounce; - bool transparent_probalistic; bool transparent_shadows; + int volume_homogeneous_sampling; int volume_max_steps; float volume_step_size; @@ -62,6 +61,8 @@ public: int mesh_light_samples; int subsurface_samples; int volume_samples; + bool sample_all_lights_direct; + bool sample_all_lights_indirect; enum Method { BRANCHED_PATH = 0, diff --git a/intern/cycles/render/light.cpp b/intern/cycles/render/light.cpp index bab4218aae9..7bdb1fbf8af 100644 --- a/intern/cycles/render/light.cpp +++ b/intern/cycles/render/light.cpp @@ -29,7 +29,7 @@ CCL_NAMESPACE_BEGIN -static void shade_background_pixels(Device *device, DeviceScene *dscene, int res, vector<float3>& pixels) +static void shade_background_pixels(Device 
*device, DeviceScene *dscene, int res, vector<float3>& pixels, Progress& progress) { /* create input */ int width = res; @@ -66,6 +66,7 @@ static void shade_background_pixels(Device *device, DeviceScene *dscene, int res main_task.shader_eval_type = SHADER_EVAL_BACKGROUND; main_task.shader_x = 0; main_task.shader_w = width*height; + main_task.get_cancel = function_bind(&Progress::get_cancel, &progress); /* disabled splitting for now, there's an issue with multi-GPU mem_copy_from */ list<DeviceTask> split_tasks; @@ -149,7 +150,6 @@ void LightManager::device_update_distribution(Device *device, DeviceScene *dscen size_t num_lights = scene->lights.size(); size_t num_background_lights = 0; size_t num_triangles = 0; - size_t num_curve_segments = 0; foreach(Object *object, scene->objects) { Mesh *mesh = object->mesh; @@ -159,6 +159,10 @@ void LightManager::device_update_distribution(Device *device, DeviceScene *dscen if(!(object->visibility & (PATH_RAY_DIFFUSE|PATH_RAY_GLOSSY|PATH_RAY_TRANSMIT))) continue; + /* skip motion blurred deforming meshes, not supported yet */ + if(mesh->has_motion_blur()) + continue; + /* skip if we have no emission shaders */ foreach(uint sindex, mesh->used_shaders) { Shader *shader = scene->shaders[sindex]; @@ -177,20 +181,10 @@ void LightManager::device_update_distribution(Device *device, DeviceScene *dscen if(shader->use_mis && shader->has_surface_emission) num_triangles++; } - - /* disabled for curves */ -#if 0 - foreach(Mesh::Curve& curve, mesh->curves) { - Shader *shader = scene->shaders[curve.shader]; - - if(shader->use_mis && shader->has_surface_emission) - num_curve_segments += curve.num_segments(); -#endif } } - size_t num_distribution = num_triangles + num_curve_segments; - num_distribution += num_lights; + size_t num_distribution = num_triangles + num_lights; /* emission area */ float4 *distribution = dscene->light_distribution.resize(num_distribution + 1); @@ -210,6 +204,10 @@ void LightManager::device_update_distribution(Device 
*device, DeviceScene *dscen continue; } + /* skip motion blurred deforming meshes, not supported yet */ + if(mesh->has_motion_blur()) + continue; + /* skip if we have no emission shaders */ foreach(uint sindex, mesh->used_shaders) { Shader *shader = scene->shaders[sindex]; @@ -225,21 +223,21 @@ void LightManager::device_update_distribution(Device *device, DeviceScene *dscen bool transform_applied = mesh->transform_applied; Transform tfm = object->tfm; int object_id = j; - int shader_id = SHADER_MASK; + int shader_flag = 0; if(transform_applied) object_id = ~object_id; if(!(object->visibility & PATH_RAY_DIFFUSE)) { - shader_id |= SHADER_EXCLUDE_DIFFUSE; + shader_flag |= SHADER_EXCLUDE_DIFFUSE; use_light_visibility = true; } if(!(object->visibility & PATH_RAY_GLOSSY)) { - shader_id |= SHADER_EXCLUDE_GLOSSY; + shader_flag |= SHADER_EXCLUDE_GLOSSY; use_light_visibility = true; } if(!(object->visibility & PATH_RAY_TRANSMIT)) { - shader_id |= SHADER_EXCLUDE_TRANSMIT; + shader_flag |= SHADER_EXCLUDE_TRANSMIT; use_light_visibility = true; } @@ -249,7 +247,7 @@ void LightManager::device_update_distribution(Device *device, DeviceScene *dscen if(shader->use_mis && shader->has_surface_emission) { distribution[offset].x = totarea; distribution[offset].y = __int_as_float(i + mesh->tri_offset); - distribution[offset].z = __int_as_float(shader_id); + distribution[offset].z = __int_as_float(shader_flag); distribution[offset].w = __int_as_float(object_id); offset++; @@ -267,40 +265,6 @@ void LightManager::device_update_distribution(Device *device, DeviceScene *dscen totarea += triangle_area(p1, p2, p3); } } - - /* sample as light disabled for strands */ -#if 0 - size_t i = 0; - - foreach(Mesh::Curve& curve, mesh->curves) { - Shader *shader = scene->shaders[curve.shader]; - int first_key = curve.first_key; - - if(shader->use_mis && shader->has_surface_emission) { - for(int j = 0; j < curve.num_segments(); j++) { - distribution[offset].x = totarea; - distribution[offset].y = 
__int_as_float(i + mesh->curve_offset); // XXX fix kernel code - distribution[offset].z = __int_as_float(j) & SHADER_MASK; - distribution[offset].w = __int_as_float(object_id); - offset++; - - float3 p1 = mesh->curve_keys[first_key + j].loc; - float r1 = mesh->curve_keys[first_key + j].radius; - float3 p2 = mesh->curve_keys[first_key + j + 1].loc; - float r2 = mesh->curve_keys[first_key + j + 1].radius; - - if(!transform_applied) { - p1 = transform_point(&tfm, p1); - p2 = transform_point(&tfm, p2); - } - - totarea += M_PI_F * (r1 + r2) * len(p1 - p2); - } - } - - i++; - } -#endif } if(progress.get_cancel()) return; @@ -432,7 +396,7 @@ void LightManager::device_update_background(Device *device, DeviceScene *dscene, assert(res > 0); vector<float3> pixels; - shade_background_pixels(device, dscene, res, pixels); + shade_background_pixels(device, dscene, res, pixels, progress); if(progress.get_cancel()) return; diff --git a/intern/cycles/render/mesh.cpp b/intern/cycles/render/mesh.cpp index 93f24886dc9..9c5ddd55010 100644 --- a/intern/cycles/render/mesh.cpp +++ b/intern/cycles/render/mesh.cpp @@ -18,6 +18,7 @@ #include "bvh_build.h" #include "camera.h" +#include "curves.h" #include "device.h" #include "shader.h" #include "light.h" @@ -34,6 +35,39 @@ CCL_NAMESPACE_BEGIN +/* Triangle */ + +void Mesh::Triangle::bounds_grow(const float3 *verts, BoundBox& bounds) const +{ + bounds.grow(verts[v[0]]); + bounds.grow(verts[v[1]]); + bounds.grow(verts[v[2]]); +} + +/* Curve */ + +void Mesh::Curve::bounds_grow(const int k, const float4 *curve_keys, BoundBox& bounds) const +{ + float3 P[4]; + + P[0] = float4_to_float3(curve_keys[max(first_key + k - 1,first_key)]); + P[1] = float4_to_float3(curve_keys[first_key + k]); + P[2] = float4_to_float3(curve_keys[first_key + k + 1]); + P[3] = float4_to_float3(curve_keys[min(first_key + k + 2, first_key + num_keys - 1)]); + + float3 lower; + float3 upper; + + curvebounds(&lower.x, &upper.x, P, 0); + curvebounds(&lower.y, &upper.y, P, 1); + 
curvebounds(&lower.z, &upper.z, P, 2); + + float mr = max(curve_keys[first_key + k].w, curve_keys[first_key + k + 1].w); + + bounds.grow(lower, mr); + bounds.grow(upper, mr); +} + /* Mesh */ Mesh::Mesh() @@ -46,6 +80,9 @@ Mesh::Mesh() displacement_method = DISPLACE_BUMP; bounds = BoundBox::empty; + motion_steps = 3; + use_motion_blur = false; + bvh = NULL; tri_offset = 0; @@ -97,6 +134,22 @@ void Mesh::clear() transform_normal = transform_identity(); } +int Mesh::split_vertex(int vertex) +{ + /* copy vertex location and vertex attributes */ + verts.push_back(verts[vertex]); + + foreach(Attribute& attr, attributes.attributes) { + if(attr.element == ATTR_ELEMENT_VERTEX) { + vector<char> tmp(attr.data_sizeof()); + memcpy(&tmp[0], attr.data() + tmp.size()*vertex, tmp.size()); + attr.add(&tmp[0]); + } + } + + return verts.size() - 1; +} + void Mesh::set_triangle(int i, int v0, int v1, int v2, int shader_, bool smooth_) { Triangle tri; @@ -123,9 +176,8 @@ void Mesh::add_triangle(int v0, int v1, int v2, int shader_, bool smooth_) void Mesh::add_curve_key(float3 co, float radius) { - CurveKey key; - key.co = co; - key.radius = radius; + float4 key = float3_to_float4(co); + key.w = radius; curve_keys.push_back(key); } @@ -151,7 +203,25 @@ void Mesh::compute_bounds() bnds.grow(verts[i]); for(size_t i = 0; i < curve_keys_size; i++) - bnds.grow(curve_keys[i].co, curve_keys[i].radius); + bnds.grow(float4_to_float3(curve_keys[i]), curve_keys[i].w); + + Attribute *attr = attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + if (use_motion_blur && attr) { + size_t steps_size = verts.size() * (motion_steps - 1); + float3 *vert_steps = attr->data_float3(); + + for (size_t i = 0; i < steps_size; i++) + bnds.grow(vert_steps[i]); + } + + Attribute *curve_attr = curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + if(use_motion_blur && curve_attr) { + size_t steps_size = curve_keys.size() * (motion_steps - 1); + float3 *key_steps = curve_attr->data_float3(); + + for (size_t i = 0; i 
< steps_size; i++) + bnds.grow(key_steps[i]); + } if(!bnds.valid()) { bnds = BoundBox::empty; @@ -161,7 +231,23 @@ void Mesh::compute_bounds() bnds.grow_safe(verts[i]); for(size_t i = 0; i < curve_keys_size; i++) - bnds.grow_safe(curve_keys[i].co, curve_keys[i].radius); + bnds.grow_safe(float4_to_float3(curve_keys[i]), curve_keys[i].w); + + if (use_motion_blur && attr) { + size_t steps_size = verts.size() * (motion_steps - 1); + float3 *vert_steps = attr->data_float3(); + + for (size_t i = 0; i < steps_size; i++) + bnds.grow_safe(vert_steps[i]); + } + + if (use_motion_blur && curve_attr) { + size_t steps_size = curve_keys.size() * (motion_steps - 1); + float3 *key_steps = curve_attr->data_float3(); + + for (size_t i = 0; i < steps_size; i++) + bnds.grow_safe(key_steps[i]); + } } } @@ -173,6 +259,21 @@ void Mesh::compute_bounds() bounds = bnds; } +static float3 compute_face_normal(const Mesh::Triangle& t, float3 *verts) +{ + float3 v0 = verts[t.v[0]]; + float3 v1 = verts[t.v[1]]; + float3 v2 = verts[t.v[2]]; + + float3 norm = cross(v1 - v0, v2 - v0); + float normlen = len(norm); + + if(normlen == 0.0f) + return make_float3(0.0f, 0.0f, 0.0f); + + return norm / normlen; +} + void Mesh::add_face_normals() { /* don't compute if already there */ @@ -192,17 +293,7 @@ void Mesh::add_face_normals() Triangle *triangles_ptr = &triangles[0]; for(size_t i = 0; i < triangles_size; i++) { - Triangle t = triangles_ptr[i]; - float3 v0 = verts_ptr[t.v[0]]; - float3 v1 = verts_ptr[t.v[1]]; - float3 v2 = verts_ptr[t.v[2]]; - - float3 norm = cross(v1 - v0, v2 - v0); - float normlen = len(norm); - if(normlen == 0.0f) - fN[i] = make_float3(0.0f, 0.0f, 0.0f); - else - fN[i] = norm / normlen; + fN[i] = compute_face_normal(triangles_ptr[i], verts_ptr); if(flip) fN[i] = -fN[i]; @@ -220,36 +311,69 @@ void Mesh::add_face_normals() void Mesh::add_vertex_normals() { - /* don't compute if already there */ - if(attributes.find(ATTR_STD_VERTEX_NORMAL)) - return; - - /* get attributes */ - Attribute 
*attr_fN = attributes.find(ATTR_STD_FACE_NORMAL); - Attribute *attr_vN = attributes.add(ATTR_STD_VERTEX_NORMAL); + bool flip = transform_negative_scaled; + size_t verts_size = verts.size(); + size_t triangles_size = triangles.size(); - float3 *fN = attr_fN->data_float3(); - float3 *vN = attr_vN->data_float3(); + /* static vertex normals */ + if(!attributes.find(ATTR_STD_VERTEX_NORMAL)) { + /* get attributes */ + Attribute *attr_fN = attributes.find(ATTR_STD_FACE_NORMAL); + Attribute *attr_vN = attributes.add(ATTR_STD_VERTEX_NORMAL); - /* compute vertex normals */ - memset(vN, 0, verts.size()*sizeof(float3)); + float3 *fN = attr_fN->data_float3(); + float3 *vN = attr_vN->data_float3(); - size_t verts_size = verts.size(); - size_t triangles_size = triangles.size(); - bool flip = transform_negative_scaled; + /* compute vertex normals */ + memset(vN, 0, verts.size()*sizeof(float3)); - if(triangles_size) { - Triangle *triangles_ptr = &triangles[0]; + if(triangles_size) { + Triangle *triangles_ptr = &triangles[0]; - for(size_t i = 0; i < triangles_size; i++) - for(size_t j = 0; j < 3; j++) - vN[triangles_ptr[i].v[j]] += fN[i]; + for(size_t i = 0; i < triangles_size; i++) + for(size_t j = 0; j < 3; j++) + vN[triangles_ptr[i].v[j]] += fN[i]; + } + + for(size_t i = 0; i < verts_size; i++) { + vN[i] = normalize(vN[i]); + if(flip) + vN[i] = -vN[i]; + } } - for(size_t i = 0; i < verts_size; i++) { - vN[i] = normalize(vN[i]); - if(flip) - vN[i] = -vN[i]; + /* motion vertex normals */ + Attribute *attr_mP = attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + Attribute *attr_mN = attributes.find(ATTR_STD_MOTION_VERTEX_NORMAL); + + if(has_motion_blur() && attr_mP && !attr_mN) { + /* create attribute */ + attr_mN = attributes.add(ATTR_STD_MOTION_VERTEX_NORMAL); + + for(int step = 0; step < motion_steps - 1; step++) { + float3 *mP = attr_mP->data_float3() + step*verts.size(); + float3 *mN = attr_mN->data_float3() + step*verts.size(); + + /* compute */ + memset(mN, 0, 
verts.size()*sizeof(float3)); + + if(triangles_size) { + Triangle *triangles_ptr = &triangles[0]; + + for(size_t i = 0; i < triangles_size; i++) { + for(size_t j = 0; j < 3; j++) { + float3 fN = compute_face_normal(triangles_ptr[i], mP); + mN[triangles_ptr[i].v[j]] += fN; + } + } + } + + for(size_t i = 0; i < verts_size; i++) { + mN[i] = normalize(mN[i]); + if(flip) + mN[i] = -mN[i]; + } + } } } @@ -335,18 +459,14 @@ void Mesh::pack_verts(float4 *tri_verts, float4 *tri_vindex, size_t vert_offset) void Mesh::pack_curves(Scene *scene, float4 *curve_key_co, float4 *curve_data, size_t curvekey_offset) { size_t curve_keys_size = curve_keys.size(); - CurveKey *keys_ptr = NULL; + float4 *keys_ptr = NULL; /* pack curve keys */ if(curve_keys_size) { keys_ptr = &curve_keys[0]; - for(size_t i = 0; i < curve_keys_size; i++) { - float3 p = keys_ptr[i].co; - float radius = keys_ptr[i].radius; - - curve_key_co[i] = make_float4(p.x, p.y, p.z, radius); - } + for(size_t i = 0; i < curve_keys_size; i++) + curve_key_co[i] = keys_ptr[i]; } /* pack curve segments */ @@ -430,6 +550,13 @@ void Mesh::tag_update(Scene *scene, bool rebuild) scene->object_manager->need_update = true; } +bool Mesh::has_motion_blur() const +{ + return (use_motion_blur && + (attributes.find(ATTR_STD_MOTION_VERTEX_POSITION) || + curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION))); +} + /* Mesh Manager */ MeshManager::MeshManager() @@ -641,10 +768,16 @@ static void update_attribute_element_offset(Mesh *mesh, vector<float>& attr_floa size_t size = mattr->element_size( mesh->verts.size(), mesh->triangles.size(), + mesh->motion_steps, mesh->curves.size(), mesh->curve_keys.size()); - if(mattr->type == TypeDesc::TypeFloat) { + if(mattr->element == ATTR_ELEMENT_VOXEL) { + /* store slot in offset value */ + VoxelAttribute *voxel_data = mattr->data_voxel(); + offset = voxel_data->slot; + } + else if(mattr->type == TypeDesc::TypeFloat) { float *data = mattr->data_float(); offset = attr_float.size(); @@ -663,19 +796,21 
@@ static void update_attribute_element_offset(Mesh *mesh, vector<float>& attr_floa attr_float3[offset+k] = (&tfm->x)[k]; } else { - float3 *data = mattr->data_float3(); + float4 *data = mattr->data_float4(); offset = attr_float3.size(); attr_float3.resize(attr_float3.size() + size); for(size_t k = 0; k < size; k++) - attr_float3[offset+k] = float3_to_float4(data[k]); + attr_float3[offset+k] = data[k]; } /* mesh vertex/curve index is global, not per object, so we sneak * a correction for that in here */ if(element == ATTR_ELEMENT_VERTEX) offset -= mesh->vert_offset; + else if(element == ATTR_ELEMENT_VERTEX_MOTION) + offset -= mesh->vert_offset; else if(element == ATTR_ELEMENT_FACE) offset -= mesh->tri_offset; else if(element == ATTR_ELEMENT_CORNER) @@ -684,6 +819,8 @@ static void update_attribute_element_offset(Mesh *mesh, vector<float>& attr_floa offset -= mesh->curve_offset; else if(element == ATTR_ELEMENT_CURVE_KEY) offset -= mesh->curvekey_offset; + else if(element == ATTR_ELEMENT_CURVE_KEY_MOTION) + offset -= mesh->curvekey_offset; } else { /* attribute not found */ @@ -750,8 +887,8 @@ void MeshManager::device_update_attributes(Device *device, DeviceScene *dscene, /* create attribute lookup maps */ if(scene->shader_manager->use_osl()) update_osl_attributes(device, scene, mesh_attributes); - else - update_svm_attributes(device, dscene, scene, mesh_attributes); + + update_svm_attributes(device, dscene, scene, mesh_attributes); if(progress.get_cancel()) return; @@ -866,9 +1003,9 @@ void MeshManager::device_update_bvh(Device *device, DeviceScene *dscene, Scene * dscene->tri_woop.reference(&pack.tri_woop[0], pack.tri_woop.size()); device->tex_alloc("__tri_woop", dscene->tri_woop); } - if(pack.prim_segment.size()) { - dscene->prim_segment.reference((uint*)&pack.prim_segment[0], pack.prim_segment.size()); - device->tex_alloc("__prim_segment", dscene->prim_segment); + if(pack.prim_type.size()) { + dscene->prim_type.reference((uint*)&pack.prim_type[0], 
pack.prim_type.size()); + device->tex_alloc("__prim_type", dscene->prim_type); } if(pack.prim_visibility.size()) { dscene->prim_visibility.reference((uint*)&pack.prim_visibility[0], pack.prim_visibility.size()); @@ -956,7 +1093,6 @@ void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scen foreach(Shader *shader, scene->shaders) shader->need_update_attributes = false; - float shuttertime = scene->camera->shuttertime; #ifdef __OBJECT_MOTION__ Scene::MotionType need_motion = scene->need_motion(device->info.advanced_shading); bool motion_blur = need_motion == Scene::MOTION_BLUR; @@ -965,7 +1101,7 @@ void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scen #endif foreach(Object *object, scene->objects) - object->compute_bounds(motion_blur, shuttertime); + object->compute_bounds(motion_blur); if(progress.get_cancel()) return; @@ -979,7 +1115,7 @@ void MeshManager::device_free(Device *device, DeviceScene *dscene) device->tex_free(dscene->bvh_nodes); device->tex_free(dscene->object_node); device->tex_free(dscene->tri_woop); - device->tex_free(dscene->prim_segment); + device->tex_free(dscene->prim_type); device->tex_free(dscene->prim_visibility); device->tex_free(dscene->prim_index); device->tex_free(dscene->prim_object); @@ -996,7 +1132,7 @@ void MeshManager::device_free(Device *device, DeviceScene *dscene) dscene->bvh_nodes.clear(); dscene->object_node.clear(); dscene->tri_woop.clear(); - dscene->prim_segment.clear(); + dscene->prim_type.clear(); dscene->prim_visibility.clear(); dscene->prim_index.clear(); dscene->prim_object.clear(); diff --git a/intern/cycles/render/mesh.h b/intern/cycles/render/mesh.h index 281a8f0645e..247e3dd555e 100644 --- a/intern/cycles/render/mesh.h +++ b/intern/cycles/render/mesh.h @@ -46,6 +46,8 @@ public: /* Mesh Triangle */ struct Triangle { int v[3]; + + void bounds_grow(const float3 *verts, BoundBox& bounds) const; }; /* Mesh Curve */ @@ -55,11 +57,8 @@ public: uint shader; int num_segments() 
{ return num_keys - 1; } - }; - struct CurveKey { - float3 co; - float radius; + void bounds_grow(const int k, const float4 *curve_keys, BoundBox& bounds) const; }; /* Displacement */ @@ -77,7 +76,7 @@ public: vector<uint> shader; vector<bool> smooth; - vector<CurveKey> curve_keys; + vector<float4> curve_keys; /* co + radius */ vector<Curve> curves; vector<uint> used_shaders; @@ -90,6 +89,9 @@ public: Transform transform_normal; DisplacementMethod displacement_method; + uint motion_steps; + bool use_motion_blur; + /* Update Flags */ bool need_update; bool need_update_rebuild; @@ -112,6 +114,7 @@ public: void add_triangle(int v0, int v1, int v2, int shader, bool smooth); void add_curve_key(float3 loc, float radius); void add_curve(int first_key, int num_keys, int shader); + int split_vertex(int vertex); void compute_bounds(); void add_face_normals(); @@ -126,6 +129,8 @@ public: bool need_attribute(Scene *scene, ustring name); void tag_update(Scene *scene, bool rebuild); + + bool has_motion_blur() const; }; /* Mesh Manager */ diff --git a/intern/cycles/render/mesh_displace.cpp b/intern/cycles/render/mesh_displace.cpp index 2fd8a978511..661fd9c66c1 100644 --- a/intern/cycles/render/mesh_displace.cpp +++ b/intern/cycles/render/mesh_displace.cpp @@ -44,7 +44,7 @@ bool MeshManager::displace(Device *device, DeviceScene *dscene, Scene *scene, Me progress.set_status("Updating Mesh", msg); /* find object index. 
todo: is arbitrary */ - size_t object_index = ~0; + size_t object_index = OBJECT_NONE; for(size_t i = 0; i < scene->objects.size(); i++) { if(scene->objects[i]->mesh == mesh) { @@ -119,17 +119,21 @@ bool MeshManager::displace(Device *device, DeviceScene *dscene, Scene *scene, Me task.shader_eval_type = SHADER_EVAL_DISPLACE; task.shader_x = 0; task.shader_w = d_output.size(); + task.get_cancel = function_bind(&Progress::get_cancel, &progress); device->task_add(task); device->task_wait(); + if(progress.get_cancel()) { + device->mem_free(d_input); + device->mem_free(d_output); + return false; + } + device->mem_copy_from(d_output, 0, 1, d_output.size(), sizeof(float4)); device->mem_free(d_input); device->mem_free(d_output); - if(progress.get_cancel()) - return false; - /* read result */ done.clear(); done.resize(mesh->verts.size(), false); diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp index af6fca29ab0..a53e0b39435 100644 --- a/intern/cycles/render/nodes.cpp +++ b/intern/cycles/render/nodes.cpp @@ -189,10 +189,12 @@ ImageTextureNode::ImageTextureNode() slot = -1; is_float = -1; is_linear = false; + use_alpha = true; filename = ""; builtin_data = NULL; color_space = ustring("Color"); projection = ustring("Flat"); + interpolation = INTERPOLATION_LINEAR; projection_blend = 0.0f; animated = false; @@ -204,7 +206,7 @@ ImageTextureNode::ImageTextureNode() ImageTextureNode::~ImageTextureNode() { if(image_manager) - image_manager->remove_image(filename, builtin_data); + image_manager->remove_image(filename, builtin_data, interpolation); } ShaderNode *ImageTextureNode::clone() const @@ -241,7 +243,7 @@ void ImageTextureNode::compile(SVMCompiler& compiler) image_manager = compiler.image_manager; if(is_float == -1) { bool is_float_bool; - slot = image_manager->add_image(filename, builtin_data, animated, is_float_bool, is_linear); + slot = image_manager->add_image(filename, builtin_data, animated, is_float_bool, is_linear, interpolation, use_alpha); 
is_float = (int)is_float_bool; } @@ -315,6 +317,22 @@ void ImageTextureNode::compile(OSLCompiler& compiler) compiler.parameter("projection_blend", projection_blend); compiler.parameter("is_float", is_float); compiler.parameter("use_alpha", !alpha_out->links.empty()); + + switch (interpolation) { + case INTERPOLATION_CLOSEST: + compiler.parameter("interpolation", "closest"); + break; + case INTERPOLATION_CUBIC: + compiler.parameter("interpolation", "cubic"); + break; + case INTERPOLATION_SMART: + compiler.parameter("interpolation", "smart"); + break; + case INTERPOLATION_LINEAR: + default: + compiler.parameter("interpolation", "linear"); + break; + } compiler.add(this, "node_image_texture"); } @@ -340,6 +358,7 @@ EnvironmentTextureNode::EnvironmentTextureNode() slot = -1; is_float = -1; is_linear = false; + use_alpha = true; filename = ""; builtin_data = NULL; color_space = ustring("Color"); @@ -354,7 +373,7 @@ EnvironmentTextureNode::EnvironmentTextureNode() EnvironmentTextureNode::~EnvironmentTextureNode() { if(image_manager) - image_manager->remove_image(filename, builtin_data); + image_manager->remove_image(filename, builtin_data, INTERPOLATION_LINEAR); } ShaderNode *EnvironmentTextureNode::clone() const @@ -389,7 +408,7 @@ void EnvironmentTextureNode::compile(SVMCompiler& compiler) image_manager = compiler.image_manager; if(slot == -1) { bool is_float_bool; - slot = image_manager->add_image(filename, builtin_data, animated, is_float_bool, is_linear); + slot = image_manager->add_image(filename, builtin_data, animated, is_float_bool, is_linear, INTERPOLATION_LINEAR, use_alpha); is_float = (int)is_float_bool; } @@ -565,13 +584,13 @@ static void sky_texture_precompute_new(SunSky *sunsky, float3 dir, float turbidi /* Copy values from sky_state to SunSky */ for (int i = 0; i < 9; ++i) { - sunsky->config_x[i] = sky_state->configs[0][i]; - sunsky->config_y[i] = sky_state->configs[1][i]; - sunsky->config_z[i] = sky_state->configs[2][i]; + sunsky->config_x[i] = 
(float)sky_state->configs[0][i]; + sunsky->config_y[i] = (float)sky_state->configs[1][i]; + sunsky->config_z[i] = (float)sky_state->configs[2][i]; } - sunsky->radiance_x = sky_state->radiances[0]; - sunsky->radiance_y = sky_state->radiances[1]; - sunsky->radiance_z = sky_state->radiances[2]; + sunsky->radiance_x = (float)sky_state->radiances[0]; + sunsky->radiance_y = (float)sky_state->radiances[1]; + sunsky->radiance_z = (float)sky_state->radiances[2]; /* Free sky_state */ arhosekskymodelstate_free(sky_state); @@ -612,6 +631,8 @@ void SkyTextureNode::compile(SVMCompiler& compiler) sky_texture_precompute_old(&sunsky, sun_direction, turbidity); else if(type_enum[type] == NODE_SKY_NEW) sky_texture_precompute_new(&sunsky, sun_direction, turbidity, ground_albedo); + else + assert(false); if(vector_in->link) compiler.stack_assign(vector_in); @@ -649,6 +670,8 @@ void SkyTextureNode::compile(OSLCompiler& compiler) sky_texture_precompute_old(&sunsky, sun_direction, turbidity); else if(type_enum[type] == NODE_SKY_NEW) sky_texture_precompute_new(&sunsky, sun_direction, turbidity, ground_albedo); + else + assert(false); compiler.parameter("sky_model", type); compiler.parameter("theta", sunsky.theta); @@ -2192,8 +2215,9 @@ void TextureCoordinateNode::attributes(Shader *shader, AttributeRequestSet *attr if(shader->has_volume) { if(!from_dupli) { - if(!output("Generated")->links.empty()) + if(!output("Generated")->links.empty()) { attributes->add(ATTR_STD_GENERATED_TRANSFORM); + } } } @@ -2310,6 +2334,78 @@ void TextureCoordinateNode::compile(OSLCompiler& compiler) compiler.add(this, "node_texture_coordinate"); } +UVMapNode::UVMapNode() +: ShaderNode("uvmap") +{ + attribute = ""; + from_dupli = false; + + add_output("UV", SHADER_SOCKET_POINT); +} + +void UVMapNode::attributes(Shader *shader, AttributeRequestSet *attributes) +{ + if(shader->has_surface) { + if(!from_dupli) { + if(!output("UV")->links.empty()) { + if (attribute != "") + attributes->add(attribute); + else + 
attributes->add(ATTR_STD_UV); + } + } + } + + ShaderNode::attributes(shader, attributes); +} + +void UVMapNode::compile(SVMCompiler& compiler) +{ + ShaderOutput *out = output("UV"); + NodeType texco_node = NODE_TEX_COORD; + NodeType attr_node = NODE_ATTR; + int attr; + + if(bump == SHADER_BUMP_DX) { + texco_node = NODE_TEX_COORD_BUMP_DX; + attr_node = NODE_ATTR_BUMP_DX; + } + else if(bump == SHADER_BUMP_DY) { + texco_node = NODE_TEX_COORD_BUMP_DY; + attr_node = NODE_ATTR_BUMP_DY; + } + + if(!out->links.empty()) { + if(from_dupli) { + compiler.stack_assign(out); + compiler.add_node(texco_node, NODE_TEXCO_DUPLI_UV, out->stack_offset); + } + else { + if (attribute != "") + attr = compiler.attribute(attribute); + else + attr = compiler.attribute(ATTR_STD_UV); + + compiler.stack_assign(out); + compiler.add_node(attr_node, attr, out->stack_offset, NODE_ATTR_FLOAT3); + } + } +} + +void UVMapNode::compile(OSLCompiler& compiler) +{ + if(bump == SHADER_BUMP_DX) + compiler.parameter("bump_offset", "dx"); + else if(bump == SHADER_BUMP_DY) + compiler.parameter("bump_offset", "dy"); + else + compiler.parameter("bump_offset", "center"); + + compiler.parameter("from_dupli", from_dupli); + compiler.parameter("name", attribute.c_str()); + compiler.add(this, "node_uv_map"); +} + /* Light Path */ LightPathNode::LightPathNode() @@ -2325,6 +2421,7 @@ LightPathNode::LightPathNode() add_output("Is Volume Scatter Ray", SHADER_SOCKET_FLOAT); add_output("Ray Length", SHADER_SOCKET_FLOAT); add_output("Ray Depth", SHADER_SOCKET_FLOAT); + add_output("Transparent Depth", SHADER_SOCKET_FLOAT); } void LightPathNode::compile(SVMCompiler& compiler) @@ -2392,6 +2489,11 @@ void LightPathNode::compile(SVMCompiler& compiler) compiler.add_node(NODE_LIGHT_PATH, NODE_LP_ray_depth, out->stack_offset); } + out = output("Transparent Depth"); + if(!out->links.empty()) { + compiler.stack_assign(out); + compiler.add_node(NODE_LIGHT_PATH, NODE_LP_ray_transparent, out->stack_offset); + } } void 
LightPathNode::compile(OSLCompiler& compiler) @@ -2612,7 +2714,7 @@ void HairInfoNode::attributes(Shader *shader, AttributeRequestSet *attributes) if(!intercept_out->links.empty()) attributes->add(ATTR_STD_CURVE_INTERCEPT); } - + ShaderNode::attributes(shader, attributes); } @@ -3126,15 +3228,22 @@ AttributeNode::AttributeNode() void AttributeNode::attributes(Shader *shader, AttributeRequestSet *attributes) { - if(shader->has_surface) { - ShaderOutput *color_out = output("Color"); - ShaderOutput *vector_out = output("Vector"); - ShaderOutput *fac_out = output("Fac"); + ShaderOutput *color_out = output("Color"); + ShaderOutput *vector_out = output("Vector"); + ShaderOutput *fac_out = output("Fac"); - if(!color_out->links.empty() || !vector_out->links.empty() || !fac_out->links.empty()) + if(!color_out->links.empty() || !vector_out->links.empty() || !fac_out->links.empty()) { + AttributeStandard std = Attribute::name_standard(attribute.c_str()); + + if(std != ATTR_STD_NONE) + attributes->add(std); + else attributes->add(attribute); } - + + if(shader->has_volume) + attributes->add(ATTR_STD_GENERATED_TRANSFORM); + ShaderNode::attributes(shader, attributes); } @@ -3144,6 +3253,13 @@ void AttributeNode::compile(SVMCompiler& compiler) ShaderOutput *vector_out = output("Vector"); ShaderOutput *fac_out = output("Fac"); NodeType attr_node = NODE_ATTR; + AttributeStandard std = Attribute::name_standard(attribute.c_str()); + int attr; + + if(std != ATTR_STD_NONE) + attr = compiler.attribute(std); + else + attr = compiler.attribute(attribute); if(bump == SHADER_BUMP_DX) attr_node = NODE_ATTR_BUMP_DX; @@ -3151,8 +3267,6 @@ void AttributeNode::compile(SVMCompiler& compiler) attr_node = NODE_ATTR_BUMP_DY; if(!color_out->links.empty() || !vector_out->links.empty()) { - int attr = compiler.attribute(attribute); - if(!color_out->links.empty()) { compiler.stack_assign(color_out); compiler.add_node(attr_node, attr, color_out->stack_offset, NODE_ATTR_FLOAT3); @@ -3164,8 +3278,6 @@ void 
AttributeNode::compile(SVMCompiler& compiler) } if(!fac_out->links.empty()) { - int attr = compiler.attribute(attribute); - compiler.stack_assign(fac_out); compiler.add_node(attr_node, attr, fac_out->stack_offset, NODE_ATTR_FLOAT); } @@ -3179,8 +3291,12 @@ void AttributeNode::compile(OSLCompiler& compiler) compiler.parameter("bump_offset", "dy"); else compiler.parameter("bump_offset", "center"); + + if(Attribute::name_standard(attribute.c_str()) != ATTR_STD_NONE) + compiler.parameter("name", (string("geom:") + attribute.c_str()).c_str()); + else + compiler.parameter("name", attribute.c_str()); - compiler.parameter("name", attribute.c_str()); compiler.add(this, "node_attribute"); } @@ -3428,6 +3544,7 @@ static ShaderEnum math_type_init() enm.insert("Less Than", NODE_MATH_LESS_THAN); enm.insert("Greater Than", NODE_MATH_GREATER_THAN); enm.insert("Modulo", NODE_MATH_MODULO); + enm.insert("Absolute", NODE_MATH_ABSOLUTE); return enm; } diff --git a/intern/cycles/render/nodes.h b/intern/cycles/render/nodes.h index 86c4f490875..d94d8ce6033 100644 --- a/intern/cycles/render/nodes.h +++ b/intern/cycles/render/nodes.h @@ -72,10 +72,12 @@ public: int slot; int is_float; bool is_linear; + bool use_alpha; string filename; void *builtin_data; ustring color_space; ustring projection; + InterpolationType interpolation; float projection_blend; bool animated; @@ -94,6 +96,7 @@ public: int slot; int is_float; bool is_linear; + bool use_alpha; string filename; void *builtin_data; ustring color_space; @@ -208,6 +211,7 @@ public: BsdfNode(bool scattering = false); SHADER_NODE_BASE_CLASS(BsdfNode); + bool has_spatial_varying() { return true; } void compile(SVMCompiler& compiler, ShaderInput *param1, ShaderInput *param2, ShaderInput *param3 = NULL, ShaderInput *param4 = NULL); ClosureType closure; @@ -279,6 +283,7 @@ public: SHADER_NODE_CLASS(SubsurfaceScatteringNode) bool has_surface_bssrdf() { return true; } bool has_bssrdf_bump(); + bool has_spatial_varying() { return true; } static 
ShaderEnum falloff_enum; }; @@ -288,6 +293,7 @@ public: SHADER_NODE_CLASS(EmissionNode) bool has_surface_emission() { return true; } + bool has_spatial_varying() { return true; } bool total_power; }; @@ -305,6 +311,8 @@ public: class AmbientOcclusionNode : public ShaderNode { public: SHADER_NODE_CLASS(AmbientOcclusionNode) + + bool has_spatial_varying() { return true; } }; class VolumeNode : public ShaderNode { @@ -339,16 +347,28 @@ class GeometryNode : public ShaderNode { public: SHADER_NODE_CLASS(GeometryNode) void attributes(Shader *shader, AttributeRequestSet *attributes); + bool has_spatial_varying() { return true; } }; class TextureCoordinateNode : public ShaderNode { public: SHADER_NODE_CLASS(TextureCoordinateNode) void attributes(Shader *shader, AttributeRequestSet *attributes); + bool has_spatial_varying() { return true; } bool from_dupli; }; +class UVMapNode : public ShaderNode { +public: + SHADER_NODE_CLASS(UVMapNode) + void attributes(Shader *shader, AttributeRequestSet *attributes); + bool has_spatial_varying() { return true; } + + ustring attribute; + bool from_dupli; +}; + class LightPathNode : public ShaderNode { public: SHADER_NODE_CLASS(LightPathNode) @@ -357,6 +377,7 @@ public: class LightFalloffNode : public ShaderNode { public: SHADER_NODE_CLASS(LightFalloffNode) + bool has_spatial_varying() { return true; } }; class ObjectInfoNode : public ShaderNode { @@ -375,6 +396,7 @@ public: SHADER_NODE_CLASS(HairInfoNode) void attributes(Shader *shader, AttributeRequestSet *attributes); + bool has_spatial_varying() { return true; } }; class ValueNode : public ShaderNode { @@ -460,6 +482,7 @@ class AttributeNode : public ShaderNode { public: SHADER_NODE_CLASS(AttributeNode) void attributes(Shader *shader, AttributeRequestSet *attributes); + bool has_spatial_varying() { return true; } ustring attribute; }; @@ -467,21 +490,25 @@ public: class CameraNode : public ShaderNode { public: SHADER_NODE_CLASS(CameraNode) + bool has_spatial_varying() { return true; } 
}; class FresnelNode : public ShaderNode { public: SHADER_NODE_CLASS(FresnelNode) + bool has_spatial_varying() { return true; } }; class LayerWeightNode : public ShaderNode { public: SHADER_NODE_CLASS(LayerWeightNode) + bool has_spatial_varying() { return true; } }; class WireframeNode : public ShaderNode { public: SHADER_NODE_CLASS(WireframeNode) + bool has_spatial_varying() { return true; } bool use_pixel_size; }; @@ -538,6 +565,8 @@ public: class BumpNode : public ShaderNode { public: SHADER_NODE_CLASS(BumpNode) + bool has_spatial_varying() { return true; } + bool invert; }; @@ -568,6 +597,10 @@ public: class OSLScriptNode : public ShaderNode { public: SHADER_NODE_CLASS(OSLScriptNode) + + /* ideally we could beter detect this, but we can't query this now */ + bool has_spatial_varying() { return true; } + string filepath; string bytecode_hash; @@ -581,6 +614,7 @@ class NormalMapNode : public ShaderNode { public: SHADER_NODE_CLASS(NormalMapNode) void attributes(Shader *shader, AttributeRequestSet *attributes); + bool has_spatial_varying() { return true; } ustring space; static ShaderEnum space_enum; @@ -592,6 +626,7 @@ class TangentNode : public ShaderNode { public: SHADER_NODE_CLASS(TangentNode) void attributes(Shader *shader, AttributeRequestSet *attributes); + bool has_spatial_varying() { return true; } ustring direction_type; static ShaderEnum direction_type_enum; diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp index 3edb934ef2c..027bfd71931 100644 --- a/intern/cycles/render/object.cpp +++ b/intern/cycles/render/object.cpp @@ -19,6 +19,7 @@ #include "mesh.h" #include "curves.h" #include "object.h" +#include "particles.h" #include "scene.h" #include "util_foreach.h" @@ -38,7 +39,8 @@ Object::Object() visibility = ~0; random_id = 0; pass_id = 0; - particle_id = 0; + particle_system = NULL; + particle_index = 0; bounds = BoundBox::empty; motion.pre = transform_identity(); motion.mid = transform_identity(); @@ -53,7 +55,7 @@ 
Object::~Object() { } -void Object::compute_bounds(bool motion_blur, float shuttertime) +void Object::compute_bounds(bool motion_blur) { BoundBox mbounds = mesh->bounds; @@ -66,10 +68,7 @@ void Object::compute_bounds(bool motion_blur, float shuttertime) /* todo: this is really terrible. according to pbrt there is a better * way to find this iteratively, but did not find implementation yet * or try to implement myself */ - float start_t = 0.5f - shuttertime*0.25f; - float end_t = 0.5f + shuttertime*0.25f; - - for(float t = start_t; t < end_t; t += (1.0f/128.0f)*shuttertime) { + for(float t = 0.0f; t < 1.0f; t += (1.0f/128.0f)) { Transform ttfm; transform_motion_interpolate(&ttfm, &decomp, t); @@ -80,29 +79,83 @@ void Object::compute_bounds(bool motion_blur, float shuttertime) bounds = mbounds.transformed(&tfm); } -void Object::apply_transform() +void Object::apply_transform(bool apply_to_motion) { if(!mesh || tfm == transform_identity()) return; + + /* triangles */ + if(mesh->verts.size()) { + /* store matrix to transform later. when accessing these as attributes we + * do not want the transform to be applied for consistency between static + * and dynamic BVH, so we do it on packing. 
*/ + mesh->transform_normal = transform_transpose(transform_inverse(tfm)); + + /* apply to mesh vertices */ + for(size_t i = 0; i < mesh->verts.size(); i++) + mesh->verts[i] = transform_point(&tfm, mesh->verts[i]); + + if(apply_to_motion) { + Attribute *attr = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + + if (attr) { + size_t steps_size = mesh->verts.size() * (mesh->motion_steps - 1); + float3 *vert_steps = attr->data_float3(); + + for (size_t i = 0; i < steps_size; i++) + vert_steps[i] = transform_point(&tfm, vert_steps[i]); + } - float3 c0 = transform_get_column(&tfm, 0); - float3 c1 = transform_get_column(&tfm, 1); - float3 c2 = transform_get_column(&tfm, 2); - float scalar = pow(fabsf(dot(cross(c0, c1), c2)), 1.0f/3.0f); + Attribute *attr_N = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_NORMAL); - for(size_t i = 0; i < mesh->verts.size(); i++) - mesh->verts[i] = transform_point(&tfm, mesh->verts[i]); + if(attr_N) { + Transform ntfm = mesh->transform_normal; + size_t steps_size = mesh->verts.size() * (mesh->motion_steps - 1); + float3 *normal_steps = attr_N->data_float3(); - for(size_t i = 0; i < mesh->curve_keys.size(); i++) { - mesh->curve_keys[i].co = transform_point(&tfm, mesh->curve_keys[i].co); - /* scale for strand radius - only correct for uniform transforms*/ - mesh->curve_keys[i].radius *= scalar; + for (size_t i = 0; i < steps_size; i++) + normal_steps[i] = normalize(transform_direction(&ntfm, normal_steps[i])); + } + } } - /* store matrix to transform later. when accessing these as attributes we - * do not want the transform to be applied for consistency between static - * and dynamic BVH, so we do it on packing. 
*/ - mesh->transform_normal = transform_transpose(transform_inverse(tfm)); + /* curves */ + if(mesh->curve_keys.size()) { + /* compute uniform scale */ + float3 c0 = transform_get_column(&tfm, 0); + float3 c1 = transform_get_column(&tfm, 1); + float3 c2 = transform_get_column(&tfm, 2); + float scalar = pow(fabsf(dot(cross(c0, c1), c2)), 1.0f/3.0f); + + /* apply transform to curve keys */ + for(size_t i = 0; i < mesh->curve_keys.size(); i++) { + float3 co = transform_point(&tfm, float4_to_float3(mesh->curve_keys[i])); + float radius = mesh->curve_keys[i].w * scalar; + + /* scale for curve radius is only correct for uniform scale */ + mesh->curve_keys[i] = float3_to_float4(co); + mesh->curve_keys[i].w = radius; + } + + if(apply_to_motion) { + Attribute *curve_attr = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); + + if (curve_attr) { + /* apply transform to motion curve keys */ + size_t steps_size = mesh->curve_keys.size() * (mesh->motion_steps - 1); + float4 *key_steps = curve_attr->data_float4(); + + for (size_t i = 0; i < steps_size; i++) { + float3 co = transform_point(&tfm, float4_to_float3(key_steps[i])); + float radius = key_steps[i].w * scalar; + + /* scale for curve radius is only correct for uniform scale */ + key_steps[i] = float3_to_float4(co); + key_steps[i].w = radius; + } + } + } + } /* we keep normals pointing in same direction on negative scale, notify * mesh about this in it (re)calculates normals */ @@ -111,7 +164,7 @@ void Object::apply_transform() if(bounds.valid()) { mesh->compute_bounds(); - compute_bounds(false, 0.0f); + compute_bounds(false); } /* tfm is not reset to identity, all code that uses it needs to check the @@ -137,6 +190,26 @@ void Object::tag_update(Scene *scene) scene->object_manager->need_update = true; } +vector<float> Object::motion_times() +{ + /* compute times at which we sample motion for this object */ + vector<float> times; + + if(!mesh || mesh->motion_steps == 1) + return times; + + int motion_steps = 
mesh->motion_steps; + + for(int step = 0; step < motion_steps; step++) { + if(step != motion_steps / 2) { + float time = 2.0f * step / (motion_steps - 1) - 1.0f; + times.push_back(time); + } + } + + return times; +} + /* Object Manager */ ObjectManager::ObjectManager() @@ -154,6 +227,7 @@ void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene float4 *objects_vector = NULL; int i = 0; map<Mesh*, float> surface_area_map; + map<ParticleSystem*, int> particle_offset; Scene::MotionType need_motion = scene->need_motion(device->info.advanced_shading); bool have_motion = false; bool have_curves = false; @@ -162,6 +236,15 @@ void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene if(need_motion == Scene::MOTION_PASS) objects_vector = dscene->objects_vector.resize(OBJECT_VECTOR_SIZE*scene->objects.size()); + /* particle system device offsets + * 0 is dummy particle, index starts at 1 + */ + int numparticles = 1; + foreach(ParticleSystem *psys, scene->particle_systems) { + particle_offset[psys] = numparticles; + numparticles += psys->particles.size(); + } + foreach(Object *ob, scene->objects) { Mesh *mesh = ob->mesh; uint flag = 0; @@ -177,6 +260,7 @@ void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene float surface_area = 0.0f; float pass_id = ob->pass_id; float random_number = (float)ob->random_id * (1.0f/(float)0xFFFFFFFF); + int particle_index = (ob->particle_system)? 
ob->particle_index + particle_offset[ob->particle_system]: 0; if(transform_uniform_scale(tfm, uniform_scale)) { map<Mesh*, float>::iterator it = surface_area_map.find(mesh); @@ -190,20 +274,6 @@ void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene surface_area += triangle_area(p1, p2, p3); } - foreach(Mesh::Curve& curve, mesh->curves) { - int first_key = curve.first_key; - - for(int i = 0; i < curve.num_segments(); i++) { - float3 p1 = mesh->curve_keys[first_key + i].co; - float r1 = mesh->curve_keys[first_key + i].radius; - float3 p2 = mesh->curve_keys[first_key + i + 1].co; - float r2 = mesh->curve_keys[first_key + i + 1].radius; - - /* currently ignores segment overlaps*/ - surface_area += M_PI_F *(r1 + r2) * len(p1 - p2); - } - } - surface_area_map[mesh] = surface_area; } else @@ -219,31 +289,17 @@ void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene surface_area += triangle_area(p1, p2, p3); } - - foreach(Mesh::Curve& curve, mesh->curves) { - int first_key = curve.first_key; - - for(int i = 0; i < curve.num_segments(); i++) { - float3 p1 = mesh->curve_keys[first_key + i].co; - float r1 = mesh->curve_keys[first_key + i].radius; - float3 p2 = mesh->curve_keys[first_key + i + 1].co; - float r2 = mesh->curve_keys[first_key + i + 1].radius; - - p1 = transform_point(&tfm, p1); - p2 = transform_point(&tfm, p2); - - /* currently ignores segment overlaps*/ - surface_area += M_PI_F *(r1 + r2) * len(p1 - p2); - } - } } /* pack in texture */ int offset = i*OBJECT_SIZE; + /* OBJECT_TRANSFORM */ memcpy(&objects[offset], &tfm, sizeof(float4)*3); + /* OBJECT_INVERSE_TRANSFORM */ memcpy(&objects[offset+4], &itfm, sizeof(float4)*3); - objects[offset+8] = make_float4(surface_area, pass_id, random_number, __int_as_float(ob->particle_id)); + /* OBJECT_PROPERTIES */ + objects[offset+8] = make_float4(surface_area, pass_id, random_number, __int_as_float(particle_index)); if(need_motion == Scene::MOTION_PASS) { /* motion 
transformations, is world/object space depending if mesh @@ -252,10 +308,10 @@ void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene Transform mtfm_pre = ob->motion.pre; Transform mtfm_post = ob->motion.post; - if(!(mesh->attributes.find(ATTR_STD_MOTION_PRE) || mesh->curve_attributes.find(ATTR_STD_MOTION_PRE))) + if(!mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION)) { mtfm_pre = mtfm_pre * itfm; - if(!(mesh->attributes.find(ATTR_STD_MOTION_POST) || mesh->curve_attributes.find(ATTR_STD_MOTION_POST))) mtfm_post = mtfm_post * itfm; + } memcpy(&objects_vector[i*OBJECT_VECTOR_SIZE+0], &mtfm_pre, sizeof(float4)*3); memcpy(&objects_vector[i*OBJECT_VECTOR_SIZE+3], &mtfm_post, sizeof(float4)*3); @@ -274,9 +330,17 @@ void ObjectManager::device_update_transforms(Device *device, DeviceScene *dscene } #endif - /* dupli object coords */ - objects[offset+9] = make_float4(ob->dupli_generated[0], ob->dupli_generated[1], ob->dupli_generated[2], 0.0f); - objects[offset+10] = make_float4(ob->dupli_uv[0], ob->dupli_uv[1], 0.0f, 0.0f); + if(mesh->use_motion_blur) + have_motion = true; + + /* dupli object coords and motion info */ + int totalsteps = mesh->motion_steps; + int numsteps = (totalsteps - 1)/2; + int numverts = mesh->verts.size(); + int numkeys = mesh->curve_keys.size(); + + objects[offset+9] = make_float4(ob->dupli_generated[0], ob->dupli_generated[1], ob->dupli_generated[2], __int_as_float(numkeys)); + objects[offset+10] = make_float4(ob->dupli_uv[0], ob->dupli_uv[1], __int_as_float(numsteps), __int_as_float(numverts)); /* object flag */ if(ob->use_holdout) @@ -355,6 +419,7 @@ void ObjectManager::apply_static_transforms(DeviceScene *dscene, Scene *scene, u #ifdef __OBJECT_MOTION__ Scene::MotionType need_motion = scene->need_motion(); bool motion_blur = need_motion == Scene::MOTION_BLUR; + bool apply_to_motion = need_motion != Scene::MOTION_PASS; #else bool motion_blur = false; #endif @@ -377,7 +442,7 @@ void 
ObjectManager::apply_static_transforms(DeviceScene *dscene, Scene *scene, u if(mesh_users[object->mesh] == 1) { if(!(motion_blur && object->use_motion)) { if(!object->mesh->transform_applied) { - object->apply_transform(); + object->apply_transform(apply_to_motion); object->mesh->transform_applied = true; if(progress.get_cancel()) return; diff --git a/intern/cycles/render/object.h b/intern/cycles/render/object.h index 5da85be3873..677526b715f 100644 --- a/intern/cycles/render/object.h +++ b/intern/cycles/render/object.h @@ -27,6 +27,7 @@ CCL_NAMESPACE_BEGIN class Device; class DeviceScene; class Mesh; +class ParticleSystem; class Progress; class Scene; struct Transform; @@ -50,15 +51,18 @@ public: float3 dupli_generated; float2 dupli_uv; - int particle_id; - + ParticleSystem *particle_system; + int particle_index; + Object(); ~Object(); void tag_update(Scene *scene); - void compute_bounds(bool motion_blur, float shuttertime); - void apply_transform(); + void compute_bounds(bool motion_blur); + void apply_transform(bool apply_to_motion); + + vector<float> motion_times(); }; /* Object Manager */ diff --git a/intern/cycles/render/osl.cpp b/intern/cycles/render/osl.cpp index e2798f438e2..94866102f60 100644 --- a/intern/cycles/render/osl.cpp +++ b/intern/cycles/render/osl.cpp @@ -203,7 +203,6 @@ void OSLShaderManager::shading_system_init() "glossy", /* PATH_RAY_GLOSSY */ "singular", /* PATH_RAY_SINGULAR */ "transparent", /* PATH_RAY_TRANSPARENT */ - "volume_scatter", /* PATH_RAY_VOLUME_SCATTER */ "shadow", /* PATH_RAY_SHADOW_OPAQUE */ "shadow", /* PATH_RAY_SHADOW_TRANSPARENT */ @@ -212,6 +211,8 @@ void OSLShaderManager::shading_system_init() "diffuse_ancestor", /* PATH_RAY_DIFFUSE_ANCESTOR */ "glossy_ancestor", /* PATH_RAY_GLOSSY_ANCESTOR */ "bssrdf_ancestor", /* PATH_RAY_BSSRDF_ANCESTOR */ + "__unused__", /* PATH_RAY_SINGLE_PASS_DONE */ + "volume_scatter", /* PATH_RAY_VOLUME_SCATTER */ }; const int nraytypes = sizeof(raytypes)/sizeof(raytypes[0]); @@ -512,16 +513,14 @@ 
void OSLCompiler::add(ShaderNode *node, const char *name, bool isfilepath) } } - /* create shader of the appropriate type. we pass "surface" to all shaders, - * because "volume" and "displacement" don't work yet in OSL. the shaders - * work fine, but presumably these values would be used for more strict - * checking, so when that is fixed, we should update the code here too. */ + /* create shader of the appropriate type. OSL only distinguishes between "surface" + * and "displacement" atm */ if(current_type == SHADER_TYPE_SURFACE) ss->Shader("surface", name, id(node).c_str()); else if(current_type == SHADER_TYPE_VOLUME) ss->Shader("surface", name, id(node).c_str()); else if(current_type == SHADER_TYPE_DISPLACEMENT) - ss->Shader("surface", name, id(node).c_str()); + ss->Shader("displacement", name, id(node).c_str()); else assert(0); @@ -544,7 +543,7 @@ void OSLCompiler::add(ShaderNode *node, const char *name, bool isfilepath) /* test if we shader contains specific closures */ OSLShaderInfo *info = ((OSLShaderManager*)manager)->shader_loaded_info(name); - if(info) { + if(info && current_type == SHADER_TYPE_SURFACE) { if(info->has_surface_emission) current_shader->has_surface_emission = true; if(info->has_surface_transparent) @@ -554,6 +553,10 @@ void OSLCompiler::add(ShaderNode *node, const char *name, bool isfilepath) current_shader->has_bssrdf_bump = true; /* can't detect yet */ } } + else if(current_type == SHADER_TYPE_VOLUME) { + if(node->has_spatial_varying()) + current_shader->has_heterogeneous_volume = true; + } } void OSLCompiler::parameter(const char *name, float f) @@ -709,14 +712,20 @@ void OSLCompiler::generate_nodes(const set<ShaderNode*>& nodes) node->compile(*this); done.insert(node); - if(node->has_surface_emission()) - current_shader->has_surface_emission = true; - if(node->has_surface_transparent()) - current_shader->has_surface_transparent = true; - if(node->has_surface_bssrdf()) { - current_shader->has_surface_bssrdf = true; - 
if(node->has_bssrdf_bump()) - current_shader->has_bssrdf_bump = true; + if(current_type == SHADER_TYPE_SURFACE) { + if(node->has_surface_emission()) + current_shader->has_surface_emission = true; + if(node->has_surface_transparent()) + current_shader->has_surface_transparent = true; + if(node->has_surface_bssrdf()) { + current_shader->has_surface_bssrdf = true; + if(node->has_bssrdf_bump()) + current_shader->has_bssrdf_bump = true; + } + } + else if(current_type == SHADER_TYPE_VOLUME) { + if(node->has_spatial_varying()) + current_shader->has_heterogeneous_volume = true; } } else @@ -798,6 +807,7 @@ void OSLCompiler::compile(OSLGlobals *og, Shader *shader) shader->has_bssrdf_bump = false; shader->has_volume = false; shader->has_displacement = false; + shader->has_heterogeneous_volume = false; /* generate surface shader */ if(shader->used && graph && output->input("Surface")->link) { diff --git a/intern/cycles/render/scene.cpp b/intern/cycles/render/scene.cpp index 71f5a9dafed..4f5ad439520 100644 --- a/intern/cycles/render/scene.cpp +++ b/intern/cycles/render/scene.cpp @@ -17,6 +17,7 @@ #include <stdlib.h> #include "background.h" +#include "bake.h" #include "camera.h" #include "curves.h" #include "device.h" @@ -54,6 +55,7 @@ Scene::Scene(const SceneParams& params_, const DeviceInfo& device_info_) image_manager = new ImageManager(); particle_system_manager = new ParticleSystemManager(); curve_system_manager = new CurveSystemManager(); + bake_manager = new BakeManager(); /* OSL only works on the CPU */ if(device_info_.type == DEVICE_CPU) @@ -61,8 +63,8 @@ Scene::Scene(const SceneParams& params_, const DeviceInfo& device_info_) else shader_manager = ShaderManager::create(this, SceneParams::SVM); - if (device_info_.type == DEVICE_CPU) - image_manager->set_extended_image_limits(); + /* Extended image limits for CPU and GPUs */ + image_manager->set_extended_image_limits(device_info_); } Scene::~Scene() @@ -103,6 +105,8 @@ void Scene::free_memory(bool final) 
particle_system_manager->device_free(device, &dscene); curve_system_manager->device_free(device, &dscene); + bake_manager->device_free(device, &dscene); + if(!params.persistent_data || final) image_manager->device_free(device, &dscene); @@ -122,6 +126,7 @@ void Scene::free_memory(bool final) delete particle_system_manager; delete curve_system_manager; delete image_manager; + delete bake_manager; } } @@ -137,6 +142,8 @@ void Scene::device_update(Device *device_, Progress& progress) * - Camera may be used for adapative subdivison. * - Displacement shader must have all shader data available. * - Light manager needs lookup tables and final mesh data to compute emission CDF. + * - Film needs light manager to run for use_light_visibility + * - Lookup tables are done a second time to handle film tables */ image_manager->set_pack_images(device->info.pack_images); @@ -171,11 +178,6 @@ void Scene::device_update(Device *device_, Progress& progress) if(progress.get_cancel()) return; - progress.set_status("Updating Film"); - film->device_update(device, &dscene, this); - - if(progress.get_cancel()) return; - progress.set_status("Updating Lookup Tables"); lookup_tables->device_update(device, &dscene); @@ -196,11 +198,26 @@ void Scene::device_update(Device *device_, Progress& progress) if(progress.get_cancel()) return; + progress.set_status("Updating Film"); + film->device_update(device, &dscene, this); + + if(progress.get_cancel()) return; + progress.set_status("Updating Integrator"); integrator->device_update(device, &dscene, this); if(progress.get_cancel()) return; + progress.set_status("Updating Lookup Tables"); + lookup_tables->device_update(device, &dscene); + + if(progress.get_cancel()) return; + + progress.set_status("Updating Baking"); + bake_manager->device_update(device, &dscene, this, progress); + + if(progress.get_cancel()) return; + progress.set_status("Updating Device", "Writing constant memory"); device->const_copy_to("__data", &dscene.data, sizeof(dscene.data)); } 
@@ -219,8 +236,10 @@ bool Scene::need_global_attribute(AttributeStandard std) { if(std == ATTR_STD_UV) return Pass::contains(film->passes, PASS_UV); - if(std == ATTR_STD_MOTION_PRE || std == ATTR_STD_MOTION_POST) - return need_motion() == MOTION_PASS; + else if(std == ATTR_STD_MOTION_VERTEX_POSITION) + return need_motion() != MOTION_NONE; + else if(std == ATTR_STD_MOTION_VERTEX_NORMAL) + return need_motion() == MOTION_BLUR; return false; } @@ -249,7 +268,8 @@ bool Scene::need_reset() || integrator->need_update || shader_manager->need_update || particle_system_manager->need_update - || curve_system_manager->need_update); + || curve_system_manager->need_update + || bake_manager->need_update); } void Scene::reset() diff --git a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h index 2c223192536..0f0bb725823 100644 --- a/intern/cycles/render/scene.h +++ b/intern/cycles/render/scene.h @@ -51,6 +51,8 @@ class CurveSystemManager; class Shader; class ShaderManager; class Progress; +class BakeManager; +class BakeData; /* Scene Device Data */ @@ -60,7 +62,7 @@ public: device_vector<float4> bvh_nodes; device_vector<uint> object_node; device_vector<float4> tri_woop; - device_vector<uint> prim_segment; + device_vector<uint> prim_type; device_vector<uint> prim_visibility; device_vector<uint> prim_index; device_vector<uint> prim_object; @@ -103,8 +105,8 @@ public: /* integrator */ device_vector<uint> sobol_directions; - /* images */ - device_vector<uchar4> tex_image[TEX_EXTENDED_NUM_IMAGES]; + /* cpu images */ + device_vector<uchar4> tex_image[TEX_EXTENDED_NUM_IMAGES_CPU]; device_vector<float4> tex_float_image[TEX_EXTENDED_NUM_FLOAT_IMAGES]; /* opencl images */ @@ -174,6 +176,7 @@ public: ObjectManager *object_manager; ParticleSystemManager *particle_system_manager; CurveSystemManager *curve_system_manager; + BakeManager *bake_manager; /* default shaders */ int default_surface; diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp index 
0805a685467..28b44df6b36 100644 --- a/intern/cycles/render/session.cpp +++ b/intern/cycles/render/session.cpp @@ -23,6 +23,7 @@ #include "integrator.h" #include "scene.h" #include "session.h" +#include "bake.h" #include "util_foreach.h" #include "util_function.h" @@ -50,7 +51,7 @@ Session::Session(const SessionParams& params_) device = Device::create(params.device, stats, params.background); - if(params.background) { + if(params.background && params.output_path.empty()) { buffers = NULL; display = NULL; } @@ -81,6 +82,7 @@ Session::Session(const SessionParams& params_) Session::~Session() { if(session_thread) { + /* wait for session thread to end */ progress.set_cancel("Exiting"); gpu_need_tonemap = false; @@ -95,13 +97,19 @@ Session::~Session() wait(); } - if(display && !params.output_path.empty()) { - tonemap(); + if(!params.output_path.empty()) { + /* tonemap and write out image if requested */ + delete display; + + display = new DisplayBuffer(device, false); + display->reset(device, buffers->params); + tonemap(params.samples); progress.set_status("Writing Image", params.output_path); display->write(device, params.output_path); } + /* clean up */ foreach(RenderBuffers *buffers, tile_buffers) delete buffers; @@ -151,7 +159,7 @@ void Session::reset_gpu(BufferParams& buffer_params, int samples) pause_cond.notify_all(); } -bool Session::draw_gpu(BufferParams& buffer_params) +bool Session::draw_gpu(BufferParams& buffer_params, DeviceDrawParams& draw_params) { /* block for buffer access */ thread_scoped_lock display_lock(display_mutex); @@ -165,12 +173,12 @@ bool Session::draw_gpu(BufferParams& buffer_params) * only access GL buffers from the main thread */ if(gpu_need_tonemap) { thread_scoped_lock buffers_lock(buffers_mutex); - tonemap(); + tonemap(tile_manager.state.sample); gpu_need_tonemap = false; gpu_need_tonemap_cond.notify_all(); } - display->draw(device); + display->draw(device, draw_params); if(display_outdated && (time_dt() - reset_time) > 
params.text_timeout) return false; @@ -315,7 +323,7 @@ void Session::reset_cpu(BufferParams& buffer_params, int samples) pause_cond.notify_all(); } -bool Session::draw_cpu(BufferParams& buffer_params) +bool Session::draw_cpu(BufferParams& buffer_params, DeviceDrawParams& draw_params) { thread_scoped_lock display_lock(display_mutex); @@ -324,7 +332,7 @@ bool Session::draw_cpu(BufferParams& buffer_params) /* then verify the buffers have the expected size, so we don't * draw previous results in a resized window */ if(!buffer_params.modified(display->params)) { - display->draw(device); + display->draw(device, draw_params); if(display_outdated && (time_dt() - reset_time) > params.text_timeout) return false; @@ -367,7 +375,7 @@ bool Session::acquire_tile(Device *tile_device, RenderTile& rtile) /* in case of a permanent buffer, return it, otherwise we will allocate * a new temporary buffer */ - if(!params.background) { + if(!(params.background && params.output_path.empty())) { tile_manager.state.buffer.get_offset_stride(rtile.offset, rtile.stride); rtile.buffer = buffers->buffer.device_pointer; @@ -567,8 +575,8 @@ void Session::run_cpu() } else if(need_tonemap) { /* tonemap only if we do not reset, we don't we don't - * want to show the result of an incomplete sample*/ - tonemap(); + * want to show the result of an incomplete sample */ + tonemap(tile_manager.state.sample); } if(!device->error_message().empty()) @@ -624,12 +632,12 @@ void Session::run() progress.set_update(); } -bool Session::draw(BufferParams& buffer_params) +bool Session::draw(BufferParams& buffer_params, DeviceDrawParams &draw_params) { if(device_use_gl) - return draw_gpu(buffer_params); + return draw_gpu(buffer_params, draw_params); else - return draw_cpu(buffer_params); + return draw_cpu(buffer_params, draw_params); } void Session::reset_(BufferParams& buffer_params, int samples) @@ -726,10 +734,14 @@ void Session::update_scene() cam->tag_update(); } - /* number of samples is needed by multi jittered 
sampling pattern */ + /* number of samples is needed by multi jittered + * sampling pattern and by baking */ Integrator *integrator = scene->integrator; + BakeManager *bake_manager = scene->bake_manager; - if(integrator->sampling_pattern == SAMPLING_PATTERN_CMJ) { + if(integrator->sampling_pattern == SAMPLING_PATTERN_CMJ || + bake_manager->get_baking()) + { int aa_samples = tile_manager.num_samples; if(aa_samples != integrator->aa_samples) { @@ -834,7 +846,7 @@ void Session::path_trace() device->task_add(task); } -void Session::tonemap() +void Session::tonemap(int sample) { /* add tonemap task */ DeviceTask task(DeviceTask::FILM_CONVERT); @@ -846,7 +858,7 @@ void Session::tonemap() task.rgba_byte = display->rgba_byte.device_pointer; task.rgba_half = display->rgba_half.device_pointer; task.buffer = buffers->buffer.device_pointer; - task.sample = tile_manager.state.sample; + task.sample = sample; tile_manager.state.buffer.get_offset_stride(task.offset, task.stride); if(task.w > 0 && task.h > 0) { diff --git a/intern/cycles/render/session.h b/intern/cycles/render/session.h index 1227edf81b6..1e625158652 100644 --- a/intern/cycles/render/session.h +++ b/intern/cycles/render/session.h @@ -128,7 +128,7 @@ public: ~Session(); void start(); - bool draw(BufferParams& params); + bool draw(BufferParams& params, DeviceDrawParams& draw_params); void wait(); bool ready_to_reset(); @@ -136,6 +136,7 @@ public: void set_samples(int samples); void set_pause(bool pause); + void update_scene(); void device_free(); protected: struct DelayedReset { @@ -147,19 +148,18 @@ protected: void run(); - void update_scene(); void update_status_time(bool show_pause = false, bool show_done = false); - void tonemap(); + void tonemap(int sample); void path_trace(); void reset_(BufferParams& params, int samples); void run_cpu(); - bool draw_cpu(BufferParams& params); + bool draw_cpu(BufferParams& params, DeviceDrawParams& draw_params); void reset_cpu(BufferParams& params, int samples); void run_gpu(); 
- bool draw_gpu(BufferParams& params); + bool draw_gpu(BufferParams& params, DeviceDrawParams& draw_params); void reset_gpu(BufferParams& params, int samples); bool acquire_tile(Device *tile_device, RenderTile& tile); diff --git a/intern/cycles/render/shader.cpp b/intern/cycles/render/shader.cpp index 20f0fd7ed1e..b25673b36c3 100644 --- a/intern/cycles/render/shader.cpp +++ b/intern/cycles/render/shader.cpp @@ -53,6 +53,7 @@ Shader::Shader() has_volume = false; has_displacement = false; has_bssrdf_bump = false; + has_heterogeneous_volume = false; used = false; @@ -249,7 +250,7 @@ void ShaderManager::device_update_common(Device *device, DeviceScene *dscene, Sc * the case with camera inside volumes too */ flag |= SD_HAS_TRANSPARENT_SHADOW; } - if(shader->heterogeneous_volume) + if(shader->heterogeneous_volume && shader->has_heterogeneous_volume) flag |= SD_HETEROGENEOUS_VOLUME; if(shader->has_bssrdf_bump) flag |= SD_HAS_BSSRDF_BUMP; diff --git a/intern/cycles/render/shader.h b/intern/cycles/render/shader.h index 5f87050fe19..874e8face7a 100644 --- a/intern/cycles/render/shader.h +++ b/intern/cycles/render/shader.h @@ -77,6 +77,7 @@ public: bool has_surface_bssrdf; bool has_converter_blackbody; bool has_bssrdf_bump; + bool has_heterogeneous_volume; /* requested mesh attributes */ AttributeRequestSet attributes; diff --git a/intern/cycles/render/sky_model.cpp b/intern/cycles/render/sky_model.cpp index 6f250c06bc1..adb07d9e288 100644 --- a/intern/cycles/render/sky_model.cpp +++ b/intern/cycles/render/sky_model.cpp @@ -310,7 +310,7 @@ double arhosekskymodel_radiance( double wavelength ) { - int low_wl = (wavelength - 320.0 ) / 40.0; + int low_wl = (int)((wavelength - 320.0) / 40.0); if ( low_wl < 0 || low_wl >= 11 ) return 0.0f; diff --git a/intern/cycles/render/svm.cpp b/intern/cycles/render/svm.cpp index 538b1aae313..576c176759c 100644 --- a/intern/cycles/render/svm.cpp +++ b/intern/cycles/render/svm.cpp @@ -63,8 +63,6 @@ void SVMShaderManager::device_update(Device 
*device, DeviceScene *dscene, Scene svm_nodes.push_back(make_int4(NODE_SHADER_JUMP, 0, 0, 0)); } - bool use_multi_closure = device->info.advanced_shading; - for(i = 0; i < scene->shaders.size(); i++) { Shader *shader = scene->shaders[i]; @@ -75,8 +73,7 @@ void SVMShaderManager::device_update(Device *device, DeviceScene *dscene, Scene if(shader->use_mis && shader->has_surface_emission) scene->light_manager->need_update = true; - SVMCompiler compiler(scene->shader_manager, scene->image_manager, - use_multi_closure); + SVMCompiler compiler(scene->shader_manager, scene->image_manager); compiler.background = ((int)i == scene->default_background); compiler.compile(shader, svm_nodes, i); } @@ -104,7 +101,7 @@ void SVMShaderManager::device_free(Device *device, DeviceScene *dscene, Scene *s /* Graph Compiler */ -SVMCompiler::SVMCompiler(ShaderManager *shader_manager_, ImageManager *image_manager_, bool use_multi_closure_) +SVMCompiler::SVMCompiler(ShaderManager *shader_manager_, ImageManager *image_manager_) { shader_manager = shader_manager_; image_manager = image_manager_; @@ -114,7 +111,6 @@ SVMCompiler::SVMCompiler(ShaderManager *shader_manager_, ImageManager *image_man current_graph = NULL; background = false; mix_weight_offset = SVM_STACK_INVALID; - use_multi_closure = use_multi_closure_; compile_failed = false; } @@ -230,7 +226,8 @@ void SVMCompiler::stack_assign(ShaderInput *input) else if(input->type == SHADER_SOCKET_VECTOR || input->type == SHADER_SOCKET_NORMAL || input->type == SHADER_SOCKET_POINT || - input->type == SHADER_SOCKET_COLOR) { + input->type == SHADER_SOCKET_COLOR) + { add_node(NODE_VALUE_V, input->stack_offset); add_node(NODE_VALUE_V, input->value); @@ -379,6 +376,22 @@ void SVMCompiler::find_dependencies(set<ShaderNode*>& dependencies, const set<Sh } } +void SVMCompiler::generate_node(ShaderNode *node, set<ShaderNode*>& done) +{ + node->compile(*this); + stack_clear_users(node, done); + stack_clear_temporary(node); + + if(current_type == 
SHADER_TYPE_VOLUME) { + if(node->has_spatial_varying()) + current_shader->has_heterogeneous_volume = true; + } + + /* detect if we have a blackbody converter, to prepare lookup table */ + if(node->has_converter_blackbody()) + current_shader->has_converter_blackbody = true; +} + void SVMCompiler::generate_svm_nodes(const set<ShaderNode*>& nodes, set<ShaderNode*>& done) { bool nodes_done; @@ -396,13 +409,7 @@ void SVMCompiler::generate_svm_nodes(const set<ShaderNode*>& nodes, set<ShaderNo inputs_done = false; if(inputs_done) { - /* Detect if we have a blackbody converter, to prepare lookup table */ - if(node->has_converter_blackbody()) - current_shader->has_converter_blackbody = true; - - node->compile(*this); - stack_clear_users(node, done); - stack_clear_temporary(node); + generate_node(node, done); done.insert(node); } else @@ -412,83 +419,34 @@ void SVMCompiler::generate_svm_nodes(const set<ShaderNode*>& nodes, set<ShaderNo } while(!nodes_done); } -void SVMCompiler::generate_closure(ShaderNode *node, set<ShaderNode*>& done) +void SVMCompiler::generate_closure_node(ShaderNode *node, set<ShaderNode*>& done) { - if(node->name == ustring("mix_closure") || node->name == ustring("add_closure")) { - ShaderInput *fin = node->input("Fac"); - ShaderInput *cl1in = node->input("Closure1"); - ShaderInput *cl2in = node->input("Closure2"); - - /* execute dependencies for mix weight */ - if(fin) { + /* execute dependencies for closure */ + foreach(ShaderInput *in, node->inputs) { + if(!node_skip_input(node, in) && in->link) { set<ShaderNode*> dependencies; - find_dependencies(dependencies, done, fin); + find_dependencies(dependencies, done, in); generate_svm_nodes(dependencies, done); - - /* add mix node */ - stack_assign(fin); - } - - int mix_offset = svm_nodes.size(); - - if(fin) - add_node(NODE_MIX_CLOSURE, fin->stack_offset, 0, 0); - else - add_node(NODE_ADD_CLOSURE, 0, 0, 0); - - /* generate code for closure 1 - * note we backup all compiler state and restore it afterwards, 
so one - * closure choice doesn't influence the other*/ - if(cl1in->link) { - StackBackup backup; - stack_backup(backup, done); - - generate_closure(cl1in->link->parent, done); - add_node(NODE_END, 0, 0, 0); - - stack_restore(backup, done); } - else - add_node(NODE_END, 0, 0, 0); - - /* generate code for closure 2 */ - int cl2_offset = svm_nodes.size(); - - if(cl2in->link) { - StackBackup backup; - stack_backup(backup, done); - - generate_closure(cl2in->link->parent, done); - add_node(NODE_END, 0, 0, 0); - - stack_restore(backup, done); - } - else - add_node(NODE_END, 0, 0, 0); + } - /* set jump for mix node, -1 because offset is already - * incremented when this jump is added to it */ - svm_nodes[mix_offset].z = cl2_offset - mix_offset - 1; + /* closure mix weight */ + const char *weight_name = (current_type == SHADER_TYPE_VOLUME)? "VolumeMixWeight": "SurfaceMixWeight"; + ShaderInput *weight_in = node->input(weight_name); - done.insert(node); - stack_clear_users(node, done); - stack_clear_temporary(node); + if(weight_in && (weight_in->link || weight_in->value.x != 1.0f)) { + stack_assign(weight_in); + mix_weight_offset = weight_in->stack_offset; } - else { - /* execute dependencies for closure */ - foreach(ShaderInput *in, node->inputs) { - if(!node_skip_input(node, in) && in->link) { - set<ShaderNode*> dependencies; - find_dependencies(dependencies, done, in); - generate_svm_nodes(dependencies, done); - } - } + else + mix_weight_offset = SVM_STACK_INVALID; - /* compile closure itself */ - node->compile(*this); - stack_clear_users(node, done); - stack_clear_temporary(node); + /* compile closure itself */ + generate_node(node, done); + mix_weight_offset = SVM_STACK_INVALID; + + if(current_type == SHADER_TYPE_SURFACE) { if(node->has_surface_emission()) current_shader->has_surface_emission = true; if(node->has_surface_transparent()) @@ -498,18 +456,24 @@ void SVMCompiler::generate_closure(ShaderNode *node, set<ShaderNode*>& done) if(node->has_bssrdf_bump()) 
current_shader->has_bssrdf_bump = true; } + } +} - /* end node is added outside of this */ +void SVMCompiler::generated_shared_closure_nodes(ShaderNode *node, set<ShaderNode*>& done, set<ShaderNode*>& closure_done, const set<ShaderNode*>& shared) +{ + if(shared.find(node) != shared.end()) { + generate_multi_closure(node, done, closure_done); + } + else { + foreach(ShaderInput *in, node->inputs) { + if(in->type == SHADER_SOCKET_CLOSURE && in->link) + generated_shared_closure_nodes(in->link->parent, done, closure_done, shared); + } } } void SVMCompiler::generate_multi_closure(ShaderNode *node, set<ShaderNode*>& done, set<ShaderNode*>& closure_done) { - /* todo: the weak point here is that unlike the single closure sampling - * we will evaluate all nodes even if they are used as input for closures - * that are unused. it's not clear what would be the best way to skip such - * nodes at runtime, especially if they are tangled up */ - /* only generate once */ if(closure_done.find(node) != closure_done.end()) return; @@ -520,50 +484,81 @@ void SVMCompiler::generate_multi_closure(ShaderNode *node, set<ShaderNode*>& don /* weighting is already taken care of in ShaderGraph::transform_multi_closure */ ShaderInput *cl1in = node->input("Closure1"); ShaderInput *cl2in = node->input("Closure2"); + ShaderInput *facin = node->input("Fac"); - if(cl1in->link) - generate_multi_closure(cl1in->link->parent, done, closure_done); - if(cl2in->link) - generate_multi_closure(cl2in->link->parent, done, closure_done); - } - else { - /* execute dependencies for closure */ - foreach(ShaderInput *in, node->inputs) { - if(!node_skip_input(node, in) && in->link) { - set<ShaderNode*> dependencies; - find_dependencies(dependencies, done, in); - generate_svm_nodes(dependencies, done); + /* skip empty mix/add closure nodes */ + if(!cl1in->link && !cl2in->link) + return; + + if(facin && facin->link) { + /* mix closure: generate instructions to compute mix weight */ + set<ShaderNode*> dependencies; + 
find_dependencies(dependencies, done, facin); + generate_svm_nodes(dependencies, done); + + stack_assign(facin); + + /* execute shared dependencies. this is needed to allow skipping + * of zero weight closures and their dependencies later, so we + * ensure that they only skip dependencies that are unique to them */ + set<ShaderNode*> cl1deps, cl2deps, shareddeps; + + find_dependencies(cl1deps, done, cl1in); + find_dependencies(cl2deps, done, cl2in); + + set_intersection(cl1deps.begin(), cl1deps.end(), + cl2deps.begin(), cl2deps.end(), + std::inserter(shareddeps, shareddeps.begin())); + + if(!shareddeps.empty()) { + if(cl1in->link) + generated_shared_closure_nodes(cl1in->link->parent, done, closure_done, shareddeps); + if(cl2in->link) + generated_shared_closure_nodes(cl2in->link->parent, done, closure_done, shareddeps); + + generate_svm_nodes(shareddeps, done); } - } - /* closure mix weight */ - const char *weight_name = (current_type == SHADER_TYPE_VOLUME)? "VolumeMixWeight": "SurfaceMixWeight"; - ShaderInput *weight_in = node->input(weight_name); + /* generate instructions for input closure 1 */ + if(cl1in->link) { + /* add instruction to skip closure and its dependencies if mix weight is zero */ + svm_nodes.push_back(make_int4(NODE_JUMP_IF_ONE, 0, facin->stack_offset, 0)); + int node_jump_skip_index = svm_nodes.size() - 1; - if(weight_in && (weight_in->link || weight_in->value.x != 1.0f)) { - stack_assign(weight_in); - mix_weight_offset = weight_in->stack_offset; - } - else - mix_weight_offset = SVM_STACK_INVALID; + generate_multi_closure(cl1in->link->parent, done, closure_done); - /* compile closure itself */ - node->compile(*this); - stack_clear_users(node, done); - stack_clear_temporary(node); + /* fill in jump instruction location to be after closure */ + svm_nodes[node_jump_skip_index].y = svm_nodes.size() - node_jump_skip_index - 1; + } - mix_weight_offset = SVM_STACK_INVALID; + /* generate instructions for input closure 2 */ + if(cl2in->link) { + /* add 
instruction to skip closure and its dependencies if mix weight is zero */ + svm_nodes.push_back(make_int4(NODE_JUMP_IF_ZERO, 0, facin->stack_offset, 0)); + int node_jump_skip_index = svm_nodes.size() - 1; - if(node->has_surface_emission()) - current_shader->has_surface_emission = true; - if(node->has_surface_transparent()) - current_shader->has_surface_transparent = true; - if(node->has_surface_bssrdf()) { - current_shader->has_surface_bssrdf = true; - if(node->has_bssrdf_bump()) - current_shader->has_bssrdf_bump = true; + generate_multi_closure(cl2in->link->parent, done, closure_done); + + /* fill in jump instruction location to be after closure */ + svm_nodes[node_jump_skip_index].y = svm_nodes.size() - node_jump_skip_index - 1; + } + + /* unassign */ + facin->stack_offset = SVM_STACK_INVALID; + } + else { + /* execute closures and their dependencies, no runtime checks + * to skip closures here because was already optimized due to + * fixed weight or add closure that always needs both */ + if(cl1in->link) + generate_multi_closure(cl1in->link->parent, done, closure_done); + if(cl2in->link) + generate_multi_closure(cl2in->link->parent, done, closure_done); } } + else { + generate_closure_node(node, done); + } done.insert(node); } @@ -642,14 +637,8 @@ void SVMCompiler::compile_type(Shader *shader, ShaderGraph *graph, ShaderType ty } if(generate) { - set<ShaderNode*> done; - - if(use_multi_closure) { - set<ShaderNode*> closure_done; - generate_multi_closure(clin->link->parent, done, closure_done); - } - else - generate_closure(clin->link->parent, done); + set<ShaderNode*> done, closure_done; + generate_multi_closure(clin->link->parent, done, closure_done); } } @@ -676,9 +665,9 @@ void SVMCompiler::compile(Shader *shader, vector<int4>& global_svm_nodes, int in shader->graph_bump = shader->graph->copy(); /* finalize */ - shader->graph->finalize(false, false, use_multi_closure); + shader->graph->finalize(false, false); if(shader->graph_bump) - 
shader->graph_bump->finalize(true, false, use_multi_closure); + shader->graph_bump->finalize(true, false); current_shader = shader; @@ -690,6 +679,7 @@ void SVMCompiler::compile(Shader *shader, vector<int4>& global_svm_nodes, int in shader->has_converter_blackbody = false; shader->has_volume = false; shader->has_displacement = false; + shader->has_heterogeneous_volume = false; /* generate surface shader */ compile_type(shader, shader->graph, SHADER_TYPE_SURFACE); diff --git a/intern/cycles/render/svm.h b/intern/cycles/render/svm.h index 3d84a67e173..45aa4d26926 100644 --- a/intern/cycles/render/svm.h +++ b/intern/cycles/render/svm.h @@ -52,8 +52,7 @@ public: class SVMCompiler { public: - SVMCompiler(ShaderManager *shader_manager, ImageManager *image_manager, - bool use_multi_closure_); + SVMCompiler(ShaderManager *shader_manager, ImageManager *image_manager); void compile(Shader *shader, vector<int4>& svm_nodes, int index); void stack_assign(ShaderOutput *output); @@ -123,9 +122,13 @@ protected: bool node_skip_input(ShaderNode *node, ShaderInput *input); /* single closure */ - void find_dependencies(set<ShaderNode*>& dependencies, const set<ShaderNode*>& done, ShaderInput *input); + void find_dependencies(set<ShaderNode*>& dependencies, + const set<ShaderNode*>& done, ShaderInput *input); + void generate_node(ShaderNode *node, set<ShaderNode*>& done); + void generate_closure_node(ShaderNode *node, set<ShaderNode*>& done); + void generated_shared_closure_nodes(ShaderNode *node, set<ShaderNode*>& done, + set<ShaderNode*>& closure_done, const set<ShaderNode*>& shared); void generate_svm_nodes(const set<ShaderNode*>& nodes, set<ShaderNode*>& done); - void generate_closure(ShaderNode *node, set<ShaderNode*>& done); /* multi closure */ void generate_multi_closure(ShaderNode *node, set<ShaderNode*>& done, set<ShaderNode*>& closure_done); @@ -140,7 +143,6 @@ protected: Stack active_stack; int max_stack_use; uint mix_weight_offset; - bool use_multi_closure; bool 
compile_failed; }; diff --git a/intern/cycles/render/tables.cpp b/intern/cycles/render/tables.cpp index be0d4afbe2c..a8d502c432d 100644 --- a/intern/cycles/render/tables.cpp +++ b/intern/cycles/render/tables.cpp @@ -39,7 +39,10 @@ void LookupTables::device_update(Device *device, DeviceScene *dscene) if(!need_update) return; - device->tex_alloc("__lookup_table", dscene->lookup_table); + device->tex_free(dscene->lookup_table); + + if(lookup_tables.size() > 0) + device->tex_alloc("__lookup_table", dscene->lookup_table); need_update = false; } diff --git a/intern/cycles/subd/subd_split.cpp b/intern/cycles/subd/subd_split.cpp index 417ecfffd49..6bbf4af3f85 100644 --- a/intern/cycles/subd/subd_split.cpp +++ b/intern/cycles/subd/subd_split.cpp @@ -94,7 +94,7 @@ void DiagSplit::partition_edge(Patch *patch, float2 *P, int *t0, int *t1, float2 *t1 = T(patch, *P, Pend); } else { - int I = floor(t*0.5f); + int I = (int)floor((float)t*0.5f); *P = interp(Pstart, Pend, (t == 0)? 0: I/(float)t); /* XXX is t faces or verts */ *t0 = I; *t1 = t - I; diff --git a/intern/cycles/util/util_color.h b/intern/cycles/util/util_color.h index 0cfa4049d3e..b72cc6bc873 100644 --- a/intern/cycles/util/util_color.h +++ b/intern/cycles/util/util_color.h @@ -61,22 +61,22 @@ ccl_device float3 rgb_to_hsv(float3 rgb) h = 0.0f; } - if(s == 0.0f) { - h = 0.0f; - } - else { + if(s != 0.0f) { float3 cmax3 = make_float3(cmax, cmax, cmax); c = (cmax3 - rgb)/cdelta; - if(rgb.x == cmax) h = c.z - c.y; - else if(rgb.y == cmax) h = 2.0f + c.x - c.z; - else h = 4.0f + c.y - c.x; + if (rgb.x == cmax) h = c.z - c.y; + else if(rgb.y == cmax) h = 2.0f + c.x - c.z; + else h = 4.0f + c.y - c.x; h /= 6.0f; if(h < 0.0f) h += 1.0f; } + else { + h = 0.0f; + } return make_float3(h, s, v); } @@ -90,13 +90,10 @@ ccl_device float3 hsv_to_rgb(float3 hsv) s = hsv.y; v = hsv.z; - if(s == 0.0f) { - rgb = make_float3(v, v, v); - } - else { + if(s != 0.0f) { if(h == 1.0f) h = 0.0f; - + h *= 6.0f; i = floorf(h); f = h - i; @@ -104,13 
+101,16 @@ ccl_device float3 hsv_to_rgb(float3 hsv) p = v*(1.0f-s); q = v*(1.0f-(s*f)); t = v*(1.0f-(s*(1.0f-f))); - - if(i == 0.0f) rgb = make_float3(v, t, p); + + if (i == 0.0f) rgb = make_float3(v, t, p); else if(i == 1.0f) rgb = make_float3(q, v, p); else if(i == 2.0f) rgb = make_float3(p, v, t); else if(i == 3.0f) rgb = make_float3(p, q, v); else if(i == 4.0f) rgb = make_float3(t, p, v); - else rgb = make_float3(v, p, q); + else rgb = make_float3(v, p, q); + } + else { + rgb = make_float3(v, v, v); } return rgb; @@ -132,8 +132,8 @@ ccl_device float3 xyY_to_xyz(float x, float y, float Y) ccl_device float3 xyz_to_rgb(float x, float y, float z) { return make_float3(3.240479f * x + -1.537150f * y + -0.498535f * z, - -0.969256f * x + 1.875991f * y + 0.041556f * z, - 0.055648f * x + -0.204043f * y + 1.057311f * z); + -0.969256f * x + 1.875991f * y + 0.041556f * z, + 0.055648f * x + -0.204043f * y + 1.057311f * z); } #ifndef __KERNEL_OPENCL__ diff --git a/intern/cycles/util/util_cuda.h b/intern/cycles/util/util_cuda.h index deb2ff969d6..0c80303df9b 100644 --- a/intern/cycles/util/util_cuda.h +++ b/intern/cycles/util/util_cuda.h @@ -206,7 +206,8 @@ typedef enum CUjit_target_enum CU_TARGET_COMPUTE_20, CU_TARGET_COMPUTE_21, CU_TARGET_COMPUTE_30, - CU_TARGET_COMPUTE_35 + CU_TARGET_COMPUTE_35, + CU_TARGET_COMPUTE_50 } CUjit_target; typedef enum CUjit_fallback_enum diff --git a/intern/cycles/util/util_half.h b/intern/cycles/util/util_half.h index 21192024f7f..da6fae79bb9 100644 --- a/intern/cycles/util/util_half.h +++ b/intern/cycles/util/util_half.h @@ -19,13 +19,17 @@ #include "util_types.h" +#ifdef __KERNEL_SSE2__ +#include "util_simd.h" +#endif + CCL_NAMESPACE_BEGIN /* Half Floats */ #ifdef __KERNEL_OPENCL__ -#define float4_store_half(h, f, scale) vstore_half4(*(f) * (scale), 0, h); +#define float4_store_half(h, f, scale) vstore_half4(f * (scale), 0, h); #else @@ -34,24 +38,24 @@ struct half4 { half x, y, z, w; }; #ifdef __KERNEL_CUDA__ -ccl_device_inline void 
float4_store_half(half *h, const float4 *f, float scale) +ccl_device_inline void float4_store_half(half *h, float4 f, float scale) { - h[0] = __float2half_rn(f->x * scale); - h[1] = __float2half_rn(f->y * scale); - h[2] = __float2half_rn(f->z * scale); - h[3] = __float2half_rn(f->w * scale); + h[0] = __float2half_rn(f.x * scale); + h[1] = __float2half_rn(f.y * scale); + h[2] = __float2half_rn(f.z * scale); + h[3] = __float2half_rn(f.w * scale); } #else -ccl_device_inline void float4_store_half(half *h, const float4 *f, float scale) +ccl_device_inline void float4_store_half(half *h, float4 f, float scale) { #ifndef __KERNEL_SSE2__ for(int i = 0; i < 4; i++) { /* optimized float to half for pixels: * assumes no negative, no nan, no inf, and sets denormal to 0 */ union { uint i; float f; } in; - float fscale = (*f)[i] * scale; + float fscale = f[i] * scale; in.f = (fscale > 0.0f)? ((fscale < 65500.0f)? fscale: 65500.0f): 0.0f; int x = in.i; @@ -70,7 +74,7 @@ ccl_device_inline void float4_store_half(half *h, const float4 *f, float scale) const __m128i mm_7FFFFFFF = _mm_set1_epi32(0x7FFFFFFF); const __m128i mm_C8000000 = _mm_set1_epi32(0xC8000000); - __m128 mm_fscale = _mm_mul_ps(*(__m128*)f, mm_scale); + __m128 mm_fscale = _mm_mul_ps(load_m128(f), mm_scale); __m128i x = _mm_castps_si128(_mm_min_ps(_mm_max_ps(mm_fscale, _mm_set_ps1(0.0f)), _mm_set_ps1(65500.0f))); __m128i absolute = _mm_and_si128(x, mm_7FFFFFFF); __m128i Z = _mm_add_epi32(absolute, mm_C8000000); diff --git a/intern/cycles/util/util_hash.h b/intern/cycles/util/util_hash.h index ded25c92b90..edd2448efa4 100644 --- a/intern/cycles/util/util_hash.h +++ b/intern/cycles/util/util_hash.h @@ -23,7 +23,7 @@ CCL_NAMESPACE_BEGIN static inline uint hash_int_2d(uint kx, uint ky) { - #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) +#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) uint a, b, c; @@ -41,7 +41,7 @@ static inline uint hash_int_2d(uint kx, uint ky) return c; - #undef rot +#undef rot } static inline uint 
hash_int(uint k) diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h index 2e73639d2bb..ded75762cd2 100644 --- a/intern/cycles/util/util_math.h +++ b/intern/cycles/util/util_math.h @@ -163,11 +163,7 @@ ccl_device_inline float clamp(float a, float mn, float mx) ccl_device_inline int float_to_int(float f) { -#if defined(__KERNEL_SSE2__) && !defined(_MSC_VER) - return _mm_cvtt_ss2si(_mm_load_ss(&f)); -#else return (int)f; -#endif } ccl_device_inline int floor_to_int(float f) @@ -469,6 +465,15 @@ ccl_device_inline float dot(const float3 a, const float3 b) #endif } +ccl_device_inline float dot(const float4 a, const float4 b) +{ +#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) + return _mm_cvtss_f32(_mm_dp_ps(a, b, 0xFF)); +#else + return (a.x*b.x + a.y*b.y) + (a.z*b.z + a.w*b.w); +#endif +} + ccl_device_inline float3 cross(const float3 a, const float3 b) { float3 r = make_float3(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x); @@ -493,6 +498,11 @@ ccl_device_inline float len_squared(const float3 a) #ifndef __KERNEL_OPENCL__ +ccl_device_inline float len_squared(const float4 a) +{ + return dot(a, a); +} + ccl_device_inline float3 normalize(const float3 a) { #if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) @@ -812,11 +822,6 @@ ccl_device_inline float average(const float4& a) return reduce_add(a) * 0.25f; } -ccl_device_inline float dot(const float4& a, const float4& b) -{ - return reduce_add(a * b); -} - ccl_device_inline float len(const float4 a) { return sqrtf(dot(a, a)); @@ -1113,6 +1118,17 @@ ccl_device_inline void make_orthonormals(const float3 N, float3 *a, float3 *b) /* Color division */ +ccl_device_inline float3 safe_invert_color(float3 a) +{ + float x, y, z; + + x = (a.x != 0.0f)? 1.0f/a.x: 0.0f; + y = (a.y != 0.0f)? 1.0f/a.y: 0.0f; + z = (a.z != 0.0f)? 
1.0f/a.z: 0.0f; + + return make_float3(x, y, z); +} + ccl_device_inline float3 safe_divide_color(float3 a, float3 b) { float x, y, z; @@ -1221,7 +1237,7 @@ ccl_device float compatible_powf(float x, float y) ccl_device float safe_powf(float a, float b) { - if(a < 0.0f && b != float_to_int(b)) + if(UNLIKELY(a < 0.0f && b != float_to_int(b))) return 0.0f; return compatible_powf(a, b); @@ -1229,7 +1245,7 @@ ccl_device float safe_powf(float a, float b) ccl_device float safe_logf(float a, float b) { - if(a < 0.0f || b < 0.0f) + if(UNLIKELY(a < 0.0f || b < 0.0f)) return 0.0f; return logf(a)/logf(b); @@ -1289,7 +1305,7 @@ ccl_device bool ray_aligned_disk_intersect( float3 disk_N = normalize_len(ray_P - disk_P, &disk_t); float div = dot(ray_D, disk_N); - if(div == 0.0f) + if(UNLIKELY(div == 0.0f)) return false; /* compute t to intersection point */ @@ -1319,7 +1335,7 @@ ccl_device bool ray_triangle_intersect( float3 s1 = cross(ray_D, e2); const float divisor = dot(s1, e1); - if(divisor == 0.0f) + if(UNLIKELY(divisor == 0.0f)) return false; const float invdivisor = 1.0f/divisor; @@ -1351,6 +1367,50 @@ ccl_device bool ray_triangle_intersect( return true; } +ccl_device bool ray_triangle_intersect_uv( + float3 ray_P, float3 ray_D, float ray_t, + float3 v0, float3 v1, float3 v2, + float *isect_u, float *isect_v, float *isect_t) +{ + /* Calculate intersection */ + float3 e1 = v1 - v0; + float3 e2 = v2 - v0; + float3 s1 = cross(ray_D, e2); + + const float divisor = dot(s1, e1); + if(UNLIKELY(divisor == 0.0f)) + return false; + + const float invdivisor = 1.0f/divisor; + + /* compute first barycentric coordinate */ + const float3 d = ray_P - v0; + const float u = dot(d, s1)*invdivisor; + if(u < 0.0f) + return false; + + /* Compute second barycentric coordinate */ + const float3 s2 = cross(d, e1); + const float v = dot(ray_D, s2)*invdivisor; + if(v < 0.0f) + return false; + + const float b0 = 1.0f - u - v; + if(b0 < 0.0f) + return false; + + /* compute t to intersection point */ + 
const float t = dot(e2, s2)*invdivisor; + if(t < 0.0f || t > ray_t) + return false; + + *isect_u = u; + *isect_v = v; + *isect_t = t; + + return true; +} + ccl_device bool ray_quad_intersect( float3 ray_P, float3 ray_D, float ray_t, float3 quad_P, float3 quad_u, float3 quad_v, diff --git a/intern/cycles/util/util_md5.cpp b/intern/cycles/util/util_md5.cpp index c53fbd90c67..add0d18c742 100644 --- a/intern/cycles/util/util_md5.cpp +++ b/intern/cycles/util/util_md5.cpp @@ -367,7 +367,7 @@ string MD5Hash::get_hex() finish(digest); for(int i = 0; i < 16; i++) - sprintf(buf + i*2, "%02X", digest[i]); + sprintf(buf + i*2, "%02X", (unsigned int)digest[i]); buf[sizeof(buf)-1] = '\0'; return string(buf); diff --git a/intern/cycles/util/util_opencl.h b/intern/cycles/util/util_opencl.h index 5f3f1667bcc..141c5e38273 100644 --- a/intern/cycles/util/util_opencl.h +++ b/intern/cycles/util/util_opencl.h @@ -304,7 +304,9 @@ typedef struct _cl_kernel * cl_kernel; typedef struct _cl_event * cl_event; typedef struct _cl_sampler * cl_sampler; -typedef cl_uint cl_bool; /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */ +/* WARNING! Unlike cl_ types in cl_platform.h, + * cl_bool is not guaranteed to be the same size as the bool in kernels. 
*/ +typedef cl_uint cl_bool; typedef cl_ulong cl_bitfield; typedef cl_bitfield cl_device_type; typedef cl_uint cl_platform_info; diff --git a/intern/cycles/util/util_path.cpp b/intern/cycles/util/util_path.cpp index 4fd5df4316d..85d19b6a325 100644 --- a/intern/cycles/util/util_path.cpp +++ b/intern/cycles/util/util_path.cpp @@ -111,6 +111,11 @@ string path_escape(const string& path) return result; } +bool path_is_relative(const string& path) +{ + return to_boost(path).is_relative(); +} + bool path_exists(const string& path) { return boost::filesystem::exists(to_boost(path)); diff --git a/intern/cycles/util/util_path.h b/intern/cycles/util/util_path.h index e9041e63dae..fd9ea11740d 100644 --- a/intern/cycles/util/util_path.h +++ b/intern/cycles/util/util_path.h @@ -41,6 +41,7 @@ string path_filename(const string& path); string path_dirname(const string& path); string path_join(const string& dir, const string& file); string path_escape(const string& path); +bool path_is_relative(const string& path); /* file info */ bool path_exists(const string& path); diff --git a/intern/cycles/util/util_simd.h b/intern/cycles/util/util_simd.h index fd5ba1de37b..f0f37fa57aa 100644 --- a/intern/cycles/util/util_simd.h +++ b/intern/cycles/util/util_simd.h @@ -71,7 +71,7 @@ ccl_device_inline const __m128 shuffle_swap(const __m128& a, shuffle_swap_t shuf #ifdef __KERNEL_SSE41__ ccl_device_inline void gen_idirsplat_swap(const __m128 &pn, const shuffle_swap_t &shuf_identity, const shuffle_swap_t &shuf_swap, - const float3& idir, __m128 idirsplat[3], shuffle_swap_t shufflexyz[3]) + const float3& idir, __m128 idirsplat[3], shuffle_swap_t shufflexyz[3]) { const __m128 idirsplat_raw[] = { _mm_set_ps1(idir.x), _mm_set_ps1(idir.y), _mm_set_ps1(idir.z) }; idirsplat[0] = _mm_xor_ps(idirsplat_raw[0], pn); @@ -87,7 +87,7 @@ ccl_device_inline void gen_idirsplat_swap(const __m128 &pn, const shuffle_swap_t } #else ccl_device_inline void gen_idirsplat_swap(const __m128 &pn, const shuffle_swap_t 
&shuf_identity, const shuffle_swap_t &shuf_swap, - const float3& idir, __m128 idirsplat[3], shuffle_swap_t shufflexyz[3]) + const float3& idir, __m128 idirsplat[3], shuffle_swap_t shufflexyz[3]) { idirsplat[0] = _mm_xor_ps(_mm_set_ps1(idir.x), pn); idirsplat[1] = _mm_xor_ps(_mm_set_ps1(idir.y), pn); @@ -154,6 +154,12 @@ ccl_device_inline const __m128 fms(const __m128& a, const __m128& b, const __m12 return _mm_sub_ps(_mm_mul_ps(a, b), c); } +/* calculate -a*b+c (replacement for fused negated-multiply-subtract on SSE CPUs) */ +ccl_device_inline const __m128 fnma(const __m128& a, const __m128& b, const __m128& c) +{ + return _mm_sub_ps(c, _mm_mul_ps(a, b)); +} + template<size_t N> ccl_device_inline const __m128 broadcast(const __m128& a) { return _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(a), _MM_SHUFFLE(N, N, N, N))); @@ -180,6 +186,88 @@ ccl_device_inline const __m128 set_sign_bit(const __m128 &a) return _mm_xor_ps(a, _mm_castsi128_ps(_mm_setr_epi32(S1 << 31, S2 << 31, S3 << 31, S4 << 31))); } +#ifdef __KERNEL_WITH_SSE_ALIGN__ +ccl_device_inline const __m128 load_m128(const float4 &vec) +{ + return _mm_load_ps(&vec.x); +} + +ccl_device_inline const __m128 load_m128(const float3 &vec) +{ + return _mm_load_ps(&vec.x); +} + +#else + +ccl_device_inline const __m128 load_m128(const float4 &vec) +{ + return _mm_loadu_ps(&vec.x); +} + +ccl_device_inline const __m128 load_m128(const float3 &vec) +{ + return _mm_loadu_ps(&vec.x); +} +#endif /* __KERNEL_WITH_SSE_ALIGN__ */ + +ccl_device_inline const __m128 dot3_splat(const __m128& a, const __m128& b) +{ +#ifdef __KERNEL_SSE41__ + return _mm_dp_ps(a, b, 0x7f); +#else + __m128 t = _mm_mul_ps(a, b); + return _mm_set1_ps(((float*)&t)[0] + ((float*)&t)[1] + ((float*)&t)[2]); +#endif +} + +/* squared length taking only specified axes into account */ +template<size_t X, size_t Y, size_t Z, size_t W> +ccl_device_inline float len_squared(const __m128& a) +{ +#ifndef __KERNEL_SSE41__ + float4& t = (float4 &)a; + return (X ? 
t.x * t.x : 0.0f) + (Y ? t.y * t.y : 0.0f) + (Z ? t.z * t.z : 0.0f) + (W ? t.w * t.w : 0.0f); +#else + return _mm_cvtss_f32(_mm_dp_ps(a, a, (X << 4) | (Y << 5) | (Z << 6) | (W << 7) | 0xf)); +#endif +} + +ccl_device_inline float dot3(const __m128& a, const __m128& b) +{ +#ifdef __KERNEL_SSE41__ + return _mm_cvtss_f32(_mm_dp_ps(a, b, 0x7f)); +#else + __m128 t = _mm_mul_ps(a, b); + return ((float*)&t)[0] + ((float*)&t)[1] + ((float*)&t)[2]; +#endif +} + +ccl_device_inline const __m128 len3_squared_splat(const __m128& a) +{ + return dot3_splat(a, a); +} + +ccl_device_inline float len3_squared(const __m128& a) +{ + return dot3(a, a); +} + +ccl_device_inline float len3(const __m128& a) +{ + return _mm_cvtss_f32(_mm_sqrt_ss(dot3_splat(a, a))); +} + +/* calculate shuffled cross product, useful when order of components does not matter */ +ccl_device_inline const __m128 cross_zxy(const __m128& a, const __m128& b) +{ + return fms(a, shuffle<1, 2, 0, 3>(b), _mm_mul_ps(b, shuffle<1, 2, 0, 3>(a))); +} + +ccl_device_inline const __m128 cross(const __m128& a, const __m128& b) +{ + return shuffle<1, 2, 0, 3>(cross_zxy(a, b)); +} + #endif /* __KERNEL_SSE2__ */ CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_system.cpp b/intern/cycles/util/util_system.cpp index 3d7781f6146..0764f7d9345 100644 --- a/intern/cycles/util/util_system.cpp +++ b/intern/cycles/util/util_system.cpp @@ -161,8 +161,25 @@ static CPUCapabilities& system_cpu_capabilities() caps.sse41 = (result[2] & ((int)1 << 19)) != 0; caps.sse42 = (result[2] & ((int)1 << 20)) != 0; - caps.avx = (result[2] & ((int)1 << 28)) != 0; caps.fma3 = (result[2] & ((int)1 << 12)) != 0; + caps.avx = false; + bool os_uses_xsave_xrestore = (result[2] & ((int)1 << 27)) != 0; + bool cpu_avx_support = (result[2] & ((int)1 << 28)) != 0; + + if( os_uses_xsave_xrestore && cpu_avx_support) { + // Check if the OS will save the YMM registers + uint32_t xcr_feature_mask; +#if defined(__GNUC__) + int edx; /* not used */ + /* actual opcode for 
xgetbv */ + __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a" (xcr_feature_mask) , "=d" (edx) : "c" (0) ); +#elif defined(_MSC_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) + xcr_feature_mask = (uint32_t)_xgetbv(_XCR_XFEATURE_ENABLED_MASK); /* min VS2010 SP1 compiler is required */ +#else + xcr_feature_mask = 0; +#endif + caps.avx = (xcr_feature_mask & 0x6) == 0x6; + } } #if 0 diff --git a/intern/cycles/util/util_transform.cpp b/intern/cycles/util/util_transform.cpp index 12c2270a8d4..14613558501 100644 --- a/intern/cycles/util/util_transform.cpp +++ b/intern/cycles/util/util_transform.cpp @@ -75,7 +75,7 @@ static bool transform_matrix4_gj_inverse(float R[][4], float M[][4]) } } - if(pivotsize == 0) + if(UNLIKELY(pivotsize == 0.0f)) return false; if(pivot != i) { @@ -106,7 +106,7 @@ static bool transform_matrix4_gj_inverse(float R[][4], float M[][4]) for(int i = 3; i >= 0; --i) { float f; - if((f = M[i][i]) == 0) + if(UNLIKELY((f = M[i][i]) == 0.0f)) return false; for(int j = 0; j < 4; j++) { @@ -135,15 +135,16 @@ Transform transform_inverse(const Transform& tfm) memcpy(R, &tfmR, sizeof(R)); memcpy(M, &tfm, sizeof(M)); - if(!transform_matrix4_gj_inverse(R, M)) { + if(UNLIKELY(!transform_matrix4_gj_inverse(R, M))) { /* matrix is degenerate (e.g. 
0 scale on some axis), ideally we should * never be in this situation, but try to invert it anyway with tweak */ M[0][0] += 1e-8f; M[1][1] += 1e-8f; M[2][2] += 1e-8f; - if(!transform_matrix4_gj_inverse(R, M)) + if(UNLIKELY(!transform_matrix4_gj_inverse(R, M))) { return transform_identity(); + } } memcpy(&tfmR, R, sizeof(R)); diff --git a/intern/cycles/util/util_transform.h b/intern/cycles/util/util_transform.h index 4c7ce12d1de..5b3dbe42f69 100644 --- a/intern/cycles/util/util_transform.h +++ b/intern/cycles/util/util_transform.h @@ -108,9 +108,9 @@ ccl_device_inline Transform transform_transpose(const Transform a) } ccl_device_inline Transform make_transform(float a, float b, float c, float d, - float e, float f, float g, float h, - float i, float j, float k, float l, - float m, float n, float o, float p) + float e, float f, float g, float h, + float i, float j, float k, float l, + float m, float n, float o, float p) { Transform t; diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h index c770931c69b..bfaab3dba3b 100644 --- a/intern/cycles/util/util_types.h +++ b/intern/cycles/util/util_types.h @@ -37,6 +37,7 @@ #define ccl_device_noinline static #define ccl_global #define ccl_constant +#define __KERNEL_WITH_SSE_ALIGN__ #if defined(_WIN32) && !defined(FREE_WINDOWS) @@ -45,6 +46,7 @@ #ifdef __KERNEL_64_BIT__ #define ccl_try_align(...) __declspec(align(__VA_ARGS__)) #else +#undef __KERNEL_WITH_SSE_ALIGN__ #define ccl_try_align(...) /* not support for function arguments (error C2719) */ #endif #define ccl_may_alias @@ -63,8 +65,6 @@ #endif -#else -#define ccl_align(...) 
#endif /* Standard Integer Types */ @@ -159,8 +159,8 @@ struct int2 { __forceinline int& operator[](int i) { return *(&x + i); } }; -#ifdef __KERNEL_SSE__ struct ccl_try_align(16) int3 { +#ifdef __KERNEL_SSE__ union { __m128i m128; struct { int x, y, z, w; }; @@ -171,7 +171,6 @@ struct ccl_try_align(16) int3 { __forceinline operator const __m128i&(void) const { return m128; } __forceinline operator __m128i&(void) { return m128; } #else -struct ccl_try_align(16) int3 { int x, y, z, w; #endif @@ -179,8 +178,8 @@ struct ccl_try_align(16) int3 { __forceinline int& operator[](int i) { return *(&x + i); } }; -#ifdef __KERNEL_SSE__ struct ccl_try_align(16) int4 { +#ifdef __KERNEL_SSE__ union { __m128i m128; struct { int x, y, z, w; }; @@ -191,7 +190,6 @@ struct ccl_try_align(16) int4 { __forceinline operator const __m128i&(void) const { return m128; } __forceinline operator __m128i&(void) { return m128; } #else -struct ccl_try_align(16) int4 { int x, y, z, w; #endif @@ -227,8 +225,8 @@ struct float2 { __forceinline float& operator[](int i) { return *(&x + i); } }; -#ifdef __KERNEL_SSE__ struct ccl_try_align(16) float3 { +#ifdef __KERNEL_SSE__ union { __m128 m128; struct { float x, y, z, w; }; @@ -239,7 +237,6 @@ struct ccl_try_align(16) float3 { __forceinline operator const __m128&(void) const { return m128; } __forceinline operator __m128&(void) { return m128; } #else -struct ccl_try_align(16) float3 { float x, y, z, w; #endif @@ -247,8 +244,8 @@ struct ccl_try_align(16) float3 { __forceinline float& operator[](int i) { return *(&x + i); } }; -#ifdef __KERNEL_SSE__ struct ccl_try_align(16) float4 { +#ifdef __KERNEL_SSE__ union { __m128 m128; struct { float x, y, z, w; }; @@ -259,7 +256,6 @@ struct ccl_try_align(16) float4 { __forceinline operator const __m128&(void) const { return m128; } __forceinline operator __m128&(void) { return m128; } #else -struct ccl_try_align(16) float4 { float x, y, z, w; #endif @@ -450,6 +446,53 @@ ccl_device_inline int4 make_int4(const 
float3& f) #endif +/* Interpolation types for textures + * cuda also use texture space to store other objects */ +enum InterpolationType { + INTERPOLATION_NONE = -1, + INTERPOLATION_LINEAR = 0, + INTERPOLATION_CLOSEST = 1, + INTERPOLATION_CUBIC = 2, + INTERPOLATION_SMART = 3, +}; + + +/* macros */ + +/* hints for branch prediction, only use in code that runs a _lot_ */ +#if defined(__GNUC__) && defined(__KERNEL_CPU__) +# define LIKELY(x) __builtin_expect(!!(x), 1) +# define UNLIKELY(x) __builtin_expect(!!(x), 0) +#else +# define LIKELY(x) (x) +# define UNLIKELY(x) (x) +#endif + +/* Causes warning: + * incompatible types when assigning to type 'Foo' from type 'Bar' + * ... the compiler optimizes away the temp var */ +#ifdef __GNUC__ +#define CHECK_TYPE(var, type) { \ + __typeof(var) *__tmp; \ + __tmp = (type *)NULL; \ + (void)__tmp; \ +} (void)0 + +#define CHECK_TYPE_PAIR(var_a, var_b) { \ + __typeof(var_a) *__tmp; \ + __tmp = (__typeof(var_b) *)NULL; \ + (void)__tmp; \ +} (void)0 +#else +# define CHECK_TYPE(var, type) +# define CHECK_TYPE_PAIR(var_a, var_b) +#endif + +/* can be used in simple macros */ +#define CHECK_TYPE_INLINE(val, type) \ + ((void)(((type)0) != (val))) + + CCL_NAMESPACE_END #endif /* __UTIL_TYPES_H__ */ diff --git a/intern/cycles/util/util_view.cpp b/intern/cycles/util/util_view.cpp index 361a7bc95f2..6bf9c9ed8c0 100644 --- a/intern/cycles/util/util_view.cpp +++ b/intern/cycles/util/util_view.cpp @@ -80,8 +80,8 @@ void view_display_info(const char *info) void view_display_help() { - const int w = V.width / 1.15; - const int h = V.height / 1.15; + const int w = (int)((float)V.width / 1.15f); + const int h = (int)((float)V.height / 1.15f); const int x1 = (V.width - w) / 2; const int x2 = x1 + w; @@ -100,14 +100,16 @@ void view_display_help() view_display_text(x1+20, y2-20, "Cycles Renderer"); view_display_text(x1+20, y2-40, "(C) 2011-2014 Blender Foundation"); view_display_text(x1+20, y2-80, "Controls:"); - view_display_text(x1+20, y2-100, "h: 
Show/Hide this help message"); - view_display_text(x1+20, y2-120, "r: Restart the render"); - view_display_text(x1+20, y2-140, "q: Quit the program"); - view_display_text(x1+20, y2-160, "esc: Cancel the render"); + view_display_text(x1+20, y2-100, "h: Info/Help"); + view_display_text(x1+20, y2-120, "r: Reset"); + view_display_text(x1+20, y2-140, "p: Pause"); + view_display_text(x1+20, y2-160, "esc: Cancel"); + view_display_text(x1+20, y2-180, "q: Quit program"); - view_display_text(x1+20, y2-190, "Interactive Mode (i-key):"); - view_display_text(x1+20, y2-210, "LMB: Move camera"); - view_display_text(x1+20, y2-230, "RMB: Rotate camera"); + view_display_text(x1+20, y2-210, "i: Interactive mode"); + view_display_text(x1+20, y2-230, "Left mouse: Move camera"); + view_display_text(x1+20, y2-250, "Right mouse: Rotate camera"); + view_display_text(x1+20, y2-270, "W/A/S/D: Move camera"); glColor3f(1.0f, 1.0f, 1.0f); } @@ -246,9 +248,7 @@ void view_main_loop(const char *title, int width, int height, glutInitDisplayMode(GLUT_RGB|GLUT_DOUBLE|GLUT_DEPTH); glutCreateWindow(title); -#ifndef __APPLE__ glewInit(); -#endif view_reshape(width, height); diff --git a/intern/elbeem/intern/mvmcoords.cpp b/intern/elbeem/intern/mvmcoords.cpp index 281a9656fcf..838fc54491d 100644 --- a/intern/elbeem/intern/mvmcoords.cpp +++ b/intern/elbeem/intern/mvmcoords.cpp @@ -18,7 +18,7 @@ #include <algorithm> #if defined(_MSC_VER) && _MSC_VER > 1600 -// sdt::greater +// std::greater #include <functional> #endif diff --git a/intern/ffmpeg/ffmpeg_compat.h b/intern/ffmpeg/ffmpeg_compat.h index ff2cc405f4c..ac4da5b6133 100644 --- a/intern/ffmpeg/ffmpeg_compat.h +++ b/intern/ffmpeg/ffmpeg_compat.h @@ -103,6 +103,7 @@ FFMPEG_INLINE int av_sample_fmt_is_planar(enum AVSampleFormat sample_fmt) { /* no planar formats in FFmpeg < 0.9 */ + (void) sample_fmt; return 0; } @@ -172,6 +173,7 @@ FFMPEG_INLINE int av_opt_set(void *obj, const char *name, const char *val, int search_flags) { const AVOption *rv = NULL; + 
(void) search_flags; av_set_string3(obj, name, val, 1, &rv); return rv != NULL; } @@ -180,6 +182,7 @@ FFMPEG_INLINE int av_opt_set_int(void *obj, const char *name, int64_t val, int search_flags) { const AVOption *rv = NULL; + (void) search_flags; rv = av_set_int(obj, name, val); return rv != NULL; } @@ -188,6 +191,7 @@ FFMPEG_INLINE int av_opt_set_double(void *obj, const char *name, double val, int search_flags) { const AVOption *rv = NULL; + (void) search_flags; rv = av_set_double(obj, name, val); return rv != NULL; } @@ -210,15 +214,12 @@ enum AVSampleFormat av_get_packed_sample_fmt(enum AVSampleFormat sample_fmt) } #endif -#if ((LIBAVFORMAT_VERSION_MAJOR < 53) || ((LIBAVFORMAT_VERSION_MAJOR == 53) && (LIBAVFORMAT_VERSION_MINOR < 24)) || ((LIBAVFORMAT_VERSION_MAJOR == 53) && (LIBAVFORMAT_VERSION_MINOR < 24) && (LIBAVFORMAT_VERSION_MICRO < 2))) -# define avformat_close_input(x) av_close_input_file(*(x)) -#endif - #if ((LIBAVCODEC_VERSION_MAJOR < 53) || (LIBAVCODEC_VERSION_MAJOR == 53 && LIBAVCODEC_VERSION_MINOR < 35)) FFMPEG_INLINE int avcodec_open2(AVCodecContext *avctx, AVCodec *codec, AVDictionary **options) { /* TODO: no options are taking into account */ + (void) options; return avcodec_open(avctx, codec); } #endif @@ -228,6 +229,7 @@ FFMPEG_INLINE AVStream *avformat_new_stream(AVFormatContext *s, AVCodec *c) { /* TODO: no codec is taking into account */ + (void) c; return av_new_stream(s, 0); } @@ -235,6 +237,7 @@ FFMPEG_INLINE int avformat_find_stream_info(AVFormatContext *ic, AVDictionary **options) { /* TODO: no options are taking into account */ + (void) options; return av_find_stream_info(ic); } #endif @@ -435,4 +438,12 @@ AVRational av_get_r_frame_rate_compat(const AVStream *stream) #endif } +#if LIBAVUTIL_VERSION_INT < AV_VERSION_INT(51, 32, 0) +# define AV_OPT_SEARCH_FAKE_OBJ 0 +#endif + +#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(54, 59, 100) +# define FFMPEG_HAVE_DEPRECATED_FLAGS2 +#endif + #endif diff --git 
a/intern/ghost/intern/GHOST_NDOFManager.cpp b/intern/ghost/intern/GHOST_NDOFManager.cpp index f8c707b668c..c99680641c3 100644 --- a/intern/ghost/intern/GHOST_NDOFManager.cpp +++ b/intern/ghost/intern/GHOST_NDOFManager.cpp @@ -295,14 +295,14 @@ bool GHOST_NDOFManager::setDevice(unsigned short vendor_id, unsigned short produ return m_deviceType != NDOF_UnknownDevice; } -void GHOST_NDOFManager::updateTranslation(short t[3], GHOST_TUns64 time) +void GHOST_NDOFManager::updateTranslation(const short t[3], GHOST_TUns64 time) { memcpy(m_translation, t, sizeof(m_translation)); m_motionTime = time; m_motionEventPending = true; } -void GHOST_NDOFManager::updateRotation(short r[3], GHOST_TUns64 time) +void GHOST_NDOFManager::updateRotation(const short r[3], GHOST_TUns64 time) { memcpy(m_rotation, r, sizeof(m_rotation)); m_motionTime = time; @@ -506,7 +506,5 @@ bool GHOST_NDOFManager::sendMotionEvent() m_system.pushEvent(event); - m_prevMotionTime = m_motionTime; - return true; } diff --git a/intern/ghost/intern/GHOST_NDOFManager.h b/intern/ghost/intern/GHOST_NDOFManager.h index 50f784d89c4..98aebfa4f30 100644 --- a/intern/ghost/intern/GHOST_NDOFManager.h +++ b/intern/ghost/intern/GHOST_NDOFManager.h @@ -128,8 +128,8 @@ public: // rotations are + when CCW, - when CW // each platform is responsible for getting axis data into this form // these values should not be scaled (just shuffled or flipped) - void updateTranslation(short t[3], GHOST_TUns64 time); - void updateRotation(short r[3], GHOST_TUns64 time); + void updateTranslation(const short t[3], GHOST_TUns64 time); + void updateRotation(const short r[3], GHOST_TUns64 time); // the latest raw button data from the device // use HID button encoding (not NDOF_ButtonT) diff --git a/intern/ghost/intern/GHOST_NDOFManagerCocoa.mm b/intern/ghost/intern/GHOST_NDOFManagerCocoa.mm index 4fc4f8016e5..1a029257f09 100644 --- a/intern/ghost/intern/GHOST_NDOFManagerCocoa.mm +++ b/intern/ghost/intern/GHOST_NDOFManagerCocoa.mm @@ -79,8 +79,8 @@ 
static void NDOF_DeviceEvent(io_connect_t connection, natural_t messageType, voi case kConnexionCmdHandleAxis: { // convert to blender view coordinates - short t[3] = {s->axis[0], -(s->axis[2]), s->axis[1]}; - short r[3] = {-(s->axis[3]), s->axis[5], -(s->axis[4])}; + const short t[3] = {s->axis[0], -(s->axis[2]), s->axis[1]}; + const short r[3] = {-(s->axis[3]), s->axis[5], -(s->axis[4])}; ndof_manager->updateTranslation(t, now); ndof_manager->updateRotation(r, now); @@ -162,7 +162,7 @@ GHOST_NDOFManagerCocoa::~GHOST_NDOFManagerCocoa() if (GHOST_NDOFManager3Dconnexion_available()) { GHOST_NDOFManager3Dconnexion_UnregisterConnexionClient(m_clientID); - GHOST_NDOFManager3Dconnexion_UnregisterConnexionClient(m_clientID); + GHOST_NDOFManager3Dconnexion_UnregisterConnexionClient(m_clientID); GHOST_NDOFManager3Dconnexion_CleanupConnexionHandlers(); ghost_system = NULL; diff --git a/intern/ghost/intern/GHOST_NDOFManagerX11.cpp b/intern/ghost/intern/GHOST_NDOFManagerX11.cpp index 947d8d74461..77e09e7ef49 100644 --- a/intern/ghost/intern/GHOST_NDOFManagerX11.cpp +++ b/intern/ghost/intern/GHOST_NDOFManagerX11.cpp @@ -77,23 +77,46 @@ bool GHOST_NDOFManagerX11::available() return m_available; } +/* + * Workaround for a problem where we don't enter the 'GHOST_kFinished' state, + * this causes any proceeding event to have a very high 'dt' (time delta), + * many seconds for eg, causing the view to jump. + * + * this workaround expect's continuous events, if we miss a motion event, + * immediately send a dummy event with no motion to ensure the finished state is reached. 
+ */ +#define USE_FINISH_GLITCH_WORKAROUND + + +#ifdef USE_FINISH_GLITCH_WORKAROUND +static bool motion_test_prev = false; +#endif + bool GHOST_NDOFManagerX11::processEvents() { bool anyProcessed = false; if (m_available) { spnav_event e; + +#ifdef USE_FINISH_GLITCH_WORKAROUND + bool motion_test = false; +#endif + while (spnav_poll_event(&e)) { switch (e.type) { case SPNAV_EVENT_MOTION: { /* convert to blender view coords */ GHOST_TUns64 now = m_system.getMilliSeconds(); - short t[3] = {(short)e.motion.x, (short)e.motion.y, (short)-e.motion.z}; - short r[3] = {(short)-e.motion.rx, (short)-e.motion.ry, (short)e.motion.rz}; + const short t[3] = {(short)e.motion.x, (short)e.motion.y, (short)-e.motion.z}; + const short r[3] = {(short)-e.motion.rx, (short)-e.motion.ry, (short)e.motion.rz}; updateTranslation(t, now); updateRotation(r, now); +#ifdef USE_FINISH_GLITCH_WORKAROUND + motion_test = true; +#endif break; } case SPNAV_EVENT_BUTTON: @@ -103,6 +126,20 @@ bool GHOST_NDOFManagerX11::processEvents() } anyProcessed = true; } + +#ifdef USE_FINISH_GLITCH_WORKAROUND + if (motion_test_prev == true && motion_test == false) { + GHOST_TUns64 now = m_system.getMilliSeconds(); + const short v[3] = {0, 0, 0}; + + updateTranslation(v, now); + updateRotation(v, now); + + anyProcessed = true; + } + motion_test_prev = motion_test; +#endif + } return anyProcessed; diff --git a/intern/ghost/intern/GHOST_SystemWin32.cpp b/intern/ghost/intern/GHOST_SystemWin32.cpp index 8280474437b..070dd86c0fb 100644 --- a/intern/ghost/intern/GHOST_SystemWin32.cpp +++ b/intern/ghost/intern/GHOST_SystemWin32.cpp @@ -843,14 +843,14 @@ bool GHOST_SystemWin32::processNDOF(RAWINPUT const& raw) { case 1: // translation { - short *axis = (short *)(data + 1); + const short *axis = (short *)(data + 1); // massage into blender view coords (same goes for rotation) - short t[3] = {axis[0], -axis[2], axis[1]}; + const short t[3] = {axis[0], -axis[2], axis[1]}; m_ndofManager->updateTranslation(t, now); if 
(raw.data.hid.dwSizeHid == 13) { // this report also includes rotation - short r[3] = {-axis[3], axis[5], -axis[4]}; + const short r[3] = {-axis[3], axis[5], -axis[4]}; m_ndofManager->updateRotation(r, now); // I've never gotten one of these, has anyone else? @@ -860,8 +860,8 @@ bool GHOST_SystemWin32::processNDOF(RAWINPUT const& raw) } case 2: // rotation { - short *axis = (short *)(data + 1); - short r[3] = {-axis[0], axis[2], -axis[1]}; + const short *axis = (short *)(data + 1); + const short r[3] = {-axis[0], axis[2], -axis[1]}; m_ndofManager->updateRotation(r, now); break; } diff --git a/intern/ghost/intern/GHOST_SystemX11.cpp b/intern/ghost/intern/GHOST_SystemX11.cpp index 9900f7e153f..8f1f9867724 100644 --- a/intern/ghost/intern/GHOST_SystemX11.cpp +++ b/intern/ghost/intern/GHOST_SystemX11.cpp @@ -755,7 +755,7 @@ GHOST_SystemX11::processEvent(XEvent *xe) case KeyRelease: { XKeyEvent *xke = &(xe->xkey); - KeySym key_sym = XLookupKeysym(xke, 0); + KeySym key_sym; char ascii; #if defined(WITH_X11_XINPUT) && defined(X_HAVE_UTF8_STRING) /* utf8_array[] is initial buffer used for Xutf8LookupString(). @@ -771,7 +771,29 @@ GHOST_SystemX11::processEvent(XEvent *xe) char *utf8_buf = NULL; #endif - GHOST_TKey gkey = convertXKey(key_sym); + GHOST_TKey gkey; + + /* In keyboards like latin ones, + * numbers needs a 'Shift' to be accessed but key_sym + * is unmodified (or anyone swapping the keys with xmodmap). + * + * Here we look at the 'Shifted' version of the key. + * If it is a number, then we take it instead of the normal key. + * + * The modified key is sent in the 'ascii's variable anyway. + */ + if ((xke->keycode >= 10 && xke->keycode < 20) && + ((key_sym = XLookupKeysym(xke, ShiftMask)) >= XK_0) && (key_sym <= XK_9)) + { + /* pass (keep shift'ed key_sym) */ + } + else { + /* regular case */ + key_sym = XLookupKeysym(xke, 0); + } + + gkey = convertXKey(key_sym); + GHOST_TEventType type = (xke->type == KeyPress) ? 
GHOST_kEventKeyDown : GHOST_kEventKeyUp; diff --git a/intern/ghost/intern/GHOST_WindowX11.cpp b/intern/ghost/intern/GHOST_WindowX11.cpp index 4e3fcd4da3f..56e225e94a2 100644 --- a/intern/ghost/intern/GHOST_WindowX11.cpp +++ b/intern/ghost/intern/GHOST_WindowX11.cpp @@ -186,7 +186,8 @@ GHOST_WindowX11( m_valid_setup(false), m_invalid_window(false), m_empty_cursor(None), - m_custom_cursor(None) + m_custom_cursor(None), + m_visible_cursor(None) { /* Set up the minimum atrributes that we require and see if @@ -1454,7 +1455,10 @@ setWindowCursorVisibility( Cursor xcursor; if (visible) { - xcursor = getStandardCursor(getCursorShape() ); + if (m_visible_cursor) + xcursor = m_visible_cursor; + else + xcursor = getStandardCursor(getCursorShape() ); } else { xcursor = getEmptyCursor(); @@ -1517,6 +1521,8 @@ setWindowCursorShape( GHOST_TStandardCursor shape) { Cursor xcursor = getStandardCursor(shape); + + m_visible_cursor = xcursor; XDefineCursor(m_display, m_window, xcursor); XFlush(m_display); @@ -1566,6 +1572,8 @@ setWindowCustomCursorShape( m_custom_cursor = XCreatePixmapCursor(m_display, bitmap_pix, mask_pix, &fg, &bg, hotX, hotY); XDefineCursor(m_display, m_window, m_custom_cursor); XFlush(m_display); + + m_visible_cursor = m_custom_cursor; XFreePixmap(m_display, bitmap_pix); XFreePixmap(m_display, mask_pix); diff --git a/intern/ghost/intern/GHOST_WindowX11.h b/intern/ghost/intern/GHOST_WindowX11.h index ff7b7409627..93ee9edda0e 100644 --- a/intern/ghost/intern/GHOST_WindowX11.h +++ b/intern/ghost/intern/GHOST_WindowX11.h @@ -391,6 +391,9 @@ private: /** XCursor structure of the custom cursor */ Cursor m_custom_cursor; + + /** XCursor to show when cursor is visible */ + Cursor m_visible_cursor; /** Cache of XC_* ID's to XCursor structures */ std::map<unsigned int, Cursor> m_standard_cursors; diff --git a/intern/ghost/test/multitest/MultiTest.c b/intern/ghost/test/multitest/MultiTest.c index 8fb46ffc385..9a192c17180 100644 --- a/intern/ghost/test/multitest/MultiTest.c 
+++ b/intern/ghost/test/multitest/MultiTest.c @@ -74,7 +74,7 @@ void multitestapp_exit(MultiTestApp *app); /**/ -void rect_bevel_side(int rect[2][2], int side, float *lt, float *dk, float *col, int width) +void rect_bevel_side(int rect[2][2], int side, float *lt, float *dk, const float col[3], int width) { int ltidx = (side / 2) % 4; int dkidx = (ltidx + 1 + (side & 1)) % 4; diff --git a/intern/guardedalloc/intern/mallocn.c b/intern/guardedalloc/intern/mallocn.c index 2ac01a6c7e4..e85fba7a6d0 100644 --- a/intern/guardedalloc/intern/mallocn.c +++ b/intern/guardedalloc/intern/mallocn.c @@ -15,11 +15,6 @@ * along with this program; if not, write to the Free Software Foundation, * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * - * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV. - * All rights reserved. - * - * The Original Code is: all of this file. - * * Contributor(s): Brecht Van Lommel * Campbell Barton * @@ -43,7 +38,7 @@ size_t (*MEM_allocN_len)(const void *vmemh) = MEM_lockfree_allocN_len; void (*MEM_freeN)(void *vmemh) = MEM_lockfree_freeN; void *(*MEM_dupallocN)(const void *vmemh) = MEM_lockfree_dupallocN; void *(*MEM_reallocN_id)(void *vmemh, size_t len, const char *str) = MEM_lockfree_reallocN_id; -void *(*MEM_recallocN_id)(void *vmemh, size_t len, const char *str) = MEM_lockfree_recallocN_id;; +void *(*MEM_recallocN_id)(void *vmemh, size_t len, const char *str) = MEM_lockfree_recallocN_id; void *(*MEM_callocN)(size_t len, const char *str) = MEM_lockfree_callocN; void *(*MEM_mallocN)(size_t len, const char *str) = MEM_lockfree_mallocN; void *(*MEM_mapallocN)(size_t len, const char *str) = MEM_lockfree_mapallocN; @@ -71,7 +66,7 @@ void MEM_use_guarded_allocator(void) MEM_freeN = MEM_guarded_freeN; MEM_dupallocN = MEM_guarded_dupallocN; MEM_reallocN_id = MEM_guarded_reallocN_id; - MEM_recallocN_id = MEM_guarded_recallocN_id;; + MEM_recallocN_id = MEM_guarded_recallocN_id; MEM_callocN = MEM_guarded_callocN; MEM_mallocN = 
MEM_guarded_mallocN; MEM_mapallocN = MEM_guarded_mapallocN; diff --git a/intern/guardedalloc/intern/mallocn_guarded_impl.c b/intern/guardedalloc/intern/mallocn_guarded_impl.c index 352d18df732..172c79d50cd 100644 --- a/intern/guardedalloc/intern/mallocn_guarded_impl.c +++ b/intern/guardedalloc/intern/mallocn_guarded_impl.c @@ -497,9 +497,9 @@ void *MEM_guarded_mallocN(size_t len, const char *str) memh = (MemHead *)malloc(len + sizeof(MemHead) + sizeof(MemTail)); - if (memh) { + if (LIKELY(memh)) { make_memhead_header(memh, len, str); - if (malloc_debug_memset && len) + if (UNLIKELY(malloc_debug_memset && len)) memset(memh + 1, 255, len); #ifdef DEBUG_MEMCOUNTER @@ -544,7 +544,7 @@ void *MEM_guarded_mapallocN(size_t len, const char *str) /* on 64 bit, simply use calloc instead, as mmap does not support * allocating > 4 GB on Windows. the only reason mapalloc exists * is to get around address space limitations in 32 bit OSes. */ - if(sizeof(void*) >= 8) + if (sizeof(void *) >= 8) return MEM_guarded_callocN(len, str); len = SIZET_ALIGN_4(len); @@ -735,7 +735,7 @@ static void MEM_guarded_printmemlist_internal(int pydict) membl->_count); #else print_error("%s len: " SIZET_FORMAT " %p\n", - membl->name, SIZET_ARG(membl->len), membl + 1); + membl->name, SIZET_ARG(membl->len), (void *)(membl + 1)); #endif #ifdef DEBUG_BACKTRACE print_memhead_backtrace(membl); @@ -951,7 +951,7 @@ static void rem_memblock(MemHead *memh) #endif } else { - if (malloc_debug_memset && memh->len) + if (UNLIKELY(malloc_debug_memset && memh->len)) memset(memh + 1, 255, memh->len); free(memh); } diff --git a/intern/guardedalloc/intern/mallocn_intern.h b/intern/guardedalloc/intern/mallocn_intern.h index db45b59b884..b0fd52d2766 100644 --- a/intern/guardedalloc/intern/mallocn_intern.h +++ b/intern/guardedalloc/intern/mallocn_intern.h @@ -77,6 +77,14 @@ #define SIZET_ALIGN_4(len) ((len + 3) & ~(size_t)3) +#ifdef __GNUC__ +# define LIKELY(x) __builtin_expect(!!(x), 1) +# define UNLIKELY(x) 
__builtin_expect(!!(x), 0) +#else +# define LIKELY(x) (x) +# define UNLIKELY(x) (x) +#endif + /* Prototypes for counted allocator functions */ size_t MEM_lockfree_allocN_len(const void *vmemh) ATTR_WARN_UNUSED_RESULT; void MEM_lockfree_freeN(void *vmemh); diff --git a/intern/guardedalloc/intern/mallocn_lockfree_impl.c b/intern/guardedalloc/intern/mallocn_lockfree_impl.c index 2c7c087966a..6fc01807af3 100644 --- a/intern/guardedalloc/intern/mallocn_lockfree_impl.c +++ b/intern/guardedalloc/intern/mallocn_lockfree_impl.c @@ -15,11 +15,6 @@ * along with this program; if not, write to the Free Software Foundation, * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * - * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV. - * All rights reserved. - * - * The Original Code is: all of this file. - * * Contributor(s): Brecht Van Lommel * Campbell Barton * Sergey Sharybin @@ -126,7 +121,7 @@ void MEM_lockfree_freeN(void *vmemh) #endif } else { - if (malloc_debug_memset && len) { + if (UNLIKELY(malloc_debug_memset && len)) { memset(memh + 1, 255, len); } free(memh); @@ -219,7 +214,7 @@ void *MEM_lockfree_callocN(size_t len, const char *str) memh = (MemHead *)calloc(1, len + sizeof(MemHead)); - if (memh) { + if (LIKELY(memh)) { memh->len = len; atomic_add_u(&totblock, 1); atomic_add_z(&mem_in_use, len); @@ -242,8 +237,8 @@ void *MEM_lockfree_mallocN(size_t len, const char *str) memh = (MemHead *)malloc(len + sizeof(MemHead)); - if (memh) { - if (malloc_debug_memset && len) { + if (LIKELY(memh)) { + if (UNLIKELY(malloc_debug_memset && len)) { memset(memh + 1, 255, len); } @@ -268,7 +263,7 @@ void *MEM_lockfree_mapallocN(size_t len, const char *str) /* on 64 bit, simply use calloc instead, as mmap does not support * allocating > 4 GB on Windows. the only reason mapalloc exists * is to get around address space limitations in 32 bit OSes. 
*/ - if(sizeof(void*) >= 8) + if (sizeof(void *) >= 8) return MEM_lockfree_callocN(len, str); len = SIZET_ALIGN_4(len); diff --git a/intern/itasc/SConscript b/intern/itasc/SConscript index 1b7709bb986..bd20368f001 100644 --- a/intern/itasc/SConscript +++ b/intern/itasc/SConscript @@ -35,7 +35,4 @@ incs = '. ../../extern/Eigen3' defs = [] -if env['OURPLATFORM']=='darwin' and env['C_COMPILER_ID'] == 'clang' and env['CCVERSION'] >= '3.4': # workaround for friend declaration specifies a default argument expression, not allowed anymore - env.BlenderLib ('bf_intern_itasc', sources, Split(incs), defs, libtype=['intern','player'], priority=[20,100], cc_compilerchange='/usr/bin/gcc', cxx_compilerchange='/usr/bin/g++' ) -else: - env.BlenderLib ('bf_intern_itasc', sources, Split(incs), defs, libtype=['intern','player'], priority=[20,100]) +env.BlenderLib ('bf_intern_itasc', sources, Split(incs), defs, libtype=['intern','player'], priority=[20,100]) diff --git a/intern/itasc/kdl/frameacc.hpp b/intern/itasc/kdl/frameacc.hpp index 40dd5bfa712..bccd229804d 100644 --- a/intern/itasc/kdl/frameacc.hpp +++ b/intern/itasc/kdl/frameacc.hpp @@ -78,9 +78,9 @@ public: IMETHOD friend VectorAcc operator / (const VectorAcc& r2,const doubleAcc& r1); - IMETHOD friend bool Equal(const VectorAcc& r1,const VectorAcc& r2,double eps=epsilon); - IMETHOD friend bool Equal(const Vector& r1,const VectorAcc& r2,double eps=epsilon); - IMETHOD friend bool Equal(const VectorAcc& r1,const Vector& r2,double eps=epsilon); + IMETHOD friend bool Equal(const VectorAcc& r1,const VectorAcc& r2,double eps); + IMETHOD friend bool Equal(const Vector& r1,const VectorAcc& r2,double eps); + IMETHOD friend bool Equal(const VectorAcc& r1,const Vector& r2,double eps); IMETHOD friend VectorAcc operator - (const VectorAcc& r); IMETHOD friend doubleAcc dot(const VectorAcc& lhs,const VectorAcc& rhs); IMETHOD friend doubleAcc dot(const VectorAcc& lhs,const Vector& rhs); @@ -132,9 +132,9 @@ public: IMETHOD friend RotationAcc 
operator* (const RotationAcc& r1,const RotationAcc& r2); IMETHOD friend RotationAcc operator* (const Rotation& r1,const RotationAcc& r2); IMETHOD friend RotationAcc operator* (const RotationAcc& r1,const Rotation& r2); - IMETHOD friend bool Equal(const RotationAcc& r1,const RotationAcc& r2,double eps=epsilon); - IMETHOD friend bool Equal(const Rotation& r1,const RotationAcc& r2,double eps=epsilon); - IMETHOD friend bool Equal(const RotationAcc& r1,const Rotation& r2,double eps=epsilon); + IMETHOD friend bool Equal(const RotationAcc& r1,const RotationAcc& r2,double eps); + IMETHOD friend bool Equal(const Rotation& r1,const RotationAcc& r2,double eps); + IMETHOD friend bool Equal(const RotationAcc& r1,const Rotation& r2,double eps); IMETHOD TwistAcc Inverse(const TwistAcc& arg) const; IMETHOD TwistAcc Inverse(const Twist& arg) const; IMETHOD TwistAcc operator * (const TwistAcc& arg) const; @@ -170,9 +170,9 @@ public: IMETHOD friend FrameAcc operator * (const FrameAcc& f1,const FrameAcc& f2); IMETHOD friend FrameAcc operator * (const Frame& f1,const FrameAcc& f2); IMETHOD friend FrameAcc operator * (const FrameAcc& f1,const Frame& f2); - IMETHOD friend bool Equal(const FrameAcc& r1,const FrameAcc& r2,double eps=epsilon); - IMETHOD friend bool Equal(const Frame& r1,const FrameAcc& r2,double eps=epsilon); - IMETHOD friend bool Equal(const FrameAcc& r1,const Frame& r2,double eps=epsilon); + IMETHOD friend bool Equal(const FrameAcc& r1,const FrameAcc& r2,double eps); + IMETHOD friend bool Equal(const Frame& r1,const FrameAcc& r2,double eps); + IMETHOD friend bool Equal(const FrameAcc& r1,const Frame& r2,double eps); IMETHOD TwistAcc Inverse(const TwistAcc& arg) const; IMETHOD TwistAcc Inverse(const Twist& arg) const; @@ -226,9 +226,9 @@ public: // the new point. 
// Complexity : 6M+6A - IMETHOD friend bool Equal(const TwistAcc& a,const TwistAcc& b,double eps=epsilon); - IMETHOD friend bool Equal(const Twist& a,const TwistAcc& b,double eps=epsilon); - IMETHOD friend bool Equal(const TwistAcc& a,const Twist& b,double eps=epsilon); + IMETHOD friend bool Equal(const TwistAcc& a,const TwistAcc& b,double eps); + IMETHOD friend bool Equal(const Twist& a,const TwistAcc& b,double eps); + IMETHOD friend bool Equal(const TwistAcc& a,const Twist& b,double eps); IMETHOD Twist GetTwist() const; @@ -240,9 +240,18 @@ public: }; - - - +IMETHOD bool Equal(const VectorAcc&, const VectorAcc&, double = epsilon); +IMETHOD bool Equal(const Vector&, const VectorAcc&, double = epsilon); +IMETHOD bool Equal(const VectorAcc&, const Vector&, double = epsilon); +IMETHOD bool Equal(const RotationAcc&, const RotationAcc&, double = epsilon); +IMETHOD bool Equal(const Rotation&, const RotationAcc&, double = epsilon); +IMETHOD bool Equal(const RotationAcc&, const Rotation&, double = epsilon); +IMETHOD bool Equal(const FrameAcc&, const FrameAcc&, double = epsilon); +IMETHOD bool Equal(const Frame&, const FrameAcc&, double = epsilon); +IMETHOD bool Equal(const FrameAcc&, const Frame&, double = epsilon); +IMETHOD bool Equal(const TwistAcc&, const TwistAcc&, double = epsilon); +IMETHOD bool Equal(const Twist&, const TwistAcc&, double = epsilon); +IMETHOD bool Equal(const TwistAcc&, const Twist&, double = epsilon); #ifdef KDL_INLINE diff --git a/intern/itasc/kdl/frames.hpp b/intern/itasc/kdl/frames.hpp index 28a59898e20..87eedea29f7 100644 --- a/intern/itasc/kdl/frames.hpp +++ b/intern/itasc/kdl/frames.hpp @@ -248,10 +248,10 @@ public: //! do not use operator == because the definition of Equal(.,.) is slightly //! different. It compares whether the 2 arguments are equal in an eps-interval - inline friend bool Equal(const Vector& a,const Vector& b,double eps=epsilon); + inline friend bool Equal(const Vector& a,const Vector& b,double eps); //! 
return a normalized vector - inline friend Vector Normalize(const Vector& a, double eps=epsilon); + inline friend Vector Normalize(const Vector& a, double eps); //! The literal equality operator==(), also identical. inline friend bool operator==(const Vector& a,const Vector& b); @@ -261,7 +261,7 @@ public: friend class Rotation; friend class Frame; }; - + inline Vector Normalize(const Vector&, double eps=epsilon); /** \brief represents rotations in 3 dimensional space. @@ -502,7 +502,7 @@ public: //! do not use operator == because the definition of Equal(.,.) is slightly //! different. It compares whether the 2 arguments are equal in an eps-interval - friend bool Equal(const Rotation& a,const Rotation& b,double eps=epsilon); + //! The literal equality operator==(), also identical. friend bool operator==(const Rotation& a,const Rotation& b); @@ -663,7 +663,7 @@ public: //! do not use operator == because the definition of Equal(.,.) is slightly //! different. It compares whether the 2 arguments are equal in an eps-interval - inline friend bool Equal(const Frame& a,const Frame& b,double eps=epsilon); + inline friend bool Equal(const Frame& a,const Frame& b,double eps); //! The literal equality operator==(), also identical. inline friend bool operator==(const Frame& a,const Frame& b); @@ -735,7 +735,7 @@ public: //! do not use operator == because the definition of Equal(.,.) is slightly //! different. It compares whether the 2 arguments are equal in an eps-interval - inline friend bool Equal(const Twist& a,const Twist& b,double eps=epsilon); + inline friend bool Equal(const Twist& a,const Twist& b,double eps); //! The literal equality operator==(), also identical. inline friend bool operator==(const Twist& a,const Twist& b); @@ -898,7 +898,7 @@ public: //! do not use operator == because the definition of Equal(.,.) is slightly //! different. 
It compares whether the 2 arguments are equal in an eps-interval - inline friend bool Equal(const Wrench& a,const Wrench& b,double eps=epsilon); + inline friend bool Equal(const Wrench& a,const Wrench& b,double eps); //! The literal equality operator==(), also identical. inline friend bool operator==(const Wrench& a,const Wrench& b); @@ -979,7 +979,7 @@ public: //! do not use operator == because the definition of Equal(.,.) is slightly //! different. It compares whether the 2 arguments are equal in an eps-interval - inline friend bool Equal(const Vector2& a,const Vector2& b,double eps=epsilon); + inline friend bool Equal(const Vector2& a,const Vector2& b,double eps); friend class Rotation2; }; @@ -1026,7 +1026,7 @@ public: //! do not use operator == because the definition of Equal(.,.) is slightly //! different. It compares whether the 2 arguments are equal in an eps-interval - inline friend bool Equal(const Rotation2& a,const Rotation2& b,double eps=epsilon); + inline friend bool Equal(const Rotation2& a,const Rotation2& b,double eps); }; //! 
A 2D frame class, for further documentation see the Frames class @@ -1067,9 +1067,18 @@ public: tmp.SetIdentity(); return tmp; } - inline friend bool Equal(const Frame2& a,const Frame2& b,double eps=epsilon); + inline friend bool Equal(const Frame2& a,const Frame2& b,double eps); }; +inline bool Equal(const Vector&, const Vector&, double = epsilon); + bool Equal(const Rotation&, const Rotation&, double = epsilon); +inline bool Equal(const Frame&, const Frame&, double = epsilon); +inline bool Equal(const Twist&, const Twist&, double = epsilon); +inline bool Equal(const Wrench&, const Wrench&, double = epsilon); +inline bool Equal(const Vector2&, const Vector2&, double = epsilon); +inline bool Equal(const Rotation2&, const Rotation2&, double = epsilon); +inline bool Equal(const Frame2&, const Frame2&, double = epsilon); + IMETHOD Vector diff(const Vector& a,const Vector& b,double dt=1); IMETHOD Vector diff(const Rotation& R_a_b1,const Rotation& R_a_b2,double dt=1); IMETHOD Twist diff(const Frame& F_a_b1,const Frame& F_a_b2,double dt=1); diff --git a/intern/itasc/kdl/framevel.hpp b/intern/itasc/kdl/framevel.hpp index e95c5ef7907..17e1f2adfa0 100644 --- a/intern/itasc/kdl/framevel.hpp +++ b/intern/itasc/kdl/framevel.hpp @@ -110,9 +110,9 @@ public: IMETHOD friend void SetToZero(VectorVel& v); - IMETHOD friend bool Equal(const VectorVel& r1,const VectorVel& r2,double eps=epsilon); - IMETHOD friend bool Equal(const Vector& r1,const VectorVel& r2,double eps=epsilon); - IMETHOD friend bool Equal(const VectorVel& r1,const Vector& r2,double eps=epsilon); + IMETHOD friend bool Equal(const VectorVel& r1,const VectorVel& r2,double eps); + IMETHOD friend bool Equal(const Vector& r1,const VectorVel& r2,double eps); + IMETHOD friend bool Equal(const VectorVel& r1,const Vector& r2,double eps); IMETHOD friend VectorVel operator - (const VectorVel& r); IMETHOD friend doubleVel dot(const VectorVel& lhs,const VectorVel& rhs); IMETHOD friend doubleVel dot(const VectorVel& lhs,const 
Vector& rhs); @@ -166,9 +166,9 @@ public: IMETHOD friend RotationVel operator* (const RotationVel& r1,const RotationVel& r2); IMETHOD friend RotationVel operator* (const Rotation& r1,const RotationVel& r2); IMETHOD friend RotationVel operator* (const RotationVel& r1,const Rotation& r2); - IMETHOD friend bool Equal(const RotationVel& r1,const RotationVel& r2,double eps=epsilon); - IMETHOD friend bool Equal(const Rotation& r1,const RotationVel& r2,double eps=epsilon); - IMETHOD friend bool Equal(const RotationVel& r1,const Rotation& r2,double eps=epsilon); + IMETHOD friend bool Equal(const RotationVel& r1,const RotationVel& r2,double eps); + IMETHOD friend bool Equal(const Rotation& r1,const RotationVel& r2,double eps); + IMETHOD friend bool Equal(const RotationVel& r1,const Rotation& r2,double eps); IMETHOD TwistVel Inverse(const TwistVel& arg) const; IMETHOD TwistVel Inverse(const Twist& arg) const; @@ -220,9 +220,9 @@ public: IMETHOD friend FrameVel operator * (const FrameVel& f1,const FrameVel& f2); IMETHOD friend FrameVel operator * (const Frame& f1,const FrameVel& f2); IMETHOD friend FrameVel operator * (const FrameVel& f1,const Frame& f2); - IMETHOD friend bool Equal(const FrameVel& r1,const FrameVel& r2,double eps=epsilon); - IMETHOD friend bool Equal(const Frame& r1,const FrameVel& r2,double eps=epsilon); - IMETHOD friend bool Equal(const FrameVel& r1,const Frame& r2,double eps=epsilon); + IMETHOD friend bool Equal(const FrameVel& r1,const FrameVel& r2,double eps); + IMETHOD friend bool Equal(const Frame& r1,const FrameVel& r2,double eps); + IMETHOD friend bool Equal(const FrameVel& r1,const Frame& r2,double eps); IMETHOD TwistVel Inverse(const TwistVel& arg) const; IMETHOD TwistVel Inverse(const Twist& arg) const; @@ -292,9 +292,9 @@ public: // = Equality operators // do not use operator == because the definition of Equal(.,.) is slightly // different. 
It compares whether the 2 arguments are equal in an eps-interval - IMETHOD friend bool Equal(const TwistVel& a,const TwistVel& b,double eps=epsilon); - IMETHOD friend bool Equal(const Twist& a,const TwistVel& b,double eps=epsilon); - IMETHOD friend bool Equal(const TwistVel& a,const Twist& b,double eps=epsilon); + IMETHOD friend bool Equal(const TwistVel& a,const TwistVel& b,double eps); + IMETHOD friend bool Equal(const Twist& a,const TwistVel& b,double eps); + IMETHOD friend bool Equal(const TwistVel& a,const Twist& b,double eps); // = Conversion to other entities IMETHOD Twist GetTwist() const; @@ -305,6 +305,19 @@ public: }; +IMETHOD bool Equal(const VectorVel&, const VectorVel&, double = epsilon); +IMETHOD bool Equal(const Vector&, const VectorVel&, double = epsilon); +IMETHOD bool Equal(const VectorVel&, const Vector&, double = epsilon); +IMETHOD bool Equal(const RotationVel&, const RotationVel&, double = epsilon); +IMETHOD bool Equal(const Rotation&, const RotationVel&, double = epsilon); +IMETHOD bool Equal(const RotationVel&, const Rotation&, double = epsilon); +IMETHOD bool Equal(const FrameVel&, const FrameVel&, double = epsilon); +IMETHOD bool Equal(const Frame&, const FrameVel&, double = epsilon); +IMETHOD bool Equal(const FrameVel&, const Frame&, double = epsilon); +IMETHOD bool Equal(const TwistVel&, const TwistVel&, double = epsilon); +IMETHOD bool Equal(const Twist&, const TwistVel&, double = epsilon); +IMETHOD bool Equal(const TwistVel&, const Twist&, double = epsilon); + IMETHOD VectorVel diff(const VectorVel& a,const VectorVel& b,double dt=1.0) { return VectorVel(diff(a.p,b.p,dt),diff(a.v,b.v,dt)); } diff --git a/intern/itasc/kdl/jacobian.hpp b/intern/itasc/kdl/jacobian.hpp index e9057451c9f..9708ebd37be 100644 --- a/intern/itasc/kdl/jacobian.hpp +++ b/intern/itasc/kdl/jacobian.hpp @@ -45,7 +45,7 @@ namespace KDL bool operator ==(const Jacobian& arg); bool operator !=(const Jacobian& arg); - friend bool Equal(const Jacobian& a,const Jacobian& 
b,double eps=epsilon); + friend bool Equal(const Jacobian& a,const Jacobian& b,double eps); ~Jacobian(); @@ -63,6 +63,7 @@ namespace KDL }; + bool Equal(const Jacobian&, const Jacobian&, double = epsilon); } #endif diff --git a/intern/itasc/kdl/jntarray.hpp b/intern/itasc/kdl/jntarray.hpp index ece6b0bdb6b..886171b11db 100644 --- a/intern/itasc/kdl/jntarray.hpp +++ b/intern/itasc/kdl/jntarray.hpp @@ -209,12 +209,12 @@ class MyTask : public RTT::TaskContext * @return true if each element of src1 is within eps of the same * element in src2, or if both src1 and src2 have no data (ie 0==rows()) */ - friend bool Equal(const JntArray& src1,const JntArray& src2,double eps=epsilon); + friend bool Equal(const JntArray& src1,const JntArray& src2,double eps); friend bool operator==(const JntArray& src1,const JntArray& src2); //friend bool operator!=(const JntArray& src1,const JntArray& src2); }; - + bool Equal(const JntArray&,const JntArray&, double = epsilon); bool operator==(const JntArray& src1,const JntArray& src2); //bool operator!=(const JntArray& src1,const JntArray& src2); diff --git a/intern/itasc/kdl/jntarrayacc.hpp b/intern/itasc/kdl/jntarrayacc.hpp index 275aa58f21e..fd1c26430e8 100644 --- a/intern/itasc/kdl/jntarrayacc.hpp +++ b/intern/itasc/kdl/jntarrayacc.hpp @@ -58,9 +58,10 @@ namespace KDL friend void Divide(const JntArrayAcc& src,const doubleVel& factor,JntArrayAcc& dest); friend void Divide(const JntArrayAcc& src,const doubleAcc& factor,JntArrayAcc& dest); friend void SetToZero(JntArrayAcc& array); - friend bool Equal(const JntArrayAcc& src1,const JntArrayAcc& src2,double eps=epsilon); - + friend bool Equal(const JntArrayAcc& src1,const JntArrayAcc& src2,double eps); }; + + bool Equal(const JntArrayAcc&, const JntArrayAcc&, double = epsilon); } #endif diff --git a/intern/itasc/kdl/jntarrayvel.hpp b/intern/itasc/kdl/jntarrayvel.hpp index faa82076ebb..480f84f1708 100644 --- a/intern/itasc/kdl/jntarrayvel.hpp +++ b/intern/itasc/kdl/jntarrayvel.hpp @@ -51,9 
+51,10 @@ namespace KDL friend void Divide(const JntArrayVel& src,const double& factor,JntArrayVel& dest); friend void Divide(const JntArrayVel& src,const doubleVel& factor,JntArrayVel& dest); friend void SetToZero(JntArrayVel& array); - friend bool Equal(const JntArrayVel& src1,const JntArrayVel& src2,double eps=epsilon); - + friend bool Equal(const JntArrayVel& src1,const JntArrayVel& src2,double eps); }; + + bool Equal(const JntArrayVel&, const JntArrayVel&, double = epsilon); } #endif diff --git a/intern/locale/CMakeLists.txt b/intern/locale/CMakeLists.txt index 3599aa68545..217fe9a8c71 100644 --- a/intern/locale/CMakeLists.txt +++ b/intern/locale/CMakeLists.txt @@ -36,6 +36,14 @@ set(SRC boost_locale_wrapper.h ) +if(WITH_HEADLESS) + add_definitions(-DWITH_HEADLESS) +endif() + +if(WITH_GHOST_SDL) + add_definitions(-DWITH_GHOST_SDL) +endif() + if(WITH_INTERNATIONAL) list(APPEND INC_SYS ${BOOST_INCLUDE_DIR} @@ -51,5 +59,10 @@ blender_add_lib(bf_intern_locale "${SRC}" "${INC}" "${INC_SYS}") set(MSFFMT_SRC msgfmt.cc ) - add_executable(msgfmt ${MSFFMT_SRC}) + +if(CMAKE_C_COMPILER_ID MATCHES "Clang" AND (NOT (CMAKE_C_COMPILER_VERSION VERSION_LESS 3.4))) + # needed for clang 3.4+ + target_link_libraries(msgfmt ${PLATFORM_LINKLIBS}) +endif() + diff --git a/intern/locale/SConscript b/intern/locale/SConscript index 4136ac8237d..24828c120ec 100644 --- a/intern/locale/SConscript +++ b/intern/locale/SConscript @@ -66,10 +66,6 @@ if env['WITH_BF_INTERNATIONAL']: locale = env.Clone() - msgfmt_executable = targetpath - if env['OURPLATFORM'] in ('win32-vc', 'win64-vc', 'win32-mingw', 'win64-mingw'): - msgfmt_executable += ".exe" - # dependencies dependencies = [msgfmt_target] @@ -82,7 +78,7 @@ if env['WITH_BF_INTERNATIONAL']: po_file = os.path.join(po_dir, f) mo_file = os.path.join(build_dir, os.path.splitext(f)[0] + ".mo") - command = "\"%s\" \"%s\" \"%s\"" % (msgfmt_executable, po_file, mo_file) + command = "\"%s\" \"%s\" \"%s\"" % (targetpath, po_file, mo_file) 
locale.Command(mo_file, po_file, command) locale.Depends(mo_file, dependencies) diff --git a/intern/locale/boost_locale_wrapper.cpp b/intern/locale/boost_locale_wrapper.cpp index 945d0bbc5da..25843d60578 100644 --- a/intern/locale/boost_locale_wrapper.cpp +++ b/intern/locale/boost_locale_wrapper.cpp @@ -64,7 +64,7 @@ void bl_locale_set(const char *locale) _locale = gen(locale); } else { -#ifdef __APPLE__ +#if defined(__APPLE__) && !defined(WITH_HEADLESS) && !defined(WITH_GHOST_SDL) extern char GHOST_user_locale[128]; // pulled from Ghost_SystemCocoa std::string locale_osx = GHOST_user_locale + std::string(".UTF-8"); _locale = gen(locale_osx.c_str()); @@ -113,7 +113,11 @@ const char *bl_locale_pgettext(const char *msgctxt, const char *msgid) return r; return msgid; } - catch(std::exception const &) { + catch(std::bad_cast const &e) { /* if std::has_facet<char_message_facet>(l) == false, LC_ALL = "C" case */ +// std::cout << "bl_locale_pgettext(" << msgid << "): " << e.what() << " \n"; + return msgid; + } + catch(std::exception const &e) { // std::cout << "bl_locale_pgettext(" << msgctxt << ", " << msgid << "): " << e.what() << " \n"; return msgid; } diff --git a/intern/rigidbody/rb_bullet_api.cpp b/intern/rigidbody/rb_bullet_api.cpp index ab7b851911a..6d39e328e82 100644 --- a/intern/rigidbody/rb_bullet_api.cpp +++ b/intern/rigidbody/rb_bullet_api.cpp @@ -726,8 +726,8 @@ rbMeshData *RB_trimesh_data_new(int num_tris, int num_verts) static void RB_trimesh_data_delete(rbMeshData *mesh) { delete mesh->index_array; - delete mesh->vertices; - delete mesh->triangles; + delete[] mesh->vertices; + delete[] mesh->triangles; delete mesh; } diff --git a/intern/utfconv/utfconv.c b/intern/utfconv/utfconv.c index 7f7a612528d..e5f8756917f 100644 --- a/intern/utfconv/utfconv.c +++ b/intern/utfconv/utfconv.c @@ -170,7 +170,7 @@ int conv_utf_8_to_16(const char *in8, wchar_t *out16, size_t size16) { char u; char type = 0; - wchar_t u32 = 0; + unsigned int u32 = 0; wchar_t *out16end = 
out16 + size16; int err = 0; if (!size16 || !in8 || !out16) return UTF_ERROR_NULL_IN; |