diff options
Diffstat (limited to 'intern/cycles/kernel/CMakeLists.txt')
-rw-r--r-- | intern/cycles/kernel/CMakeLists.txt | 251 |
1 files changed, 189 insertions, 62 deletions
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 5322f6abee1..7aab5f4a94a 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -1,31 +1,64 @@ remove_extra_strict_flags() set(INC - . - ../util - osl - svm + .. ) set(INC_SYS ) -set(SRC +set(SRC_CPU_KERNELS kernels/cpu/kernel.cpp + kernels/cpu/kernel_sse2.cpp + kernels/cpu/kernel_sse3.cpp + kernels/cpu/kernel_sse41.cpp + kernels/cpu/kernel_avx.cpp + kernels/cpu/kernel_avx2.cpp + kernels/cpu/kernel_split.cpp + kernels/cpu/kernel_split_sse2.cpp + kernels/cpu/kernel_split_sse3.cpp + kernels/cpu/kernel_split_sse41.cpp + kernels/cpu/kernel_split_avx.cpp + kernels/cpu/kernel_split_avx2.cpp + kernels/cpu/filter.cpp + kernels/cpu/filter_sse2.cpp + kernels/cpu/filter_sse3.cpp + kernels/cpu/filter_sse41.cpp + kernels/cpu/filter_avx.cpp + kernels/cpu/filter_avx2.cpp +) + +set(SRC_CUDA_KERNELS + kernels/cuda/kernel.cu + kernels/cuda/kernel_split.cu + kernels/cuda/filter.cu +) + +set(SRC_OPENCL_KERNELS kernels/opencl/kernel.cl + kernels/opencl/kernel_state_buffer_size.cl + kernels/opencl/kernel_split.cl kernels/opencl/kernel_data_init.cl + kernels/opencl/kernel_path_init.cl kernels/opencl/kernel_queue_enqueue.cl kernels/opencl/kernel_scene_intersect.cl kernels/opencl/kernel_lamp_emission.cl - kernels/opencl/kernel_background_buffer_update.cl + kernels/opencl/kernel_do_volume.cl + kernels/opencl/kernel_indirect_background.cl + kernels/opencl/kernel_shader_setup.cl + kernels/opencl/kernel_shader_sort.cl kernels/opencl/kernel_shader_eval.cl kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl + kernels/opencl/kernel_subsurface_scatter.cl kernels/opencl/kernel_direct_lighting.cl - kernels/opencl/kernel_shadow_blocked.cl + kernels/opencl/kernel_shadow_blocked_ao.cl + kernels/opencl/kernel_shadow_blocked_dl.cl + kernels/opencl/kernel_enqueue_inactive.cl kernels/opencl/kernel_next_iteration_setup.cl - kernels/opencl/kernel_sum_all_radiance.cl - kernels/cuda/kernel.cu + kernels/opencl/kernel_indirect_subsurface.cl + kernels/opencl/kernel_buffer_update.cl + kernels/opencl/filter.cl ) set(SRC_BVH_HEADERS @@ -52,12 +85,10 @@ set(SRC_HEADERS kernel_compat_cpu.h kernel_compat_cuda.h kernel_compat_opencl.h - kernel_debug.h kernel_differential.h kernel_emission.h kernel_film.h kernel_globals.h - kernel_image_opencl.h kernel_jitter.h kernel_light.h kernel_math.h @@ -68,6 +99,7 @@ set(SRC_HEADERS kernel_path_common.h kernel_path_state.h kernel_path_surface.h + kernel_path_subsurface.h kernel_path_volume.h kernel_projection.h kernel_queues.h @@ -86,6 +118,18 @@ set(SRC_KERNELS_CPU_HEADERS kernels/cpu/kernel_cpu.h kernels/cpu/kernel_cpu_impl.h kernels/cpu/kernel_cpu_image.h + kernels/cpu/filter_cpu.h + kernels/cpu/filter_cpu_impl.h +) + +set(SRC_KERNELS_CUDA_HEADERS + kernels/cuda/kernel_config.h + kernels/cuda/kernel_cuda_image.h +) + +set(SRC_KERNELS_OPENCL_HEADERS + kernels/opencl/kernel_split_function.h + kernels/opencl/kernel_opencl_image.h ) set(SRC_CLOSURE_HEADERS @@ -109,6 +153,8 @@ set(SRC_CLOSURE_HEADERS closure/bssrdf.h closure/emissive.h closure/volume.h + closure/bsdf_principled_diffuse.h + closure/bsdf_principled_sheen.h ) set(SRC_SVM_HEADERS @@ -162,8 +208,11 @@ set(SRC_GEOM_HEADERS geom/geom.h geom/geom_attribute.h geom/geom_curve.h + geom/geom_curve_intersect.h geom/geom_motion_curve.h geom/geom_motion_triangle.h + geom/geom_motion_triangle_intersect.h + geom/geom_motion_triangle_shader.h geom/geom_object.h geom/geom_patch.h geom/geom_primitive.h @@ -173,31 +222,93 @@ set(SRC_GEOM_HEADERS geom/geom_volume.h ) +set(SRC_FILTER_HEADERS + filter/filter.h + filter/filter_defines.h + filter/filter_features.h + filter/filter_features_sse.h + filter/filter_kernel.h + filter/filter_nlm_cpu.h + filter/filter_nlm_gpu.h + filter/filter_prefilter.h + filter/filter_reconstruction.h + filter/filter_transform.h + filter/filter_transform_gpu.h + filter/filter_transform_sse.h +) + set(SRC_UTIL_HEADERS ../util/util_atomic.h ../util/util_color.h + ../util/util_defines.h ../util/util_half.h ../util/util_hash.h ../util/util_math.h ../util/util_math_fast.h + ../util/util_math_intersect.h + ../util/util_math_float2.h + ../util/util_math_float3.h + ../util/util_math_float4.h + ../util/util_math_int2.h + ../util/util_math_int3.h + ../util/util_math_int4.h + ../util/util_math_matrix.h ../util/util_static_assert.h ../util/util_transform.h ../util/util_texture.h ../util/util_types.h + ../util/util_types_float2.h + ../util/util_types_float2_impl.h + ../util/util_types_float3.h + ../util/util_types_float3_impl.h + ../util/util_types_float4.h + ../util/util_types_float4_impl.h + ../util/util_types_int2.h + ../util/util_types_int2_impl.h + ../util/util_types_int3.h + ../util/util_types_int3_impl.h + ../util/util_types_int4.h + ../util/util_types_int4_impl.h + ../util/util_types_uchar2.h + ../util/util_types_uchar2_impl.h + ../util/util_types_uchar3.h + ../util/util_types_uchar3_impl.h + ../util/util_types_uchar4.h + ../util/util_types_uchar4_impl.h + ../util/util_types_uint2.h + ../util/util_types_uint2_impl.h + ../util/util_types_uint3.h + ../util/util_types_uint3_impl.h + ../util/util_types_uint4.h + ../util/util_types_uint4_impl.h + ../util/util_types_vector3.h + ../util/util_types_vector3_impl.h ) set(SRC_SPLIT_HEADERS - split/kernel_background_buffer_update.h + split/kernel_branched.h + split/kernel_buffer_update.h split/kernel_data_init.h split/kernel_direct_lighting.h + split/kernel_do_volume.h + split/kernel_enqueue_inactive.h split/kernel_holdout_emission_blurring_pathtermination_ao.h + split/kernel_indirect_background.h + split/kernel_indirect_subsurface.h split/kernel_lamp_emission.h split/kernel_next_iteration_setup.h + split/kernel_path_init.h + split/kernel_queue_enqueue.h split/kernel_scene_intersect.h + split/kernel_shader_setup.h + split/kernel_shader_sort.h split/kernel_shader_eval.h - split/kernel_shadow_blocked.h + split/kernel_shadow_blocked_ao.h + split/kernel_shadow_blocked_dl.h split/kernel_split_common.h - split/kernel_sum_all_radiance.h + split/kernel_split_data.h + split/kernel_split_data_types.h + split/kernel_subsurface_scatter.h ) # CUDA module @@ -217,7 +328,7 @@ if(WITH_CYCLES_CUDA_BINARIES) set(CUDA_VERSION "${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}") # warn for other versions - if(CUDA_VERSION MATCHES "80") + if(CUDA_VERSION MATCHES "80" OR CUDA_VERSION MATCHES "90") else() message(WARNING "CUDA version ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} detected, " @@ -225,25 +336,31 @@ if(WITH_CYCLES_CUDA_BINARIES) endif() # build for each arch - set(cuda_sources kernels/cuda/kernel.cu + set(cuda_sources kernels/cuda/kernel.cu kernels/cuda/kernel_split.cu ${SRC_HEADERS} + ${SRC_KERNELS_CUDA_HEADERS} ${SRC_BVH_HEADERS} ${SRC_SVM_HEADERS} ${SRC_GEOM_HEADERS} ${SRC_CLOSURE_HEADERS} ${SRC_UTIL_HEADERS} ) + set(cuda_filter_sources kernels/cuda/filter.cu + ${SRC_HEADERS} + ${SRC_KERNELS_CUDA_HEADERS} + ${SRC_FILTER_HEADERS} + ${SRC_UTIL_HEADERS} + ) set(cuda_cubins) - macro(CYCLES_CUDA_KERNEL_ADD arch experimental) + macro(CYCLES_CUDA_KERNEL_ADD arch name flags sources experimental) if(${experimental}) - set(cuda_extra_flags "-D__KERNEL_EXPERIMENTAL__") - set(cuda_cubin kernel_experimental_${arch}.cubin) - else() - set(cuda_extra_flags "") - set(cuda_cubin kernel_${arch}.cubin) + set(flags ${flags} -D__KERNEL_EXPERIMENTAL__) + set(name ${name}_experimental) endif() + set(cuda_cubin ${name}_${arch}.cubin) + if(WITH_CYCLES_DEBUG) set(cuda_debug_flags "-D__KERNEL_DEBUG__") else() @@ -256,26 +373,27 @@ if(WITH_CYCLES_CUDA_BINARIES) set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${cuda_nvcc_version}") set(cuda_math_flags "--use_fast_math") + set(cuda_kernel_src "/kernels/cuda/${name}.cu") + add_custom_command( OUTPUT ${cuda_cubin} COMMAND ${cuda_nvcc_command} -arch=${arch} ${CUDA_NVCC_FLAGS} -m${CUDA_BITS} - --cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernels/cuda/kernel.cu + --cubin ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src} -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin} --ptxas-options="-v" ${cuda_arch_flags} ${cuda_version_flags} ${cuda_math_flags} - ${cuda_extra_flags} + ${flags} ${cuda_debug_flags} - -I${CMAKE_CURRENT_SOURCE_DIR}/../util - -I${CMAKE_CURRENT_SOURCE_DIR}/svm + -I${CMAKE_CURRENT_SOURCE_DIR}/.. -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -DNVCC - DEPENDS ${cuda_sources}) + DEPENDS ${sources}) delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib) list(APPEND cuda_cubins ${cuda_cubin}) @@ -288,8 +406,18 @@ if(WITH_CYCLES_CUDA_BINARIES) endmacro() foreach(arch ${CYCLES_CUDA_BINARIES_ARCH}) - # Compile regular kernel - CYCLES_CUDA_KERNEL_ADD(${arch} FALSE) + if(CUDA_VERSION MATCHES "90" AND ${arch} MATCHES "sm_2.") + message(STATUS "CUDA binaries for ${arch} disabled, not supported by CUDA 9.") + else() + # Compile regular kernel + CYCLES_CUDA_KERNEL_ADD(${arch} kernel "" "${cuda_sources}" FALSE) + CYCLES_CUDA_KERNEL_ADD(${arch} filter "" "${cuda_filter_sources}" FALSE) + + if(WITH_CYCLES_CUDA_SPLIT_KERNEL_BINARIES) + # Compile split kernel + CYCLES_CUDA_KERNEL_ADD(${arch} kernel_split "-D__SPLIT__" ${cuda_sources} FALSE) + endif() + endif() endforeach() add_custom_target(cycles_kernel_cuda ALL DEPENDS ${cuda_cubins}) @@ -319,38 +447,45 @@ list(APPEND SRC_HEADERS include_directories(${INC}) include_directories(SYSTEM ${INC_SYS}) -if(CXX_HAS_SSE) - list(APPEND SRC - kernels/cpu/kernel_sse2.cpp - kernels/cpu/kernel_sse3.cpp - kernels/cpu/kernel_sse41.cpp - ) +set_source_files_properties(kernels/cpu/kernel.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_KERNEL_FLAGS}") +set_source_files_properties(kernels/cpu/kernel_split.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_KERNEL_FLAGS}") +set_source_files_properties(kernels/cpu/filter.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_KERNEL_FLAGS}") +if(CXX_HAS_SSE) set_source_files_properties(kernels/cpu/kernel_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}") set_source_files_properties(kernels/cpu/kernel_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}") set_source_files_properties(kernels/cpu/kernel_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/kernel_split_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/kernel_split_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/kernel_split_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/filter_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/filter_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/filter_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}") endif() if(CXX_HAS_AVX) - list(APPEND SRC - kernels/cpu/kernel_avx.cpp - ) set_source_files_properties(kernels/cpu/kernel_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/kernel_split_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/filter_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}") endif() if(CXX_HAS_AVX2) - list(APPEND SRC - kernels/cpu/kernel_avx2.cpp - ) set_source_files_properties(kernels/cpu/kernel_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/kernel_split_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}") + set_source_files_properties(kernels/cpu/filter_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}") endif() add_library(cycles_kernel - ${SRC} + ${SRC_CPU_KERNELS} + ${SRC_CUDA_KERNELS} + ${SRC_OPENCL_KERNELS} ${SRC_HEADERS} ${SRC_KERNELS_CPU_HEADERS} + ${SRC_KERNELS_CUDA_HEADERS} + ${SRC_KERNELS_OPENCL_HEADERS} ${SRC_BVH_HEADERS} ${SRC_CLOSURE_HEADERS} + ${SRC_FILTER_HEADERS} ${SRC_SVM_HEADERS} ${SRC_GEOM_HEADERS} ${SRC_SPLIT_HEADERS} @@ -370,24 +505,16 @@ endif() #add_custom_target(cycles_kernel_preprocess ALL DEPENDS ${KERNEL_PREPROCESSED}) #delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${KERNEL_PREPROCESSED}" ${CYCLES_INSTALL_PATH}/kernel) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_data_init.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_queue_enqueue.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_scene_intersect.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_lamp_emission.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_background_buffer_update.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shader_eval.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_direct_lighting.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shadow_blocked.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_next_iteration_setup.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_sum_all_radiance.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel.cu" ${CYCLES_INSTALL_PATH}/kernel/kernels/cuda) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_BVH_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/bvh) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_CLOSURE_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/closure) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_SVM_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/svm) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_GEOM_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/geom) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_UTIL_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel) -delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_SPLIT_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/split) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_OPENCL_KERNELS}" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_CUDA_KERNELS}" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/cuda) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNELS_OPENCL_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/opencl) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNELS_CUDA_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/kernels/cuda) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_BVH_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/bvh) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_CLOSURE_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/closure) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_FILTER_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/filter) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_SVM_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/svm) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_GEOM_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/geom) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_UTIL_HEADERS}" ${CYCLES_INSTALL_PATH}/source/util) +delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_SPLIT_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/split) |