diff options
Diffstat (limited to 'intern/cycles/kernel/device')
23 files changed, 146 insertions, 88 deletions
diff --git a/intern/cycles/kernel/device/cpu/compat.h b/intern/cycles/kernel/device/cpu/compat.h index 888c0d5d872..5ccca52255f 100644 --- a/intern/cycles/kernel/device/cpu/compat.h +++ b/intern/cycles/kernel/device/cpu/compat.h @@ -26,11 +26,11 @@ # pragma GCC diagnostic ignored "-Wuninitialized" #endif -#include "util/util_half.h" -#include "util/util_math.h" -#include "util/util_simd.h" -#include "util/util_texture.h" -#include "util/util_types.h" +#include "util/half.h" +#include "util/math.h" +#include "util/simd.h" +#include "util/texture.h" +#include "util/types.h" /* On x86_64, versions of glibc < 2.16 have an issue where expf is * much slower than the double version. This was fixed in glibc 2.16. diff --git a/intern/cycles/kernel/device/cpu/globals.h b/intern/cycles/kernel/device/cpu/globals.h index fb9aae38cfc..dd0327b3f94 100644 --- a/intern/cycles/kernel/device/cpu/globals.h +++ b/intern/cycles/kernel/device/cpu/globals.h @@ -18,8 +18,8 @@ #pragma once -#include "kernel/kernel_profiling.h" -#include "kernel/kernel_types.h" +#include "kernel/types.h" +#include "kernel/util/profiling.h" CCL_NAMESPACE_BEGIN @@ -36,7 +36,7 @@ struct OSLShadingSystem; typedef struct KernelGlobalsCPU { #define KERNEL_TEX(type, name) texture<type> name; -#include "kernel/kernel_textures.h" +#include "kernel/textures.h" KernelData __data; diff --git a/intern/cycles/kernel/device/cpu/kernel.cpp b/intern/cycles/kernel/device/cpu/kernel.cpp index 8519b77aa08..a16c637d5ac 100644 --- a/intern/cycles/kernel/device/cpu/kernel.cpp +++ b/intern/cycles/kernel/device/cpu/kernel.cpp @@ -85,7 +85,7 @@ void kernel_global_memory_copy(KernelGlobalsCPU *kg, const char *name, void *mem kg->tname.data = (type *)mem; \ kg->tname.width = size; \ } -#include "kernel/kernel_textures.h" +#include "kernel/textures.h" else { assert(0); } diff --git a/intern/cycles/kernel/device/cpu/kernel.h b/intern/cycles/kernel/device/cpu/kernel.h index 28337a58898..c49d7ca445a 100644 --- a/intern/cycles/kernel/device/cpu/kernel.h +++ b/intern/cycles/kernel/device/cpu/kernel.h @@ -18,9 +18,9 @@ /* CPU Kernel Interface */ -#include "util/util_types.h" +#include "util/types.h" -#include "kernel/kernel_types.h" +#include "kernel/types.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/device/cpu/kernel_arch_impl.h b/intern/cycles/kernel/device/cpu/kernel_arch_impl.h index ba777062113..6df5d7787fc 100644 --- a/intern/cycles/kernel/device/cpu/kernel_arch_impl.h +++ b/intern/cycles/kernel/device/cpu/kernel_arch_impl.h @@ -29,27 +29,28 @@ # include "kernel/device/cpu/globals.h" # include "kernel/device/cpu/image.h" -# include "kernel/integrator/integrator_state.h" -# include "kernel/integrator/integrator_state_flow.h" -# include "kernel/integrator/integrator_state_util.h" - -# include "kernel/integrator/integrator_init_from_camera.h" -# include "kernel/integrator/integrator_init_from_bake.h" -# include "kernel/integrator/integrator_intersect_closest.h" -# include "kernel/integrator/integrator_intersect_shadow.h" -# include "kernel/integrator/integrator_intersect_subsurface.h" -# include "kernel/integrator/integrator_intersect_volume_stack.h" -# include "kernel/integrator/integrator_shade_background.h" -# include "kernel/integrator/integrator_shade_light.h" -# include "kernel/integrator/integrator_shade_shadow.h" -# include "kernel/integrator/integrator_shade_surface.h" -# include "kernel/integrator/integrator_shade_volume.h" -# include "kernel/integrator/integrator_megakernel.h" - -# include "kernel/kernel_film.h" -# include "kernel/kernel_adaptive_sampling.h" -# include "kernel/kernel_bake.h" -# include "kernel/kernel_id_passes.h" +# include "kernel/integrator/state.h" +# include "kernel/integrator/state_flow.h" +# include "kernel/integrator/state_util.h" + +# include "kernel/integrator/init_from_camera.h" +# include "kernel/integrator/init_from_bake.h" +# include "kernel/integrator/intersect_closest.h" +# include "kernel/integrator/intersect_shadow.h" +# include "kernel/integrator/intersect_subsurface.h" +# include "kernel/integrator/intersect_volume_stack.h" +# include "kernel/integrator/shade_background.h" +# include "kernel/integrator/shade_light.h" +# include "kernel/integrator/shade_shadow.h" +# include "kernel/integrator/shade_surface.h" +# include "kernel/integrator/shade_volume.h" +# include "kernel/integrator/megakernel.h" + +# include "kernel/film/adaptive_sampling.h" +# include "kernel/film/read.h" +# include "kernel/film/id_passes.h" + +# include "kernel/bake/bake.h" #else # define STUB_ASSERT(arch, name) \ diff --git a/intern/cycles/kernel/device/cpu/kernel_avx.cpp b/intern/cycles/kernel/device/cpu/kernel_avx.cpp index 220768036ab..cece750a255 100644 --- a/intern/cycles/kernel/device/cpu/kernel_avx.cpp +++ b/intern/cycles/kernel/device/cpu/kernel_avx.cpp @@ -18,7 +18,7 @@ * optimization flags and nearly all functions inlined, while kernel.cpp * is compiled without for other CPU's. */ -#include "util/util_optimization.h" +#include "util/optimization.h" #ifndef WITH_CYCLES_OPTIMIZED_KERNEL_AVX # define KERNEL_STUB diff --git a/intern/cycles/kernel/device/cpu/kernel_avx2.cpp b/intern/cycles/kernel/device/cpu/kernel_avx2.cpp index 90c05113cbe..fad4581236e 100644 --- a/intern/cycles/kernel/device/cpu/kernel_avx2.cpp +++ b/intern/cycles/kernel/device/cpu/kernel_avx2.cpp @@ -18,7 +18,7 @@ * optimization flags and nearly all functions inlined, while kernel.cpp * is compiled without for other CPU's. */ -#include "util/util_optimization.h" +#include "util/optimization.h" #ifndef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 # define KERNEL_STUB diff --git a/intern/cycles/kernel/device/cpu/kernel_sse2.cpp b/intern/cycles/kernel/device/cpu/kernel_sse2.cpp index fb85ef5b0d0..5fb4849ac08 100644 --- a/intern/cycles/kernel/device/cpu/kernel_sse2.cpp +++ b/intern/cycles/kernel/device/cpu/kernel_sse2.cpp @@ -18,7 +18,7 @@ * optimization flags and nearly all functions inlined, while kernel.cpp * is compiled without for other CPU's. */ -#include "util/util_optimization.h" +#include "util/optimization.h" #ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 # define KERNEL_STUB diff --git a/intern/cycles/kernel/device/cpu/kernel_sse3.cpp b/intern/cycles/kernel/device/cpu/kernel_sse3.cpp index 87baf04258a..c9424682fd4 100644 --- a/intern/cycles/kernel/device/cpu/kernel_sse3.cpp +++ b/intern/cycles/kernel/device/cpu/kernel_sse3.cpp @@ -18,7 +18,7 @@ * optimization flags and nearly all functions inlined, while kernel.cpp * is compiled without for other CPU's. */ -#include "util/util_optimization.h" +#include "util/optimization.h" #ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 # define KERNEL_STUB diff --git a/intern/cycles/kernel/device/cpu/kernel_sse41.cpp b/intern/cycles/kernel/device/cpu/kernel_sse41.cpp index bb421d58815..849ebf51989 100644 --- a/intern/cycles/kernel/device/cpu/kernel_sse41.cpp +++ b/intern/cycles/kernel/device/cpu/kernel_sse41.cpp @@ -18,7 +18,7 @@ * optimization flags and nearly all functions inlined, while kernel.cpp * is compiled without for other CPU's. */ -#include "util/util_optimization.h" +#include "util/optimization.h" #ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 # define KERNEL_STUB diff --git a/intern/cycles/kernel/device/cuda/compat.h b/intern/cycles/kernel/device/cuda/compat.h index 8a50eb1a3d5..1ee82e6eb7c 100644 --- a/intern/cycles/kernel/device/cuda/compat.h +++ b/intern/cycles/kernel/device/cuda/compat.h @@ -137,5 +137,5 @@ __device__ float __half2float(const half h) /* Types */ -#include "util/util_half.h" -#include "util/util_types.h" +#include "util/half.h" +#include "util/types.h" diff --git a/intern/cycles/kernel/device/cuda/globals.h b/intern/cycles/kernel/device/cuda/globals.h index 2c187cf8a23..e5023fad40c 100644 --- a/intern/cycles/kernel/device/cuda/globals.h +++ b/intern/cycles/kernel/device/cuda/globals.h @@ -18,10 +18,11 @@ #pragma once -#include "kernel/kernel_profiling.h" -#include "kernel/kernel_types.h" +#include "kernel/types.h" -#include "kernel/integrator/integrator_state.h" +#include "kernel/integrator/state.h" + +#include "kernel/util/profiling.h" CCL_NAMESPACE_BEGIN @@ -35,7 +36,7 @@ typedef ccl_global const KernelGlobalsGPU *ccl_restrict KernelGlobals; /* Global scene data and textures */ __constant__ KernelData __data; #define KERNEL_TEX(type, name) const __constant__ __device__ type *name; -#include "kernel/kernel_textures.h" +#include "kernel/textures.h" /* Integrator state */ __constant__ IntegratorStateGPU __integrator_state; diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h index 335cb1ec0c0..f86a8c692aa 100644 --- a/intern/cycles/kernel/device/gpu/kernel.h +++ b/intern/cycles/kernel/device/gpu/kernel.h @@ -19,27 +19,28 @@ #include "kernel/device/gpu/parallel_active_index.h" #include "kernel/device/gpu/parallel_prefix_sum.h" #include "kernel/device/gpu/parallel_sorted_index.h" - -#include "kernel/integrator/integrator_state.h" -#include "kernel/integrator/integrator_state_flow.h" -#include "kernel/integrator/integrator_state_util.h" - -#include "kernel/integrator/integrator_init_from_bake.h" -#include "kernel/integrator/integrator_init_from_camera.h" -#include "kernel/integrator/integrator_intersect_closest.h" -#include "kernel/integrator/integrator_intersect_shadow.h" -#include "kernel/integrator/integrator_intersect_subsurface.h" -#include "kernel/integrator/integrator_intersect_volume_stack.h" -#include "kernel/integrator/integrator_shade_background.h" -#include "kernel/integrator/integrator_shade_light.h" -#include "kernel/integrator/integrator_shade_shadow.h" -#include "kernel/integrator/integrator_shade_surface.h" -#include "kernel/integrator/integrator_shade_volume.h" - -#include "kernel/kernel_adaptive_sampling.h" -#include "kernel/kernel_bake.h" -#include "kernel/kernel_film.h" -#include "kernel/kernel_work_stealing.h" +#include "kernel/device/gpu/work_stealing.h" + +#include "kernel/integrator/state.h" +#include "kernel/integrator/state_flow.h" +#include "kernel/integrator/state_util.h" + +#include "kernel/integrator/init_from_bake.h" +#include "kernel/integrator/init_from_camera.h" +#include "kernel/integrator/intersect_closest.h" +#include "kernel/integrator/intersect_shadow.h" +#include "kernel/integrator/intersect_subsurface.h" +#include "kernel/integrator/intersect_volume_stack.h" +#include "kernel/integrator/shade_background.h" +#include "kernel/integrator/shade_light.h" +#include "kernel/integrator/shade_shadow.h" +#include "kernel/integrator/shade_surface.h" +#include "kernel/integrator/shade_volume.h" + +#include "kernel/bake/bake.h" + +#include "kernel/film/adaptive_sampling.h" +#include "kernel/film/read.h" /* -------------------------------------------------------------------- * Integrator. diff --git a/intern/cycles/kernel/device/gpu/parallel_active_index.h b/intern/cycles/kernel/device/gpu/parallel_active_index.h index db4a4bf71e0..d7416beb783 100644 --- a/intern/cycles/kernel/device/gpu/parallel_active_index.h +++ b/intern/cycles/kernel/device/gpu/parallel_active_index.h @@ -23,7 +23,7 @@ CCL_NAMESPACE_BEGIN * * Shared memory requirement is `sizeof(int) * (number_of_warps + 1)`. */ -#include "util/util_atomic.h" +#include "util/atomic.h" #ifdef __HIP__ # define GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE 1024 diff --git a/intern/cycles/kernel/device/gpu/parallel_prefix_sum.h b/intern/cycles/kernel/device/gpu/parallel_prefix_sum.h index aabe6e2e27a..6de3a022569 100644 --- a/intern/cycles/kernel/device/gpu/parallel_prefix_sum.h +++ b/intern/cycles/kernel/device/gpu/parallel_prefix_sum.h @@ -25,7 +25,7 @@ CCL_NAMESPACE_BEGIN * This is used for an array the size of the number of shaders in the scene * which is not usually huge, so might not be a significant bottleneck. */ -#include "util/util_atomic.h" +#include "util/atomic.h" #ifdef __HIP__ # define GPU_PARALLEL_PREFIX_SUM_DEFAULT_BLOCK_SIZE 1024 diff --git a/intern/cycles/kernel/device/gpu/parallel_sorted_index.h b/intern/cycles/kernel/device/gpu/parallel_sorted_index.h index 7570c5a6bbd..c06d7be444f 100644 --- a/intern/cycles/kernel/device/gpu/parallel_sorted_index.h +++ b/intern/cycles/kernel/device/gpu/parallel_sorted_index.h @@ -24,7 +24,7 @@ CCL_NAMESPACE_BEGIN * * TODO: there may be ways to optimize this to avoid this many atomic ops? */ -#include "util/util_atomic.h" +#include "util/atomic.h" #ifdef __HIP__ # define GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE 1024 diff --git a/intern/cycles/kernel/device/gpu/work_stealing.h b/intern/cycles/kernel/device/gpu/work_stealing.h new file mode 100644 index 00000000000..fab0915c38e --- /dev/null +++ b/intern/cycles/kernel/device/gpu/work_stealing.h @@ -0,0 +1,52 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +CCL_NAMESPACE_BEGIN + +/* + * Utility functions for work stealing + */ + +/* Map global work index to tile, pixel X/Y and sample. */ +ccl_device_inline void get_work_pixel(ccl_global const KernelWorkTile *tile, + uint global_work_index, + ccl_private uint *x, + ccl_private uint *y, + ccl_private uint *sample) +{ +#if 0 + /* Keep threads for the same sample together. */ + uint tile_pixels = tile->w * tile->h; + uint sample_offset = global_work_index / tile_pixels; + uint pixel_offset = global_work_index - sample_offset * tile_pixels; +#else + /* Keeping threads for the same pixel together. + * Appears to improve performance by a few % on CUDA and OptiX. */ + uint sample_offset = global_work_index % tile->num_samples; + uint pixel_offset = global_work_index / tile->num_samples; +#endif + + uint y_offset = pixel_offset / tile->w; + uint x_offset = pixel_offset - y_offset * tile->w; + + *x = tile->x + x_offset; + *y = tile->y + y_offset; + *sample = tile->start_sample + sample_offset; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/device/hip/compat.h b/intern/cycles/kernel/device/hip/compat.h index 089976d84e4..282c3eca641 100644 --- a/intern/cycles/kernel/device/hip/compat.h +++ b/intern/cycles/kernel/device/hip/compat.h @@ -116,5 +116,5 @@ ccl_device_forceinline T ccl_gpu_tex_object_read_3D(const ccl_gpu_tex_object tex /* Types */ -#include "util/util_half.h" -#include "util/util_types.h" +#include "util/half.h" +#include "util/types.h" diff --git a/intern/cycles/kernel/device/hip/globals.h b/intern/cycles/kernel/device/hip/globals.h index 28e1cc4282f..d9a560d668b 100644 --- a/intern/cycles/kernel/device/hip/globals.h +++ b/intern/cycles/kernel/device/hip/globals.h @@ -18,10 +18,11 @@ #pragma once -#include "kernel/kernel_profiling.h" -#include "kernel/kernel_types.h" +#include "kernel/types.h" -#include "kernel/integrator/integrator_state.h" +#include "kernel/integrator/state.h" + +#include "kernel/util/profiling.h" CCL_NAMESPACE_BEGIN @@ -35,7 +36,7 @@ typedef ccl_global const KernelGlobalsGPU *ccl_restrict KernelGlobals; /* Global scene data and textures */ __constant__ KernelData __data; #define KERNEL_TEX(type, name) __attribute__((used)) const __constant__ __device__ type *name; -#include "kernel/kernel_textures.h" +#include "kernel/textures.h" /* Integrator state */ __constant__ IntegratorStateGPU __integrator_state; diff --git a/intern/cycles/kernel/device/optix/compat.h b/intern/cycles/kernel/device/optix/compat.h index d27b7d55475..835e4621d47 100644 --- a/intern/cycles/kernel/device/optix/compat.h +++ b/intern/cycles/kernel/device/optix/compat.h @@ -129,5 +129,5 @@ __device__ float __half2float(const half h) /* Types */ -#include "util/util_half.h" -#include "util/util_types.h" +#include "util/half.h" +#include "util/types.h" diff --git a/intern/cycles/kernel/device/optix/globals.h b/intern/cycles/kernel/device/optix/globals.h index 7b8ebfe50e6..e9b72369cd5 100644 --- a/intern/cycles/kernel/device/optix/globals.h +++ b/intern/cycles/kernel/device/optix/globals.h @@ -18,10 +18,11 @@ #pragma once -#include "kernel/kernel_profiling.h" -#include "kernel/kernel_types.h" +#include "kernel/types.h" -#include "kernel/integrator/integrator_state.h" +#include "kernel/integrator/state.h" + +#include "kernel/util/profiling.h" CCL_NAMESPACE_BEGIN @@ -41,7 +42,7 @@ struct KernelParamsOptiX { /* Global scene data and textures */ KernelData data; #define KERNEL_TEX(type, name) const type *name; -#include "kernel/kernel_textures.h" +#include "kernel/textures.h" /* Integrator state */ IntegratorStateGPU __integrator_state; diff --git a/intern/cycles/kernel/device/optix/kernel.cu b/intern/cycles/kernel/device/optix/kernel.cu index a3bafb9846c..6989219cd9f 100644 --- a/intern/cycles/kernel/device/optix/kernel.cu +++ b/intern/cycles/kernel/device/optix/kernel.cu @@ -21,14 +21,14 @@ #include "kernel/device/gpu/image.h" /* Texture lookup uses normal CUDA intrinsics. */ -#include "kernel/integrator/integrator_state.h" -#include "kernel/integrator/integrator_state_flow.h" -#include "kernel/integrator/integrator_state_util.h" - -#include "kernel/integrator/integrator_intersect_closest.h" -#include "kernel/integrator/integrator_intersect_shadow.h" -#include "kernel/integrator/integrator_intersect_subsurface.h" -#include "kernel/integrator/integrator_intersect_volume_stack.h" +#include "kernel/integrator/state.h" +#include "kernel/integrator/state_flow.h" +#include "kernel/integrator/state_util.h" + +#include "kernel/integrator/intersect_closest.h" +#include "kernel/integrator/intersect_shadow.h" +#include "kernel/integrator/intersect_subsurface.h" +#include "kernel/integrator/intersect_volume_stack.h" // clang-format on diff --git a/intern/cycles/kernel/device/optix/kernel_shader_raytrace.cu b/intern/cycles/kernel/device/optix/kernel_shader_raytrace.cu index bf787e29eaa..071e9deae0b 100644 --- a/intern/cycles/kernel/device/optix/kernel_shader_raytrace.cu +++ b/intern/cycles/kernel/device/optix/kernel_shader_raytrace.cu @@ -18,7 +18,8 @@ * much longer to compiler. This is only loaded when needed by the scene. */ #include "kernel/device/optix/kernel.cu" -#include "kernel/integrator/integrator_shade_surface.h" + +#include "kernel/integrator/shade_surface.h" extern "C" __global__ void __raygen__kernel_optix_integrator_shade_surface_raytrace() { |