From 557544f2c452303ff4b4d3af96551841e4d79040 Mon Sep 17 00:00:00 2001 From: Thomas Dinges Date: Sat, 16 Apr 2016 20:48:33 +0200 Subject: Cycles: Refactor Image Texture limits. Instead of treating Fermi GPU limits as default, and overriding them for other devices, we now nicely set them for each platform. * Due to setting values for all platforms, we don't have to offset the slot id for OpenCL anymore, as the image manager won't add float images for OpenCL now. * Bugfix: TEX_NUM_FLOAT_IMAGES was always 5, even for CPU, so the code in svm_image.h clamped float textures with alpha on CPU after the 5th slot. Reviewers: #cycles, brecht Reviewed By: #cycles, brecht Subscribers: brecht Differential Revision: https://developer.blender.org/D1925 --- intern/cycles/kernel/svm/svm_image.h | 15 ++++++---- intern/cycles/render/image.cpp | 57 +++++++++++++++++++++++------------- intern/cycles/render/image.h | 5 ++-- intern/cycles/render/scene.cpp | 5 +--- intern/cycles/render/scene.h | 4 +-- intern/cycles/util/util_texture.h | 38 ++++++++++++------------ 6 files changed, 69 insertions(+), 55 deletions(-) (limited to 'intern') diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h index 8f2b9423b2c..07ab2f28577 100644 --- a/intern/cycles/kernel/svm/svm_image.h +++ b/intern/cycles/kernel/svm/svm_image.h @@ -16,6 +16,15 @@ CCL_NAMESPACE_BEGIN +/* Float textures on various devices. 
*/ +#if defined(__KERNEL_CPU__) + #define TEX_NUM_FLOAT_IMAGES TEX_NUM_FLOAT_IMAGES_CPU +#elif defined(__KERNEL_CUDA__) + #define TEX_NUM_FLOAT_IMAGES TEX_NUM_FLOAT_IMAGES_CUDA +#else + #define TEX_NUM_FLOAT_IMAGES TEX_NUM_FLOAT_IMAGES_OPENCL +#endif + #ifdef __KERNEL_OPENCL__ /* For OpenCL all images are packed in a single array, and we do manual lookup @@ -50,12 +59,6 @@ ccl_device_inline float svm_image_texture_frac(float x, int *ix) ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, uint srgb, uint use_alpha) { - /* first slots are used by float textures, which are not supported here */ - if(id < TEX_NUM_FLOAT_IMAGES) - return make_float4(1.0f, 0.0f, 1.0f, 1.0f); - - id -= TEX_NUM_FLOAT_IMAGES; - uint4 info = kernel_tex_fetch(__tex_image_packed_info, id); uint width = info.x; uint height = info.y; diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp index 3bf3cae2b85..c0cbf0af968 100644 --- a/intern/cycles/render/image.cpp +++ b/intern/cycles/render/image.cpp @@ -30,16 +30,46 @@ CCL_NAMESPACE_BEGIN -ImageManager::ImageManager() +ImageManager::ImageManager(const DeviceInfo& info) { need_update = true; pack_images = false; osl_texture_system = NULL; animation_frame = 0; - tex_num_images = TEX_NUM_IMAGES; - tex_num_float_images = TEX_NUM_FLOAT_IMAGES; - tex_image_byte_start = TEX_IMAGE_BYTE_START; + /* Set image limits */ + + /* CPU */ + if(info.type == DEVICE_CPU) { + tex_num_byte_images = TEX_NUM_BYTE_IMAGES_CPU; + tex_num_float_images = TEX_NUM_FLOAT_IMAGES_CPU; + tex_image_byte_start = TEX_IMAGE_BYTE_START_CPU; + } + /* CUDA (Fermi) */ + else if((info.type == DEVICE_CUDA || info.type == DEVICE_MULTI) && !info.extended_images) { + tex_num_byte_images = TEX_NUM_BYTE_IMAGES_CUDA; + tex_num_float_images = TEX_NUM_FLOAT_IMAGES_CUDA; + tex_image_byte_start = TEX_IMAGE_BYTE_START_CUDA; + } + /* CUDA (Kepler and above) */ + else if((info.type == DEVICE_CUDA || info.type == DEVICE_MULTI) && info.extended_images) { + 
tex_num_byte_images = TEX_NUM_BYTE_IMAGES_CUDA_KEPLER; + tex_num_float_images = TEX_NUM_FLOAT_IMAGES_CUDA_KEPLER; + tex_image_byte_start = TEX_IMAGE_BYTE_START_CUDA_KELPER; + } + /* OpenCL */ + else if(info.pack_images) { + tex_num_byte_images = TEX_NUM_BYTE_IMAGES_OPENCL; + tex_num_float_images = TEX_NUM_FLOAT_IMAGES_OPENCL; + tex_image_byte_start = TEX_IMAGE_BYTE_START_OPENCL; + } + /* Should never happen */ + else { + tex_num_byte_images = 0; + tex_num_float_images = 0; + tex_image_byte_start = 0; + assert(0); + } } ImageManager::~ImageManager() @@ -60,21 +90,6 @@ void ImageManager::set_osl_texture_system(void *texture_system) osl_texture_system = texture_system; } -void ImageManager::set_extended_image_limits(const DeviceInfo& info) -{ - if(info.type == DEVICE_CPU) { - tex_num_images = TEX_EXTENDED_NUM_IMAGES_CPU; - tex_num_float_images = TEX_EXTENDED_NUM_FLOAT_IMAGES; - tex_image_byte_start = TEX_EXTENDED_IMAGE_BYTE_START; - } - else if((info.type == DEVICE_CUDA || info.type == DEVICE_MULTI) && info.extended_images) { - tex_num_images = TEX_EXTENDED_NUM_IMAGES_GPU; - } - else if(info.pack_images) { - tex_num_images = TEX_PACKED_NUM_IMAGES; - } -} - bool ImageManager::set_animation_frame_update(int frame) { if(frame != animation_frame) { @@ -267,9 +282,9 @@ int ImageManager::add_image(const string& filename, if(slot == images.size()) { /* max images limit reached */ - if(images.size() == tex_num_images) { + if(images.size() == tex_num_byte_images) { printf("ImageManager::add_image: byte image limit reached %d, skipping '%s'\n", - tex_num_images, filename.c_str()); + tex_num_byte_images, filename.c_str()); return -1; } diff --git a/intern/cycles/render/image.h b/intern/cycles/render/image.h index e4fb8001b78..64798d75638 100644 --- a/intern/cycles/render/image.h +++ b/intern/cycles/render/image.h @@ -32,7 +32,7 @@ class Progress; class ImageManager { public: - ImageManager(); + ImageManager(const DeviceInfo& info); ~ImageManager(); int add_image(const string& 
filename, @@ -62,7 +62,6 @@ public: void set_osl_texture_system(void *texture_system); void set_pack_images(bool pack_images_); - void set_extended_image_limits(const DeviceInfo& info); bool set_animation_frame_update(int frame); bool need_update; @@ -86,7 +85,7 @@ public: }; private: - int tex_num_images; + int tex_num_byte_images; int tex_num_float_images; int tex_image_byte_start; thread_mutex device_mutex; diff --git a/intern/cycles/render/scene.cpp b/intern/cycles/render/scene.cpp index ece4919dedb..29163c53109 100644 --- a/intern/cycles/render/scene.cpp +++ b/intern/cycles/render/scene.cpp @@ -54,7 +54,7 @@ Scene::Scene(const SceneParams& params_, const DeviceInfo& device_info_) mesh_manager = new MeshManager(); object_manager = new ObjectManager(); integrator = new Integrator(); - image_manager = new ImageManager(); + image_manager = new ImageManager(device_info_); particle_system_manager = new ParticleSystemManager(); curve_system_manager = new CurveSystemManager(); bake_manager = new BakeManager(); @@ -64,9 +64,6 @@ Scene::Scene(const SceneParams& params_, const DeviceInfo& device_info_) shader_manager = ShaderManager::create(this, params.shadingsystem); else shader_manager = ShaderManager::create(this, SHADINGSYSTEM_SVM); - - /* Extended image limits for CPU and GPUs */ - image_manager->set_extended_image_limits(device_info_); } Scene::~Scene() diff --git a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h index db70ae3c176..d30a0cb45fe 100644 --- a/intern/cycles/render/scene.h +++ b/intern/cycles/render/scene.h @@ -109,8 +109,8 @@ public: device_vector sobol_directions; /* cpu images */ - device_vector tex_image[TEX_EXTENDED_NUM_IMAGES_CPU]; - device_vector tex_float_image[TEX_EXTENDED_NUM_FLOAT_IMAGES]; + device_vector tex_image[TEX_NUM_BYTE_IMAGES_CPU]; + device_vector tex_float_image[TEX_NUM_FLOAT_IMAGES_CPU]; /* opencl images */ device_vector tex_image_packed; diff --git a/intern/cycles/util/util_texture.h 
b/intern/cycles/util/util_texture.h index 0ae267571d2..2b6b8e743fb 100644 --- a/intern/cycles/util/util_texture.h +++ b/intern/cycles/util/util_texture.h @@ -21,26 +21,26 @@ CCL_NAMESPACE_BEGIN /* Texture limits on various devices. */ -#define TEX_NUM_FLOAT_IMAGES 5 +/* CPU */ +#define TEX_NUM_BYTE_IMAGES_CPU 1024 +#define TEX_NUM_FLOAT_IMAGES_CPU 1024 +#define TEX_IMAGE_BYTE_START_CPU TEX_NUM_FLOAT_IMAGES_CPU + +/* CUDA (Fermi) */ +#define TEX_NUM_BYTE_IMAGES_CUDA 88 +#define TEX_NUM_FLOAT_IMAGES_CUDA 5 +#define TEX_IMAGE_BYTE_START_CUDA TEX_NUM_FLOAT_IMAGES_CUDA + +/* CUDA (KEPLER and above) */ +#define TEX_NUM_BYTE_IMAGES_CUDA_KEPLER 145 +#define TEX_NUM_FLOAT_IMAGES_CUDA_KEPLER 5 +#define TEX_IMAGE_BYTE_START_CUDA_KELPER TEX_NUM_FLOAT_IMAGES_CUDA_KEPLER + +/* OpenCL */ +#define TEX_NUM_BYTE_IMAGES_OPENCL 1024 +#define TEX_NUM_FLOAT_IMAGES_OPENCL 0 +#define TEX_IMAGE_BYTE_START_OPENCL TEX_NUM_FLOAT_IMAGES_OPENCL -/* generic */ -#define TEX_NUM_IMAGES 88 -#define TEX_IMAGE_BYTE_START TEX_NUM_FLOAT_IMAGES - -/* extended gpu */ -#define TEX_EXTENDED_NUM_IMAGES_GPU 145 - -/* extended cpu */ -#define TEX_EXTENDED_NUM_FLOAT_IMAGES 1024 -#define TEX_EXTENDED_NUM_IMAGES_CPU 1024 -#define TEX_EXTENDED_IMAGE_BYTE_START TEX_EXTENDED_NUM_FLOAT_IMAGES - -/* Limitations for packed images. - * - * Technically number of textures is unlimited, but it should in - * fact be in sync with CPU limitations. - */ -#define TEX_PACKED_NUM_IMAGES 1024 /* Color to use when textures are not found. */ #define TEX_IMAGE_MISSING_R 1 -- cgit v1.2.3