Diffstat (limited to 'intern/cycles/device/cuda/device_impl.cpp')
-rw-r--r-- | intern/cycles/device/cuda/device_impl.cpp | 140 |
1 file changed, 2 insertions, 138 deletions
diff --git a/intern/cycles/device/cuda/device_impl.cpp b/intern/cycles/device/cuda/device_impl.cpp
index 37fab8f8293..5e1a63c04df 100644
--- a/intern/cycles/device/cuda/device_impl.cpp
+++ b/intern/cycles/device/cuda/device_impl.cpp
@@ -31,7 +31,6 @@
 # include "util/util_logging.h"
 # include "util/util_map.h"
 # include "util/util_md5.h"
-# include "util/util_opengl.h"
 # include "util/util_path.h"
 # include "util/util_string.h"
 # include "util/util_system.h"
@@ -837,7 +836,7 @@ void CUDADevice::mem_copy_to(device_memory &mem)
   }
 }

-void CUDADevice::mem_copy_from(device_memory &mem, int y, int w, int h, int elem)
+void CUDADevice::mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem)
 {
   if (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) {
     assert(!"mem_copy_from not supported for textures.");
@@ -891,7 +890,7 @@ void CUDADevice::mem_free(device_memory &mem)
   }
 }

-device_ptr CUDADevice::mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/)
+device_ptr CUDADevice::mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_t /*size*/)
 {
   return (device_ptr)(((char *)mem.device_pointer) + mem.memory_elements_size(offset));
 }
@@ -1169,141 +1168,6 @@ void CUDADevice::tex_free(device_texture &mem)
   }
 }

-# if 0
-void CUDADevice::render(DeviceTask &task,
-                        RenderTile &rtile,
-                        device_vector<KernelWorkTile> &work_tiles)
-{
-  scoped_timer timer(&rtile.buffers->render_time);
-
-  if (have_error())
-    return;
-
-  CUDAContextScope scope(this);
-  CUfunction cuRender;
-
-  /* Get kernel function. */
-  if (rtile.task == RenderTile::BAKE) {
-    cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_bake"));
-  }
-  else {
-    cuda_assert(cuModuleGetFunction(&cuRender, cuModule, "kernel_cuda_path_trace"));
-  }
-
-  if (have_error()) {
-    return;
-  }
-
-  cuda_assert(cuFuncSetCacheConfig(cuRender, CU_FUNC_CACHE_PREFER_L1));
-
-  /* Allocate work tile. */
-  work_tiles.alloc(1);
-
-  KernelWorkTile *wtile = work_tiles.data();
-  wtile->x = rtile.x;
-  wtile->y = rtile.y;
-  wtile->w = rtile.w;
-  wtile->h = rtile.h;
-  wtile->offset = rtile.offset;
-  wtile->stride = rtile.stride;
-  wtile->buffer = (float *)(CUdeviceptr)rtile.buffer;
-
-  /* Prepare work size. More step samples render faster, but for now we
-   * remain conservative for GPUs connected to a display to avoid driver
-   * timeouts and display freezing. */
-  int min_blocks, num_threads_per_block;
-  cuda_assert(
-      cuOccupancyMaxPotentialBlockSize(&min_blocks, &num_threads_per_block, cuRender, NULL, 0, 0));
-  if (!info.display_device) {
-    min_blocks *= 8;
-  }
-
-  uint step_samples = divide_up(min_blocks * num_threads_per_block, wtile->w * wtile->h);
-
-  /* Render all samples. */
-  uint start_sample = rtile.start_sample;
-  uint end_sample = rtile.start_sample + rtile.num_samples;
-
-  for (int sample = start_sample; sample < end_sample;) {
-    /* Setup and copy work tile to device. */
-    wtile->start_sample = sample;
-    wtile->num_samples = step_samples;
-    if (task.adaptive_sampling.use) {
-      wtile->num_samples = task.adaptive_sampling.align_samples(sample, step_samples);
-    }
-    wtile->num_samples = min(wtile->num_samples, end_sample - sample);
-    work_tiles.copy_to_device();
-
-    CUdeviceptr d_work_tiles = (CUdeviceptr)work_tiles.device_pointer;
-    uint total_work_size = wtile->w * wtile->h * wtile->num_samples;
-    uint num_blocks = divide_up(total_work_size, num_threads_per_block);
-
-    /* Launch kernel. */
-    void *args[] = {&d_work_tiles, &total_work_size};
-
-    cuda_assert(
-        cuLaunchKernel(cuRender, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));
-
-    /* Run the adaptive sampling kernels at selected samples aligned to step samples. */
-    uint filter_sample = sample + wtile->num_samples - 1;
-    if (task.adaptive_sampling.use && task.adaptive_sampling.need_filter(filter_sample)) {
-      adaptive_sampling_filter(filter_sample, wtile, d_work_tiles);
-    }
-
-    cuda_assert(cuCtxSynchronize());
-
-    /* Update progress. */
-    sample += wtile->num_samples;
-    rtile.sample = sample;
-    task.update_progress(&rtile, rtile.w * rtile.h * wtile->num_samples);
-
-    if (task.get_cancel()) {
-      if (task.need_finish_queue == false)
-        break;
-    }
-  }
-
-  /* Finalize adaptive sampling. */
-  if (task.adaptive_sampling.use) {
-    CUdeviceptr d_work_tiles = (CUdeviceptr)work_tiles.device_pointer;
-    adaptive_sampling_post(rtile, wtile, d_work_tiles);
-    cuda_assert(cuCtxSynchronize());
-    task.update_progress(&rtile, rtile.w * rtile.h * wtile->num_samples);
-  }
-}
-
-void CUDADevice::thread_run(DeviceTask &task)
-{
-  CUDAContextScope scope(this);
-
-  if (task.type == DeviceTask::RENDER) {
-    device_vector<KernelWorkTile> work_tiles(this, "work_tiles", MEM_READ_ONLY);
-
-    /* keep rendering tiles until done */
-    RenderTile tile;
-    DenoisingTask denoising(this, task);
-
-    while (task.acquire_tile(this, tile, task.tile_types)) {
-      if (tile.task == RenderTile::PATH_TRACE) {
-        render(task, tile, work_tiles);
-      }
-      else if (tile.task == RenderTile::BAKE) {
-        render(task, tile, work_tiles);
-      }
-
-      task.release_tile(tile);
-
-      if (task.get_cancel()) {
-        if (task.need_finish_queue == false)
-          break;
-      }
-    }
-
-    work_tiles.free();
-  }
-}
-# endif
-
 unique_ptr<DeviceQueue> CUDADevice::gpu_queue_create()
 {
   return make_unique<CUDADeviceQueue>(this);
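
Note on the int -> size_t signature changes above: with 32-bit parameters, a byte-offset computation such as y * w * elem can wrap once a render buffer exceeds 4 GiB, which current GPUs can easily allocate. The following standalone sketch is illustrative only (it is not part of the commit, and the buffer dimensions are made up) and shows the wraparound that the wider types avoid:

    #include <cstdint>
    #include <cstdio>

    int main()
    {
      /* Hypothetical buffer: 32768 x 32768 pixels, 16 bytes per element (~16 GiB). */
      const size_t w = 32768, elem = 16;
      const size_t y = 20000; /* Row at which a partial copy starts. */

      /* Unsigned 32-bit arithmetic wraps modulo 2^32, yielding a bogus offset. */
      const uint32_t wrapped = (uint32_t)y * (uint32_t)w * (uint32_t)elem;

      /* size_t arithmetic (64-bit on LP64/LLP64 desktop platforms) stays exact. */
      const size_t exact = y * w * elem;

      printf("32-bit offset: %u\n", wrapped); /* 1895825408 -- wrong. */
      printf("size_t offset: %zu\n", exact);  /* 10485760000 -- correct. */
      return 0;
    }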