Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/device/cuda/device_cuda_impl.cpp')
-rw-r--r--intern/cycles/device/cuda/device_cuda_impl.cpp220
1 files changed, 106 insertions, 114 deletions
diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp b/intern/cycles/device/cuda/device_cuda_impl.cpp
index 64c7f5e7d34..3a2eb8df95b 100644
--- a/intern/cycles/device/cuda/device_cuda_impl.cpp
+++ b/intern/cycles/device/cuda/device_cuda_impl.cpp
@@ -105,7 +105,7 @@ class CUDASplitKernel : public DeviceSplitKernel {
virtual SplitKernelFunction *get_split_kernel_function(const string &kernel_name,
const DeviceRequestedFeatures &);
virtual int2 split_kernel_local_size();
- virtual int2 split_kernel_global_size(device_memory &kg, device_memory &data, DeviceTask *task);
+ virtual int2 split_kernel_global_size(device_memory &kg, device_memory &data, DeviceTask &task);
};
/* Utility to push/pop CUDA context. */
@@ -135,8 +135,10 @@ BVHLayoutMask CUDADevice::get_bvh_layout_mask() const
return BVH_LAYOUT_BVH2;
}
-void CUDADevice::cuda_error_documentation()
+void CUDADevice::set_error(const string &error)
{
+ Device::set_error(error);
+
if (first_error) {
fprintf(stderr, "\nRefer to the Cycles GPU rendering documentation for possible solutions:\n");
fprintf(stderr,
@@ -148,42 +150,13 @@ void CUDADevice::cuda_error_documentation()
# define cuda_assert(stmt) \
{ \
CUresult result = stmt; \
-\
if (result != CUDA_SUCCESS) { \
- string message = string_printf( \
- "CUDA error: %s in %s, line %d", cuewErrorString(result), #stmt, __LINE__); \
- if (error_msg == "") \
- error_msg = message; \
- fprintf(stderr, "%s\n", message.c_str()); \
- /*cuda_abort();*/ \
- cuda_error_documentation(); \
+ const char *name = cuewErrorString(result); \
+ set_error(string_printf("%s in %s (device_cuda_impl.cpp:%d)", name, #stmt, __LINE__)); \
} \
} \
(void)0
-bool CUDADevice::cuda_error_(CUresult result, const string &stmt)
-{
- if (result == CUDA_SUCCESS)
- return false;
-
- string message = string_printf("CUDA error at %s: %s", stmt.c_str(), cuewErrorString(result));
- if (error_msg == "")
- error_msg = message;
- fprintf(stderr, "%s\n", message.c_str());
- cuda_error_documentation();
- return true;
-}
-
-# define cuda_error(stmt) cuda_error_(stmt, # stmt)
-
-void CUDADevice::cuda_error_message(const string &message)
-{
- if (error_msg == "")
- error_msg = message;
- fprintf(stderr, "%s\n", message.c_str());
- cuda_error_documentation();
-}
-
CUDADevice::CUDADevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background_)
: Device(info, stats, profiler, background_), texture_info(this, "__texture_info", MEM_GLOBAL)
{
@@ -212,12 +185,19 @@ CUDADevice::CUDADevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool
functions.loaded = false;
/* Initialize CUDA. */
- if (cuda_error(cuInit(0)))
+ CUresult result = cuInit(0);
+ if (result != CUDA_SUCCESS) {
+ set_error(string_printf("Failed to initialize CUDA runtime (%s)", cuewErrorString(result)));
return;
+ }
/* Setup device and context. */
- if (cuda_error(cuDeviceGet(&cuDevice, cuDevId)))
+ result = cuDeviceGet(&cuDevice, cuDevId);
+ if (result != CUDA_SUCCESS) {
+ set_error(string_printf("Failed to get CUDA device handle from ordinal (%s)",
+ cuewErrorString(result)));
return;
+ }
/* CU_CTX_MAP_HOST for mapping host memory when out of device memory.
* CU_CTX_LMEM_RESIZE_TO_MAX for reserving local memory ahead of render,
@@ -235,8 +215,6 @@ CUDADevice::CUDADevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool
}
/* Create context. */
- CUresult result;
-
if (background) {
result = cuCtxCreate(&cuContext, ctx_flags, cuDevice);
}
@@ -249,8 +227,10 @@ CUDADevice::CUDADevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool
}
}
- if (cuda_error_(result, "cuCtxCreate"))
+ if (result != CUDA_SUCCESS) {
+ set_error(string_printf("Failed to create CUDA context (%s)", cuewErrorString(result)));
return;
+ }
int major, minor;
cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId);
@@ -263,7 +243,7 @@ CUDADevice::CUDADevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool
CUDADevice::~CUDADevice()
{
- task_pool.stop();
+ task_pool.cancel();
delete split_kernel;
@@ -280,10 +260,8 @@ bool CUDADevice::support_device(const DeviceRequestedFeatures & /*requested_feat
/* We only support sm_30 and above */
if (major < 3) {
- cuda_error_message(
- string_printf("CUDA device supported only with compute capability 3.0 or up, found %d.%d.",
- major,
- minor));
+ set_error(string_printf(
+ "CUDA backend requires compute capability 3.0 or up, but found %d.%d.", major, minor));
return false;
}
@@ -319,13 +297,19 @@ bool CUDADevice::check_peer_access(Device *peer_device)
// Enable peer access in both directions
{
const CUDAContextScope scope(this);
- if (cuda_error(cuCtxEnablePeerAccess(peer_device_cuda->cuContext, 0))) {
+ CUresult result = cuCtxEnablePeerAccess(peer_device_cuda->cuContext, 0);
+ if (result != CUDA_SUCCESS) {
+ set_error(string_printf("Failed to enable peer access on CUDA context (%s)",
+ cuewErrorString(result)));
return false;
}
}
{
const CUDAContextScope scope(peer_device_cuda);
- if (cuda_error(cuCtxEnablePeerAccess(cuContext, 0))) {
+ CUresult result = cuCtxEnablePeerAccess(cuContext, 0);
+ if (result != CUDA_SUCCESS) {
+ set_error(string_printf("Failed to enable peer access on CUDA context (%s)",
+ cuewErrorString(result)));
return false;
}
}
@@ -399,11 +383,24 @@ string CUDADevice::compile_kernel(const DeviceRequestedFeatures &requested_featu
}
}
- const string ptx = path_get(string_printf("lib/%s_compute_%d%d.ptx", name, major, minor));
- VLOG(1) << "Testing for pre-compiled kernel " << ptx << ".";
- if (path_exists(ptx)) {
- VLOG(1) << "Using precompiled kernel.";
- return ptx;
+ /* The driver can JIT-compile PTX generated for older generations, so find the closest one. */
+ int ptx_major = major, ptx_minor = minor;
+ while (ptx_major >= 3) {
+ const string ptx = path_get(
+ string_printf("lib/%s_compute_%d%d.ptx", name, ptx_major, ptx_minor));
+ VLOG(1) << "Testing for pre-compiled kernel " << ptx << ".";
+ if (path_exists(ptx)) {
+ VLOG(1) << "Using precompiled kernel.";
+ return ptx;
+ }
+
+ if (ptx_minor > 0) {
+ ptx_minor--;
+ }
+ else {
+ ptx_major--;
+ ptx_minor = 9;
+ }
}
}
@@ -432,14 +429,14 @@ string CUDADevice::compile_kernel(const DeviceRequestedFeatures &requested_featu
# ifdef _WIN32
if (!use_adaptive_compilation() && have_precompiled_kernels()) {
if (major < 3) {
- cuda_error_message(
- string_printf("CUDA device requires compute capability 3.0 or up, "
- "found %d.%d. Your GPU is not supported.",
+ set_error(
+ string_printf("CUDA backend requires compute capability 3.0 or up, but found %d.%d. "
+ "Your GPU is not supported.",
major,
minor));
}
else {
- cuda_error_message(
+ set_error(
string_printf("CUDA binary kernel for this graphics card compute "
"capability (%d.%d) not found.",
major,
@@ -452,7 +449,7 @@ string CUDADevice::compile_kernel(const DeviceRequestedFeatures &requested_featu
/* Compile. */
const char *const nvcc = cuewCompilerPath();
if (nvcc == NULL) {
- cuda_error_message(
+ set_error(
"CUDA nvcc compiler not found. "
"Install CUDA toolkit in default location.");
return string();
@@ -504,7 +501,7 @@ string CUDADevice::compile_kernel(const DeviceRequestedFeatures &requested_featu
command = "call " + command;
# endif
if (system(command.c_str()) != 0) {
- cuda_error_message(
+ set_error(
"Failed to execute compilation command, "
"see console for details.");
return string();
@@ -512,7 +509,7 @@ string CUDADevice::compile_kernel(const DeviceRequestedFeatures &requested_featu
/* Verify if compilation succeeded */
if (!path_exists(cubin)) {
- cuda_error_message(
+ set_error(
"CUDA kernel compilation failed, "
"see console for details.");
return string();
@@ -565,16 +562,19 @@ bool CUDADevice::load_kernels(const DeviceRequestedFeatures &requested_features)
else
result = CUDA_ERROR_FILE_NOT_FOUND;
- if (cuda_error_(result, "cuModuleLoad"))
- cuda_error_message(string_printf("Failed loading CUDA kernel %s.", cubin.c_str()));
+ if (result != CUDA_SUCCESS)
+ set_error(string_printf(
+ "Failed to load CUDA kernel from '%s' (%s)", cubin.c_str(), cuewErrorString(result)));
if (path_read_text(filter_cubin, cubin_data))
result = cuModuleLoadData(&cuFilterModule, cubin_data.c_str());
else
result = CUDA_ERROR_FILE_NOT_FOUND;
- if (cuda_error_(result, "cuModuleLoad"))
- cuda_error_message(string_printf("Failed loading CUDA kernel %s.", filter_cubin.c_str()));
+ if (result != CUDA_SUCCESS)
+ set_error(string_printf("Failed to load CUDA kernel from '%s' (%s)",
+ filter_cubin.c_str(),
+ cuewErrorString(result)));
if (result == CUDA_SUCCESS) {
reserve_local_memory(requested_features);
@@ -870,7 +870,7 @@ CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory &mem, size_t pitch_
if (mem_alloc_result != CUDA_SUCCESS) {
status = " failed, out of device and host memory";
- cuda_assert(mem_alloc_result);
+ set_error("System is out of GPU and shared host memory");
}
if (mem.name) {
@@ -1773,7 +1773,7 @@ void CUDADevice::denoise(RenderTile &rtile, DenoisingTask &denoising)
denoising.render_buffer.samples = rtile.sample;
denoising.buffer.gpu_temporary_mem = true;
- denoising.run_denoising(&rtile);
+ denoising.run_denoising(rtile);
}
void CUDADevice::adaptive_sampling_filter(uint filter_sample,
@@ -2339,11 +2339,11 @@ void CUDADevice::draw_pixels(device_memory &mem,
Device::draw_pixels(mem, y, w, h, width, height, dx, dy, dw, dh, transparent, draw_params);
}
-void CUDADevice::thread_run(DeviceTask *task)
+void CUDADevice::thread_run(DeviceTask &task)
{
CUDAContextScope scope(this);
- if (task->type == DeviceTask::RENDER) {
+ if (task.type == DeviceTask::RENDER) {
DeviceRequestedFeatures requested_features;
if (use_split_kernel()) {
if (split_kernel == NULL) {
@@ -2356,72 +2356,64 @@ void CUDADevice::thread_run(DeviceTask *task)
/* keep rendering tiles until done */
RenderTile tile;
- DenoisingTask denoising(this, *task);
+ DenoisingTask denoising(this, task);
- while (task->acquire_tile(this, tile, task->tile_types)) {
+ while (task.acquire_tile(this, tile, task.tile_types)) {
if (tile.task == RenderTile::PATH_TRACE) {
if (use_split_kernel()) {
device_only_memory<uchar> void_buffer(this, "void_buffer");
split_kernel->path_trace(task, tile, void_buffer, void_buffer);
}
else {
- render(*task, tile, work_tiles);
+ render(task, tile, work_tiles);
}
}
else if (tile.task == RenderTile::BAKE) {
- render(*task, tile, work_tiles);
+ render(task, tile, work_tiles);
}
else if (tile.task == RenderTile::DENOISE) {
tile.sample = tile.start_sample + tile.num_samples;
denoise(tile, denoising);
- task->update_progress(&tile, tile.w * tile.h);
+ task.update_progress(&tile, tile.w * tile.h);
}
- task->release_tile(tile);
+ task.release_tile(tile);
- if (task->get_cancel()) {
- if (task->need_finish_queue == false)
+ if (task.get_cancel()) {
+ if (task.need_finish_queue == false)
break;
}
}
work_tiles.free();
}
- else if (task->type == DeviceTask::SHADER) {
- shader(*task);
+ else if (task.type == DeviceTask::SHADER) {
+ shader(task);
cuda_assert(cuCtxSynchronize());
}
- else if (task->type == DeviceTask::DENOISE_BUFFER) {
+ else if (task.type == DeviceTask::DENOISE_BUFFER) {
RenderTile tile;
- tile.x = task->x;
- tile.y = task->y;
- tile.w = task->w;
- tile.h = task->h;
- tile.buffer = task->buffer;
- tile.sample = task->sample + task->num_samples;
- tile.num_samples = task->num_samples;
- tile.start_sample = task->sample;
- tile.offset = task->offset;
- tile.stride = task->stride;
- tile.buffers = task->buffers;
-
- DenoisingTask denoising(this, *task);
+ tile.x = task.x;
+ tile.y = task.y;
+ tile.w = task.w;
+ tile.h = task.h;
+ tile.buffer = task.buffer;
+ tile.sample = task.sample + task.num_samples;
+ tile.num_samples = task.num_samples;
+ tile.start_sample = task.sample;
+ tile.offset = task.offset;
+ tile.stride = task.stride;
+ tile.buffers = task.buffers;
+
+ DenoisingTask denoising(this, task);
denoise(tile, denoising);
- task->update_progress(&tile, tile.w * tile.h);
+ task.update_progress(&tile, tile.w * tile.h);
}
}
-class CUDADeviceTask : public DeviceTask {
- public:
- CUDADeviceTask(CUDADevice *device, DeviceTask &task) : DeviceTask(task)
- {
- run = function_bind(&CUDADevice::thread_run, device, this);
- }
-};
-
void CUDADevice::task_add(DeviceTask &task)
{
CUDAContextScope scope(this);
@@ -2437,7 +2429,10 @@ void CUDADevice::task_add(DeviceTask &task)
film_convert(task, task.buffer, task.rgba_byte, task.rgba_half);
}
else {
- task_pool.push(new CUDADeviceTask(this, task));
+ task_pool.push([=] {
+ DeviceTask task_copy = task;
+ thread_run(task_copy);
+ });
}
}
@@ -2458,14 +2453,10 @@ void CUDADevice::task_cancel()
# define cuda_assert(stmt) \
{ \
CUresult result = stmt; \
-\
if (result != CUDA_SUCCESS) { \
- string message = string_printf("CUDA error: %s in %s", cuewErrorString(result), #stmt); \
- if (device->error_msg == "") \
- device->error_msg = message; \
- fprintf(stderr, "%s\n", message.c_str()); \
- /*cuda_abort();*/ \
- device->cuda_error_documentation(); \
+ const char *name = cuewErrorString(result); \
+ device->set_error( \
+ string_printf("%s in %s (device_cuda_impl.cpp:%d)", name, #stmt, __LINE__)); \
} \
} \
(void)0
@@ -2647,14 +2638,15 @@ bool CUDASplitKernel::enqueue_split_kernel_data_init(const KernelDimensions &dim
SplitKernelFunction *CUDASplitKernel::get_split_kernel_function(const string &kernel_name,
const DeviceRequestedFeatures &)
{
- CUDAContextScope scope(device);
- CUfunction func;
+ const CUDAContextScope scope(device);
- cuda_assert(
- cuModuleGetFunction(&func, device->cuModule, (string("kernel_cuda_") + kernel_name).data()));
- if (device->have_error()) {
- device->cuda_error_message(
- string_printf("kernel \"kernel_cuda_%s\" not found in module", kernel_name.data()));
+ CUfunction func;
+ const CUresult result = cuModuleGetFunction(
+ &func, device->cuModule, (string("kernel_cuda_") + kernel_name).data());
+ if (result != CUDA_SUCCESS) {
+ device->set_error(string_printf("Could not find kernel \"kernel_cuda_%s\" in module (%s)",
+ kernel_name.data(),
+ cuewErrorString(result)));
return NULL;
}
@@ -2668,7 +2660,7 @@ int2 CUDASplitKernel::split_kernel_local_size()
int2 CUDASplitKernel::split_kernel_global_size(device_memory &kg,
device_memory &data,
- DeviceTask * /*task*/)
+ DeviceTask & /*task*/)
{
CUDAContextScope scope(device);
size_t free;