diff options
Diffstat (limited to 'intern/cycles/device/device_cuda.cpp')
-rw-r--r-- | intern/cycles/device/device_cuda.cpp | 93 |
1 files changed, 62 insertions, 31 deletions
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 79a1a2b7fe1..eb861d79a8c 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -185,7 +185,7 @@ public: cuda_assert(cuCtxDestroy(cuContext)); } - bool support_device(bool experimental) + bool support_device(bool /*experimental*/) { int major, minor; cuDeviceComputeCapability(&major, &minor, cuDevId); @@ -266,7 +266,7 @@ public: printf("CUDA version %d.%d detected, build may succeed but only CUDA 6.5 is officially supported.\n", cuda_version/10, cuda_version%10); /* compile */ - string kernel = path_join(kernel_path, "kernel.cu"); + string kernel = path_join(kernel_path, path_join("kernels", path_join("cuda", "kernel.cu"))); string include = kernel_path; const int machine = system_cpu_bits(); @@ -281,7 +281,7 @@ public: nvcc, major, minor, machine, kernel.c_str(), cubin.c_str(), include.c_str(), cuda_version); if(experimental) - command += " -D__KERNEL_CUDA_EXPERIMENTAL__"; + command += " -D__KERNEL_EXPERIMENTAL__"; if(getenv("CYCLES_CUDA_EXTRA_CFLAGS")) { command += string(" ") + getenv("CYCLES_CUDA_EXTRA_CFLAGS"); @@ -309,18 +309,18 @@ public: return cubin; } - bool load_kernels(bool experimental) + bool load_kernels(const DeviceRequestedFeatures& requested_features) { /* check if cuda init succeeded */ if(cuContext == 0) return false; /* check if GPU is supported */ - if(!support_device(experimental)) + if(!support_device(requested_features.experimental)) return false; /* get kernel */ - string cubin = compile_kernel(experimental); + string cubin = compile_kernel(requested_features.experimental); if(cubin == "") return false; @@ -331,7 +331,7 @@ public: string cubin_data; CUresult result; - if (path_read_text(cubin, cubin_data)) + if(path_read_text(cubin, cubin_data)) result = cuModuleLoadData(&cuModule, cubin_data.c_str()); else result = CUDA_ERROR_FILE_NOT_FOUND; @@ -344,7 +344,7 @@ public: return (result == CUDA_SUCCESS); } - void mem_alloc(device_memory& mem, MemoryType type) + void mem_alloc(device_memory& mem, MemoryType /*type*/) { cuda_push_context(); CUdeviceptr device_pointer; @@ -419,6 +419,7 @@ public: void tex_alloc(const char *name, device_memory& mem, InterpolationType interpolation, bool periodic) { /* todo: support 3D textures, only CPU for now */ + VLOG(1) << "Texture allocate: " << name << ", " << mem.memory_size() << " bytes."; /* determine format */ CUarray_format_enum format; @@ -483,7 +484,7 @@ public: if(interpolation == INTERPOLATION_CLOSEST) { cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_POINT)); } - else if (interpolation == INTERPOLATION_LINEAR) { + else if(interpolation == INTERPOLATION_LINEAR) { cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_LINEAR)); } else {/* CUBIC and SMART are unsupported for CUDA */ @@ -879,11 +880,12 @@ public: } } - void draw_pixels(device_memory& mem, int y, int w, int h, int dy, int width, int height, bool transparent, + void draw_pixels(device_memory& mem, int y, int w, int h, int dx, int dy, int width, int height, bool transparent, const DeviceDrawParams &draw_params) { if(!background) { PixelMem pmem = pixel_mem_map[mem.device_pointer]; + float *vpointer; cuda_push_context(); @@ -917,23 +919,52 @@ public: draw_params.bind_display_space_shader_cb(); } - glPushMatrix(); - glTranslatef(0.0f, (float)dy, 0.0f); - - glBegin(GL_QUADS); - - glTexCoord2f(0.0f, 0.0f); - glVertex2f(0.0f, 0.0f); - glTexCoord2f((float)w/(float)pmem.w, 0.0f); - glVertex2f((float)width, 0.0f); - glTexCoord2f((float)w/(float)pmem.w, (float)h/(float)pmem.h); - glVertex2f((float)width, (float)height); - glTexCoord2f(0.0f, (float)h/(float)pmem.h); - glVertex2f(0.0f, (float)height); + if(!vertex_buffer) + glGenBuffers(1, &vertex_buffer); + + glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer); + /* invalidate old contents - avoids stalling if buffer is still waiting in queue to be rendered */ + glBufferData(GL_ARRAY_BUFFER, 16 * sizeof(float), NULL, GL_STREAM_DRAW); + + vpointer = (float *)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY); + + if(vpointer) { + /* texture coordinate - vertex pair */ + vpointer[0] = 0.0f; + vpointer[1] = 0.0f; + vpointer[2] = dx; + vpointer[3] = dy; + + vpointer[4] = (float)w/(float)pmem.w; + vpointer[5] = 0.0f; + vpointer[6] = (float)width + dx; + vpointer[7] = dy; + + vpointer[8] = (float)w/(float)pmem.w; + vpointer[9] = (float)h/(float)pmem.h; + vpointer[10] = (float)width + dx; + vpointer[11] = (float)height + dy; + + vpointer[12] = 0.0f; + vpointer[13] = (float)h/(float)pmem.h; + vpointer[14] = dx; + vpointer[15] = (float)height + dy; + + glUnmapBuffer(GL_ARRAY_BUFFER); + } + + glTexCoordPointer(2, GL_FLOAT, 4 * sizeof(float), 0); + glVertexPointer(2, GL_FLOAT, 4 * sizeof(float), (char *)NULL + 2 * sizeof(float)); + + glEnableClientState(GL_VERTEX_ARRAY); + glEnableClientState(GL_TEXTURE_COORD_ARRAY); + + glDrawArrays(GL_TRIANGLE_FAN, 0, 4); - glEnd(); + glDisableClientState(GL_TEXTURE_COORD_ARRAY); + glDisableClientState(GL_VERTEX_ARRAY); - glPopMatrix(); + glBindBuffer(GL_ARRAY_BUFFER, 0); if(draw_params.unbind_display_space_shader_cb) { draw_params.unbind_display_space_shader_cb(); @@ -950,7 +981,7 @@ public: return; } - Device::draw_pixels(mem, y, w, h, dy, width, height, transparent, draw_params); + Device::draw_pixels(mem, y, w, h, dx, dy, width, height, transparent, draw_params); } void thread_run(DeviceTask *task) @@ -966,7 +997,7 @@ public: int end_sample = tile.start_sample + tile.num_samples; for(int sample = start_sample; sample < end_sample; sample++) { - if (task->get_cancel()) { + if(task->get_cancel()) { if(task->need_finish_queue == false) break; } @@ -999,7 +1030,7 @@ public: } }; - int get_split_task_count(DeviceTask& task) + int get_split_task_count(DeviceTask& /*task*/) { return 1; } @@ -1035,12 +1066,12 @@ bool device_cuda_init(void) static bool initialized = false; static bool result = false; - if (initialized) + if(initialized) return result; initialized = true; int cuew_result = cuewInit(); - if (cuew_result == CUEW_SUCCESS) { + if(cuew_result == CUEW_SUCCESS) { VLOG(1) << "CUEW initialization succeeded"; if(CUDADevice::have_precompiled_kernels()) { VLOG(1) << "Found precompiled kernels"; @@ -1132,7 +1163,7 @@ string device_cuda_capabilities(void) if(result != CUDA_ERROR_NO_DEVICE) { return string("Error initializing CUDA: ") + cuewErrorString(result); } - return "No CUDA device found"; + return "No CUDA device found\n"; } int count; |