diff options
author | Campbell Barton <ideasman42@gmail.com> | 2017-09-28 07:49:38 +0300 |
---|---|---|
committer | Campbell Barton <ideasman42@gmail.com> | 2017-09-28 07:51:43 +0300 |
commit | 8b1d8bfc5aec8bff6bc0b19fcff5b1a98d9164fb (patch) | |
tree | 5bd186e2ba79072d0811f226731b201d3b10df12 | |
parent | 0531217925168d963b649f61eb7e8a926829b62f (diff) | |
parent | 0ae1a1ed48863403fed1e2de72313d231bd56f56 (diff) |
Merge branch 'master' into soc-2016-pbvh-painting [soc-2016-pbvh-painting]
-rw-r--r-- | CMakeLists.txt | 1 | ||||
-rw-r--r-- | build_files/build_environment/windows/build_deps.cmd | 8 | ||||
-rw-r--r-- | intern/cycles/device/device_cuda.cpp | 178 | ||||
-rw-r--r-- | source/blender/blenkernel/BKE_pbvh.h | 2 | ||||
-rw-r--r-- | source/blender/blenkernel/intern/CCGSubSurf.c | 1 | ||||
-rw-r--r-- | source/blender/blenkernel/intern/paint.c | 4 | ||||
-rw-r--r-- | source/blender/blenkernel/intern/pbvh.c | 2 | ||||
-rw-r--r-- | source/blender/blenkernel/intern/subsurf_ccg.c | 11 | ||||
-rw-r--r-- | source/blender/blenloader/intern/versioning_270.c | 18 | ||||
-rw-r--r-- | source/blender/blenloader/intern/versioning_defaults.c | 14 | ||||
-rw-r--r-- | source/blender/editors/sculpt_paint/paint_image.c | 4 | ||||
-rw-r--r-- | source/blender/editors/sculpt_paint/paint_intern.h | 1 | ||||
-rw-r--r-- | source/blender/editors/sculpt_paint/paint_vertex.c | 25 | ||||
-rw-r--r-- | source/blender/editors/sculpt_paint/sculpt.c | 36 | ||||
-rw-r--r-- | source/blender/editors/sculpt_paint/sculpt_intern.h | 43 | ||||
-rw-r--r-- | source/blender/python/generic/bgl.c | 135 |
16 files changed, 241 insertions, 242 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 7a357721387..6f43049731b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1472,6 +1472,7 @@ elseif(CMAKE_C_COMPILER_ID MATCHES "MSVC") "/wd4800" # forcing value to bool 'true' or 'false' # errors: "/we4013" # 'function' undefined; assuming extern returning int + "/we4133" # incompatible pointer types "/we4431" # missing type specifier - int assumed ) diff --git a/build_files/build_environment/windows/build_deps.cmd b/build_files/build_environment/windows/build_deps.cmd index a18eb085e4f..96280014df2 100644 --- a/build_files/build_environment/windows/build_deps.cmd +++ b/build_files/build_environment/windows/build_deps.cmd @@ -101,8 +101,8 @@ cd %Staging%\%BuildDir%%ARCH%R echo %DATE% %TIME% : Start > %StatusFile% cmake -G "%CMAKE_BUILDER%" %SOURCE_DIR% -DDOWNLOAD_DIR=%BUILD_DIR%/downloads -DBUILD_MODE=Release -DHARVEST_TARGET=%HARVEST_DIR%/%HARVESTROOT%%VSVER_SHORT%/ echo %DATE% %TIME% : Release Configuration done >> %StatusFile% -msbuild /m "ll.vcxproj" /p:Configuration=Release /fl /flp:logfile=BlenderDeps_llvm.log -msbuild /m "BlenderDependencies.sln" /p:Configuration=Release /fl /flp:logfile=BlenderDeps.log +msbuild /m "ll.vcxproj" /p:Configuration=Release /fl /flp:logfile=BlenderDeps_llvm.log;Verbosity=normal +msbuild /m "BlenderDependencies.sln" /p:Configuration=Release /fl /flp:logfile=BlenderDeps.log;Verbosity=minimal /verbosity:minimal echo %DATE% %TIME% : Release Build done >> %StatusFile% cmake --build . 
--target Harvest_Release_Results > Harvest_Release.txt echo %DATE% %TIME% : Release Harvest done >> %StatusFile% @@ -111,8 +111,8 @@ mkdir %STAGING%\%BuildDir%%ARCH%D cd %Staging%\%BuildDir%%ARCH%D cmake -G "%CMAKE_BUILDER%" %SOURCE_DIR% -DDOWNLOAD_DIR=%BUILD_DIR%/downloads -DCMAKE_BUILD_TYPE=Debug -DBUILD_MODE=Debug -DHARVEST_TARGET=%HARVEST_DIR%/%HARVESTROOT%%VSVER_SHORT%/ %CMAKE_DEBUG_OPTIONS% echo %DATE% %TIME% : Debug Configuration done >> %StatusFile% -msbuild /m "ll.vcxproj" /p:Configuration=Debug /fl /flp:logfile=BlenderDeps_llvm.log -msbuild /m "BlenderDependencies.sln" /p:Configuration=Debug /fl /flp:logfile=BlenderDeps.log +msbuild /m "ll.vcxproj" /p:Configuration=Debug /fl /flp:logfile=BlenderDeps_llvm.log;;Verbosity=normal +msbuild /m "BlenderDependencies.sln" /p:Configuration=Debug /verbosity:n /fl /flp:logfile=BlenderDeps.log;;Verbosity=normal echo %DATE% %TIME% : Debug Build done >> %StatusFile% cmake --build . --target Harvest_Debug_Results> Harvest_Debug.txt echo %DATE% %TIME% : Debug Harvest done >> %StatusFile% diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 3b75142ee67..29b5bd70789 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -111,6 +111,16 @@ public: virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask *task); }; +/* Utility to push/pop CUDA context. 
*/ +class CUDAContextScope { +public: + CUDAContextScope(CUDADevice *device); + ~CUDAContextScope(); + +private: + CUDADevice *device; +}; + class CUDADevice : public Device { public: @@ -206,16 +216,6 @@ public: cuda_error_documentation(); } - void cuda_push_context() - { - cuda_assert(cuCtxSetCurrent(cuContext)); - } - - void cuda_pop_context() - { - cuda_assert(cuCtxSetCurrent(NULL)); - } - CUDADevice(DeviceInfo& info, Stats &stats, bool background_) : Device(info, stats, background_) { @@ -263,7 +263,8 @@ public: cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId); cuDevArchitecture = major*100 + minor*10; - cuda_pop_context(); + /* Pop context set by cuCtxCreate. */ + cuCtxPopCurrent(NULL); } ~CUDADevice() @@ -519,7 +520,7 @@ public: return false; /* open module */ - cuda_push_context(); + CUDAContextScope scope(this); string cubin_data; CUresult result; @@ -540,8 +541,6 @@ public: if(cuda_error_(result, "cuModuleLoad")) cuda_error_message(string_printf("Failed loading CUDA kernel %s.", filter_cubin.c_str())); - cuda_pop_context(); - return (result == CUDA_SUCCESS); } @@ -556,36 +555,36 @@ public: void mem_alloc(const char *name, device_memory& mem, MemoryType /*type*/) { + CUDAContextScope scope(this); + if(name) { VLOG(1) << "Buffer allocate: " << name << ", " << string_human_readable_number(mem.memory_size()) << " bytes. 
(" << string_human_readable_size(mem.memory_size()) << ")"; } - cuda_push_context(); CUdeviceptr device_pointer; size_t size = mem.memory_size(); cuda_assert(cuMemAlloc(&device_pointer, size)); mem.device_pointer = (device_ptr)device_pointer; mem.device_size = size; stats.mem_alloc(size); - cuda_pop_context(); } void mem_copy_to(device_memory& mem) { - cuda_push_context(); + CUDAContextScope scope(this); + if(mem.device_pointer) cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), (void*)mem.data_pointer, mem.memory_size())); - cuda_pop_context(); } void mem_copy_from(device_memory& mem, int y, int w, int h, int elem) { + CUDAContextScope scope(this); size_t offset = elem*y*w; size_t size = elem*w*h; - cuda_push_context(); if(mem.device_pointer) { cuda_assert(cuMemcpyDtoH((uchar*)mem.data_pointer + offset, (CUdeviceptr)(mem.device_pointer + offset), size)); @@ -593,7 +592,6 @@ public: else { memset((char*)mem.data_pointer + offset, 0, size); } - cuda_pop_context(); } void mem_zero(device_memory& mem) @@ -602,18 +600,17 @@ public: memset((void*)mem.data_pointer, 0, mem.memory_size()); } - cuda_push_context(); - if(mem.device_pointer) + if(mem.device_pointer) { + CUDAContextScope scope(this); cuda_assert(cuMemsetD8(cuda_device_ptr(mem.device_pointer), 0, mem.memory_size())); - cuda_pop_context(); + } } void mem_free(device_memory& mem) { if(mem.device_pointer) { - cuda_push_context(); + CUDAContextScope scope(this); cuda_assert(cuMemFree(cuda_device_ptr(mem.device_pointer))); - cuda_pop_context(); mem.device_pointer = 0; @@ -629,14 +626,13 @@ public: void const_copy_to(const char *name, void *host, size_t size) { + CUDAContextScope scope(this); CUdeviceptr mem; size_t bytes; - cuda_push_context(); cuda_assert(cuModuleGetGlobal(&mem, &bytes, cuModule, name)); //assert(bytes == size); cuda_assert(cuMemcpyHtoD(mem, host, size)); - cuda_pop_context(); } void tex_alloc(const char *name, @@ -644,6 +640,8 @@ public: InterpolationType interpolation, ExtensionType 
extension) { + CUDAContextScope scope(this); + VLOG(1) << "Texture allocate: " << name << ", " << string_human_readable_number(mem.memory_size()) << " bytes. (" << string_human_readable_size(mem.memory_size()) << ")"; @@ -706,9 +704,7 @@ public: tokens[3].c_str()); } - cuda_push_context(); cuda_assert(cuModuleGetTexRef(&texref, cuModule, bind_name.c_str())); - cuda_pop_context(); if(!texref) { return; @@ -721,8 +717,6 @@ public: mem_alloc(NULL, mem, MEM_READ_ONLY); mem_copy_to(mem); - cuda_push_context(); - CUdeviceptr cumem; size_t cubytes; @@ -738,28 +732,20 @@ public: uint32_t ptr = (uint32_t)mem.device_pointer; cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes)); } - - cuda_pop_context(); } else { mem_alloc(NULL, mem, MEM_READ_ONLY); mem_copy_to(mem); - cuda_push_context(); - cuda_assert(cuTexRefSetAddress(NULL, texref, cuda_device_ptr(mem.device_pointer), size)); cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_POINT)); cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_READ_AS_INTEGER)); - - cuda_pop_context(); } } /* Texture Storage */ else { CUarray handle = NULL; - cuda_push_context(); - if(mem.data_depth > 1) { CUDA_ARRAY3D_DESCRIPTOR desc; @@ -784,7 +770,6 @@ public: } if(!handle) { - cuda_pop_context(); return; } @@ -877,14 +862,10 @@ public: cuda_assert(cuTexRefSetFilterMode(texref, filter_mode)); cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_NORMALIZED_COORDINATES)); } - - cuda_pop_context(); } /* Fermi, Data and Image Textures */ if(!has_bindless_textures) { - cuda_push_context(); - cuda_assert(cuTexRefSetAddressMode(texref, 0, address_mode)); cuda_assert(cuTexRefSetAddressMode(texref, 1, address_mode)); if(mem.data_depth > 1) { @@ -892,8 +873,6 @@ public: } cuda_assert(cuTexRefSetFormat(texref, format, mem.data_elements)); - - cuda_pop_context(); } /* Fermi and Kepler */ @@ -904,9 +883,8 @@ public: { if(mem.device_pointer) { if(tex_interp_map[mem.device_pointer]) { - cuda_push_context(); + CUDAContextScope scope(this); 
cuArrayDestroy((CUarray)mem.device_pointer); - cuda_pop_context(); /* Free CUtexObject (Bindless Textures) */ if(info.has_bindless_textures && tex_bindless_map[mem.device_pointer]) { @@ -960,7 +938,7 @@ public: if(have_error()) return false; - cuda_push_context(); + CUDAContextScope scope(this); int4 rect = task->rect; int w = align_up(rect.z-rect.x, 4); @@ -1017,7 +995,6 @@ public: CUDA_LAUNCH_KERNEL(cuNLMNormalize, normalize_args); cuda_assert(cuCtxSynchronize()); - cuda_pop_context(); return !have_error(); } @@ -1026,7 +1003,7 @@ public: if(have_error()) return false; - cuda_push_context(); + CUDAContextScope scope(this); CUfunction cuFilterConstructTransform; cuda_assert(cuModuleGetFunction(&cuFilterConstructTransform, cuFilterModule, "kernel_cuda_filter_construct_transform")); @@ -1046,7 +1023,6 @@ public: CUDA_LAUNCH_KERNEL(cuFilterConstructTransform, args); cuda_assert(cuCtxSynchronize()); - cuda_pop_context(); return !have_error(); } @@ -1058,11 +1034,11 @@ public: if(have_error()) return false; + CUDAContextScope scope(this); + mem_zero(task->storage.XtWX); mem_zero(task->storage.XtWY); - cuda_push_context(); - CUfunction cuNLMCalcDifference, cuNLMBlur, cuNLMCalcWeight, cuNLMConstructGramian, cuFinalize; cuda_assert(cuModuleGetFunction(&cuNLMCalcDifference, cuFilterModule, "kernel_cuda_filter_nlm_calc_difference")); cuda_assert(cuModuleGetFunction(&cuNLMBlur, cuFilterModule, "kernel_cuda_filter_nlm_blur")); @@ -1150,7 +1126,6 @@ public: CUDA_LAUNCH_KERNEL(cuFinalize, finalize_args); cuda_assert(cuCtxSynchronize()); - cuda_pop_context(); return !have_error(); } @@ -1161,7 +1136,7 @@ public: if(have_error()) return false; - cuda_push_context(); + CUDAContextScope scope(this); CUfunction cuFilterCombineHalves; cuda_assert(cuModuleGetFunction(&cuFilterCombineHalves, cuFilterModule, "kernel_cuda_filter_combine_halves")); @@ -1179,7 +1154,6 @@ public: CUDA_LAUNCH_KERNEL(cuFilterCombineHalves, args); cuda_assert(cuCtxSynchronize()); - cuda_pop_context(); return 
!have_error(); } @@ -1190,7 +1164,7 @@ public: if(have_error()) return false; - cuda_push_context(); + CUDAContextScope scope(this); CUfunction cuFilterDivideShadow; cuda_assert(cuModuleGetFunction(&cuFilterDivideShadow, cuFilterModule, "kernel_cuda_filter_divide_shadow")); @@ -1214,7 +1188,6 @@ public: CUDA_LAUNCH_KERNEL(cuFilterDivideShadow, args); cuda_assert(cuCtxSynchronize()); - cuda_pop_context(); return !have_error(); } @@ -1227,7 +1200,7 @@ public: if(have_error()) return false; - cuda_push_context(); + CUDAContextScope scope(this); CUfunction cuFilterGetFeature; cuda_assert(cuModuleGetFunction(&cuFilterGetFeature, cuFilterModule, "kernel_cuda_filter_get_feature")); @@ -1250,7 +1223,6 @@ public: CUDA_LAUNCH_KERNEL(cuFilterGetFeature, args); cuda_assert(cuCtxSynchronize()); - cuda_pop_context(); return !have_error(); } @@ -1263,7 +1235,7 @@ public: if(have_error()) return false; - cuda_push_context(); + CUDAContextScope scope(this); CUfunction cuFilterDetectOutliers; cuda_assert(cuModuleGetFunction(&cuFilterDetectOutliers, cuFilterModule, "kernel_cuda_filter_detect_outliers")); @@ -1282,7 +1254,6 @@ public: CUDA_LAUNCH_KERNEL(cuFilterDetectOutliers, args); cuda_assert(cuCtxSynchronize()); - cuda_pop_context(); return !have_error(); } @@ -1319,7 +1290,7 @@ public: if(have_error()) return; - cuda_push_context(); + CUDAContextScope scope(this); CUfunction cuPathTrace; CUdeviceptr d_buffer = cuda_device_ptr(rtile.buffer); @@ -1333,8 +1304,9 @@ public: cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace")); } - if(have_error()) + if(have_error()) { return; + } /* pass in parameters */ void *args[] = {&d_buffer, @@ -1370,8 +1342,6 @@ public: 0, 0, args, 0)); cuda_assert(cuCtxSynchronize()); - - cuda_pop_context(); } void film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half) @@ -1379,7 +1349,7 @@ public: if(have_error()) return; - cuda_push_context(); + CUDAContextScope scope(this); CUfunction 
cuFilmConvert; CUdeviceptr d_rgba = map_pixels((rgba_byte)? rgba_byte: rgba_half); @@ -1424,8 +1394,6 @@ public: 0, 0, args, 0)); unmap_pixels((rgba_byte)? rgba_byte: rgba_half); - - cuda_pop_context(); } void shader(DeviceTask& task) @@ -1433,7 +1401,7 @@ public: if(have_error()) return; - cuda_push_context(); + CUDAContextScope scope(this); CUfunction cuShader; CUdeviceptr d_input = cuda_device_ptr(task.shader_input); @@ -1498,8 +1466,6 @@ public: task.update_progress(NULL); } - - cuda_pop_context(); } CUdeviceptr map_pixels(device_ptr mem) @@ -1535,7 +1501,7 @@ public: pmem.w = mem.data_width; pmem.h = mem.data_height; - cuda_push_context(); + CUDAContextScope scope(this); glGenBuffers(1, &pmem.cuPBO); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO); @@ -1559,8 +1525,6 @@ public: CUresult result = cuGraphicsGLRegisterBuffer(&pmem.cuPBOresource, pmem.cuPBO, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE); if(result == CUDA_SUCCESS) { - cuda_pop_context(); - mem.device_pointer = pmem.cuTexId; pixel_mem_map[mem.device_pointer] = pmem; @@ -1574,8 +1538,6 @@ public: glDeleteBuffers(1, &pmem.cuPBO); glDeleteTextures(1, &pmem.cuTexId); - cuda_pop_context(); - background = true; } } @@ -1588,7 +1550,7 @@ public: if(!background) { PixelMem pmem = pixel_mem_map[mem.device_pointer]; - cuda_push_context(); + CUDAContextScope scope(this); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pmem.cuPBO); uchar *pixels = (uchar*)glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_READ_ONLY); @@ -1597,8 +1559,6 @@ public: glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - cuda_pop_context(); - return; } @@ -1611,14 +1571,12 @@ public: if(!background) { PixelMem pmem = pixel_mem_map[mem.device_pointer]; - cuda_push_context(); + CUDAContextScope scope(this); cuda_assert(cuGraphicsUnregisterResource(pmem.cuPBOresource)); glDeleteBuffers(1, &pmem.cuPBO); glDeleteTextures(1, &pmem.cuTexId); - cuda_pop_context(); - pixel_mem_map.erase(pixel_mem_map.find(mem.device_pointer)); 
mem.device_pointer = 0; @@ -1639,7 +1597,7 @@ public: PixelMem pmem = pixel_mem_map[mem.device_pointer]; float *vpointer; - cuda_push_context(); + CUDAContextScope scope(this); /* for multi devices, this assumes the inefficient method that we allocate * all pixels on the device even though we only render to a subset */ @@ -1728,8 +1686,6 @@ public: glBindTexture(GL_TEXTURE_2D, 0); glDisable(GL_TEXTURE_2D); - cuda_pop_context(); - return; } @@ -1738,6 +1694,8 @@ public: void thread_run(DeviceTask *task) { + CUDAContextScope scope(this); + if(task->type == DeviceTask::RENDER) { RenderTile tile; @@ -1805,9 +1763,7 @@ public: shader(*task); - cuda_push_context(); cuda_assert(cuCtxSynchronize()); - cuda_pop_context(); } } @@ -1828,12 +1784,11 @@ public: void task_add(DeviceTask& task) { if(task.type == DeviceTask::FILM_CONVERT) { + CUDAContextScope scope(this); + /* must be done in main thread due to opengl access */ film_convert(task, task.buffer, task.rgba_byte, task.rgba_half); - - cuda_push_context(); cuda_assert(cuCtxSynchronize()); - cuda_pop_context(); } else { task_pool.push(new CUDADeviceTask(this, task)); @@ -1852,6 +1807,7 @@ public: friend class CUDASplitKernelFunction; friend class CUDASplitKernel; + friend class CUDAContextScope; }; /* redefine the cuda_assert macro so it can be used outside of the CUDADevice class @@ -1872,6 +1828,20 @@ public: } \ } (void)0 + +/* CUDA context scope. 
*/ + +CUDAContextScope::CUDAContextScope(CUDADevice *device) +: device(device) +{ + cuda_assert(cuCtxPushCurrent(device->cuContext)); +} + +CUDAContextScope::~CUDAContextScope() +{ + cuda_assert(cuCtxPopCurrent(NULL)); +} + /* split kernel */ class CUDASplitKernelFunction : public SplitKernelFunction{ @@ -1889,11 +1859,11 @@ public: /* enqueue the kernel, returns false if there is an error */ bool enqueue(const KernelDimensions &dim, void *args[]) { - device->cuda_push_context(); - if(device->have_error()) return false; + CUDAContextScope scope(device); + /* we ignore dim.local_size for now, as this is faster */ int threads_per_block; cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, func)); @@ -1907,8 +1877,6 @@ public: threads_per_block, 1, 1, /* threads */ 0, 0, args, 0)); - device->cuda_pop_context(); - return !device->have_error(); } }; @@ -1919,12 +1887,12 @@ CUDASplitKernel::CUDASplitKernel(CUDADevice *device) : DeviceSplitKernel(device) uint64_t CUDASplitKernel::state_buffer_size(device_memory& /*kg*/, device_memory& /*data*/, size_t num_threads) { + CUDAContextScope scope(device); + device_vector<uint64_t> size_buffer; size_buffer.resize(1); device->mem_alloc(NULL, size_buffer, MEM_READ_WRITE); - device->cuda_push_context(); - uint threads = num_threads; CUdeviceptr d_size = device->cuda_device_ptr(size_buffer.device_pointer); @@ -1946,8 +1914,6 @@ uint64_t CUDASplitKernel::state_buffer_size(device_memory& /*kg*/, device_memory 1, 1, 1, 0, 0, (void**)&args, 0)); - device->cuda_pop_context(); - device->mem_copy_from(size_buffer, 0, 1, 1, sizeof(uint64_t)); device->mem_free(size_buffer); @@ -1965,7 +1931,7 @@ bool CUDASplitKernel::enqueue_split_kernel_data_init(const KernelDimensions& dim device_memory& use_queues_flag, device_memory& work_pool_wgs) { - device->cuda_push_context(); + CUDAContextScope scope(device); CUdeviceptr d_split_data = device->cuda_device_ptr(split_data.device_pointer); CUdeviceptr d_ray_state 
= device->cuda_device_ptr(ray_state.device_pointer); @@ -2029,26 +1995,21 @@ bool CUDASplitKernel::enqueue_split_kernel_data_init(const KernelDimensions& dim CUDASplitKernelFunction(device, data_init).enqueue(dim, (void**)&args); - device->cuda_pop_context(); - return !device->have_error(); } SplitKernelFunction* CUDASplitKernel::get_split_kernel_function(const string& kernel_name, const DeviceRequestedFeatures&) { + CUDAContextScope scope(device); CUfunction func; - device->cuda_push_context(); - cuda_assert(cuModuleGetFunction(&func, device->cuModule, (string("kernel_cuda_") + kernel_name).data())); if(device->have_error()) { device->cuda_error_message(string_printf("kernel \"kernel_cuda_%s\" not found in module", kernel_name.data())); return NULL; } - device->cuda_pop_context(); - return new CUDASplitKernelFunction(device, func); } @@ -2059,12 +2020,11 @@ int2 CUDASplitKernel::split_kernel_local_size() int2 CUDASplitKernel::split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask * /*task*/) { + CUDAContextScope scope(device); size_t free; size_t total; - device->cuda_push_context(); cuda_assert(cuMemGetInfo(&free, &total)); - device->cuda_pop_context(); VLOG(1) << "Maximum device allocation size: " << string_human_readable_number(free) << " bytes. 
(" diff --git a/source/blender/blenkernel/BKE_pbvh.h b/source/blender/blenkernel/BKE_pbvh.h index a8691796228..cc84be6e2c1 100644 --- a/source/blender/blenkernel/BKE_pbvh.h +++ b/source/blender/blenkernel/BKE_pbvh.h @@ -72,7 +72,7 @@ void BKE_pbvh_build_grids(PBVH *bvh, struct CCGElem **grid_elems, struct CCGKey *key, void **gridfaces, struct DMFlagMat *flagmats, unsigned int **grid_hidden); void BKE_pbvh_build_bmesh(PBVH *bvh, struct BMesh *bm, bool smooth_shading, struct BMLog *log, const int cd_vert_node_offset, const int cd_face_node_offset); -void BKE_pbvh_add_ccgdm(PBVH *bvh, struct CCGDerivedMesh *ccgdm); +void BKE_pbvh_set_ccgdm(PBVH *bvh, struct CCGDerivedMesh *ccgdm); void BKE_pbvh_free(PBVH *bvh); void BKE_pbvh_free_layer_disp(PBVH *bvh); diff --git a/source/blender/blenkernel/intern/CCGSubSurf.c b/source/blender/blenkernel/intern/CCGSubSurf.c index 7b74bbcba04..792e9195f12 100644 --- a/source/blender/blenkernel/intern/CCGSubSurf.c +++ b/source/blender/blenkernel/intern/CCGSubSurf.c @@ -1196,7 +1196,6 @@ int ccgSubSurf_getNumEdges(const CCGSubSurf *ss) } int ccgSubSurf_getNumFaces(const CCGSubSurf *ss) { - return ss->fMap->numEntries; } diff --git a/source/blender/blenkernel/intern/paint.c b/source/blender/blenkernel/intern/paint.c index 8341411dcb6..25ea6ad079f 100644 --- a/source/blender/blenkernel/intern/paint.c +++ b/source/blender/blenkernel/intern/paint.c @@ -675,19 +675,15 @@ void BKE_sculptsession_free_deformMats(SculptSession *ss) void BKE_sculptsession_free_vwpaint_data(struct SculptSession *ss) { - /* Free maps */ - /* Create maps */ struct SculptVertexPaintGeomMap *gmap = NULL; if (ss->mode_type == OB_MODE_VERTEX_PAINT) { gmap = &ss->mode.vpaint.gmap; - /* Free average, blur, and spray brush arrays */ MEM_SAFE_FREE(ss->mode.vpaint.previous_color); } else if (ss->mode_type == OB_MODE_WEIGHT_PAINT) { gmap = &ss->mode.wpaint.gmap; - /* Free average, blur, and spray brush arrays */ MEM_SAFE_FREE(ss->mode.wpaint.alpha_weight); 
MEM_SAFE_FREE(ss->mode.wpaint.previous_weight); } diff --git a/source/blender/blenkernel/intern/pbvh.c b/source/blender/blenkernel/intern/pbvh.c index 376c8c45561..4b154d3301c 100644 --- a/source/blender/blenkernel/intern/pbvh.c +++ b/source/blender/blenkernel/intern/pbvh.c @@ -607,7 +607,7 @@ void BKE_pbvh_build_grids(PBVH *bvh, CCGElem **grids, MEM_freeN(prim_bbc); } -void BKE_pbvh_add_ccgdm(PBVH *bvh, CCGDerivedMesh *ccgdm) { +void BKE_pbvh_set_ccgdm(PBVH *bvh, CCGDerivedMesh *ccgdm) { bvh->ccgdm = ccgdm; } diff --git a/source/blender/blenkernel/intern/subsurf_ccg.c b/source/blender/blenkernel/intern/subsurf_ccg.c index dafbb4c7e13..7c5ee42b7bc 100644 --- a/source/blender/blenkernel/intern/subsurf_ccg.c +++ b/source/blender/blenkernel/intern/subsurf_ccg.c @@ -4440,8 +4440,9 @@ static struct PBVH *ccgDM_getPBVH(Object *ob, DerivedMesh *dm) if (ccgdm->pbvh) { /* For vertex paint, keep track of ccgdm */ - if (!(ob->mode & OB_MODE_SCULPT)) - BKE_pbvh_add_ccgdm(ccgdm->pbvh, ccgdm); + if (!(ob->mode & OB_MODE_SCULPT)) { + BKE_pbvh_set_ccgdm(ccgdm->pbvh, ccgdm); + } return ccgdm->pbvh; } @@ -4480,9 +4481,9 @@ static struct PBVH *ccgDM_getPBVH(Object *ob, DerivedMesh *dm) pbvh_show_diffuse_color_set(ccgdm->pbvh, ob->sculpt->show_diffuse_color); /* For vertex paint, keep track of ccgdm */ - if (!(ob->mode & OB_MODE_SCULPT) && ccgdm->pbvh) - BKE_pbvh_add_ccgdm(ccgdm->pbvh, ccgdm); - + if (!(ob->mode & OB_MODE_SCULPT) && ccgdm->pbvh) { + BKE_pbvh_set_ccgdm(ccgdm->pbvh, ccgdm); + } return ccgdm->pbvh; } diff --git a/source/blender/blenloader/intern/versioning_270.c b/source/blender/blenloader/intern/versioning_270.c index 808914459fe..40fed37b1b0 100644 --- a/source/blender/blenloader/intern/versioning_270.c +++ b/source/blender/blenloader/intern/versioning_270.c @@ -1682,24 +1682,6 @@ void blo_do_versions_270(FileData *fd, Library *UNUSED(lib), Main *main) } } } - - { - Brush *br; - br = (Brush *)BKE_libblock_find_name_ex(main, ID_BR, "Average"); - if (!br) { - br = 
BKE_brush_add(main, "Average", OB_MODE_VERTEX_PAINT | OB_MODE_WEIGHT_PAINT); - br->vertexpaint_tool = PAINT_BLEND_AVERAGE; - br->ob_mode = OB_MODE_VERTEX_PAINT | OB_MODE_WEIGHT_PAINT; - } - - br = (Brush *)BKE_libblock_find_name_ex(main, ID_BR, "Smear"); - if (!br) { - br = BKE_brush_add(main, "Smear", OB_MODE_VERTEX_PAINT | OB_MODE_WEIGHT_PAINT); - br->vertexpaint_tool = PAINT_BLEND_SMEAR; - br->ob_mode = OB_MODE_VERTEX_PAINT | OB_MODE_WEIGHT_PAINT; - } - } - } } diff --git a/source/blender/blenloader/intern/versioning_defaults.c b/source/blender/blenloader/intern/versioning_defaults.c index eb8a72e12a7..3d3e73eb470 100644 --- a/source/blender/blenloader/intern/versioning_defaults.c +++ b/source/blender/blenloader/intern/versioning_defaults.c @@ -246,6 +246,20 @@ void BLO_update_defaults_startup_blend(Main *bmain) br->ob_mode = OB_MODE_TEXTURE_PAINT; } + /* Vertex/Weight Paint */ + br = (Brush *)BKE_libblock_find_name_ex(bmain, ID_BR, "Average"); + if (!br) { + br = BKE_brush_add(bmain, "Average", OB_MODE_VERTEX_PAINT | OB_MODE_WEIGHT_PAINT); + br->vertexpaint_tool = PAINT_BLEND_AVERAGE; + br->ob_mode = OB_MODE_VERTEX_PAINT | OB_MODE_WEIGHT_PAINT; + } + br = (Brush *)BKE_libblock_find_name_ex(bmain, ID_BR, "Smear"); + if (!br) { + br = BKE_brush_add(bmain, "Smear", OB_MODE_VERTEX_PAINT | OB_MODE_WEIGHT_PAINT); + br->vertexpaint_tool = PAINT_BLEND_SMEAR; + br->ob_mode = OB_MODE_VERTEX_PAINT | OB_MODE_WEIGHT_PAINT; + } + br = (Brush *)BKE_libblock_find_name_ex(bmain, ID_BR, "Mask"); if (br) { br->imagepaint_tool = PAINT_TOOL_MASK; diff --git a/source/blender/editors/sculpt_paint/paint_image.c b/source/blender/editors/sculpt_paint/paint_image.c index fc6b0122ed2..79ce440251d 100644 --- a/source/blender/editors/sculpt_paint/paint_image.c +++ b/source/blender/editors/sculpt_paint/paint_image.c @@ -1455,8 +1455,8 @@ static int brush_colors_flip_exec(bContext *C, wmOperator *UNUSED(op)) br = image_paint_brush(C); } else { - /* At the moment, wpaint does not support the 
color flipper. - * So for now we're only handling vpaint */ + /* At the moment, wpaint does not support the color flipper. + * So for now we're only handling vpaint */ ToolSettings *ts = CTX_data_tool_settings(C); VPaint *vp = ts->vpaint; br = BKE_paint_brush(&vp->paint); diff --git a/source/blender/editors/sculpt_paint/paint_intern.h b/source/blender/editors/sculpt_paint/paint_intern.h index b7706db3e37..0ec7d97a04d 100644 --- a/source/blender/editors/sculpt_paint/paint_intern.h +++ b/source/blender/editors/sculpt_paint/paint_intern.h @@ -54,7 +54,6 @@ struct wmOperator; struct wmOperatorType; struct wmWindowManager; struct DMCoNo; -struct MeshElemMap; enum PaintMode; /* paint_stroke.c */ diff --git a/source/blender/editors/sculpt_paint/paint_vertex.c b/source/blender/editors/sculpt_paint/paint_vertex.c index 7faa24e8bd2..a88b834d601 100644 --- a/source/blender/editors/sculpt_paint/paint_vertex.c +++ b/source/blender/editors/sculpt_paint/paint_vertex.c @@ -1725,8 +1725,7 @@ static void do_weight_paint_vertex( } -/**** Toggle operator for turning vertex paint mode on or off / -/ copied from sculpt.c ****/ +/* Toggle operator for turning vertex paint mode on or off (copied from sculpt.c) */ static void vertex_paint_init_session(Scene *scene, Object *ob) { if (ob->sculpt == NULL) { @@ -1862,9 +1861,9 @@ static int wpaint_mode_toggle_exec(bContext *C, wmOperator *op) ED_vgroup_sync_from_pose(ob); /* Create vertex/weight paint mode session data */ - if (ob->sculpt) + if (ob->sculpt) { BKE_sculptsession_free(ob); - + } vertex_paint_init_session(scene, ob); } @@ -1940,12 +1939,6 @@ struct WPaintData { int defbase_tot_sel; /* number of selected groups */ bool do_multipaint; /* true if multipaint enabled and multiple groups selected */ - /* variables for blur */ - struct { - MeshElemMap *vmap; - int *vmap_mem; - } blur_data; - int defbase_tot; }; @@ -2091,8 +2084,8 @@ static void vwpaint_update_cache_variants(bContext *C, VPaint *vd, Object *ob, P StrokeCache *cache = 
ss->cache; Brush *brush = BKE_paint_brush(&vd->paint); - /* This effects the actual brush radius, so things farther away */ - /* are compared with a larger radius and vise versa. */ + /* This effects the actual brush radius, so things farther away + * are compared with a larger radius and vise versa. */ if (cache->first_time) { RNA_float_get_array(ptr, "location", cache->true_location); } @@ -2100,10 +2093,10 @@ static void vwpaint_update_cache_variants(bContext *C, VPaint *vd, Object *ob, P RNA_float_get_array(ptr, "mouse", cache->mouse); /* XXX: Use pressure value from first brush step for brushes which don't - * support strokes (grab, thumb). They depends on initial state and - * brush coord/pressure/etc. - * It's more an events design issue, which doesn't split coordinate/pressure/angle - * changing events. We should avoid this after events system re-design */ + * support strokes (grab, thumb). They depends on initial state and + * brush coord/pressure/etc. + * It's more an events design issue, which doesn't split coordinate/pressure/angle + * changing events. 
We should avoid this after events system re-design */ if (paint_supports_dynamic_size(brush, ePaintSculpt) || cache->first_time) { cache->pressure = RNA_float_get(ptr, "pressure"); } diff --git a/source/blender/editors/sculpt_paint/sculpt.c b/source/blender/editors/sculpt_paint/sculpt.c index deaacd75d52..3c21fa5e9a2 100644 --- a/source/blender/editors/sculpt_paint/sculpt.c +++ b/source/blender/editors/sculpt_paint/sculpt.c @@ -172,9 +172,24 @@ typedef enum StrokeFlags { /************** Access to original unmodified vertex data *************/ +typedef struct { + BMLog *bm_log; + + SculptUndoNode *unode; + float (*coords)[3]; + short (*normals)[3]; + const float *vmasks; + + /* Original coordinate, normal, and mask */ + const float *co; + const short *no; + float mask; +} SculptOrigVertData; + + /* Initialize a SculptOrigVertData for accessing original vertex data; * handles BMesh, mesh, and multires */ -void sculpt_orig_vert_data_unode_init(SculptOrigVertData *data, +static void sculpt_orig_vert_data_unode_init(SculptOrigVertData *data, Object *ob, SculptUndoNode *unode) { @@ -196,7 +211,7 @@ void sculpt_orig_vert_data_unode_init(SculptOrigVertData *data, /* Initialize a SculptOrigVertData for accessing original vertex data; * handles BMesh, mesh, and multires */ -void sculpt_orig_vert_data_init(SculptOrigVertData *data, +static void sculpt_orig_vert_data_init(SculptOrigVertData *data, Object *ob, PBVHNode *node) { @@ -207,7 +222,7 @@ void sculpt_orig_vert_data_init(SculptOrigVertData *data, /* Update a SculptOrigVertData for a particular vertex from the PBVH * iterator */ -void sculpt_orig_vert_data_update(SculptOrigVertData *orig_data, +static void sculpt_orig_vert_data_update(SculptOrigVertData *orig_data, PBVHVertexIter *iter) { if (orig_data->unode->type == SCULPT_UNDO_COORDS) { @@ -291,6 +306,21 @@ static void sculpt_project_v3_normal_align(SculptSession *ss, const float normal madd_v3_v3fl(grab_delta, ss->cache->sculpt_normal_symm, (len_signed * 
normal_weight) * len_view_scale); } + +/** \name SculptProjectVector + * + * Fast-path for #project_plane_v3_v3v3 + * + * \{ */ + +typedef struct SculptProjectVector { + float plane[3]; + float len_sq; + float len_sq_inv_neg; + bool is_valid; + +} SculptProjectVector; + /** * \param plane Direction, can be any length. */ diff --git a/source/blender/editors/sculpt_paint/sculpt_intern.h b/source/blender/editors/sculpt_paint/sculpt_intern.h index 416109fabc0..850d4631311 100644 --- a/source/blender/editors/sculpt_paint/sculpt_intern.h +++ b/source/blender/editors/sculpt_paint/sculpt_intern.h @@ -46,6 +46,7 @@ struct bContext; struct KeyBlock; struct Object; struct SculptUndoNode; +struct SculptOrigVertData; int sculpt_mode_poll(struct bContext *C); int sculpt_mode_poll_view3d(struct bContext *C); @@ -117,32 +118,6 @@ typedef struct SculptUndoNode { char shapeName[sizeof(((KeyBlock *)0))->name]; } SculptUndoNode; -/************** Access to original unmodified vertex data *************/ - -typedef struct SculptOrigVertData { - struct BMLog *bm_log; - - SculptUndoNode *unode; - float(*coords)[3]; - short(*normals)[3]; - const float *vmasks; - - /* Original coordinate, normal, and mask */ - const float *co; - const short *no; - float mask; -} SculptOrigVertData; - - -void sculpt_orig_vert_data_unode_init(SculptOrigVertData *data, - Object *ob, - SculptUndoNode *unode); -void sculpt_orig_vert_data_init(SculptOrigVertData *data, - Object *ob, - PBVHNode *node); -void sculpt_orig_vert_data_update(SculptOrigVertData *orig_data, - PBVHVertexIter *iter); - /* Factor of brush to have rake point following behind * (could be configurable but this is reasonable default). 
*/ #define SCULPT_RAKE_BRUSH_FACTOR 0.25f @@ -152,20 +127,6 @@ struct SculptRakeData { float follow_co[3]; }; -/** \name SculptProjectVector -* -* Fast-path for #project_plane_v3_v3v3 -* -* \{ */ - -typedef struct SculptProjectVector { - float plane[3]; - float len_sq; - float len_sq_inv_neg; - bool is_valid; - -} SculptProjectVector; - /* Single struct used by all BLI_task threaded callbacks, let's avoid adding 10's of those... */ typedef struct SculptThreadedTaskData { bContext *C; @@ -194,7 +155,7 @@ typedef struct SculptThreadedTaskData { bool smooth_mask; bool has_bm_orco; - SculptProjectVector *spvc; + struct SculptProjectVector *spvc; float *offset; float *grab_delta; float *cono; diff --git a/source/blender/python/generic/bgl.c b/source/blender/python/generic/bgl.c index 3ea10228ad4..9cf00192db5 100644 --- a/source/blender/python/generic/bgl.c +++ b/source/blender/python/generic/bgl.c @@ -472,6 +472,49 @@ int BGL_typeSize(int type) return -1; } +static int gl_buffer_type_from_py_format_char(char *typestr) +{ + if (ELEM(typestr[0], '<', '>', '|')) { + typestr += 1; + } + char format = typestr[0]; + char byte_num = typestr[1]; + + switch (format) { + case 't': + case 'b': + case 'h': + if (!byte_num) return GL_BYTE; + ATTR_FALLTHROUGH; + case 'i': + if (!byte_num) return GL_SHORT; + ATTR_FALLTHROUGH; + case 'l': + if (!byte_num || byte_num == '4') return GL_INT; + if (byte_num == '1') return GL_BYTE; + if (byte_num == '2') return GL_SHORT; + break; + case 'f': + if (!byte_num) return GL_FLOAT; + ATTR_FALLTHROUGH; + case 'd': + if (!byte_num || byte_num == '8') return GL_DOUBLE; + if (byte_num == '4') return GL_FLOAT; + break; + } + return -1; /* UNKNOWN */ +} + +static bool compare_dimensions(int ndim, int *dim1, Py_ssize_t *dim2) +{ + for (int i = 0; i < ndim; i++) { + if (dim1[i] != dim2[i]) { + return false; + } + } + return true; +} + /** \} */ @@ -630,6 +673,22 @@ PyTypeObject BGL_bufferType = { NULL /*tp_del*/ }; + +static Buffer 
*BGL_MakeBuffer_FromData(PyObject *parent, int type, int ndimensions, int *dimensions, void *buf) +{ + Buffer *buffer = (Buffer *)PyObject_NEW(Buffer, &BGL_bufferType); + + Py_XINCREF(parent); + buffer->parent = parent; + buffer->ndimensions = ndimensions; + buffer->dimensions = MEM_mallocN(ndimensions * sizeof(int), "Buffer dimensions"); + memcpy(buffer->dimensions, dimensions, ndimensions * sizeof(int)); + buffer->type = type; + buffer->buf.asvoid = buf; + + return buffer; +} + /** * Create a buffer object * @@ -641,30 +700,21 @@ Buffer *BGL_MakeBuffer(int type, int ndimensions, int *dimensions, void *initbuf { Buffer *buffer; void *buf = NULL; - int i, size, length; + int i, size = BGL_typeSize(type); - length = 1; for (i = 0; i < ndimensions; i++) { - length *= dimensions[i]; + size *= dimensions[i]; } - size = BGL_typeSize(type); + buf = MEM_mallocN(size, "Buffer buffer"); - buf = MEM_mallocN(length * size, "Buffer buffer"); - - buffer = (Buffer *)PyObject_NEW(Buffer, &BGL_bufferType); - buffer->parent = NULL; - buffer->ndimensions = ndimensions; - buffer->dimensions = MEM_mallocN(ndimensions * sizeof(int), "Buffer dimensions"); - memcpy(buffer->dimensions, dimensions, ndimensions * sizeof(int)); - buffer->type = type; - buffer->buf.asvoid = buf; + buffer = BGL_MakeBuffer_FromData(NULL, type, ndimensions, dimensions, buf); if (initbuffer) { - memcpy(buffer->buf.asvoid, initbuffer, length * size); + memcpy(buffer->buf.asvoid, initbuffer, size); } else { - memset(buffer->buf.asvoid, 0, length * size); + memset(buffer->buf.asvoid, 0, size); } return buffer; } @@ -674,7 +724,7 @@ Buffer *BGL_MakeBuffer(int type, int ndimensions, int *dimensions, void *initbuf static PyObject *Buffer_new(PyTypeObject *UNUSED(type), PyObject *args, PyObject *kwds) { PyObject *length_ob = NULL, *init = NULL; - Buffer *buffer; + Buffer *buffer = NULL; int dimensions[MAX_DIMENSIONS]; int type; @@ -739,9 +789,32 @@ static PyObject *Buffer_new(PyTypeObject *UNUSED(type), PyObject *args, 
PyObject return NULL; } - buffer = BGL_MakeBuffer(type, ndimensions, dimensions, NULL); - if (init && ndimensions) { - if (Buffer_ass_slice(buffer, 0, dimensions[0], init)) { + if (init && PyObject_CheckBuffer(init)) { + Py_buffer pybuffer; + + if (PyObject_GetBuffer(init, &pybuffer, PyBUF_ND | PyBUF_FORMAT) == -1) { + /* PyObject_GetBuffer raise a PyExc_BufferError */ + return NULL; + } + + if (type != gl_buffer_type_from_py_format_char(pybuffer.format)) { + PyErr_Format(PyExc_TypeError, + "`GL_TYPE` and `typestr` of object with buffer interface do not match. '%s'", pybuffer.format); + } + else if (ndimensions != pybuffer.ndim || + !compare_dimensions(ndimensions, dimensions, pybuffer.shape)) + { + PyErr_Format(PyExc_TypeError, "array size does not match"); + } + else { + buffer = BGL_MakeBuffer_FromData(init, type, pybuffer.ndim, dimensions, pybuffer.buf); + } + + PyBuffer_Release(&pybuffer); + } + else { + buffer = BGL_MakeBuffer(type, ndimensions, dimensions, NULL); + if (init && Buffer_ass_slice(buffer, 0, dimensions[0], init)) { Py_DECREF(buffer); return NULL; } @@ -774,27 +847,17 @@ static PyObject *Buffer_item(Buffer *self, int i) } } else { - Buffer *newbuf; - int j, length, size; + int j, offset = i * BGL_typeSize(self->type); - length = 1; for (j = 1; j < self->ndimensions; j++) { - length *= self->dimensions[j]; + offset *= self->dimensions[j]; } - size = BGL_typeSize(self->type); - - newbuf = (Buffer *)PyObject_NEW(Buffer, &BGL_bufferType); - - Py_INCREF(self); - newbuf->parent = (PyObject *)self; - - newbuf->ndimensions = self->ndimensions - 1; - newbuf->type = self->type; - newbuf->buf.asvoid = self->buf.asbyte + i * length * size; - newbuf->dimensions = MEM_mallocN(newbuf->ndimensions * sizeof(int), "Buffer dimensions"); - memcpy(newbuf->dimensions, self->dimensions + 1, newbuf->ndimensions * sizeof(int)); - return (PyObject *)newbuf; + return (PyObject *)BGL_MakeBuffer_FromData( + (PyObject *)self, self->type, + self->ndimensions - 1, + 
self->dimensions + 1, + self->buf.asbyte + offset); } return NULL; |