diff options
author | Kévin Dietrich <kevin.dietrich@mailoo.org> | 2022-02-17 15:14:47 +0300 |
---|---|---|
committer | Kévin Dietrich <kevin.dietrich@mailoo.org> | 2022-02-17 15:14:47 +0300 |
commit | 433fad50d859c764a9f2d8a63dd321510e88cca4 (patch) | |
tree | 0be8db253f62839d0e37c29427cb4b08502e6951 /intern | |
parent | 43b40f7bdedfd9bf7a466eb9680ac95e2c18925d (diff) | |
parent | c5dcfb63d9e40138f03c135346bfa779abcb0e58 (diff) |
Merge remote-tracking branch 'origin/blender-v3.1-release'
Diffstat (limited to 'intern')
5 files changed, 3433 insertions, 3 deletions
diff --git a/intern/opensubdiv/CMakeLists.txt b/intern/opensubdiv/CMakeLists.txt index 38ce9791b5a..bb3aa16a9fe 100644 --- a/intern/opensubdiv/CMakeLists.txt +++ b/intern/opensubdiv/CMakeLists.txt @@ -66,6 +66,8 @@ if(WITH_OPENSUBDIV) internal/evaluator/evaluator_capi.cc internal/evaluator/evaluator_impl.cc internal/evaluator/evaluator_impl.h + internal/evaluator/gl_compute_evaluator.cc + internal/evaluator/gl_compute_evaluator.h internal/evaluator/patch_map.cc internal/evaluator/patch_map.h @@ -104,6 +106,8 @@ if(WITH_OPENSUBDIV) add_definitions(-DNOMINMAX) add_definitions(-D_USE_MATH_DEFINES) endif() + + data_to_c_simple(internal/evaluator/shaders/glsl_compute_kernel.glsl SRC) else() list(APPEND SRC stub/opensubdiv_stub.cc diff --git a/intern/opensubdiv/internal/evaluator/eval_output_gpu.h b/intern/opensubdiv/internal/evaluator/eval_output_gpu.h index 783efd484aa..dc137e4322e 100644 --- a/intern/opensubdiv/internal/evaluator/eval_output_gpu.h +++ b/intern/opensubdiv/internal/evaluator/eval_output_gpu.h @@ -20,13 +20,11 @@ #define OPENSUBDIV_EVAL_OUTPUT_GPU_H_ #include "internal/evaluator/eval_output.h" +#include "internal/evaluator/gl_compute_evaluator.h" -#include <opensubdiv/osd/glComputeEvaluator.h> #include <opensubdiv/osd/glPatchTable.h> #include <opensubdiv/osd/glVertexBuffer.h> -using OpenSubdiv::Osd::GLComputeEvaluator; -using OpenSubdiv::Osd::GLStencilTableSSBO; using OpenSubdiv::Osd::GLVertexBuffer; namespace blender { diff --git a/intern/opensubdiv/internal/evaluator/gl_compute_evaluator.cc b/intern/opensubdiv/internal/evaluator/gl_compute_evaluator.cc new file mode 100644 index 00000000000..0cab44518aa --- /dev/null +++ b/intern/opensubdiv/internal/evaluator/gl_compute_evaluator.cc @@ -0,0 +1,647 @@ +// +// Copyright 2015 Pixar +// +// Licensed under the Apache License, Version 2.0 (the "Apache License") +// with the following modification; you may not use this file except in +// compliance with the Apache License and the following modification to it: 
+// Section 6. Trademarks. is deleted and replaced with: +// +// 6. Trademarks. This License does not grant permission to use the trade +// names, trademarks, service marks, or product names of the Licensor +// and its affiliates, except as required to comply with Section 4(c) of +// the License and to reproduce the content of the NOTICE file. +// +// You may obtain a copy of the Apache License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the Apache License with the above modification is +// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the Apache License for the specific +// language governing permissions and limitations under the Apache License. +// + +#include "gl_compute_evaluator.h" + +#include <GL/glew.h> + +#include <opensubdiv/far/error.h> +#include <opensubdiv/far/patchDescriptor.h> +#include <opensubdiv/far/stencilTable.h> +#include <opensubdiv/osd/glslPatchShaderSource.h> + +#include <cassert> +#include <cmath> +#include <sstream> +#include <string> +#include <vector> + +using OpenSubdiv::Far::LimitStencilTable; +using OpenSubdiv::Far::StencilTable; +using OpenSubdiv::Osd::BufferDescriptor; +using OpenSubdiv::Osd::PatchArray; +using OpenSubdiv::Osd::PatchArrayVector; + +extern "C" char datatoc_glsl_compute_kernel_glsl[]; + +namespace blender { +namespace opensubdiv { + +template<class T> GLuint createSSBO(std::vector<T> const &src) +{ + if (src.empty()) { + return 0; + } + + GLuint devicePtr = 0; + +#if defined(GL_ARB_direct_state_access) + if (GLEW_ARB_direct_state_access) { + glCreateBuffers(1, &devicePtr); + glNamedBufferData(devicePtr, src.size() * sizeof(T), &src.at(0), GL_STATIC_DRAW); + } + else +#endif + { + GLint prev = 0; + glGetIntegerv(GL_SHADER_STORAGE_BUFFER_BINDING, &prev); + glGenBuffers(1, &devicePtr); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, devicePtr); + 
glBufferData(GL_SHADER_STORAGE_BUFFER, src.size() * sizeof(T), &src.at(0), GL_STATIC_DRAW); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, prev); + } + + return devicePtr; +} + +GLStencilTableSSBO::GLStencilTableSSBO(StencilTable const *stencilTable) +{ + _numStencils = stencilTable->GetNumStencils(); + if (_numStencils > 0) { + _sizes = createSSBO(stencilTable->GetSizes()); + _offsets = createSSBO(stencilTable->GetOffsets()); + _indices = createSSBO(stencilTable->GetControlIndices()); + _weights = createSSBO(stencilTable->GetWeights()); + _duWeights = _dvWeights = 0; + _duuWeights = _duvWeights = _dvvWeights = 0; + } + else { + _sizes = _offsets = _indices = _weights = 0; + _duWeights = _dvWeights = 0; + _duuWeights = _duvWeights = _dvvWeights = 0; + } +} + +GLStencilTableSSBO::GLStencilTableSSBO(LimitStencilTable const *limitStencilTable) +{ + _numStencils = limitStencilTable->GetNumStencils(); + if (_numStencils > 0) { + _sizes = createSSBO(limitStencilTable->GetSizes()); + _offsets = createSSBO(limitStencilTable->GetOffsets()); + _indices = createSSBO(limitStencilTable->GetControlIndices()); + _weights = createSSBO(limitStencilTable->GetWeights()); + _duWeights = createSSBO(limitStencilTable->GetDuWeights()); + _dvWeights = createSSBO(limitStencilTable->GetDvWeights()); + _duuWeights = createSSBO(limitStencilTable->GetDuuWeights()); + _duvWeights = createSSBO(limitStencilTable->GetDuvWeights()); + _dvvWeights = createSSBO(limitStencilTable->GetDvvWeights()); + } + else { + _sizes = _offsets = _indices = _weights = 0; + _duWeights = _dvWeights = 0; + _duuWeights = _duvWeights = _dvvWeights = 0; + } +} + +GLStencilTableSSBO::~GLStencilTableSSBO() +{ + if (_sizes) + glDeleteBuffers(1, &_sizes); + if (_offsets) + glDeleteBuffers(1, &_offsets); + if (_indices) + glDeleteBuffers(1, &_indices); + if (_weights) + glDeleteBuffers(1, &_weights); + if (_duWeights) + glDeleteBuffers(1, &_duWeights); + if (_dvWeights) + glDeleteBuffers(1, &_dvWeights); + if (_duuWeights) + 
glDeleteBuffers(1, &_duuWeights); + if (_duvWeights) + glDeleteBuffers(1, &_duvWeights); + if (_dvvWeights) + glDeleteBuffers(1, &_dvvWeights); +} + +// --------------------------------------------------------------------------- + +GLComputeEvaluator::GLComputeEvaluator() : _workGroupSize(64), _patchArraysSSBO(0) +{ + memset(&_stencilKernel, 0, sizeof(_stencilKernel)); + memset(&_patchKernel, 0, sizeof(_patchKernel)); +} + +GLComputeEvaluator::~GLComputeEvaluator() +{ + if (_patchArraysSSBO) { + glDeleteBuffers(1, &_patchArraysSSBO); + } +} + +static GLuint compileKernel(BufferDescriptor const &srcDesc, + BufferDescriptor const &dstDesc, + BufferDescriptor const &duDesc, + BufferDescriptor const &dvDesc, + BufferDescriptor const &duuDesc, + BufferDescriptor const &duvDesc, + BufferDescriptor const &dvvDesc, + const char *kernelDefine, + int workGroupSize) +{ + GLuint program = glCreateProgram(); + + GLuint shader = glCreateShader(GL_COMPUTE_SHADER); + + std::string patchBasisShaderSource = + OpenSubdiv::Osd::GLSLPatchShaderSource::GetPatchBasisShaderSource(); + const char *patchBasisShaderSourceDefine = "#define OSD_PATCH_BASIS_GLSL\n"; + + std::ostringstream defines; + defines << "#define LENGTH " << srcDesc.length << "\n" + << "#define SRC_STRIDE " << srcDesc.stride << "\n" + << "#define DST_STRIDE " << dstDesc.stride << "\n" + << "#define WORK_GROUP_SIZE " << workGroupSize << "\n" + << kernelDefine << "\n" + << patchBasisShaderSourceDefine << "\n"; + + bool deriv1 = (duDesc.length > 0 || dvDesc.length > 0); + bool deriv2 = (duuDesc.length > 0 || duvDesc.length > 0 || dvvDesc.length > 0); + if (deriv1) { + defines << "#define OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES\n"; + } + if (deriv2) { + defines << "#define OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES\n"; + } + + std::string defineStr = defines.str(); + + const char *shaderSources[4] = {"#version 430\n", 0, 0, 0}; + + shaderSources[1] = defineStr.c_str(); + shaderSources[2] = patchBasisShaderSource.c_str(); 
+ shaderSources[3] = datatoc_glsl_compute_kernel_glsl; + glShaderSource(shader, 4, shaderSources, NULL); + glCompileShader(shader); + glAttachShader(program, shader); + + GLint linked = 0; + glLinkProgram(program); + glGetProgramiv(program, GL_LINK_STATUS, &linked); + + if (linked == GL_FALSE) { + char buffer[1024]; + glGetShaderInfoLog(shader, 1024, NULL, buffer); + OpenSubdiv::Far::Error(OpenSubdiv::Far::FAR_RUNTIME_ERROR, buffer); + + glGetProgramInfoLog(program, 1024, NULL, buffer); + OpenSubdiv::Far::Error(OpenSubdiv::Far::FAR_RUNTIME_ERROR, buffer); + + glDeleteProgram(program); + return 0; + } + + glDeleteShader(shader); + + return program; +} + +bool GLComputeEvaluator::Compile(BufferDescriptor const &srcDesc, + BufferDescriptor const &dstDesc, + BufferDescriptor const &duDesc, + BufferDescriptor const &dvDesc, + BufferDescriptor const &duuDesc, + BufferDescriptor const &duvDesc, + BufferDescriptor const &dvvDesc) +{ + + // create a stencil kernel + if (!_stencilKernel.Compile( + srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc, _workGroupSize)) { + return false; + } + + // create a patch kernel + if (!_patchKernel.Compile( + srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc, _workGroupSize)) { + return false; + } + + // create a patch arrays buffer + if (!_patchArraysSSBO) { + glGenBuffers(1, &_patchArraysSSBO); + } + + return true; +} + +/* static */ +void GLComputeEvaluator::Synchronize(void * /*kernel*/) +{ + // XXX: this is currently just for the performance measuring purpose. + // need to be reimplemented by fence and sync. 
+ glFinish(); +} + +int GLComputeEvaluator::GetDispatchSize(int count) const +{ + return (count + _workGroupSize - 1) / _workGroupSize; +} + +void GLComputeEvaluator::DispatchCompute(int totalDispatchSize) const +{ + int maxWorkGroupCount[2] = {0, 0}; + + glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 0, &maxWorkGroupCount[0]); + glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 1, &maxWorkGroupCount[1]); + + const uint maxResX = static_cast<uint>(maxWorkGroupCount[0]); + + const int dispatchSize = GetDispatchSize(totalDispatchSize); + uint dispatchRX = static_cast<uint>(dispatchSize); + uint dispatchRY = 1u; + if (dispatchRX > maxResX) { + /* Since there are some limitations with regards to the maximum work group size (could be as + * low as 64k elements per call), we split the number elements into a "2d" number, with the + * final index being computed as `res_x + res_y * max_work_group_size`. Even with a maximum + * work group size of 64k, that still leaves us with roughly `64k * 64k = 4` billion elements + * total, which should be enough. If not, we could also use the 3rd dimension. */ + /* TODO(fclem): We could dispatch fewer groups if we compute the prime factorization and + * get the smallest rect fitting the requirements. */ + dispatchRX = dispatchRY = std::ceil(std::sqrt(dispatchSize)); + /* Avoid a completely empty dispatch line caused by rounding. */ + if ((dispatchRX * (dispatchRY - 1)) >= dispatchSize) { + dispatchRY -= 1; + } + } + + /* X and Y dimensions may have different limits so the above computation may not be right, but + * even with the standard 64k minimum on all dimensions we still have a lot of room. Therefore, + * we presume it all fits. 
*/ + assert(dispatchRY < static_cast<uint>(maxWorkGroupCount[1])); + + glDispatchCompute(dispatchRX, dispatchRY, 1); +} + +bool GLComputeEvaluator::EvalStencils(GLuint srcBuffer, + BufferDescriptor const &srcDesc, + GLuint dstBuffer, + BufferDescriptor const &dstDesc, + GLuint duBuffer, + BufferDescriptor const &duDesc, + GLuint dvBuffer, + BufferDescriptor const &dvDesc, + GLuint sizesBuffer, + GLuint offsetsBuffer, + GLuint indicesBuffer, + GLuint weightsBuffer, + GLuint duWeightsBuffer, + GLuint dvWeightsBuffer, + int start, + int end) const +{ + + return EvalStencils(srcBuffer, + srcDesc, + dstBuffer, + dstDesc, + duBuffer, + duDesc, + dvBuffer, + dvDesc, + 0, + BufferDescriptor(), + 0, + BufferDescriptor(), + 0, + BufferDescriptor(), + sizesBuffer, + offsetsBuffer, + indicesBuffer, + weightsBuffer, + duWeightsBuffer, + dvWeightsBuffer, + 0, + 0, + 0, + start, + end); +} + +bool GLComputeEvaluator::EvalStencils(GLuint srcBuffer, + BufferDescriptor const &srcDesc, + GLuint dstBuffer, + BufferDescriptor const &dstDesc, + GLuint duBuffer, + BufferDescriptor const &duDesc, + GLuint dvBuffer, + BufferDescriptor const &dvDesc, + GLuint duuBuffer, + BufferDescriptor const &duuDesc, + GLuint duvBuffer, + BufferDescriptor const &duvDesc, + GLuint dvvBuffer, + BufferDescriptor const &dvvDesc, + GLuint sizesBuffer, + GLuint offsetsBuffer, + GLuint indicesBuffer, + GLuint weightsBuffer, + GLuint duWeightsBuffer, + GLuint dvWeightsBuffer, + GLuint duuWeightsBuffer, + GLuint duvWeightsBuffer, + GLuint dvvWeightsBuffer, + int start, + int end) const +{ + + if (!_stencilKernel.program) + return false; + int count = end - start; + if (count <= 0) { + return true; + } + + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, srcBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, dstBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, duBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, dvBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 10, duuBuffer); + 
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 11, duvBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 12, dvvBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, sizesBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, offsetsBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, indicesBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 7, weightsBuffer); + if (duWeightsBuffer) + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 8, duWeightsBuffer); + if (dvWeightsBuffer) + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 9, dvWeightsBuffer); + if (duuWeightsBuffer) + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 13, duuWeightsBuffer); + if (duvWeightsBuffer) + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 14, duvWeightsBuffer); + if (dvvWeightsBuffer) + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 15, dvvWeightsBuffer); + + glUseProgram(_stencilKernel.program); + + glUniform1i(_stencilKernel.uniformStart, start); + glUniform1i(_stencilKernel.uniformEnd, end); + glUniform1i(_stencilKernel.uniformSrcOffset, srcDesc.offset); + glUniform1i(_stencilKernel.uniformDstOffset, dstDesc.offset); + if (_stencilKernel.uniformDuDesc > 0) { + glUniform3i(_stencilKernel.uniformDuDesc, duDesc.offset, duDesc.length, duDesc.stride); + } + if (_stencilKernel.uniformDvDesc > 0) { + glUniform3i(_stencilKernel.uniformDvDesc, dvDesc.offset, dvDesc.length, dvDesc.stride); + } + if (_stencilKernel.uniformDuuDesc > 0) { + glUniform3i(_stencilKernel.uniformDuuDesc, duuDesc.offset, duuDesc.length, duuDesc.stride); + } + if (_stencilKernel.uniformDuvDesc > 0) { + glUniform3i(_stencilKernel.uniformDuvDesc, duvDesc.offset, duvDesc.length, duvDesc.stride); + } + if (_stencilKernel.uniformDvvDesc > 0) { + glUniform3i(_stencilKernel.uniformDvvDesc, dvvDesc.offset, dvvDesc.length, dvvDesc.stride); + } + + DispatchCompute(count); + + glUseProgram(0); + + glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT); + for (int i = 0; i < 16; ++i) { + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, i, 0); + } + + return true; 
+} + +bool GLComputeEvaluator::EvalPatches(GLuint srcBuffer, + BufferDescriptor const &srcDesc, + GLuint dstBuffer, + BufferDescriptor const &dstDesc, + GLuint duBuffer, + BufferDescriptor const &duDesc, + GLuint dvBuffer, + BufferDescriptor const &dvDesc, + int numPatchCoords, + GLuint patchCoordsBuffer, + const PatchArrayVector &patchArrays, + GLuint patchIndexBuffer, + GLuint patchParamsBuffer) const +{ + + return EvalPatches(srcBuffer, + srcDesc, + dstBuffer, + dstDesc, + duBuffer, + duDesc, + dvBuffer, + dvDesc, + 0, + BufferDescriptor(), + 0, + BufferDescriptor(), + 0, + BufferDescriptor(), + numPatchCoords, + patchCoordsBuffer, + patchArrays, + patchIndexBuffer, + patchParamsBuffer); +} + +bool GLComputeEvaluator::EvalPatches(GLuint srcBuffer, + BufferDescriptor const &srcDesc, + GLuint dstBuffer, + BufferDescriptor const &dstDesc, + GLuint duBuffer, + BufferDescriptor const &duDesc, + GLuint dvBuffer, + BufferDescriptor const &dvDesc, + GLuint duuBuffer, + BufferDescriptor const &duuDesc, + GLuint duvBuffer, + BufferDescriptor const &duvDesc, + GLuint dvvBuffer, + BufferDescriptor const &dvvDesc, + int numPatchCoords, + GLuint patchCoordsBuffer, + const PatchArrayVector &patchArrays, + GLuint patchIndexBuffer, + GLuint patchParamsBuffer) const +{ + + if (!_patchKernel.program) + return false; + + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, srcBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, dstBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, duBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, dvBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 10, duuBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 11, duvBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 12, dvvBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, patchCoordsBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, patchIndexBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 7, patchParamsBuffer); + + glUseProgram(_patchKernel.program); + + 
glUniform1i(_patchKernel.uniformSrcOffset, srcDesc.offset); + glUniform1i(_patchKernel.uniformDstOffset, dstDesc.offset); + + int patchArraySize = sizeof(PatchArray); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, _patchArraysSSBO); + glBufferData( + GL_SHADER_STORAGE_BUFFER, patchArrays.size() * patchArraySize, NULL, GL_STATIC_DRAW); + for (int i = 0; i < (int)patchArrays.size(); ++i) { + glBufferSubData( + GL_SHADER_STORAGE_BUFFER, i * patchArraySize, sizeof(PatchArray), &patchArrays[i]); + } + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, _patchArraysSSBO); + + if (_patchKernel.uniformDuDesc > 0) { + glUniform3i(_patchKernel.uniformDuDesc, duDesc.offset, duDesc.length, duDesc.stride); + } + if (_patchKernel.uniformDvDesc > 0) { + glUniform3i(_patchKernel.uniformDvDesc, dvDesc.offset, dvDesc.length, dvDesc.stride); + } + if (_patchKernel.uniformDuuDesc > 0) { + glUniform3i(_patchKernel.uniformDuuDesc, duuDesc.offset, duuDesc.length, duuDesc.stride); + } + if (_patchKernel.uniformDuvDesc > 0) { + glUniform3i(_patchKernel.uniformDuvDesc, duvDesc.offset, duvDesc.length, duvDesc.stride); + } + if (_patchKernel.uniformDvvDesc > 0) { + glUniform3i(_patchKernel.uniformDvvDesc, dvvDesc.offset, dvvDesc.length, dvvDesc.stride); + } + + DispatchCompute(numPatchCoords); + + glUseProgram(0); + + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, 0); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, 0); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, 0); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, 0); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, 0); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, 0); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, 0); + + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 10, 0); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 11, 0); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 12, 0); + + return true; +} +// --------------------------------------------------------------------------- + +GLComputeEvaluator::_StencilKernel::_StencilKernel() : program(0) +{ +} 
+GLComputeEvaluator::_StencilKernel::~_StencilKernel() +{ + if (program) { + glDeleteProgram(program); + } +} + +bool GLComputeEvaluator::_StencilKernel::Compile(BufferDescriptor const &srcDesc, + BufferDescriptor const &dstDesc, + BufferDescriptor const &duDesc, + BufferDescriptor const &dvDesc, + BufferDescriptor const &duuDesc, + BufferDescriptor const &duvDesc, + BufferDescriptor const &dvvDesc, + int workGroupSize) +{ + // create stencil kernel + if (program) { + glDeleteProgram(program); + } + + const char *kernelDefine = "#define OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_STENCILS\n"; + + program = compileKernel( + srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc, kernelDefine, workGroupSize); + if (program == 0) + return false; + + // cache uniform locations (TODO: use uniform block) + uniformStart = glGetUniformLocation(program, "batchStart"); + uniformEnd = glGetUniformLocation(program, "batchEnd"); + uniformSrcOffset = glGetUniformLocation(program, "srcOffset"); + uniformDstOffset = glGetUniformLocation(program, "dstOffset"); + uniformDuDesc = glGetUniformLocation(program, "duDesc"); + uniformDvDesc = glGetUniformLocation(program, "dvDesc"); + uniformDuuDesc = glGetUniformLocation(program, "duuDesc"); + uniformDuvDesc = glGetUniformLocation(program, "duvDesc"); + uniformDvvDesc = glGetUniformLocation(program, "dvvDesc"); + + return true; +} + +// --------------------------------------------------------------------------- + +GLComputeEvaluator::_PatchKernel::_PatchKernel() : program(0) +{ +} +GLComputeEvaluator::_PatchKernel::~_PatchKernel() +{ + if (program) { + glDeleteProgram(program); + } +} + +bool GLComputeEvaluator::_PatchKernel::Compile(BufferDescriptor const &srcDesc, + BufferDescriptor const &dstDesc, + BufferDescriptor const &duDesc, + BufferDescriptor const &dvDesc, + BufferDescriptor const &duuDesc, + BufferDescriptor const &duvDesc, + BufferDescriptor const &dvvDesc, + int workGroupSize) +{ + // create stencil kernel + if (program) { + 
glDeleteProgram(program); + } + + const char *kernelDefine = "#define OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_PATCHES\n"; + + program = compileKernel( + srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc, kernelDefine, workGroupSize); + if (program == 0) + return false; + + // cache uniform locations + uniformSrcOffset = glGetUniformLocation(program, "srcOffset"); + uniformDstOffset = glGetUniformLocation(program, "dstOffset"); + uniformPatchArray = glGetUniformLocation(program, "patchArray"); + uniformDuDesc = glGetUniformLocation(program, "duDesc"); + uniformDvDesc = glGetUniformLocation(program, "dvDesc"); + uniformDuuDesc = glGetUniformLocation(program, "duuDesc"); + uniformDuvDesc = glGetUniformLocation(program, "duvDesc"); + uniformDvvDesc = glGetUniformLocation(program, "dvvDesc"); + + return true; +} + +} // namespace opensubdiv +} // namespace blender diff --git a/intern/opensubdiv/internal/evaluator/gl_compute_evaluator.h b/intern/opensubdiv/internal/evaluator/gl_compute_evaluator.h new file mode 100644 index 00000000000..85c12f73b08 --- /dev/null +++ b/intern/opensubdiv/internal/evaluator/gl_compute_evaluator.h @@ -0,0 +1,2465 @@ +// +// Copyright 2015 Pixar +// +// Licensed under the Apache License, Version 2.0 (the "Apache License") +// with the following modification; you may not use this file except in +// compliance with the Apache License and the following modification to it: +// Section 6. Trademarks. is deleted and replaced with: +// +// 6. Trademarks. This License does not grant permission to use the trade +// names, trademarks, service marks, or product names of the Licensor +// and its affiliates, except as required to comply with Section 4(c) of +// the License and to reproduce the content of the NOTICE file. 
+// +// You may obtain a copy of the Apache License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the Apache License with the above modification is +// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the Apache License for the specific +// language governing permissions and limitations under the Apache License. +// + +#ifndef OPENSUBDIV_GL_COMPUTE_EVALUATOR_H_ +#define OPENSUBDIV_GL_COMPUTE_EVALUATOR_H_ + +#include <opensubdiv/osd/bufferDescriptor.h> +#include <opensubdiv/osd/opengl.h> +#include <opensubdiv/osd/types.h> +#include <opensubdiv/version.h> + +namespace OpenSubdiv { +namespace OPENSUBDIV_VERSION { +namespace Far { +class LimitStencilTable; +class StencilTable; +} // namespace Far +} // namespace OPENSUBDIV_VERSION +} // namespace OpenSubdiv + +namespace blender { +namespace opensubdiv { + +/// \brief GL stencil table (Shader Storage buffer) +/// +/// This class is a GLSL SSBO representation of OpenSubdiv::Far::StencilTable. 
+/// +/// GLSLComputeKernel consumes this table to apply stencils +/// +class GLStencilTableSSBO { + public: + static GLStencilTableSSBO *Create(OpenSubdiv::Far::StencilTable const *stencilTable, + void *deviceContext = NULL) + { + (void)deviceContext; // unused + return new GLStencilTableSSBO(stencilTable); + } + static GLStencilTableSSBO *Create(OpenSubdiv::Far::LimitStencilTable const *limitStencilTable, + void *deviceContext = NULL) + { + (void)deviceContext; // unused + return new GLStencilTableSSBO(limitStencilTable); + } + + explicit GLStencilTableSSBO(OpenSubdiv::Far::StencilTable const *stencilTable); + explicit GLStencilTableSSBO(OpenSubdiv::Far::LimitStencilTable const *limitStencilTable); + ~GLStencilTableSSBO(); + + // interfaces needed for GLSLComputeKernel + GLuint GetSizesBuffer() const + { + return _sizes; + } + GLuint GetOffsetsBuffer() const + { + return _offsets; + } + GLuint GetIndicesBuffer() const + { + return _indices; + } + GLuint GetWeightsBuffer() const + { + return _weights; + } + GLuint GetDuWeightsBuffer() const + { + return _duWeights; + } + GLuint GetDvWeightsBuffer() const + { + return _dvWeights; + } + GLuint GetDuuWeightsBuffer() const + { + return _duuWeights; + } + GLuint GetDuvWeightsBuffer() const + { + return _duvWeights; + } + GLuint GetDvvWeightsBuffer() const + { + return _dvvWeights; + } + int GetNumStencils() const + { + return _numStencils; + } + + private: + GLuint _sizes; + GLuint _offsets; + GLuint _indices; + GLuint _weights; + GLuint _duWeights; + GLuint _dvWeights; + GLuint _duuWeights; + GLuint _duvWeights; + GLuint _dvvWeights; + int _numStencils; +}; + +// --------------------------------------------------------------------------- + +class GLComputeEvaluator { + public: + typedef bool Instantiatable; + static GLComputeEvaluator *Create(OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + OpenSubdiv::Osd::BufferDescriptor const &duDesc, + 
OpenSubdiv::Osd::BufferDescriptor const &dvDesc, + void *deviceContext = NULL) + { + return Create(srcDesc, + dstDesc, + duDesc, + dvDesc, + OpenSubdiv::Osd::BufferDescriptor(), + OpenSubdiv::Osd::BufferDescriptor(), + OpenSubdiv::Osd::BufferDescriptor(), + deviceContext); + } + + static GLComputeEvaluator *Create(OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + OpenSubdiv::Osd::BufferDescriptor const &duDesc, + OpenSubdiv::Osd::BufferDescriptor const &dvDesc, + OpenSubdiv::Osd::BufferDescriptor const &duuDesc, + OpenSubdiv::Osd::BufferDescriptor const &duvDesc, + OpenSubdiv::Osd::BufferDescriptor const &dvvDesc, + void *deviceContext = NULL) + { + (void)deviceContext; // not used + GLComputeEvaluator *instance = new GLComputeEvaluator(); + if (instance->Compile(srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc)) + return instance; + delete instance; + return NULL; + } + + /// Constructor. + GLComputeEvaluator(); + + /// Destructor. note that the GL context must be made current. + ~GLComputeEvaluator(); + + /// ---------------------------------------------------------------------- + /// + /// Stencil evaluations with StencilTable + /// + /// ---------------------------------------------------------------------- + + /// \brief Generic static stencil function. This function has a same + /// signature as other device kernels have so that it can be called + /// transparently from OsdMesh template interface. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param stencilTable stencil table to be applied. 
The table must have + /// SSBO interfaces. + /// + /// @param instance cached compiled instance. Clients are supposed to + /// pre-compile an instance of this class and provide + /// to this function. If it's null the kernel still + /// compute by instantiating on-demand kernel although + /// it may cause a performance problem. + /// + /// @param deviceContext not used in the GLSL kernel + /// + template<typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE> + static bool EvalStencils(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + STENCIL_TABLE const *stencilTable, + GLComputeEvaluator const *instance, + void *deviceContext = NULL) + { + + if (instance) { + return instance->EvalStencils(srcBuffer, srcDesc, dstBuffer, dstDesc, stencilTable); + } + else { + // Create an instance on demand (slow) + (void)deviceContext; // unused + instance = Create(srcDesc, + dstDesc, + OpenSubdiv::Osd::BufferDescriptor(), + OpenSubdiv::Osd::BufferDescriptor()); + if (instance) { + bool r = instance->EvalStencils(srcBuffer, srcDesc, dstBuffer, dstDesc, stencilTable); + delete instance; + return r; + } + return false; + } + } + + /// \brief Generic static stencil function. This function has a same + /// signature as other device kernels have so that it can be called + /// transparently from OsdMesh template interface. + /// + /// @param srcBuffer Input primvar buffer. 
+ /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the dstBuffer + /// + /// @param duBuffer Output buffer derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duDesc vertex buffer descriptor for the duBuffer + /// + /// @param dvBuffer Output buffer derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvDesc vertex buffer descriptor for the dvBuffer + /// + /// @param stencilTable stencil table to be applied. The table must have + /// SSBO interfaces. + /// + /// @param instance cached compiled instance. Clients are supposed to + /// pre-compile an instance of this class and provide + /// to this function. If it's null the kernel still + /// compute by instantiating on-demand kernel although + /// it may cause a performance problem. 
+ /// + /// @param deviceContext not used in the GLSL kernel + /// + template<typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE> + static bool EvalStencils(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvDesc, + STENCIL_TABLE const *stencilTable, + GLComputeEvaluator const *instance, + void *deviceContext = NULL) + { + + if (instance) { + return instance->EvalStencils(srcBuffer, + srcDesc, + dstBuffer, + dstDesc, + duBuffer, + duDesc, + dvBuffer, + dvDesc, + stencilTable); + } + else { + // Create an instance on demand (slow) + (void)deviceContext; // unused + instance = Create(srcDesc, dstDesc, duDesc, dvDesc); + if (instance) { + bool r = instance->EvalStencils(srcBuffer, + srcDesc, + dstBuffer, + dstDesc, + duBuffer, + duDesc, + dvBuffer, + dvDesc, + stencilTable); + delete instance; + return r; + } + return false; + } + } + + /// \brief Generic static stencil function. This function has a same + /// signature as other device kernels have so that it can be called + /// transparently from OsdMesh template interface. + /// + /// @param srcBuffer Input primvar buffer. 
+ /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the dstBuffer + /// + /// @param duBuffer Output buffer derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duDesc vertex buffer descriptor for the duBuffer + /// + /// @param dvBuffer Output buffer derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvDesc vertex buffer descriptor for the dvBuffer + /// + /// @param duuBuffer Output buffer 2nd derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duuDesc vertex buffer descriptor for the duuBuffer + /// + /// @param duvBuffer Output buffer 2nd derivative wrt u and v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duvDesc vertex buffer descriptor for the duvBuffer + /// + /// @param dvvBuffer Output buffer 2nd derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvvDesc vertex buffer descriptor for the dvvBuffer + /// + /// @param stencilTable stencil table to be applied. The table must have + /// SSBO interfaces. + /// + /// @param instance cached compiled instance. Clients are supposed to + /// pre-compile an instance of this class and provide + /// to this function. If it's null the kernel still + /// compute by instantiating on-demand kernel although + /// it may cause a performance problem. 
+ /// + /// @param deviceContext not used in the GLSL kernel + /// + template<typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE> + static bool EvalStencils(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvDesc, + DST_BUFFER *duuBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duuDesc, + DST_BUFFER *duvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duvDesc, + DST_BUFFER *dvvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvvDesc, + STENCIL_TABLE const *stencilTable, + GLComputeEvaluator const *instance, + void *deviceContext = NULL) + { + + if (instance) { + return instance->EvalStencils(srcBuffer, + srcDesc, + dstBuffer, + dstDesc, + duBuffer, + duDesc, + dvBuffer, + dvDesc, + duuBuffer, + duuDesc, + duvBuffer, + duvDesc, + dvvBuffer, + dvvDesc, + stencilTable); + } + else { + // Create an instance on demand (slow) + (void)deviceContext; // unused + instance = Create(srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc); + if (instance) { + bool r = instance->EvalStencils(srcBuffer, + srcDesc, + dstBuffer, + dstDesc, + duBuffer, + duDesc, + dvBuffer, + dvDesc, + duuBuffer, + duuDesc, + duvBuffer, + duvDesc, + dvvBuffer, + dvvDesc, + stencilTable); + delete instance; + return r; + } + return false; + } + } + + /// \brief Generic stencil function. + /// + /// @param srcBuffer Input primvar buffer. 
+ /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param stencilTable stencil table to be applied. The table must have + /// SSBO interfaces. + /// + template<typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE> + bool EvalStencils(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + STENCIL_TABLE const *stencilTable) const + { + return EvalStencils(srcBuffer->BindVBO(), + srcDesc, + dstBuffer->BindVBO(), + dstDesc, + 0, + OpenSubdiv::Osd::BufferDescriptor(), + 0, + OpenSubdiv::Osd::BufferDescriptor(), + stencilTable->GetSizesBuffer(), + stencilTable->GetOffsetsBuffer(), + stencilTable->GetIndicesBuffer(), + stencilTable->GetWeightsBuffer(), + 0, + 0, + /* start = */ 0, + /* end = */ stencilTable->GetNumStencils()); + } + + /// \brief Generic stencil function. + /// + /// @param srcBuffer Input primvar buffer. 
+ /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the dstBuffer + /// + /// @param duBuffer Output buffer derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duDesc vertex buffer descriptor for the duBuffer + /// + /// @param dvBuffer Output buffer derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvDesc vertex buffer descriptor for the dvBuffer + /// + /// @param stencilTable stencil table to be applied. The table must have + /// SSBO interfaces. + /// + template<typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE> + bool EvalStencils(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvDesc, + STENCIL_TABLE const *stencilTable) const + { + return EvalStencils(srcBuffer->BindVBO(), + srcDesc, + dstBuffer->BindVBO(), + dstDesc, + duBuffer->BindVBO(), + duDesc, + dvBuffer->BindVBO(), + dvDesc, + stencilTable->GetSizesBuffer(), + stencilTable->GetOffsetsBuffer(), + stencilTable->GetIndicesBuffer(), + stencilTable->GetWeightsBuffer(), + stencilTable->GetDuWeightsBuffer(), + stencilTable->GetDvWeightsBuffer(), + /* start = */ 0, + /* end = */ stencilTable->GetNumStencils()); + } + + /// \brief Generic stencil function. + /// + /// @param srcBuffer Input primvar buffer. 
+ /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the dstBuffer + /// + /// @param duBuffer Output buffer derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duDesc vertex buffer descriptor for the duBuffer + /// + /// @param dvBuffer Output buffer derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvDesc vertex buffer descriptor for the dvBuffer + /// + /// @param duuBuffer Output buffer 2nd derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duuDesc vertex buffer descriptor for the duuBuffer + /// + /// @param duvBuffer Output buffer 2nd derivative wrt u and v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duvDesc vertex buffer descriptor for the duvBuffer + /// + /// @param dvvBuffer Output buffer 2nd derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvvDesc vertex buffer descriptor for the dvvBuffer + /// + /// @param stencilTable stencil table to be applied. The table must have + /// SSBO interfaces. 
+ /// + template<typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE> + bool EvalStencils(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvDesc, + DST_BUFFER *duuBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duuDesc, + DST_BUFFER *duvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duvDesc, + DST_BUFFER *dvvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvvDesc, + STENCIL_TABLE const *stencilTable) const + { + return EvalStencils(srcBuffer->BindVBO(), + srcDesc, + dstBuffer->BindVBO(), + dstDesc, + duBuffer->BindVBO(), + duDesc, + dvBuffer->BindVBO(), + dvDesc, + duuBuffer->BindVBO(), + duuDesc, + duvBuffer->BindVBO(), + duvDesc, + dvvBuffer->BindVBO(), + dvvDesc, + stencilTable->GetSizesBuffer(), + stencilTable->GetOffsetsBuffer(), + stencilTable->GetIndicesBuffer(), + stencilTable->GetWeightsBuffer(), + stencilTable->GetDuWeightsBuffer(), + stencilTable->GetDvWeightsBuffer(), + stencilTable->GetDuuWeightsBuffer(), + stencilTable->GetDuvWeightsBuffer(), + stencilTable->GetDvvWeightsBuffer(), + /* start = */ 0, + /* end = */ stencilTable->GetNumStencils()); + } + + /// \brief Dispatch the GLSL compute kernel on GPU asynchronously + /// returns false if the kernel hasn't been compiled yet. 
///
/// Overload taking raw GL buffer ids; declared here and defined out of line.
///
/// @param srcBuffer        GL buffer of input primvar source data
/// @param srcDesc          vertex buffer descriptor for the srcBuffer
/// @param dstBuffer        GL buffer of output primvar destination data
/// @param dstDesc          vertex buffer descriptor for the dstBuffer
/// @param duBuffer         GL buffer of output derivative wrt u
/// @param duDesc           vertex buffer descriptor for the duBuffer
/// @param dvBuffer         GL buffer of output derivative wrt v
/// @param dvDesc           vertex buffer descriptor for the dvBuffer
/// @param sizesBuffer      GL buffer of the sizes in the stencil table
/// @param offsetsBuffer    GL buffer of the offsets in the stencil table
/// @param indicesBuffer    GL buffer of the indices in the stencil table
/// @param weightsBuffer    GL buffer of the weights in the stencil table
/// @param duWeightsBuffer  GL buffer of the du weights in the stencil table
/// @param dvWeightsBuffer  GL buffer of the dv weights in the stencil table
/// @param start            start index of stencil table
/// @param end              end index of stencil table
///
bool EvalStencils(GLuint srcBuffer,
                  OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
                  GLuint dstBuffer,
                  OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
                  GLuint duBuffer,
                  OpenSubdiv::Osd::BufferDescriptor const &duDesc,
                  GLuint dvBuffer,
                  OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
                  GLuint sizesBuffer,
                  GLuint offsetsBuffer,
                  GLuint indicesBuffer,
                  GLuint weightsBuffer,
                  GLuint duWeightsBuffer,
                  GLuint dvWeightsBuffer,
                  int start,
                  int end) const;

/// \brief Dispatch the GLSL compute kernel on GPU asynchronously
/// returns false if the kernel hasn't been compiled yet.
///
/// Overload taking raw GL buffer ids, including the 2nd derivative outputs;
/// declared here and defined out of line.
///
/// @param srcBuffer        GL buffer of input primvar source data
/// @param srcDesc          vertex buffer descriptor for the srcBuffer
/// @param dstBuffer        GL buffer of output primvar destination data
/// @param dstDesc          vertex buffer descriptor for the dstBuffer
/// @param duBuffer         GL buffer of output derivative wrt u
/// @param duDesc           vertex buffer descriptor for the duBuffer
/// @param dvBuffer         GL buffer of output derivative wrt v
/// @param dvDesc           vertex buffer descriptor for the dvBuffer
/// @param duuBuffer        GL buffer of output 2nd derivative wrt u
/// @param duuDesc          vertex buffer descriptor for the duuBuffer
/// @param duvBuffer        GL buffer of output 2nd derivative wrt u and v
/// @param duvDesc          vertex buffer descriptor for the duvBuffer
/// @param dvvBuffer        GL buffer of output 2nd derivative wrt v
/// @param dvvDesc          vertex buffer descriptor for the dvvBuffer
/// @param sizesBuffer      GL buffer of the sizes in the stencil table
/// @param offsetsBuffer    GL buffer of the offsets in the stencil table
/// @param indicesBuffer    GL buffer of the indices in the stencil table
/// @param weightsBuffer    GL buffer of the weights in the stencil table
/// @param duWeightsBuffer  GL buffer of the du weights in the stencil table
/// @param dvWeightsBuffer  GL buffer of the dv weights in the stencil table
/// @param duuWeightsBuffer GL buffer of the duu weights in the stencil table
/// @param duvWeightsBuffer GL buffer of the duv weights in the stencil table
/// @param dvvWeightsBuffer GL buffer of the dvv weights in the stencil table
/// @param start            start index of stencil table
/// @param end              end index of stencil table
///
bool EvalStencils(GLuint srcBuffer,
                  OpenSubdiv::Osd::BufferDescriptor const &srcDesc,
                  GLuint dstBuffer,
                  OpenSubdiv::Osd::BufferDescriptor const &dstDesc,
                  GLuint duBuffer,
                  OpenSubdiv::Osd::BufferDescriptor const &duDesc,
                  GLuint dvBuffer,
                  OpenSubdiv::Osd::BufferDescriptor const &dvDesc,
                  GLuint duuBuffer,
                  OpenSubdiv::Osd::BufferDescriptor const &duuDesc,
                  GLuint duvBuffer,
                  OpenSubdiv::Osd::BufferDescriptor const &duvDesc,
                  GLuint dvvBuffer,
                  OpenSubdiv::Osd::BufferDescriptor const &dvvDesc,
                  GLuint sizesBuffer,
                  GLuint offsetsBuffer,
                  GLuint indicesBuffer,
                  GLuint weightsBuffer,
                  GLuint duWeightsBuffer,
                  GLuint dvWeightsBuffer,
                  GLuint duuWeightsBuffer,
                  GLuint duvWeightsBuffer,
                  GLuint dvvWeightsBuffer,
                  int start,
                  int end) const;

/// ----------------------------------------------------------------------
///
///   Limit evaluations with PatchTable
///
/// ----------------------------------------------------------------------

/// \brief Generic limit eval function. This function has the same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer      Input primvar buffer;
///                       must have BindVBO() method returning a GL
///                       buffer object of source data
/// @param srcDesc        vertex buffer descriptor for the input buffer
/// @param dstBuffer      Output primvar buffer;
///                       must have BindVBO() method returning a GL
///                       buffer object of destination data
/// @param dstDesc        vertex buffer descriptor for the output buffer
/// @param numPatchCoords number of patchCoords.
/// @param patchCoords    array of locations to be evaluated;
///                       must have BindVBO() method returning an
///                       array of PatchCoord struct in VBO.
/// @param patchTable     GLPatchTable or equivalent
/// @param instance       cached compiled instance. Clients are expected to
///                       pre-compile an instance of this class and pass it
///                       to this function. If it is null, the kernel is
///                       still computed by instantiating an on-demand
///                       evaluator, although
///                       it may cause a performance problem.
+ /// + /// @param deviceContext not used in the GLXFB evaluator + /// + template<typename SRC_BUFFER, + typename DST_BUFFER, + typename PATCHCOORD_BUFFER, + typename PATCH_TABLE> + static bool EvalPatches(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable, + GLComputeEvaluator const *instance, + void *deviceContext = NULL) + { + + if (instance) { + return instance->EvalPatches( + srcBuffer, srcDesc, dstBuffer, dstDesc, numPatchCoords, patchCoords, patchTable); + } + else { + // Create an instance on demand (slow) + (void)deviceContext; // unused + instance = Create(srcDesc, + dstDesc, + OpenSubdiv::Osd::BufferDescriptor(), + OpenSubdiv::Osd::BufferDescriptor()); + if (instance) { + bool r = instance->EvalPatches( + srcBuffer, srcDesc, dstBuffer, dstDesc, numPatchCoords, patchCoords, patchTable); + delete instance; + return r; + } + return false; + } + } + + /// \brief Generic limit eval function. This function has a same + /// signature as other device kernels have so that it can be called + /// in the same way. + /// + /// @param srcBuffer Input primvar buffer. 
+ /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param duBuffer Output buffer derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duDesc vertex buffer descriptor for the duBuffer + /// + /// @param dvBuffer Output buffer derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvDesc vertex buffer descriptor for the dvBuffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// must have BindVBO() method returning an + /// array of PatchCoord struct in VBO. + /// + /// @param patchTable GLPatchTable or equivalent + /// + /// @param instance cached compiled instance. Clients are supposed to + /// pre-compile an instance of this class and provide + /// to this function. If it's null the kernel still + /// compute by instantiating on-demand kernel although + /// it may cause a performance problem. 
+ /// + /// @param deviceContext not used in the GLXFB evaluator + /// + template<typename SRC_BUFFER, + typename DST_BUFFER, + typename PATCHCOORD_BUFFER, + typename PATCH_TABLE> + static bool EvalPatches(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable, + GLComputeEvaluator const *instance, + void *deviceContext = NULL) + { + + if (instance) { + return instance->EvalPatches(srcBuffer, + srcDesc, + dstBuffer, + dstDesc, + duBuffer, + duDesc, + dvBuffer, + dvDesc, + numPatchCoords, + patchCoords, + patchTable); + } + else { + // Create an instance on demand (slow) + (void)deviceContext; // unused + instance = Create(srcDesc, dstDesc, duDesc, dvDesc); + if (instance) { + bool r = instance->EvalPatches(srcBuffer, + srcDesc, + dstBuffer, + dstDesc, + duBuffer, + duDesc, + dvBuffer, + dvDesc, + numPatchCoords, + patchCoords, + patchTable); + delete instance; + return r; + } + return false; + } + } + + /// \brief Generic limit eval function. This function has a same + /// signature as other device kernels have so that it can be called + /// in the same way. + /// + /// @param srcBuffer Input primvar buffer. 
+ /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param duBuffer Output buffer derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duDesc vertex buffer descriptor for the duBuffer + /// + /// @param dvBuffer Output buffer derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvDesc vertex buffer descriptor for the dvBuffer + /// + /// @param duuBuffer Output buffer 2nd derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duuDesc vertex buffer descriptor for the duuBuffer + /// + /// @param duvBuffer Output buffer 2nd derivative wrt u and v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duvDesc vertex buffer descriptor for the duvBuffer + /// + /// @param dvvBuffer Output buffer 2nd derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvvDesc vertex buffer descriptor for the dvvBuffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// must have BindVBO() method returning an + /// array of PatchCoord struct in VBO. + /// + /// @param patchTable GLPatchTable or equivalent + /// + /// @param instance cached compiled instance. Clients are supposed to + /// pre-compile an instance of this class and provide + /// to this function. 
If it's null the kernel still + /// compute by instantiating on-demand kernel although + /// it may cause a performance problem. + /// + /// @param deviceContext not used in the GLXFB evaluator + /// + template<typename SRC_BUFFER, + typename DST_BUFFER, + typename PATCHCOORD_BUFFER, + typename PATCH_TABLE> + static bool EvalPatches(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvDesc, + DST_BUFFER *duuBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duuDesc, + DST_BUFFER *duvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duvDesc, + DST_BUFFER *dvvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvvDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable, + GLComputeEvaluator const *instance, + void *deviceContext = NULL) + { + + if (instance) { + return instance->EvalPatches(srcBuffer, + srcDesc, + dstBuffer, + dstDesc, + duBuffer, + duDesc, + dvBuffer, + dvDesc, + duuBuffer, + duuDesc, + duvBuffer, + duvDesc, + dvvBuffer, + dvvDesc, + numPatchCoords, + patchCoords, + patchTable); + } + else { + // Create an instance on demand (slow) + (void)deviceContext; // unused + instance = Create(srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc); + if (instance) { + bool r = instance->EvalPatches(srcBuffer, + srcDesc, + dstBuffer, + dstDesc, + duBuffer, + duDesc, + dvBuffer, + dvDesc, + duuBuffer, + duuDesc, + duvBuffer, + duvDesc, + dvvBuffer, + dvvDesc, + numPatchCoords, + patchCoords, + patchTable); + delete instance; + return r; + } + return false; + } + } + + /// \brief Generic limit eval function. This function has a same + /// signature as other device kernels have so that it can be called + /// in the same way. + /// + /// @param srcBuffer Input primvar buffer. 
+ /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// must have BindVBO() method returning an + /// array of PatchCoord struct in VBO. + /// + /// @param patchTable GLPatchTable or equivalent + /// + template<typename SRC_BUFFER, + typename DST_BUFFER, + typename PATCHCOORD_BUFFER, + typename PATCH_TABLE> + bool EvalPatches(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable) const + { + + return EvalPatches(srcBuffer->BindVBO(), + srcDesc, + dstBuffer->BindVBO(), + dstDesc, + 0, + OpenSubdiv::Osd::BufferDescriptor(), + 0, + OpenSubdiv::Osd::BufferDescriptor(), + numPatchCoords, + patchCoords->BindVBO(), + patchTable->GetPatchArrays(), + patchTable->GetPatchIndexBuffer(), + patchTable->GetPatchParamBuffer()); + } + + /// \brief Generic limit eval function with derivatives. This function has + /// a same signature as other device kernels have so that it can be + /// called in the same way. + /// + /// @param srcBuffer Input primvar buffer. 
+ /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param duBuffer Output buffer derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duDesc vertex buffer descriptor for the duBuffer + /// + /// @param dvBuffer Output buffer derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvDesc vertex buffer descriptor for the dvBuffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// + /// @param patchTable GLPatchTable or equivalent + /// + template<typename SRC_BUFFER, + typename DST_BUFFER, + typename PATCHCOORD_BUFFER, + typename PATCH_TABLE> + bool EvalPatches(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable) const + { + + return EvalPatches(srcBuffer->BindVBO(), + srcDesc, + dstBuffer->BindVBO(), + dstDesc, + duBuffer->BindVBO(), + duDesc, + dvBuffer->BindVBO(), + dvDesc, + numPatchCoords, + patchCoords->BindVBO(), + patchTable->GetPatchArrays(), + patchTable->GetPatchIndexBuffer(), + patchTable->GetPatchParamBuffer()); + } + + /// \brief Generic limit eval function with derivatives. This function has + /// a same signature as other device kernels have so that it can be + /// called in the same way. 
+ /// + /// @param srcBuffer Input primvar buffer. + /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param duBuffer Output buffer derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duDesc vertex buffer descriptor for the duBuffer + /// + /// @param dvBuffer Output buffer derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvDesc vertex buffer descriptor for the dvBuffer + /// + /// @param duuBuffer Output buffer 2nd derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duuDesc vertex buffer descriptor for the duuBuffer + /// + /// @param duvBuffer Output buffer 2nd derivative wrt u and v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duvDesc vertex buffer descriptor for the duvBuffer + /// + /// @param dvvBuffer Output buffer 2nd derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvvDesc vertex buffer descriptor for the dvvBuffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. 
+ /// + /// @param patchTable GLPatchTable or equivalent + /// + template<typename SRC_BUFFER, + typename DST_BUFFER, + typename PATCHCOORD_BUFFER, + typename PATCH_TABLE> + bool EvalPatches(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvDesc, + DST_BUFFER *duuBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duuDesc, + DST_BUFFER *duvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duvDesc, + DST_BUFFER *dvvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvvDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable) const + { + + return EvalPatches(srcBuffer->BindVBO(), + srcDesc, + dstBuffer->BindVBO(), + dstDesc, + duBuffer->BindVBO(), + duDesc, + dvBuffer->BindVBO(), + dvDesc, + duuBuffer->BindVBO(), + duuDesc, + duvBuffer->BindVBO(), + duvDesc, + dvvBuffer->BindVBO(), + dvvDesc, + numPatchCoords, + patchCoords->BindVBO(), + patchTable->GetPatchArrays(), + patchTable->GetPatchIndexBuffer(), + patchTable->GetPatchParamBuffer()); + } + + bool EvalPatches(GLuint srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + GLuint dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + GLuint duBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duDesc, + GLuint dvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvDesc, + int numPatchCoords, + GLuint patchCoordsBuffer, + const OpenSubdiv::Osd::PatchArrayVector &patchArrays, + GLuint patchIndexBuffer, + GLuint patchParamsBuffer) const; + + bool EvalPatches(GLuint srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + GLuint dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + GLuint duBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duDesc, + GLuint dvBuffer, + OpenSubdiv::Osd::BufferDescriptor const 
&dvDesc, + GLuint duuBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duuDesc, + GLuint duvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duvDesc, + GLuint dvvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvvDesc, + int numPatchCoords, + GLuint patchCoordsBuffer, + const OpenSubdiv::Osd::PatchArrayVector &patchArrays, + GLuint patchIndexBuffer, + GLuint patchParamsBuffer) const; + + /// \brief Generic limit eval function. This function has a same + /// signature as other device kernels have so that it can be called + /// in the same way. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// must have BindVBO() method returning an + /// array of PatchCoord struct in VBO. + /// + /// @param patchTable GLPatchTable or equivalent + /// + /// @param instance cached compiled instance. Clients are supposed to + /// pre-compile an instance of this class and provide + /// to this function. If it's null the kernel still + /// compute by instantiating on-demand kernel although + /// it may cause a performance problem. 
+ /// + /// @param deviceContext not used in the GL compute evaluator + /// + template<typename SRC_BUFFER, + typename DST_BUFFER, + typename PATCHCOORD_BUFFER, + typename PATCH_TABLE> + static bool EvalPatchesVarying(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable, + GLComputeEvaluator const *instance, + void *deviceContext = NULL) + { + + if (instance) { + return instance->EvalPatchesVarying( + srcBuffer, srcDesc, dstBuffer, dstDesc, numPatchCoords, patchCoords, patchTable); + } + else { + // Create an instance on demand (slow) + (void)deviceContext; // unused + instance = Create(srcDesc, + dstDesc, + OpenSubdiv::Osd::BufferDescriptor(), + OpenSubdiv::Osd::BufferDescriptor()); + if (instance) { + bool r = instance->EvalPatchesVarying( + srcBuffer, srcDesc, dstBuffer, dstDesc, numPatchCoords, patchCoords, patchTable); + delete instance; + return r; + } + return false; + } + } + + /// \brief Generic limit eval function. This function has the same + /// signature as other device kernels have so that it can be called + /// in the same way. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// must have BindVBO() method returning an + /// array of PatchCoord struct in VBO.
+ /// + /// @param patchTable GLPatchTable or equivalent + /// + template<typename SRC_BUFFER, + typename DST_BUFFER, + typename PATCHCOORD_BUFFER, + typename PATCH_TABLE> + bool EvalPatchesVarying(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable) const + { + // No derivative outputs here: pass 0 and default-constructed descriptors for du/dv. + return EvalPatches(srcBuffer->BindVBO(), + srcDesc, + dstBuffer->BindVBO(), + dstDesc, + 0, + OpenSubdiv::Osd::BufferDescriptor(), + 0, + OpenSubdiv::Osd::BufferDescriptor(), + numPatchCoords, + patchCoords->BindVBO(), + patchTable->GetVaryingPatchArrays(), + patchTable->GetVaryingPatchIndexBuffer(), + patchTable->GetPatchParamBuffer()); + } + + /// \brief Generic limit eval function. This function has the same + /// signature as other device kernels have so that it can be called + /// in the same way. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param duBuffer Output buffer derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duDesc vertex buffer descriptor for the duBuffer + /// + /// @param dvBuffer Output buffer derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvDesc vertex buffer descriptor for the dvBuffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated.
+ /// must have BindVBO() method returning an + /// array of PatchCoord struct in VBO. + /// + /// @param patchTable GLPatchTable or equivalent + /// + /// @param instance cached compiled instance. Clients are supposed to + /// pre-compile an instance of this class and provide + /// it to this function. If it's null the evaluation is still + /// performed by instantiating an on-demand kernel, although + /// this may cause a performance problem. + /// + /// @param deviceContext not used in the GL compute evaluator + /// + template<typename SRC_BUFFER, + typename DST_BUFFER, + typename PATCHCOORD_BUFFER, + typename PATCH_TABLE> + static bool EvalPatchesVarying(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable, + GLComputeEvaluator const *instance, + void *deviceContext = NULL) + { + + if (instance) { + return instance->EvalPatchesVarying(srcBuffer, + srcDesc, + dstBuffer, + dstDesc, + duBuffer, + duDesc, + dvBuffer, + dvDesc, + numPatchCoords, + patchCoords, + patchTable); + } + else { + // Create an instance on demand (slow) + (void)deviceContext; // unused + instance = Create(srcDesc, dstDesc, duDesc, dvDesc); + if (instance) { + bool r = instance->EvalPatchesVarying(srcBuffer, + srcDesc, + dstBuffer, + dstDesc, + duBuffer, + duDesc, + dvBuffer, + dvDesc, + numPatchCoords, + patchCoords, + patchTable); + delete instance; + return r; + } + return false; + } + } + + /// \brief Generic limit eval function. This function has the same + /// signature as other device kernels have so that it can be called + /// in the same way. + /// + /// @param srcBuffer Input primvar buffer.
+ /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param duBuffer Output buffer derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duDesc vertex buffer descriptor for the duBuffer + /// + /// @param dvBuffer Output buffer derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvDesc vertex buffer descriptor for the dvBuffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// must have BindVBO() method returning an + /// array of PatchCoord struct in VBO. + /// + /// @param patchTable GLPatchTable or equivalent + /// + template<typename SRC_BUFFER, + typename DST_BUFFER, + typename PATCHCOORD_BUFFER, + typename PATCH_TABLE> + bool EvalPatchesVarying(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable) const + { + // Forward to the GLuint overload using the patch table's varying patch arrays and index buffer. + return EvalPatches(srcBuffer->BindVBO(), + srcDesc, + dstBuffer->BindVBO(), + dstDesc, + duBuffer->BindVBO(), + duDesc, + dvBuffer->BindVBO(), + dvDesc, + numPatchCoords, + patchCoords->BindVBO(), + patchTable->GetVaryingPatchArrays(), + patchTable->GetVaryingPatchIndexBuffer(), + patchTable->GetPatchParamBuffer()); + } + + /// \brief Generic limit eval function.
This function has the same + /// signature as other device kernels have so that it can be called + /// in the same way. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param duBuffer Output buffer derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duDesc vertex buffer descriptor for the duBuffer + /// + /// @param dvBuffer Output buffer derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvDesc vertex buffer descriptor for the dvBuffer + /// + /// @param duuBuffer Output buffer 2nd derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duuDesc vertex buffer descriptor for the duuBuffer + /// + /// @param duvBuffer Output buffer 2nd derivative wrt u and v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duvDesc vertex buffer descriptor for the duvBuffer + /// + /// @param dvvBuffer Output buffer 2nd derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvvDesc vertex buffer descriptor for the dvvBuffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// must have BindVBO() method returning an + /// array of PatchCoord struct in VBO. + /// + /// @param patchTable GLPatchTable or equivalent + /// + /// @param instance cached compiled instance.
Clients are supposed to + /// pre-compile an instance of this class and provide + /// it to this function. If it's null the evaluation is still + /// performed by instantiating an on-demand kernel, although + /// this may cause a performance problem. + /// + /// @param deviceContext not used in the GL compute evaluator + /// + template<typename SRC_BUFFER, + typename DST_BUFFER, + typename PATCHCOORD_BUFFER, + typename PATCH_TABLE> + static bool EvalPatchesVarying(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvDesc, + DST_BUFFER *duuBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duuDesc, + DST_BUFFER *duvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duvDesc, + DST_BUFFER *dvvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvvDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable, + GLComputeEvaluator const *instance, + void *deviceContext = NULL) + { + + if (instance) { + return instance->EvalPatchesVarying(srcBuffer, + srcDesc, + dstBuffer, + dstDesc, + duBuffer, + duDesc, + dvBuffer, + dvDesc, + duuBuffer, + duuDesc, + duvBuffer, + duvDesc, + dvvBuffer, + dvvDesc, + numPatchCoords, + patchCoords, + patchTable); + } + else { + // Create an instance on demand (slow) + (void)deviceContext; // unused + instance = Create(srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc); + if (instance) { + bool r = instance->EvalPatchesVarying(srcBuffer, + srcDesc, + dstBuffer, + dstDesc, + duBuffer, + duDesc, + dvBuffer, + dvDesc, + duuBuffer, + duuDesc, + duvBuffer, + duvDesc, + dvvBuffer, + dvvDesc, + numPatchCoords, + patchCoords, + patchTable); + delete instance; + return r; + } + return false; + } + } + + /// \brief Generic limit eval function.
This function has the same + /// signature as other device kernels have so that it can be called + /// in the same way. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param duBuffer Output buffer derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duDesc vertex buffer descriptor for the duBuffer + /// + /// @param dvBuffer Output buffer derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvDesc vertex buffer descriptor for the dvBuffer + /// + /// @param duuBuffer Output buffer 2nd derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duuDesc vertex buffer descriptor for the duuBuffer + /// + /// @param duvBuffer Output buffer 2nd derivative wrt u and v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duvDesc vertex buffer descriptor for the duvBuffer + /// + /// @param dvvBuffer Output buffer 2nd derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvvDesc vertex buffer descriptor for the dvvBuffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// must have BindVBO() method returning an + /// array of PatchCoord struct in VBO.
+ /// + /// @param patchTable GLPatchTable or equivalent + /// + template<typename SRC_BUFFER, + typename DST_BUFFER, + typename PATCHCOORD_BUFFER, + typename PATCH_TABLE> + bool EvalPatchesVarying(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvDesc, + DST_BUFFER *duuBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duuDesc, + DST_BUFFER *duvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duvDesc, + DST_BUFFER *dvvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvvDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable) const + { + + return EvalPatches(srcBuffer->BindVBO(), + srcDesc, + dstBuffer->BindVBO(), + dstDesc, + duBuffer->BindVBO(), + duDesc, + dvBuffer->BindVBO(), + dvDesc, + duuBuffer->BindVBO(), + duuDesc, + duvBuffer->BindVBO(), + duvDesc, + dvvBuffer->BindVBO(), + dvvDesc, + numPatchCoords, + patchCoords->BindVBO(), + patchTable->GetVaryingPatchArrays(), + patchTable->GetVaryingPatchIndexBuffer(), + patchTable->GetPatchParamBuffer()); + } + + /// \brief Generic limit eval function. This function has the same + /// signature as other device kernels have so that it can be called + /// in the same way. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated.
+ /// must have BindVBO() method returning an + /// array of PatchCoord struct in VBO. + /// + /// @param patchTable GLPatchTable or equivalent + /// + /// @param fvarChannel face-varying channel + /// + /// @param instance cached compiled instance. Clients are supposed to + /// pre-compile an instance of this class and provide + /// it to this function. If it's null the evaluation is still + /// performed by instantiating an on-demand kernel, although + /// this may cause a performance problem. + /// + /// @param deviceContext not used in the GL compute evaluator + /// + template<typename SRC_BUFFER, + typename DST_BUFFER, + typename PATCHCOORD_BUFFER, + typename PATCH_TABLE> + static bool EvalPatchesFaceVarying(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable, + int fvarChannel, + GLComputeEvaluator const *instance, + void *deviceContext = NULL) + { + + if (instance) { + return instance->EvalPatchesFaceVarying(srcBuffer, + srcDesc, + dstBuffer, + dstDesc, + numPatchCoords, + patchCoords, + patchTable, + fvarChannel); + } + else { + // Create an instance on demand (slow) + (void)deviceContext; // unused + instance = Create(srcDesc, + dstDesc, + OpenSubdiv::Osd::BufferDescriptor(), + OpenSubdiv::Osd::BufferDescriptor()); + if (instance) { + bool r = instance->EvalPatchesFaceVarying(srcBuffer, + srcDesc, + dstBuffer, + dstDesc, + numPatchCoords, + patchCoords, + patchTable, + fvarChannel); + delete instance; + return r; + } + return false; + } + } + + /// \brief Generic limit eval function. This function has the same + /// signature as other device kernels have so that it can be called + /// in the same way. + /// + /// @param srcBuffer Input primvar buffer.
+ /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// must have BindVBO() method returning an + /// array of PatchCoord struct in VBO. + /// + /// @param patchTable GLPatchTable or equivalent + /// + /// @param fvarChannel face-varying channel + /// + template<typename SRC_BUFFER, + typename DST_BUFFER, + typename PATCHCOORD_BUFFER, + typename PATCH_TABLE> + bool EvalPatchesFaceVarying(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable, + int fvarChannel = 0) const + { + // No derivative outputs here: pass 0 and default-constructed descriptors for du/dv. + return EvalPatches(srcBuffer->BindVBO(), + srcDesc, + dstBuffer->BindVBO(), + dstDesc, + 0, + OpenSubdiv::Osd::BufferDescriptor(), + 0, + OpenSubdiv::Osd::BufferDescriptor(), + numPatchCoords, + patchCoords->BindVBO(), + patchTable->GetFVarPatchArrays(fvarChannel), + patchTable->GetFVarPatchIndexBuffer(fvarChannel), + patchTable->GetFVarPatchParamBuffer(fvarChannel)); + } + + /// \brief Generic limit eval function. This function has the same + /// signature as other device kernels have so that it can be called + /// in the same way. + /// + /// @param srcBuffer Input primvar buffer.
+ /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param duBuffer Output buffer derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duDesc vertex buffer descriptor for the duBuffer + /// + /// @param dvBuffer Output buffer derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvDesc vertex buffer descriptor for the dvBuffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// must have BindVBO() method returning an + /// array of PatchCoord struct in VBO. + /// + /// @param patchTable GLPatchTable or equivalent + /// + /// @param fvarChannel face-varying channel + /// + /// @param instance cached compiled instance. Clients are supposed to + /// pre-compile an instance of this class and provide + /// it to this function. If it's null the evaluation is still + /// performed by instantiating an on-demand kernel, although + /// this may cause a performance problem.
+ /// + /// @param deviceContext not used in the GL compute evaluator + /// + template<typename SRC_BUFFER, + typename DST_BUFFER, + typename PATCHCOORD_BUFFER, + typename PATCH_TABLE> + static bool EvalPatchesFaceVarying(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable, + int fvarChannel, + GLComputeEvaluator const *instance, + void *deviceContext = NULL) + { + + if (instance) { + return instance->EvalPatchesFaceVarying(srcBuffer, + srcDesc, + dstBuffer, + dstDesc, + duBuffer, + duDesc, + dvBuffer, + dvDesc, + numPatchCoords, + patchCoords, + patchTable, + fvarChannel); + } + else { + // Create an instance on demand (slow) + (void)deviceContext; // unused + instance = Create(srcDesc, dstDesc, duDesc, dvDesc); + if (instance) { + bool r = instance->EvalPatchesFaceVarying(srcBuffer, + srcDesc, + dstBuffer, + dstDesc, + duBuffer, + duDesc, + dvBuffer, + dvDesc, + numPatchCoords, + patchCoords, + patchTable, + fvarChannel); + delete instance; + return r; + } + return false; + } + } + + /// \brief Generic limit eval function. This function has the same + /// signature as other device kernels have so that it can be called + /// in the same way. + /// + /// @param srcBuffer Input primvar buffer.
+ /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param duBuffer Output buffer derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duDesc vertex buffer descriptor for the duBuffer + /// + /// @param dvBuffer Output buffer derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvDesc vertex buffer descriptor for the dvBuffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// must have BindVBO() method returning an + /// array of PatchCoord struct in VBO.
+ /// + /// @param patchTable GLPatchTable or equivalent + /// + /// @param fvarChannel face-varying channel + /// + template<typename SRC_BUFFER, + typename DST_BUFFER, + typename PATCHCOORD_BUFFER, + typename PATCH_TABLE> + bool EvalPatchesFaceVarying(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable, + int fvarChannel = 0) const + { + + return EvalPatches(srcBuffer->BindVBO(), + srcDesc, + dstBuffer->BindVBO(), + dstDesc, + duBuffer->BindVBO(), + duDesc, + dvBuffer->BindVBO(), + dvDesc, + numPatchCoords, + patchCoords->BindVBO(), + patchTable->GetFVarPatchArrays(fvarChannel), + patchTable->GetFVarPatchIndexBuffer(fvarChannel), + patchTable->GetFVarPatchParamBuffer(fvarChannel)); + } + + /// \brief Generic limit eval function. This function has the same + /// signature as other device kernels have so that it can be called + /// in the same way. + /// + /// @param srcBuffer Input primvar buffer.
+ /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param duBuffer Output buffer derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duDesc vertex buffer descriptor for the duBuffer + /// + /// @param dvBuffer Output buffer derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvDesc vertex buffer descriptor for the dvBuffer + /// + /// @param duuBuffer Output buffer 2nd derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duuDesc vertex buffer descriptor for the duuBuffer + /// + /// @param duvBuffer Output buffer 2nd derivative wrt u and v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duvDesc vertex buffer descriptor for the duvBuffer + /// + /// @param dvvBuffer Output buffer 2nd derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvvDesc vertex buffer descriptor for the dvvBuffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// must have BindVBO() method returning an + /// array of PatchCoord struct in VBO. + /// + /// @param patchTable GLPatchTable or equivalent + /// + /// @param fvarChannel face-varying channel + /// + /// @param instance cached compiled instance. Clients are supposed to + /// pre-compile an instance of this class and provide + /// it to this function.
If it's null the evaluation is still + /// performed by instantiating an on-demand kernel, although + /// this may cause a performance problem. + /// + /// @param deviceContext not used in the GL compute evaluator + /// + template<typename SRC_BUFFER, + typename DST_BUFFER, + typename PATCHCOORD_BUFFER, + typename PATCH_TABLE> + static bool EvalPatchesFaceVarying(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvDesc, + DST_BUFFER *duuBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duuDesc, + DST_BUFFER *duvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duvDesc, + DST_BUFFER *dvvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvvDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable, + int fvarChannel, + GLComputeEvaluator const *instance, + void *deviceContext = NULL) + { + + if (instance) { + return instance->EvalPatchesFaceVarying(srcBuffer, + srcDesc, + dstBuffer, + dstDesc, + duBuffer, + duDesc, + dvBuffer, + dvDesc, + duuBuffer, + duuDesc, + duvBuffer, + duvDesc, + dvvBuffer, + dvvDesc, + numPatchCoords, + patchCoords, + patchTable, + fvarChannel); + } + else { + // Create an instance on demand (slow) + (void)deviceContext; // unused + instance = Create(srcDesc, dstDesc, duDesc, dvDesc, duuDesc, duvDesc, dvvDesc); + if (instance) { + bool r = instance->EvalPatchesFaceVarying(srcBuffer, + srcDesc, + dstBuffer, + dstDesc, + duBuffer, + duDesc, + dvBuffer, + dvDesc, + duuBuffer, + duuDesc, + duvBuffer, + duvDesc, + dvvBuffer, + dvvDesc, + numPatchCoords, + patchCoords, + patchTable, + fvarChannel); + delete instance; + return r; + } + return false; + } + } + + /// \brief Generic limit eval function.
This function has the same + /// signature as other device kernels have so that it can be called + /// in the same way. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param duBuffer Output buffer derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duDesc vertex buffer descriptor for the duBuffer + /// + /// @param dvBuffer Output buffer derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvDesc vertex buffer descriptor for the dvBuffer + /// + /// @param duuBuffer Output buffer 2nd derivative wrt u + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duuDesc vertex buffer descriptor for the duuBuffer + /// + /// @param duvBuffer Output buffer 2nd derivative wrt u and v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param duvDesc vertex buffer descriptor for the duvBuffer + /// + /// @param dvvBuffer Output buffer 2nd derivative wrt v + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dvvDesc vertex buffer descriptor for the dvvBuffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// must have BindVBO() method returning an + /// array of PatchCoord struct in VBO.
+ /// + /// @param patchTable GLPatchTable or equivalent + /// + /// @param fvarChannel face-varying channel + /// + template<typename SRC_BUFFER, + typename DST_BUFFER, + typename PATCHCOORD_BUFFER, + typename PATCH_TABLE> + bool EvalPatchesFaceVarying(SRC_BUFFER *srcBuffer, + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvDesc, + DST_BUFFER *duuBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duuDesc, + DST_BUFFER *duvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &duvDesc, + DST_BUFFER *dvvBuffer, + OpenSubdiv::Osd::BufferDescriptor const &dvvDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable, + int fvarChannel = 0) const + { + // Forward to the GLuint overload using the face-varying patch arrays and buffers of fvarChannel. + return EvalPatches(srcBuffer->BindVBO(), + srcDesc, + dstBuffer->BindVBO(), + dstDesc, + duBuffer->BindVBO(), + duDesc, + dvBuffer->BindVBO(), + dvDesc, + duuBuffer->BindVBO(), + duuDesc, + duvBuffer->BindVBO(), + duvDesc, + dvvBuffer->BindVBO(), + dvvDesc, + numPatchCoords, + patchCoords->BindVBO(), + patchTable->GetFVarPatchArrays(fvarChannel), + patchTable->GetFVarPatchIndexBuffer(fvarChannel), + patchTable->GetFVarPatchParamBuffer(fvarChannel)); + } + + /// ---------------------------------------------------------------------- + /// + /// Other methods + /// + /// ---------------------------------------------------------------------- + + /// Configure GLSL kernel. A valid GL context must be made current before + /// calling this function. Returns false if it fails to compile the kernel.
+ bool Compile( + OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + OpenSubdiv::Osd::BufferDescriptor const &duDesc = OpenSubdiv::Osd::BufferDescriptor(), + OpenSubdiv::Osd::BufferDescriptor const &dvDesc = OpenSubdiv::Osd::BufferDescriptor(), + OpenSubdiv::Osd::BufferDescriptor const &duuDesc = OpenSubdiv::Osd::BufferDescriptor(), + OpenSubdiv::Osd::BufferDescriptor const &duvDesc = OpenSubdiv::Osd::BufferDescriptor(), + OpenSubdiv::Osd::BufferDescriptor const &dvvDesc = OpenSubdiv::Osd::BufferDescriptor()); + + /// Wait until the dispatched kernel finishes. + static void Synchronize(void *deviceContext); + + private: + /// State of the stencil kernel: a GL program handle plus GLuint handles for + /// its uniforms (presumably uniform locations; confirm in the .cc file). + struct _StencilKernel { + _StencilKernel(); + ~_StencilKernel(); + bool Compile(OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + OpenSubdiv::Osd::BufferDescriptor const &duDesc, + OpenSubdiv::Osd::BufferDescriptor const &dvDesc, + OpenSubdiv::Osd::BufferDescriptor const &duuDesc, + OpenSubdiv::Osd::BufferDescriptor const &duvDesc, + OpenSubdiv::Osd::BufferDescriptor const &dvvDesc, + int workGroupSize); + GLuint program; + GLuint uniformStart; + GLuint uniformEnd; + GLuint uniformSrcOffset; + GLuint uniformDstOffset; + GLuint uniformDuDesc; + GLuint uniformDvDesc; + GLuint uniformDuuDesc; + GLuint uniformDuvDesc; + GLuint uniformDvvDesc; + } _stencilKernel; + + /// State of the patch kernel: a GL program handle plus GLuint handles for + /// its uniforms (presumably uniform locations; confirm in the .cc file). + struct _PatchKernel { + _PatchKernel(); + ~_PatchKernel(); + bool Compile(OpenSubdiv::Osd::BufferDescriptor const &srcDesc, + OpenSubdiv::Osd::BufferDescriptor const &dstDesc, + OpenSubdiv::Osd::BufferDescriptor const &duDesc, + OpenSubdiv::Osd::BufferDescriptor const &dvDesc, + OpenSubdiv::Osd::BufferDescriptor const &duuDesc, + OpenSubdiv::Osd::BufferDescriptor const &duvDesc, + OpenSubdiv::Osd::BufferDescriptor const &dvvDesc, + int workGroupSize); + GLuint program; + GLuint uniformSrcOffset; + GLuint uniformDstOffset; + GLuint uniformPatchArray; + GLuint uniformDuDesc; + GLuint 
uniformDvDesc; + GLuint uniformDuuDesc; + GLuint uniformDuvDesc; + GLuint uniformDvvDesc; + } _patchKernel; + + int _workGroupSize; + GLuint _patchArraysSSBO; + + int GetDispatchSize(int count) const; + + void DispatchCompute(int totalDispatchSize) const; +}; +} // namespace opensubdiv +} // namespace blender + +#endif // OPENSUBDIV_GL_COMPUTE_EVALUATOR_H_ diff --git a/intern/opensubdiv/internal/evaluator/shaders/glsl_compute_kernel.glsl b/intern/opensubdiv/internal/evaluator/shaders/glsl_compute_kernel.glsl new file mode 100644 index 00000000000..2a58fa10ea0 --- /dev/null +++ b/intern/opensubdiv/internal/evaluator/shaders/glsl_compute_kernel.glsl @@ -0,0 +1,316 @@ +// +// Copyright 2013 Pixar +// +// Licensed under the Apache License, Version 2.0 (the "Apache License") +// with the following modification; you may not use this file except in +// compliance with the Apache License and the following modification to it: +// Section 6. Trademarks. is deleted and replaced with: +// +// 6. Trademarks. This License does not grant permission to use the trade +// names, trademarks, service marks, or product names of the Licensor +// and its affiliates, except as required to comply with Section 4(c) of +// the License and to reproduce the content of the NOTICE file. +// +// You may obtain a copy of the Apache License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the Apache License with the above modification is +// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the Apache License for the specific +// language governing permissions and limitations under the Apache License.
+//
+
+//------------------------------------------------------------------------------
+
+// Compile-time inputs that are NOT defined in this file and must be supplied
+// by whoever assembles the final shader source: WORK_GROUP_SIZE, LENGTH,
+// SRC_STRIDE, DST_STRIDE, the OPENSUBDIV_GLSL_COMPUTE_* switches and, for the
+// patch kernel, the Osd* patch types and functions.
+
+layout(local_size_x=WORK_GROUP_SIZE, local_size_y=1, local_size_z=1) in;
+layout(std430) buffer;
+
+// source and destination buffers
+
+uniform int srcOffset = 0;
+uniform int dstOffset = 0;
+layout(binding=0) buffer src_buffer { float srcVertexBuffer[]; };
+layout(binding=1) buffer dst_buffer { float dstVertexBuffer[]; };
+
+// derivative buffers (if needed)
+// Each *Desc uniform is used as (x = offset, y = length, z = stride).
+
+#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES)
+uniform ivec3 duDesc;
+uniform ivec3 dvDesc;
+layout(binding=2) buffer du_buffer { float duBuffer[]; };
+layout(binding=3) buffer dv_buffer { float dvBuffer[]; };
+#endif
+
+#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES)
+uniform ivec3 duuDesc;
+uniform ivec3 duvDesc;
+uniform ivec3 dvvDesc;
+layout(binding=10) buffer duu_buffer { float duuBuffer[]; };
+layout(binding=11) buffer duv_buffer { float duvBuffer[]; };
+layout(binding=12) buffer dvv_buffer { float dvvBuffer[]; };
+#endif
+
+// Flattened invocation index, shared by both kernels. Each dispatch row holds
+// gl_WorkGroupSize.x * gl_NumWorkGroups.x invocations, so a dispatch that
+// spills into the Y dimension still yields one unique linear index per
+// invocation. For a single-row dispatch this equals gl_GlobalInvocationID.x.
+uint getGlobalInvocationIndex()
+{
+  uint invocations_per_row = gl_WorkGroupSize.x * gl_NumWorkGroups.x;
+  return gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * invocations_per_row;
+}
+
+// stencil buffers
+
+#if defined(OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_STENCILS)
+
+uniform int batchStart = 0;
+uniform int batchEnd = 0;
+layout(binding=4) buffer stencilSizes { int _sizes[]; };
+layout(binding=5) buffer stencilOffsets { int _offsets[]; };
+layout(binding=6) buffer stencilIndices { int _indices[]; };
+layout(binding=7) buffer stencilWeights { float _weights[]; };
+
+#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES)
+layout(binding=8) buffer stencilDuWeights { float _duWeights[]; };
+layout(binding=9) buffer stencilDvWeights { float _dvWeights[]; };
+#endif
+
+#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES)
+layout(binding=13) buffer stencilDuuWeights { float _duuWeights[]; };
+layout(binding=14) buffer stencilDuvWeights { float _duvWeights[]; };
+layout(binding=15) buffer stencilDvvWeights { float _dvvWeights[]; };
+#endif
+
+#endif
+
+// patch buffers
+
+#if defined(OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_PATCHES)
+
+layout(binding=4) buffer patchArray_buffer { OsdPatchArray patchArrayBuffer[]; };
+layout(binding=5) buffer patchCoord_buffer { OsdPatchCoord patchCoords[]; };
+layout(binding=6) buffer patchIndex_buffer { int patchIndexBuffer[]; };
+layout(binding=7) buffer patchParam_buffer { OsdPatchParam patchParamBuffer[]; };
+
+OsdPatchCoord GetPatchCoord(int coordIndex)
+{
+  return patchCoords[coordIndex];
+}
+
+OsdPatchArray GetPatchArray(int arrayIndex)
+{
+  return patchArrayBuffer[arrayIndex];
+}
+
+OsdPatchParam GetPatchParam(int patchIndex)
+{
+  return patchParamBuffer[patchIndex];
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+
+// One element of primvar data: LENGTH consecutive floats.
+struct Vertex {
+  float vertexData[LENGTH];
+};
+
+// Set every component of `v` to zero.
+void clear(out Vertex v) {
+  for (int i = 0; i < LENGTH; ++i) {
+    v.vertexData[i] = 0.0;
+  }
+}
+
+// Load element `index` from the source buffer (srcOffset + index * SRC_STRIDE).
+Vertex readVertex(int index) {
+  Vertex v;
+  int vertexIndex = srcOffset + index * SRC_STRIDE;
+  for (int i = 0; i < LENGTH; ++i) {
+    v.vertexData[i] = srcVertexBuffer[vertexIndex + i];
+  }
+  return v;
+}
+
+// Store `v` as element `index` of the destination buffer
+// (dstOffset + index * DST_STRIDE).
+void writeVertex(int index, Vertex v) {
+  int vertexIndex = dstOffset + index * DST_STRIDE;
+  for (int i = 0; i < LENGTH; ++i) {
+    dstVertexBuffer[vertexIndex + i] = v.vertexData[i];
+  }
+}
+
+// v += weight * src, component-wise.
+void addWithWeight(inout Vertex v, const Vertex src, float weight) {
+  for (int i = 0; i < LENGTH; ++i) {
+    v.vertexData[i] += weight * src.vertexData[i];
+  }
+}
+
+#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES)
+// Store the d/du result for element `index` (offset = duDesc.x, stride = duDesc.z).
+void writeDu(int index, Vertex du) {
+  int duIndex = duDesc.x + index * duDesc.z;
+  for (int i = 0; i < LENGTH; ++i) {
+    duBuffer[duIndex + i] = du.vertexData[i];
+  }
+}
+
+// Store the d/dv result for element `index` (offset = dvDesc.x, stride = dvDesc.z).
+void writeDv(int index, Vertex dv) {
+  int dvIndex = dvDesc.x + index * dvDesc.z;
+  for (int i = 0; i < LENGTH; ++i) {
+    dvBuffer[dvIndex + i] = dv.vertexData[i];
+  }
+}
+#endif
+
+#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES)
+// Store the d2/du2 result for element `index`.
+void writeDuu(int index, Vertex duu) {
+  int duuIndex = duuDesc.x + index * duuDesc.z;
+  for (int i = 0; i < LENGTH; ++i) {
+    duuBuffer[duuIndex + i] = duu.vertexData[i];
+  }
+}
+
+// Store the d2/dudv result for element `index`.
+void writeDuv(int index, Vertex duv) {
+  int duvIndex = duvDesc.x + index * duvDesc.z;
+  for (int i = 0; i < LENGTH; ++i) {
+    duvBuffer[duvIndex + i] = duv.vertexData[i];
+  }
+}
+
+// Store the d2/dv2 result for element `index`.
+void writeDvv(int index, Vertex dvv) {
+  int dvvIndex = dvvDesc.x + index * dvvDesc.z;
+  for (int i = 0; i < LENGTH; ++i) {
+    dvvBuffer[dvvIndex + i] = dvv.vertexData[i];
+  }
+}
+#endif
+
+//------------------------------------------------------------------------------
+#if defined(OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_STENCILS)
+
+// Stencil kernel: one invocation evaluates one stencil in [batchStart, batchEnd).
+void main() {
+  int current = int(getGlobalInvocationIndex()) + batchStart;
+
+  // Padding invocations of the last work group have nothing to do.
+  if (current >= batchEnd) {
+    return;
+  }
+
+  Vertex dst;
+  clear(dst);
+
+  int offset = _offsets[current],
+      size = _sizes[current];
+
+  for (int stencil = 0; stencil < size; ++stencil) {
+    int vindex = offset + stencil;
+    addWithWeight(
+        dst, readVertex(_indices[vindex]), _weights[vindex]);
+  }
+
+  writeVertex(current, dst);
+
+#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES)
+  Vertex du, dv;
+  clear(du);
+  clear(dv);
+  for (int i = 0; i < size; ++i) {
+    // Relies on the compiler to optimize the repeated readVertex out here.
+    Vertex src = readVertex(_indices[offset + i]);
+    addWithWeight(du, src, _duWeights[offset + i]);
+    addWithWeight(dv, src, _dvWeights[offset + i]);
+  }
+
+  if (duDesc.y > 0) { // length
+    writeDu(current, du);
+  }
+  if (dvDesc.y > 0) {
+    writeDv(current, dv);
+  }
+#endif
+#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES)
+  Vertex duu, duv, dvv;
+  clear(duu);
+  clear(duv);
+  clear(dvv);
+  for (int i = 0; i < size; ++i) {
+    // Relies on the compiler to optimize the repeated readVertex out here.
+    Vertex src = readVertex(_indices[offset + i]);
+    addWithWeight(duu, src, _duuWeights[offset + i]);
+    addWithWeight(duv, src, _duvWeights[offset + i]);
+    addWithWeight(dvv, src, _dvvWeights[offset + i]);
+  }
+
+  if (duuDesc.y > 0) { // length
+    writeDuu(current, duu);
+  }
+  if (duvDesc.y > 0) {
+    writeDuv(current, duv);
+  }
+  if (dvvDesc.y > 0) {
+    writeDvv(current, dvv);
+  }
+#endif
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+#if defined(OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_PATCHES)
+
+// PERFORMANCE: stride could be constant, but not as significant as length
+
+// Patch kernel: one invocation evaluates one patch coordinate.
+void main() {
+  // Use the same flattened index as the stencil kernel. With only
+  // gl_GlobalInvocationID.x, every row of a multi-row dispatch would evaluate
+  // the same coordinates again and leave the remaining ones untouched.
+  // NOTE(review): multi-row dispatches are presumably produced by the host
+  // side for large inputs -- confirm. Single-row dispatches are unaffected.
+  int current = int(getGlobalInvocationIndex());
+
+  // The kernel receives no element count, so use the size of the bound
+  // patch-coordinate buffer as a conservative bound: it stops padding
+  // invocations from reading past the end of patchCoords.
+  if (current >= patchCoords.length()) {
+    return;
+  }
+
+  OsdPatchCoord coord = GetPatchCoord(current);
+  OsdPatchArray array = GetPatchArray(coord.arrayIndex);
+  OsdPatchParam param = GetPatchParam(coord.patchIndex);
+
+  int patchType = OsdPatchParamIsRegular(param) ? array.regDesc : array.desc;
+
+  float wP[20], wDu[20], wDv[20], wDuu[20], wDuv[20], wDvv[20];
+  int nPoints = OsdEvaluatePatchBasis(patchType, param,
+      coord.s, coord.t, wP, wDu, wDv, wDuu, wDuv, wDvv);
+
+  Vertex dst;
+  clear(dst);
+#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES)
+  Vertex du, dv;
+  clear(du);
+  clear(dv);
+#endif
+#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES)
+  Vertex duu, duv, dvv;
+  clear(duu);
+  clear(duv);
+  clear(dvv);
+#endif
+
+  int indexBase = array.indexBase + array.stride *
+      (coord.patchIndex - array.primitiveIdBase);
+
+  for (int cv = 0; cv < nPoints; ++cv) {
+    int index = patchIndexBuffer[indexBase + cv];
+    // Read the control vertex once and reuse it for every accumulator.
+    Vertex src = readVertex(index);
+    addWithWeight(dst, src, wP[cv]);
+#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES)
+    addWithWeight(du, src, wDu[cv]);
+    addWithWeight(dv, src, wDv[cv]);
+#endif
+#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES)
+    addWithWeight(duu, src, wDuu[cv]);
+    addWithWeight(duv, src, wDuv[cv]);
+    addWithWeight(dvv, src, wDvv[cv]);
+#endif
+  }
+  writeVertex(current, dst);
+
+#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES)
+  if (duDesc.y > 0) { // length
+    writeDu(current, du);
+  }
+  if (dvDesc.y > 0) {
+    writeDv(current, dv);
+  }
+#endif
+#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES)
+  if (duuDesc.y > 0) { // length
+    writeDuu(current, duu);
+  }
+  if (duvDesc.y > 0) { // length
+    writeDuv(current, duv);
+  }
+  if (dvvDesc.y > 0) {
+    writeDvv(current, dvv);
+  }
+#endif
+}
+
+#endif |