diff options
author | Philip Rebohle <philip.rebohle@tu-dortmund.de> | 2023-03-02 23:12:24 +0300 |
---|---|---|
committer | Philip Rebohle <philip.rebohle@tu-dortmund.de> | 2023-03-03 21:43:01 +0300 |
commit | 11f763068daa3b6c617c58b6239f1e8670717dd1 (patch) | |
tree | f3a0196d449611adce0283b259e85d7876951575 | |
parent | c5b7a669a837cf6ddd2c24eae5de8b12ae9bba31 (diff) |
[dxvk] Accumulate query data into query object (query-rework)
And do so when adding additional query handles, in order
to avoid allocating queries indefinitely if End is never
called, which Halo:MCC supposedly does.
Co-authored-by: Sam Edwards <CFSworks@gmail.com>
-rw-r--r-- | src/dxvk/dxvk_gpu_query.cpp | 96 | ||||
-rw-r--r-- | src/dxvk/dxvk_gpu_query.h | 11 | ||||
-rw-r--r-- | src/util/util_small_vector.h | 2 |
3 files changed, 77 insertions, 32 deletions
diff --git a/src/dxvk/dxvk_gpu_query.cpp b/src/dxvk/dxvk_gpu_query.cpp index 7ceb32d1..aaec6a0f 100644 --- a/src/dxvk/dxvk_gpu_query.cpp +++ b/src/dxvk/dxvk_gpu_query.cpp @@ -28,7 +28,7 @@ namespace dxvk { } - DxvkGpuQueryStatus DxvkGpuQuery::getData(DxvkQueryData& queryData) const { + DxvkGpuQueryStatus DxvkGpuQuery::getData(DxvkQueryData& queryData) { queryData = DxvkQueryData(); // Callers must ensure that no begin call is pending when @@ -37,21 +37,21 @@ namespace dxvk { if (!m_ended.load(std::memory_order_acquire)) return DxvkGpuQueryStatus::Invalid; - // Get query data from all associated handles - DxvkGpuQueryStatus status = DxvkGpuQueryStatus::Available; - - for (size_t i = 0; i < m_handles.size() - && status == DxvkGpuQueryStatus::Available; i++) - status = getDataForHandle(queryData, m_handles[i]); + // Accumulate query data from all available queries + DxvkGpuQueryStatus status = this->accumulateQueryData(); // Treat non-precise occlusion queries as available // if we already know the result will be non-zero if ((status == DxvkGpuQueryStatus::Pending) && (m_type == VK_QUERY_TYPE_OCCLUSION) && !(m_flags & VK_QUERY_CONTROL_PRECISE_BIT) - && (queryData.occlusion.samplesPassed)) + && (m_queryData.occlusion.samplesPassed)) status = DxvkGpuQueryStatus::Available; - + + // Write back accumulated query data if the result is useful + if (status == DxvkGpuQueryStatus::Available) + queryData = m_queryData; + return status; } @@ -61,10 +61,15 @@ namespace dxvk { // only the false->true transition is defined. m_ended.store(false, std::memory_order_relaxed); + // Ideally we should have no queries left at this point, + // if we do, lifetime-track them with the command list. 
for (size_t i = 0; i < m_handles.size(); i++) cmd->trackGpuQuery(m_handles[i]); m_handles.clear(); + + // Reset accumulated query data + m_queryData = DxvkQueryData(); } @@ -75,14 +80,18 @@ namespace dxvk { void DxvkGpuQuery::addQueryHandle(const DxvkGpuQueryHandle& handle) { + // Already accumulate available queries here in case + // we already allocated a large number of queries + if (m_handles.size() >= m_handles.MinCapacity) + this->accumulateQueryData(); + m_handles.push_back(handle); } - DxvkGpuQueryStatus DxvkGpuQuery::getDataForHandle( - DxvkQueryData& queryData, - const DxvkGpuQueryHandle& handle) const { - DxvkQueryData tmpData; + DxvkGpuQueryStatus DxvkGpuQuery::accumulateQueryDataForHandle( + const DxvkGpuQueryHandle& handle) { + DxvkQueryData tmpData = { }; // Try to copy query data to temporary structure VkResult result = m_vkd->vkGetQueryPoolResults(m_vkd->device(), @@ -98,30 +107,30 @@ namespace dxvk { // Add numbers to the destination structure switch (m_type) { case VK_QUERY_TYPE_OCCLUSION: - queryData.occlusion.samplesPassed += tmpData.occlusion.samplesPassed; + m_queryData.occlusion.samplesPassed += tmpData.occlusion.samplesPassed; break; case VK_QUERY_TYPE_TIMESTAMP: - queryData.timestamp.time = tmpData.timestamp.time; + m_queryData.timestamp.time = tmpData.timestamp.time; break; case VK_QUERY_TYPE_PIPELINE_STATISTICS: - queryData.statistic.iaVertices += tmpData.statistic.iaVertices; - queryData.statistic.iaPrimitives += tmpData.statistic.iaPrimitives; - queryData.statistic.vsInvocations += tmpData.statistic.vsInvocations; - queryData.statistic.gsInvocations += tmpData.statistic.gsInvocations; - queryData.statistic.gsPrimitives += tmpData.statistic.gsPrimitives; - queryData.statistic.clipInvocations += tmpData.statistic.clipInvocations; - queryData.statistic.clipPrimitives += tmpData.statistic.clipPrimitives; - queryData.statistic.fsInvocations += tmpData.statistic.fsInvocations; - queryData.statistic.tcsPatches += tmpData.statistic.tcsPatches; 
- queryData.statistic.tesInvocations += tmpData.statistic.tesInvocations; - queryData.statistic.csInvocations += tmpData.statistic.csInvocations; + m_queryData.statistic.iaVertices += tmpData.statistic.iaVertices; + m_queryData.statistic.iaPrimitives += tmpData.statistic.iaPrimitives; + m_queryData.statistic.vsInvocations += tmpData.statistic.vsInvocations; + m_queryData.statistic.gsInvocations += tmpData.statistic.gsInvocations; + m_queryData.statistic.gsPrimitives += tmpData.statistic.gsPrimitives; + m_queryData.statistic.clipInvocations += tmpData.statistic.clipInvocations; + m_queryData.statistic.clipPrimitives += tmpData.statistic.clipPrimitives; + m_queryData.statistic.fsInvocations += tmpData.statistic.fsInvocations; + m_queryData.statistic.tcsPatches += tmpData.statistic.tcsPatches; + m_queryData.statistic.tesInvocations += tmpData.statistic.tesInvocations; + m_queryData.statistic.csInvocations += tmpData.statistic.csInvocations; break; case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: - queryData.xfbStream.primitivesWritten += tmpData.xfbStream.primitivesWritten; - queryData.xfbStream.primitivesNeeded += tmpData.xfbStream.primitivesNeeded; + m_queryData.xfbStream.primitivesWritten += tmpData.xfbStream.primitivesWritten; + m_queryData.xfbStream.primitivesNeeded += tmpData.xfbStream.primitivesNeeded; break; default: @@ -131,6 +140,37 @@ namespace dxvk { return DxvkGpuQueryStatus::Available; } + + + DxvkGpuQueryStatus DxvkGpuQuery::accumulateQueryData() { + DxvkGpuQueryStatus status = DxvkGpuQueryStatus::Available; + + // Process available queries and return them to the + // allocator if possible. This may help reduce the + // number of Vulkan queries in flight. 
+ size_t queriesAvailable = 0; + + while (queriesAvailable < m_handles.size()) { + status = this->accumulateQueryDataForHandle(m_handles[queriesAvailable]); + + if (status != DxvkGpuQueryStatus::Available) + break; + + queriesAvailable += 1; + } + + if (queriesAvailable) { + for (size_t i = 0; i < queriesAvailable; i++) + m_handles[i].allocator->freeQuery(m_handles[i]); + + for (size_t i = queriesAvailable; i < m_handles.size(); i++) + m_handles[i - queriesAvailable] = m_handles[i]; + + m_handles.resize(m_handles.size() - queriesAvailable); + } + + return status; + } diff --git a/src/dxvk/dxvk_gpu_query.h b/src/dxvk/dxvk_gpu_query.h index f2318563..919d0e26 100644 --- a/src/dxvk/dxvk_gpu_query.h +++ b/src/dxvk/dxvk_gpu_query.h @@ -188,7 +188,7 @@ namespace dxvk { * \returns Current query status */ DxvkGpuQueryStatus getData( - DxvkQueryData& queryData) const; + DxvkQueryData& queryData); /** * \brief Begins query @@ -230,11 +230,14 @@ namespace dxvk { uint32_t m_index; std::atomic<bool> m_ended; + DxvkQueryData m_queryData = { }; + small_vector<DxvkGpuQueryHandle, 8> m_handles; - DxvkGpuQueryStatus getDataForHandle( - DxvkQueryData& queryData, - const DxvkGpuQueryHandle& handle) const; + DxvkGpuQueryStatus accumulateQueryDataForHandle( + const DxvkGpuQueryHandle& handle); + + DxvkGpuQueryStatus accumulateQueryData(); }; diff --git a/src/util/util_small_vector.h b/src/util/util_small_vector.h index 13ff1ac7..48fe8bff 100644 --- a/src/util/util_small_vector.h +++ b/src/util/util_small_vector.h @@ -9,6 +9,8 @@ namespace dxvk { using storage = std::aligned_storage_t<sizeof(T), alignof(T)>; public: + constexpr static size_t MinCapacity = N; + small_vector() { } small_vector (const small_vector&) = delete; |