diff options
author | Cloud Han <cloudhan@outlook.com> | 2022-01-25 07:13:04 +0300 |
---|---|---|
committer | Cloud Han <cloudhan@outlook.com> | 2022-01-25 07:59:35 +0300 |
commit | 9b60c4970e90f3d5de8f19c5e60e13d20c070be8 (patch) | |
tree | 39a1cb2c19b6c0c785a604b95c1a1e39cf30f1a9 /examples | |
parent | 21b748ac846eba0466db3eec8a08d51d194bedfd (diff) |
better example
Diffstat (limited to 'examples')
-rw-r--r-- | examples/OpenCLVectorAdd/OpenCLVectorAdd.cpp | 50 |
1 files changed, 31 insertions, 19 deletions
diff --git a/examples/OpenCLVectorAdd/OpenCLVectorAdd.cpp b/examples/OpenCLVectorAdd/OpenCLVectorAdd.cpp index 3a59f2d8..ce5adece 100644 --- a/examples/OpenCLVectorAdd/OpenCLVectorAdd.cpp +++ b/examples/OpenCLVectorAdd/OpenCLVectorAdd.cpp @@ -138,26 +138,38 @@ int main() } } - const int numLaunchs = 100; + cl_int clN = static_cast<cl_int>(N); + const int numFrames = 10; + const int launchsPerFrame = 10; + constexpr int numLaunchs = numFrames * launchsPerFrame; std::vector<cl_event> kernelLaunchEvts; kernelLaunchEvts.reserve(numLaunchs); - for (int i = 0; i < numLaunchs; ++i) + for (int i = 0; i < numFrames; ++i) { - int n_value = static_cast<int>(N); - ZoneScopedN("VectorAdd Kernel Launch"); - TracyCLZoneC(tracyCLCtx, "VectorAdd Kernel", tracy::Color::Blue4); - - CL_ASSERT(clSetKernelArg(vectorAddKernel, 0, sizeof(cl_mem), &bufferC)); - CL_ASSERT(clSetKernelArg(vectorAddKernel, 1, sizeof(cl_mem), &bufferA)); - CL_ASSERT(clSetKernelArg(vectorAddKernel, 2, sizeof(cl_mem), &bufferB)); - CL_ASSERT(clSetKernelArg(vectorAddKernel, 3, sizeof(int), &n_value)); - - cl_event vectorAddKernelEvent; - CL_ASSERT(clEnqueueNDRangeKernel(commandQueue, vectorAddKernel, 1, nullptr, &N, nullptr, 0, nullptr, &vectorAddKernelEvent)); - TracyCLZoneSetEvent(vectorAddKernelEvent); - CL_ASSERT(clRetainEvent(vectorAddKernelEvent)); - kernelLaunchEvts.push_back(vectorAddKernelEvent); - std::cout << "VectorAdd Kernel Enqueued" << std::endl; + FrameMark; + for (int j = 0; j < launchsPerFrame; ++j) { + ZoneScopedN("VectorAdd Kernel Launch"); + TracyCLZoneC(tracyCLCtx, "VectorAdd Kernel", tracy::Color::Blue4); + + CL_ASSERT(clSetKernelArg(vectorAddKernel, 0, sizeof(cl_mem), &bufferC)); + CL_ASSERT(clSetKernelArg(vectorAddKernel, 1, sizeof(cl_mem), &bufferA)); + CL_ASSERT(clSetKernelArg(vectorAddKernel, 2, sizeof(cl_mem), &bufferB)); + CL_ASSERT(clSetKernelArg(vectorAddKernel, 3, sizeof(cl_int), &clN)); + + cl_event vectorAddKernelEvent; + CL_ASSERT(clEnqueueNDRangeKernel(commandQueue, vectorAddKernel, 1, nullptr, &N, nullptr, 0, nullptr, &vectorAddKernelEvent)); + TracyCLZoneSetEvent(vectorAddKernelEvent); + CL_ASSERT(clRetainEvent(vectorAddKernelEvent)); + kernelLaunchEvts.push_back(vectorAddKernelEvent); + std::cout << "VectorAdd Kernel Enqueued" << std::endl; + } + { + // Wait frame events to be finished + ZoneScopedN("clFinish"); + CL_ASSERT(clFinish(commandQueue)); + } + // You should collect on each 'frame' ends, so that streaming can be achieved. + TracyCLCollect(tracyCLCtx); } { @@ -188,8 +200,8 @@ int main() }) / (durations.size() - 1.0f); std::cout << "VectorAdd runtime avg: " << avg << "us, std: " << sqrt(stddev2) << "us over " << numLaunchs << " runs." << std::endl; - - TracyCLCollect(tracyCLCtx); + // Use blocking collect will ensure all queued events is finished + TracyCLBlockingCollect(tracyCLCtx); { ZoneScopedN("Checking results"); |