Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/windirstat/llfio.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/llfio/v2.0/detail/impl/dynamic_thread_pool_group.ipp135
-rw-r--r--include/llfio/v2.0/dynamic_thread_pool_group.hpp14
-rw-r--r--programs/benchmark-dynamic_thread_pool_group/main.cpp15
3 files changed, 157 insertions, 7 deletions
diff --git a/include/llfio/v2.0/detail/impl/dynamic_thread_pool_group.ipp b/include/llfio/v2.0/detail/impl/dynamic_thread_pool_group.ipp
index 8d54c52a..3c8bba1f 100644
--- a/include/llfio/v2.0/detail/impl/dynamic_thread_pool_group.ipp
+++ b/include/llfio/v2.0/detail/impl/dynamic_thread_pool_group.ipp
@@ -64,6 +64,125 @@ Distributed under the Boost Software License, Version 1.0.
#define LLFIO_DYNAMIC_THREAD_POOL_GROUP_PRINTING 0
+/* NOTE that the Linux results are from a VM with half as many CPUs as the machine used for the Windows results,
+so the two sets of figures are not directly comparable.
+
+Linux 4Kb and 64Kb
+
+Benchmarking asio ...
+ For 1 work items got 33635.5 SHA256 hashes/sec with 1 maximum concurrency.
+ For 2 work items got 59420.4 SHA256 hashes/sec with 2 maximum concurrency.
+ For 4 work items got 65653.8 SHA256 hashes/sec with 4 maximum concurrency.
+ For 8 work items got 42216.3 SHA256 hashes/sec with 8 maximum concurrency.
+ For 16 work items got 458911 SHA256 hashes/sec with 16 maximum concurrency.
+ For 32 work items got 578462 SHA256 hashes/sec with 27 maximum concurrency.
+ For 64 work items got 572456 SHA256 hashes/sec with 27 maximum concurrency.
+ For 128 work items got 572326 SHA256 hashes/sec with 26 maximum concurrency.
+ For 256 work items got 568558 SHA256 hashes/sec with 25 maximum concurrency.
+ For 512 work items got 570342 SHA256 hashes/sec with 26 maximum concurrency.
+ For 1024 work items got 567351 SHA256 hashes/sec with 26 maximum concurrency.
+
+Benchmarking asio ...
+ For 1 work items got 3768.07 SHA256 hashes/sec with 1 maximum concurrency.
+ For 2 work items got 7672.47 SHA256 hashes/sec with 2 maximum concurrency.
+ For 4 work items got 14169.5 SHA256 hashes/sec with 4 maximum concurrency.
+ For 8 work items got 21785.9 SHA256 hashes/sec with 8 maximum concurrency.
+ For 16 work items got 30875 SHA256 hashes/sec with 16 maximum concurrency.
+ For 32 work items got 43614.4 SHA256 hashes/sec with 32 maximum concurrency.
+ For 64 work items got 46075.4 SHA256 hashes/sec with 32 maximum concurrency.
+ For 128 work items got 47111.6 SHA256 hashes/sec with 32 maximum concurrency.
+ For 256 work items got 45926.6 SHA256 hashes/sec with 32 maximum concurrency.
+ For 512 work items got 45923.9 SHA256 hashes/sec with 32 maximum concurrency.
+ For 1024 work items got 46250.9 SHA256 hashes/sec with 32 maximum concurrency.
+
+
+Windows 4Kb and 64Kb
+
+Benchmarking asio ...
+ For 1 work items got 49443.6 SHA256 hashes/sec with 1 maximum concurrency.
+ For 2 work items got 97189 SHA256 hashes/sec with 2 maximum concurrency.
+ For 4 work items got 185187 SHA256 hashes/sec with 4 maximum concurrency.
+ For 8 work items got 328105 SHA256 hashes/sec with 8 maximum concurrency.
+ For 16 work items got 513294 SHA256 hashes/sec with 16 maximum concurrency.
+ For 32 work items got 493040 SHA256 hashes/sec with 32 maximum concurrency.
+ For 64 work items got 1.00736e+06 SHA256 hashes/sec with 64 maximum concurrency.
+ For 128 work items got 996193 SHA256 hashes/sec with 64 maximum concurrency.
+ For 256 work items got 993805 SHA256 hashes/sec with 64 maximum concurrency.
+ For 512 work items got 998211 SHA256 hashes/sec with 64 maximum concurrency.
+ For 1024 work items got 990231 SHA256 hashes/sec with 64 maximum concurrency.
+
+Benchmarking asio ...
+ For 1 work items got 3797.05 SHA256 hashes/sec with 1 maximum concurrency.
+ For 2 work items got 7869.94 SHA256 hashes/sec with 2 maximum concurrency.
+ For 4 work items got 15612 SHA256 hashes/sec with 4 maximum concurrency.
+ For 8 work items got 28481.1 SHA256 hashes/sec with 8 maximum concurrency.
+ For 16 work items got 41255.2 SHA256 hashes/sec with 16 maximum concurrency.
+ For 32 work items got 66182.4 SHA256 hashes/sec with 32 maximum concurrency.
+ For 64 work items got 67230.5 SHA256 hashes/sec with 64 maximum concurrency.
+ For 128 work items got 66988.5 SHA256 hashes/sec with 64 maximum concurrency.
+ For 256 work items got 66926.1 SHA256 hashes/sec with 64 maximum concurrency.
+ For 512 work items got 66964.7 SHA256 hashes/sec with 64 maximum concurrency.
+ For 1024 work items got 66911 SHA256 hashes/sec with 64 maximum concurrency.
+*/
+
+/* Linux 4Kb and 64Kb libdispatch
+
+Benchmarking llfio (Grand Central Dispatch) ...
+ For 1 work items got 32058.2 SHA256 hashes/sec with 1 maximum concurrency.
+ For 2 work items got 26084.1 SHA256 hashes/sec with 2 maximum concurrency.
+ For 4 work items got 24906.8 SHA256 hashes/sec with 4 maximum concurrency.
+ For 8 work items got 24729.5 SHA256 hashes/sec with 8 maximum concurrency.
+ For 16 work items got 73749.1 SHA256 hashes/sec with 16 maximum concurrency.
+ For 32 work items got 526656 SHA256 hashes/sec with 21 maximum concurrency.
+ For 64 work items got 535043 SHA256 hashes/sec with 27 maximum concurrency.
+ For 128 work items got 541809 SHA256 hashes/sec with 30 maximum concurrency.
+ For 256 work items got 543568 SHA256 hashes/sec with 33 maximum concurrency.
+ For 512 work items got 545540 SHA256 hashes/sec with 37 maximum concurrency.
+ For 1024 work items got 542017 SHA256 hashes/sec with 41 maximum concurrency.
+
+Benchmarking llfio (Grand Central Dispatch) ...
+ For 1 work items got 3857.82 SHA256 hashes/sec with 1 maximum concurrency.
+ For 2 work items got 7666.2 SHA256 hashes/sec with 2 maximum concurrency.
+ For 4 work items got 14993.6 SHA256 hashes/sec with 4 maximum concurrency.
+ For 8 work items got 25160 SHA256 hashes/sec with 8 maximum concurrency.
+ For 16 work items got 39015.5 SHA256 hashes/sec with 16 maximum concurrency.
+ For 32 work items got 43494.4 SHA256 hashes/sec with 16 maximum concurrency.
+ For 64 work items got 42874.5 SHA256 hashes/sec with 16 maximum concurrency.
+ For 128 work items got 42678.7 SHA256 hashes/sec with 16 maximum concurrency.
+ For 256 work items got 42661.7 SHA256 hashes/sec with 16 maximum concurrency.
+ For 512 work items got 42670.9 SHA256 hashes/sec with 16 maximum concurrency.
+ For 1024 work items got 44609.5 SHA256 hashes/sec with 16 maximum concurrency.
+*/
+
+/* Windows 4Kb and 64Kb Win32 thread pool
+
+Benchmarking llfio (Win32 thread pool (Vista+)) ...
+ For 1 work items got 56553.8 SHA256 hashes/sec with 1 maximum concurrency.
+ For 2 work items got 110711 SHA256 hashes/sec with 2 maximum concurrency.
+ For 4 work items got 207273 SHA256 hashes/sec with 4 maximum concurrency.
+ For 8 work items got 269391 SHA256 hashes/sec with 8 maximum concurrency.
+ For 16 work items got 245053 SHA256 hashes/sec with 16 maximum concurrency.
+ For 32 work items got 260854 SHA256 hashes/sec with 21 maximum concurrency.
+ For 64 work items got 402240 SHA256 hashes/sec with 19 maximum concurrency.
+ For 128 work items got 400192 SHA256 hashes/sec with 19 maximum concurrency.
+ For 256 work items got 405973 SHA256 hashes/sec with 20 maximum concurrency.
+ For 512 work items got 406156 SHA256 hashes/sec with 22 maximum concurrency.
+ For 1024 work items got 405901 SHA256 hashes/sec with 23 maximum concurrency.
+
+Benchmarking llfio (Win32 thread pool (Vista+)) ...
+ For 1 work items got 4020.88 SHA256 hashes/sec with 1 maximum concurrency.
+ For 2 work items got 8028.79 SHA256 hashes/sec with 2 maximum concurrency.
+ For 4 work items got 15813 SHA256 hashes/sec with 4 maximum concurrency.
+ For 8 work items got 25539.4 SHA256 hashes/sec with 8 maximum concurrency.
+ For 16 work items got 40522.3 SHA256 hashes/sec with 16 maximum concurrency.
+ For 32 work items got 65182 SHA256 hashes/sec with 32 maximum concurrency.
+ For 64 work items got 65572.9 SHA256 hashes/sec with 33 maximum concurrency.
+ For 128 work items got 66462.3 SHA256 hashes/sec with 33 maximum concurrency.
+ For 256 work items got 66315.3 SHA256 hashes/sec with 33 maximum concurrency.
+ For 512 work items got 66341.5 SHA256 hashes/sec with 33 maximum concurrency.
+ For 1024 work items got 66416.2 SHA256 hashes/sec with 33 maximum concurrency.
+*/
+
LLFIO_V2_NAMESPACE_BEGIN
namespace detail
@@ -262,7 +381,7 @@ namespace detail
thread_t *front{nullptr}, *back{nullptr};
} threadpool_active, threadpool_sleeping;
std::atomic<size_t> total_submitted_workitems{0}, threadpool_threads{0}, threadpool_sleeping_count{0};
- std::atomic<uint32_t> ms_sleep_for_more_work{5000}; // TODO put back to 60000
+ std::atomic<uint32_t> ms_sleep_for_more_work{30000}; // TODO put back to 60000
std::mutex threadmetrics_lock;
struct threadmetrics_threadid
@@ -740,6 +859,7 @@ namespace detail
threadmetrics_queue.running--;
}
_remove_from_list(threadmetrics_queue, *d_it);
+ delete *d_it;
d_it = threadmetrics_sorted.erase(d_it);
};
auto add_item = [&] {
@@ -888,6 +1008,19 @@ namespace detail
} // namespace detail
+LLFIO_HEADERS_ONLY_MEMFUNC_SPEC const char *dynamic_thread_pool_group::implementation_description() noexcept
+{ // Returns a string literal naming the backend; the choice is made entirely by the preprocessor branches below.
+#if LLFIO_DYNAMIC_THREAD_POOL_GROUP_USING_GCD // tested first, so it takes precedence over the OS checks that follow
+ return "Grand Central Dispatch";
+#elif defined(_WIN32)
+ return "Win32 thread pool (Vista+)";
+#elif defined(__linux__)
+ return "Linux native";
+#else
+#error Unknown platform
+#endif
+}
+
class dynamic_thread_pool_group_impl final : public dynamic_thread_pool_group
{
friend struct detail::global_dynamic_thread_pool_impl;
diff --git a/include/llfio/v2.0/dynamic_thread_pool_group.hpp b/include/llfio/v2.0/dynamic_thread_pool_group.hpp
index 2af85564..2a549efa 100644
--- a/include/llfio/v2.0/dynamic_thread_pool_group.hpp
+++ b/include/llfio/v2.0/dynamic_thread_pool_group.hpp
@@ -402,6 +402,20 @@ public:
virtual ~dynamic_thread_pool_group() {}
+ /*! \brief A textual description of the underlying implementation of
+ this dynamic thread pool group.
+
+ The current possible underlying implementations are:
+
+ - "Grand Central Dispatch" (Mac OS, FreeBSD, Linux)
+ - "Linux native" (Linux)
+ - "Win32 thread pool (Vista+)" (Windows)
+
+ Which one is chosen depends on what was detected at cmake configure time,
+ and possibly what the host OS running the program binary supports.
+ */
+ static LLFIO_HEADERS_ONLY_MEMFUNC_SPEC const char *implementation_description() noexcept;
+
/*! \brief Threadsafe. Submit one or more work items for execution. Note that you can submit more later.
Note that if the group is currently stopping, you cannot submit more
diff --git a/programs/benchmark-dynamic_thread_pool_group/main.cpp b/programs/benchmark-dynamic_thread_pool_group/main.cpp
index 936bc622..841fcacf 100644
--- a/programs/benchmark-dynamic_thread_pool_group/main.cpp
+++ b/programs/benchmark-dynamic_thread_pool_group/main.cpp
@@ -23,7 +23,7 @@ Distributed under the Boost Software License, Version 1.0.
*/
//! Seconds to run the benchmark
-static constexpr unsigned BENCHMARK_DURATION = 5;
+static constexpr unsigned BENCHMARK_DURATION = 10;
//! Maximum work items to create
static constexpr unsigned MAX_WORK_ITEMS = 1024;
@@ -124,11 +124,11 @@ struct asio_runner
}
}
};
- template <class F> void add_workitem(F &&f) { ctx.post(C(this, std::move(f))); }
+ template <class F> void add_workitem(F &&f) { ctx.post(C<F>(this, std::move(f))); }
std::chrono::microseconds run(unsigned seconds)
{
std::vector<std::thread> threads;
- for(size_t n = 0; n < std::thread::hardware_concurrency(); n++)
+ for(size_t n = 0; n < std::thread::hardware_concurrency() * 2; n++)
{
threads.emplace_back([&] { ctx.run(); });
}
@@ -156,7 +156,7 @@ template <class Runner> void benchmark(const char *name)
struct worker
{
shared_t *shared;
- char buffer[4096];
+ char buffer[65536];
QUICKCPPLIB_NAMESPACE::algorithm::hash::sha256_hash::result_type hash;
uint64_t count{0};
@@ -218,10 +218,13 @@ template <class Runner> void benchmark(const char *name)
int main(void)
{
- benchmark<llfio_runner>("llfio");
+ std::string llfio_name("llfio (");
+ llfio_name.append(llfio::dynamic_thread_pool_group::implementation_description());
+ llfio_name.push_back(')');
+ benchmark<llfio_runner>(llfio_name.c_str());
+
#if ENABLE_ASIO
benchmark<asio_runner>("asio");
#endif
-
return 0;
} \ No newline at end of file