diff options
author | Masado Ishii <30641704+starintheuniverse@users.noreply.github.com> | 2022-06-12 18:56:37 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-06-12 18:56:37 +0300 |
commit | 0cbd3fad055e61f3b4266ccfe04eeda27f227f54 (patch) | |
tree | 524bd12e29fa2647a9f665eb8d83227a49bf03c8 | |
parent | bf4863d2e5a030c353a3afa079f803173de2a15b (diff) |
Use MPI_Isend in MpiConsoleReporter to avoid deadlock (#656)
* Use MPI_Isend in MpiConsoleReporter to avoid deadlock
* C-style cast to static_cast in doctest/extensions/mpi_reporter.h
Co-authored-by: Philipp Bucher <philipp.bucher@tum.de>
* Fix mismatch #send/#recv by reducing over the true number of messages (MpiConsoleReporter)
Co-authored-by: Philipp Bucher <philipp.bucher@tum.de>
-rw-r--r-- | doctest/extensions/mpi_reporter.h | 28 |
1 file changed, 22 insertions, 6 deletions
diff --git a/doctest/extensions/mpi_reporter.h b/doctest/extensions/mpi_reporter.h index 1f199e6e..11296c6a 100644 --- a/doctest/extensions/mpi_reporter.h +++ b/doctest/extensions/mpi_reporter.h @@ -66,6 +66,7 @@ private: return nullStream; } } + std::vector<std::pair<std::string, int>> m_failure_str_queue = {}; public: MpiConsoleReporter(const ContextOptions& co) : ConsoleReporter(co,replace_by_null_if_not_rank_0(co.cout)) @@ -154,9 +155,25 @@ public: int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank); + std::vector<MPI_Request> requests; + requests.reserve(m_failure_str_queue.size()); // avoid realloc & copy of MPI_Request + for (const std::pair<std::string, int> &failure : m_failure_str_queue) + { + const std::string & failure_str = failure.first; + const int failure_line = failure.second; + + int failure_msg_size = static_cast<int>(failure_str.size()); + + requests.push_back(MPI_REQUEST_NULL); + MPI_Isend(failure_str.c_str(), failure_msg_size, MPI_BYTE, + 0, failure_line, MPI_COMM_WORLD, &requests.back()); // Tag = file line + } + + // Compute the number of assert with fail among all procs + const int nb_fail_asserts = static_cast<int>(m_failure_str_queue.size()); int nb_fail_asserts_glob = 0; - MPI_Reduce(&st.numAssertsFailedCurrentTest, &nb_fail_asserts_glob, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&nb_fail_asserts, &nb_fail_asserts_glob, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); if(rank == 0) { MPI_Status status; @@ -196,6 +213,9 @@ public: s << "\n"; } } + + MPI_Waitall(static_cast<int>(requests.size()), requests.data(), MPI_STATUSES_IGNORE); + m_failure_str_queue.clear(); } ConsoleReporter::test_case_end(st); @@ -235,11 +255,7 @@ public: << "( " << rb.m_decomp.c_str() << " )\n"; } - std::string failure_str = failure_msg.str(); - int failure_msg_size = static_cast<int>(failure_str.size()); - - MPI_Send(failure_str.c_str(), failure_msg_size, MPI_BYTE, - 0, rb.m_line, MPI_COMM_WORLD); // Tag = file line + 
m_failure_str_queue.push_back({failure_msg.str(), rb.m_line}); } } }; // MpiConsoleReporter |