Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMarcin Junczys-Dowmunt <marcinjd@microsoft.com>2022-02-08 13:57:20 +0300
committerGitHub <noreply@github.com>2022-02-08 13:57:20 +0300
commit05ba9e4c319db2317319227f5706f634340e0db4 (patch)
tree9266afe6d10ec1e75ab52d30e496a624d454237a /src
parenta365bb5ce99135eab29ffe378e0c6c9fb9bf0c1b (diff)
add -DDETERMINISTIC=ON/OFF flag (#912)
* Add -DDETERMINISTIC=ON/OFF flag to CMake * Use -DDETERMINISTIC=on in GitHub/Azure workflows Co-authored-by: Roman Grundkiewicz <rgrundkiewicz@gmail.com>
Diffstat (limited to 'src')
-rw-r--r--src/common/config_parser.cpp10
-rw-r--r--src/tensors/gpu/tensor_operators.cu2
2 files changed, 11 insertions, 1 deletion
diff --git a/src/common/config_parser.cpp b/src/common/config_parser.cpp
index ebbe4a89..837bee53 100644
--- a/src/common/config_parser.cpp
+++ b/src/common/config_parser.cpp
@@ -897,8 +897,13 @@ void ConfigParser::addSuboptionsBatching(cli::CLIWrapper& cli) {
cli.add<bool>("--shuffle-in-ram",
"Keep shuffled corpus in RAM, do not write to temp file");
+#if DETERMINISTIC
cli.add<size_t>("--data-threads",
"Number of concurrent threads to use during data reading and processing", 1);
+#else
+ cli.add<size_t>("--data-threads",
+ "Number of concurrent threads to use during data reading and processing", 8);
+#endif
// @TODO: Consider making the next two options options of the vocab instead, to make it more local in scope.
cli.add<size_t>("--all-caps-every",
@@ -919,8 +924,13 @@ void ConfigParser::addSuboptionsBatching(cli::CLIWrapper& cli) {
"Round up batch size to next power of 2 for more efficient training, but this can make batch size less stable. Disable with --mini-batch-round-up=false",
true);
} else {
+#if DETERMINISTIC
cli.add<size_t>("--data-threads",
"Number of concurrent threads to use during data reading and processing", 1);
+#else
+ cli.add<size_t>("--data-threads",
+ "Number of concurrent threads to use during data reading and processing", 8);
+#endif
}
// clang-format on
}
diff --git a/src/tensors/gpu/tensor_operators.cu b/src/tensors/gpu/tensor_operators.cu
index 2103ca9d..9011f284 100644
--- a/src/tensors/gpu/tensor_operators.cu
+++ b/src/tensors/gpu/tensor_operators.cu
@@ -1163,7 +1163,7 @@ void PasteRows(Tensor out,
size_t rowsToCopy = indices->size();
int threads = std::min(MAX_THREADS, (int)cols);
-#if 0 // @TODO: make this configurable with a 'deterministic' flag
+#if DETERMINISTIC
// If we only use one block, then each core operates on a different column,
// hence the summation becomes deterministic.
// However, we only use e.g. 512 cores out of possibly 3000+, so this will be