Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMarcin Junczys-Dowmunt <marcinjd@microsoft.com>2022-02-08 13:57:20 +0300
committerGitHub <noreply@github.com>2022-02-08 13:57:20 +0300
commit05ba9e4c319db2317319227f5706f634340e0db4 (patch)
tree9266afe6d10ec1e75ab52d30e496a624d454237a /src
parenta365bb5ce99135eab29ffe378e0c6c9fb9bf0c1b (diff)
add -DDETERMINISTIC=ON/OFF flag (#912)
* Add -DDETERMINISTIC=ON/OFF flag to CMake * Use -DDETERMINISTIC=on in GitHub/Azure workflows Co-authored-by: Roman Grundkiewicz <rgrundkiewicz@gmail.com>
Diffstat (limited to 'src')
-rw-r--r--src/common/config_parser.cpp10
-rw-r--r--src/tensors/gpu/tensor_operators.cu2
2 files changed, 11 insertions, 1 deletion
diff --git a/src/common/config_parser.cpp b/src/common/config_parser.cpp
index ebbe4a89..837bee53 100644
--- a/src/common/config_parser.cpp
+++ b/src/common/config_parser.cpp
@@ -897,8 +897,13 @@ void ConfigParser::addSuboptionsBatching(cli::CLIWrapper& cli) {
cli.add<bool>("--shuffle-in-ram",
"Keep shuffled corpus in RAM, do not write to temp file");
+#if DETERMINISTIC
cli.add<size_t>("--data-threads",
"Number of concurrent threads to use during data reading and processing", 1);
+#else
+ cli.add<size_t>("--data-threads",
+ "Number of concurrent threads to use during data reading and processing", 8);
+#endif
// @TODO: Consider making the next two options options of the vocab instead, to make it more local in scope.
cli.add<size_t>("--all-caps-every",
@@ -919,8 +924,13 @@ void ConfigParser::addSuboptionsBatching(cli::CLIWrapper& cli) {
"Round up batch size to next power of 2 for more efficient training, but this can make batch size less stable. Disable with --mini-batch-round-up=false",
true);
} else {
+#if DETERMINISTIC
cli.add<size_t>("--data-threads",
"Number of concurrent threads to use during data reading and processing", 1);
+#else
+ cli.add<size_t>("--data-threads",
+ "Number of concurrent threads to use during data reading and processing", 8);
+#endif
}
// clang-format on
}
diff --git a/src/tensors/gpu/tensor_operators.cu b/src/tensors/gpu/tensor_operators.cu
index 2103ca9d..9011f284 100644
--- a/src/tensors/gpu/tensor_operators.cu
+++ b/src/tensors/gpu/tensor_operators.cu
@@ -1163,7 +1163,7 @@ void PasteRows(Tensor out,
size_t rowsToCopy = indices->size();
int threads = std::min(MAX_THREADS, (int)cols);
-#if 0 // @TODO: make this configurable with a 'deterministic' flag
+#if DETERMINISTIC
// If we only use one block, then each core operates on a different column,
// hence the summation becomes deterministic.
// However, we only use e.g. 512 cores out of possibly 3000+, so this will be