diff options
author | Marcin Junczys-Dowmunt <marcinjd@microsoft.com> | 2022-02-08 13:57:20 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-02-08 13:57:20 +0300 |
commit | 05ba9e4c319db2317319227f5706f634340e0db4 (patch) | |
tree | 9266afe6d10ec1e75ab52d30e496a624d454237a /src/common/config_parser.cpp | |
parent | a365bb5ce99135eab29ffe378e0c6c9fb9bf0c1b (diff) |
add -DDETERMINISTIC=ON/OFF flag (#912)
* Add -DDETERMINISTIC=ON/OFF flag to CMake
* Use -DDETERMINISTIC=on in GitHub/Azure workflows
Co-authored-by: Roman Grundkiewicz <rgrundkiewicz@gmail.com>
Diffstat (limited to 'src/common/config_parser.cpp')
-rw-r--r-- | src/common/config_parser.cpp | 10 |
1 file changed, 10 insertions, 0 deletions
diff --git a/src/common/config_parser.cpp b/src/common/config_parser.cpp index ebbe4a89..837bee53 100644 --- a/src/common/config_parser.cpp +++ b/src/common/config_parser.cpp @@ -897,8 +897,13 @@ void ConfigParser::addSuboptionsBatching(cli::CLIWrapper& cli) { cli.add<bool>("--shuffle-in-ram", "Keep shuffled corpus in RAM, do not write to temp file"); +#if DETERMINISTIC cli.add<size_t>("--data-threads", "Number of concurrent threads to use during data reading and processing", 1); +#else + cli.add<size_t>("--data-threads", + "Number of concurrent threads to use during data reading and processing", 8); +#endif // @TODO: Consider making the next two options options of the vocab instead, to make it more local in scope. cli.add<size_t>("--all-caps-every", @@ -919,8 +924,13 @@ void ConfigParser::addSuboptionsBatching(cli::CLIWrapper& cli) { "Round up batch size to next power of 2 for more efficient training, but this can make batch size less stable. Disable with --mini-batch-round-up=false", true); } else { +#if DETERMINISTIC cli.add<size_t>("--data-threads", "Number of concurrent threads to use during data reading and processing", 1); +#else + cli.add<size_t>("--data-threads", + "Number of concurrent threads to use during data reading and processing", 8); +#endif } // clang-format on } |