diff options
author | Ulrich Germann <ugermann@inf.ed.ac.uk> | 2019-03-22 23:50:28 +0300 |
---|---|---|
committer | Ulrich Germann <ugermann@inf.ed.ac.uk> | 2019-03-22 23:50:28 +0300 |
commit | fd4f2a9b8cb931194d02a4cd98fef464a3af764c (patch) | |
tree | 6413d1762ddd117d04876303ae50d444d531b7cb | |
parent | 02f4af4eeefa79a24cd52d279a5d4d374423d631 (diff) |
Don't close named pipes during corpus reset. (ug-issue-425)
The corpus reset reopened the input files for corpora: files were opened once during
the construction of the Corpus instance, then closed and reopened during data->reset(),
triggered by BatchGenerator::prepare(false). With normal files that's not a problem,
but if the "file" is a named pipe, closing it triggers a SIGPIPE (broken pipe) on
the writing end of the pipe. With this commit, Corpus::reset() leaves open pipes
alone.
-rw-r--r-- | src/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/common/filesystem.cpp | 17 | ||||
-rwxr-xr-x | src/common/filesystem.h | 4 | ||||
-rwxr-xr-x | src/data/corpus.cpp | 23 |
4 files changed, 37 insertions, 8 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 03524117..7a98eef9 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -16,6 +16,7 @@ add_library(marian STATIC common/config_validator.cpp common/binary.cpp common/io.cpp + common/filesystem.cpp data/alignment.cpp data/vocab.cpp diff --git a/src/common/filesystem.cpp b/src/common/filesystem.cpp new file mode 100644 index 00000000..d5196e85 --- /dev/null +++ b/src/common/filesystem.cpp @@ -0,0 +1,17 @@ +#include "filesystem.h" + +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> + +namespace marian { +namespace filesystem { + +bool is_fifo(char const* path) { + struct stat buf; + stat(path, &buf); + return S_ISFIFO(buf.st_mode); +} + +} // end of namespace marian::filesystem +} // end of namespace marian diff --git a/src/common/filesystem.h b/src/common/filesystem.h index f9c06104..4b97e735 100755 --- a/src/common/filesystem.h +++ b/src/common/filesystem.h @@ -22,6 +22,8 @@ namespace marian { namespace filesystem { + bool is_fifo(char const* path); + class Path { private: Pathie::Path path; @@ -97,4 +99,4 @@ namespace filesystem { using FilesystemError = Pathie::PathieError; } -}
\ No newline at end of file +} diff --git a/src/data/corpus.cpp b/src/data/corpus.cpp index 7a7a846e..7e979f72 100755 --- a/src/data/corpus.cpp +++ b/src/data/corpus.cpp @@ -4,6 +4,8 @@ #include <random> #include "common/utils.h" +#include "common/filesystem.h" + #include "data/corpus.h" namespace marian { @@ -44,6 +46,7 @@ SentenceTuple Corpus::next() { } else { bool gotLine = io::getline(*files_[i], line); + // LOG(debug,"[{}][{}] {}", i, pos_ - 1, line); if(!gotLine) { eofsHit++; continue; @@ -85,16 +88,22 @@ void Corpus::shuffle() { // Call either reset() or shuffle(). // @TODO: make shuffle() private, instad pass a shuffle() flag to reset(), to clarify mutual exclusiveness with shuffle() void Corpus::reset() { - files_.clear(); corpusInRAM_.clear(); ids_.clear(); pos_ = 0; - for(auto& path : paths_) { - if(path == "stdin") - files_.emplace_back(new io::InputFileStream(std::cin)); - else - files_.emplace_back(new io::InputFileStream(path)); - } + for (size_t i = 0; i < paths_.size(); ++i) + { + if(paths_[i] == "stdin") { + files_[i].reset(new io::InputFileStream(std::cin)); + // Probably not necessary, unless there are some buffers + // that we want flushed. + } + else if (!filesystem::is_fifo(paths_[i].c_str())) { + // Do NOT reset named pipes; that closes them and triggers a SIGPIPE + // (lost pipe) at the writing end. + files_[i].reset(new io::InputFileStream(paths_[i])); + } + } } void Corpus::restore(Ptr<TrainingState> ts) { |