Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorUlrich Germann <ugermann@inf.ed.ac.uk>2019-03-22 23:50:28 +0300
committerUlrich Germann <ugermann@inf.ed.ac.uk>2019-03-22 23:50:28 +0300
commitfd4f2a9b8cb931194d02a4cd98fef464a3af764c (patch)
tree6413d1762ddd117d04876303ae50d444d531b7cb
parent02f4af4eeefa79a24cd52d279a5d4d374423d631 (diff)
Don't close named pipes during corpus reset. [branch: ug-issue-425]
The corpus reset reopened input files for corpora. So files were opened once during the construction of the Corpus instance, then closed and reopened during data->reset() triggered by BatchGenerator::prepare(false). With normal files that's not a problem, but if the "file" is a named pipe, the closing triggers a SIGPIPE (broken pipe) on the writing end of the pipe. With this commit, Corpus::reset() leaves open pipes alone.
-rw-r--r--src/CMakeLists.txt1
-rw-r--r--src/common/filesystem.cpp17
-rwxr-xr-xsrc/common/filesystem.h4
-rwxr-xr-xsrc/data/corpus.cpp23
4 files changed, 37 insertions, 8 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 03524117..7a98eef9 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -16,6 +16,7 @@ add_library(marian STATIC
common/config_validator.cpp
common/binary.cpp
common/io.cpp
+ common/filesystem.cpp
data/alignment.cpp
data/vocab.cpp
diff --git a/src/common/filesystem.cpp b/src/common/filesystem.cpp
new file mode 100644
index 00000000..d5196e85
--- /dev/null
+++ b/src/common/filesystem.cpp
@@ -0,0 +1,17 @@
+#include "filesystem.h"
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+namespace marian {
+namespace filesystem {
+
+bool is_fifo(char const* path) {
+ struct stat buf;
+ stat(path, &buf);
+ return S_ISFIFO(buf.st_mode);
+}
+
+} // end of namespace marian::filesystem
+} // end of namespace marian
diff --git a/src/common/filesystem.h b/src/common/filesystem.h
index f9c06104..4b97e735 100755
--- a/src/common/filesystem.h
+++ b/src/common/filesystem.h
@@ -22,6 +22,8 @@
namespace marian {
namespace filesystem {
+ bool is_fifo(char const* path);
+
class Path {
private:
Pathie::Path path;
@@ -97,4 +99,4 @@ namespace filesystem {
using FilesystemError = Pathie::PathieError;
}
-} \ No newline at end of file
+}
diff --git a/src/data/corpus.cpp b/src/data/corpus.cpp
index 7a7a846e..7e979f72 100755
--- a/src/data/corpus.cpp
+++ b/src/data/corpus.cpp
@@ -4,6 +4,8 @@
#include <random>
#include "common/utils.h"
+#include "common/filesystem.h"
+
#include "data/corpus.h"
namespace marian {
@@ -44,6 +46,7 @@ SentenceTuple Corpus::next() {
}
else {
bool gotLine = io::getline(*files_[i], line);
+ // LOG(debug,"[{}][{}] {}", i, pos_ - 1, line);
if(!gotLine) {
eofsHit++;
continue;
@@ -85,16 +88,22 @@ void Corpus::shuffle() {
// Call either reset() or shuffle().
// @TODO: make shuffle() private, instad pass a shuffle() flag to reset(), to clarify mutual exclusiveness with shuffle()
void Corpus::reset() {
- files_.clear();
corpusInRAM_.clear();
ids_.clear();
pos_ = 0;
- for(auto& path : paths_) {
- if(path == "stdin")
- files_.emplace_back(new io::InputFileStream(std::cin));
- else
- files_.emplace_back(new io::InputFileStream(path));
- }
+ for (size_t i = 0; i < paths_.size(); ++i)
+ {
+ if(paths_[i] == "stdin") {
+ files_[i].reset(new io::InputFileStream(std::cin));
+ // Probably not necessary, unless there are some buffers
+ // that we want flushed.
+ }
+ else if (!filesystem::is_fifo(paths_[i].c_str())) {
+ // Do NOT reset named pipes; that closes them and triggers a SIGPIPE
+ // (lost pipe) at the writing end.
+ files_[i].reset(new io::InputFileStream(paths_[i]));
+ }
+ }
}
void Corpus::restore(Ptr<TrainingState> ts) {