Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/sentencepiece.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Taku Kudo <taku@google.com> 2018-09-01 11:13:55 +0300
committer Taku Kudo <taku@google.com> 2018-09-01 11:13:55 +0300
commit e9b3073703f9336ac28ad40bf23afbc751ad1ba7 (patch)
tree 98c064b9fc53e65288ac56ab8ccaf6ccd1acd99d
parent f617fcb697eaeaa09b7b4d73559def57f042194e (diff)
Introduced new Filesystem API phase 2.
-rw-r--r-- .gitignore 1
-rw-r--r-- src/common.h 6
-rw-r--r-- src/filesystem.cc 20
-rw-r--r-- src/sentencepiece_processor.cc 19
-rw-r--r-- src/util.h 1
5 files changed, 27 insertions, 20 deletions
diff --git a/.gitignore b/.gitignore
index e31152a..28a415f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,7 @@ Makefile.in
/py-compile
/test-driver
/ylwrap
+/build
/autom4te.cache
/autoscan.log
diff --git a/src/common.h b/src/common.h
index d7e7554..e08f6de 100644
--- a/src/common.h
+++ b/src/common.h
@@ -64,12 +64,6 @@ static constexpr int64 kint64max = ((int64)(0x7FFFFFFFFFFFFFFF));
static constexpr uint32 kUnicodeError = 0xFFFD;
-#ifdef OS_WIN
-#define OUTPUT_MODE std::ios::binary | std::ios::out
-#else
-#define OUTPUT_MODE std::ios::out
-#endif
-
#if defined(OS_WIN) && defined(UNICODE) && defined(_UNICODE)
#define WPATH(path) (::sentencepiece::win32::Utf8ToWide(path).c_str())
#else
diff --git a/src/filesystem.cc b/src/filesystem.cc
index 12bd600..2bf2bbb 100644
--- a/src/filesystem.cc
+++ b/src/filesystem.cc
@@ -13,8 +13,16 @@
// limitations under the License.!
#include "filesystem.h"
+#include <iostream>
+
#include "util.h"
+#ifdef OS_WIN
+#define OUTPUT_MODE std::ios::binary | std::ios::out
+#else
+#define OUTPUT_MODE std::ios::out
+#endif
+
namespace sentencepiece {
namespace filesystem {
@@ -38,7 +46,15 @@ class PosixReadableFile : public ReadableFile {
return static_cast<bool>(std::getline(*is_, *line));
}
- bool ReadAll(std::string *line) { return true; }
+ bool ReadAll(std::string *line) {
+ if (is_ == &std::cin) {
+ LOG(ERROR) << "ReadAll is not supported for stdin.";
+ return false;
+ }
+ line->assign(std::istreambuf_iterator<char>(*is_),
+ std::istreambuf_iterator<char>());
+ return true;
+ }
private:
util::Status status_;
@@ -72,7 +88,7 @@ class PosixWritableFile : public WritableFile {
private:
util::Status status_;
std::ostream *os_;
-}; // namespace filesystem
+};
std::unique_ptr<ReadableFile> NewReadableFile(absl::string_view filename) {
return port::MakeUnique<PosixReadableFile>(filename);
diff --git a/src/sentencepiece_processor.cc b/src/sentencepiece_processor.cc
index 50f3f4b..1e2ddcb 100644
--- a/src/sentencepiece_processor.cc
+++ b/src/sentencepiece_processor.cc
@@ -43,13 +43,11 @@ SentencePieceProcessor::SentencePieceProcessor() {}
SentencePieceProcessor::~SentencePieceProcessor() {}
util::Status SentencePieceProcessor::Load(util::min_string_view filename) {
- std::ifstream ifs(WPATH(filename.data()), std::ios::binary | std::ios::in);
- if (!ifs) {
- return util::StatusBuilder(util::error::NOT_FOUND)
- << "\"" << filename.data() << "\": " << util::StrError(errno);
- }
-
- return Load(&ifs);
+ auto input = filesystem::NewReadableFile(string_util::ToSV(filename));
+ RETURN_IF_ERROR(input->status());
+ std::string proto;
+ CHECK_OR_RETURN(input->ReadAll(&proto));
+ return LoadFromSerializedProto(proto);
}
void SentencePieceProcessor::LoadOrDie(util::min_string_view filename) {
@@ -57,10 +55,9 @@ void SentencePieceProcessor::LoadOrDie(util::min_string_view filename) {
}
util::Status SentencePieceProcessor::Load(std::istream *is) {
- CHECK_OR_RETURN(is) << "input ifstream is null";
- auto model_proto = port::MakeUnique<ModelProto>();
- CHECK_OR_RETURN(model_proto->ParseFromIstream(is)) << "Model file is broken";
- return Load(std::move(model_proto));
+ return util::StatusBuilder(util::error::UNIMPLEMENTED)
+ << "std::stream API is deprecated. Use LoadFromSerializedProto() "
+ << "to load model from any serialized blob object.";
}
util::Status SentencePieceProcessor::Load(const ModelProto &model_proto) {
diff --git a/src/util.h b/src/util.h
index fb88999..4d940cf 100644
--- a/src/util.h
+++ b/src/util.h
@@ -19,7 +19,6 @@
#include <string.h>
#include <algorithm>
-#include <fstream>
#include <memory>
#include <random>
#include <sstream>