Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/sentencepiece.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Taku Kudo <taku@google.com> 2019-01-08 11:47:19 +0300
committer Taku Kudo <taku@google.com> 2019-01-08 11:47:19 +0300
commit 24bda0a647ac3c076dbbf4084958d1ee86a908cc (patch)
tree f3ec1b4c9b6e9a0610486dbb4412fbc64e103ae3 /src/sentencepiece_processor.cc
parent c4b42e1511f99fc66b63a0ba05d7835262c13e1b (diff)
added (Encode|Decode)AsSerializedProto interface so Python module can get full access to the proto.
Diffstat (limited to 'src/sentencepiece_processor.cc')
-rw-r--r-- src/sentencepiece_processor.cc 35
1 files changed, 35 insertions, 0 deletions
diff --git a/src/sentencepiece_processor.cc b/src/sentencepiece_processor.cc
index 8c9c208..43dd11c 100644
--- a/src/sentencepiece_processor.cc
+++ b/src/sentencepiece_processor.cc
@@ -512,6 +512,41 @@ util::Status SentencePieceProcessor::Decode(const std::vector<int> &ids,
return Decode(pieces, spt);
}
+util::bytes SentencePieceProcessor::EncodeAsSerializedProto(  // Encodes |input| into a SentencePieceText and returns it serialized.
+ util::min_string_view input) const {
+ SentencePieceText spt;  // Populated by Encode() below.
+ if (!Encode(input, &spt).ok()) return "";  // Non-OK status: the status itself is discarded and "" is returned.
+ return spt.SerializeAsString();  // Success: hand back the serialized message.
+}
+
+util::bytes SentencePieceProcessor::SampleEncodeAsSerializedProto(  // Runs SampleEncode() on |input| and returns the SentencePieceText serialized.
+ util::min_string_view input, int nbest_size, float alpha) const {
+ SentencePieceText spt;  // Populated by SampleEncode() below.
+ if (!SampleEncode(input, nbest_size, alpha, &spt).ok()) return "";  // |nbest_size| and |alpha| are forwarded unchanged; non-OK status yields "".
+ return spt.SerializeAsString();  // Success: hand back the serialized message.
+}
+
+util::bytes SentencePieceProcessor::NBestEncodeAsSerializedProto(  // Runs NBestEncode() on |input| and returns the NBestSentencePieceText serialized.
+ util::min_string_view input, int nbest_size) const {
+ NBestSentencePieceText spt;  // Note: the n-best message type, unlike the single-result variants.
+ if (!NBestEncode(input, nbest_size, &spt).ok()) return "";  // |nbest_size| is forwarded unchanged; non-OK status yields "".
+ return spt.SerializeAsString();  // Success: hand back the serialized message.
+}
+
+util::bytes SentencePieceProcessor::DecodePiecesAsSerializedProto(  // Decodes a sequence of piece strings and returns the SentencePieceText serialized.
+ const std::vector<std::string> &pieces) const {
+ SentencePieceText spt;  // Populated by Decode() below.
+ if (!Decode(pieces, &spt).ok()) return "";  // Non-OK status: the status itself is discarded and "" is returned.
+ return spt.SerializeAsString();  // Success: hand back the serialized message.
+}
+
+util::bytes SentencePieceProcessor::DecodeIdsAsSerializedProto(  // Decodes a sequence of integer ids and returns the SentencePieceText serialized.
+ const std::vector<int> &ids) const {
+ SentencePieceText spt;  // Populated by Decode() below.
+ if (!Decode(ids, &spt).ok()) return "";  // Non-OK status: the status itself is discarded and "" is returned.
+ return spt.SerializeAsString();  // Success: hand back the serialized message.
+}
+
#define CHECK_STATUS_OR_RETURN_DEFAULT(value) \
if (!status().ok()) { \
LOG(ERROR) << status().error_message() << "\nReturns default value " \