Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/sentencepiece.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Taku Kudo <taku@google.com>, 2018-05-10 18:40:42 +0300
committer: Taku Kudo <taku@google.com>, 2018-05-10 18:40:42 +0300
commit: 54210ca31e1489950acbaf7cc4f449c38940b643 (patch)
tree: 10e7cd886f4c2d9cc3c9bcb03a32cae936fb8ad3 /src/builder.h
parent: d9469a14f96150f5bc94a4c159452b2d50618986 (diff)
CHECK to util::Status migration for Builder
Diffstat (limited to 'src/builder.h')
-rw-r--r-- src/builder.h 20
1 files changed, 12 insertions, 8 deletions
diff --git a/src/builder.h b/src/builder.h
index d9ae2bc..6c1d6fe 100644
--- a/src/builder.h
+++ b/src/builder.h
@@ -20,6 +20,7 @@
#include <vector>
#include "common.h"
#include "sentencepiece_model.pb.h"
+#include "sentencepiece_processor.h"
#include "stringpiece.h"
namespace sentencepiece {
@@ -41,14 +42,17 @@ class Builder {
// String-to-string mapping.
using CharsMap = std::map<Chars, Chars>;
- // Compiles |chars_map| into a binary index.
- static std::string CompileCharsMap(const CharsMap &chars_map);
+ static util::Status CompileCharsMap(const CharsMap &chars_map,
+ std::string *output);
- // Returns a pre-compiled binary index with |name|.
- static std::string GetPrecompiledCharsMap(const std::string &name);
+ // Returns a pre-compiled binary index with `name`.
+ static util::Status GetPrecompiledCharsMap(const std::string &name,
+ std::string *output);
- // Returns a normalizer spec with a binary index |name|.
- static NormalizerSpec GetNormalizerSpec(const std::string &name);
+ // Populates necessary fields (precompiled_charmap) from
+ // `name` or `normalization_rule_tsv` fields in `normalizer_spec`.
+ static util::Status PopulateNormalizationSpec(
+ NormalizerSpec *normalizer_spec);
// Makes a normalization mapping based on NFKC.
//
@@ -90,7 +94,7 @@ class Builder {
// Returns identity mapping, which does not perform any normalization.
static CharsMap BuildIdentityMap();
- // Builds Chars map save in |filename|.
+ // Builds Chars map save in `filename`.
// Format:
// src_uchar1 src_uchar2 ... <tab> trg_uchar1 trg_uchar2...
// (src|trg)_ucharX must be a hex of UCS4.
@@ -99,7 +103,7 @@ class Builder {
private:
FRIEND_TEST(BuilderTest, RemoveRedundantMapTest);
- // Removes redundant rules from |chars_map|.
+ // Removes redundant rules from `chars_map`.
// When char_maps have "aa" => "bb" and "a" => "b", the first
// rule is not necessary since the second rule can cover the first rule.
static CharsMap RemoveRedundantMap(const CharsMap &chars_map);