diff options
author | Taku Kudo <taku@google.com> | 2018-05-10 18:40:42 +0300 |
---|---|---|
committer | Taku Kudo <taku@google.com> | 2018-05-10 18:40:42 +0300 |
commit | 54210ca31e1489950acbaf7cc4f449c38940b643 (patch) | |
tree | 10e7cd886f4c2d9cc3c9bcb03a32cae936fb8ad3 /src/builder.h | |
parent | d9469a14f96150f5bc94a4c159452b2d50618986 (diff) |
CHECK to util::Status migration for Builder
Diffstat (limited to 'src/builder.h')
-rw-r--r-- | src/builder.h | 20 |
1 files changed, 12 insertions, 8 deletions
```diff
diff --git a/src/builder.h b/src/builder.h
index d9ae2bc..6c1d6fe 100644
--- a/src/builder.h
+++ b/src/builder.h
@@ -20,6 +20,7 @@
 #include <vector>
 #include "common.h"
 #include "sentencepiece_model.pb.h"
+#include "sentencepiece_processor.h"
 #include "stringpiece.h"
 
 namespace sentencepiece {
@@ -41,14 +42,17 @@ class Builder {
   // String-to-string mapping.
   using CharsMap = std::map<Chars, Chars>;
 
-  // Compiles |chars_map| into a binary index.
-  static std::string CompileCharsMap(const CharsMap &chars_map);
+  static util::Status CompileCharsMap(const CharsMap &chars_map,
+                                      std::string *output);
 
-  // Returns a pre-compiled binary index with |name|.
-  static std::string GetPrecompiledCharsMap(const std::string &name);
+  // Returns a pre-compiled binary index with `name`.
+  static util::Status GetPrecompiledCharsMap(const std::string &name,
+                                             std::string *output);
 
-  // Returns a normalizer spec with a binary index |name|.
-  static NormalizerSpec GetNormalizerSpec(const std::string &name);
+  // Populates necessary fields (precompiled_charmap) from
+  // `name` or `normalization_rule_tsv` fields in `normalizer_spec`.
+  static util::Status PopulateNormalizationSpec(
+      NormalizerSpec *normalizer_spec);
 
   // Makes a normalization mapping based on NFKC.
   //
@@ -90,7 +94,7 @@ class Builder {
   // Returns identity mapping, which dose not perform any normalization.
   static CharsMap BuildIdentityMap();
 
-  // Builds Chars map save in |filename|.
+  // Builds Chars map save in `filename`.
   // Format:
   // src_uchar1 src_uchar2 ... <tab> trg_uchar1 trg_uchar2...
   // (src|trg)_ucharX must be a hex of UCS4.
@@ -99,7 +103,7 @@ class Builder {
  private:
   FRIEND_TEST(BuilderTest, RemoveRedundantMapTest);
 
-  // Removes redundant rules from |chars_map|.
+  // Removes redundant rules from `chars_map`.
   // When char_maps have "aa" => "bb" and "a" => "b", the first
   // rule is not necessary since the second rule can cover the first rule.
   static CharsMap RemoveRedundantMap(const CharsMap &chars_map);
```