diff options
author | Taku Kudo <taku@google.com> | 2018-06-29 09:17:18 +0300 |
---|---|---|
committer | Taku Kudo <taku@google.com> | 2018-06-29 09:17:18 +0300 |
commit | 573586854ec34b4403dd7cc77990fe708ce053fb (patch) | |
tree | 56c6bf24f777d9a84c9d060815bc40b1872e7ce6 /src/builder.h | |
parent | f4d0ddce6df51e862bccccec35f47cd11ca58d65 (diff) |
Added normalization with Unicode case folding
Diffstat (limited to 'src/builder.h')
-rw-r--r-- | src/builder.h | 15 |
1 files changed, 13 insertions, 2 deletions
```diff
diff --git a/src/builder.h b/src/builder.h
index 219a965..3178d8c 100644
--- a/src/builder.h
+++ b/src/builder.h
@@ -46,7 +46,8 @@ class Builder {
                                       std::string *output);
 
   // Decompiles `blob` into `chars_map`.
-  static util::Status DecompileCharsMap(absl::string_view blob, CharsMap *chars_map);
+  static util::Status DecompileCharsMap(absl::string_view blob,
+                                        CharsMap *chars_map);
 
   // Returns a pre-compiled binary index with `name`.
   static util::Status GetPrecompiledCharsMap(const std::string &name,
@@ -93,11 +94,21 @@ class Builder {
   // whitespaces.
   static util::Status BuildNmtNFKCMap(CharsMap *chars_map);
 
+  // Merge Unicode case folding mapping into `chars_map`.
+  static util::Status MergeUnicodeCaseFoldMap(CharsMap *chars_map);
+
+  // Makes NFKC with Unicode case folding.
+  static util::Status BuildNFKC_CFMap(CharsMap *chars_map);
+
+  // Makes NMT NFKC with Unicode case folding.
+  static util::Status BuildNmtNFKC_CFMap(CharsMap *chars_map);
+
   // Builds Chars map save in `filename`.
   // Format:
   // src_uchar1 src_uchar2 ... <tab> trg_uchar1 trg_uchar2...
   // (src|trg)_ucharX must be a hex of Unicode code point.
-  static util::Status LoadCharsMap(absl::string_view filename, CharsMap *chars_map);
+  static util::Status LoadCharsMap(absl::string_view filename,
+                                   CharsMap *chars_map);
 
   // Saves Chars map to `filename` as TSV.
   static util::Status SaveCharsMap(absl::string_view filename,
```