Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/sentencepiece.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Taku Kudo <taku@google.com> 2018-06-29 09:17:18 +0300
committer: Taku Kudo <taku@google.com> 2018-06-29 09:17:18 +0300
commit: 573586854ec34b4403dd7cc77990fe708ce053fb (patch)
tree: 56c6bf24f777d9a84c9d060815bc40b1872e7ce6 /src/builder.h
parent: f4d0ddce6df51e862bccccec35f47cd11ca58d65 (diff)
Added normalization with Unicode case folding
Diffstat (limited to 'src/builder.h')
-rw-r--r-- src/builder.h 15
1 files changed, 13 insertions, 2 deletions
diff --git a/src/builder.h b/src/builder.h
index 219a965..3178d8c 100644
--- a/src/builder.h
+++ b/src/builder.h
@@ -46,7 +46,8 @@ class Builder {
std::string *output);
// Decompiles `blob` into `chars_map`.
- static util::Status DecompileCharsMap(absl::string_view blob, CharsMap *chars_map);
+ static util::Status DecompileCharsMap(absl::string_view blob,
+ CharsMap *chars_map);
// Returns a pre-compiled binary index with `name`.
static util::Status GetPrecompiledCharsMap(const std::string &name,
@@ -93,11 +94,21 @@ class Builder {
// whitespaces.
static util::Status BuildNmtNFKCMap(CharsMap *chars_map);
+ // Merge Unicode case folding mapping into `chars_map`.
+ static util::Status MergeUnicodeCaseFoldMap(CharsMap *chars_map);
+
+ // Makes NFKC with Unicode case folding.
+ static util::Status BuildNFKC_CFMap(CharsMap *chars_map);
+
+ // Makes NMT NFKC with Unicode case folding.
+ static util::Status BuildNmtNFKC_CFMap(CharsMap *chars_map);
+
// Builds Chars map save in `filename`.
// Format:
// src_uchar1 src_uchar2 ... <tab> trg_uchar1 trg_uchar2...
// (src|trg)_ucharX must be a hex of Unicode code point.
- static util::Status LoadCharsMap(absl::string_view filename, CharsMap *chars_map);
+ static util::Status LoadCharsMap(absl::string_view filename,
+ CharsMap *chars_map);
// Saves Chars map to `filename` as TSV.
static util::Status SaveCharsMap(absl::string_view filename,