Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/sentencepiece.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Taku Kudo <taku@google.com> 2018-06-09 19:15:34 +0300
committer: Taku Kudo <taku@google.com> 2018-06-09 19:15:34 +0300
commit: a65ca0d829f925406ae5c25f5be0de6a94af8e00 (patch)
tree: 6061d7819a3d83a20ccaaae0262ad70c22a67f74 /src/builder.h
parent: a574ce183c05c99a4919df50d0926c947a0dfa12 (diff)
Uses NMT_NFKC rule by default.
Diffstat (limited to 'src/builder.h')
-rw-r--r-- src/builder.h 4
1 files changed, 4 insertions, 0 deletions
diff --git a/src/builder.h b/src/builder.h
index 98bd59c..5685fa5 100644
--- a/src/builder.h
+++ b/src/builder.h
@@ -89,6 +89,10 @@ class Builder {
// TODO(taku): Make NFC, NFD, and NFKD mapping if necessary.
static util::Status BuildNFKCMap(CharsMap *chars_map);
+ // Makes an NFKC-based mapping with NMT specific modifications around
+ // whitespaces.
+ static util::Status BuildNmtNFKCMap(CharsMap *chars_map);
+
// Builds Chars map save in `filename`.
// Format:
// src_uchar1 src_uchar2 ... <tab> trg_uchar1 trg_uchar2...