Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/sentencepiece.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Taku Kudo <taku@google.com> 2018-06-29 09:17:18 +0300
committer: Taku Kudo <taku@google.com> 2018-06-29 09:17:18 +0300
commit: 573586854ec34b4403dd7cc77990fe708ce053fb (patch)
tree: 56c6bf24f777d9a84c9d060815bc40b1872e7ce6 /src/builder_test.cc
parent: f4d0ddce6df51e862bccccec35f47cd11ca58d65 (diff)
Added normalization with Unicode case folding
Diffstat (limited to 'src/builder_test.cc')
-rw-r--r-- src/builder_test.cc 16
1 files changed, 16 insertions, 0 deletions
diff --git a/src/builder_test.cc b/src/builder_test.cc
index e019054..212d3d1 100644
--- a/src/builder_test.cc
+++ b/src/builder_test.cc
@@ -70,6 +70,22 @@ TEST(BuilderTest, GetPrecompiledCharsMapTest) {
{
const NormalizerSpec spec =
+ SentencePieceTrainer::GetNormalizerSpec("nfkc_cf");
+ const Normalizer normalizer(spec);
+ EXPECT_EQ(WS "abc", normalizer.Normalize("ABC"));
+ EXPECT_EQ(WS "abc", normalizer.Normalize("ABC"));
+ }
+
+ {
+ const NormalizerSpec spec =
+ SentencePieceTrainer::GetNormalizerSpec("nmt_nfkc_cf");
+ const Normalizer normalizer(spec);
+ EXPECT_EQ(WS "abc", normalizer.Normalize("ABC"));
+ EXPECT_EQ(WS "abc", normalizer.Normalize("ABC"));
+ }
+
+ {
+ const NormalizerSpec spec =
SentencePieceTrainer::GetNormalizerSpec("identity");
EXPECT_TRUE(spec.precompiled_charsmap().empty());
const Normalizer normalizer(spec);