diff options
author | Taku Kudo <taku@google.com> | 2018-06-29 09:17:18 +0300 |
---|---|---|
committer | Taku Kudo <taku@google.com> | 2018-06-29 09:17:18 +0300 |
commit | 573586854ec34b4403dd7cc77990fe708ce053fb (patch) | |
tree | 56c6bf24f777d9a84c9d060815bc40b1872e7ce6 /src/builder_test.cc | |
parent | f4d0ddce6df51e862bccccec35f47cd11ca58d65 (diff) |
Added normalization with Unicode case folding
Diffstat (limited to 'src/builder_test.cc')
-rw-r--r-- | src/builder_test.cc | 16 |
1 file changed, 16 insertions, 0 deletions
diff --git a/src/builder_test.cc b/src/builder_test.cc index e019054..212d3d1 100644 --- a/src/builder_test.cc +++ b/src/builder_test.cc @@ -70,6 +70,22 @@ TEST(BuilderTest, GetPrecompiledCharsMapTest) { { const NormalizerSpec spec = + SentencePieceTrainer::GetNormalizerSpec("nfkc_cf"); + const Normalizer normalizer(spec); + EXPECT_EQ(WS "abc", normalizer.Normalize("ABC")); + EXPECT_EQ(WS "abc", normalizer.Normalize("ABC")); + } + + { + const NormalizerSpec spec = + SentencePieceTrainer::GetNormalizerSpec("nmt_nfkc_cf"); + const Normalizer normalizer(spec); + EXPECT_EQ(WS "abc", normalizer.Normalize("ABC")); + EXPECT_EQ(WS "abc", normalizer.Normalize("ABC")); + } + + { + const NormalizerSpec spec = SentencePieceTrainer::GetNormalizerSpec("identity"); EXPECT_TRUE(spec.precompiled_charsmap().empty()); const Normalizer normalizer(spec); |