Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/sentencepiece.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTaku Kudo <taku@google.com>2018-06-11 05:28:36 +0300
committerTaku Kudo <taku@google.com>2018-06-11 05:28:36 +0300
commit7875b8b8c5644dbb8976a8dbbf38ea1142495426 (patch)
treeeef09ec9df1add9864cd2276be31cdb97fcec153 /src/builder_test.cc
parent20e68fa7c407211e0f17e85537b6085005596eea (diff)
Support an empty normalization and other minor fixes
Diffstat (limited to 'src/builder_test.cc')
-rw-r--r--src/builder_test.cc29
1 files changed, 29 insertions, 0 deletions
diff --git a/src/builder_test.cc b/src/builder_test.cc
index 6fd05d4..e019054 100644
--- a/src/builder_test.cc
+++ b/src/builder_test.cc
@@ -92,6 +92,9 @@ TEST(BuilderTest, CompileCharsMap) {
// あいう => abc
chars_map[{0x3042, 0x3044, 0x3046}] = {0x0061, 0x0062, 0x0063};
+ // えお => remove
+ chars_map[{0x3048, 0x304A}] = {};
+
NormalizerSpec spec;
EXPECT_OK(
Builder::CompileCharsMap(chars_map, spec.mutable_precompiled_charsmap()));
@@ -111,6 +114,7 @@ TEST(BuilderTest, CompileCharsMap) {
EXPECT_EQ("abc", normalizer.Normalize("あいう"));
EXPECT_EQ("abcえ", normalizer.Normalize("あいうえ"));
EXPECT_EQ("ABCabcD", normalizer.Normalize("abcあいうd"));
+ EXPECT_EQ("abcか", normalizer.Normalize("あいうえおか"));
}
TEST(BuilderTest, LoadCharsMapTest) {
@@ -139,6 +143,31 @@ TEST(BuilderTest, LoadCharsMapTest) {
#endif
}
+TEST(BuilderTest, LoadCharsMapWithEmptyeTest) {  // Checks that TSV rows with an empty target load as empty mappings and survive a save/reload. NOTE(review): "Emptye" looks like a typo for "Empty".
+ test::ScopedTempFile test_tsv("test.tsv");  // Input file written by this test.
+ test::ScopedTempFile test_out_tsv("test_out.tsv");  // Output file used for the save/reload check below.
+ {
+ io::OutputBuffer output(test_tsv.filename());  // Scoped so the buffer is destroyed before the file is read back; presumably that flushes/closes it -- confirm in io::OutputBuffer.
+ output.WriteLine("0061\t0041");  // Source 0061 with target 0041.
+ output.WriteLine("0062");  // Source only: no tab and no target column.
+ output.WriteLine("0063\t\t#foo=>bar");  // Empty target column followed by a third column; presumably a free-form comment the loader ignores -- confirm.
+ }
+
+ Builder::CharsMap chars_map;
+ EXPECT_OK(Builder::LoadCharsMap(test_tsv.filename(), &chars_map));
+
+ EXPECT_EQ(3, chars_map.size());  // All three rows must be kept, including the two with no target.
+ EXPECT_EQ(std::vector<char32>({0x0041}), chars_map[{0x0061}]);
+ EXPECT_EQ(std::vector<char32>({}), chars_map[{0x0062}]);  // Missing target column yields an empty sequence.
+ EXPECT_EQ(std::vector<char32>({}), chars_map[{0x0063}]);  // Empty target column yields an empty sequence too.
+
+ EXPECT_OK(Builder::SaveCharsMap(test_out_tsv.filename(), chars_map));  // Round trip: write the loaded map back out...
+
+ Builder::CharsMap new_chars_map;
+ EXPECT_OK(Builder::LoadCharsMap(test_out_tsv.filename(), &new_chars_map));  // ...reload it...
+ EXPECT_EQ(chars_map, new_chars_map);  // ...and require the reloaded map to equal the original, empty targets included.
+}
+
TEST(BuilderTest, ContainsTooManySharedPrefixTest) {
Builder::CharsMap chars_map;
std::vector<char32> keys;