Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/sentencepiece.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/normalizer.cc | 5
-rw-r--r-- src/trainer_interface.cc | 3
2 files changed, 7 insertions, 1 deletion
diff --git a/src/normalizer.cc b/src/normalizer.cc
index 52754db..f66fd42 100644
--- a/src/normalizer.cc
+++ b/src/normalizer.cc
@@ -150,9 +150,14 @@ util::Status Normalizer::Normalize(absl::string_view input,
norm_to_orig->push_back(consumed);
}
} else {
+#if 0
*normalized += string_util::hexStr(data + n, 1);
norm_to_orig->push_back(consumed);
norm_to_orig->push_back(consumed);
+#else
+ *normalized += data[n];
+ norm_to_orig->push_back(consumed);
+#endif
}
}
// Checks whether the last character of sp is whitespace.
diff --git a/src/trainer_interface.cc b/src/trainer_interface.cc
index a32f78d..0ea71d3 100644
--- a/src/trainer_interface.cc
+++ b/src/trainer_interface.cc
@@ -403,7 +403,8 @@ END:
for (size_t i = n; i < sentences_.size();
i += trainer_spec_.num_threads()) {
auto *s = &sentences_[i].first;
- *s = normalizer.Normalize(meta_pieces_matcher.GlobalReplace(*s, kUPPBoundaryStr)); // cannot normalize meta pieces, but that's fine.
+ *s = meta_pieces_matcher.GlobalReplace(normalizer.Normalize(*s),
+ kUPPBoundaryStr);
}
});
}