diff options
-rw-r--r-- | src/normalizer.cc | 5 | ||||
-rw-r--r-- | src/trainer_interface.cc | 3 |
2 files changed, 7 insertions, 1 deletion
diff --git a/src/normalizer.cc b/src/normalizer.cc
index 52754db..f66fd42 100644
--- a/src/normalizer.cc
+++ b/src/normalizer.cc
@@ -150,9 +150,14 @@ util::Status Normalizer::Normalize(absl::string_view input,
           norm_to_orig->push_back(consumed);
         }
       } else {
+#if 0
         *normalized += string_util::hexStr(data + n, 1);
         norm_to_orig->push_back(consumed);
         norm_to_orig->push_back(consumed);
+#else
+        *normalized += data[n];
+        norm_to_orig->push_back(consumed);
+#endif
       }
     }
     // Checks whether the last character of sp is whitespace.
diff --git a/src/trainer_interface.cc b/src/trainer_interface.cc
index a32f78d..0ea71d3 100644
--- a/src/trainer_interface.cc
+++ b/src/trainer_interface.cc
@@ -403,7 +403,8 @@ END:
         for (size_t i = n; i < sentences_.size();
              i += trainer_spec_.num_threads()) {
           auto *s = &sentences_[i].first;
-          *s = normalizer.Normalize(meta_pieces_matcher.GlobalReplace(*s, kUPPBoundaryStr)); // cannot normalize meta pieces, but that's fine.
+          *s = meta_pieces_matcher.GlobalReplace(normalizer.Normalize(*s),
+                                                 kUPPBoundaryStr);
         }
       });
     }