diff options
author | Taku Kudo <taku@google.com> | 2018-06-18 14:16:15 +0300 |
---|---|---|
committer | Taku Kudo <taku@google.com> | 2018-06-18 14:16:15 +0300 |
commit | 905c49e6b40060c37a01d0559bcc9711482d34a6 (patch) | |
tree | 639842bab4cd2696455fe8e3fc35ebb29e6c1b54 /src/sentencepiece_trainer.cc | |
parent | a0c7145a305b0786dfea9a60129f180e4ce3afbd (diff) |
Introduced a minimal string wrapper (util::min_string_view) to remove an extra string copy
Diffstat (limited to 'src/sentencepiece_trainer.cc')
-rw-r--r-- | src/sentencepiece_trainer.cc | 22 |
1 files changed, 14 insertions, 8 deletions
diff --git a/src/sentencepiece_trainer.cc b/src/sentencepiece_trainer.cc index d03c39c..f56f003 100644 --- a/src/sentencepiece_trainer.cc +++ b/src/sentencepiece_trainer.cc @@ -47,9 +47,9 @@ util::Status SentencePieceTrainer::Train( // static NormalizerSpec SentencePieceTrainer::GetNormalizerSpec( - const std::string &name) { + util::min_string_view name) { NormalizerSpec spec; - spec.set_name(name); + spec.set_name(name.data(), name.size()); CHECK_OK(normalizer::Builder::GetPrecompiledCharsMap( spec.name(), spec.mutable_precompiled_charsmap())); return spec; @@ -57,15 +57,19 @@ NormalizerSpec SentencePieceTrainer::GetNormalizerSpec( // static util::Status SentencePieceTrainer::SetProtoField( - const std::string &field_name, const std::string &value, + util::min_string_view _field_name, util::min_string_view _value, google::protobuf::Message *message) { + const absl::string_view field_name(_field_name.data(), _field_name.size()); + const absl::string_view value(_value.data(), _value.size()); + const auto *descriptor = message->GetDescriptor(); const auto *reflection = message->GetReflection(); CHECK_OR_RETURN(descriptor != nullptr && reflection != nullptr) << "reflection is not supported."; - const auto *field = descriptor->FindFieldByName(std::string(field_name)); + const auto *field = descriptor->FindFieldByName( + std::string(field_name.data(), field_name.size())); if (field == nullptr) { return util::StatusBuilder(util::error::NOT_FOUND) @@ -73,8 +77,9 @@ util::Status SentencePieceTrainer::SetProtoField( << descriptor->DebugString(); } - std::vector<std::string> values = {value}; - if (field->is_repeated()) values = string_util::Split(value, ","); + std::vector<std::string> values = {std::string(value)}; + if (field->is_repeated()) + values = string_util::Split(std::string(value), ","); #define SET_FIELD(METHOD_TYPE, v) \ if (field->is_repeated()) \ @@ -128,11 +133,12 @@ util::Status SentencePieceTrainer::SetProtoField( // static util::Status 
SentencePieceTrainer::MergeSpecsFromArgs( - const std::string &args, TrainerSpec *trainer_spec, + util::min_string_view _args, TrainerSpec *trainer_spec, NormalizerSpec *normalizer_spec) { CHECK_OR_RETURN(trainer_spec) << "`trainer_spec` must not be null."; CHECK_OR_RETURN(normalizer_spec) << "`normalizer_spec` must not be null."; + absl::string_view args(_args.data(), _args.size()); if (args.empty()) return util::OkStatus(); for (auto arg : string_util::SplitPiece(args, " ")) { @@ -170,7 +176,7 @@ util::Status SentencePieceTrainer::MergeSpecsFromArgs( } // static -util::Status SentencePieceTrainer::Train(const std::string &args) { +util::Status SentencePieceTrainer::Train(util::min_string_view args) { TrainerSpec trainer_spec; NormalizerSpec normalizer_spec; RETURN_IF_ERROR(MergeSpecsFromArgs(args, &trainer_spec, &normalizer_spec)); |