Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/sentencepiece.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Taku Kudo <taku@google.com>, 2018-06-18 14:16:15 +0300
committer: Taku Kudo <taku@google.com>, 2018-06-18 14:16:15 +0300
commit: 905c49e6b40060c37a01d0559bcc9711482d34a6 (patch)
tree: 639842bab4cd2696455fe8e3fc35ebb29e6c1b54 /src/sentencepiece_trainer.cc
parent: a0c7145a305b0786dfea9a60129f180e4ce3afbd (diff)
Introduced minimum string_wrapper to remove extra string copy
Diffstat (limited to 'src/sentencepiece_trainer.cc')
-rw-r--r-- src/sentencepiece_trainer.cc | 22
1 file changed, 14 insertions, 8 deletions
diff --git a/src/sentencepiece_trainer.cc b/src/sentencepiece_trainer.cc
index d03c39c..f56f003 100644
--- a/src/sentencepiece_trainer.cc
+++ b/src/sentencepiece_trainer.cc
@@ -47,9 +47,9 @@ util::Status SentencePieceTrainer::Train(
// static
NormalizerSpec SentencePieceTrainer::GetNormalizerSpec(
- const std::string &name) {
+ util::min_string_view name) {
NormalizerSpec spec;
- spec.set_name(name);
+ spec.set_name(name.data(), name.size());
CHECK_OK(normalizer::Builder::GetPrecompiledCharsMap(
spec.name(), spec.mutable_precompiled_charsmap()));
return spec;
@@ -57,15 +57,19 @@ NormalizerSpec SentencePieceTrainer::GetNormalizerSpec(
// static
util::Status SentencePieceTrainer::SetProtoField(
- const std::string &field_name, const std::string &value,
+ util::min_string_view _field_name, util::min_string_view _value,
google::protobuf::Message *message) {
+ const absl::string_view field_name(_field_name.data(), _field_name.size());
+ const absl::string_view value(_value.data(), _value.size());
+
const auto *descriptor = message->GetDescriptor();
const auto *reflection = message->GetReflection();
CHECK_OR_RETURN(descriptor != nullptr && reflection != nullptr)
<< "reflection is not supported.";
- const auto *field = descriptor->FindFieldByName(std::string(field_name));
+ const auto *field = descriptor->FindFieldByName(
+ std::string(field_name.data(), field_name.size()));
if (field == nullptr) {
return util::StatusBuilder(util::error::NOT_FOUND)
@@ -73,8 +77,9 @@ util::Status SentencePieceTrainer::SetProtoField(
<< descriptor->DebugString();
}
- std::vector<std::string> values = {value};
- if (field->is_repeated()) values = string_util::Split(value, ",");
+ std::vector<std::string> values = {std::string(value)};
+ if (field->is_repeated())
+ values = string_util::Split(std::string(value), ",");
#define SET_FIELD(METHOD_TYPE, v) \
if (field->is_repeated()) \
@@ -128,11 +133,12 @@ util::Status SentencePieceTrainer::SetProtoField(
// static
util::Status SentencePieceTrainer::MergeSpecsFromArgs(
- const std::string &args, TrainerSpec *trainer_spec,
+ util::min_string_view _args, TrainerSpec *trainer_spec,
NormalizerSpec *normalizer_spec) {
CHECK_OR_RETURN(trainer_spec) << "`trainer_spec` must not be null.";
CHECK_OR_RETURN(normalizer_spec) << "`normalizer_spec` must not be null.";
+ absl::string_view args(_args.data(), _args.size());
if (args.empty()) return util::OkStatus();
for (auto arg : string_util::SplitPiece(args, " ")) {
@@ -170,7 +176,7 @@ util::Status SentencePieceTrainer::MergeSpecsFromArgs(
}
// static
-util::Status SentencePieceTrainer::Train(const std::string &args) {
+util::Status SentencePieceTrainer::Train(util::min_string_view args) {
TrainerSpec trainer_spec;
NormalizerSpec normalizer_spec;
RETURN_IF_ERROR(MergeSpecsFromArgs(args, &trainer_spec, &normalizer_spec));