diff options
author | Taku Kudo <taku@google.com> | 2020-05-09 20:03:37 +0300 |
---|---|---|
committer | Taku Kudo <taku@google.com> | 2020-05-09 20:03:37 +0300 |
commit | 8b921ac65a0f088618e6679595e655ff331a530f (patch) | |
tree | 31375b4c8ee92cc4cfa87eb6887f936dc550b2e9 | |
parent | 70494e894c465291758faf9f778502d9d261e065 (diff) |
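Revert the default size of piece length. [Note: the diff below actually changes the default of `max_sentence_length` from 8192 to 4192; `max_sentencepiece_length` remains 16.]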
-rw-r--r-- | src/builtin_pb/sentencepiece_model.pb.cc | 10 | ||||
-rw-r--r-- | src/builtin_pb/sentencepiece_model.pb.h | 6 | ||||
-rw-r--r-- | src/sentencepiece_model.proto | 2 |
3 files changed, 9 insertions, 9 deletions
```diff
diff --git a/src/builtin_pb/sentencepiece_model.pb.cc b/src/builtin_pb/sentencepiece_model.pb.cc
index 20269f1..65ac2fc 100644
--- a/src/builtin_pb/sentencepiece_model.pb.cc
+++ b/src/builtin_pb/sentencepiece_model.pb.cc
@@ -349,7 +349,7 @@ void TrainerSpec::SharedCtor() {
   shrinking_factor_ = 0.75f;
   num_threads_ = 16;
   num_sub_iterations_ = 2;
-  max_sentence_length_ = 8192;
+  max_sentence_length_ = 4192;
   max_sentencepiece_length_ = 16;
   shuffle_input_sentence_ = true;
   split_by_unicode_script_ = true;
@@ -443,7 +443,7 @@ void TrainerSpec::Clear() {
   }
   if (cached_has_bits & 4278190080u) {
     num_sub_iterations_ = 2;
-    max_sentence_length_ = 8192;
+    max_sentence_length_ = 4192;
     max_sentencepiece_length_ = 16;
     shuffle_input_sentence_ = true;
     split_by_unicode_script_ = true;
@@ -687,7 +687,7 @@ bool TrainerSpec::MergePartialFromCodedStream(
         break;
       }
 
-      // optional int32 max_sentence_length = 18 [default = 8192];
+      // optional int32 max_sentence_length = 18 [default = 4192];
       case 18: {
         if (static_cast< ::google::protobuf::uint8>(tag) ==
             static_cast< ::google::protobuf::uint8>(144u /* 144 & 0xFF */)) {
@@ -1134,7 +1134,7 @@ void TrainerSpec::SerializeWithCachedSizes(
     ::google::protobuf::internal::WireFormatLite::WriteInt32(17, this->num_sub_iterations(), output);
   }
 
-  // optional int32 max_sentence_length = 18 [default = 8192];
+  // optional int32 max_sentence_length = 18 [default = 4192];
   if (cached_has_bits & 0x02000000u) {
     ::google::protobuf::internal::WireFormatLite::WriteInt32(18, this->max_sentence_length(), output);
   }
@@ -1487,7 +1487,7 @@ size_t TrainerSpec::ByteSizeLong() const {
           this->num_sub_iterations());
     }
 
-    // optional int32 max_sentence_length = 18 [default = 8192];
+    // optional int32 max_sentence_length = 18 [default = 4192];
     if (has_max_sentence_length()) {
       total_size += 2 +
         ::google::protobuf::internal::WireFormatLite::Int32Size(
diff --git a/src/builtin_pb/sentencepiece_model.pb.h b/src/builtin_pb/sentencepiece_model.pb.h
index febd9a0..eeab9cb 100644
--- a/src/builtin_pb/sentencepiece_model.pb.h
+++ b/src/builtin_pb/sentencepiece_model.pb.h
@@ -540,7 +540,7 @@ class TrainerSpec : public ::google::protobuf::MessageLite /* @@protoc_insertion
   ::google::protobuf::int32 num_sub_iterations() const;
   void set_num_sub_iterations(::google::protobuf::int32 value);
 
-  // optional int32 max_sentence_length = 18 [default = 8192];
+  // optional int32 max_sentence_length = 18 [default = 4192];
   bool has_max_sentence_length() const;
   void clear_max_sentence_length();
   static const int kMaxSentenceLengthFieldNumber = 18;
@@ -2070,7 +2070,7 @@ inline void TrainerSpec::set_shrinking_factor(float value) {
   // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.shrinking_factor)
 }
 
-// optional int32 max_sentence_length = 18 [default = 8192];
+// optional int32 max_sentence_length = 18 [default = 4192];
 inline bool TrainerSpec::has_max_sentence_length() const {
   return (_has_bits_[0] & 0x02000000u) != 0;
 }
@@ -2081,7 +2081,7 @@ inline void TrainerSpec::clear_has_max_sentence_length() {
   _has_bits_[0] &= ~0x02000000u;
 }
 inline void TrainerSpec::clear_max_sentence_length() {
-  max_sentence_length_ = 8192;
+  max_sentence_length_ = 4192;
   clear_has_max_sentence_length();
 }
 inline ::google::protobuf::int32 TrainerSpec::max_sentence_length() const {
diff --git a/src/sentencepiece_model.proto b/src/sentencepiece_model.proto
index 2013283..fe7bef7 100644
--- a/src/sentencepiece_model.proto
+++ b/src/sentencepiece_model.proto
@@ -103,7 +103,7 @@ message TrainerSpec {
   // Longer input tends to bring the following risks:
   // * Overflow during EM training (unigram language model only)
   // * Performance drop because of O(n log n) cost in BPE.
-  optional int32 max_sentence_length = 18 [default = 8192];
+  optional int32 max_sentence_length = 18 [default = 4192];
 
   // Number of threads in the training.
   optional int32 num_threads = 16 [default = 16];
```