Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/sentencepiece.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Taku Kudo <taku@google.com> 2020-05-09 20:03:37 +0300
committer: Taku Kudo <taku@google.com> 2020-05-09 20:03:37 +0300
commit: 8b921ac65a0f088618e6679595e655ff331a530f (patch)
tree: 31375b4c8ee92cc4cfa87eb6887f936dc550b2e9
parent: 70494e894c465291758faf9f778502d9d261e065 (diff)
Revert the default size of piece length.
-rw-r--r-- src/builtin_pb/sentencepiece_model.pb.cc | 10
-rw-r--r-- src/builtin_pb/sentencepiece_model.pb.h | 6
-rw-r--r-- src/sentencepiece_model.proto | 2
3 files changed, 9 insertions, 9 deletions
diff --git a/src/builtin_pb/sentencepiece_model.pb.cc b/src/builtin_pb/sentencepiece_model.pb.cc
index 20269f1..65ac2fc 100644
--- a/src/builtin_pb/sentencepiece_model.pb.cc
+++ b/src/builtin_pb/sentencepiece_model.pb.cc
@@ -349,7 +349,7 @@ void TrainerSpec::SharedCtor() {
shrinking_factor_ = 0.75f;
num_threads_ = 16;
num_sub_iterations_ = 2;
- max_sentence_length_ = 8192;
+ max_sentence_length_ = 4192;
max_sentencepiece_length_ = 16;
shuffle_input_sentence_ = true;
split_by_unicode_script_ = true;
@@ -443,7 +443,7 @@ void TrainerSpec::Clear() {
}
if (cached_has_bits & 4278190080u) {
num_sub_iterations_ = 2;
- max_sentence_length_ = 8192;
+ max_sentence_length_ = 4192;
max_sentencepiece_length_ = 16;
shuffle_input_sentence_ = true;
split_by_unicode_script_ = true;
@@ -687,7 +687,7 @@ bool TrainerSpec::MergePartialFromCodedStream(
break;
}
- // optional int32 max_sentence_length = 18 [default = 8192];
+ // optional int32 max_sentence_length = 18 [default = 4192];
case 18: {
if (static_cast< ::google::protobuf::uint8>(tag) ==
static_cast< ::google::protobuf::uint8>(144u /* 144 & 0xFF */)) {
@@ -1134,7 +1134,7 @@ void TrainerSpec::SerializeWithCachedSizes(
::google::protobuf::internal::WireFormatLite::WriteInt32(17, this->num_sub_iterations(), output);
}
- // optional int32 max_sentence_length = 18 [default = 8192];
+ // optional int32 max_sentence_length = 18 [default = 4192];
if (cached_has_bits & 0x02000000u) {
::google::protobuf::internal::WireFormatLite::WriteInt32(18, this->max_sentence_length(), output);
}
@@ -1487,7 +1487,7 @@ size_t TrainerSpec::ByteSizeLong() const {
this->num_sub_iterations());
}
- // optional int32 max_sentence_length = 18 [default = 8192];
+ // optional int32 max_sentence_length = 18 [default = 4192];
if (has_max_sentence_length()) {
total_size += 2 +
::google::protobuf::internal::WireFormatLite::Int32Size(
diff --git a/src/builtin_pb/sentencepiece_model.pb.h b/src/builtin_pb/sentencepiece_model.pb.h
index febd9a0..eeab9cb 100644
--- a/src/builtin_pb/sentencepiece_model.pb.h
+++ b/src/builtin_pb/sentencepiece_model.pb.h
@@ -540,7 +540,7 @@ class TrainerSpec : public ::google::protobuf::MessageLite /* @@protoc_insertion
::google::protobuf::int32 num_sub_iterations() const;
void set_num_sub_iterations(::google::protobuf::int32 value);
- // optional int32 max_sentence_length = 18 [default = 8192];
+ // optional int32 max_sentence_length = 18 [default = 4192];
bool has_max_sentence_length() const;
void clear_max_sentence_length();
static const int kMaxSentenceLengthFieldNumber = 18;
@@ -2070,7 +2070,7 @@ inline void TrainerSpec::set_shrinking_factor(float value) {
// @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.shrinking_factor)
}
-// optional int32 max_sentence_length = 18 [default = 8192];
+// optional int32 max_sentence_length = 18 [default = 4192];
inline bool TrainerSpec::has_max_sentence_length() const {
return (_has_bits_[0] & 0x02000000u) != 0;
}
@@ -2081,7 +2081,7 @@ inline void TrainerSpec::clear_has_max_sentence_length() {
_has_bits_[0] &= ~0x02000000u;
}
inline void TrainerSpec::clear_max_sentence_length() {
- max_sentence_length_ = 8192;
+ max_sentence_length_ = 4192;
clear_has_max_sentence_length();
}
inline ::google::protobuf::int32 TrainerSpec::max_sentence_length() const {
diff --git a/src/sentencepiece_model.proto b/src/sentencepiece_model.proto
index 2013283..fe7bef7 100644
--- a/src/sentencepiece_model.proto
+++ b/src/sentencepiece_model.proto
@@ -103,7 +103,7 @@ message TrainerSpec {
// Longer input tends to bring the following risks:
// * Overflow during EM training (unigram language model only)
// * Performance drop because of O(n log n) cost in BPE.
- optional int32 max_sentence_length = 18 [default = 8192];
+ optional int32 max_sentence_length = 18 [default = 4192];
// Number of threads in the training.
optional int32 num_threads = 16 [default = 16];