Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/sentencepiece.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author zengl <linxiao.zeng@gmail.com> 2020-09-04 16:58:39 +0300
committer zengl <linxiao.zeng@gmail.com> 2020-09-04 16:58:39 +0300
commit 06f822c50a5d5e222c153a9bd637a5c557371f4c (patch)
tree 932584650ad66bfec4e89a1e82c93fbb2ea53171
parent a2c12f038fd4a572c0d8ab14ff1850625bae97d7 (diff)
clear description for alpha of BPE-dropout
-rw-r--r-- python/sentencepiece.i 4
-rw-r--r-- python/sentencepiece.py 8
-rw-r--r-- src/bpe_model.h 2
-rw-r--r-- src/sentencepiece_processor.h 3
4 files changed, 9 insertions, 8 deletions
diff --git a/python/sentencepiece.i b/python/sentencepiece.i
index ee79311..04f3af0 100644
--- a/python/sentencepiece.i
+++ b/python/sentencepiece.i
@@ -223,8 +223,8 @@ class PySentenceIterator : public sentencepiece::SentenceIterator {
nbest_size < 0: assumes that nbest_size is infinite and samples
from all hypotheses (lattice) using the
forward-filtering-and-backward-sampling algorithm.
- alpha: Soothing parameter for unigram sampling, and merge probability for
- BPE-dropout.
+ alpha: Smoothing parameter for unigram sampling, and dropout probability of
+ merge operations for BPE-dropout.
"""
_sentencepiece_processor_init_native(self)
diff --git a/python/sentencepiece.py b/python/sentencepiece.py
index 8f1b038..e704a2a 100644
--- a/python/sentencepiece.py
+++ b/python/sentencepiece.py
@@ -202,8 +202,8 @@ class SentencePieceProcessor(object):
nbest_size < 0: assumes that nbest_size is infinite and samples
from all hypotheses (lattice) using the
forward-filtering-and-backward-sampling algorithm.
- alpha: Soothing parameter for unigram sampling, and merge probability for
- BPE-dropout.
+ alpha: Smoothing parameter for unigram sampling, and dropout probability of
+ merge operations for BPE-dropout.
"""
_sentencepiece_processor_init_native(self)
@@ -242,8 +242,8 @@ class SentencePieceProcessor(object):
nbest_size < 0: assumes that nbest_size is infinite and samples
from all hypotheses (lattice) using the
forward-filtering-and-backward-sampling algorithm.
- alpha: Soothing parameter for unigram sampling, and merge probability for
- BPE-dropout.
+ alpha: Smoothing parameter for unigram sampling, and dropout probability of
+ merge operations for BPE-dropout.
"""
if out_type is None:
diff --git a/src/bpe_model.h b/src/bpe_model.h
index 8021d4e..428b8a0 100644
--- a/src/bpe_model.h
+++ b/src/bpe_model.h
@@ -37,7 +37,7 @@ class Model : public ModelInterface {
}
// Sampling with BPE-dropout: https://arxiv.org/pdf/1910.13267.pdf
- // `alpha` is merge probability in BPE-dropout paper.
+ // `alpha` is the dropout probability in the BPE-dropout paper.
// Skips merge operation with `alpha` probability.
// When alpha <= 0.0, no sampling is performed.
EncodeResult SampleEncode(absl::string_view normalized,
diff --git a/src/sentencepiece_processor.h b/src/sentencepiece_processor.h
index 019eddf..1c7fa6d 100644
--- a/src/sentencepiece_processor.h
+++ b/src/sentencepiece_processor.h
@@ -285,7 +285,8 @@ class SentencePieceProcessor {
// in https://arxiv.org/abs/1804.10959 (nbest_size < 0 means l = infinity)
//
// - BPE (--model_type=bpe):
- // `alpha` is the merge probability `p` in https://arxiv.org/abs/1910.13267
+ // `alpha` is the dropout probability `p` of BPE merge operations
+ // in the paper https://arxiv.org/abs/1910.13267
// Nbest-based sampling is not supported so nbest_size parameter is ignored in
// BPE.
virtual util::Status SampleEncode(absl::string_view input, int nbest_size,