diff options
author | zengl <linxiao.zeng@gmail.com> | 2020-09-04 16:58:39 +0300 |
---|---|---|
committer | zengl <linxiao.zeng@gmail.com> | 2020-09-04 16:58:39 +0300 |
commit | 06f822c50a5d5e222c153a9bd637a5c557371f4c (patch) | |
tree | 932584650ad66bfec4e89a1e82c93fbb2ea53171 | |
parent | a2c12f038fd4a572c0d8ab14ff1850625bae97d7 (diff) |
clear description for alpha of BPE-dropout
-rw-r--r-- | python/sentencepiece.i | 4 | ||||
-rw-r--r-- | python/sentencepiece.py | 8 | ||||
-rw-r--r-- | src/bpe_model.h | 2 | ||||
-rw-r--r-- | src/sentencepiece_processor.h | 3 |
4 files changed, 9 insertions, 8 deletions
```diff
diff --git a/python/sentencepiece.i b/python/sentencepiece.i
index ee79311..04f3af0 100644
--- a/python/sentencepiece.i
+++ b/python/sentencepiece.i
@@ -223,8 +223,8 @@ class PySentenceIterator : public sentencepiece::SentenceIterator {
 nbest_size < 0: assuming that nbest_size is infinite and samples
 from the all hypothesis (lattice) using
 forward-filtering-and-backward-sampling algorithm.
-alpha: Soothing parameter for unigram sampling, and merge probability for
-BPE-dropout.
+alpha: Soothing parameter for unigram sampling, and dropout probability of
+merge operations for BPE-dropout.
 """
 
 _sentencepiece_processor_init_native(self)
diff --git a/python/sentencepiece.py b/python/sentencepiece.py
index 8f1b038..e704a2a 100644
--- a/python/sentencepiece.py
+++ b/python/sentencepiece.py
@@ -202,8 +202,8 @@ class SentencePieceProcessor(object):
 nbest_size < 0: assuming that nbest_size is infinite and samples
 from the all hypothesis (lattice) using
 forward-filtering-and-backward-sampling algorithm.
-alpha: Soothing parameter for unigram sampling, and merge probability for
-BPE-dropout.
+alpha: Soothing parameter for unigram sampling, and dropout probability of
+merge operations for BPE-dropout.
 """
 
 _sentencepiece_processor_init_native(self)
@@ -242,8 +242,8 @@ class SentencePieceProcessor(object):
 nbest_size < 0: assuming that nbest_size is infinite and samples
 from the all hypothesis (lattice) using
 forward-filtering-and-backward-sampling algorithm.
-alpha: Soothing parameter for unigram sampling, and merge probability for
-BPE-dropout.
+alpha: Soothing parameter for unigram sampling, and dropout probability of
+merge operations for BPE-dropout.
 """
 
 if out_type is None:
diff --git a/src/bpe_model.h b/src/bpe_model.h
index 8021d4e..428b8a0 100644
--- a/src/bpe_model.h
+++ b/src/bpe_model.h
@@ -37,7 +37,7 @@ class Model : public ModelInterface {
 }
 
 // Sampling with BPE-dropout: https://arxiv.org/pdf/1910.13267.pdf
-// `alpha` is merge probability in BPE-dropout paper.
+// `alpha` is dropout probability in BPE-dropout paper.
 // Skips merge operation with `alpha` probability.
 // When alpha <= 0.0, no sampling is performed.
 EncodeResult SampleEncode(absl::string_view normalized,
diff --git a/src/sentencepiece_processor.h b/src/sentencepiece_processor.h
index 019eddf..1c7fa6d 100644
--- a/src/sentencepiece_processor.h
+++ b/src/sentencepiece_processor.h
@@ -285,7 +285,8 @@ class SentencePieceProcessor {
 // in https://arxiv.org/abs/1804.10959 (nbest_size < 0 means l = infinity)
 //
 // - BPE (--model_type=bpe):
-// `alpha` is the merge probability `p` in https://arxiv.org/abs/1910.13267
+// `alpha` is the dropout probability `p` of bpe merge operations
+// in https://arxiv.org/abs/1910.13267
 // Nbest-based sampling is not supported so nbest_size parameter is ignored in
 // BPE.
 virtual util::Status SampleEncode(absl::string_view input, int nbest_size,
```