Clarify the description of alpha for BPE-dropout

author: zengl <linxiao.zeng@gmail.com> 2020-09-04 16:58:39 +0300
committer: zengl <linxiao.zeng@gmail.com> 2020-09-04 16:58:39 +0300
commit: 06f822c50a5d5e222c153a9bd637a5c557371f4c (patch)
tree: 932584650ad66bfec4e89a1e82c93fbb2ea53171
parent: a2c12f038fd4a572c0d8ab14ff1850625bae97d7 (diff)
4 files changed, 9 insertions, 8 deletions
diff --git a/python/sentencepiece.i b/python/sentencepiece.i
index ee79311..04f3af0 100644
--- a/python/sentencepiece.i
+++ b/python/sentencepiece.i
@@ -223,8 +223,8 @@ class PySentenceIterator : public sentencepiece::SentenceIterator {
                   nbest_size < 0: assuming that nbest_size is infinite and samples
                     from the all hypothesis (lattice) using
                     forward-filtering-and-backward-sampling algorithm.
-      alpha: Soothing parameter for unigram sampling, and merge probability for
-        BPE-dropout.
+      alpha: Smoothing parameter for unigram sampling, and dropout probability of
+        merge operations for BPE-dropout.
     """
 
     _sentencepiece_processor_init_native(self)
diff --git a/python/sentencepiece.py b/python/sentencepiece.py
index 8f1b038..e704a2a 100644
--- a/python/sentencepiece.py
+++ b/python/sentencepiece.py
@@ -202,8 +202,8 @@ class SentencePieceProcessor(object):
                     nbest_size < 0: assuming that nbest_size is infinite and samples
                       from the all hypothesis (lattice) using
                       forward-filtering-and-backward-sampling algorithm.
-        alpha: Soothing parameter for unigram sampling, and merge probability for
-          BPE-dropout.
+        alpha: Smoothing parameter for unigram sampling, and dropout probability of
+          merge operations for BPE-dropout.
       """
 
       _sentencepiece_processor_init_native(self)
@@ -242,8 +242,8 @@ class SentencePieceProcessor(object):
                     nbest_size < 0: assuming that nbest_size is infinite and samples
                       from the all hypothesis (lattice) using
                       forward-filtering-and-backward-sampling algorithm.
-        alpha: Soothing parameter for unigram sampling, and merge probability for
-          BPE-dropout.
+        alpha: Smoothing parameter for unigram sampling, and dropout probability of
+          merge operations for BPE-dropout.
       """
 
       if out_type is None:
diff --git a/src/bpe_model.h b/src/bpe_model.h
index 8021d4e..428b8a0 100644
--- a/src/bpe_model.h
+++ b/src/bpe_model.h
@@ -37,7 +37,7 @@ class Model : public ModelInterface {
   }
 
   // Sampling with BPE-dropout: https://arxiv.org/pdf/1910.13267.pdf
-  // `alpha` is merge probability in BPE-dropout paper.
+  // `alpha` is the dropout probability in the BPE-dropout paper.
   // Skips merge operation with `alpha` probability.
   // When alpha <= 0.0, no sampling is performed.
   EncodeResult SampleEncode(absl::string_view normalized,
diff --git a/src/sentencepiece_processor.h b/src/sentencepiece_processor.h
index 019eddf..1c7fa6d 100644
--- a/src/sentencepiece_processor.h
+++ b/src/sentencepiece_processor.h
@@ -285,7 +285,8 @@ class SentencePieceProcessor {
   // in https://arxiv.org/abs/1804.10959  (nbest_size < 0 means l = infinity)
   //
   // - BPE (--model_type=bpe):
-  // `alpha` is the merge probability `p` in https://arxiv.org/abs/1910.13267
+  // `alpha` is the dropout probability `p` of BPE merge operations
+  // in https://arxiv.org/abs/1910.13267
   // Nbest-based sampling is not supported so nbest_size parameter is ignored in
   // BPE.
   virtual util::Status SampleEncode(absl::string_view input, int nbest_size,
author	zengl <linxiao.zeng@gmail.com>	2020-09-04 16:58:39 +0300
committer	zengl <linxiao.zeng@gmail.com>	2020-09-04 16:58:39 +0300
commit	06f822c50a5d5e222c153a9bd637a5c557371f4c (patch)
tree	932584650ad66bfec4e89a1e82c93fbb2ea53171
parent	a2c12f038fd4a572c0d8ab14ff1850625bae97d7 (diff)