github.com/stanfordnlp/stanza.git
author     Hung Bui <86261282+hungbui0411@users.noreply.github.com>  2022-11-09 10:30:56 +0300
committer  GitHub <noreply@github.com>  2022-11-09 10:30:56 +0300
commit     d0a729801412372cb553a3328010675f404a1dca (patch)
tree       93c70874178bae4fb88ffd738f771c83ddb6eec0
parent     eb46dda76a275d66ef5a23a2de35a7f4e84d6fbb (diff)
Addition of extra nonlinearities for experiments (#1149)
* Add extra nonlinearities to experiment with
* Add notes on scores for various non-linearities
* Isolate import of nonlinearities which were not available until after torch 1.3.0

Co-authored-by: Hung Manh Bui <hung0411@sc.stanford.edu>
Co-authored-by: John Bauer <horatio@gmail.com>
-rw-r--r--  stanza/models/constituency/utils.py  64
1 file changed, 58 insertions(+), 6 deletions(-)
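A rough usage sketch (not part of the commit): the NONLINEARITY table in the diff below maps lowercase names to torch activation classes, and build_nonlinearity is assumed to turn such a name into a module instance; activations added behind the hasattr() guard (e.g. 'mish', 'silu') are only registered when the installed torch exposes them.

    import torch
    from torch import nn

    from stanza.models.constituency.utils import NONLINEARITY, build_nonlinearity

    # Pick an activation by name; 'gelu' scored best in the experiments noted in the diff.
    act = build_nonlinearity('gelu')   # assumed to return an nn.Module instance
    print(act(torch.randn(3)))

    # Activations isolated for backwards compatibility may be absent on older torch builds,
    # so check the registry before relying on them.
    if 'mish' in NONLINEARITY:
        mish = NONLINEARITY['mish']()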
diff --git a/stanza/models/constituency/utils.py b/stanza/models/constituency/utils.py
index e3bfa525..4f46abea 100644
--- a/stanza/models/constituency/utils.py
+++ b/stanza/models/constituency/utils.py
@@ -105,21 +105,73 @@ def retag_trees(trees, pipeline, xpos=True):
raise ValueError("Failed to properly retag tree #{}: {}".format(tree_idx, tree)) from e
return new_trees
+
+# experimental results on nonlinearities
+# this is on a VI dataset, VLSP_22, using 1/10th of the data as a dev set
+# (no released test set at the time of the experiment)
+# original non-Bert tagger, with 1 iteration each instead of averaged over 5
+# considering the number of experiments and the length of time they would take
+#
+# Gelu has the highest score, which tracks with other experiments run.
+# Note that publicly released models have typically used Relu
+# on account of the runtime speed improvement
+#
+# Gelu: 82.32
+# Relu: 82.14
+# Mish: 81.95
+# Relu6: 81.91
+# Silu: 81.90
+# ELU: 81.73
+# Hardswish: 81.67
+# Softsign: 81.63
+# Hardtanh: 81.44
+# Celu: 81.43
+# Selu: 81.17
+# TODO: need to redo the prelu experiment with
+# possibly different numbers of parameters
+# and proper weight decay
+# Prelu: 80.95 (terminated early)
+# Softplus: 80.94
+# Logsigmoid: 80.91
+# Hardsigmoid: 79.03
+# RReLU: 77.00
+# Hardshrink: failed
+# Softshrink: failed
NONLINEARITY = {
- 'tanh': nn.Tanh,
- 'relu': nn.ReLU,
+ 'celu': nn.CELU,
+ 'elu': nn.ELU,
'gelu': nn.GELU,
+ 'hardshrink': nn.Hardshrink,
+ 'hardtanh': nn.Hardtanh,
'leaky_relu': nn.LeakyReLU,
+ 'logsigmoid': nn.LogSigmoid,
+ 'prelu': nn.PReLU,
+ 'relu': nn.ReLU,
+ 'relu6': nn.ReLU6,
+ 'rrelu': nn.RReLU,
+ 'selu': nn.SELU,
+ 'softplus': nn.Softplus,
+ 'softshrink': nn.Softshrink,
+ 'softsign': nn.Softsign,
+ 'tanhshrink': nn.Tanhshrink,
+ 'tanh': nn.Tanh,
}
# separating these out allows for backwards compatibility with earlier versions of pytorch
# NOTE torch compatibility: if we ever *release* models with these
# activation functions, we will need to break that compatibility
-if hasattr(nn, 'SiLU'):
- NONLINEARITY['silu'] = nn.SiLU
-if hasattr(nn, 'Mish'):
- NONLINEARITY['mish'] = nn.Mish
+nonlinearity_list = [
+ 'GLU',
+ 'Hardsigmoid',
+ 'Hardswish',
+ 'Mish',
+ 'SiLU',
+]
+
+for nonlinearity in nonlinearity_list:
+ if hasattr(nn, nonlinearity):
+ NONLINEARITY[nonlinearity.lower()] = getattr(nn, nonlinearity)
def build_nonlinearity(nonlinearity):
"""