author | Hung Bui <86261282+hungbui0411@users.noreply.github.com> | 2022-11-09 10:30:56 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-11-09 10:30:56 +0300 |
commit | d0a729801412372cb553a3328010675f404a1dca (patch) | |
tree | 93c70874178bae4fb88ffd738f771c83ddb6eec0 | |
parent | eb46dda76a275d66ef5a23a2de35a7f4e84d6fbb (diff) | |
Addition of extra nonlinearities for experiments (#1149)
* Add extra nonlinearities to experiment with
* Add notes on scores for the various nonlinearities
* Isolate the import of nonlinearities that were not available until after torch 1.3.0 (see the availability check sketched below)
Co-authored-by: Hung Manh Bui <hung0411@sc.stanford.edu>
Co-authored-by: John Bauer <horatio@gmail.com>
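The torch compatibility isolation mentioned above works by feature detection rather than version comparison: a newer activation class is registered only if the installed torch actually exposes it. Below is a minimal sketch for checking that availability locally, assuming only that torch is importable; the five guarded class names are taken from the diff, while the printout format is illustrative.

```python
# Report which of the version-guarded activation classes the locally
# installed torch provides; on a sufficiently recent torch (>= 1.9,
# which added nn.Mish) all five should print True.
import torch
import torch.nn as nn

print("torch", torch.__version__)
for name in ['GLU', 'Hardsigmoid', 'Hardswish', 'Mish', 'SiLU']:
    print("%-12s %s" % (name, hasattr(nn, name)))
```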
-rw-r--r-- | stanza/models/constituency/utils.py | 64 |
1 file changed, 58 insertions(+), 6 deletions(-)
```diff
diff --git a/stanza/models/constituency/utils.py b/stanza/models/constituency/utils.py
index e3bfa525..4f46abea 100644
--- a/stanza/models/constituency/utils.py
+++ b/stanza/models/constituency/utils.py
@@ -105,21 +105,73 @@ def retag_trees(trees, pipeline, xpos=True):
             raise ValueError("Failed to properly retag tree #{}: {}".format(tree_idx, tree)) from e
     return new_trees
 
+
+# experimental results on nonlinearities
+# this is on a VI dataset, VLSP_22, using 1/10th of the data as a dev set
+# (no released test set at the time of the experiment)
+# original non-Bert tagger, with 1 iteration each instead of averaged over 5
+# considering the number of experiments and the length of time they would take
+#
+# Gelu has the highest score, which tracks with other experiments run.
+# Note that publicly released models have typically used Relu
+# on account of the runtime speed improvement
+#
+# Gelu:        82.32
+# Relu:        82.14
+# Mish:        81.95
+# Relu6:       81.91
+# Silu:        81.90
+# ELU:         81.73
+# Hardswish:   81.67
+# Softsign:    81.63
+# Hardtanh:    81.44
+# Celu:        81.43
+# Selu:        81.17
+# TODO: need to redo the prelu experiment with
+#   possibly different numbers of parameters
+#   and proper weight decay
+# Prelu:       80.95 (terminated early)
+# Softplus:    80.94
+# Logsigmoid:  80.91
+# Hardsigmoid: 79.03
+# RReLU:       77.00
+# Hardshrink:  failed
+# Softshrink:  failed
 NONLINEARITY = {
-    'tanh': nn.Tanh,
-    'relu': nn.ReLU,
+    'celu': nn.CELU,
+    'elu': nn.ELU,
     'gelu': nn.GELU,
+    'hardshrink': nn.Hardshrink,
+    'hardtanh': nn.Hardtanh,
     'leaky_relu': nn.LeakyReLU,
+    'logsigmoid': nn.LogSigmoid,
+    'prelu': nn.PReLU,
+    'relu': nn.ReLU,
+    'relu6': nn.ReLU6,
+    'rrelu': nn.RReLU,
+    'selu': nn.SELU,
+    'softplus': nn.Softplus,
+    'softshrink': nn.Softshrink,
+    'softsign': nn.Softsign,
+    'tanhshrink': nn.Tanhshrink,
+    'tanh': nn.Tanh,
 }
 
 # separating these out allows for backwards compatibility with earlier versions of pytorch
 # NOTE torch compatibility: if we ever *release* models with these
 # activation functions, we will need to break that compatibility
-if hasattr(nn, 'SiLU'):
-    NONLINEARITY['silu'] = nn.SiLU
-if hasattr(nn, 'Mish'):
-    NONLINEARITY['mish'] = nn.Mish
+nonlinearity_list = [
+    'GLU',
+    'Hardsigmoid',
+    'Hardswish',
+    'Mish',
+    'SiLU',
+]
+
+for nonlinearity in nonlinearity_list:
+    if hasattr(nn, nonlinearity):
+        NONLINEARITY[nonlinearity.lower()] = getattr(nn, nonlinearity)
 
 def build_nonlinearity(nonlinearity):
     """
```
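The hunk is cut off at the opening of build_nonlinearity, so the following is only a sketch of how the NONLINEARITY table is plausibly consumed: look up the module class by name and instantiate it. The error message and the Sequential usage are illustrative, not taken from the commit.

```python
# Hypothetical sketch of consuming the NONLINEARITY table; the real
# build_nonlinearity body is truncated in the diff above, so this
# lookup-and-instantiate logic is an assumption, not the committed code.
import torch.nn as nn

NONLINEARITY = {
    'gelu': nn.GELU,
    'relu': nn.ReLU,
    'tanh': nn.Tanh,
}

def build_nonlinearity(nonlinearity):
    """Instantiate the activation module registered under the given name."""
    if nonlinearity not in NONLINEARITY:
        raise ValueError("Unknown nonlinearity: %s" % nonlinearity)
    return NONLINEARITY[nonlinearity]()

# for example, choosing the activation from a command line flag
layer = nn.Sequential(nn.Linear(128, 128), build_nonlinearity('gelu'))
```

Keeping the table as a name-to-class mapping (instantiated at call time) rather than a name-to-instance mapping means each caller gets a fresh module, which matters for parameterized activations such as nn.PReLU.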