Experiment with not doing weight decay at all for pattncon_lattn (re-enables the partitioned_transformer_module check in is_low_decay_parameter)

author: John Bauer <horatio@gmail.com> 2022-03-18 03:07:07 +0300
committer: John Bauer <horatio@gmail.com> 2022-06-21 09:37:59 +0300
commit: a59ce17e72682d577f0928343a95997eee7e6662 (patch)
tree: ec6cc260ef60d5552ccfd59f5c1ba9570beaa3c6
parent: 5e9a9b21ae3f896c744b928b4d919999d732dbe7 (diff)
1 file changed, 2 insertions, 2 deletions
diff --git a/stanza/models/constituency/lstm_model.py b/stanza/models/constituency/lstm_model.py
index 7b79d478..8ef718c9 100644
--- a/stanza/models/constituency/lstm_model.py
+++ b/stanza/models/constituency/lstm_model.py
@@ -460,8 +460,8 @@ class LSTMModel(BaseModel, nn.Module):
     def is_low_decay_parameter(self, name):
         #if name.find("_embedding") >= 0:
         #    return True
-        #if name.startswith("partitioned_transformer_module"):
-        #    return True
+        if name.startswith("partitioned_transformer_module"):
+            return True
         #if name.find(".bias") >= 0:
         #    return True
         if name.find(".norm") >= 0 or name.find(".layer_norm") >= 0:
author	John Bauer <horatio@gmail.com>	2022-03-18 03:07:07 +0300
committer	John Bauer <horatio@gmail.com>	2022-06-21 09:37:59 +0300
commit	a59ce17e72682d577f0928343a95997eee7e6662 (patch)
tree	ec6cc260ef60d5552ccfd59f5c1ba9570beaa3c6
parent	5e9a9b21ae3f896c744b928b4d919999d732dbe7 (diff)