Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/stanfordnlp/stanza.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: John Bauer <horatio@gmail.com> 2022-11-07 03:00:15 +0300
committer: John Bauer <horatio@gmail.com> 2022-11-07 09:34:37 +0300
commit: 434ddd9d59bdb1d50b1b37520f77ad6e42215eed (patch)
tree: 99f4fcc853a8a519490f7d90749b037cd07f33dc
parent: 28bd18b4db08e77d30edc945042416364d4209fb (diff)
Simplify the operation of unary transitions when we are using the TOP_DOWN_COMPOUND or TOP_DOWN_UNARY transition schemes. Those transition schemes haven't been too successful, but still, this is simpler and more efficient.
Preliminary experiments suggest this makes no difference in accuracy, as expected.
-rw-r--r-- stanza/models/constituency/base_model.py 12
-rw-r--r-- stanza/models/constituency/lstm_model.py 13
-rw-r--r-- stanza/models/constituency/parse_transitions.py 16
3 files changed, 11 insertions, 30 deletions
diff --git a/stanza/models/constituency/base_model.py b/stanza/models/constituency/base_model.py
index 85c57445..23781bce 100644
--- a/stanza/models/constituency/base_model.py
+++ b/stanza/models/constituency/base_model.py
@@ -91,12 +91,6 @@ class BaseModel(ABC):
"""
@abstractmethod
- def unary_transform(self, constituents, labels):
- """
- Transform the top of the constituent stack using a unary transform to the new label
- """
-
- @abstractmethod
def build_constituents(self, labels, children_lists):
"""
Build multiple constituents at once. This gives the opportunity for batching operations
@@ -408,12 +402,6 @@ class SimpleModel(BaseModel):
def dummy_constituent(self, dummy):
return dummy
- def unary_transform(self, constituents, labels):
- top_constituent = constituents.value
- for label in reversed(labels):
- top_constituent = Tree(label=label, children=[top_constituent])
- return top_constituent
-
def build_constituents(self, labels, children_lists):
constituents = []
for label, children in zip(labels, children_lists):
diff --git a/stanza/models/constituency/lstm_model.py b/stanza/models/constituency/lstm_model.py
index 6c10c472..9f08535a 100644
--- a/stanza/models/constituency/lstm_model.py
+++ b/stanza/models/constituency/lstm_model.py
@@ -773,19 +773,6 @@ class LSTMModel(BaseModel, nn.Module):
# the cx doesn't matter: the dummy will be discarded when building a new constituent
return Constituent(dummy, hx.unsqueeze(0), None)
- def unary_transform(self, constituents, labels):
- # TODO: this can be faster by stacking things
- # the double dereference is because we expect the Constiuent
- # wrapped in an LSTMTreeStack Node
- top_constituent = constituents.value.value
- for label in reversed(labels):
- # double nested: the Constituent is in a list of just one child
- # and there is just one item in the list (hence the stacking comment)
- # the fake Constituent is because normally the Constituent
- # items are wrapped from the LSTMTreeStack
- top_constituent = self.build_constituents([(label,)], [[Constituent(top_constituent, None, None)]])[0]
- return top_constituent
-
def build_constituents(self, labels, children_lists):
"""
Build new constituents with the given label from the list of children
diff --git a/stanza/models/constituency/parse_transitions.py b/stanza/models/constituency/parse_transitions.py
index 8befe00a..8b647bd1 100644
--- a/stanza/models/constituency/parse_transitions.py
+++ b/stanza/models/constituency/parse_transitions.py
@@ -263,13 +263,19 @@ class CompoundUnary(Transition):
self.labels = tuple(labels)
def update_state(self, state, model):
- # remove the top constituent
- # apply the labels
- # put the constituent back on the state
+ """
+ Apply potentially multiple unary transitions to the same preterminal
+
+ It reuses the CloseConstituent machinery
+ """
+ # only the top constituent is meaningful here
constituents = state.constituents
- new_constituent = model.unary_transform(state.constituents, self.labels)
+ children = [constituents.value]
constituents = constituents.pop()
- return state.word_position, constituents, new_constituent, None
+ # unlike with CloseConstituent, our label is not on the stack.
+ # it is just our label
+ # ... but we do reuse CloseConstituent's update mechanism
+ return state.word_position, constituents, (self.labels, children), CloseConstituent
def is_legal(self, state, model):
"""