blob: 3f90fcf5225cd2289a87c498708c92d35e3d0463 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
|
"""
A script to prepare all lemma datasets.
For example, do
python -m stanza.utils.datasets.prepare_lemma_treebank TREEBANK
such as
python -m stanza.utils.datasets.prepare_lemma_treebank UD_English-EWT
and it will prepare each of train, dev, test
"""
import stanza.utils.datasets.common as common
import stanza.utils.datasets.prepare_tokenizer_treebank as prepare_tokenizer_treebank
def process_treebank(treebank, paths, args):
    """Copy one treebank's CoNLL-U data into the lemma data directory.

    Delegates to ``prepare_tokenizer_treebank.copy_conllu_treebank``, using
    ``paths["LEMMA_DATA_DIR"]`` as the destination.

    Args:
        treebank: Treebank identifier, forwarded unchanged to the copy helper
            (the module docstring gives ``UD_English-EWT`` as an example).
        paths: Mapping of path settings; must contain ``"LEMMA_DATA_DIR"``.
        args: Unused here; presumably kept so the signature matches the
            callback that ``common.main`` invokes -- verify against common.
    """
    lemma_output_dir = paths["LEMMA_DATA_DIR"]
    prepare_tokenizer_treebank.copy_conllu_treebank(
        treebank,
        paths,
        lemma_output_dir,
    )
def main():
    """Entry point: hand ``process_treebank`` to ``common.main``.

    All command-line handling is left to ``common.main``; this function only
    supplies the per-treebank callback.
    """
    common.main(process_treebank)
# Run the preparation only when executed as a script (e.g. via ``python -m``),
# not when this module is imported.
if __name__ == '__main__':
    main()
|