diff options
Diffstat (limited to 'stanza/tests/test_constant.py')
-rw-r--r-- | stanza/tests/test_constant.py | 35 |
1 files changed, 35 insertions, 0 deletions
diff --git a/stanza/tests/test_constant.py b/stanza/tests/test_constant.py new file mode 100644 index 00000000..3afcc8d6 --- /dev/null +++ b/stanza/tests/test_constant.py @@ -0,0 +1,35 @@ +""" +Test the conversion to lcodes and splitting of dataset names +""" + +import tempfile + +import pytest + +import stanza +from stanza.models.common.constant import treebank_to_short_name +from stanza.tests import * + +pytestmark = [pytest.mark.travis, pytest.mark.pipeline] + +def test_treebank(): + """ + Test the entire treebank name conversion + """ + # conversion of a UD_ name + assert "hi_hdtb" == treebank_to_short_name("UD_Hindi-HDTB") + # conversion of names without UD + assert "hi_fire2013" == treebank_to_short_name("Hindi-fire2013") + assert "hi_fire2013" == treebank_to_short_name("Hindi-Fire2013") + assert "hi_fire2013" == treebank_to_short_name("Hindi-FIRE2013") + # already short names are generally preserved + assert "hi_fire2013" == treebank_to_short_name("hi-fire2013") + assert "hi_fire2013" == treebank_to_short_name("hi_fire2013") + # a special case + assert "zh-hant_pud" == treebank_to_short_name("UD_Chinese-PUD") + # a special case already converted once + assert "zh-hant_pud" == treebank_to_short_name("zh-hant_pud") + assert "zh-hant_pud" == treebank_to_short_name("zh-hant-pud") + assert "zh-hans_gsdsimp" == treebank_to_short_name("zh-hans_gsdsimp") + + |