Welcome to mirror list, hosted at ThFree Co, Russian Federation.

installation.py « resources « stanza - github.com/stanfordnlp/stanza.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 0a942bd845d38eac761a232f37678392652d36be (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
"""
Functions for setting up the CoreNLP environment: installing the CoreNLP
package and downloading its model jars.
"""

import os
import logging
import zipfile
import shutil

from stanza.resources.common import HOME_DIR, request_file, unzip, \
    get_root_from_zipfile, set_logging_level

logger = logging.getLogger('stanza')

# URL template for a single CoreNLP model jar. The CORENLP_MODEL_URL
# environment variable, when set, replaces the built-in template.
# download_corenlp_models fills in {model}, {tag} and {version}.
DEFAULT_CORENLP_MODEL_URL = os.getenv(
    'CORENLP_MODEL_URL',
    'https://huggingface.co/stanfordnlp/corenlp-{model}/resolve/{tag}/stanford-corenlp-models-{model}.jar',
)
# Alternative model jar location keyed by {version} and {model}.
BACKUP_CORENLP_MODEL_URL = "http://nlp.stanford.edu/software/stanford-corenlp-{version}-models-{model}.jar"

# URL template for the CoreNLP package zip; install_corenlp fills in
# {tag} and {version}.
# NOTE(review): this reads the same CORENLP_MODEL_URL environment variable
# as DEFAULT_CORENLP_MODEL_URL above, so setting that variable overrides
# both templates at once. That looks unintended -- confirm before changing,
# since existing setups may rely on it.
DEFAULT_CORENLP_URL = os.getenv(
    'CORENLP_MODEL_URL',
    'https://huggingface.co/stanfordnlp/CoreNLP/resolve/{tag}/stanford-corenlp-latest.zip',
)

# Installation directory: $CORENLP_HOME when set, otherwise a
# 'stanza_corenlp' folder under HOME_DIR.
DEFAULT_CORENLP_DIR = os.getenv(
    'CORENLP_HOME',
    os.path.join(HOME_DIR, 'stanza_corenlp'),
)

# Model names accepted by download_corenlp_models.
AVAILABLE_MODELS = {
    'arabic',
    'chinese',
    'english-extra',
    'english-kbp',
    'french',
    'german',
    'spanish',
}


def download_corenlp_models(model, version, dir=DEFAULT_CORENLP_DIR, url=DEFAULT_CORENLP_MODEL_URL, logging_level='INFO', proxies=None):
    """
    Download one CoreNLP model jar into a local directory.

    The jar is saved as `stanford-corenlp-{version}-models-{model}.jar`
    inside `dir`.

    Args:
        model: the name of the model; after stripping whitespace and
            lowercasing it must be a member of AVAILABLE_MODELS
            ('arabic', 'chinese', 'english-extra', 'english-kbp', 'french',
            'german', 'spanish')
        version: the version of the model, as a string. 'main' is used as
            the tag unchanged; any other value is prefixed with 'v' to
            build the tag
        dir: the directory to download the CoreNLP model into ('~' is
            expanded); alternatively can be set up with environment
            variable $CORENLP_HOME
        url: the link template to download CoreNLP models from.
            It is formatted with {model}, {version} and {tag}
        logging_level: accepted for API compatibility; this function's
            body does not read it
        proxies: forwarded unchanged to request_file

    Raises:
        ValueError: if `model` or `version` is empty or None
        KeyError: if the normalized model name is not in AVAILABLE_MODELS
        RuntimeError: if fetching the jar fails; the underlying error is
            attached as the cause
    """
    dir = os.path.expanduser(dir)
    if not (model and version):
        raise ValueError(
            "Both model and model version should be specified."
        )

    # Logged before normalization, so the message shows the model name
    # exactly as the caller spelled it.
    logger.info(f"Downloading {model} models (version {version}) into directory {dir}...")

    model = model.strip().lower()
    if model not in AVAILABLE_MODELS:
        raise KeyError(
            f'{model} is currently not supported. '
            f'Must be one of: {list(AVAILABLE_MODELS)}.'
        )

    # for example:
    # https://huggingface.co/stanfordnlp/CoreNLP/resolve/v4.2.2/stanford-corenlp-models-french.jar
    if version == 'main':
        tag = version
    else:
        tag = 'v' + version
    download_url = url.format(tag=tag, model=model, version=version)

    # KeyboardInterrupt and SystemExit do not derive from Exception, so they
    # pass through this handler untouched.
    try:
        request_file(
            download_url,
            os.path.join(dir, f'stanford-corenlp-{version}-models-{model}.jar'),
            proxies
        )
    except Exception as e:
        raise RuntimeError(
            "Downloading CoreNLP model file failed. "
            "Please try manual downloading at: https://stanfordnlp.github.io/CoreNLP/."
        ) from e


def install_corenlp(dir=DEFAULT_CORENLP_DIR, url=DEFAULT_CORENLP_URL, logging_level=None, proxies=None, version="main"):
    """
    A fully automatic way to install and set up the CoreNLP library
    to use the client functionality.

    The CoreNLP zip is downloaded to `dir`/corenlp.zip and unzipped. The
    contents of its root folder are then moved directly into `dir`, and
    the zip and the emptied root folder are removed.

    Args:
        dir: the directory to install CoreNLP into ('~' is expanded);
            alternatively can be set up with environment variable
            $CORENLP_HOME. If the path already exists, a warning is logged
            and nothing is installed.
        url: the link template to download CoreNLP from.
            It is formatted with {version} and {tag}
        logging_level: logging level to use during installation; passed to
            set_logging_level
        proxies: forwarded unchanged to request_file
        version: 'main' (the default) is used as the tag unchanged; any
            other value must be a string and is prefixed with 'v' to
            build the tag

    Raises:
        RuntimeError: if fetching the zip fails; the underlying error is
            attached as the cause
    """
    dir = os.path.expanduser(dir)
    set_logging_level(logging_level=logging_level, verbose=None)
    if os.path.exists(dir):
        # Logger.warn is a deprecated alias; use warning() as the rest of
        # this function does.
        logger.warning(
            f"Directory {dir} already exists. "
            f"Please install CoreNLP to a new directory.")
        return

    logger.info(f"Installing CoreNLP package into {dir}...")
    # First download the URL package
    zip_path = os.path.join(dir, 'corenlp.zip')
    logger.debug(f"Download to destination file: {zip_path}")
    tag = version if version == 'main' else 'v' + version
    url = url.format(version=version, tag=tag)
    # KeyboardInterrupt and SystemExit do not derive from Exception, so they
    # pass through this handler untouched.
    try:
        request_file(url, zip_path, proxies)
    except Exception as e:
        raise RuntimeError(
            "Downloading CoreNLP zip file failed. "
            "Please try manual installation: https://stanfordnlp.github.io/CoreNLP/."
        ) from e

    # Unzip corenlp into dir
    logger.debug("Unzipping downloaded zip file...")
    unzip(dir, 'corenlp.zip')

    # By default CoreNLP will be unzipped into a version-dependent folder,
    # e.g., stanford-corenlp-4.0.0. We need some hack around that and move
    # files back into our designated folder
    logger.debug(f"Moving files into the designated folder at: {dir}")
    corenlp_dirname = os.path.join(dir, get_root_from_zipfile(zip_path))
    for f in os.listdir(corenlp_dirname):
        shutil.move(os.path.join(corenlp_dirname, f), dir)

    # Remove original zip and folder
    logger.debug("Removing downloaded zip file...")
    os.remove(zip_path)
    shutil.rmtree(corenlp_dirname)

    # Warn user to set up env. `dir` was expanded above, so expand the
    # default as well; otherwise a default such as '~/corenlp' (taken from
    # $CORENLP_HOME) would never compare equal and the warning would fire
    # even for the default location.
    if dir != os.path.expanduser(DEFAULT_CORENLP_DIR):
        logger.warning(
            f"For customized installation location, please set the `CORENLP_HOME` "
            f"environment variable to the location of the installation. "
            f"In Unix, this is done with `export CORENLP_HOME={dir}`.")