Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/stanfordnlp/stanza.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: John Bauer <horatio@gmail.com> 2022-10-31 09:57:51 +0300
committer: John Bauer <horatio@gmail.com> 2022-10-31 10:02:08 +0300
commit: af5e7d3d3b4726d5503efb027b8efda6e8dbbd3e (patch)
tree: 44d8410376772ef11b3bedbe0800d8ed344f4dd5
parent: bdb64b0e4664f08f745a13009c9ba6c0a675899c (diff)
Add a suffix argument to the renormalize script
-rw-r--r-- stanza/utils/datasets/vietnamese/renormalize.py 19
1 files changed, 13 insertions, 6 deletions
diff --git a/stanza/utils/datasets/vietnamese/renormalize.py b/stanza/utils/datasets/vietnamese/renormalize.py
index c21b1288..08fcfab2 100644
--- a/stanza/utils/datasets/vietnamese/renormalize.py
+++ b/stanza/utils/datasets/vietnamese/renormalize.py
@@ -94,16 +94,16 @@ def convert_file(org_file, new_file):
def convert_files(file_list, new_dir):
for file_name in file_list:
- base_name, _ = os.path.splitext(os.path.split(file_name)[-1])
- new_path = os.path.join(new_dir, base_name)
- new_file_path = f'{new_path}.txt'
+ base_name = os.path.split(file_name)[-1]
+ new_file_path = os.path.join(new_dir, base_name)
convert_file(file_name, new_file_path)
-def convert_dir(org_dir, new_dir):
+def convert_dir(org_dir, new_dir, suffix):
+ os.makedirs(new_dir, exist_ok=True)
file_list = os.listdir(org_dir)
- file_list = [os.path.join(org_dir, f) for f in file_list if os.path.splitext(f)[1] == '.txt']
+ file_list = [os.path.join(org_dir, f) for f in file_list if os.path.splitext(f)[1] == suffix]
convert_files(file_list, new_dir)
@@ -122,12 +122,19 @@ def main():
help='The location of new directory'
)
+ parser.add_argument(
+ '--suffix',
+ type=str,
+ default='.txt',
+ help='Which suffix to look for when renormalizing a directory'
+ )
+
args = parser.parse_args()
if os.path.isfile(args.orig):
convert_file(args.orig, args.converted)
else:
- convert_dir(args.orig, args.converted)
+ convert_dir(args.orig, args.converted, args.suffix)
if __name__ == '__main__':