1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
|
#!/usr/bin/env python
__author__ = "Marta Bañón"
__version__ = "Version 0.1 # 28/09/2018 # Classifier test # Marta Bañón"
import subprocess
import bicleaner
def setup_function():
    """Download the en-de language pack and write a filtered copy of its config.

    Two shell commands are run in order:

    1. Create ``test_langpacks``, download ``en-de.tar.gz`` into it with
       ``wget`` and unpack it with ``tar``.
    2. Copy ``test_langpacks/en-de/en-de.yaml`` to
       ``test_langpacks/en-de/en-de.nolm.yaml``, dropping every line that
       contains ``_lm``, ``lm_type`` or ``_perp``.

    NOTE(review): the name follows the pytest per-test setup hook
    convention; presumably pytest is the runner -- confirm.

    Raises:
        RuntimeError: if either shell command exits with a non-zero status.
    """
    print("Running test setup...")
    langpackurl = "https://github.com/bitextor/bicleaner-data/releases/latest/download/en-de.tar.gz"
    tar = "tar -xzvf en-de.tar.gz"
    download_cmd = "mkdir -p test_langpacks && cd test_langpacks && wget -q {0} && {1} && cd ..".format(langpackurl, tar)
    filter_cmd = "cat test_langpacks/en-de/en-de.yaml |grep -v '_lm' | grep -v 'lm_type' | grep -v '_perp' > test_langpacks/en-de/en-de.nolm.yaml"

    for command in (download_cmd, filter_cmd):
        p = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
        # communicate() drains stdout while waiting. A bare wait() with
        # stdout=PIPE can block forever once the child (e.g. the verbose tar
        # listing) fills the pipe buffer.
        p.communicate()
        if p.returncode != 0:
            # Fail here rather than letting the tests fail later on missing
            # language-pack files.
            raise RuntimeError(
                "Test setup command failed with exit code {}: {}".format(p.returncode, command)
            )
def teardown_function():
    """Remove the files produced by the setup and by a classifier run.

    Deletes the ``test_langpacks`` directory tree and the
    ``tests/test-corpus.en-de.classified`` file. The two removals are
    independent and best-effort: a missing directory no longer prevents the
    classified file from being removed, and nothing is raised when either
    path is already absent.
    """
    # Imported locally so this function does not rely on additional
    # module-level imports.
    import os
    import shutil

    print("Running test teardown...")

    # ignore_errors=True keeps the cleanup best-effort, as the unchecked
    # `rm -r` was.
    shutil.rmtree("test_langpacks", ignore_errors=True)

    try:
        os.remove("tests/test-corpus.en-de.classified")
    except FileNotFoundError:
        # Nothing to clean up (e.g. the classifier never produced output).
        pass
    except OSError as e:
        # Report, but do not turn a cleanup problem into a test error.
        print(e)
def bicleaner_test(executable, training_yaml):
    """Run a classifier command on the test corpus and collect its scores.

    ``executable`` is run through the shell with ``tests/test-corpus.en-de``,
    ``tests/test-corpus.en-de.classified`` and
    ``test_langpacks/en-de/<training_yaml>`` as positional arguments, followed
    by ``-q``. The classified file is then read back line by line.

    Args:
        executable: Command name to run (the tests in this file pass
            ``bicleaner-classify`` or ``bicleaner-classify-lite``).
        training_yaml: File name of the YAML config inside
            ``test_langpacks/en-de/``.

    Returns:
        A list with one float per line of the classified file: the fifth of
        exactly five tab-separated fields, rounded to one decimal, or
        ``-1.0`` when the line does not have five fields or the score is not
        a number.
    """
    print("Running test body...")
    bicleaner_cmd = (
        "{} "
        "tests/test-corpus.en-de "
        "tests/test-corpus.en-de.classified "
        "test_langpacks/en-de/{} -q"
    ).format(executable, training_yaml)

    p = subprocess.Popen(bicleaner_cmd, shell=True, stdout=subprocess.PIPE)
    # communicate() drains stdout while waiting. A bare wait() with
    # stdout=PIPE can block forever if the child fills the pipe buffer.
    p.communicate()
    if p.returncode != 0:
        print("Command exited with code {}: {}".format(p.returncode, bicleaner_cmd))

    scores = []
    with open("tests/test-corpus.en-de.classified", "r") as classified_file:
        for line in classified_file:
            line = line.rstrip("\n")
            print(line)
            try:
                # Unpacking enforces exactly five columns; only the last one
                # is used.
                url1, url2, source_sentence, target_sentence, score = line.split('\t')
                scores.append(round(float(score), 1))
            except ValueError as e:
                # Raised by a wrong column count and by a non-numeric score.
                print(e)
                # Numeric sentinel keeps the list homogeneous; it was the
                # string "-1" before.
                scores.append(-1.0)
    return scores
def test_full_process():
    """Check the scores of ``bicleaner-classify`` run with ``en-de.yaml``.

    Each expected value is compared with the score at the same position in
    the list returned by ``bicleaner_test``.
    """
    expected = [0, 0, 0, 0, 0, 0.5, 0, 0, 0.3, 0]
    results = bicleaner_test("bicleaner-classify", "en-de.yaml")
    print("Checking test results...")
    # Indexing into results (rather than zipping) keeps a too-short result
    # list an error.
    for position, wanted in enumerate(expected):
        assert results[position] == wanted
def test_full_process_nolm():
    """Check the scores of ``bicleaner-classify`` run with ``en-de.nolm.yaml``.

    ``en-de.nolm.yaml`` is the config written by ``setup_function`` with the
    ``_lm``, ``lm_type`` and ``_perp`` lines filtered out. Each expected
    value is compared with the score at the same position in the list
    returned by ``bicleaner_test``.
    """
    expected = [0, 0, 0, 0.7, 0, 0.5, 0, 0, 0.3, 0]
    results = bicleaner_test("bicleaner-classify", "en-de.nolm.yaml")
    print("Checking test results...")
    # Indexing into results (rather than zipping) keeps a too-short result
    # list an error.
    for position, wanted in enumerate(expected):
        assert results[position] == wanted
def test_lite_process():
    """Check the scores of ``bicleaner-classify-lite`` run with ``en-de.yaml``.

    Each expected value is compared with the score at the same position in
    the list returned by ``bicleaner_test``.
    """
    expected = [0, 0, 0, 0, 0, 0.5, 0, 0, 0.3, 0]
    results = bicleaner_test("bicleaner-classify-lite", "en-de.yaml")
    print("Checking test results...")
    # Indexing into results (rather than zipping) keeps a too-short result
    # list an error.
    for position, wanted in enumerate(expected):
        assert results[position] == wanted
def test_lite_process_nolm():
    """Check the scores of ``bicleaner-classify-lite`` run with ``en-de.nolm.yaml``.

    ``en-de.nolm.yaml`` is the config written by ``setup_function`` with the
    ``_lm``, ``lm_type`` and ``_perp`` lines filtered out. Each expected
    value is compared with the score at the same position in the list
    returned by ``bicleaner_test``.
    """
    expected = [0, 0, 0, 0.7, 0, 0.5, 0, 0, 0.3, 0]
    results = bicleaner_test("bicleaner-classify-lite", "en-de.nolm.yaml")
    print("Checking test results...")
    # Indexing into results (rather than zipping) keeps a too-short result
    # list an error.
    for position, wanted in enumerate(expected):
        assert results[position] == wanted
|