Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Marcin Junczys-Dowmunt <junczys@amu.edu.pl> 2016-05-01 12:23:55 +0300
committer Marcin Junczys-Dowmunt <junczys@amu.edu.pl> 2016-05-01 12:23:55 +0300
commit e6cb99c48ca1a8eb84a428ad7e3532e6c02de9b6 (patch)
tree e295687d8128dba8f7e4dbfdf08a348a09d595ac /scripts
parent b80ec4fa04fb99e3a2b4f91f6a4df751a2de2195 (diff)
better file handling, better exception handling
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/idf.py 9
1 files changed, 6 insertions, 3 deletions
diff --git a/scripts/idf.py b/scripts/idf.py
index efd05e9a..60905d80 100644
--- a/scripts/idf.py
+++ b/scripts/idf.py
@@ -1,5 +1,6 @@
import sys
import math
+import yaml
from collections import Counter
c = Counter()
@@ -10,7 +11,9 @@ for line in sys.stdin:
c[word] += 1
N += 1
-keys = sorted([k for k in c])
-for word in keys:
+out = dict()
+for word in c:
idf = math.log(float(N) / float(c[word])) / math.log(N)
- print word, ":", idf
+ out[word] = idf
+
+yaml.safe_dump(out, sys.stdout, default_flow_style=False, allow_unicode=True)
\ No newline at end of file