diff options
author | Marcin Junczys-Dowmunt <junczys@amu.edu.pl> | 2016-05-01 12:23:55 +0300 |
---|---|---|
committer | Marcin Junczys-Dowmunt <junczys@amu.edu.pl> | 2016-05-01 12:23:55 +0300 |
commit | e6cb99c48ca1a8eb84a428ad7e3532e6c02de9b6 (patch) | |
tree | e295687d8128dba8f7e4dbfdf08a348a09d595ac /scripts | |
parent | b80ec4fa04fb99e3a2b4f91f6a4df751a2de2195 (diff) |
better file handling, better exception handling
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/idf.py | 9 |
1 files changed, 6 insertions, 3 deletions
diff --git a/scripts/idf.py b/scripts/idf.py
index efd05e9a..60905d80 100644
--- a/scripts/idf.py
+++ b/scripts/idf.py
@@ -1,5 +1,6 @@
 import sys
 import math
+import yaml
 from collections import Counter
 
 c = Counter()
@@ -10,7 +11,9 @@ for line in sys.stdin:
         c[word] += 1
     N += 1
 
-keys = sorted([k for k in c])
-for word in keys:
+out = dict()
+for word in c:
     idf = math.log(float(N) / float(c[word])) / math.log(N)
-    print word, ":", idf
+    out[word] = idf
+
+yaml.safe_dump(out, sys.stdout, default_flow_style=False, allow_unicode=True)
\ No newline at end of file |