[booking] Review fixes

author: Sergey Yershov <yershov@corp.mail.ru> 2016-05-31 17:20:29 +0300
committer: Sergey Yershov <yershov@corp.mail.ru> 2016-05-31 17:20:29 +0300
commit: 24765c5607ea234e4d9c363e6ac394756b67aaaa (patch)
tree: ae9c6d81bdf0e090551e28feab36f919c0ab25af /tools
parent: 4b89d45831e4c69ccd7b15c56d2e31d7174871ca (diff)
1 files changed, 73 insertions, 0 deletions
diff --git a/tools/python/booking_hotels_quality.py b/tools/python/booking_hotels_quality.py
new file mode 100755
index 0000000000..00d9b29138
--- /dev/null
+++ b/tools/python/booking_hotels_quality.py
@@ -0,0 +1,73 @@
+#!/usr/bin/python
+# coding: utf8
+from __future__ import print_function
+
+from collections import namedtuple, defaultdict
+from datetime import datetime
+from sklearn import metrics
+import argparse
+import base64
+import json
+import logging
+import os
+import pickle
+import time
+import urllib2
+
+# Configure the root logger once at import time: DEBUG and above, each
+# record rendered as "[timestamp] LEVEL: message".
+logging.basicConfig(
+    format='[%(asctime)s] %(levelname)s: %(message)s',
+    level=logging.DEBUG,
+)
+
+def load_binary_list(path):
+    """Load a list of 0/1 labels from a text file.
+
+    Blank lines and lines whose first non-blank character is '#' are
+    skipped. Every other line yields 1 if it starts with 'y' and 0
+    otherwise.
+
+    Leading whitespace is ignored when classifying a line, so an indented
+    comment is not counted as a 0 label and an indented 'y' is still a 1.
+
+    :param path: path to the labels file.
+    :return: list of ints (0 or 1), one per non-skipped line.
+    """
+    bits = []
+    with open(path, 'r') as fd:
+        for line in fd:
+            entry = line.strip()
+            if not entry or entry.startswith('#'):
+                continue
+            bits.append(1 if entry.startswith('y') else 0)
+    return bits
+
+def load_score_list(path):
+    """Load a list of float scores from a text file.
+
+    Blank lines and lines whose first non-blank character is '#' are
+    skipped. For every other line the score is the text after the last
+    ':' (or the whole line when there is no ':'), parsed with float().
+
+    The separator is located with rpartition() rather than a fixed
+    "colon plus one space" offset, so "name:1.5", "name: 1.5" and a bare
+    "1.5" all parse to 1.5 instead of silently losing the first character.
+
+    :param path: path to the scores file.
+    :return: list of floats, one per non-skipped line.
+    :raises ValueError: if the text after the last ':' is not a number.
+    """
+    scores = []
+    with open(path, 'r') as fd:
+        for line in fd:
+            entry = line.strip()
+            if not entry or entry.startswith('#'):
+                continue
+            # float() tolerates surrounding whitespace, so the part after
+            # the colon needs no extra stripping.
+            scores.append(float(entry.rpartition(':')[2]))
+    return scores
+
+def process_options(argv=None):
+    """Parse the command line options of the script.
+
+    :param argv: list of argument strings to parse; None (the default)
+        makes argparse read sys.argv[1:].
+    :return: argparse.Namespace with ``verbose``, ``reference_list``,
+        ``sample_list`` and ``show`` attributes.
+    :raises SystemExit: with status 2 when --reference_list or
+        --sample_list is missing or empty.
+    """
+    parser = argparse.ArgumentParser(
+        description='Compute precision/recall and ROC AUC of a scored sample list against a y/n reference list.')
+    parser.add_argument("-v", "--verbose", action="store_true", dest="verbose")
+    parser.add_argument("-q", "--quiet", action="store_false", dest="verbose")
+
+    parser.add_argument("--reference_list", dest="reference_list",
+                        help="Path to the reference file with one y/n label per line")
+    parser.add_argument("--sample_list", dest="sample_list",
+                        help="Path to the sample file with one '<name>: <score>' entry per line")
+
+    parser.add_argument("--show", dest="show", default=False, action="store_true",
+                        help="Show graph for precision and recall")
+
+    options = parser.parse_args(argv)
+
+    if not options.reference_list or not options.sample_list:
+        # Still show the full help, but fail with a non-zero status so that
+        # calling scripts can tell a usage error from a successful run.
+        parser.print_help()
+        parser.exit(2, "error: both --reference_list and --sample_list are required\n")
+
+    return options
+
+def _f_measure(precision, recall):
+    """Return precision*recall/(precision+recall), or 0.0 when both are zero."""
+    total = precision + recall
+    return precision * recall / total if total else 0.0
+
+
+def main():
+    """Print the best precision/recall operating point and the ROC AUC.
+
+    Loads the y/n reference labels and the sample scores named on the
+    command line, computes the precision/recall curve, and reports the
+    threshold that maximizes precision*recall/(precision+recall) together
+    with the ROC AUC. With --show the precision/recall curve is plotted.
+    """
+    options = process_options()
+    reference = load_binary_list(options.reference_list)
+    sample = load_score_list(options.sample_list)
+
+    precision, recall, threshold = metrics.precision_recall_curve(reference, sample)
+    # zip() stops at the shortest input, so only points that have a
+    # corresponding threshold are considered.
+    points = zip(precision, recall, threshold)
+    # Index the tuple instead of unpacking it in the lambda signature:
+    # tuple parameters are Python 2-only syntax. _f_measure() also guards
+    # the 0/0 case, which would otherwise make max() compare against NaN.
+    best = max(points, key=lambda point: _f_measure(point[0], point[1]))
+    # NOTE(review): "thrashold" is a typo, kept as is because the output
+    # may be parsed by other tools -- confirm before changing it.
+    print("Optimal thrashold: {2} for precision: {0} and recall: {1}".format(*best))
+    print("AUC: {0}".format(metrics.roc_auc_score(reference, sample)))
+
+    if options.show:
+        # Imported lazily so matplotlib is only needed when plotting.
+        import matplotlib.pyplot as plt
+        plt.plot(recall, precision)
+        plt.show()
+
+
+# Run the evaluation only when the file is executed as a script, not when
+# it is imported as a module.
+if __name__ == "__main__":
+    main()
author	Sergey Yershov <yershov@corp.mail.ru>	2016-05-31 17:20:29 +0300
committer	Sergey Yershov <yershov@corp.mail.ru>	2016-05-31 17:20:29 +0300
commit	24765c5607ea234e4d9c363e6ac394756b67aaaa (patch)
tree	ae9c6d81bdf0e090551e28feab36f919c0ab25af /tools
parent	4b89d45831e4c69ccd7b15c56d2e31d7174871ca (diff)