Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/tools
diff options
context:
space:
mode:
author: Sergey Yershov <yershov@corp.mail.ru> 2016-05-31 17:20:29 +0300
committer: Sergey Yershov <yershov@corp.mail.ru> 2016-05-31 17:20:29 +0300
commit: 24765c5607ea234e4d9c363e6ac394756b67aaaa (patch)
tree: ae9c6d81bdf0e090551e28feab36f919c0ab25af /tools
parent: 4b89d45831e4c69ccd7b15c56d2e31d7174871ca (diff)
[booking] Review fixes
Diffstat (limited to 'tools')
-rwxr-xr-x tools/python/booking_hotels_quality.py 73
1 files changed, 73 insertions, 0 deletions
diff --git a/tools/python/booking_hotels_quality.py b/tools/python/booking_hotels_quality.py
new file mode 100755
index 0000000000..00d9b29138
--- /dev/null
+++ b/tools/python/booking_hotels_quality.py
@@ -0,0 +1,73 @@
+#!/usr/bin/python
+# coding: utf8
+from __future__ import print_function
+
+from collections import namedtuple, defaultdict
+from datetime import datetime
+from sklearn import metrics
+import argparse
+import base64
+import json
+import logging
+import os
+import pickle
+import time
+import urllib2
+
# Configure the root logger once at import time: DEBUG level, with each
# record rendered as "[timestamp] LEVEL: message".
logging.basicConfig(
    format='[%(asctime)s] %(levelname)s: %(message)s',
    level=logging.DEBUG,
)
+
def load_binary_list(path):
    """Load binary reference labels from a text file.

    Blank lines and comment lines (first non-blank character is '#') are
    skipped. Every remaining line contributes 1 when its first non-blank
    character is 'y' and 0 otherwise.

    Args:
        path: Path of the text file to read.

    Returns:
        A list of ints (0 or 1), one per data line, in file order.

    Raises:
        IOError/OSError: If the file cannot be opened.
    """
    bits = []
    with open(path, 'r') as fd:
        for line in fd:
            # Classify by the first *visible* character so that indented
            # comments are not counted as labels and indented 'y' entries
            # are not misread as negatives.
            entry = line.strip()
            if not entry or entry.startswith('#'):
                continue
            bits.append(1 if entry.startswith('y') else 0)
    return bits
+
def load_score_list(path):
    """Load floating-point scores from a text file.

    Blank lines and comment lines (first non-blank character is '#') are
    skipped. For every remaining line the score is the text after the last
    ':' (surrounding whitespace ignored); a line without any ':' is parsed
    as a score in its entirety.

    Args:
        path: Path of the text file to read.

    Returns:
        A list of floats, one per data line, in file order.

    Raises:
        IOError/OSError: If the file cannot be opened.
        ValueError: If the score part of a line is not a valid float.
    """
    scores = []
    with open(path, 'r') as fd:
        for line in fd:
            entry = line.strip()
            if not entry or entry.startswith('#'):
                continue
            # rpartition yields the text after the last ':' or the whole
            # entry when there is no ':', so no characters are dropped by a
            # fixed "+2" offset; float() tolerates the leftover whitespace.
            scores.append(float(entry.rpartition(':')[2]))
    return scores
+
def process_options():
    """Parse and validate the command-line options of this script.

    Both --reference_list and --sample_list are mandatory. When either is
    missing the full help text is printed and the process exits with a
    non-zero status, so that calling scripts can detect the failure.

    Returns:
        The argparse namespace with the attributes ``verbose``,
        ``reference_list``, ``sample_list`` and ``show``.

    Raises:
        SystemExit: If a mandatory option is missing or the arguments
            cannot be parsed.
    """
    parser = argparse.ArgumentParser(
        description='Estimate the quality of booking hotels scoring: '
                    'precision/recall threshold and ROC AUC of a scored '
                    'sample against a reference list.')
    parser.add_argument("-v", "--verbose", action="store_true", dest="verbose")
    parser.add_argument("-q", "--quiet", action="store_false", dest="verbose")

    parser.add_argument("--reference_list", dest="reference_list",
                        help="Path to the reference list file (lines starting with 'y' are positives)")
    parser.add_argument("--sample_list", dest="sample_list",
                        help="Path to the sample list file (score is the number after the last ':' on each line)")

    parser.add_argument("--show", dest="show", default=False, action="store_true",
                        help="Show graph for precision and recall")

    options = parser.parse_args()

    if not options.reference_list or not options.sample_list:
        parser.print_help()
        # parser.exit() does not depend on the interactive-only exit()
        # builtin, and status 1 reports the missing arguments as an error.
        parser.exit(1)

    return options
+
def _best_operating_point(precision, recall, threshold):
    """Return the (precision, recall, threshold) triple maximizing p*r/(p+r).

    The three sequences are zipped, so iteration stops at the shortest one.
    A point whose precision and recall are both zero scores 0 instead of
    producing a 0/0 division.
    """
    def score(point):
        p, r = point[0], point[1]
        total = p + r
        # p*r/(p+r) is half of the F1 measure, so it has the same argmax.
        return p * r / total if total > 0 else 0.0

    return max(zip(precision, recall, threshold), key=score)


def main():
    """Report the best threshold and the ROC AUC of a scored sample.

    Reads the reference labels and the sample scores from the files given on
    the command line, computes the precision/recall curve, prints the
    threshold with the highest p*r/(p+r) together with its precision and
    recall, then prints the ROC AUC. With --show the precision/recall curve
    is also plotted with matplotlib.
    """
    options = process_options()
    reference = load_binary_list(options.reference_list)
    sample = load_score_list(options.sample_list)

    precision, recall, threshold = metrics.precision_recall_curve(reference, sample)
    best = _best_operating_point(precision, recall, threshold)
    # NOTE: the spelling "thrashold" is kept byte-for-byte because this line
    # is program output that other tooling may parse.
    print("Optimal thrashold: {2} for precision: {0} and recall: {1}".format(*best))
    print("AUC: {0}".format(metrics.roc_auc_score(reference, sample)))

    if options.show:
        # Imported lazily so matplotlib is only required when plotting.
        import matplotlib.pyplot as plt
        plt.plot(recall, precision)
        plt.show()
+
+
+if __name__ == "__main__":
+ main()