diff options
author | Sergey Yershov <yershov@corp.mail.ru> | 2016-05-31 17:20:29 +0300 |
---|---|---|
committer | Sergey Yershov <yershov@corp.mail.ru> | 2016-05-31 17:20:29 +0300 |
commit | 24765c5607ea234e4d9c363e6ac394756b67aaaa (patch) | |
tree | ae9c6d81bdf0e090551e28feab36f919c0ab25af /tools | |
parent | 4b89d45831e4c69ccd7b15c56d2e31d7174871ca (diff) |
[booking] Review fixes
Diffstat (limited to 'tools')
-rwxr-xr-x | tools/python/booking_hotels_quality.py | 73 |
1 files changed, 73 insertions, 0 deletions
#!/usr/bin/python
# coding: utf8
"""Evaluate a list of scored samples against a reference list of y/n labels.

Reads a reference file (one label per line) and a sample file (one score per
line), prints the threshold that maximizes
precision * recall / (precision + recall) together with the ROC AUC, and
optionally plots the precision/recall curve.
"""
from __future__ import print_function

from collections import namedtuple, defaultdict
from datetime import datetime
import argparse
import base64
import json
import logging
import os
import pickle
import sys
import time

try:
    # Python 2 only module, unused by this script; guarded so that the module
    # can still be imported on Python 3.
    import urllib2
except ImportError:
    urllib2 = None

# init logging
logging.basicConfig(level=logging.DEBUG, format='[%(asctime)s] %(levelname)s: %(message)s')


def _iter_data_lines(path):
    """Yield the stripped, non-empty, non-comment lines of the file at *path*.

    Lines are stripped before the '#' check so that indented comments are
    skipped too; otherwise they would be counted as data and shift the
    reference and sample lists against each other.
    """
    with open(path, 'r') as fd:
        for raw_line in fd:
            line = raw_line.strip()
            if line and not line.startswith('#'):
                yield line


def load_binary_list(path):
    """Load reference labels from *path*.

    Returns a list with 1 for every data line starting with 'y' and 0 for
    every other data line. Blank lines and '#' comment lines are skipped.
    """
    return [1 if line.startswith('y') else 0 for line in _iter_data_lines(path)]


def load_score_list(path):
    """Load float scores from *path*.

    The score of a data line is the text after its last ':' (or the whole
    line when there is no ':'). Blank lines and '#' comment lines are skipped.

    Raises:
        ValueError: if the score part of a line is not a valid float.
    """
    # rpartition copes with "name: 0.5", "name:0.5" and a bare "0.5" alike;
    # float() ignores the surrounding whitespace.
    return [float(line.rpartition(':')[2]) for line in _iter_data_lines(path)]


def process_options():
    """Parse command line options.

    Prints the help text and exits with status 1 when either of the two list
    paths is missing.
    """
    parser = argparse.ArgumentParser(
        description='Evaluate scored booking hotel samples against a reference list.')
    # NOTE: the verbose flag is parsed but not used anywhere in this script.
    parser.add_argument("-v", "--verbose", action="store_true", dest="verbose")
    parser.add_argument("-q", "--quiet", action="store_false", dest="verbose")

    parser.add_argument("--reference_list", dest="reference_list",
                        help="Path to the reference file: one label per line, "
                             "lines starting with 'y' are positive")
    parser.add_argument("--sample_list", dest="sample_list",
                        help="Path to the sample file: one score per line, "
                             "taken from the text after the last ':'")

    parser.add_argument("--show", dest="show", default=False, action="store_true",
                        help="Show graph for precision and recall")

    options = parser.parse_args()

    if not options.reference_list or not options.sample_list:
        parser.print_help()
        # sys.exit instead of the site-provided exit(); non-zero status
        # because the mandatory arguments are missing.
        sys.exit(1)

    return options


def _half_f1(precision, recall):
    """Return precision * recall / (precision + recall), or 0.0 when both are 0."""
    total = precision + recall
    return precision * recall / total if total else 0.0


def _best_operating_point(precision, recall, threshold):
    """Return the (precision, recall, threshold) triple with the highest score.

    The three sequences are zipped, so any trailing precision/recall entries
    without a matching threshold are ignored.

    Raises:
        ValueError: if there is no triple to choose from.
    """
    points = zip(precision, recall, threshold)
    # Index into the triple instead of unpacking it in the lambda signature:
    # tuple parameters are a syntax error on Python 3.
    return max(points, key=lambda point: _half_f1(point[0], point[1]))


def main():
    """Load both lists, print the optimal threshold and AUC, optionally plot."""
    options = process_options()

    # Imported lazily (like matplotlib below) so that --help and argument
    # validation work without scikit-learn installed.
    from sklearn import metrics

    reference = load_binary_list(options.reference_list)
    sample = load_score_list(options.sample_list)

    precision, recall, threshold = metrics.precision_recall_curve(reference, sample)
    best = _best_operating_point(precision, recall, threshold)
    print("Optimal threshold: {2} for precision: {0} and recall: {1}".format(*best))
    print("AUC: {0}".format(metrics.roc_auc_score(reference, sample)))

    if options.show:
        import matplotlib.pyplot as plt
        plt.plot(recall, precision)
        plt.show()


if __name__ == "__main__":
    main()