Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
authorSergey Yershov <yershov@corp.mail.ru>2014-12-23 18:44:11 +0300
committerAlex Zolotarev <alex@maps.me>2015-09-23 02:35:34 +0300
commite761162e1b714a0d0bf89236efbd7a4eef8912c7 (patch)
tree8f2bf8392c47cb446363acafab052575e3242a8e /tools
parentac622b6dc3004c2439d0057576eebce41d494709 (diff)
Tools for calculate statistic about downloaded countries
Diffstat (limited to 'tools')
-rwxr-xr-xtools/download_statistic/aggregator43
-rwxr-xr-xtools/download_statistic/calc_statistic1
-rwxr-xr-xtools/download_statistic/columnizer1
-rwxr-xr-xtools/download_statistic/linezier1
-rwxr-xr-xtools/download_statistic/remover1
-rw-r--r--tools/download_statistic/requirements.txt8
-rwxr-xr-xtools/download_statistic/resolver31
7 files changed, 86 insertions, 0 deletions
diff --git a/tools/download_statistic/aggregator b/tools/download_statistic/aggregator
new file mode 100755
index 0000000000..7b23b7b089
--- /dev/null
+++ b/tools/download_statistic/aggregator
@@ -0,0 +1,43 @@
+#!/usr/bin/env python
+#coding: utf-8
+
+from collections import defaultdict
+import sys
+import datetime
+
+# Accumulator filled by the stdin loop below and read by print_result():
+# result[date_key][from_country][req_country] -> set of user keys,
+# where date_key is the string '<year>_<month>_<req_type>'.
+result = defaultdict(lambda : defaultdict(lambda :defaultdict(set)))
+
+
+
+def print_result():
+    """Print one ';'-separated line per (date key, origin, requested country).
+
+    Line layout: year;month (zero-padded to two digits);from_country;
+    req_country;req_type;number of distinct user keys collected in the
+    module-level ``result`` for that combination.
+    """
+    for date_key, by_origin in result.iteritems():
+        # date_key was built as '<year>_<month>_<req_type>'.
+        year, month, req_type = date_key.split('_')
+        for from_country, by_requested in by_origin.iteritems():
+            for req_country, user_keys in by_requested.iteritems():
+                row = (year, int(month), from_country, req_country,
+                       req_type, len(user_keys))
+                print '{};{:02d};{};{};{};{}'.format(*row)
+
+# Read '|'-separated records from stdin and collect, per
+# (year, month, request type) / origin country / requested country,
+# the set of distinct '<user_id>_<req_country>' keys.  Whatever has been
+# collected is always printed: after normal end of input, after Ctrl-C
+# (exit status stays 0), and before any unexpected error propagates.
+try:
+    for rec in sys.stdin:
+        try:
+            parts = rec.strip().split('|')
+            # A sixth field equal to '.routing' marks the record as 'R';
+            # every other record is counted as 'M'.
+            req_type = 'R' if len(parts) == 6 and parts[5] == '.routing' else 'M'
+            from_country = parts[0]
+            date = datetime.datetime.strptime(parts[2], '%d/%b/%Y:%H:%M:%S')
+            user_id = parts[3]
+            # Only the part of the requested name before the first '_' is kept.
+            req_country = parts[4].split('_')[0]
+            date_key = '{}_{}_{}'.format(date.year, date.month, req_type)
+            user_key = '{}_{}'.format(user_id, req_country)
+            result[date_key][from_country][req_country].add(user_key)
+        except Exception:
+            # Best effort: skip a malformed record.  Deliberately not a bare
+            # 'except:', which would also swallow KeyboardInterrupt and keep
+            # Ctrl-C from ever reaching the handler below.
+            pass
+except KeyboardInterrupt:
+    # Interrupted by the user: fall through and print the partial result.
+    pass
+finally:
+    print_result()
+
+
diff --git a/tools/download_statistic/calc_statistic b/tools/download_statistic/calc_statistic
new file mode 100755
index 0000000000..9775e3dc1f
--- /dev/null
+++ b/tools/download_statistic/calc_statistic
@@ -0,0 +1 @@
+# Whole pipeline: stdin is fed through linezier, remover, columnizer and
+# resolver in that order; aggregator writes the final output to stdout.
+# Every stage is invoked as ./<name>, so run this from the directory that
+# contains the scripts.
+./linezier | ./remover | ./columnizer | ./resolver | ./aggregator \ No newline at end of file
diff --git a/tools/download_statistic/columnizer b/tools/download_statistic/columnizer
new file mode 100755
index 0000000000..3710833576
--- /dev/null
+++ b/tools/download_statistic/columnizer
@@ -0,0 +1 @@
+# Turns each line into '|'-separated columns: the first ' [', the first '] "',
+# the first '" ' and the first '.mwm' on a line are each replaced by '|'.
+# awk then prints a line only the first time it is seen (drops duplicates).
+sed 's/ \[/|/;s/\] "/|/;s/" /|/;s/\.mwm/|/' | awk '!x[$0]++' \ No newline at end of file
diff --git a/tools/download_statistic/linezier b/tools/download_statistic/linezier
new file mode 100755
index 0000000000..7503bc7e6e
--- /dev/null
+++ b/tools/download_statistic/linezier
@@ -0,0 +1 @@
+# Joins continuation lines: whenever a newline is followed by a tab, the two
+# lines are merged into one with ' | ' in place of the newline+tab (the sed
+# loop repeats until no such pair is left).  grep then keeps only the
+# resulting lines that contain 'COUNTRY:'.
+sed -e :a -e '$!N;s/\n\t/ | /;ta;' -e 'P;D' | grep -e 'COUNTRY:' \ No newline at end of file
diff --git a/tools/download_statistic/remover b/tools/download_statistic/remover
new file mode 100755
index 0000000000..66f60e1f39
--- /dev/null
+++ b/tools/download_statistic/remover
@@ -0,0 +1 @@
+# Strips fields that later stages do not use.  Per line, in this order:
+#   1. the first space-delimited "..." token made only of letters, digits,
+#      '/', '.', '_' and spaces is replaced by a single space;
+#   2. the first ' - - ' is replaced by a single space;
+#   3. the first match of ' -0.00' is removed ('.' is a regex wildcard here);
+#   4. the first ' XXX XXX -' (two groups of any three characters) is
+#      replaced by a single space;
+#   5. everything from '| ' up to and including ' COUNTRY:' is removed;
+#   6. everything from the first remaining ' |' to the end of line is removed.
+# NOTE(review): presumably tuned to one specific access-log layout - confirm
+# against a sample of the real input before changing any expression.
+sed 's/ "[a-zA-Z0-9/._ ]*" / /1;s/ - - / /;s/ -0.00//;s/ ... ... -/ /;s/| \(.*\) COUNTRY://;s/ |\(.*\)$//' \ No newline at end of file
diff --git a/tools/download_statistic/requirements.txt b/tools/download_statistic/requirements.txt
new file mode 100644
index 0000000000..12e35115a1
--- /dev/null
+++ b/tools/download_statistic/requirements.txt
@@ -0,0 +1,8 @@
+Requires Python 2.7 and the following additional modules:
+geoip2
+ipaddr
+maxminddb
+
+Also required: an IP-to-geo database from http://dev.maxmind.com/geoip/legacy/geolite/
+
+GeoLite2-Country.mmdb
diff --git a/tools/download_statistic/resolver b/tools/download_statistic/resolver
new file mode 100755
index 0000000000..57e2e37835
--- /dev/null
+++ b/tools/download_statistic/resolver
@@ -0,0 +1,31 @@
+#!/usr/bin/env python
+#coding: utf-8
+
+import geoip2.database
+import sys
+from collections import defaultdict
+
+reader = geoip2.database.Reader('./GeoLite2-Country.mmdb')
+
+# For every '|'-separated record on stdin, look up the country of the IP
+# address in the first field and print the record again with the country
+# name prepended as a new first field.  Records that cannot be processed
+# are skipped; Ctrl-C ends the script with exit status 0.
+try:
+    for rec in sys.stdin:
+        try:
+            parts = rec.strip().split('|')
+            ip = parts[0]
+            try:
+                name = reader.country(ip).country.name
+            except geoip2.errors.AddressNotFoundError:
+                name = None
+            # Under Python 2 the name is a unicode object; formatting it into
+            # a byte-string template raises UnicodeEncodeError for non-ASCII
+            # names, which used to drop those records silently.  Encode it
+            # explicitly, and report a missing name as 'Unknown' rather than
+            # the literal text 'None'.
+            from_country = name.encode('utf-8') if name else 'Unknown'
+            print '{}|{}'.format(from_country, '|'.join(parts))
+        except Exception:
+            # Best effort: skip a record that cannot be processed.  Not a
+            # bare 'except:', so KeyboardInterrupt still reaches the handler
+            # below.
+            pass
+except KeyboardInterrupt:
+    sys.exit(0)
+finally:
+    # Release the database file on every exit path.
+    reader.close()
+
+