Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/matomo-org/matomo.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/misc
diff options
context:
space:
mode:
author: Cyril Bonté <cyril.bonte@free.fr> 2014-05-29 03:15:58 +0400
committer: Cyril Bonté <cyril.bonte@free.fr> 2014-06-06 23:30:26 +0400
commit: b4d4cd995e47d25915962abec2aa8428ec101efc (patch)
tree: 0320fe5d879c909943c4b6075352d4e7cfeff7a2 /misc
parent: b405ba713220824d76da18c0e7548b174b62c806 (diff)
add a cache for parsed dates
Diffstat (limited to 'misc')
-rwxr-xr-x misc/log-analytics/import_logs.py 46
1 files changed, 30 insertions, 16 deletions
diff --git a/misc/log-analytics/import_logs.py b/misc/log-analytics/import_logs.py
index 632d7b69f2..223ae37624 100755
--- a/misc/log-analytics/import_logs.py
+++ b/misc/log-analytics/import_logs.py
@@ -47,6 +47,10 @@ except ImportError:
print >> sys.stderr, 'simplejson (http://pypi.python.org/pypi/simplejson/) is required.'
sys.exit(1)
+try:
+ from collections import OrderedDict
+except ImportError:
+ from ordereddict import OrderedDict
##
@@ -1592,6 +1596,7 @@ class Parser(object):
resolver.check_format(format)
hits = []
+ cache_dates = OrderedDict()
for lineno, line in enumerate(file):
try:
line = line.decode(config.options.encoding)
@@ -1669,24 +1674,33 @@ class Parser(object):
# Parse date.
# We parse it after calling check_methods as it's quite CPU hungry, and
# we want to avoid that cost for excluded hits.
- date_string = format.get('date')
- try:
- hit.date = datetime.datetime.strptime(date_string, format.date_format)
- except ValueError:
- invalid_line(line, 'invalid date')
- continue
+ # To mitigate CPU usage, parsed dates are cached.
+ date_key = format.get('date') + '|' + format.get('timezone')
+ hit.date = cache_dates.get(date_key)
+ if not hit.date:
+ date_string = format.get('date')
+ try:
+ hit.date = datetime.datetime.strptime(date_string, format.date_format)
+ except ValueError:
+ invalid_line(line, 'invalid date')
+ continue
- # Parse timezone and subtract its value from the date
- try:
- timezone = float(format.get('timezone'))
- except BaseFormatException:
- timezone = 0
- except ValueError:
- invalid_line(line, 'invalid timezone')
- continue
+ # Parse timezone and subtract its value from the date
+ try:
+ timezone = float(format.get('timezone'))
+ except BaseFormatException:
+ timezone = 0
+ except ValueError:
+ invalid_line(line, 'invalid timezone')
+ continue
+
+ if timezone:
+ hit.date -= datetime.timedelta(hours=timezone/100)
+
+ if len(cache_dates) > 3600:
+ cache_dates.popitem(False)
+ cache_dates[date_key] = hit.date
- if timezone:
- hit.date -= datetime.timedelta(hours=timezone/100)
if config.options.replay_tracking:
# we need a query string and we only consider requests with piwik.php
if not hit.query_string or not hit.path.lower().endswith('piwik.php'):