Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/matomo-org/matomo.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/misc
diff options
context:
space:
mode:
authordiosmosis <benaka.moorthi@gmail.com>2013-02-27 03:33:20 +0400
committerdiosmosis <benaka.moorthi@gmail.com>2013-02-27 03:33:20 +0400
commit7e93e75012153e5f79d3bac98d9662e43b9df21d (patch)
treeb9ac5d6313e2d810ba52ddaf4300f947338e0914 /misc
parentd976a3076de500f829eb9d8df9f1c1c660a718e6 (diff)
parent8d642de42efd40898441fd2223d1af0d8c457357 (diff)
Merge pull request #34 from clearcode/master
Modify replay tracking to use site ID in query string when using replay-tracking option.
Diffstat (limited to 'misc')
-rwxr-xr-xmisc/log-analytics/import_logs.py72
1 files changed, 50 insertions, 22 deletions
diff --git a/misc/log-analytics/import_logs.py b/misc/log-analytics/import_logs.py
index 5d174499d0..dbcea64735 100755
--- a/misc/log-analytics/import_logs.py
+++ b/misc/log-analytics/import_logs.py
@@ -879,6 +879,10 @@ class DynamicResolver(object):
def __init__(self):
self._cache = {}
+ if config.options.replay_tracking:
+ # get existing sites
+ self._cache['sites'] = piwik.call_api(
+ 'SitesManager.getAllSites')
def _get_site_id_from_hit_host(self, hit):
main_url = 'http://' + hit.host
@@ -939,24 +943,43 @@ class DynamicResolver(object):
stats.piwik_sites.add(site_id)
return site_id
+ def _resolve_when_replay_tracking(self, hit):
+ """
+ If parsed site ID found in the _cache['sites'] return site ID and main_url,
+ otherwise return (None, None) tuple.
+ """
+ site_id = hit.args['idsite']
+ if site_id in self._cache['sites'].keys():
+ stats.piwik_sites.add(site_id)
+ return (site_id, self._cache['sites'][site_id]['main_url'])
+ else:
+ return (None, None)
+
def resolve(self, hit):
"""
Return the site ID from the cache if found, otherwise call _resolve.
+ If replay_tracking option is enabled, call _resolve_when_replay_tracking.
"""
- try:
- site_id = self._cache[hit.host]
- except KeyError:
- logging.debug(
- 'Site ID for hostname %s not in cache', hit.host
- )
- site_id = self._resolve(hit)
- logging.debug('Site ID for hostname %s: %s', hit.host, site_id)
- self._cache[hit.host] = site_id
- return (site_id, 'http://' + hit.host)
+ if config.options.replay_tracking:
+ # We only consider requests with piwik.php which don't need host to be imported
+ return self._resolve_when_replay_tracking(hit)
+ else:
+ try:
+ site_id = self._cache[hit.host]
+ except KeyError:
+ logging.debug(
+ 'Site ID for hostname %s not in cache', hit.host
+ )
+ site_id = self._resolve(hit)
+ logging.debug('Site ID for hostname %s: %s', hit.host, site_id)
+ self._cache[hit.host] = site_id
+ return (site_id, 'http://' + hit.host)
def check_format(self, format):
- if 'host' not in format.regex.groupindex and not config.options.log_hostname:
+ if config.options.replay_tracking:
+ pass
+ elif 'host' not in format.regex.groupindex and not config.options.log_hostname:
fatal_error(
"the selected log format doesn't include the hostname: you must "
"specify the Piwik site ID with the --idsite argument"
@@ -1068,7 +1091,7 @@ class Recorder(object):
site_id, main_url = resolver.resolve(hit)
if site_id is None:
# This hit doesn't match any known Piwik site.
- stats.piwik_sites_ignored.add(hit.host)
+ stats.piwik_sites_ignored.add(getattr(hit, 'host', 'unrecognized host to site ID %s' % hit.args.get('idsite')))
stats.count_lines_no_site.increment()
return
@@ -1411,6 +1434,21 @@ class Parser(object):
if timezone:
hit.date -= datetime.timedelta(hours=timezone/100)
+ if config.options.replay_tracking:
+ # we need a query string and we only consider requests with piwik.php
+ if not hit.query_string or not hit.path.lower().endswith('piwik.php'):
+ continue
+ else:
+ query_arguments = urlparse.parse_qs(hit.query_string)
+ if not "idsite" in query_arguments:
+ invalid_line(line, 'missing idsite')
+ continue
+ else:
+ try:
+ hit.args.update((k, v.pop().encode('raw_unicode_escape').decode(config.options.encoding)) for k, v in query_arguments.iteritems())
+ except UnicodeDecodeError:
+ invalid_line(line, 'invalid encoding')
+ continue
# Check if the hit must be excluded.
if all((method(hit) for method in self.check_methods)):
@@ -1420,16 +1458,6 @@ class Parser(object):
Recorder.add_hits(hits)
hits = []
- if config.options.replay_tracking:
- # we need a query string and we only consider requests with piwik.php
- if hit.query_string and hit.path.lower().endswith('piwik.php'):
- query_arguments = urlparse.parse_qs(hit.query_string)
- if "idsite" in query_arguments:
- try:
- hit.args.update((k, v.pop().encode('raw_unicode_escape').decode(config.options.encoding)) for k, v in query_arguments.iteritems())
- except UnicodeDecodeError:
- invalid_line(line, 'invalid encoding')
- continue
# add last chunk of hits
if len(hits) > 0:
Recorder.add_hits(hits)