Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/matomo-org/matomo.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/misc
diff options
context:
space:
mode:
author: diosmosis <benaka@piwik.pro> 2015-02-13 02:18:46 +0300
committer: diosmosis <benaka@piwik.pro> 2015-02-13 02:18:46 +0300
commit: 2e13a1a2b12e72f15ca2b48301e70bdf20e1f02d (patch)
tree: d3d3b493075a678f950545e2d4216de14dc5f1cd /misc
parent: 88fa5af2b3585930c66d550ae9890baecaf14aab (diff)
Refs #7151, add tests for new log importer options, fix small bug where userid not set in hit object, fix ignore group option value splitting.
Diffstat (limited to 'misc')
-rwxr-xr-x misc/log-analytics/import_logs.py 14
-rw-r--r-- misc/log-analytics/tests/logs/iis.log 2
-rw-r--r-- misc/log-analytics/tests/tests.py 102
3 files changed, 109 insertions, 9 deletions
diff --git a/misc/log-analytics/import_logs.py b/misc/log-analytics/import_logs.py
index 43448b3c1f..55ec11d4df 100755
--- a/misc/log-analytics/import_logs.py
+++ b/misc/log-analytics/import_logs.py
@@ -784,7 +784,7 @@ class Configuration(object):
self.options.download_extensions = DOWNLOAD_EXTENSIONS
if self.options.regex_groups_to_ignore:
- self.options.regex_groups_to_ignore = set(self.options.regex_groups_to_ignore.split())
+ self.options.regex_groups_to_ignore = set(self.options.regex_groups_to_ignore.split(','))
def __init__(self):
self._parse_args(self._create_parser())
@@ -1969,7 +1969,7 @@ class Parser(object):
userid = format.get('userid')
if userid != '-':
- hit.args['uid'] = userid
+ hit.args['uid'] = hit.userid = userid
except:
pass
@@ -2042,10 +2042,16 @@ class Parser(object):
def _add_custom_vars_from_regex_groups(self, hit, format, groups, is_page_var):
for group_name, custom_var_name in groups.iteritems():
if group_name in format.get_all():
+ value = format.get(group_name)
+
+ # don't track the '-' empty placeholder value
+ if value == '-':
+ continue
+
if is_page_var:
- hit.add_page_custom_var(custom_var_name, format.get(group_name))
+ hit.add_page_custom_var(custom_var_name, value)
else:
- hit.add_visit_custom_var(custom_var_name, format.get(group_name))
+ hit.add_visit_custom_var(custom_var_name, value)
def main():
"""
diff --git a/misc/log-analytics/tests/logs/iis.log b/misc/log-analytics/tests/logs/iis.log
index 0ec7bf504f..f25cc5fad6 100644
--- a/misc/log-analytics/tests/logs/iis.log
+++ b/misc/log-analytics/tests/logs/iis.log
@@ -2,4 +2,4 @@
#Version: 1.0
#Date: 2012-04-01 00:00:13
#Fields: date time s-sitename s-computername s-ip cs-method cs-uri-stem cs-uri-query s-port cs-username c-ip cs-version cs(User-Agent) cs(Cookie) cs(Referer) cs-host sc-status sc-substatus sc-win32-status sc-bytes cs-bytes time-taken
-2012-04-01 00:00:13 W3SVC834221556 PXQD1 1.2.3.4 GET /foo/bar topCat1=divinity&submit=Search 80 - 5.6.7.8 HTTP/1.1 Mozilla/5.0+(X11;+U;+Linux+i686;+en-US;+rv:1.9.2.7)+Gecko/20100722+Firefox/3.6.7 - - example.com 200 0 0 27028 214 1687
+2012-04-01 00:00:13 W3SVC834221556 PXQD1 1.2.3.4 GET /foo/bar topCat1=divinity&submit=Search 80 theuser 5.6.7.8 HTTP/1.1 Mozilla/5.0+(X11;+U;+Linux+i686;+en-US;+rv:1.9.2.7)+Gecko/20100722+Firefox/3.6.7 - - example.com 200 654 456 27028 214 1687
diff --git a/misc/log-analytics/tests/tests.py b/misc/log-analytics/tests/tests.py
index a550e3a388..81b27ad36f 100644
--- a/misc/log-analytics/tests/tests.py
+++ b/misc/log-analytics/tests/tests.py
@@ -95,7 +95,13 @@ class Options(object):
enable_http_errors = False
download_extensions = 'doc,pdf'
custom_w3c_fields = {}
+ dump_log_regex = False
w3c_time_taken_in_millisecs = False
+ w3c_fields = None
+ w3c_field_regexes = {}
+ regex_group_to_visit_cvars_map = {}
+ regex_group_to_page_cvars_map = {}
+ regex_groups_to_ignore = None
class Config(object):
"""Mock configuration."""
@@ -460,8 +466,6 @@ def test_amazon_cloudfront_web_parsing():
hits = [hit.__dict__ for hit in Recorder.recorders]
- import_logs.logging.debug(hits)
-
assert hits[0]['status'] == u'200'
assert hits[0]['userid'] == None
assert hits[0]['is_error'] == False
@@ -502,8 +506,6 @@ def test_amazon_cloudfront_rtmp_parsing():
hits = [hit.__dict__ for hit in Recorder.recorders]
- import_logs.logging.debug(hits)
-
assert hits[0]['is_download'] == False
assert hits[0]['ip'] == u'192.0.2.147'
assert hits[0]['is_redirect'] == False
@@ -552,3 +554,95 @@ def test_amazon_cloudfront_rtmp_parsing():
assert hits[1]['full_path'] == u'/shqshne4jdp4b6.cloudfront.net/cfx/st\u200b'
assert len(hits) == 2
+
+def test_ignore_groups_option_removes_groups():
+ """Test that the --ignore-groups option removes groups so they do not appear in hits."""
+
+ file_ = 'logs/iis.log'
+
+ # have to override previous globals override for this test
+ import_logs.config.options.custom_w3c_fields = {}
+ Recorder.recorders = []
+ import_logs.parser = import_logs.Parser()
+ import_logs.config.format = None
+ import_logs.config.options.enable_http_redirects = True
+ import_logs.config.options.enable_http_errors = True
+ import_logs.config.options.replay_tracking = False
+ import_logs.config.options.w3c_time_taken_in_millisecs = True
+ import_logs.config.options.regex_groups_to_ignore = set(['userid','generation_time_milli'])
+ import_logs.parser.parse(file_)
+
+ hits = [hit.__dict__ for hit in Recorder.recorders]
+
+ assert hits[0]['userid'] == None
+ assert hits[0]['generation_time_milli'] == 0
+
+def test_regex_group_to_custom_var_options():
+ """Test that the --regex-group-to-visit-cvar and --regex-group-to-page-cvar track regex groups to custom vars."""
+
+ file_ = 'logs/iis.log'
+
+ # have to override previous globals override for this test
+ import_logs.config.options.custom_w3c_fields = {}
+ Recorder.recorders = []
+ import_logs.parser = import_logs.Parser()
+ import_logs.config.format = None
+ import_logs.config.options.enable_http_redirects = True
+ import_logs.config.options.enable_http_errors = True
+ import_logs.config.options.replay_tracking = False
+ import_logs.config.options.w3c_time_taken_in_millisecs = True
+ import_logs.config.options.regex_groups_to_ignore = set()
+ import_logs.config.options.regex_group_to_visit_cvars_map = {
+ 'userid': "User Name",
+ 'date': "The Date"
+ }
+ import_logs.config.options.regex_group_to_page_cvars_map = {
+ 'generation_time_milli': 'Geneartion Time',
+ 'referrer': 'The Referrer'
+ }
+ import_logs.parser.parse(file_)
+
+ hits = [hit.__dict__ for hit in Recorder.recorders]
+
+ assert hits[0]['args']['_cvar'] == {1: ['The Date', '2012-04-01 00:00:13'], 2: ['User Name', 'theuser']} # check visit custom vars
+ assert hits[0]['args']['cvar'] == {1: ['Geneartion Time', '1687']} # check page custom vars
+
+ assert hits[0]['userid'] == 'theuser'
+ assert hits[0]['date'] == datetime.datetime(2012, 4, 1, 0, 0, 13)
+ assert hits[0]['generation_time_milli'] == 1687
+ assert hits[0]['referrer'] == ''
+
+def test_w3c_custom_field_regex_option():
+ """Test that --w3c-field-regex can be used to match custom W3C log fields."""
+
+ file_ = 'logs/iis.log'
+
+ # have to override previous globals override for this test
+ import_logs.config.options.custom_w3c_fields = {}
+ Recorder.recorders = []
+ import_logs.parser = import_logs.Parser()
+ import_logs.config.format = None
+ import_logs.config.options.enable_http_redirects = True
+ import_logs.config.options.enable_http_errors = True
+ import_logs.config.options.replay_tracking = False
+ import_logs.config.options.w3c_time_taken_in_millisecs = True
+ import_logs.config.options.w3c_field_regexes = {
+ 'sc-substatus': '(?P<substatus>\S+)',
+ 'sc-win32-status': '(?P<win32_status>\S+)'
+ }
+
+ format = import_logs.W3cExtendedFormat()
+
+ file_handle = open(file_)
+ format.check_format(file_handle)
+ match = None
+ while not match:
+ line = file_handle.readline()
+ if not line:
+ break
+ match = format.match(line)
+ file_handle.close()
+
+ assert match is not None
+ assert format.get('substatus') == '654'
+ assert format.get('win32_status') == '456'