Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/matomo-org/matomo.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/misc
diff options
context:
space:
mode:
author diosmosis <benaka@piwik.pro> 2014-12-09 06:44:43 +0300
committer diosmosis <benaka@piwik.pro> 2014-12-09 06:44:43 +0300
commit 103c893fdbfc71ee9a9e4dbc4c90ee0d24c36249 (patch)
tree af388c45ac13fa109835ec8de291fad63d22001d /misc
parent a216ebc3682a0ffc046bb460793b68013acb4cfd (diff)
Refs #6388, map IIS time-taken field to generation time in log importer.
Diffstat (limited to 'misc')
-rwxr-xr-x misc/log-analytics/import_logs.py 32
-rw-r--r-- misc/log-analytics/tests/logs/netscaler.log 2
-rw-r--r-- misc/log-analytics/tests/tests.py 13
3 files changed, 33 insertions, 14 deletions
diff --git a/misc/log-analytics/import_logs.py b/misc/log-analytics/import_logs.py
index d58f19ba7d..5929f3caa1 100755
--- a/misc/log-analytics/import_logs.py
+++ b/misc/log-analytics/import_logs.py
@@ -200,22 +200,20 @@ class IisFormat(RegexFormat):
'sc-status': '(?P<status>\d+)',
'sc-bytes': '(?P<length>\S+)',
'cs-host': '(?P<host>\S+)',
- 'cs-username': '(?P<userid>\S+)'
+ 'cs-username': '(?P<userid>\S+)',
+ 'time-taken': '(?P<generation_time_milli>\d+)'
}
def __init__(self):
super(IisFormat, self).__init__('iis', None, '%Y-%m-%d %H:%M:%S')
def check_format(self, file):
- line = file.readline()
- if not line.startswith('#'):
+ header_lines = [file.readline() for i in xrange(3)]
+
+ if not header_lines[0].startswith('#'):
file.seek(0)
return
- # Skip the next 2 lines.
- for i in xrange(2):
- file.readline()
-
# Parse the 4th 'Fields: ' line to create the regex to use
full_regex = []
line = file.readline()
@@ -225,6 +223,16 @@ class IisFormat(RegexFormat):
expected_fields[mapped_field_name] = IisFormat.fields[field_name]
del expected_fields[field_name]
+ # if the --iis-time-taken-secs option is used, make sure the time-taken field is interpreted as seconds
+ if config.options.iis_time_taken_in_secs:
+ expected_fields['time-taken'] = '(?P<generation_time_secs>\S+)'
+ else:
+ # check if we're importing netscaler logs and if so, issue a warning
+ if 'netscaler' in header_lines[1].lower():
+ logging.info("WARNING: netscaler log file being parsed without --iis-time-taken-secs option. Netscaler"
+ " stores second values in the time-taken field. If your logfile does this, the aforementioned"
+ " option must be used in order to get accurate generation times.")
+
# Skip the 'Fields: ' prefix.
line = line[9:]
for field in line.split():
@@ -501,6 +509,11 @@ class Configuration(object):
"files such as those from the Advanced Logging IIS module. Used as, eg, --iis-map-field my-date=date. "
"Recognized default fields include: %s" % (', '.join(IisFormat.fields.keys()))
)
+ option_parser.add_option(
+ '--iis-time-taken-secs', action='store_true', default=False, dest='iis_time_taken_in_secs',
+ help="If set, interprets the time-taken IIS log field as a number of seconds. This must be set for importing"
+ " netscaler logs."
+ )
return option_parser
def _set_iis_field_map(self, option, opt_str, value, parser):
@@ -1686,7 +1699,10 @@ class Parser(object):
try:
hit.generation_time_milli = int(format.get('generation_time_micro')) / 1000
except BaseFormatException:
- hit.generation_time_milli = 0
+ try:
+ hit.generation_time_milli = int(format.get('generation_time_secs')) * 1000
+ except BaseFormatException:
+ hit.generation_time_milli = 0
if config.options.log_hostname:
hit.host = config.options.log_hostname
diff --git a/misc/log-analytics/tests/logs/netscaler.log b/misc/log-analytics/tests/logs/netscaler.log
index 0eb5d310b3..380c09d2c4 100644
--- a/misc/log-analytics/tests/logs/netscaler.log
+++ b/misc/log-analytics/tests/logs/netscaler.log
@@ -2,4 +2,4 @@
#Software: Netscaler Web Logging(NSWL)
#Date: 2014-02-18 11:55:13
#Fields: date time c-ip cs-username sc-servicename s-ip s-port cs-method cs-uri-stem cs-uri-query sc-status cs-bytes sc-bytes time-taken cs-version cs(User-Agent) cs(Cookie) cs(Referer)
-2012-08-16 11:55:13 172.20.1.0 - HTTP 192.168.6.254 8080 GET /Citrix/XenApp/Wan/auth/login.jsp - 302 247 355 0 HTTP/1.1 Mozilla/4.0+(compatible;+MSIE+7.0;+Windows+NT+5.1;+Trident/4.0;+.NET+CLR+1.1.4322;+.NET+CLR+2.0.50727;+.NET+CLR+3.0.04506.648;+.NET+CLR+3.5.21022) - -
+2012-08-16 11:55:13 172.20.1.0 - HTTP 192.168.6.254 8080 GET /Citrix/XenApp/Wan/auth/login.jsp - 302 247 355 1 HTTP/1.1 Mozilla/4.0+(compatible;+MSIE+7.0;+Windows+NT+5.1;+Trident/4.0;+.NET+CLR+1.1.4322;+.NET+CLR+2.0.50727;+.NET+CLR+3.0.04506.648;+.NET+CLR+3.5.21022) - -
diff --git a/misc/log-analytics/tests/tests.py b/misc/log-analytics/tests/tests.py
index 5cde40d34e..89c28dfe4f 100644
--- a/misc/log-analytics/tests/tests.py
+++ b/misc/log-analytics/tests/tests.py
@@ -68,6 +68,7 @@ class Options(object):
enable_http_errors = False
download_extensions = 'doc,pdf'
custom_iis_fields = {}
+ iis_time_taken_in_secs = False
class Config(object):
"""Mock configuration."""
@@ -232,7 +233,7 @@ def check_iis_groups(groups):
assert groups['host'] == 'example.com'
expected_hit_properties = ['date', 'path', 'query_string', 'ip', 'referrer', 'user_agent',
- 'status', 'length', 'host', 'userid']
+ 'status', 'length', 'host', 'userid', 'generation_time_milli']
for property_name in groups.keys():
assert property_name in expected_hit_properties
@@ -301,7 +302,8 @@ def test_iis_custom_format():
import_logs.config.options.custom_iis_fields = {
'date-local': 'date',
'time-local': 'time',
- 'cs(Host)': 'cs-host'
+ 'cs(Host)': 'cs-host',
+ 'TimeTakenMS': 'time-taken'
}
Recorder.recorders = []
import_logs.parser = import_logs.Parser()
@@ -319,7 +321,7 @@ def test_iis_custom_format():
assert hits[0]['is_download'] == False
assert hits[0]['referrer'] == u'"http://example.com/Search/SearchResults.pg?informationRecipient.languageCode.c=en"'
assert hits[0]['args'] == {}
- assert hits[0]['generation_time_milli'] == 0
+ assert hits[0]['generation_time_milli'] == 109
assert hits[0]['host'] == 'foo'
assert hits[0]['filename'] == 'logs/iis_custom.log'
assert hits[0]['is_redirect'] == False
@@ -357,7 +359,7 @@ def test_iis_custom_format():
assert hits[2]['is_download'] == False
assert hits[2]['referrer'] == ''
assert hits[2]['args'] == {}
- assert hits[2]['generation_time_milli'] == 0
+ assert hits[2]['generation_time_milli'] == 359
assert hits[2]['host'] == 'foo'
assert hits[2]['filename'] == 'logs/iis_custom.log'
assert hits[2]['is_redirect'] == False
@@ -383,6 +385,7 @@ def test_netscaler_parsing():
import_logs.config.options.enable_http_redirects = True
import_logs.config.options.enable_http_errors = True
import_logs.config.options.replay_tracking = False
+ import_logs.config.options.iis_time_taken_in_secs = True
import_logs.parser.parse(file_)
hits = [hit.__dict__ for hit in Recorder.recorders]
@@ -394,7 +397,7 @@ def test_netscaler_parsing():
assert hits[0]['is_download'] == False
assert hits[0]['referrer'] == ''
assert hits[0]['args'] == {}
- assert hits[0]['generation_time_milli'] == 0
+ assert hits[0]['generation_time_milli'] == 1000
assert hits[0]['host'] == 'foo'
assert hits[0]['filename'] == 'logs/netscaler.log'
assert hits[0]['is_redirect'] == True