diff options
author | diosmosis <benaka@piwik.pro> | 2014-12-09 06:44:43 +0300 |
---|---|---|
committer | diosmosis <benaka@piwik.pro> | 2014-12-09 06:44:43 +0300 |
commit | 103c893fdbfc71ee9a9e4dbc4c90ee0d24c36249 (patch) | |
tree | af388c45ac13fa109835ec8de291fad63d22001d /misc | |
parent | a216ebc3682a0ffc046bb460793b68013acb4cfd (diff) |
Refs #6388, map IIS time-taken field to generation time in log importer.
Diffstat (limited to 'misc')
-rwxr-xr-x | misc/log-analytics/import_logs.py | 32 | ||||
-rw-r--r-- | misc/log-analytics/tests/logs/netscaler.log | 2 | ||||
-rw-r--r-- | misc/log-analytics/tests/tests.py | 13 |
3 files changed, 33 insertions, 14 deletions
diff --git a/misc/log-analytics/import_logs.py b/misc/log-analytics/import_logs.py index d58f19ba7d..5929f3caa1 100755 --- a/misc/log-analytics/import_logs.py +++ b/misc/log-analytics/import_logs.py @@ -200,22 +200,20 @@ class IisFormat(RegexFormat): 'sc-status': '(?P<status>\d+)', 'sc-bytes': '(?P<length>\S+)', 'cs-host': '(?P<host>\S+)', - 'cs-username': '(?P<userid>\S+)' + 'cs-username': '(?P<userid>\S+)', + 'time-taken': '(?P<generation_time_milli>\d+)' } def __init__(self): super(IisFormat, self).__init__('iis', None, '%Y-%m-%d %H:%M:%S') def check_format(self, file): - line = file.readline() - if not line.startswith('#'): + header_lines = [file.readline() for i in xrange(3)] + + if not header_lines[0].startswith('#'): file.seek(0) return - # Skip the next 2 lines. - for i in xrange(2): - file.readline() - # Parse the 4th 'Fields: ' line to create the regex to use full_regex = [] line = file.readline() @@ -225,6 +223,16 @@ class IisFormat(RegexFormat): expected_fields[mapped_field_name] = IisFormat.fields[field_name] del expected_fields[field_name] + # if the --iis-time-taken-secs option is used, make sure the time-taken field is interpreted as seconds + if config.options.iis_time_taken_in_secs: + expected_fields['time-taken'] = '(?P<generation_time_secs>\S+)' + else: + # check if we're importing netscaler logs and if so, issue a warning + if 'netscaler' in header_lines[1].lower(): + logging.info("WARNING: netscaler log file being parsed without --iis-time-taken-secs option. Netscaler" + " stores second values in the time-taken field. If your logfile does this, the aforementioned" + " option must be used in order to get accurate generation times.") + # Skip the 'Fields: ' prefix. line = line[9:] for field in line.split(): @@ -501,6 +509,11 @@ class Configuration(object): "files such as those from the Advanced Logging IIS module. Used as, eg, --iis-map-field my-date=date. 
" "Recognized default fields include: %s" % (', '.join(IisFormat.fields.keys())) ) + option_parser.add_option( + '--iis-time-taken-secs', action='store_true', default=False, dest='iis_time_taken_in_secs', + help="If set, interprets the time-taken IIS log field as a number of seconds. This must be set for importing" + " netscaler logs." + ) return option_parser def _set_iis_field_map(self, option, opt_str, value, parser): @@ -1686,7 +1699,10 @@ class Parser(object): try: hit.generation_time_milli = int(format.get('generation_time_micro')) / 1000 except BaseFormatException: - hit.generation_time_milli = 0 + try: + hit.generation_time_milli = int(format.get('generation_time_secs')) * 1000 + except BaseFormatException: + hit.generation_time_milli = 0 if config.options.log_hostname: hit.host = config.options.log_hostname diff --git a/misc/log-analytics/tests/logs/netscaler.log b/misc/log-analytics/tests/logs/netscaler.log index 0eb5d310b3..380c09d2c4 100644 --- a/misc/log-analytics/tests/logs/netscaler.log +++ b/misc/log-analytics/tests/logs/netscaler.log @@ -2,4 +2,4 @@ #Software: Netscaler Web Logging(NSWL) #Date: 2014-02-18 11:55:13 #Fields: date time c-ip cs-username sc-servicename s-ip s-port cs-method cs-uri-stem cs-uri-query sc-status cs-bytes sc-bytes time-taken cs-version cs(User-Agent) cs(Cookie) cs(Referer) -2012-08-16 11:55:13 172.20.1.0 - HTTP 192.168.6.254 8080 GET /Citrix/XenApp/Wan/auth/login.jsp - 302 247 355 0 HTTP/1.1 Mozilla/4.0+(compatible;+MSIE+7.0;+Windows+NT+5.1;+Trident/4.0;+.NET+CLR+1.1.4322;+.NET+CLR+2.0.50727;+.NET+CLR+3.0.04506.648;+.NET+CLR+3.5.21022) - - +2012-08-16 11:55:13 172.20.1.0 - HTTP 192.168.6.254 8080 GET /Citrix/XenApp/Wan/auth/login.jsp - 302 247 355 1 HTTP/1.1 Mozilla/4.0+(compatible;+MSIE+7.0;+Windows+NT+5.1;+Trident/4.0;+.NET+CLR+1.1.4322;+.NET+CLR+2.0.50727;+.NET+CLR+3.0.04506.648;+.NET+CLR+3.5.21022) - - diff --git a/misc/log-analytics/tests/tests.py b/misc/log-analytics/tests/tests.py index 5cde40d34e..89c28dfe4f 100644 
--- a/misc/log-analytics/tests/tests.py +++ b/misc/log-analytics/tests/tests.py @@ -68,6 +68,7 @@ class Options(object): enable_http_errors = False download_extensions = 'doc,pdf' custom_iis_fields = {} + iis_time_taken_in_secs = False class Config(object): """Mock configuration.""" @@ -232,7 +233,7 @@ def check_iis_groups(groups): assert groups['host'] == 'example.com' expected_hit_properties = ['date', 'path', 'query_string', 'ip', 'referrer', 'user_agent', - 'status', 'length', 'host', 'userid'] + 'status', 'length', 'host', 'userid', 'generation_time_milli'] for property_name in groups.keys(): assert property_name in expected_hit_properties @@ -301,7 +302,8 @@ def test_iis_custom_format(): import_logs.config.options.custom_iis_fields = { 'date-local': 'date', 'time-local': 'time', - 'cs(Host)': 'cs-host' + 'cs(Host)': 'cs-host', + 'TimeTakenMS': 'time-taken' } Recorder.recorders = [] import_logs.parser = import_logs.Parser() @@ -319,7 +321,7 @@ def test_iis_custom_format(): assert hits[0]['is_download'] == False assert hits[0]['referrer'] == u'"http://example.com/Search/SearchResults.pg?informationRecipient.languageCode.c=en"' assert hits[0]['args'] == {} - assert hits[0]['generation_time_milli'] == 0 + assert hits[0]['generation_time_milli'] == 109 assert hits[0]['host'] == 'foo' assert hits[0]['filename'] == 'logs/iis_custom.log' assert hits[0]['is_redirect'] == False @@ -357,7 +359,7 @@ def test_iis_custom_format(): assert hits[2]['is_download'] == False assert hits[2]['referrer'] == '' assert hits[2]['args'] == {} - assert hits[2]['generation_time_milli'] == 0 + assert hits[2]['generation_time_milli'] == 359 assert hits[2]['host'] == 'foo' assert hits[2]['filename'] == 'logs/iis_custom.log' assert hits[2]['is_redirect'] == False @@ -383,6 +385,7 @@ def test_netscaler_parsing(): import_logs.config.options.enable_http_redirects = True import_logs.config.options.enable_http_errors = True import_logs.config.options.replay_tracking = False + 
import_logs.config.options.iis_time_taken_in_secs = True import_logs.parser.parse(file_) hits = [hit.__dict__ for hit in Recorder.recorders] @@ -394,7 +397,7 @@ def test_netscaler_parsing(): assert hits[0]['is_download'] == False assert hits[0]['referrer'] == '' assert hits[0]['args'] == {} - assert hits[0]['generation_time_milli'] == 0 + assert hits[0]['generation_time_milli'] == 1000 assert hits[0]['host'] == 'foo' assert hits[0]['filename'] == 'logs/netscaler.log' assert hits[0]['is_redirect'] == True |