diff options
author | Benaka Moorthi <benaka.moorthi@gmail.com> | 2013-06-20 07:55:51 +0400 |
---|---|---|
committer | Benaka Moorthi <benaka.moorthi@gmail.com> | 2013-06-20 07:55:51 +0400 |
commit | e2e5176279255b7ff2d80f0f511a1ef5f3748ebe (patch) | |
tree | 80ed2cb2fa9fd215c35550242f6d7ec98b8d18a6 | |
parent | 782ceed6f2b4d932c281f8726cf9974f90ba242b (diff) |
Fix the log importer's handling of log files whose request paths already contain a protocol and host: the site URL's protocol + host is now prepended only when the logged path is a plain path (starts with '/'), instead of always being prepended.
-rwxr-xr-x | misc/log-analytics/import_logs.py | 5 | ||||
-rwxr-xr-x | tests/resources/fake_logs.log | 4 |
2 files changed, 6 insertions, 3 deletions
diff --git a/misc/log-analytics/import_logs.py b/misc/log-analytics/import_logs.py index 55192ad3a4..c8bbbb4e6e 100755 --- a/misc/log-analytics/import_logs.py +++ b/misc/log-analytics/import_logs.py @@ -1131,11 +1131,14 @@ class Recorder(object): path = hit.path if hit.query_string and not config.options.strip_query_string: path += config.options.query_string_delimiter + hit.query_string + + # only prepend main url if it's a path + url = (main_url if path.startswith('/') else '') + path[:1024] args = { 'rec': '1', 'apiv': '1', - 'url': (main_url + path[:1024]).encode('utf8'), + 'url': url.encode('utf8'), 'urlref': hit.referrer[:1024].encode('utf8'), 'cip': hit.ip, 'cdt': self.date_to_piwik(hit.date), diff --git a/tests/resources/fake_logs.log b/tests/resources/fake_logs.log index abd10d768d..522c979fb5 100755 --- a/tests/resources/fake_logs.log +++ b/tests/resources/fake_logs.log @@ -1,5 +1,5 @@ -175.41.192.40 - - [09/Aug/2012:10:10:38 +0200] "GET /blog/category/meta/ HTTP/1.1" 200 3574 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/534.24 (KHTML, like Gecko) RockMelt/0.9.58.494 Chrome/11.0.696.71 Safari/534.24" -175.41.192.40 - - [09/Aug/2012:10:11:30 +0200] "GET /faq/ HTTP/1.1" 200 3574 "-" "Mozilla/5.0 (Linux; U; Android 2.3.5; en-us; HTC Vision Build/GRI40) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1" +175.41.192.40 - - [09/Aug/2012:10:10:38 +0200] "GET http://piwik.net/blog/category/meta/ HTTP/1.1" 200 3574 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/534.24 (KHTML, like Gecko) RockMelt/0.9.58.494 Chrome/11.0.696.71 Safari/534.24" +175.41.192.40 - - [09/Aug/2012:10:11:30 +0200] "GET http://piwik.net/faq/ HTTP/1.1" 200 3574 "-" "Mozilla/5.0 (Linux; U; Android 2.3.5; en-us; HTC Vision Build/GRI40) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1" 175.41.192.40 - - [09/Aug/2012:10:11:56 +0200] "GET /blog/category/community/ HTTP/1.1" 200 3574 "-" "Mozilla/5.0 (X11; U; Linux x86_64; 
ca-ad) AppleWebKit/531.2+ (KHTML, like Gecko) Safari/531.2+ Epiphany/2.30.6" 175.41.192.40 - - [09/Aug/2012:10:12:03 +0200] "GET /docs/manage-websites/ HTTP/1.1" 200 3574 "-" "Mozilla/5.0 (X11; Linux i686; rv:6.0) Gecko/20100101 Firefox/6.0" 72.44.32.10 - - [09/Aug/2012:15:48:07 +0200] "GET / HTTP/1.1" 200 3574 "-" "Mozilla/5.0 (compatible; MSIE 10.6; Windows NT 6.1; Trident/5.0; InfoPath.2; SLCC1; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 2.0.50727) 3gpp-gba UNTRUSTED/1.0" junk extra |