Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/matomo-org/matomo.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rwxr-xr-x misc/log-analytics/import_logs.py 49
-rw-r--r-- tests/resources/access-logs/fake_logs.log 1
2 files changed, 40 insertions, 10 deletions
diff --git a/misc/log-analytics/import_logs.py b/misc/log-analytics/import_logs.py
index ba8639469c..974e553256 100755
--- a/misc/log-analytics/import_logs.py
+++ b/misc/log-analytics/import_logs.py
@@ -1402,17 +1402,21 @@ class Parser(object):
return True
@staticmethod
- def detect_format(file):
- """
- Return the best matching format for this file, or None if none was found.
- """
- logging.debug('Detecting the log format')
-
- format = None
+ def check_format(lineOrFile):
+ format = False
format_groups = 0
for name, candidate_format in FORMATS.iteritems():
logging.debug("Check format %s", name)
- match = candidate_format.check_format(file)
+
+ match = None
+ try:
+ if isinstance(lineOrFile, basestring):
+ match = candidate_format.check_format_line(lineOrFile)
+ else:
+ match = candidate_format.check_format(lineOrFile)
+ except:
+ pass
+
if match:
logging.debug('Format %s matches', name)
@@ -1428,10 +1432,35 @@ class Parser(object):
else:
logging.debug('Format %s does not match', name)
+
+ return format
+
+ @staticmethod
+ def detect_format(file):
+ """
+ Return the best matching format for this file, or None if none was found.
+ """
+ logging.debug('Detecting the log format')
+
+ format = False
+
+ # check the format using the file (for formats like the IIS one)
+ format = Parser.check_format(file)
+
+ # check the format using the first 1000 lines (to avoid irregular ones)
+ lineno = 0
+ while not format and lineno < 1000:
+ line = file.readline()
+ lineno = lineno + 1
+
+ logging.debug("Detecting format against line %i" % lineno)
+ format = Parser.check_format(line)
+
+ file.seek(0)
if not format:
- fatal_error("cannot determine the log format using the first line of the log file. Try removing it" +
- " or specifying the format with the --log-format-name command line argument.")
+ fatal_error("cannot determine the log format using the first 1000 lines of the log file. Try " +
+ "specifying the format with the --log-format-name command line argument.")
return
logging.debug('Format %s is the best match', format.name)
diff --git a/tests/resources/access-logs/fake_logs.log b/tests/resources/access-logs/fake_logs.log
index f267faf2ee..23de62cd7f 100644
--- a/tests/resources/access-logs/fake_logs.log
+++ b/tests/resources/access-logs/fake_logs.log
@@ -1,3 +1,4 @@
+70.117.169.113 - - [26/Nov/2013:01:41:01 -0500] "\x80w\x01\x03\x01" 400 226 "-" "-"
175.41.192.40 - - [09/Aug/2012:10:10:38 +0200] "GET http://piwik.net/blog/category/meta/ HTTP/1.1" 200 3574 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/534.24 (KHTML, like Gecko) RockMelt/0.9.58.494 Chrome/11.0.696.71 Safari/534.24"
175.41.192.40 - - [09/Aug/2012:10:11:30 +0200] "GET http://piwik.net/faq/ HTTP/1.1" 200 3574 "-" "Mozilla/5.0 (Linux; U; Android 2.3.5; en-us; HTC Vision Build/GRI40) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1"
175.41.192.40 - - [09/Aug/2012:10:11:56 +0200] "GET /blog/category/community/ HTTP/1.1" 200 3574 "-" "Mozilla/5.0 (X11; U; Linux x86_64; ca-ad) AppleWebKit/531.2+ (KHTML, like Gecko) Safari/531.2+ Epiphany/2.30.6"