diff options
author | Yury Melnichek <melnichek@gmail.com> | 2012-09-17 14:12:12 +0400 |
---|---|---|
committer | Alex Zolotarev <alex@maps.me> | 2015-09-23 01:43:33 +0300 |
commit | f8d90e92ce791650dc89944fca009fc36d9e3a90 (patch) | |
tree | e25271f1ac7d83d71fcef5afb96b2c21a9ee2ea5 /crawler | |
parent | c97750449bb772135dacf5bafeec28f05d9a8a45 (diff) |
[crawler] Handle correctly paths with symbols '(' and ')'.
Diffstat (limited to 'crawler')
-rwxr-xr-x | crawler/wikitravel-optimize-articles.py | 7 |
1 files changed, 6 insertions, 1 deletions
diff --git a/crawler/wikitravel-optimize-articles.py b/crawler/wikitravel-optimize-articles.py
index 07c0166c78..afa7c5d71b 100755
--- a/crawler/wikitravel-optimize-articles.py
+++ b/crawler/wikitravel-optimize-articles.py
@@ -7,6 +7,9 @@ import sys
 
 myPath = os.path.dirname(os.path.realpath(__file__))
 
+def formatPath(s):
+  return s.replace('(', '\\(').replace(')', '\\)')
+
 for i, line in enumerate(sys.stdin):
   (url, title, fileBase) = json.loads(line)
   fileName = fileBase + '.article'
@@ -19,4 +22,6 @@ for i, line in enumerate(sys.stdin):
               '--remove-intertag-spaces --simple-bool-attr --remove-quotes '
               '--remove-js-protocol --type html '
               '-o {outFileName} {fileName}'
-              .format(myPath = myPath, fileName = fileName, outFileName = outFileName))
+              .format(myPath = myPath,
+                      fileName = formatPath(fileName),
+                      outFileName = formatPath(outFileName)))