diff options
author | Yury Melnichek <melnichek@gmail.com> | 2012-09-11 01:54:37 +0400 |
---|---|---|
committer | Alex Zolotarev <alex@maps.me> | 2015-09-23 01:43:31 +0300 |
commit | 704b2fe733ca8a8f5d680c1cf829a124bf3041bd (patch) | |
tree | 29792f3a1d16dd4da3bb2f4846a840d4c32cddb9 /crawler | |
parent | 9ac54155a64d5feac9d62d0a556e76c473615fcf (diff) |
[crawler] Add script to do manual renames.
Diffstat (limited to 'crawler')
-rwxr-xr-x | crawler/wikitravel-manual-rename.py | 18 |
1 files changed, 18 insertions, 0 deletions
#!/opt/local/bin/python
"""Manually rename geocoded crawler files (crawler/wikitravel-manual-rename.py).

Reads one JSON array per line from stdin, each of the form
``[url, title, fileName]``.  For every entry the script rebuilds the file
name that the page would have had under the older naming scheme and, if a
``.google_geocoded`` file exists under that old name but not under
``fileName``, moves it to the new name.  Every move is echoed to stdout as
an ``mv "old" "new"`` line.
"""
import hashlib
import json
import os.path
import shutil
import sys
import string

# Number of leading characters dropped from each URL to obtain the page name.
# NOTE(review): presumably the length of a fixed prefix such as
# "http://m.wikitravel.org/en/" (27 characters) -- confirm against the crawler.
URL_PREFIX_LENGTH = 27

# Extension of the geocoding result files this script renames.
SUFFIX = '.google_geocoded'


def legacy_file_name(url):
    """Return the old-style base file name (without suffix) for *url*.

    The page name is the URL minus its first ``URL_PREFIX_LENGTH``
    characters, with parentheses percent-encoded.  The file name is that
    page name with ``/`` replaced by ``_``, followed by ``_`` and the first
    8 hex digits of the MD5 of the page name.
    """
    page = url[URL_PREFIX_LENGTH:]
    page = page.replace('(', '%28').replace(')', '%29')
    # hashlib needs bytes on Python 3; encoding keeps the digest identical
    # to the Python 2 behaviour for ASCII page names.
    digest = hashlib.md5(page.encode('utf-8')).hexdigest()[:8]
    return page.replace('/', '_') + '_' + digest


def rename_if_needed(old_name, new_name):
    """Move ``old_name + SUFFIX`` to ``new_name + SUFFIX`` when appropriate.

    The move happens only if the old file exists and the new one does not,
    so an already-present target is never overwritten.  Returns True if the
    file was moved, False otherwise.
    """
    source = old_name + SUFFIX
    target = new_name + SUFFIX
    if not os.path.exists(source) or os.path.exists(target):
        return False
    # Keep the historical log line, but do not hand it to a shell: file
    # names come from crawled data and may contain quotes, `$` or backticks.
    print('mv "%s" "%s"' % (source, target))
    shutil.move(source, target)
    return True


def main(stream=None):
    """Process rename requests from *stream* (defaults to ``sys.stdin``)."""
    if stream is None:
        stream = sys.stdin
    for line in stream:
        if not line.strip():
            # Tolerate blank lines (e.g. a trailing newline at end of input).
            continue
        url, _title, new_name = json.loads(line)
        rename_if_needed(legacy_file_name(url), new_name)


if __name__ == '__main__':
    main()