Welcome to mirror list, hosted at ThFree Co, Russian Federation.

wikitravel-crawler.sh « crawler - github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: dee0e843a134c097dd0bd5b6f19b4060b5466d51 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#!/bin/bash
#
# Wikitravel crawler driver: runs every stage of the crawl in order, in the
# current working directory. All intermediate files (wikitravel-*.html,
# wikitravel-*.json, wikitravel-images*.urls) are read from and written to the
# current directory; the helper scripts are looked up next to this script.
#
# -e: stop at the first failing stage, -u: unset variables are errors,
# -x: trace every command as it runs.
set -e -u -x

# Directory holding this script and its helper scripts.
MY_PATH=$(dirname "$0")

# Presumably produces the wikitravel-redirects-*.html and
# wikitravel-pages-*.html files consumed below -- confirm against the helper.
"$MY_PATH/wikitravel-download-lists.sh"

# NOTE(review): inside a bracket expression `\s` is not a whitespace class and
# `*` allows zero matches, so this filter effectively drops every line that
# contains a ':'. Presumably meant to skip namespaced titles -- confirm.
# The pattern is kept as-is to preserve the current output.
cat wikitravel-redirects-*.html \
  | python "$MY_PATH/wikitravel-process-redirects.py" \
  | grep -v '[^\s]*:' \
  > wikitravel-redirects.json

cat wikitravel-pages-*.html \
  | python "$MY_PATH/wikitravel-process-pages.py" \
  > wikitravel-pages.json

echo "Total pages:"
wc -l wikitravel-pages.json

# Each of the following stages receives the page list on stdin.
python "$MY_PATH/wikitravel-download-pages.py" < wikitravel-pages.json

python "$MY_PATH/wikitravel-geocode-yahoo.py" < wikitravel-pages.json

python "$MY_PATH/wikitravel-geocode-google.py" < wikitravel-pages.json

python "$MY_PATH/wikitravel-process-articles.py" < wikitravel-pages.json

python "$MY_PATH/wikitravel-optimize-articles.py" < wikitravel-pages.json

# Presumably writes the collected image URLs to the named file -- confirm.
"$MY_PATH/extract-image-urls.sh" wikitravel-images.urls

# The second argument must be the very file wget reads below; the original
# passed "wikitravel-images-normalized.url" (missing the trailing "s"), so
# wget was pointed at a file this step never created.
"$MY_PATH/normalize-image-urls.sh" wikitravel-images.urls wikitravel-images-normalized.urls

# Be polite to the server: randomized ~1s pause between requests, and do not
# re-download files that already exist locally.
wget --wait=1 --random-wait --no-clobber -i wikitravel-images-normalized.urls

# TODO: Run publisher.