Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/tools
diff options
context:
space:
mode:
author: Maksim Andrianov <maksimandrianov1@gmail.com>, 2019-04-12 17:22:57 +0300
committer: Arsentiy Milchakov <milcars@mapswithme.com>, 2019-04-12 18:04:26 +0300
commit: ce150f2169b1539eda6e845230102c8eca6e4d3a (patch)
tree: a4a68a7d68267490df6f7b80b741ce54755879b9 /tools
parent: 5768a61455f7a35e430b8a093ad5d2d159013d6b (diff)
[generator] New structure python projects. Refactored booking.
Diffstat (limited to 'tools')
-rw-r--r-- tools/python/booking/__init__.py | 0
-rw-r--r-- tools/python/booking/api/__init__.py | 0
-rw-r--r-- tools/python/booking/api/booking_api.py | 119
-rw-r--r-- tools/python/booking/api/exceptions.py | 14
-rwxr-xr-x tools/python/booking/download_hotels.py (renamed from tools/python/booking_hotels.py) | 144
-rw-r--r-- tools/python/booking/requirements.txt | 4
-rw-r--r-- tools/python/eviltransform.py | 149
-rwxr-xr-x tools/unix/generate_planet.sh | 2
8 files changed, 147 insertions, 285 deletions
diff --git a/tools/python/booking/__init__.py b/tools/python/booking/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/tools/python/booking/__init__.py
diff --git a/tools/python/booking/api/__init__.py b/tools/python/booking/api/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/tools/python/booking/api/__init__.py
diff --git a/tools/python/booking/api/booking_api.py b/tools/python/booking/api/booking_api.py
new file mode 100644
index 0000000000..4a854ac657
--- /dev/null
+++ b/tools/python/booking/api/booking_api.py
@@ -0,0 +1,119 @@
+import logging
+from functools import partial
+from random import randint
+from threading import Event
+from time import sleep
+
+import requests
+from ratelimit import limits, sleep_and_retry
+
+from .exceptions import AttemptsSpentError, HTTPError
+
+# Passed as calls= to the @limits(..., period=60) decorator on BookingApi.call_endpoint.
+LIMIT_REQUESTS_PER_MINUTE = 400
+# Number of request attempts call_endpoint makes before raising AttemptsSpentError.
+ATTEMPTS_COUNT = 10
+# (min, max) bounds, in seconds, handed to randint() to pick the pause after an HTTP 429 reply.
+MINMAX_LIMIT_WAIT_AFTER_429_ERROR_SECONDS = (30, 120)
+
+
+class BookingApi:
+    """Thin client for the Booking.com distribution JSON API.
+
+    Every name listed in ENDPOINTS is also exposed as an instance attribute,
+    so ``api.hotels(**params)`` is shorthand for
+    ``api.call_endpoint("hotels", **params)``.
+    """
+
+    # Endpoint name -> kind of result; BookingListApi wraps the ones marked "list".
+    ENDPOINTS = {
+        "countries": "list",
+        "hotels": "list"
+    }
+
+    def __init__(self, login, password, version):
+        """Store the credentials and build the base URL for an API version.
+
+        :param login: user name for HTTP basic auth.
+        :param password: password for HTTP basic auth.
+        :param version: API version as "<major>.<minor>" with major >= 2 and
+            minor between 0 and 4.
+        :raises ValueError: if ``version`` is malformed or out of range.
+        """
+        major_minor = version.split(".")
+        # Explicit checks instead of assert, which is stripped under "python -O".
+        if len(major_minor) != 2:
+            raise ValueError(
+                f"Version must look like '<major>.<minor>', got {version!r}.")
+        major, minor = (int(x) for x in major_minor)
+        if major < 2 or not 0 <= minor <= 4:
+            raise ValueError(f"Unsupported API version {version!r}.")
+
+        # Gate for call_endpoint: cleared while a 429 pause is in progress.
+        self._event = Event()
+        self._event.set()
+        self._timeout = 5 * 60  # in seconds
+        self._login = login
+        self._password = password
+        self._base_url = f"https://distribution-xml.booking.com/{version}/json"
+        self._set_endpoints()
+
+    @sleep_and_retry
+    @limits(calls=LIMIT_REQUESTS_PER_MINUTE, period=60)
+    def call_endpoint(self, endpoint, **params):
+        """Call an endpoint and return the "result" field of its JSON reply.
+
+        Up to ATTEMPTS_COUNT requests are made. A read timeout, or a 429 reply
+        (after the pause taken in _handle_errors), uses up one attempt and the
+        request is sent again.
+
+        :param endpoint: path segment appended to the base URL.
+        :param params: query parameters forwarded to the request.
+        :raises HTTPError: on a non-200 reply other than 429.
+        :raises AttemptsSpentError: when every attempt timed out or got a 429.
+        """
+        # Blocks while a 429 pause started by any caller is in progress.
+        self._event.wait()
+        url = f"{self._base_url}/{endpoint}"
+        for _ in range(ATTEMPTS_COUNT):
+            try:
+                response = requests.get(url,
+                                        auth=(self._login, self._password),
+                                        params=params, timeout=self._timeout)
+            except requests.exceptions.ReadTimeout:
+                logging.exception("Timeout error.")
+                continue
+            if response.status_code == 200:
+                return response.json()["result"]
+            self._handle_errors(response)
+        raise AttemptsSpentError(f"{ATTEMPTS_COUNT} attempts were spent.")
+
+    def _handle_errors(self, response):
+        """React to a non-200 reply: pause on 429, raise HTTPError otherwise.
+
+        :param response: the ``requests`` response with a non-200 status.
+        :raises HTTPError: for every status code except 429.
+        """
+        try:
+            data = response.json()
+        except ValueError:
+            # The body is not JSON; report the raw text instead of letting a
+            # decode error hide the HTTP status.
+            data = response.text
+        try:
+            error_message = ",".join(x["message"] for x in data["errors"])
+        except (KeyError, TypeError):
+            # No "errors" list in the expected shape; fall back to the payload.
+            error_message = data
+
+        if response.status_code != 429:
+            raise HTTPError(
+                f"Http error with code {response.status_code}: {error_message}.")
+
+        # Hold other callers at the gate in call_endpoint for the pause.
+        self._event.clear()
+        try:
+            wait_seconds = randint(*MINMAX_LIMIT_WAIT_AFTER_429_ERROR_SECONDS)
+            logging.warning(f"Http error {response.status_code}: {error_message}. "
+                            f"It waits {wait_seconds} seconds and tries again.")
+            sleep(wait_seconds)
+        finally:
+            # Reopen the gate even if the pause is interrupted, otherwise
+            # callers blocked in call_endpoint would wait forever.
+            self._event.set()
+
+    def _set_endpoints(self):
+        """Bind each ENDPOINTS name to call_endpoint as an instance attribute."""
+        for endpoint in BookingApi.ENDPOINTS:
+            setattr(self, endpoint, partial(self.call_endpoint, endpoint))
+
+
+class BookingListApi:
+    """Wrapper that downloads "list" endpoints of a BookingApi page by page.
+
+    Every endpoint marked "list" in BookingApi.ENDPOINTS is exposed as an
+    instance attribute, so ``list_api.hotels(**params)`` is shorthand for
+    ``list_api.call_endpoint("hotels", **params)``.
+    """
+
+    # Page size sent as "rows"; a shorter page marks the end of the data.
+    _ROWS_BY_REQUEST = 1000
+
+    def __init__(self, api):
+        """:param api: object providing ``call_endpoint(endpoint, **params)``."""
+        self.api = api
+        self._set_endpoints()
+
+    def call_endpoint(self, endpoint, **params):
+        """Return all rows of a list endpoint, concatenated across pages.
+
+        Pages of _ROWS_BY_REQUEST rows are requested at increasing offsets
+        until a page comes back shorter than that.
+
+        :param endpoint: endpoint name passed through to the wrapped api.
+        :param params: extra query parameters sent with every page request.
+        """
+        result = []
+        offset = 0
+        while True:
+            resp = self._call_endpoint_offset(offset, endpoint, **params)
+            result.extend(resp)
+            if len(resp) < BookingListApi._ROWS_BY_REQUEST:
+                break
+            offset += BookingListApi._ROWS_BY_REQUEST
+        return result
+
+    def _call_endpoint_offset(self, offset, endpoint, **params):
+        """Fetch the single page that starts at ``offset``.
+
+        :raises TypeError: if the endpoint result is not a list.
+        """
+        # Paging keys go last so a caller-supplied "rows" cannot override the
+        # page size: call_endpoint compares each page against
+        # _ROWS_BY_REQUEST, so a smaller page size would end the download
+        # after the first page.
+        r = self.api.call_endpoint(endpoint, **{
+            **params,
+            "offset": offset,
+            "rows": BookingListApi._ROWS_BY_REQUEST
+        })
+        if not isinstance(r, list):
+            raise TypeError(f"Result has unexpected type {type(r)}")
+        return r
+
+    def _set_endpoints(self):
+        """Bind each "list" endpoint name to call_endpoint as an attribute."""
+        for endpoint in BookingApi.ENDPOINTS:
+            if BookingApi.ENDPOINTS[endpoint] == "list":
+                setattr(self, endpoint, partial(self.call_endpoint, endpoint))
\ No newline at end of file
diff --git a/tools/python/booking/api/exceptions.py b/tools/python/booking/api/exceptions.py
new file mode 100644
index 0000000000..784149ea99
--- /dev/null
+++ b/tools/python/booking/api/exceptions.py
@@ -0,0 +1,14 @@
+class BookingError(Exception):
+    """Common base class of the exceptions defined in this module."""
+
+
+class HTTPError(BookingError):
+    """Raised by BookingApi for a non-200 reply other than 429."""
+
+
+class AttemptsSpentError(BookingError):
+    """Raised by BookingApi.call_endpoint once all ATTEMPTS_COUNT attempts are used."""
+
+
+class GettingMinPriceError(BookingError):
+    """Imported by download_hotels.py; presumably a failed minimum-price lookup (raising site not shown here)."""
diff --git a/tools/python/booking_hotels.py b/tools/python/booking/download_hotels.py
index ed036eecca..01d2ab698e 100755
--- a/tools/python/booking_hotels.py
+++ b/tools/python/booking/download_hotels.py
@@ -1,5 +1,4 @@
#!/usr/bin/env python
-# coding: utf8
import argparse
import datetime
import logging
@@ -10,145 +9,19 @@ from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor, as_completed
from functools import partial
from multiprocessing.pool import ThreadPool
-from random import randint
-from threading import Event
-from time import sleep
-import eviltransform
import math
-import requests
-from ratelimit import limits, sleep_and_retry
+from eviltransform import gcj2wgs_exact
from tqdm import tqdm
-LIMIT_REQUESTS_PER_MINUTE = 400
-ATTEMPTS_COUNT = 10
-MINMAX_LIMIT_WAIT_AFTER_429_ERROR_SECONDS = (30, 120)
+from api.booking_api import BookingApi, BookingListApi, LIMIT_REQUESTS_PER_MINUTE
+from api.exceptions import GettingMinPriceError
+
SUPPORTED_LANGUAGES = ("en", "ru", "ar", "cs", "da", "nl", "fi", "fr", "de",
"hu", "id", "it", "ja", "ko", "pl", "pt", "ro", "es",
"sv", "th", "tr", "uk", "vi", "zh", "he", "sk", "el")
-class AppError(Exception):
- pass
-
-
-class HTTPError(AppError):
- pass
-
-
-class AttemptsSpentError(AppError):
- pass
-
-
-class GettingMinPriceError(AppError):
- pass
-
-
-class BookingApi:
- ENDPOINTS = {
- "countries": "list",
- "hotels": "list"
- }
-
- def __init__(self, login, password, version):
- major_minor = version.split(".")
- assert len(major_minor) == 2
- assert int(major_minor[0]) >= 2
- assert 0 <= int(major_minor[1]) <= 4
-
- self._event = Event()
- self._event.set()
- self._timeout = 5 * 60 # in seconds
- self._login = login
- self._password = password
- self._base_url = f"https://distribution-xml.booking.com/{version}/json"
- self._set_endpoints()
-
- @sleep_and_retry
- @limits(calls=LIMIT_REQUESTS_PER_MINUTE, period=60)
- def call_endpoint(self, endpoint, **params):
- self._event.wait()
- try:
- attempts = ATTEMPTS_COUNT
- while attempts:
- attempts -= 1
- response = None
- try:
- response = requests.get(f"{self._base_url}/{endpoint}",
- auth=(self._login, self._password),
- params=params, timeout=self._timeout)
- except requests.exceptions.ReadTimeout:
- logging.exception("Timeout error.")
- continue
- if response.status_code == 200:
- data = response.json()
- return data["result"]
- else:
- self._handle_errors(response)
- raise AttemptsSpentError(f"{ATTEMPTS_COUNT} attempts were spent.")
- except Exception as e:
- if not self._event.is_set():
- self._event.set()
- raise e
-
- def _handle_errors(self, response):
- error_message = ""
- data = response.json()
- try:
- error_message = ",".join(x["message"] for x in data["errors"])
- except KeyError:
- error_message = data
-
- if response.status_code == 429:
- self._event.clear()
- wait_seconds = randint(*MINMAX_LIMIT_WAIT_AFTER_429_ERROR_SECONDS)
- logging.warning(f"Http error {response.status_code}: {error_message}. "
- f"It waits {wait_seconds} seconds and tries again.")
- sleep(wait_seconds)
- self._event.set()
- else:
- raise HTTPError(
- f"Http error with code {response.status_code}: {error_message}.")
-
- def _set_endpoints(self):
- for endpoint in BookingApi.ENDPOINTS:
- setattr(self, endpoint, partial(self.call_endpoint, endpoint))
-
-
-class BookingListApi:
- _ROWS_BY_REQUEST = 1000
-
- def __init__(self, api):
- self.api = api
- self._set_endpoints()
-
- def call_endpoint(self, endpoint, **params):
- result = []
- offset = 0
- while True:
- resp = self._call_endpoint_offset(offset, endpoint, **params)
- result.extend(resp)
- if len(resp) < BookingListApi._ROWS_BY_REQUEST:
- break
- offset += BookingListApi._ROWS_BY_REQUEST
- return result
-
- def _call_endpoint_offset(self, offset, endpoint, **params):
- r = self.api.call_endpoint(endpoint, **{
- "offset": offset,
- "rows": BookingListApi._ROWS_BY_REQUEST,
- **params
- })
- if not isinstance(r, list):
- raise TypeError(f"Result has unexpected type {type(r)}")
- return r
-
- def _set_endpoints(self):
- for endpoint in BookingApi.ENDPOINTS:
- if BookingApi.ENDPOINTS[endpoint] == "list":
- setattr(self, endpoint, partial(self.call_endpoint, endpoint))
-
-
class BookingGen:
def __init__(self, api, country):
self.api = api
@@ -210,7 +83,7 @@ class BookingGen:
hotel_data = hotel["hotel_data"]
location = hotel_data["location"]
try:
- location["latitude"], location["longitude"] = eviltransform.gcj2wgs_exact(
+ location["latitude"], location["longitude"] = gcj2wgs_exact(
float(location["latitude"]), float(location["longitude"])
)
except ValueError:
@@ -304,13 +177,15 @@ def download_hotels_by_country(api, country):
return rows
-def download(country_code, user, password, path, threads_count, progress_bar):
+def download(country_code, user, password, path, threads_count,
+ progress_bar=tqdm(disable=True)):
api = BookingApi(user, password, "2.4")
list_api = BookingListApi(api)
countries = list_api.countries(languages="en")
if country_code is not None:
countries = list(filter(lambda x: x["country"] in country_code, countries))
logging.info(f"There is {len(countries)} countries.")
+ progress_bar.desc = "Countries"
progress_bar.total = len(countries)
with open(path, "w") as f:
with ThreadPool(threads_count) as pool:
@@ -323,8 +198,7 @@ def download(country_code, user, password, path, threads_count, progress_bar):
def process_options():
parser = argparse.ArgumentParser(description="Download and process booking hotels.")
- parser.add_argument("-q", "--quiet", action="store_false", dest="verbose")
- parser.add_argument("-v", "--verbose", action="store_true", dest="verbose")
+ parser.add_argument("-v", "--verbose", action="store_true")
parser.add_argument("--logfile", default="",
help="Name and destination for log file")
parser.add_argument("--password", required=True, dest="password",
diff --git a/tools/python/booking/requirements.txt b/tools/python/booking/requirements.txt
new file mode 100644
index 0000000000..4e17d841e3
--- /dev/null
+++ b/tools/python/booking/requirements.txt
@@ -0,0 +1,4 @@
+eviltransform
+ratelimit
+requests
+tqdm
diff --git a/tools/python/eviltransform.py b/tools/python/eviltransform.py
deleted file mode 100644
index 670ac93fbe..0000000000
--- a/tools/python/eviltransform.py
+++ /dev/null
@@ -1,149 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-# Source: https://github.com/googollee/eviltransform
-# Published under 2-clause BSD license
-# Copyright (c) 2015, Googol Lee <i@googol.im>, @gutenye, @xingxing, @bewantbe,
-# @GhostFlying, @larryli, @gumblex,@lbt05, @chenweiyj
-
-import math
-
-
-__all__ = ['wgs2gcj', 'gcj2wgs', 'gcj2wgs_exact',
- 'distance', 'gcj2bd', 'bd2gcj', 'wgs2bd', 'bd2wgs']
-
-earthR = 6378137.0
-
-def outOfChina(lat, lng):
- return not (72.004 <= lng <= 137.8347 and 0.8293 <= lat <= 55.8271)
-
-
-def transform(x, y):
- xy = x * y
- absX = math.sqrt(abs(x))
- xPi = x * math.pi
- yPi = y * math.pi
- d = 20.0*math.sin(6.0*xPi) + 20.0*math.sin(2.0*xPi)
-
- lat = d
- lng = d
-
- lat += 20.0*math.sin(yPi) + 40.0*math.sin(yPi/3.0)
- lng += 20.0*math.sin(xPi) + 40.0*math.sin(xPi/3.0)
-
- lat += 160.0*math.sin(yPi/12.0) + 320*math.sin(yPi/30.0)
- lng += 150.0*math.sin(xPi/12.0) + 300.0*math.sin(xPi/30.0)
-
- lat *= 2.0 / 3.0
- lng *= 2.0 / 3.0
-
- lat += -100.0 + 2.0*x + 3.0*y + 0.2*y*y + 0.1*xy + 0.2*absX
- lng += 300.0 + x + 2.0*y + 0.1*x*x + 0.1*xy + 0.1*absX
-
- return lat, lng
-
-
-def delta(lat, lng):
- ee = 0.00669342162296594323
- dLat, dLng = transform(lng-105.0, lat-35.0)
- radLat = lat / 180.0 * math.pi
- magic = math.sin(radLat)
- magic = 1 - ee * magic * magic
- sqrtMagic = math.sqrt(magic)
- dLat = (dLat * 180.0) / ((earthR * (1 - ee)) / (magic * sqrtMagic) * math.pi)
- dLng = (dLng * 180.0) / (earthR / sqrtMagic * math.cos(radLat) * math.pi)
- return dLat, dLng
-
-
-def wgs2gcj(wgsLat, wgsLng):
- if outOfChina(wgsLat, wgsLng):
- return wgsLat, wgsLng
- else:
- dlat, dlng = delta(wgsLat, wgsLng)
- return wgsLat + dlat, wgsLng + dlng
-
-
-def gcj2wgs(gcjLat, gcjLng):
- if outOfChina(gcjLat, gcjLng):
- return gcjLat, gcjLng
- else:
- dlat, dlng = delta(gcjLat, gcjLng)
- return gcjLat - dlat, gcjLng - dlng
-
-
-def gcj2wgs_exact(gcjLat, gcjLng):
- initDelta = 0.01
- threshold = 0.000001
- dLat = dLng = initDelta
- mLat = gcjLat - dLat
- mLng = gcjLng - dLng
- pLat = gcjLat + dLat
- pLng = gcjLng + dLng
- for i in range(30):
- wgsLat = (mLat + pLat) / 2
- wgsLng = (mLng + pLng) / 2
- tmplat, tmplng = wgs2gcj(wgsLat, wgsLng)
- dLat = tmplat - gcjLat
- dLng = tmplng - gcjLng
- if abs(dLat) < threshold and abs(dLng) < threshold:
- return wgsLat, wgsLng
- if dLat > 0:
- pLat = wgsLat
- else:
- mLat = wgsLat
- if dLng > 0:
- pLng = wgsLng
- else:
- mLng = wgsLng
- return wgsLat, wgsLng
-
-
-def distance(latA, lngA, latB, lngB):
- pi180 = math.pi / 180
- arcLatA = latA * pi180
- arcLatB = latB * pi180
- x = (math.cos(arcLatA) * math.cos(arcLatB) *
- math.cos((lngA - lngB) * pi180))
- y = math.sin(arcLatA) * math.sin(arcLatB)
- s = x + y
- if s > 1:
- s = 1
- if s < -1:
- s = -1
- alpha = math.acos(s)
- distance = alpha * earthR
- return distance
-
-
-def gcj2bd(gcjLat, gcjLng):
- if outOfChina(gcjLat, gcjLng):
- return gcjLat, gcjLng
-
- x = gcjLng
- y = gcjLat
- z = math.hypot(x, y) + 0.00002 * math.sin(y * math.pi)
- theta = math.atan2(y, x) + 0.000003 * math.cos(x * math.pi)
- bdLng = z * math.cos(theta) + 0.0065
- bdLat = z * math.sin(theta) + 0.006
- return bdLat, bdLng
-
-
-def bd2gcj(bdLat, bdLng):
- if outOfChina(bdLat, bdLng):
- return bdLat, bdLng
-
- x = bdLng - 0.0065
- y = bdLat - 0.006
- z = math.hypot(x, y) - 0.00002 * math.sin(y * math.pi)
- theta = math.atan2(y, x) - 0.000003 * math.cos(x * math.pi)
- gcjLng = z * math.cos(theta)
- gcjLat = z * math.sin(theta)
- return gcjLat, gcjLng
-
-
-def wgs2bd(wgsLat, wgsLng):
- return gcj2bd(*wgs2gcj(wgsLat, wgsLng))
-
-
-def bd2wgs(bdLat, bdLng):
- return gcj2wgs(*bd2gcj(bdLat, bdLng))
diff --git a/tools/unix/generate_planet.sh b/tools/unix/generate_planet.sh
index d95aeb8167..4b61212543 100755
--- a/tools/unix/generate_planet.sh
+++ b/tools/unix/generate_planet.sh
@@ -231,7 +231,7 @@ LOCALADS_SCRIPT="$PYTHON_SCRIPTS_PATH/local_ads/mwm_to_csv_4localads.py"
UGC_FILE="${UGC_FILE:-$INTDIR/ugc_db.sqlite3}"
POPULAR_PLACES_FILE="${POPULAR_PLACES_FILE:-$INTDIR/popular_places.csv}"
WIKIDATA_FILE="${WIKIDATA_FILE:-$INTDIR/idToWikidata.csv}"
-BOOKING_SCRIPT="$PYTHON_SCRIPTS_PATH/booking_hotels.py"
+BOOKING_SCRIPT="$PYTHON_SCRIPTS_PATH/booking/download_hotels.py"
BOOKING_FILE="${BOOKING_FILE:-$INTDIR/hotels.csv}"
OPENTABLE_SCRIPT="$PYTHON_SCRIPTS_PATH/opentable_restaurants.py"
OPENTABLE_FILE="${OPENTABLE_FILE:-$INTDIR/restaurants.csv}"