diff options
author | Ilya Zverev <zverik@textual.ru> | 2016-06-10 16:21:55 +0300 |
---|---|---|
committer | Vladimir Byko-Ianko <v.bykoianko@corp.mail.ru> | 2016-06-23 19:26:24 +0300 |
commit | a011a5a5fad98919dd107eaa8d494488411fa5eb (patch) | |
tree | 49ea7076e8182a864c9baacd11a45446b7e7cc65 /tools | |
parent | 3ad9aad3f1dae455af101e659b54d8bf7342f22c (diff) |
[booking] Add translated name and address
Diffstat (limited to 'tools')
-rwxr-xr-x | tools/python/booking_hotels.py | 44 |
1 files changed, 36 insertions, 8 deletions
```diff
diff --git a/tools/python/booking_hotels.py b/tools/python/booking_hotels.py
index ca63585e6e..7e4a5cc9fc 100755
--- a/tools/python/booking_hotels.py
+++ b/tools/python/booking_hotels.py
@@ -81,26 +81,45 @@ def download(user, password, path):
         countrycode = country['countrycode']
         logging.info(u'Download[{0}]: {1}'.format(countrycode, country['name']))
 
-        allhotels = []
+        allhotels = {}
         while True:
             hotels = api.call('getHotels',
                               dict(new_hotel_type=1, offset=len(allhotels), rows=maxrows, countrycodes=countrycode))
 
             # Check for error.
-            if not hotels:
+            if hotels is None:
                 exit(1)
 
-            allhotels.extend(hotels)
+            for h in hotels:
+                allhotels[h['hotel_id']] = h
 
             # If hotels in answer less then maxrows, we reach end of data.
             if len(hotels) < maxrows:
                 break
 
-        logging.info('Num of hotels: {0}'.format(len(allhotels)))
+        # Now the same for hotel translations
+        offset = 0
+        while True:
+            hotels = api.call('getHotelTranslations', dict(offset=offset, rows=maxrows, countrycodes=countrycode))
+            if hotels is None:
+                exit(1)
+
+            # Add translations for each hotel
+            for h in hotels:
+                if h['hotel_id'] in allhotels:
+                    if 'translations' not in allhotels[h['hotel_id']]:
+                        allhotels[h['hotel_id']]['translations'] = {}
+                    allhotels[h['hotel_id']]['translations'][h['languagecode']] = {'name': h['name'], 'address': h['address']}
+
+            offset += len(hotels)
+            if len(hotels) < maxrows:
+                break
+
+        logging.info('Num of hotels: {0}, translations: {1}'.format(len(allhotels), offset))
         filename = os.path.join(path,
                                 '{0} - {1}.pkl'.format(country['area'].encode('utf8'), country['name'].encode('utf8')))
         with open(filename, 'wb') as fd:
-            pickle.dump(allhotels, fd, pickle.HIGHEST_PROTOCOL)
+            pickle.dump(allhotels.values(), fd, pickle.HIGHEST_PROTOCOL)
 
 
 def translate(source, output):
@@ -110,7 +129,7 @@ def translate(source, output):
     files = [filename for filename in os.listdir(source) if filename.endswith('.pkl')]
 
     data = []
-    for filename in files:
+    for filename in sorted(files):
         logging.info('Processing {0}'.format(filename))
         with open(filename, 'rb') as fd:
             data += pickle.load(fd)
@@ -155,8 +174,17 @@ def translate(source, output):
                 # Find a range that contains the price
                 while rate <= len(rates) and price > avg * rates[rate - 1]:
                     rate += 1
-            l = [unicode(get_hotel_field(hotel, e, rate)).encode('utf8').replace('\t', ' ') for e in HOTEL_FIELDS]
-            print('\t'.join(l), file=fd)
+            l = [get_hotel_field(hotel, e, rate) for e in HOTEL_FIELDS]
+            # Add translations for hotel name and address if present.
+            if 'translations' in hotel:
+                tr_lang = hotel['languagecode']
+                if tr_lang not in hotel['translations']:
+                    tr_lang = hotel['translations'].keys()[0]
+                l.append(tr_lang)
+                l.extend([hotel['translations'][tr_lang][e] for e in ('name', 'address')])
+            else:
+                l.extend([''] * 3)
+            print('\t'.join([unicode(f).encode('utf8').replace('\t', ' ') for f in l]), file=fd)
 
 
 def process_options():
```