diff options
author | icoz <icoz.vt@gmail.com> | 2017-03-04 11:21:29 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-03-04 11:21:29 +0300 |
commit | 0de6bc02db68e68de416199abb2567e90168352b (patch) | |
tree | 8c0c33f0fcf08604e54aceae04a8e047a2775755 | |
parent | 4b4f644cf159fd1188c480a17929e11aa54dbc64 (diff) | |
parent | b056c83abae4ad30dc67fbaf564efbe3d4c9468e (diff) |
Merge pull request #16 from kvakanet/master
Изменено поле названия статьи для habrahabr
-rw-r--r-- | habr/topic.py | 12 |
1 files changed, 7 insertions, 5 deletions
diff --git a/habr/topic.py b/habr/topic.py
index e9c3631..2988e62 100644
--- a/habr/topic.py
+++ b/habr/topic.py
@@ -46,9 +46,10 @@ class TMTopic(object):
         hubs = doc.xpath("//div[@class='hubs']/a")
         for h in hubs:
             self.post['hubs'].append((h.text, h.attrib['href']))
-        post_title = doc.xpath('//h1[@class="post__title"]/span')
+        post_title = doc.xpath('//h1[@class="post__title"]/span') or \
+                     doc.xpath('//h1[@class="megapost-head__title"]')
         if len(post_title) == 0:
-            raise PostDeleted
+            raise PostDeleted('Post Deleted! {} gives status_code={}'.format(self.url, req.status_code))
         self.post['title'] = post_title
         tmp = \
             doc.xpath("//a[@class='post-type__value post-type__value_author']") or \
@@ -74,13 +75,14 @@ class TMTopic(object):
         #self.post['styles'] += style if (len(style)>2) else ''
         #
         post_keywords = doc.xpath("//meta[@name='keywords']/@content")
-        self.post['keywords'] = post_keywords[0].strip("\r\n")
+        self.post['keywords'] = post_keywords[0].strip("\r\n") if len(post_keywords) else ''
         ###
         # bug in class 'infopanel ' - space added
         tmp = doc.xpath(
             "//ul[@class='postinfo-panel postinfo-panel_post']//span[@class='oting-wjt__counter-score js-score']")
         self.post['rating'] = tmp[0].text if len(tmp) else ''
-        tmp = doc.xpath("//div[@class='content html_format']")
+        tmp = doc.xpath("//div[@class='content html_format']") or \
+              doc.xpath('//div[@class="article__body"]')
         self.post['text'] = etree.tostring(tmp[0], pretty_print=True, method='html').decode('utf-8') \
             if len(tmp) else ''
         self.post['comments'] = []
@@ -166,7 +168,7 @@ class TMTopic(object):
 class HabraTopic(TMTopic):
     def __init__(self, topic_id):
         super().__init__(topic_id, domain='habrahabr.ru')
-        self.post['title'] = self.post['title'][1].text
+        self.post['title'] = self.post['title'][0].text
 
 
 class GeektimesTopic(TMTopic):