diff options
author | kvakanet <kvakanet@users.noreply.github.com> | 2017-03-04 07:25:41 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-03-04 07:25:41 +0300 |
commit | b056c83abae4ad30dc67fbaf564efbe3d4c9468e (patch) | |
tree | 8c0c33f0fcf08604e54aceae04a8e047a2775755 | |
parent | a651b05001fad0f2eeb9a97853f49e899336d663 (diff) |
Update for https://habrahabr.ru/article/318462/
Добавлена поддержка статей такого рода: https://habrahabr.ru/article/318462/
Также добавлена проверка наличия keywords.
-rw-r--r-- | habr/topic.py | 8 |
1 file changed, 5 insertions, 3 deletions
diff --git a/habr/topic.py b/habr/topic.py index 640b931..2988e62 100644 --- a/habr/topic.py +++ b/habr/topic.py @@ -46,7 +46,8 @@ class TMTopic(object): hubs = doc.xpath("//div[@class='hubs']/a") for h in hubs: self.post['hubs'].append((h.text, h.attrib['href'])) - post_title = doc.xpath('//h1[@class="post__title"]/span') + post_title = doc.xpath('//h1[@class="post__title"]/span') or \ + doc.xpath('//h1[@class="megapost-head__title"]') if len(post_title) == 0: raise PostDeleted('Post Deleted! {} gives status_code={}'.format(self.url, req.status_code)) self.post['title'] = post_title @@ -74,13 +75,14 @@ class TMTopic(object): #self.post['styles'] += style if (len(style)>2) else '' # post_keywords = doc.xpath("//meta[@name='keywords']/@content") - self.post['keywords'] = post_keywords[0].strip("\r\n") + self.post['keywords'] = post_keywords[0].strip("\r\n") if len(post_keywords) else '' ### # bug in class 'infopanel ' - space added tmp = doc.xpath( "//ul[@class='postinfo-panel postinfo-panel_post']//span[@class='oting-wjt__counter-score js-score']") self.post['rating'] = tmp[0].text if len(tmp) else '' - tmp = doc.xpath("//div[@class='content html_format']") + tmp = doc.xpath("//div[@class='content html_format']") or \ + doc.xpath('//div[@class="article__body"]') self.post['text'] = etree.tostring(tmp[0], pretty_print=True, method='html').decode('utf-8') \ if len(tmp) else '' self.post['comments'] = [] |