Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/icoz/habraparse.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author icoz <icoz.vt@gmail.com> 2017-03-04 11:21:29 +0300
committer GitHub <noreply@github.com> 2017-03-04 11:21:29 +0300
commit 0de6bc02db68e68de416199abb2567e90168352b (patch)
tree 8c0c33f0fcf08604e54aceae04a8e047a2775755
parent 4b4f644cf159fd1188c480a17929e11aa54dbc64 (diff)
parent b056c83abae4ad30dc67fbaf564efbe3d4c9468e (diff)
Merge pull request #16 from kvakanet/master
Изменено поле названия статьи для habrahabr
-rw-r--r-- habr/topic.py 12
1 files changed, 7 insertions, 5 deletions
diff --git a/habr/topic.py b/habr/topic.py
index e9c3631..2988e62 100644
--- a/habr/topic.py
+++ b/habr/topic.py
@@ -46,9 +46,10 @@ class TMTopic(object):
hubs = doc.xpath("//div[@class='hubs']/a")
for h in hubs:
self.post['hubs'].append((h.text, h.attrib['href']))
- post_title = doc.xpath('//h1[@class="post__title"]/span')
+ post_title = doc.xpath('//h1[@class="post__title"]/span') or \
+ doc.xpath('//h1[@class="megapost-head__title"]')
if len(post_title) == 0:
- raise PostDeleted
+ raise PostDeleted('Post Deleted! {} gives status_code={}'.format(self.url, req.status_code))
self.post['title'] = post_title
tmp = \
doc.xpath("//a[@class='post-type__value post-type__value_author']") or \
@@ -74,13 +75,14 @@ class TMTopic(object):
#self.post['styles'] += style if (len(style)>2) else ''
#
post_keywords = doc.xpath("//meta[@name='keywords']/@content")
- self.post['keywords'] = post_keywords[0].strip("\r\n")
+ self.post['keywords'] = post_keywords[0].strip("\r\n") if len(post_keywords) else ''
###
# bug in class 'infopanel ' - space added
tmp = doc.xpath(
"//ul[@class='postinfo-panel postinfo-panel_post']//span[@class='oting-wjt__counter-score js-score']")
self.post['rating'] = tmp[0].text if len(tmp) else ''
- tmp = doc.xpath("//div[@class='content html_format']")
+ tmp = doc.xpath("//div[@class='content html_format']") or \
+ doc.xpath('//div[@class="article__body"]')
self.post['text'] = etree.tostring(tmp[0], pretty_print=True, method='html').decode('utf-8') \
if len(tmp) else ''
self.post['comments'] = []
@@ -166,7 +168,7 @@ class TMTopic(object):
class HabraTopic(TMTopic):
def __init__(self, topic_id):
super().__init__(topic_id, domain='habrahabr.ru')
- self.post['title'] = self.post['title'][1].text
+ self.post['title'] = self.post['title'][0].text
class GeektimesTopic(TMTopic):