diff options
author | icoz <icoz.vt@gmail.com> | 2014-08-27 23:33:02 +0400 |
---|---|---|
committer | icoz <icoz.vt@gmail.com> | 2014-08-27 23:33:02 +0400 |
commit | abfa240f35c479f6fd672052cb5a17030a9da61c (patch) | |
tree | d7b8accfb9204b0a351618c57f1063c44fc09732 /habr | |
parent | 795368dfdafcdbd2297a0b5cc97674f02f9e7e2e (diff) |
Added class HabraTopic
Diffstat (limited to 'habr')
-rw-r--r-- | habr/topic.py | 81 |
1 files changed, 81 insertions, 0 deletions
"""Fetch and parse a single habrahabr.ru post page (habr/topic.py)."""

import pprint
from unittest import TestCase

from lxml import etree, html
import requests


__author__ = 'vlad'


class HabraTopic(object):
    """One habrahabr.ru post, downloaded and parsed at construction time.

    The parsed fields are kept in the ``post`` dict under the keys
    ``hubs``, ``title``, ``author``, ``rating``, ``text``, ``comments``
    and ``comments_count``; the accessor methods below expose most of them.
    """

    # Seconds to wait for the server before giving up. Without a timeout
    # ``requests.get`` may block indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 30

    def __init__(self, topic_id):
        '''
        Download and parse the post with the given id.

        :param topic_id: str or int with topic id
        :raises TypeError: if topic_id is neither str nor int
        :raises IOError: if the page does not answer with HTTP 200
        :raises IndexError: if an expected element is missing from the page
        '''
        if not isinstance(topic_id, (str, int)):
            raise TypeError('topic_id must be str or int!')
        self.url = 'http://habrahabr.ru/post/{}/'.format(topic_id)
        self.post = dict()
        self._parseTopic()

    @staticmethod
    def _first(doc, xpath):
        '''
        Return the first node matching xpath.

        Raises IndexError (the same type a bare ``[0]`` would raise) but
        names the expression that failed, so markup changes are easy to
        diagnose.
        '''
        found = doc.xpath(xpath)
        if not found:
            raise IndexError('No element matches xpath {!r}'.format(xpath))
        return found[0]

    def _parseTopic(self):
        '''
        Fetch self.url and fill self.post with the parsed fields.

        The result is built in a local dict and assigned at the end, so
        self.post is never left half-filled when parsing fails.
        '''
        req = requests.get(self.url, timeout=self.REQUEST_TIMEOUT)
        if req.status_code != 200:
            raise IOError('Not loaded! {} gives status_code={}'.format(self.url, req.status_code))
        doc = html.document_fromstring(req.text)

        post = dict()
        # Each hub is stored as a (link text, href) pair.
        hubs = doc.xpath("//div[@class='post_show']//div[@class='hubs']/a")
        post['hubs'] = [(h.text, h.attrib['href']) for h in hubs]
        post['title'] = self._first(doc, "//span[@class='post_title']").text
        post['author'] = self._first(doc, "//div[@class='author']/a").text
        # The trailing space in 'infopanel ' is intentional: @class is
        # compared as an exact string.
        post['rating'] = self._first(
            doc, "//div[@class='infopanel ']//span[@class='score']").text
        # Post body is kept as serialized markup, as returned by
        # etree.tostring.
        post['text'] = etree.tostring(
            self._first(doc, "//div[@class='content html_format']"),
            pretty_print=True)
        # Same exact-match caveat: the page's class is 'comments_list '
        # with a trailing space.
        comments = doc.xpath("//div[@class='comments_list ']//div[@class='comment_item']")
        post['comments'] = [etree.tostring(c) for c in comments]
        post['comments_count'] = len(comments)
        self.post = post

    def author(self):
        '''Return the author name as found on the page.'''
        return self.post['author']

    def text(self):
        '''Return the serialized markup of the post body.'''
        return self.post['text']

    def title(self):
        '''Return the post title.'''
        return self.post['title']

    def rating(self):
        '''Return the rating text exactly as it appears on the page.'''
        return self.post['rating']

    def comments(self):
        '''
        Return the serialized comments.

        A new list is returned so callers cannot mutate the parsed state;
        the items are serialized markup, so a shallow copy is enough.
        '''
        return list(self.post['comments'])

    def comments_count(self):
        '''Return the number of comments found on the page.'''
        return self.post['comments_count']


class TestHabraTopic(TestCase):
    '''Smoke test; constructing HabraTopic performs a real HTTP request.'''

    def test_topic(self):
        t = HabraTopic(231957)
        pp = pprint.PrettyPrinter(indent=4)
        pp.pprint(t.author())
        self.assertEqual(t.author(), 'yaklamm')
        pp.pprint(t.title())
        pp.pprint(t.post['comments_count'])
        pp.pprint(t.post['rating'])
\ No newline at end of file |