diff options
author | icoz <icoz.vt@gmail.com> | 2015-02-01 21:33:30 +0300 |
---|---|---|
committer | icoz <icoz.vt@gmail.com> | 2015-02-01 21:33:30 +0300 |
commit | b263fe35ced1247452ea63ca06ea6d3bcde7948f (patch) | |
tree | cabe279b24105168c6abed56e437b89c12285d7e /habr | |
parent | 03547173c23632acbd5950760dd068c1a4b30ae3 (diff) |
Adding support for geektimes, megamozg
HabraTopic -> TMTopic
Created classes: HabraTopic(TMTopic), GeektimesTopic(TMTopic), MegamozgTopic(TMTopic)
HabraUser -> TMUser
Created classes: HabraUser(TMUser), GeektimesUser(TMUser), MegamozgUser(TMUser)
Added tests (checks for empty XPath results, with default values) in TMUser._parseUserpage()
Diffstat (limited to 'habr')
-rw-r--r-- | habr/topic.py | 66 | ||||
-rw-r--r-- | habr/user.py | 131 |
2 files changed, 159 insertions, 38 deletions
diff --git a/habr/topic.py b/habr/topic.py index ef1dd9d..91e7820 100644 --- a/habr/topic.py +++ b/habr/topic.py @@ -12,25 +12,27 @@ class PostDeleted(Exception): pass -class HabraTopic(object): - def __init__(self, topic_id): +class TMTopic(object): + def __init__(self, topic_id, domain='habrahabr.ru'): ''' init :param topic_id: str or int with topic id :return: ''' + self.domain = domain if isinstance(topic_id, (str, int)): - self.url = self.getTopicUrl(topic_id) self.post = dict() self._topic_id = topic_id + self.url = self._getTopicUrl(topic_id) self._parseTopic() else: raise TypeError('topic_id must be str or int!') + def getTopicUrl(self): + return self.url - @staticmethod - def getTopicUrl(topic_id): - return str('http://habrahabr.ru/post/{}/').format(topic_id) + def _getTopicUrl(self, topic_id): + return str('http://{domain}/post/{tid}/').format(domain=self.domain, tid=topic_id) def _parseTopic(self): ''' @@ -84,6 +86,19 @@ class HabraTopic(object): return self._topic_id +class HabraTopic(TMTopic): + def __init__(self, topic_id): + super().__init__(topic_id, domain='habrahabr.ru') + +class GeektimesTopic(TMTopic): + def __init__(self, topic_id): + super().__init__(topic_id, domain='geektimes.ru') + +class MegamozgTopic(TMTopic): + def __init__(self, topic_id): + super().__init__(topic_id, domain='megamozg.ru') + + import pprint @@ -105,3 +120,42 @@ class TestHabraTopic(TestCase): pp.pprint(t.title()) pp.pprint(t.post['comments_count']) pp.pprint(t.post['rating']) + + +class TestGTTopic(TestCase): + def test_topic(self): + t = GeektimesTopic(243447) + pp = pprint.PrettyPrinter(indent=4) + pp.pprint(t.author()) + self.assertEqual(t.author(), 'SOUNDPAL') + pp.pprint(t.title()) + pp.pprint(t.post['comments_count']) + pp.pprint(t.post['rating']) + + def test_topic2(self): + t = GeektimesTopic(245130) + pp = pprint.PrettyPrinter(indent=4) + pp.pprint(t.author()) + self.assertEqual(t.author(), 'Robotex') + pp.pprint(t.title()) + pp.pprint(t.post['comments_count']) 
+ pp.pprint(t.post['rating']) + +class TestMMTopic(TestCase): + def test_topic(self): + t = MegamozgTopic(418) + pp = pprint.PrettyPrinter(indent=4) + pp.pprint(t.author()) + self.assertEqual(t.author(), 'Kirilkin') + pp.pprint(t.title()) + pp.pprint(t.post['comments_count']) + pp.pprint(t.post['rating']) + + def test_topic2(self): + t = MegamozgTopic(8568) + pp = pprint.PrettyPrinter(indent=4) + pp.pprint(t.author()) + self.assertEqual(t.author(), 'jasiejames') + pp.pprint(t.title()) + pp.pprint(t.post['comments_count']) + pp.pprint(t.post['rating']) diff --git a/habr/user.py b/habr/user.py index 42d1d37..bae5d34 100644 --- a/habr/user.py +++ b/habr/user.py @@ -20,14 +20,16 @@ def get_pages(doc): return pages -class HabraUser(object): - def __init__(self, username, need_favorites=False, need_user_posts=False): +class TMUser(object): + def __init__(self, username, need_favorites=False, need_user_posts=False, domain='habrahabr.ru'): + self._domain = domain self._username = username self._user = dict() self._user_karma = dict() self._user_profile = dict() self._user_activity = dict() + print(self._genUrlForUsername(username)) req_data = requests.get(self._genUrlForUsername(username)).text self._doc = html.document_fromstring(req_data) self._parseUserpage() @@ -79,7 +81,6 @@ class HabraUser(object): string with URL ''' return self._genUrlForUsername(username) + 'favorites/' - # 'http://habrahabr.ru/users/{}/favorites'.format(username) def _genUrlForUsername(self, username): ''' @@ -90,7 +91,7 @@ class HabraUser(object): :return: string with URL ''' - return 'http://habrahabr.ru/users/{}/'.format(username) + return 'http://{domain}/users/{username}/'.format(domain=self._domain, username=username) def _getUserCompanyList(self): @@ -110,36 +111,48 @@ class HabraUser(object): def _parseUserpage(self): - + # print(self._doc) p_tags = self._doc.xpath("//div[@class='user_profile']//ul[@id='people-tags']//a/span") registration_date = 
self._doc.xpath("//div[@class='user_profile']//dd[@class='grey']")[0].text - self._user['username'] = self._doc.xpath("//div[@class='user_header']/h2/a").pop().text - self._user_karma['karma'] = float( - self._doc.xpath("//div[@class='karma']//div[@class='num']").pop().text.replace(',', '.')) - self._user_karma['karma_vote'] = int( - self._doc.xpath("//div[@class='karma']/div[@class='votes']").pop().text.split(' ')[0]) - self._user_karma['rating'] = float( - self._doc.xpath("//div[@class='rating']/div[@class='num']").pop().text.replace(',', '.')) - self._user_profile['fullname'] = self._doc.xpath( - "//div[@class='user_profile']/div[@class='fullname']").pop().text.strip() - self._user_karma['rating_place'] = int( - self._doc.xpath("//div[@class='user_profile']/div[@class='rating-place']").pop().text.split('-')[0]) - if len(self._doc.xpath("//div[@class='user_profile']//dd[@class='bday']")): - self._user_profile['birthday'] = self._doc.xpath("//div[@class='user_profile']//dd[@class='bday']")[0].text - self._user_profile['country'] = self._doc.xpath("//div[@class='user_profile']//dd/a[@class='country-name']")[ - 0].text - self._user_profile['region'] = self._doc.xpath("//div[@class='user_profile']//dd/a[@class='region']")[0].text - self._user_profile['city'] = self._doc.xpath("//div[@class='user_profile']//dd/a[@class='city']")[0].text + tmp = self._doc.xpath("//div[@class='user_header']/h2/a") + self._user['username'] = tmp.pop().text if len(tmp) else '' + + tmp = self._doc.xpath("//div[@class='karma']//div[@class='num']") + self._user_karma['karma'] = float(tmp.pop().text.replace(',', '.')) if len(tmp) else 0.0 + + tmp = self._doc.xpath("//div[@class='karma']/div[@class='votes']") + self._user_karma['karma_vote'] = int(tmp.pop().text.split(' ')[0]) if len(tmp) else 0 + + tmp = self._doc.xpath("//div[@class='rating']/div[@class='num']") + self._user_karma['rating'] = float(tmp.pop().text.replace(',', '.')) if len(tmp) else 0.0 + + tmp = 
self._doc.xpath("//div[@class='user_profile']/div[@class='fullname']") + self._user_profile['fullname'] = tmp.pop().text.strip() if len(tmp) else '' + + tmp = self._doc.xpath("//div[@class='user_profile']/div[@class='rating-place']") + self._user_karma['rating_place'] = int(tmp.pop().text.split('-')[0]) if len(tmp) else 0 + + tmp = self._doc.xpath("//div[@class='user_profile']//dd[@class='bday']") + self._user_profile['birthday'] = tmp[0].text if len(tmp) else '' + + tmp = self._doc.xpath("//div[@class='user_profile']//dd/a[@class='country-name']") + self._user_profile['country'] = tmp[0].text if len(tmp) else '' + tmp = self._doc.xpath("//div[@class='user_profile']//dd/a[@class='region']") + self._user_profile['region'] = tmp[0].text if len(tmp) else '' + tmp = self._doc.xpath("//div[@class='user_profile']//dd/a[@class='city']") + self._user_profile['city'] = tmp[0].text if len(tmp) else '' self._user_profile['people_tags'] = [i for i in map(lambda x: x.text, p_tags)] self._user_profile['registraion_date'] = registration_date[:registration_date.index('\r\n')] - self._user_activity['followers_count'] = int( - self._doc.xpath("//div[@class='stats']/div[@id='followers_count']/a").pop().text.split(' ')[0]) - self._user_activity['posts_count'] = int( - self._doc.xpath("//div[@class='stats']/div[@class='item posts_count']/a").pop().text.split(' ')[0]) - self._user_activity['comments_count'] = int( - self._doc.xpath("//div[@class='stats']/div[@class='item comments_count']/a").pop().text.split(' ')[0]) + tmp = self._doc.xpath("//div[@class='stats']/div[@id='followers_count']/a") + self._user_activity['followers_count'] = int(tmp.pop().text.split(' ')[0]) if len(tmp) else 0 + + tmp = self._doc.xpath("//div[@class='stats']/div[@class='item posts_count']/a") + self._user_activity['posts_count'] = int(tmp.pop().text.split(' ')[0]) if len(tmp) else 0 + + tmp = self._doc.xpath("//div[@class='stats']/div[@class='item comments_count']/a") + self._user_activity['comments_count'] = 
int(tmp.pop().text.split(' ')[0]) if len(tmp) else 0 self._user['company_list'] = self._getUserCompanyList() self._user['hubs_list'] = self._getUserHubList() @@ -167,7 +180,7 @@ class HabraUser(object): # topic_id = out[f.text] = str(f.attrib['href']).split('/')[-2] for p in range(2, pages): - url = 'http://habrahabr.ru/users/{0}/favorites/page{1}/'.format(self._username, p) + url = 'http://{0}/users/{1}/favorites/page{2}/'.format(self._domain, self._username, p) # if show_progress: # print('parsing page{0}... url={1}'.format(p, url)) doc = html.document_fromstring(requests.get(url).text) @@ -202,6 +215,21 @@ class HabraUser(object): return out +class HabraUser(TMUser): + def __init__(self, username, need_favorites=False, need_user_posts=False): + super().__init__(username, need_favorites, need_user_posts=need_user_posts, domain='habrahabr.ru') + + +class GeektimesUser(TMUser): + def __init__(self, username, need_favorites=False, need_user_posts=False): + super().__init__(username, need_favorites, need_user_posts=need_user_posts, domain='geektimes.ru') + + +class MegamozgUser(TMUser): + def __init__(self, username, need_favorites=False, need_user_posts=False): + super().__init__(username, need_favorites, need_user_posts=need_user_posts, domain='megamozg.ru') + + import pprint @@ -225,7 +253,46 @@ class Test_HabraUser(TestCase): pp = pprint.PrettyPrinter(indent=4) pp.pprint('userposts=') pp.pprint(hu.user_posts()) - # out = getFavForUsername('icoz') - # pp = pprint.PrettyPrinter(indent=4) - # pp.pprint(out) + + +class Test_GeektimesUser(TestCase): + def setUp(self): + self.hu = GeektimesUser('icoz') + pass + + def test_parseUserpage(self): + pp = pprint.PrettyPrinter(indent=4) + pp.pprint(self.hu.activity()) + pp.pprint(self.hu.profile()) + pp.pprint(self.hu.karma()) + + # def test_favs(self): + # pp = pprint.PrettyPrinter(indent=4) + + def test_user_posts(self): + hu = GeektimesUser('Zelenyikot') + pp = pprint.PrettyPrinter(indent=4) + pp.pprint('userposts=') + 
pp.pprint(hu.user_posts()) + + +class Test_MegamozgUser(TestCase): + def setUp(self): + self.hu = MegamozgUser('icoz') + pass + + def test_parseUserpage(self): + pp = pprint.PrettyPrinter(indent=4) + pp.pprint(self.hu.activity()) + pp.pprint(self.hu.profile()) + pp.pprint(self.hu.karma()) + + # def test_favs(self): + # pp = pprint.PrettyPrinter(indent=4) + + def test_user_posts(self): + hu = MegamozgUser('Zelenyikot') + pp = pprint.PrettyPrinter(indent=4) + pp.pprint('userposts=') + pp.pprint(hu.user_posts()) |