diff options
author | kvakanet <kvakanet@users.noreply.github.com> | 2017-07-25 20:30:44 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-07-25 20:30:44 +0300 |
commit | 9e213aa09f1dcdb747d350d5af8ca3cbfbe66bcc (patch) | |
tree | 998ef537ae4bc0f232c6068588158c40c5974289 | |
parent | b056c83abae4ad30dc67fbaf564efbe3d4c9468e (diff) | |
parent | 39fbf5a9fd5f89961f2385519147b38c12849768 (diff) |
Merge pull request #3 from icoz/master
Merge with Icoz
-rw-r--r-- | habr/topic.py | 18 | ||||
-rw-r--r-- | habr/user.py | 14 | ||||
-rwxr-xr-x | habraparse.py | 19 |
3 files changed, 33 insertions, 18 deletions
diff --git a/habr/topic.py b/habr/topic.py
index 2988e62..a098f2c 100644
--- a/habr/topic.py
+++ b/habr/topic.py
@@ -242,15 +242,15 @@ class TestGTTopic(TestCase):
         pp.pprint(t.post['comments_count'])
         pp.pprint(t.post['rating'])
 
-    def test_topic2(self):
-        t = GeektimesTopic(245130)
-        pp = pprint.PrettyPrinter(indent=4)
-        pp.pprint(t.author())
-        self.assertEqual(t.author(), '@Robotex')
-        pp.pprint(t.title())
-        self.assertEqual(t.title(), 'Autodesk и Voxel8 делают 3D-печать электроники реальностью')
-        pp.pprint(t.post['comments_count'])
-        pp.pprint(t.post['rating'])
+    # def test_topic2(self):
+    #     t = GeektimesTopic(245130)
+    #     pp = pprint.PrettyPrinter(indent=4)
+    #     pp.pprint(t.author())
+    #     self.assertEqual(t.author(), '@Robotex')
+    #     pp.pprint(t.title())
+    #     self.assertEqual(t.title(), 'Autodesk и Voxel8 делают 3D-печать электроники реальностью')
+    #     pp.pprint(t.post['comments_count'])
+    #     pp.pprint(t.post['rating'])
 
 # class TestMMTopic(TestCase):
 #     def test_topic(self):
diff --git a/habr/user.py b/habr/user.py
index 611927a..e3e512c 100644
--- a/habr/user.py
+++ b/habr/user.py
@@ -112,8 +112,15 @@ class TMUser(object):
 
     def _parseUserpage(self):
         # print(self._doc)
+        # check for BAN
+        val = self._doc.xpath("//div[@class='main']/h1")
+        if val and val[0].text.strip() == "Доступ закрыт":
+            # maybe raise ERROR???
+            return
         p_tags = self._doc.xpath("//div[@class='user_profile']//ul[@id='people-tags']//a/span")
-        date_of_registration = self._doc.xpath("//div[@class='user_profile']//dd[@class='grey']")[0].text.strip()
+        # date_of_registration = self._doc.xpath("//div[@class='user_profile']//dd[@class='grey']")[0].text.strip()
+        tmp = self._doc.xpath("//div[@class='user_profile']//p[@class='profile-section__invited']")
+        date_of_registration = tmp[0].text.strip() if tmp else ""
         tmp = self._doc.xpath("//div[@class='user_profile']//dl[last()]/dd")
         date_of_last_login = tmp[0].text.strip()
 
@@ -184,6 +191,11 @@ class TMUser(object):
         """
         url = self._genFavoritesUrlByUser(self._username)
         doc = html.document_fromstring(requests.get(url).text)
+        # check for BAN
+        val = self._doc.xpath("//div[@class='main']/h1")
+        if val and val[0].text.strip() == "Доступ закрыт":
+            # maybe raise ERROR???
+            return
         out = dict()
         pages = get_pages(doc)
         favs = doc.xpath("//div[@class='user_favorites']//a[@class='post__title_link']")
diff --git a/habraparse.py b/habraparse.py
index c455089..c9f4f7e 100755
--- a/habraparse.py
+++ b/habraparse.py
@@ -215,14 +215,17 @@ def create_url_list(username, filename, project='h'):
     T = GeektimesTopic if project == 'g' else HabraTopic
     urls = list()
     favs_id = hu.favorites()
-    for topic_name in favs_id:
-        try:
-            urls.append(T(favs_id[topic_name]).getTopicUrl())
-        except PostDeleted:
-            print('Post {} is deleted!'.format(favs_id[topic_name]))
-    urls.sort()
-    with open(filename, 'wt') as f:
-        f.write('\n'.join(urls))
+    if favs_id:
+        for topic_name in favs_id:
+            try:
+                urls.append(T(favs_id[topic_name]).getTopicUrl())
+            except PostDeleted:
+                print('Post {} is deleted!'.format(favs_id[topic_name]))
+        urls.sort()
+        with open(filename, 'wt') as f:
+            f.write('\n'.join(urls))
+    else:
+        print("Something went wrong. Maybe user is banned or deleted.")
 
 
 import docopt