Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/icoz/habraparse.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author kvakanet <kvakanet@users.noreply.github.com> 2017-07-25 20:30:44 +0300
committer GitHub <noreply@github.com> 2017-07-25 20:30:44 +0300
commit 9e213aa09f1dcdb747d350d5af8ca3cbfbe66bcc (patch)
tree 998ef537ae4bc0f232c6068588158c40c5974289
parent b056c83abae4ad30dc67fbaf564efbe3d4c9468e (diff)
parent 39fbf5a9fd5f89961f2385519147b38c12849768 (diff)
Merge pull request #3 from icoz/master
Merge with Icoz
-rw-r--r-- habr/topic.py 18
-rw-r--r-- habr/user.py 14
-rwxr-xr-x habraparse.py 19
3 files changed, 33 insertions, 18 deletions
diff --git a/habr/topic.py b/habr/topic.py
index 2988e62..a098f2c 100644
--- a/habr/topic.py
+++ b/habr/topic.py
@@ -242,15 +242,15 @@ class TestGTTopic(TestCase):
pp.pprint(t.post['comments_count'])
pp.pprint(t.post['rating'])
- def test_topic2(self):
- t = GeektimesTopic(245130)
- pp = pprint.PrettyPrinter(indent=4)
- pp.pprint(t.author())
- self.assertEqual(t.author(), '@Robotex')
- pp.pprint(t.title())
- self.assertEqual(t.title(), 'Autodesk и Voxel8 делают 3D-печать электроники реальностью')
- pp.pprint(t.post['comments_count'])
- pp.pprint(t.post['rating'])
+ # def test_topic2(self):
+ # t = GeektimesTopic(245130)
+ # pp = pprint.PrettyPrinter(indent=4)
+ # pp.pprint(t.author())
+ # self.assertEqual(t.author(), '@Robotex')
+ # pp.pprint(t.title())
+ # self.assertEqual(t.title(), 'Autodesk и Voxel8 делают 3D-печать электроники реальностью')
+ # pp.pprint(t.post['comments_count'])
+ # pp.pprint(t.post['rating'])
# class TestMMTopic(TestCase):
# def test_topic(self):
diff --git a/habr/user.py b/habr/user.py
index 611927a..e3e512c 100644
--- a/habr/user.py
+++ b/habr/user.py
@@ -112,8 +112,15 @@ class TMUser(object):
def _parseUserpage(self):
# print(self._doc)
+ # check for BAN
+ val = self._doc.xpath("//div[@class='main']/h1")
+ if val and val[0].text.strip() == "Доступ закрыт":
+ # maybe raise ERROR???
+ return
p_tags = self._doc.xpath("//div[@class='user_profile']//ul[@id='people-tags']//a/span")
- date_of_registration = self._doc.xpath("//div[@class='user_profile']//dd[@class='grey']")[0].text.strip()
+ # date_of_registration = self._doc.xpath("//div[@class='user_profile']//dd[@class='grey']")[0].text.strip()
+ tmp = self._doc.xpath("//div[@class='user_profile']//p[@class='profile-section__invited']")
+ date_of_registration = tmp[0].text.strip() if tmp else ""
tmp = self._doc.xpath("//div[@class='user_profile']//dl[last()]/dd")
date_of_last_login = tmp[0].text.strip()
@@ -184,6 +191,11 @@ class TMUser(object):
"""
url = self._genFavoritesUrlByUser(self._username)
doc = html.document_fromstring(requests.get(url).text)
+ # check for BAN
+ val = self._doc.xpath("//div[@class='main']/h1")
+ if val and val[0].text.strip() == "Доступ закрыт":
+ # maybe raise ERROR???
+ return
out = dict()
pages = get_pages(doc)
favs = doc.xpath("//div[@class='user_favorites']//a[@class='post__title_link']")
diff --git a/habraparse.py b/habraparse.py
index c455089..c9f4f7e 100755
--- a/habraparse.py
+++ b/habraparse.py
@@ -215,14 +215,17 @@ def create_url_list(username, filename, project='h'):
T = GeektimesTopic if project == 'g' else HabraTopic
urls = list()
favs_id = hu.favorites()
- for topic_name in favs_id:
- try:
- urls.append(T(favs_id[topic_name]).getTopicUrl())
- except PostDeleted:
- print('Post {} is deleted!'.format(favs_id[topic_name]))
- urls.sort()
- with open(filename, 'wt') as f:
- f.write('\n'.join(urls))
+ if favs_id:
+ for topic_name in favs_id:
+ try:
+ urls.append(T(favs_id[topic_name]).getTopicUrl())
+ except PostDeleted:
+ print('Post {} is deleted!'.format(favs_id[topic_name]))
+ urls.sort()
+ with open(filename, 'wt') as f:
+ f.write('\n'.join(urls))
+ else:
+ print("Something went wrong. Maybe user is banned or deleted.")
import docopt