diff options
author | icoz <icoz.vt@gmail.com> | 2017-08-06 10:05:09 +0300 |
---|---|---|
committer | icoz <icoz.vt@gmail.com> | 2017-08-06 10:05:09 +0300 |
commit | 55fddda03182fb3d2c2163d98926a1ef8d676d69 (patch) | |
tree | f3e991df89ea289321fbc97384758250022c041c | |
parent | 39fbf5a9fd5f89961f2385519147b38c12849768 (diff) |
user.py:
- поправлен тест, ибо профиль пользователя удален
-rw-r--r-- | habr/topic.py | 5 |
-rwxr-xr-x | habraparse.py | 7 |
-rw-r--r-- | requirements.txt | 15 |
3 files changed, 10 insertions, 17 deletions
```diff
diff --git a/habr/topic.py b/habr/topic.py
index a098f2c..0ca4e36 100644
--- a/habr/topic.py
+++ b/habr/topic.py
@@ -81,7 +81,8 @@ class TMTopic(object):
         tmp = doc.xpath(
             "//ul[@class='postinfo-panel postinfo-panel_post']//span[@class='oting-wjt__counter-score js-score']")
         self.post['rating'] = tmp[0].text if len(tmp) else ''
-        tmp = doc.xpath("//div[@class='content html_format']") or \
+        tmp = doc.xpath("//div[@class='content html_format js-mediator-article']") or \
+              doc.xpath("//div[@class='content html_format']") or \
               doc.xpath('//div[@class="article__body"]')
         self.post['text'] = etree.tostring(tmp[0], pretty_print=True, method='html').decode('utf-8') \
             if len(tmp) else ''
@@ -89,7 +90,7 @@ class TMTopic(object):
         # bug in class 'comments_list ' - space added
         # comments = doc.xpath("//div[@class='comments_list ']//div[@class='comment_item']")
         # comments = doc.xpath("//ul[@id='comments-list']//li[@class='comment_item']")
-        # record = (author, text)
+        # record = (author, text)text
         authors = list(
             map(lambda x: x.text, doc.xpath("//ul[@id='comments-list']//a[@class='comment-item__username']"))
         )
diff --git a/habraparse.py b/habraparse.py
index c9f4f7e..6831c1b 100755
--- a/habraparse.py
+++ b/habraparse.py
@@ -48,6 +48,13 @@ def prepare_html(topic, with_comments=False):
     # <link href="https://habracdn.net/habr/styles/1464788371/_build/global_main.css" rel="stylesheet" media="all" />
     # <link href="https://habracdn.net/habr/styles/1464788371/_build/company_post_show_common.css" rel="stylesheet" media="all" />
     # <link href="https://habracdn.net/habr/styles/1464788371/_build/post_common_css.css" rel="stylesheet" media="all" />
+    # 09.07.2017
+    # <link href="https://habracdn.net/habr/styles/1499416660/_build/post_common_css.css" rel="stylesheet" media="all" />
+    # <link href="https://habracdn.net/habr/styles/1499416660/_build/global_main.css" rel="stylesheet" media="all" />
+    # <link href="https://habracdn.net/habr/styles/1499416660/_build/post_common_css.css" rel="stylesheet" media="print" />
+    # <link href="https://habracdn.net/habr/styles/1499416660/_build/global_main.css" rel="stylesheet" media="print" />
+
+
 
     html_head = '''
     <html>
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index 7d3c6a3..0000000
--- a/requirements.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-six>=1.10.0
-cairocffi>=0.7.2
-CairoSVG>=2.0.0
-cffi>=1.9.1
-cssselect>=1.0.0
-docopt>=0.6.2
-html5lib>=0.999999999
-lxml>=3.7.0
-Pillow>=3.4.2
-pycparser>=2.17
-Pyphen>=0.9.4
-requests>=2.12.3
-tinycss>=0.4
-WeasyPrint>=0.33
-webencodings>=0.5
```