Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/icoz/habraparse.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author icoz <icoz.vt@gmail.com> 2017-08-06 10:05:09 +0300
committer icoz <icoz.vt@gmail.com> 2017-08-06 10:05:09 +0300
commit 55fddda03182fb3d2c2163d98926a1ef8d676d69 (patch)
tree f3e991df89ea289321fbc97384758250022c041c
parent 39fbf5a9fd5f89961f2385519147b38c12849768 (diff)
user.py:
- поправлен тест, ибо профиль пользователя удален
-rw-r--r-- habr/topic.py 5
-rwxr-xr-x habraparse.py 7
-rw-r--r-- requirements.txt 15
3 files changed, 10 insertions, 17 deletions
diff --git a/habr/topic.py b/habr/topic.py
index a098f2c..0ca4e36 100644
--- a/habr/topic.py
+++ b/habr/topic.py
@@ -81,7 +81,8 @@ class TMTopic(object):
tmp = doc.xpath(
"//ul[@class='postinfo-panel postinfo-panel_post']//span[@class='oting-wjt__counter-score js-score']")
self.post['rating'] = tmp[0].text if len(tmp) else ''
- tmp = doc.xpath("//div[@class='content html_format']") or \
+ tmp = doc.xpath("//div[@class='content html_format js-mediator-article']") or \
+ doc.xpath("//div[@class='content html_format']") or \
doc.xpath('//div[@class="article__body"]')
self.post['text'] = etree.tostring(tmp[0], pretty_print=True, method='html').decode('utf-8') \
if len(tmp) else ''
@@ -89,7 +90,7 @@ class TMTopic(object):
# bug in class 'comments_list ' - space added
# comments = doc.xpath("//div[@class='comments_list ']//div[@class='comment_item']")
# comments = doc.xpath("//ul[@id='comments-list']//li[@class='comment_item']")
- # record = (author, text)
+ # record = (author, text)text
authors = list(
map(lambda x: x.text, doc.xpath("//ul[@id='comments-list']//a[@class='comment-item__username']"))
)
diff --git a/habraparse.py b/habraparse.py
index c9f4f7e..6831c1b 100755
--- a/habraparse.py
+++ b/habraparse.py
@@ -48,6 +48,13 @@ def prepare_html(topic, with_comments=False):
# <link href="https://habracdn.net/habr/styles/1464788371/_build/global_main.css" rel="stylesheet" media="all" />
# <link href="https://habracdn.net/habr/styles/1464788371/_build/company_post_show_common.css" rel="stylesheet" media="all" />
# <link href="https://habracdn.net/habr/styles/1464788371/_build/post_common_css.css" rel="stylesheet" media="all" />
+ # 09.07.2017
+ # <link href="https://habracdn.net/habr/styles/1499416660/_build/post_common_css.css" rel="stylesheet" media="all" />
+ # <link href="https://habracdn.net/habr/styles/1499416660/_build/global_main.css" rel="stylesheet" media="all" />
+ # <link href="https://habracdn.net/habr/styles/1499416660/_build/post_common_css.css" rel="stylesheet" media="print" />
+ # <link href="https://habracdn.net/habr/styles/1499416660/_build/global_main.css" rel="stylesheet" media="print" />
+
+
html_head = '''
<html>
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index 7d3c6a3..0000000
--- a/requirements.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-six>=1.10.0
-cairocffi>=0.7.2
-CairoSVG>=2.0.0
-cffi>=1.9.1
-cssselect>=1.0.0
-docopt>=0.6.2
-html5lib>=0.999999999
-lxml>=3.7.0
-Pillow>=3.4.2
-pycparser>=2.17
-Pyphen>=0.9.4
-requests>=2.12.3
-tinycss>=0.4
-WeasyPrint>=0.33
-webencodings>=0.5