Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/icoz/habraparse.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/habr
diff options
context:
space:
mode:
author: icoz <icoz.vt@gmail.com> 2016-06-01 23:56:55 +0300
committer: icoz <icoz.vt@gmail.com> 2016-06-01 23:56:55 +0300
commit: a12ea54bc620845f4b97c8bc4d99c438211d1621 (patch)
tree: 1686245a1307ac80e76b1f2fb85e216e0903c0af /habr
parent: d96a9a7c3e5828adaded35f131241a5c3a1cf853 (diff)
Поправлен разбор комментариев (#8). Добавлена выборка времени комментария.
Изменено внутреннее представление комментариев. Теперь это dict(). При генерации разобранной статьи добавлена новая функция generate_comments(), которая рекурсивно строит дерево комментариев.
Diffstat (limited to 'habr')
-rw-r--r-- habr/topic.py 19
1 files changed, 16 insertions, 3 deletions
diff --git a/habr/topic.py b/habr/topic.py
index dff7fb7..6446a99 100644
--- a/habr/topic.py
+++ b/habr/topic.py
@@ -64,14 +64,27 @@ class TMTopic(object):
self.post['comments'] = []
# bug in class 'comments_list ' - space added
# comments = doc.xpath("//div[@class='comments_list ']//div[@class='comment_item']")
- comments = doc.xpath("//ul[@id='comments-list']//li[@class='comment_item']")
- self.post['comments_count'] = len(comments)
+ # comments = doc.xpath("//ul[@id='comments-list']//li[@class='comment_item']")
# record = (author, text)
authors = list(map(lambda x: x.text, doc.xpath("//ul[@id='comments-list']//a[@class='comment-item__username']")))
cmt_texts = list(map(lambda x: x.text.strip(), doc.xpath("//ul[@id='comments-list']//div[starts-with(@class,'message html_format ')]")))
c_id = list(map(lambda x: int(x.attrib['id'][8:]), doc.xpath("//ul[@id='comments-list']//li[@class='comment_item']")))
p_id = list(map(lambda x: int(x.attrib['data-parent_id']), doc.xpath("//ul[@id='comments-list']//span[@class='parent_id']")))
- self.post['comments'] = tuple(zip(authors, cmt_texts, c_id, p_id))
+ time = list(map(lambda x: x.text.strip(), doc.xpath("//ul[@id='comments-list']//time")))
+ tpl = tuple(zip(authors, cmt_texts, c_id, p_id, time))
+ self.post['comments'] = tuple(
+ map(
+ lambda x:
+ {
+ 'author':x[0],
+ 'text': x[1],
+ 'c_id': x[2],
+ 'p_id': x[3],
+ 'time': x[4],
+ },
+ tpl)
+ )
+ self.post['comments_count'] = len(self.post['comments'])
# self.post['comments'] = list()
# for c in comments: