diff options
author | icoz <icoz.vt@gmail.com> | 2016-06-01 23:56:55 +0300 |
---|---|---|
committer | icoz <icoz.vt@gmail.com> | 2016-06-01 23:56:55 +0300 |
commit | a12ea54bc620845f4b97c8bc4d99c438211d1621 (patch) | |
tree | 1686245a1307ac80e76b1f2fb85e216e0903c0af | |
parent | d96a9a7c3e5828adaded35f131241a5c3a1cf853 (diff) |
Поправлен разбор комментариев (#8). Добавлена выборка времени комментария.
Изменено внутреннее представление комментариев. Теперь каждый комментарий — это dict().
При генерации разобранной статьи добавлена новая функция generate_comments(), которая рекурсивно строит дерево комментариев.
-rw-r--r-- | habr/topic.py | 19 | ||||
-rwxr-xr-x | habraparse.py | 70 |
2 files changed, 59 insertions, 30 deletions
diff --git a/habr/topic.py b/habr/topic.py index dff7fb7..6446a99 100644 --- a/habr/topic.py +++ b/habr/topic.py @@ -64,14 +64,27 @@ class TMTopic(object): self.post['comments'] = [] # bug in class 'comments_list ' - space added # comments = doc.xpath("//div[@class='comments_list ']//div[@class='comment_item']") - comments = doc.xpath("//ul[@id='comments-list']//li[@class='comment_item']") - self.post['comments_count'] = len(comments) + # comments = doc.xpath("//ul[@id='comments-list']//li[@class='comment_item']") # record = (author, text) authors = list(map(lambda x: x.text, doc.xpath("//ul[@id='comments-list']//a[@class='comment-item__username']"))) cmt_texts = list(map(lambda x: x.text.strip(), doc.xpath("//ul[@id='comments-list']//div[starts-with(@class,'message html_format ')]"))) c_id = list(map(lambda x: int(x.attrib['id'][8:]), doc.xpath("//ul[@id='comments-list']//li[@class='comment_item']"))) p_id = list(map(lambda x: int(x.attrib['data-parent_id']), doc.xpath("//ul[@id='comments-list']//span[@class='parent_id']"))) - self.post['comments'] = tuple(zip(authors, cmt_texts, c_id, p_id)) + time = list(map(lambda x: x.text.strip(), doc.xpath("//ul[@id='comments-list']//time"))) + tpl = tuple(zip(authors, cmt_texts, c_id, p_id, time)) + self.post['comments'] = tuple( + map( + lambda x: + { + 'author':x[0], + 'text': x[1], + 'c_id': x[2], + 'p_id': x[3], + 'time': x[4], + }, + tpl) + ) + self.post['comments_count'] = len(self.post['comments']) # self.post['comments'] = list() # for c in comments: diff --git a/habraparse.py b/habraparse.py index ede96c6..8dcdb8d 100755 --- a/habraparse.py +++ b/habraparse.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- import os +from pprint import pprint import sys from weasyprint import HTML, CSS @@ -10,11 +11,44 @@ from habr.user import HabraUser, GeektimesUser __author__ = 'icoz' +def generate_comments(cmnts, id=0): + html_subcmnt = ''' + <ul class="reply_comments" id="reply_comments_{c_id}"> + {list_cmnts} 
+ </ul> + ''' + html_cmnt = ''' + <li class="comment_item" id="comment_{c_id}"> + <span class="parent_id" data-parent_id="{p_id}"></span> + <div class="comment_body "> + <div class="info comments-list__item comment-item " rel="{c_id}"> + <span class="comment-item__user-info" data-user-login="{user}"> + <a href="https://habrahabr.ru/users/{user}/" class="comment-item__username">{user}</a> + <time class="comment-item__time_published">{time}</time> + </span> + <div class="message html_format "> + {cmnt_text} + </div> + </div> + </div> + ''' + cmnts2 = tuple(filter(lambda x: x['p_id'] == id, cmnts)) + if len(cmnts2) == 0: return '' + out = '' + for c in cmnts2: + out += html_cmnt.format(c_id=c['c_id'], p_id=id, user=c['author'], time=c['time'], cmnt_text=c['text']) + out += html_subcmnt.format(c_id = c['c_id'], list_cmnts=generate_comments(cmnts, c['c_id'])) + return out def prepare_html(topic, with_comments=False): t = topic # <link href="http://habrahabr.ru/styles/1412005750/printer.css" rel="stylesheet" media="print" /> # <link href="http://habrahabr.ru/styles/1412005750/assets/global_main.css" rel="stylesheet" media="all" /> + # worked. 
01/06/2016 <link href="http://habrahabr.ru/styles/1412005750/assets/post_common_css.css" rel="stylesheet" media="all" /> + # <link href="https://habracdn.net/habr/styles/1464788371/_build/global_main.css" rel="stylesheet" media="all" /> + # <link href="https://habracdn.net/habr/styles/1464788371/_build/company_post_show_common.css" rel="stylesheet" media="all" /> + # <link href="https://habracdn.net/habr/styles/1464788371/_build/post_common_css.css" rel="stylesheet" media="all" /> + html_head = ''' <html> <head> @@ -38,23 +72,13 @@ def prepare_html(topic, with_comments=False): </div> ''' html_cmnts = ''' - <h2>Комментарии</h2> - <ul id="comments-list"> - {comments} - </ul> - ''' - html_cmnt = ''' - <li class="comment_item" id="comment_{c_id}"> - <span class="parent_id" data-parent_id="{p_id}"></span> - <div class="comment_body "> - <div class="info comments-list__item comment-item " rel="{c_id}"> - <span class="comment-item__user-info" rel="user-popover" data-user-login="{user}"> - <a href="https://habrahabr.ru/users/{user}/" class="comment-item__username">{user}</a> - </span> - <div class="message html_format "> - {cmnt} - </div> - </div> + <div class="comments_list " id="comments"> + <h2 class="title "> + Комментарии (<span id="comments_count">{cmnts_count}</span>) + </h2> + <ul id="comments-list"> + {comments} + </ul> </div> ''' html_foot = ''' @@ -65,17 +89,9 @@ def prepare_html(topic, with_comments=False): </html> ''' if with_comments: - cmnts = '' html_format = html_head + html_cmnts + html_foot - # print("t.comments()=", len(t.comments())) - l = 0 - for c in t.comments(): - user, cmnt, c_id, p_id = c - cmnts += html_cmnt.format(user=user, cmnt=cmnt, c_id=c_id, p_id=p_id) - # print('cmnts.len=', len(cmnts)) - # print('l=', l) - html = html_format.format(title=t.title(), author=t.author(), text=t.text(), comments=cmnts) - # print('html.len=', len(html)) + html = html_format.format(title=t.title(), author=t.author(), text=t.text(), + 
comments=generate_comments(t.comments(), 0), cmnts_count=t.comments_count()) else: html_format = html_head + html_foot html = html_format.format(title=t.title(), author=t.author(), text=t.text()) |