diff options
author | icoz <icoz.vt@gmail.com> | 2015-02-01 13:03:47 +0300 |
---|---|---|
committer | icoz <icoz.vt@gmail.com> | 2015-02-01 13:03:47 +0300 |
commit | 03547173c23632acbd5950760dd068c1a4b30ae3 (patch) | |
tree | 63562bd99bb491790ec5818b6452127a24c86099 /habraparse.py | |
parent | 8df46e08db722a037592946438eaeaeb947ce50a (diff) |
Bugfix in prepare_html - thanks to habrauser 4mz!
Bugfix in save_pdf and save_html - incorrect handling of directories
Default behavior is now saving to PDF, so the command-line options have changed
Added saving topics by name
Diffstat (limited to 'habraparse.py')
-rwxr-xr-x | habraparse.py | 62 |
1 files changed, 38 insertions, 24 deletions
diff --git a/habraparse.py b/habraparse.py index 4fab33a..fae1c14 100755 --- a/habraparse.py +++ b/habraparse.py @@ -15,12 +15,12 @@ __author__ = 'icoz' def prepare_html(topic_id, with_comments=False): t = HabraTopic(str(topic_id)) # <link href="http://habrahabr.ru/styles/1412005750/printer.css" rel="stylesheet" media="print" /> - html_format = ''' + # <link href="http://habrahabr.ru/styles/1412005750/assets/global_main.css" rel="stylesheet" media="all" /> + html_head = ''' <html> <head> <meta http-equiv="content-type" content="text/html; charset=utf-8"> <meta charset="UTF-8"> - <link href="http://habrahabr.ru/styles/1412005750/assets/global_main.css" rel="stylesheet" media="all" /> <link href="http://habrahabr.ru/styles/1412005750/assets/post_common_css.css" rel="stylesheet" media="all" /> <title>{title}</title> </head> @@ -37,22 +37,29 @@ def prepare_html(topic_id, with_comments=False): {text} </div> </div> - + ''' + html_cmnts = ''' <div id="comments" class="comments_list"> <h2 class="title">Комментарии</h2> {comments} </div> + ''' + html_foot = ''' </div> </div> </div> </body> </html> ''' + html_format = html_head + html_foot cmnts = '' if with_comments: + html_format = html_head + html_cmnts + html_foot for c in t.comments(): cmnts += '{}\n'.format(c) - html = html_format.format(title=t.title(), author=t.author(), text=t.text(), comments=cmnts) + html = html_format.format(title=t.title(), author=t.author(), text=t.text(), comments=cmnts) + else: + html = html_format.format(title=t.title(), author=t.author(), text=t.text()) html = str(html).replace('//habrastorage.org', 'http://habrastorage.org') html = str(html).replace('http:http:', 'http:') return html @@ -60,8 +67,8 @@ def prepare_html(topic_id, with_comments=False): def save_html(topic_id, filename, with_comments=False): dir = os.path.dirname(filename) - dir_imgs = filename+'.files' - if not os.path.exists(dir): + dir_imgs = filename + '.files' + if dir != '' and not os.path.exists(dir): os.mkdir(dir) if 
not os.path.exists(dir_imgs): os.mkdir(dir_imgs) @@ -81,17 +88,17 @@ def save_pdf(topic_id, filename, with_comments=False): logger.handlers = [] # Remove the default stderr handler logger.addHandler(logging.FileHandler('pdf_weasyprint.log')) dir = os.path.dirname(filename) - if not os.path.exists(dir): + if dir != '' and not os.path.exists(dir): os.mkdir(dir) html = prepare_html(topic_id, with_comments=with_comments) css = CSS(string='@page { size: A4; margin: 1cm !important}') HTML(string=html).write_pdf(filename, stylesheets=[css]) -def save_all_favs_for_user(username, out_dir, save_in_pdf=False, with_comments=False, limit=None): - filetype = 'html' - if save_in_pdf: - filetype = 'pdf' +def save_all_favs_for_user(username, out_dir, save_in_html=True, with_comments=False, save_by_name=False, limit=None): + filetype = 'pdf' + if save_in_html: + filetype = 'html' # raise NotImplemented hu = HabraUser(username, need_favorites=True) favs_id = hu.favorites() @@ -108,13 +115,17 @@ def save_all_favs_for_user(username, out_dir, save_in_pdf=False, with_comments=F topic_id = favs_id[topic_name] print('Downloading "{}"...'.format(topic_name)) # topic = HabraTopic(topic_id) - filename = '{dir}/{id}.{filetype}'.format(dir=out_dir, id=topic_id, filetype=filetype) + if save_by_name: + t_name = topic_name.replace('/', '_').replace('\\', '_').replace('!', '.') + filename = '{dir}/{name}.{filetype}'.format(dir=out_dir, name=t_name, filetype=filetype) + else: + filename = '{dir}/{id}.{filetype}'.format(dir=out_dir, id=topic_id, filetype=filetype) print('Saving it in "{}"'.format(filename)) try: - if save_in_pdf: - save_pdf(topic_id, filename, with_comments=with_comments) - else: + if save_in_html: html = save_html(topic_id, filename, with_comments=with_comments) + else: + save_pdf(topic_id, filename, with_comments=with_comments) except PostDeleted: print('Post {} is deleted!'.format(topic_id)) deleted.append(topic_id) @@ -148,9 +159,9 @@ import docopt def main(): params = """Usage: 
{prog} save_favs_list <username> <out_file> - {prog} save_favs [-pc --limit=N] <username> <out_dir> - {prog} save_post [-pc] <topic_id> <out_file> - {prog} save_posts [-pc --limit=N] <username> <out_dir> + {prog} save_favs [-c --limit=N] <username> <out_dir> + {prog} save_post [-c] <topic_id> <out_file> + {prog} save_posts [-c --limit=N] <username> <out_dir> Arguments: username Имя пользовтеля Habrahabr.ru @@ -158,7 +169,8 @@ def main(): out_dir Путь для сохранения избранного Options: - -p, --save-pdf Сохранить в PDF (по умолчанию, в HTML) + --save-html Сохранить в HTML (по умолчанию, в PDF) + --save-by-name Сохранять с именем, полученным из названия статьи (по умолчанию - по ID статьи) -c, --with-comments Сохранить вместе с коментариями --limit=N Ограничить количество в N статей """.format(prog=sys.argv[0]) @@ -169,17 +181,19 @@ def main(): create_url_list(args['<username>'], args['<out_file>']) return if args['save_favs']: - save_all_favs_for_user(args['<username>'], args['<out_dir>'], save_in_pdf=args['--save-pdf'], - with_comments=args['--with-comments'], limit=args['--limit']) + save_all_favs_for_user(args['<username>'], args['<out_dir>'], save_in_html=args['--save-html'], + with_comments=args['--with-comments'], save_by_name=args['--save-by-name'], + limit=args['--limit']) return if args['save_post']: t_id = args['<topic_id>'] fname = args['<out_file>'] - if args['--save-pdf']: - save_pdf(t_id, filename=fname, with_comments=args['--with-comments']) - else: + if args['--save-html']: save_html(t_id, filename=fname, with_comments=args['--with-comments']) + else: + save_pdf(t_id, filename=fname, with_comments=args['--with-comments']) if args['save_posts']: + print('Not implemented yet') return except docopt.DocoptExit as e: |