Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/icoz/habraparse.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: icoz <icoz.vt@gmail.com> 2015-02-01 13:03:47 +0300
committer: icoz <icoz.vt@gmail.com> 2015-02-01 13:03:47 +0300
commit: 03547173c23632acbd5950760dd068c1a4b30ae3 (patch)
tree: 63562bd99bb491790ec5818b6452127a24c86099 /habraparse.py
parent: 8df46e08db722a037592946438eaeaeb947ce50a (diff)
Bugfix in prepare_html - thanks to habrauser 4mz!
Bugfix in save_pdf and save_html - directories were handled incorrectly. The default behavior is now saving to PDF, so the command-line options have changed. Added saving topics by name.
Diffstat (limited to 'habraparse.py')
-rwxr-xr-x habraparse.py 62
1 files changed, 38 insertions, 24 deletions
diff --git a/habraparse.py b/habraparse.py
index 4fab33a..fae1c14 100755
--- a/habraparse.py
+++ b/habraparse.py
@@ -15,12 +15,12 @@ __author__ = 'icoz'
def prepare_html(topic_id, with_comments=False):
t = HabraTopic(str(topic_id))
# <link href="http://habrahabr.ru/styles/1412005750/printer.css" rel="stylesheet" media="print" />
- html_format = '''
+ # <link href="http://habrahabr.ru/styles/1412005750/assets/global_main.css" rel="stylesheet" media="all" />
+ html_head = '''
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<meta charset="UTF-8">
- <link href="http://habrahabr.ru/styles/1412005750/assets/global_main.css" rel="stylesheet" media="all" />
<link href="http://habrahabr.ru/styles/1412005750/assets/post_common_css.css" rel="stylesheet" media="all" />
<title>{title}</title>
</head>
@@ -37,22 +37,29 @@ def prepare_html(topic_id, with_comments=False):
{text}
</div>
</div>
-
+ '''
+ html_cmnts = '''
<div id="comments" class="comments_list">
<h2 class="title">Комментарии</h2>
{comments}
</div>
+ '''
+ html_foot = '''
</div>
</div>
</div>
</body>
</html>
'''
+ html_format = html_head + html_foot
cmnts = ''
if with_comments:
+ html_format = html_head + html_cmnts + html_foot
for c in t.comments():
cmnts += '{}\n'.format(c)
- html = html_format.format(title=t.title(), author=t.author(), text=t.text(), comments=cmnts)
+ html = html_format.format(title=t.title(), author=t.author(), text=t.text(), comments=cmnts)
+ else:
+ html = html_format.format(title=t.title(), author=t.author(), text=t.text())
html = str(html).replace('//habrastorage.org', 'http://habrastorage.org')
html = str(html).replace('http:http:', 'http:')
return html
@@ -60,8 +67,8 @@ def prepare_html(topic_id, with_comments=False):
def save_html(topic_id, filename, with_comments=False):
dir = os.path.dirname(filename)
- dir_imgs = filename+'.files'
- if not os.path.exists(dir):
+ dir_imgs = filename + '.files'
+ if dir != '' and not os.path.exists(dir):
os.mkdir(dir)
if not os.path.exists(dir_imgs):
os.mkdir(dir_imgs)
@@ -81,17 +88,17 @@ def save_pdf(topic_id, filename, with_comments=False):
logger.handlers = [] # Remove the default stderr handler
logger.addHandler(logging.FileHandler('pdf_weasyprint.log'))
dir = os.path.dirname(filename)
- if not os.path.exists(dir):
+ if dir != '' and not os.path.exists(dir):
os.mkdir(dir)
html = prepare_html(topic_id, with_comments=with_comments)
css = CSS(string='@page { size: A4; margin: 1cm !important}')
HTML(string=html).write_pdf(filename, stylesheets=[css])
-def save_all_favs_for_user(username, out_dir, save_in_pdf=False, with_comments=False, limit=None):
- filetype = 'html'
- if save_in_pdf:
- filetype = 'pdf'
+def save_all_favs_for_user(username, out_dir, save_in_html=True, with_comments=False, save_by_name=False, limit=None):
+ filetype = 'pdf'
+ if save_in_html:
+ filetype = 'html'
# raise NotImplemented
hu = HabraUser(username, need_favorites=True)
favs_id = hu.favorites()
@@ -108,13 +115,17 @@ def save_all_favs_for_user(username, out_dir, save_in_pdf=False, with_comments=F
topic_id = favs_id[topic_name]
print('Downloading "{}"...'.format(topic_name))
# topic = HabraTopic(topic_id)
- filename = '{dir}/{id}.{filetype}'.format(dir=out_dir, id=topic_id, filetype=filetype)
+ if save_by_name:
+ t_name = topic_name.replace('/', '_').replace('\\', '_').replace('!', '.')
+ filename = '{dir}/{name}.{filetype}'.format(dir=out_dir, name=t_name, filetype=filetype)
+ else:
+ filename = '{dir}/{id}.{filetype}'.format(dir=out_dir, id=topic_id, filetype=filetype)
print('Saving it in "{}"'.format(filename))
try:
- if save_in_pdf:
- save_pdf(topic_id, filename, with_comments=with_comments)
- else:
+ if save_in_html:
html = save_html(topic_id, filename, with_comments=with_comments)
+ else:
+ save_pdf(topic_id, filename, with_comments=with_comments)
except PostDeleted:
print('Post {} is deleted!'.format(topic_id))
deleted.append(topic_id)
@@ -148,9 +159,9 @@ import docopt
def main():
params = """Usage:
{prog} save_favs_list <username> <out_file>
- {prog} save_favs [-pc --limit=N] <username> <out_dir>
- {prog} save_post [-pc] <topic_id> <out_file>
- {prog} save_posts [-pc --limit=N] <username> <out_dir>
+ {prog} save_favs [-c --limit=N] <username> <out_dir>
+ {prog} save_post [-c] <topic_id> <out_file>
+ {prog} save_posts [-c --limit=N] <username> <out_dir>
Arguments:
username Имя пользовтеля Habrahabr.ru
@@ -158,7 +169,8 @@ def main():
out_dir Путь для сохранения избранного
Options:
- -p, --save-pdf Сохранить в PDF (по умолчанию, в HTML)
+ --save-html Сохранить в HTML (по умолчанию, в PDF)
+ --save-by-name Сохранять с именем, полученным из названия статьи (по умолчанию - по ID статьи)
-c, --with-comments Сохранить вместе с коментариями
--limit=N Ограничить количество в N статей
""".format(prog=sys.argv[0])
@@ -169,17 +181,19 @@ def main():
create_url_list(args['<username>'], args['<out_file>'])
return
if args['save_favs']:
- save_all_favs_for_user(args['<username>'], args['<out_dir>'], save_in_pdf=args['--save-pdf'],
- with_comments=args['--with-comments'], limit=args['--limit'])
+ save_all_favs_for_user(args['<username>'], args['<out_dir>'], save_in_html=args['--save-html'],
+ with_comments=args['--with-comments'], save_by_name=args['--save-by-name'],
+ limit=args['--limit'])
return
if args['save_post']:
t_id = args['<topic_id>']
fname = args['<out_file>']
- if args['--save-pdf']:
- save_pdf(t_id, filename=fname, with_comments=args['--with-comments'])
- else:
+ if args['--save-html']:
save_html(t_id, filename=fname, with_comments=args['--with-comments'])
+ else:
+ save_pdf(t_id, filename=fname, with_comments=args['--with-comments'])
if args['save_posts']:
+ print('Not implemented yet')
return
except docopt.DocoptExit as e: