From f5f0da95e478705bea24f8bbbf61d41de94a9ed7 Mon Sep 17 00:00:00 2001 From: kshcherban Date: Sun, 10 Sep 2017 01:52:59 +0200 Subject: Documented search and added script --- README.md | 15 ++++++++++ create_json.py | 66 +++++++++++++++++++++++++++++++++++++++++++ exampleSite/config.toml | 2 +- exampleSite/content/search.md | 26 +++++++++++++++++ exampleSite/create_json.py | 1 + 5 files changed, 109 insertions(+), 1 deletion(-) create mode 100755 create_json.py create mode 100644 exampleSite/content/search.md create mode 120000 exampleSite/create_json.py diff --git a/README.md b/README.md index de58f99..52913b1 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ Based on [Pixyll for Jekyll](https://github.com/johnotander/pixyll) - Social links (currently only for twitter). - [Formspree](http://formspree.io/) for contanct form. - Pagination support. +- Search based on client side javascript. Example config: @@ -37,4 +38,18 @@ theme = "pixyll" paginate = true ``` +## Search +To activate search ensure that `[params]` section has line `search_engine = true`. +Then you need to generate search index: +1. Copy script `create_json.py` to your website root location, +where `config.toml` is located. +1. Install beautifulsoup4 4.3.2: `pip install beautifulsoup4==4.3.2` +1. Install html5lib 1.0b8: `pip install html5lib==1.0b8` +1. Run `python22 create_json.py` + +Index will be placed into `public/tipuesearch_content.json`. Every time you add +new article you need to regenerate index. +If you run hugo as a server, please append option `--renderToDisk` othervise hugo +will not pick up files generated by search script. + ![Pixyll Screenshot](https://raw.githubusercontent.com/azmelanar/hugo-theme-pixyll/master/images/tn.png) diff --git a/create_json.py b/create_json.py new file mode 100755 index 0000000..2f70d69 --- /dev/null +++ b/create_json.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import os +import json +from bs4 import BeautifulSoup + + +# Takes Hugo public directory and returns all html files +def walker(path): + pages = [] + for root, dirs, files in os.walk(path): + for file in files: + if file.endswith('.html'): + pages.append('/'.join((root, file))) + return pages + + +# Takes html page and outputs json object +def parser(page): + soup = BeautifulSoup(open(page, 'r')) + node = {} + try: + node['title'] = soup.title.get_text(' ', strip=True).replace(' ', ' ').replace('^', '^') + node['loc'] = soup.link['href'] + node['text'] = soup.article.get_text(' ', strip=True).replace('^', '^') + tags = ['nonetags'] + #for a in soup.find("p", id='tags').find_all("a"): + # tags.append(a['href'].split('/')[-1]) + node['tags'] = ' '.join(tags) + return node + except: + return None + + +# Json accumulator +def jsoner(nodes): + jdata = {'pages': nodes} + with open('public/tipuesearch_content.json', 'w') as f: + json.dump(jdata, f) + + +# Sitemap generation +def sitemaper(nodes): + xml = ''' +\n''' + url = '{0}daily0.5\n' + for n in nodes: + xml = xml + url.format(n['loc']) + xml = xml + '\n' + with open('public/search/sitemap.xml', 'w') as f: + f.write(xml) + +if os.path.exists('./public'): + pages = walker('.') + nodes = [] + for p in pages: + node = parser(p) + if node: + nodes.append(node) + jsoner(nodes) + sitemaper(nodes) +else: + print 'Error: place this script in hugo site root' diff --git a/exampleSite/config.toml b/exampleSite/config.toml index 7ff017c..f6a6b2c 100644 --- a/exampleSite/config.toml +++ b/exampleSite/config.toml @@ -30,7 +30,7 @@ hrefTargetBlank = true tag = "tags" [params] - #search_engine = true + search_engine = true #google_analytics_id = "" twitter_username = "username" #disqus_shortname = "" diff --git a/exampleSite/content/search.md b/exampleSite/content/search.md new file mode 100644 index 0000000..7b93931 --- /dev/null +++ b/exampleSite/content/search.md @@ -0,0 +1,26 @@ ++++ +date = "2014-11-09T13:49:44+04:00" +draft = false +title = "search" + ++++ + +
+ + + + + +
+
+
+
diff --git a/exampleSite/create_json.py b/exampleSite/create_json.py new file mode 120000 index 0000000..4902329 --- /dev/null +++ b/exampleSite/create_json.py @@ -0,0 +1 @@ +../create_json.py \ No newline at end of file -- cgit v1.2.3 From 4d54629d445a03d37a2ba2e3712c1e1b99ca720f Mon Sep 17 00:00:00 2001 From: kshcherban Date: Sun, 22 Oct 2017 13:38:56 +0200 Subject: Fixes for search script documentation --- README.md | 9 +++++---- requirements.txt | 2 ++ 2 files changed, 7 insertions(+), 4 deletions(-) create mode 100644 requirements.txt diff --git a/README.md b/README.md index 52913b1..b6256ad 100644 --- a/README.md +++ b/README.md @@ -43,12 +43,13 @@ To activate search ensure that `[params]` section has line `search_engine = true Then you need to generate search index: 1. Copy script `create_json.py` to your website root location, where `config.toml` is located. -1. Install beautifulsoup4 4.3.2: `pip install beautifulsoup4==4.3.2` -1. Install html5lib 1.0b8: `pip install html5lib==1.0b8` -1. Run `python22 create_json.py` +1. Install python requirements: `pip install -r requirements.txt` +1. Run `python2 create_json.py` + +Script was tested with python 2.7 only. Index will be placed into `public/tipuesearch_content.json`. Every time you add -new article you need to regenerate index. +new article you need to regenerate index with command above. If you run hugo as a server, please append option `--renderToDisk` othervise hugo will not pick up files generated by search script. diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..a056a46 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +beautifulsoup4==4.3.2 +html5lib==1.0b8 -- cgit v1.2.3