diff options
author | Dmytro Slupytskyi <dslupytskyi@gmail.com> | 2017-10-22 15:36:03 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-10-22 15:36:03 +0300 |
commit | 85e8b4dfbcc542467a28f4ad661597b4e876e26e (patch) | |
tree | 3090f92d31fbcc8fa7d4f9d99cad858842557ce2 | |
parent | 909b9c9ca5102f13d40b94c2b89b59fab2df6aa9 (diff) | |
parent | 4d54629d445a03d37a2ba2e3712c1e1b99ca720f (diff) |
Merge pull request #17 from azmelanar/search-readme
Documented search and added script
-rw-r--r-- | README.md | 16 | ||||
-rwxr-xr-x | create_json.py | 66 | ||||
-rw-r--r-- | exampleSite/config.toml | 2 | ||||
-rw-r--r-- | exampleSite/content/search.md | 26 | ||||
l--------- | exampleSite/create_json.py | 1 | ||||
-rw-r--r-- | requirements.txt | 2 |
6 files changed, 112 insertions, 1 deletions
@@ -11,6 +11,7 @@ Based on [Pixyll for Jekyll](https://github.com/johnotander/pixyll) - Social links (currently only for twitter). - [Formspree](http://formspree.io/) for contact form. - Pagination support. +- Search based on client side javascript. Example config: @@ -37,4 +38,19 @@ theme = "pixyll" paginate = true ``` +## Search +To activate search ensure that `[params]` section has line `search_engine = true`. +Then you need to generate search index: +1. Copy script `create_json.py` to your website root location, +where `config.toml` is located. +1. Install python requirements: `pip install -r requirements.txt` +1. Run `python2 create_json.py` + +Script was tested with python 2.7 only. + +Index will be placed into `public/tipuesearch_content.json`. Every time you add +new article you need to regenerate index with command above. +If you run hugo as a server, please append option `--renderToDisk` otherwise hugo +will not pick up files generated by search script. + ![Pixyll Screenshot](https://raw.githubusercontent.com/azmelanar/hugo-theme-pixyll/master/images/tn.png) diff --git a/create_json.py b/create_json.py new file mode 100755 index 0000000..2f70d69 --- /dev/null +++ b/create_json.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import os +import json +from bs4 import BeautifulSoup + + +# Takes Hugo public directory and returns all html files +def walker(path): + pages = [] + for root, dirs, files in os.walk(path): + for file in files: + if file.endswith('.html'): + pages.append('/'.join((root, file))) + return pages + + +# Takes html page and outputs json object +def parser(page): + soup = BeautifulSoup(open(page, 'r')) + node = {} + try: + node['title'] = soup.title.get_text(' ', strip=True).replace(' ', ' ').replace('^', '^') + node['loc'] = soup.link['href'] + node['text'] = soup.article.get_text(' ', strip=True).replace('^', '^') + tags = ['nonetags'] + #for a in soup.find("p", id='tags').find_all("a"): + # 
tags.append(a['href'].split('/')[-1]) + node['tags'] = ' '.join(tags) + return node + except: + return None + + +# Json accumulator +def jsoner(nodes): + jdata = {'pages': nodes} + with open('public/tipuesearch_content.json', 'w') as f: + json.dump(jdata, f) + + +# Sitemap generation +def sitemaper(nodes): + xml = '''<?xml version="1.0" encoding="utf-8"?> +<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" +xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd" +xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n''' + url = '<url><loc>{0}</loc><changefreq>daily</changefreq><priority>0.5</priority></url>\n' + for n in nodes: + xml = xml + url.format(n['loc']) + xml = xml + '\n</urlset>' + with open('public/search/sitemap.xml', 'w') as f: + f.write(xml) + +if os.path.exists('./public'): + pages = walker('.') + nodes = [] + for p in pages: + node = parser(p) + if node: + nodes.append(node) + jsoner(nodes) + sitemaper(nodes) +else: + print 'Error: place this script in hugo site root' diff --git a/exampleSite/config.toml b/exampleSite/config.toml index 7ff017c..f6a6b2c 100644 --- a/exampleSite/config.toml +++ b/exampleSite/config.toml @@ -30,7 +30,7 @@ hrefTargetBlank = true tag = "tags" [params] - #search_engine = true + search_engine = true #google_analytics_id = "" twitter_username = "username" #disqus_shortname = "" diff --git a/exampleSite/content/search.md b/exampleSite/content/search.md new file mode 100644 index 0000000..7b93931 --- /dev/null +++ b/exampleSite/content/search.md @@ -0,0 +1,26 @@ ++++ +date = "2014-11-09T13:49:44+04:00" +draft = false +title = "search" + ++++ + +<div> +<link rel="stylesheet" type="text/css" href="../tipuesearch/tipuesearch.css"> +<script src="//ajax.googleapis.com/ajax/libs/jquery/2.0.0/jquery.min.js"></script> +<script type="text/javascript" src="../tipuesearch/tipuesearch_set.js"></script> +<script type="text/javascript" 
src="../tipuesearch/tipuesearch.min.js"></script> +<script> +$(document).ready(function() { + $('#tipue_search_input').tipuesearch({ + 'mode' : 'json', + 'show': 10, + 'newWindow': true, + 'contentLocation': '../tipuesearch_content.json' + }); +}); +</script> +<div class="span8 offset2"> + <div id="tipue_search_content"><div id="tipue_search_loading"></div> +</div> +</div> diff --git a/exampleSite/create_json.py b/exampleSite/create_json.py new file mode 120000 index 0000000..4902329 --- /dev/null +++ b/exampleSite/create_json.py @@ -0,0 +1 @@ +../create_json.py
\ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..a056a46 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +beautifulsoup4==4.3.2 +html5lib==1.0b8 |