Merge pull request #17 from azmelanar/search-readme

Documented search and added script
author: Dmytro Slupytskyi <dslupytskyi@gmail.com> 2017-10-22 15:36:03 +0300
committer: GitHub <noreply@github.com> 2017-10-22 15:36:03 +0300
commit: 85e8b4dfbcc542467a28f4ad661597b4e876e26e (patch)
tree: 3090f92d31fbcc8fa7d4f9d99cad858842557ce2
parent: 909b9c9ca5102f13d40b94c2b89b59fab2df6aa9 (diff)
parent: 4d54629d445a03d37a2ba2e3712c1e1b99ca720f (diff)
6 files changed, 112 insertions, 1 deletions
diff --git a/README.md b/README.md
index de58f99..b6256ad 100644
--- a/README.md
+++ b/README.md
@@ -11,6 +11,7 @@ Based on [Pixyll for Jekyll](https://github.com/johnotander/pixyll)
 - Social links (currently only for twitter).
 - [Formspree](http://formspree.io/) for contanct form.
 - Pagination support.
+- Search based on client-side JavaScript.
 
 Example config:
 
@@ -37,4 +38,19 @@ theme = "pixyll"
   paginate = true
 ```
 
+## Search
+To activate search, ensure that the `[params]` section contains the line `search_engine = true`.
+Then generate the search index:
+1. Copy the `create_json.py` script to your website root,
+where `config.toml` is located.
+1. Install python requirements: `pip install -r requirements.txt`
+1. Run `python2 create_json.py`
+
+The script was tested with Python 2.7 only.
+
+The index will be written to `public/tipuesearch_content.json`. Every time you add
+a new article, you need to regenerate the index with the command above.
+If you run hugo as a server, please append the option `--renderToDisk`; otherwise hugo
+will not pick up the files generated by the search script.
+
 ![Pixyll Screenshot](https://raw.githubusercontent.com/azmelanar/hugo-theme-pixyll/master/images/tn.png)
diff --git a/create_json.py b/create_json.py
new file mode 100755
index 0000000..2f70d69
--- /dev/null
+++ b/create_json.py
@@ -0,0 +1,66 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import os
+import json
+from bs4 import BeautifulSoup
+
+
+# Walks `path` recursively and returns a list of 'root/file' strings for every file whose name ends in '.html'
+def walker(path):
+    pages = []
+    for root, dirs, files in os.walk(path):  # `dirs` is unused; os.walk descends into every subdirectory
+        for file in files:  # NOTE: `file` shadows the Python 2 builtin of the same name
+            if file.endswith('.html'):
+                pages.append('/'.join((root, file)))  # joined with a literal '/' rather than os.path.join
+    return pages
+
+
+# Parses one HTML file into a search-index dict with keys 'title', 'loc', 'text', 'tags'; returns None if extraction fails
+def parser(page):
+    soup = BeautifulSoup(open(page, 'r'))  # NOTE(review): the file is never closed explicitly and no parser is named
+    node = {}
+    try:  # a missing <title>, <link> or <article> makes one of the lookups below raise, which is caught at the bottom
+        node['title'] = soup.title.get_text(' ', strip=True).replace('&nbsp;', ' ').replace('^', '&#94;')
+        node['loc'] = soup.link['href']  # href of the first <link> element found in the document
+        node['text'] = soup.article.get_text(' ', strip=True).replace('^', '&#94;')
+        tags = ['nonetags']  # placeholder value: the real tag extraction below is commented out
+        #for a in soup.find("p", id='tags').find_all("a"):
+        #    tags.append(a['href'].split('/')[-1])
+        node['tags'] = ' '.join(tags)
+        return node
+    except:  # NOTE(review): bare except, so any error (not only missing elements) silently drops the page
+        return None
+
+
+# Wraps the node list as {'pages': nodes} and writes it as JSON to public/tipuesearch_content.json
+def jsoner(nodes):
+    jdata = {'pages': nodes}
+    with open('public/tipuesearch_content.json', 'w') as f:  # relative path: resolved against the current working directory
+        json.dump(jdata, f)
+
+
+# Builds a sitemap XML string with one <url> entry per node['loc'] and writes it to public/search/sitemap.xml
+def sitemaper(nodes):
+    xml = '''<?xml version="1.0" encoding="utf-8"?>
+<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
+xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'''
+    url = '<url><loc>{0}</loc><changefreq>daily</changefreq><priority>0.5</priority></url>\n'
+    for n in nodes:
+        xml = xml + url.format(n['loc'])  # NOTE(review): loc is inserted as-is, without XML escaping
+    xml = xml + '\n</urlset>'
+    with open('public/search/sitemap.xml', 'w') as f:  # NOTE(review): nothing here creates public/search/; presumably Hugo does, confirm
+        f.write(xml)
+
+if os.path.exists('./public'):  # the presence of ./public is the only check that we run from the Hugo site root
+    pages = walker('.')  # NOTE(review): scans the whole current directory, not only ./public; confirm this is intended
+    nodes = []
+    for p in pages:
+        node = parser(p)
+        if node:  # parser() returns None for pages it could not extract
+            nodes.append(node)
+    jsoner(nodes)
+    sitemaper(nodes)
+else:
+    print 'Error: place this script in hugo site root'  # Python 2 print statement
diff --git a/exampleSite/config.toml b/exampleSite/config.toml
index 7ff017c..f6a6b2c 100644
--- a/exampleSite/config.toml
+++ b/exampleSite/config.toml
@@ -30,7 +30,7 @@ hrefTargetBlank = true
   tag = "tags"
 
 [params]
-  #search_engine = true
+  search_engine = true
   #google_analytics_id = ""
   twitter_username = "username"
   #disqus_shortname = ""
diff --git a/exampleSite/content/search.md b/exampleSite/content/search.md
new file mode 100644
index 0000000..7b93931
--- /dev/null
+++ b/exampleSite/content/search.md
@@ -0,0 +1,26 @@
++++
+date = "2014-11-09T13:49:44+04:00"
+draft = false
+title = "search"
+
++++
+
+<div>
+<link rel="stylesheet" type="text/css" href="../tipuesearch/tipuesearch.css">
+<script src="//ajax.googleapis.com/ajax/libs/jquery/2.0.0/jquery.min.js"></script>
+<script type="text/javascript" src="../tipuesearch/tipuesearch_set.js"></script>
+<script type="text/javascript" src="../tipuesearch/tipuesearch.min.js"></script>
+<script>
+$(document).ready(function() {
+     $('#tipue_search_input').tipuesearch({
+         'mode' : 'json',
+         'show': 10,
+         'newWindow': true,
+         'contentLocation': '../tipuesearch_content.json'
+     });
+});
+</script>
+<div class="span8 offset2">
+    <div id="tipue_search_content"><div id="tipue_search_loading"></div></div>
+</div>
+</div>
diff --git a/exampleSite/create_json.py b/exampleSite/create_json.py
new file mode 120000
index 0000000..4902329
--- /dev/null
+++ b/exampleSite/create_json.py
@@ -0,0 +1 @@
+../create_json.py
+\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..a056a46
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+beautifulsoup4==4.3.2
+html5lib==1.0b8
author	Dmytro Slupytskyi <dslupytskyi@gmail.com>	2017-10-22 15:36:03 +0300
committer	GitHub <noreply@github.com>	2017-10-22 15:36:03 +0300
commit	85e8b4dfbcc542467a28f4ad661597b4e876e26e (patch)
tree	3090f92d31fbcc8fa7d4f9d99cad858842557ce2
parent	909b9c9ca5102f13d40b94c2b89b59fab2df6aa9 (diff)
parent	4d54629d445a03d37a2ba2e3712c1e1b99ca720f (diff)