From f5f0da95e478705bea24f8bbbf61d41de94a9ed7 Mon Sep 17 00:00:00 2001
From: kshcherban <k.scherban@gmail.com>
Date: Sun, 10 Sep 2017 01:52:59 +0200
Subject: Documented search and added script

---
 README.md                     | 15 ++++++++++
 create_json.py                | 66 +++++++++++++++++++++++++++++++++++++++++++
 exampleSite/config.toml       |  2 +-
 exampleSite/content/search.md | 26 +++++++++++++++++
 exampleSite/create_json.py    |  1 +
 5 files changed, 109 insertions(+), 1 deletion(-)
 create mode 100755 create_json.py
 create mode 100644 exampleSite/content/search.md
 create mode 120000 exampleSite/create_json.py
diff --git a/README.md b/README.md
index de58f99..52913b1 100644
--- a/README.md
+++ b/README.md
@@ -11,6 +11,7 @@ Based on [Pixyll for Jekyll](https://github.com/johnotander/pixyll)
 - Social links (currently only for twitter).
 - [Formspree](http://formspree.io/) for contanct form.
 - Pagination support.
+- Search based on client-side JavaScript.
 
 Example config:
 
@@ -37,4 +38,18 @@ theme = "pixyll"
   paginate = true
 ```
 
+## Search
+To activate search ensure that `[params]` section has line `search_engine = true`.
+Then you need to generate search index:
+1. Copy script `create_json.py` to your website root location,
+where `config.toml` is located.
+1. Install beautifulsoup4 4.3.2: `pip install beautifulsoup4==4.3.2`
+1. Install html5lib 1.0b8: `pip install html5lib==1.0b8`
+1. Run `python22 create_json.py`
+
+Index will be placed into `public/tipuesearch_content.json`. Every time you add
+new article you need to regenerate index.
+If you run hugo as a server, please append option `--renderToDisk` othervise hugo
+will not pick up files generated by search script.
+
 ![Pixyll Screenshot](https://raw.githubusercontent.com/azmelanar/hugo-theme-pixyll/master/images/tn.png)
diff --git a/create_json.py b/create_json.py
new file mode 100755
index 0000000..2f70d69
--- /dev/null
+++ b/create_json.py
@@ -0,0 +1,66 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import os
+import json
+from bs4 import BeautifulSoup
+
+
+# Walks `path` recursively and returns '/'-joined paths of all *.html files
+def walker(path):
+    html_pages = []
+    for root, _dirs, names in os.walk(path):
+        html_pages.extend(
+            '/'.join((root, name)) for name in names
+            if name.endswith('.html'))
+    return html_pages
+
+
+# Parses one html page into a search node dict (title, loc, text, tags);
+# returns None when the page has no <title>, <link href=...> or <article>
+def parser(page):
+    with open(page, 'r') as f:  # close the handle instead of leaking it
+        soup = BeautifulSoup(f)
+    try:
+        # soup.title / soup.link / soup.article are None for absent elements
+        return {
+            'title': soup.title.get_text(' ', strip=True).replace('&nbsp;', ' ').replace('^', '&#94;'),
+            'loc': soup.link['href'],
+            'text': soup.article.get_text(' ', strip=True).replace('^', '&#94;'),
+            'tags': 'nonetags',  # placeholder: per-page tag extraction is disabled
+        }
+    except (AttributeError, KeyError, TypeError):
+        return None
+
+
+# Dumps the nodes as {"pages": [...]} into public/tipuesearch_content.json
+def jsoner(nodes):
+    index_path = 'public/tipuesearch_content.json'
+    with open(index_path, 'w') as out:
+        json.dump({'pages': nodes}, out)
+
+
+# Writes public/search/sitemap.xml with one <url> entry per node['loc']
+def sitemaper(nodes):
+    from xml.sax.saxutils import escape  # raw '&' or '<' in a URL breaks the XML
+    header = '''<?xml version="1.0" encoding="utf-8"?>
+<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
+xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'''
+    url = '<url><loc>{0}</loc><changefreq>daily</changefreq><priority>0.5</priority></url>\n'
+    entries = [url.format(escape(n['loc'])) for n in nodes]
+    xml = header + ''.join(entries) + '\n</urlset>'
+    with open('public/search/sitemap.xml', 'w') as f:
+        f.write(xml)
+
+# Entry point: build the search index and sitemap from the rendered site
+if os.path.exists('./public'):
+    # Walk only the rendered output; '.' would also parse html outside public/
+    pages = walker('./public')
+    # parser() returns None for pages it cannot turn into a node: skip those
+    nodes = [node for node in map(parser, pages) if node]
+    jsoner(nodes)
+    sitemaper(nodes)
+else:
+    # parenthesized single-argument print works on python 2 and python 3
+    print('Error: place this script in hugo site root')
diff --git a/exampleSite/config.toml b/exampleSite/config.toml
index 7ff017c..f6a6b2c 100644
--- a/exampleSite/config.toml
+++ b/exampleSite/config.toml
@@ -30,7 +30,7 @@ hrefTargetBlank = true
   tag = "tags"
 
 [params]
-  #search_engine = true
+  search_engine = true
   #google_analytics_id = ""
   twitter_username = "username"
   #disqus_shortname = ""
diff --git a/exampleSite/content/search.md b/exampleSite/content/search.md
new file mode 100644
index 0000000..7b93931
--- /dev/null
+++ b/exampleSite/content/search.md
@@ -0,0 +1,26 @@
++++
+date = "2014-11-09T13:49:44+04:00"
+draft = false
+title = "search"
+
++++
+
+<div>
+<link rel="stylesheet" type="text/css" href="../tipuesearch/tipuesearch.css">
+<script src="//ajax.googleapis.com/ajax/libs/jquery/2.0.0/jquery.min.js"></script>
+<script type="text/javascript" src="../tipuesearch/tipuesearch_set.js"></script>
+<script type="text/javascript" src="../tipuesearch/tipuesearch.min.js"></script>
+<script>
+$(document).ready(function() {
+     $('#tipue_search_input').tipuesearch({
+         'mode' : 'json',
+         'show': 10,
+         'newWindow': true,
+         'contentLocation': '../tipuesearch_content.json'
+     });
+});
+</script>
+<div class="span8 offset2">
+    <div id="tipue_search_content"><div id="tipue_search_loading"></div></div>
+</div>
+</div>
diff --git a/exampleSite/create_json.py b/exampleSite/create_json.py
new file mode 120000
index 0000000..4902329
--- /dev/null
+++ b/exampleSite/create_json.py
@@ -0,0 +1 @@
+../create_json.py
\ No newline at end of file
-- 
cgit v1.2.3


From 4d54629d445a03d37a2ba2e3712c1e1b99ca720f Mon Sep 17 00:00:00 2001
From: kshcherban <k.scherban@gmail.com>
Date: Sun, 22 Oct 2017 13:38:56 +0200
Subject: Fixes for search script documentation

---
 README.md        | 9 +++++----
 requirements.txt | 2 ++
 2 files changed, 7 insertions(+), 4 deletions(-)
 create mode 100644 requirements.txt

diff --git a/README.md b/README.md
index 52913b1..b6256ad 100644
--- a/README.md
+++ b/README.md
@@ -43,12 +43,13 @@ To activate search ensure that `[params]` section has line `search_engine = true
 Then you need to generate search index:
 1. Copy script `create_json.py` to your website root location,
 where `config.toml` is located.
-1. Install beautifulsoup4 4.3.2: `pip install beautifulsoup4==4.3.2`
-1. Install html5lib 1.0b8: `pip install html5lib==1.0b8`
-1. Run `python22 create_json.py`
+1. Install python requirements: `pip install -r requirements.txt`
+1. Run `python2 create_json.py`
+
+The script was tested with Python 2.7 only.
 
 Index will be placed into `public/tipuesearch_content.json`. Every time you add
-new article you need to regenerate index.
+new article you need to regenerate the index with the command above.
 If you run hugo as a server, please append option `--renderToDisk` othervise hugo
 will not pick up files generated by search script.
 
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..a056a46
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+beautifulsoup4==4.3.2
+html5lib==1.0b8
-- 
cgit v1.2.3