Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/azmelanar/hugo-theme-pixyll.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'create_search_index.py')
-rwxr-xr-xcreate_search_index.py74
1 files changed, 74 insertions, 0 deletions
diff --git a/create_search_index.py b/create_search_index.py
new file mode 100755
index 0000000..667b2ad
--- /dev/null
+++ b/create_search_index.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python2
+# -*- coding: utf-8 -*-
+#
+# Script creates search index for Tipue Search 7.0
+# Check http://www.tipue.com/search/help/ for more info
+
+import json
+import os
+from bs4 import BeautifulSoup
+
+
+# Takes Hugo public directory and returns all html files
+def walker(path):
+ pages = []
+ for root, dirs, files in os.walk(path):
+ for file in files:
+ if file.endswith('.html'):
+ pages.append('/'.join((root, file)))
+ return pages
+
+
+# Takes html page and outputs json object
+def parser(page):
+ soup = BeautifulSoup(open(page, 'r'))
+ node = {}
+ try:
+ node['title'] = soup.title.get_text(' ', strip=True).replace(' ', ' ').replace('^', '^')
+ node['url'] = soup.link['href']
+ node['text'] = soup.article.get_text(' ', strip=True).replace('^', '^')
+ tags = []
+ for a in soup.find("p", class_="post-meta").find_all("a"):
+ tags.append(a['href'].split('/')[-1])
+ node['tags'] = ' '.join(tags)
+ return node
+ except Exception as e:
+ #print(e)
+ return None
+
+
+# Json accumulator
+def jsoner(nodes):
+ jdata = {'pages': nodes}
+ output = json.dumps(jdata)
+ output = 'var tipuesearch = ' + output + ';'
+ # This is hardcoded http://www.tipue.com/search/help/?d=2
+ with open('public/tipuesearch/tipuesearch_content.js', 'w') as f:
+ f.write(output)
+
+
+# Sitemap generation
+def sitemaper(nodes):
+ xml = '''<?xml version="1.0" encoding="utf-8"?>
+<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
+xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'''
+ url = '<url><loc>{0}</loc><changefreq>daily</changefreq><priority>0.5</priority></url>\n'
+ for n in nodes:
+ xml = xml + url.format(n['url'])
+ xml = xml + '\n</urlset>'
+ with open('public/search/sitemap.xml', 'w') as f:
+ f.write(xml)
+
+
+if os.path.exists('./public/tipuesearch'):
+ pages = walker('.')
+ nodes = []
+ for p in pages:
+ node = parser(p)
+ if node:
+ nodes.append(node)
+ jsoner(nodes)
+ sitemaper(nodes)
+else:
+ print 'Error: place this script in hugo site root'