Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/azmelanar/hugo-theme-pixyll.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'create_json.py')
-rwxr-xr-xcreate_json.py66
1 files changed, 66 insertions, 0 deletions
diff --git a/create_json.py b/create_json.py
new file mode 100755
index 0000000..2f70d69
--- /dev/null
+++ b/create_json.py
@@ -0,0 +1,66 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import os
+import json
+from bs4 import BeautifulSoup
+
+
# Takes Hugo public directory and returns all html files
def walker(path):
    """Recursively collect the paths of all .html files under *path*.

    Args:
        path: root directory to search (the Hugo site root or public dir).

    Returns:
        A list of file path strings, one per ``.html`` file found.
    """
    pages = []
    for root, dirs, files in os.walk(path):
        # 'name' instead of 'file' - avoid shadowing the builtin.
        for name in files:
            if name.endswith('.html'):
                # os.path.join is portable; '/'.join breaks on Windows.
                pages.append(os.path.join(root, name))
    return pages
+
+
# Takes html page and outputs json object
def parser(page):
    """Parse one rendered HTML page into a tipuesearch index node.

    Args:
        page: path to an ``.html`` file produced by Hugo.

    Returns:
        A dict with 'title', 'loc', 'text' and 'tags' keys, or None when
        the page lacks the expected elements (title/link/article).
    """
    # Context manager closes the handle promptly; the original
    # open() call leaked it until garbage collection.
    with open(page, 'r') as f:
        soup = BeautifulSoup(f)
    node = {}
    try:
        node['title'] = soup.title.get_text(' ', strip=True).replace(' ', ' ').replace('^', '^')
        node['loc'] = soup.link['href']
        node['text'] = soup.article.get_text(' ', strip=True).replace('^', '^')
        # Tag extraction is currently disabled; placeholder value kept.
        tags = ['nonetags']
        #for a in soup.find("p", id='tags').find_all("a"):
        #    tags.append(a['href'].split('/')[-1])
        node['tags'] = ' '.join(tags)
        return node
    except (AttributeError, KeyError, TypeError):
        # Page missing title/link/article (e.g. list pages) -> skip it.
        # Narrowed from a bare 'except:', which also swallowed
        # KeyboardInterrupt/SystemExit and hid real bugs.
        return None
+
+
# Json accumulator
def jsoner(nodes):
    """Dump the collected page nodes into tipuesearch's JSON index file."""
    with open('public/tipuesearch_content.json', 'w') as out:
        json.dump({'pages': nodes}, out)
+
+
# Sitemap generation
def sitemaper(nodes):
    """Write public/search/sitemap.xml with one <url> entry per node's 'loc'."""
    header = ('<?xml version="1.0" encoding="utf-8"?>\n'
              '<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"\n'
              'xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"\n'
              'xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n')
    entry = '<url><loc>{0}</loc><changefreq>daily</changefreq><priority>0.5</priority></url>\n'
    # join() builds the document in one pass instead of repeated +=.
    body = ''.join(entry.format(n['loc']) for n in nodes)
    with open('public/search/sitemap.xml', 'w') as out:
        out.write(header + body + '\n</urlset>')
+
# Entry point: build the search index and sitemap from the Hugo site root.
# NOTE(review): this walks '.' rather than './public', so theme/template
# .html files are also scanned - confirm that is intended.
if os.path.exists('./public'):
    pages = walker('.')
    nodes = []
    for p in pages:
        node = parser(p)
        if node:
            nodes.append(node)
    jsoner(nodes)
    sitemaper(nodes)
else:
    # print() works on both Python 2 and 3; the original Python 2
    # print statement is a SyntaxError under Python 3.
    print('Error: place this script in hugo site root')