#!/usr/bin/env python2
# -*- coding: utf-8 -*-
#
# Script creates search index for Tipue Search 7.0
# Check http://www.tipue.com/search/help/ for more info
#
# Run it from the Hugo site root (the directory that contains `public/`).
# Only syntax accepted by both Python 2.7 and Python 3 is used.

import io
import json
import os
from xml.sax.saxutils import escape

# Directory whose presence tells us we are in the Hugo site root.
TIPUE_DIR = './public/tipuesearch'

# This is hardcoded, see http://www.tipue.com/search/help/?d=2
TIPUE_CONTENT_PATH = 'public/tipuesearch/tipuesearch_content.js'

SITEMAP_PATH = 'public/search/sitemap.xml'

# Sitemap building blocks (https://www.sitemaps.org/protocol.html).
# Unicode literals so the document can be written through a UTF-8 text
# stream on Python 2 as well as Python 3.
SITEMAP_HEADER = (
    u'<?xml version="1.0" encoding="UTF-8"?>\n'
    u'<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'
)
SITEMAP_URL = (
    u'<url><loc>{0}</loc><changefreq>daily</changefreq>'
    u'<priority>0.5</priority></url>\n'
)
SITEMAP_FOOTER = u'</urlset>\n'


def walker(path):
    """Return the paths of all ``.html`` files found under *path*.

    The tree is walked recursively with ``os.walk``.  Each result is the
    containing directory and the file name joined with ``/``.
    """
    return [
        '/'.join((root, name))
        for root, _dirs, names in os.walk(path)
        for name in names
        if name.endswith('.html')
    ]


def _tag_from_href(href):
    """Return the last non-empty path segment of *href*.

    A trailing slash is ignored, so ``/tags/foo/`` and ``/tags/foo`` both
    give ``foo`` (a plain ``split('/')[-1]`` gives ``''`` for the former).
    """
    return href.rstrip('/').split('/')[-1]


def parser(page):
    """Parse one HTML file into a search-index node.

    Args:
        page: path of the HTML file to read.

    Returns:
        A dict with the keys ``title``, ``url``, ``text`` and ``tags``
        (space-separated), or ``None`` when the page lacks one of the
        elements the node is built from: ``<title>``, a ``<link>`` with an
        ``href``, ``<article>``, or ``<p class="post-meta">``.

    Raises:
        IOError/OSError: if *page* cannot be opened.
    """
    # Imported here rather than at module level so the stdlib-only helpers
    # in this file stay importable when bs4 is not installed.
    from bs4 import BeautifulSoup

    # Read bytes and let BeautifulSoup detect the encoding; the ``with``
    # block closes the handle.  The parser is named explicitly so results
    # do not depend on which optional parsers happen to be installed.
    with open(page, 'rb') as handle:
        soup = BeautifulSoup(handle, 'html.parser')

    try:
        title = soup.title.get_text(' ', strip=True)
        url = soup.link['href']
        text = soup.article.get_text(' ', strip=True)
        meta_links = soup.find('p', class_='post-meta').find_all('a')
        tags = [_tag_from_href(a['href']) for a in meta_links]
    except (AttributeError, KeyError, TypeError):
        # A missing element shows up as None (AttributeError on attribute
        # access, TypeError on subscripting) and a missing href as a
        # KeyError.  Such pages are not indexable posts; skip them.
        return None

    # NOTE(review): the previous code also chained ``.replace('^', '^')``
    # onto title and text, which is a no-op as written.  If '^' has to be
    # escaped for Tipue Search, add the intended replacement here.
    return {
        # Non-breaking spaces become plain spaces in the title.
        'title': title.replace(u'\xa0', ' '),
        'url': url,
        'text': text,
        'tags': ' '.join(tags),
    }


def jsoner(nodes, path=TIPUE_CONTENT_PATH):
    """Write *nodes* as the ``tipuesearch`` JavaScript variable.

    Args:
        nodes: list of node dicts as returned by ``parser``.
        path: output file; defaults to the location Tipue Search expects.
    """
    # json.dumps escapes non-ASCII by default, so the output is pure ASCII.
    output = 'var tipuesearch = ' + json.dumps({'pages': nodes}) + ';'
    with open(path, 'w') as f:
        f.write(output)


def sitemaper(nodes, path=SITEMAP_PATH):
    """Write an XML sitemap listing the ``url`` of every node.

    Each URL is emitted with ``changefreq`` ``daily`` and ``priority``
    ``0.5``.  URLs are XML-escaped so characters such as ``&`` cannot
    break the document.

    Args:
        nodes: list of node dicts; only the ``url`` key is read.
        path: output file; its directory must already exist.
    """
    entries = u''.join(SITEMAP_URL.format(escape(n['url'])) for n in nodes)
    with io.open(path, 'w', encoding='utf-8') as f:
        f.write(SITEMAP_HEADER + entries + SITEMAP_FOOTER)


def main():
    """Build the search index and the sitemap for the site in the cwd."""
    if not os.path.exists(TIPUE_DIR):
        print('Error: place this script in hugo site root')
        return

    # NOTE(review): the walk starts at '.', not 'public', so HTML outside
    # the generated site (e.g. templates) is parsed too -- confirm whether
    # that is intended.
    parsed = (parser(page) for page in walker('.'))
    nodes = [node for node in parsed if node]
    jsoner(nodes)
    sitemaper(nodes)


if __name__ == '__main__':
    main()