Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitlab-docs.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSarah German <sgerman@gitlab.com>2022-08-18 18:02:30 +0300
committerMarcel Amirault <mamirault@gitlab.com>2022-08-18 18:02:30 +0300
commitab74a97edcc8a2c366cc68ffa2c5434fb32df423 (patch)
tree8684d88d9b5d1a4b335d7bb2638242313463f60e /scripts
parent5a2916f6bcdf04f06376213c2e6435f5b44aaae1 (diff)
Add Lunr.js index script
Diffstat (limited to 'scripts')
-rw-r--r--scripts/lunr/preindex.js106
1 files changed, 106 insertions, 0 deletions
diff --git a/scripts/lunr/preindex.js b/scripts/lunr/preindex.js
new file mode 100644
index 00000000..ffae7d8c
--- /dev/null
+++ b/scripts/lunr/preindex.js
@@ -0,0 +1,106 @@
+#!/usr/bin/env node
+
+/**
+ * @file preindex.js
+ * Creates data files required for Lunr search.
+ *
+ * This script creates two JSON files:
+ * - lunr-index.js: A serialized search index.
+ * - lunr-map.js: Maps index item IDs to their human-readable titles.
+ *
+ * @see https://lunrjs.com/guides/index_prebuilding.html
+ */
+
+/* eslint-disable no-console */
+
+const fs = require('fs');
+const lunr = require('lunr');
+const cheerio = require('cheerio');
+const glob = require('glob');
+
+const htmlSrc = 'public/';
+const outputDir = `${htmlSrc}assets/javascripts`;
+
+/**
+ * Find all HTML files within a given path,
+ * then execute a callback function to build the index.
+ */
+const buildIndex = (path, callback) => {
+ glob(`${path}/**/*.html`, callback);
+};
+
+/**
+ * Extracts text from a given HTML element.
+ *
+ * @param {cheerio} $
+ * A Cheerio page object
+ * @param {String} element
+ * An HTML element to search for
+ *
+ * @return {String}
+ * All text contained within the given element
+ */
+const getText = ($, element) => {
+ const headingText = [];
+ $(element)
+ .toArray()
+ .forEach((el) => {
+ headingText.push($(el).text().replace('\n', ''));
+ });
+ return headingText.join(' ');
+};
+
+/**
+ * Build the index and output files.
+ */
+buildIndex(htmlSrc, (err, filenames) => {
+ if (err) {
+ console.error(err);
+ }
+
+ // Create an array of objects containing each page's text content.
+ const pages = [];
+ Object.keys(filenames).forEach((key) => {
+ const filename = filenames[key];
+ const $ = cheerio.load(fs.readFileSync(filename));
+ const title = getText($, 'h1');
+
+ if (title.length) {
+ pages.push({
+ id: filename.slice(htmlSrc.length),
+ h1: title,
+ h2: getText($, 'h2'),
+ h3: getText($, 'h3'),
+ });
+ }
+ });
+
+ // Build the serialized Lunr search index.
+ const idx = lunr((e) => {
+ e.ref('id');
+ e.field('h1', { boost: 10 });
+ e.field('h2', { boost: 5 });
+ e.field('h3', { boost: 2 });
+ pages.forEach((doc) => {
+ e.add(doc);
+ }, e);
+ });
+
+ // Write the index file.
+ fs.writeFile(`${outputDir}/lunr-index.json`, JSON.stringify(idx), (fsErr) => {
+ if (fsErr) {
+ console.error(fsErr);
+ }
+ });
+
+ // Write the map file.
+ // We can drop h2s and h3s from this since we don't display those in results.
+ const pageMap = pages.map(({ h2, h3, ...rest }) => {
+ return rest;
+ });
+ fs.writeFile(`${outputDir}/lunr-map.json`, JSON.stringify(pageMap), (fsErr) => {
+ if (fsErr) {
+ console.error(fsErr);
+ }
+ });
+});