diff options
author | Sarah German <sgerman@gitlab.com> | 2022-08-18 18:02:30 +0300 |
---|---|---|
committer | Marcel Amirault <mamirault@gitlab.com> | 2022-08-18 18:02:30 +0300 |
commit | ab74a97edcc8a2c366cc68ffa2c5434fb32df423 (patch) | |
tree | 8684d88d9b5d1a4b335d7bb2638242313463f60e | |
parent | 5a2916f6bcdf04f06376213c2e6435f5b44aaae1 (diff) |
Add Lunr.js index script
-rw-r--r-- | .gitlab/ci/build-and-deploy.gitlab-ci.yml | 7 | ||||
-rw-r--r-- | Makefile | 4 | ||||
-rw-r--r-- | lib/helpers/generic.rb | 7 | ||||
-rw-r--r-- | package.json | 4 | ||||
-rw-r--r-- | scripts/lunr/preindex.js | 106 | ||||
-rw-r--r-- | yarn.lock | 106 |
6 files changed, 232 insertions, 2 deletions
diff --git a/.gitlab/ci/build-and-deploy.gitlab-ci.yml b/.gitlab/ci/build-and-deploy.gitlab-ci.yml index e1ba016e..8cc8aa13 100644 --- a/.gitlab/ci/build-and-deploy.gitlab-ci.yml +++ b/.gitlab/ci/build-and-deploy.gitlab-ci.yml @@ -12,6 +12,8 @@ - bundle exec nanoc compile -VV # Create _redirects for Pages redirects - bundle exec rake redirects + # Build the Lunr.js search index if needed + - if [[ "$ALGOLIA_SEARCH" == "false" ]]; then node scripts/lunr/preindex.js; fi # Calculate sizes before and after minifying/gzipping the static files (HTML, CSS, JS) - SIZE_BEFORE=$(du -sh public/ | awk '{print $1}') # Minify the assets of the resulting site @@ -21,7 +23,7 @@ - SIZE_AFTER_MINIFY=$(du -sh public/ | awk '{print $1}') # Use gzip to compress static content for faster web serving # https://docs.gitlab.com/ee/user/project/pages/introduction.html#serving-compressed-assets - - find public/ -type f \( -iname "*.html" -o -iname "*.js" -o -iname "*.css" -o -iname "*.svg" \) -exec gzip --keep --best --force --verbose {} \; + - find public/ -type f \( -iname "*.html" -o -iname "*.js" -o -iname "*.css" -o -iname "*.svg" -o -iname "*.json" \) -exec gzip --keep --best --force --verbose {} \; - SIZE_AFTER_GZIP=$(du -sh public/ | awk '{print $1}') # Print size results - echo "Minify and compress the static assets (HTML, CSS, JS)" @@ -40,6 +42,7 @@ compile_prod: - .rules_prod - .build_base variables: + ALGOLIA_SEARCH: 'true' NANOC_ENV: 'production' # @@ -49,6 +52,8 @@ compile_dev: extends: - .rules_dev - .build_base + variables: + ALGOLIA_SEARCH: 'false' ############################################### @@ -158,3 +158,7 @@ markdownlint-tests: @yarn markdownlint doc/**/*.md test: setup rspec-tests jest-tests eslint-tests prettier-tests stylelint-tests hadolint-tests yamllint-tests markdownlint-tests + +build-lunr-index: + @printf "\n$(INFO)INFO: Building offline search index..$(INFO_END)\n" + node scripts/lunr/preindex.js
\ No newline at end of file diff --git a/lib/helpers/generic.rb b/lib/helpers/generic.rb index 1f7a091c..5db5f02f 100644 --- a/lib/helpers/generic.rb +++ b/lib/helpers/generic.rb @@ -53,5 +53,12 @@ module Nanoc::Helpers def show_banner? @items['/_data/banner.yaml'][:show_banner] end + + # + # Check if this environment is set to run Algolia search. + # + def algolia? + ENV['ALGOLIA_SEARCH'] == "true" + end end end diff --git a/package.json b/package.json index 6c51acfa..61e48fe8 100644 --- a/package.json +++ b/package.json @@ -28,7 +28,6 @@ "babel-jest": "^28.1.3", "eslint": "^8.22.0", "flush-promises": "^1.0.2", - "glob": "^8.0.3", "jest": "^28.1.3", "jest-environment-jsdom": "^28.1.3", "jest-fail-on-console": "^2.4.2", @@ -50,13 +49,16 @@ "@popperjs/core": "^2.11.6", "algoliasearch": "^4.14.2", "bootstrap": "^4.6.1", + "cheerio": "^1.0.0-rc.12", "clipboard": "^2.0.11", "compare-versions": "^4.1.3", "eslint-plugin-filenames": "^1.3.2", + "glob": "^8.0.3", "instantsearch.css": "^7.4.5", "instantsearch.js": "^4.44.0", "jquery": "^3.6.0", "mermaid": "^9.1.5", + "lunr": "^2.3.9", "pikaday": "^1.8.2", "rollup-plugin-import-css": "^3.0.3", "vue": "^2.7.0", diff --git a/scripts/lunr/preindex.js b/scripts/lunr/preindex.js new file mode 100644 index 00000000..ffae7d8c --- /dev/null +++ b/scripts/lunr/preindex.js @@ -0,0 +1,106 @@ +#!/usr/bin/env node + +/** + * @file preindex.js + * Creates data files required for Lunr search. + * + * This script creates two JSON files: + * - lunr-index.js: A serialized search index. + * - lunr-map.js: Maps index item IDs to their human-readable titles. + * + * @see https://lunrjs.com/guides/index_prebuilding.html + */ + +/* eslint-disable no-console */ + +const fs = require('fs'); +const lunr = require('lunr'); +const cheerio = require('cheerio'); +const glob = require('glob'); + +const htmlSrc = 'public/'; +const outputDir = `${htmlSrc}assets/javascripts`; + +/** + * Find all HTML files within a given path, + * then execute a callback function to build the index. + */ +const buildIndex = (path, callback) => { + glob(`${path}/**/*.html`, callback); +}; + +/** + * Extracts text from a given HTML element. + * + * @param {cheerio} $ + * A Cheerio page object + * @param {String} element + * An HTML element to search for + * + * @return {String} + * All text contained within the given element + */ +const getText = ($, element) => { + const headingText = []; + $(element) + .toArray() + .forEach((el) => { + headingText.push($(el).text().replace('\n', '')); + }); + return headingText.join(' '); +}; + +/** + * Build the index and output files. + */ +buildIndex(htmlSrc, (err, filenames) => { + if (err) { + console.error(err); + } + + // Create an array of objects containing each page's text content. + const pages = []; + Object.keys(filenames).forEach((key) => { + const filename = filenames[key]; + const $ = cheerio.load(fs.readFileSync(filename)); + const title = getText($, 'h1'); + + if (title.length) { + pages.push({ + id: filename.slice(htmlSrc.length), + h1: title, + h2: getText($, 'h2'), + h3: getText($, 'h3'), + }); + } + }); + + // Build the serialized Lunr search index. + const idx = lunr((e) => { + e.ref('id'); + e.field('h1', { boost: 10 }); + e.field('h2', { boost: 5 }); + e.field('h3', { boost: 2 }); + pages.forEach((doc) => { + e.add(doc); + }, e); + }); + + // Write the index file. + fs.writeFile(`${outputDir}/lunr-index.json`, JSON.stringify(idx), (fsErr) => { + if (fsErr) { + console.error(fsErr); + } + }); + + // Write the map file. + // We can drop h2s and h3s from this since we don't display those in results. + const pageMap = pages.map(({ h2, h3, ...rest }) => { + return rest; + }); + fs.writeFile(`${outputDir}/lunr-map.json`, JSON.stringify(pageMap), (fsErr) => { + if (fsErr) { + console.error(fsErr); + } + }); +}); @@ -2332,6 +2332,31 @@ character-parser@^2.2.0: dependencies: is-regex "^1.0.3" +cheerio-select@^2.1.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/cheerio-select/-/cheerio-select-2.1.0.tgz#4d8673286b8126ca2a8e42740d5e3c4884ae21b4" + integrity sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g== + dependencies: + boolbase "^1.0.0" + css-select "^5.1.0" + css-what "^6.1.0" + domelementtype "^2.3.0" + domhandler "^5.0.3" + domutils "^3.0.1" + +cheerio@^1.0.0-rc.12: + version "1.0.0-rc.12" + resolved "https://registry.yarnpkg.com/cheerio/-/cheerio-1.0.0-rc.12.tgz#788bf7466506b1c6bf5fae51d24a2c4d62e47683" + integrity sha512-VqR8m68vM46BNnuZ5NtnGBKIE/DfN0cRIzg9n40EIq9NOv90ayxLBXA8fXC5gquFRGJSTRqBq25Jt2ECLR431Q== + dependencies: + cheerio-select "^2.1.0" + dom-serializer "^2.0.0" + domhandler "^5.0.3" + domutils "^3.0.1" + htmlparser2 "^8.0.1" + parse5 "^7.0.0" + parse5-htmlparser2-tree-adapter "^7.0.0" + "chokidar@>=3.0.0 <4.0.0": version "3.5.3" resolved "https://registry.yarnpkg.com/chokidar/-/chokidar-3.5.3.tgz#1cf37c8707b932bd1af1ae22c0432e2acd1903bd" @@ -2571,6 +2596,17 @@ css-parse@~2.0.0: dependencies: css "^2.0.0" +css-select@^5.1.0: + version "5.1.0" + resolved "https://registry.yarnpkg.com/css-select/-/css-select-5.1.0.tgz#b8ebd6554c3637ccc76688804ad3f6a6fdaea8a6" + integrity sha512-nwoRF1rvRRnnCqqY7updORDsuqKzqYJ28+oSMaJMMgOauh3fvwHqMS7EZpIPqK8GL+g9mKxF1vP/ZjSeNjEVHg== + dependencies: + boolbase "^1.0.0" + css-what "^6.1.0" + domhandler "^5.0.2" + domutils "^3.0.1" + nth-check "^2.0.1" + css-selector-tokenizer@^0.7.0: version "0.7.3" resolved "https://registry.yarnpkg.com/css-selector-tokenizer/-/css-selector-tokenizer-0.7.3.tgz#735f26186e67c749aaf275783405cf0661fae8f1" @@ -2601,6 +2637,11 @@ css-values@^0.1.0: ends-with "^0.2.0" postcss-value-parser "^3.3.0" +css-what@^6.1.0: + version "6.1.0" + resolved "https://registry.yarnpkg.com/css-what/-/css-what-6.1.0.tgz#fb5effcf76f1ddea2c81bdfaa4de44e79bac70f4" + integrity sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw== + css@^2.0.0: version "2.2.4" resolved "https://registry.yarnpkg.com/css/-/css-2.2.4.tgz#c646755c73971f2bba6a601e2cf2fd71b1298929" @@ -3299,6 +3340,20 @@ dom-event-types@^1.0.0: resolved "https://registry.yarnpkg.com/dom-event-types/-/dom-event-types-1.1.0.tgz#120c1f92ddea7758db1ccee0a100a33c39f4701b" integrity sha512-jNCX+uNJ3v38BKvPbpki6j5ItVlnSqVV6vDWGS6rExzCMjsc39frLjm1n91o6YaKK6AZl0wLloItW6C6mr61BQ== +dom-serializer@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/dom-serializer/-/dom-serializer-2.0.0.tgz#e41b802e1eedf9f6cae183ce5e622d789d7d8e53" + integrity sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg== + dependencies: + domelementtype "^2.3.0" + domhandler "^5.0.2" + entities "^4.2.0" + +domelementtype@^2.3.0: + version "2.3.0" + resolved "https://registry.yarnpkg.com/domelementtype/-/domelementtype-2.3.0.tgz#5c45e8e869952626331d7aab326d01daf65d589d" + integrity sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw== + domexception@^4.0.0: version "4.0.0" resolved "https://registry.yarnpkg.com/domexception/-/domexception-4.0.0.tgz#4ad1be56ccadc86fc76d033353999a8037d03673" @@ -3306,11 +3361,27 @@ domexception@^4.0.0: dependencies: webidl-conversions "^7.0.0" +domhandler@^5.0.1, domhandler@^5.0.2, domhandler@^5.0.3: + version "5.0.3" + resolved "https://registry.yarnpkg.com/domhandler/-/domhandler-5.0.3.tgz#cc385f7f751f1d1fc650c21374804254538c7d31" + integrity sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w== + dependencies: + domelementtype "^2.3.0" + dompurify@2.3.10, dompurify@^2.3.10: version "2.3.10" resolved "https://registry.yarnpkg.com/dompurify/-/dompurify-2.3.10.tgz#901f7390ffe16a91a5a556b94043314cd4850385" integrity sha512-o7Fg/AgC7p/XpKjf/+RC3Ok6k4St5F7Q6q6+Nnm3p2zGWioAY6dh0CbbuwOhH2UcSzKsdniE/YnE2/92JcsA+g== +domutils@^3.0.1: + version "3.0.1" + resolved "https://registry.yarnpkg.com/domutils/-/domutils-3.0.1.tgz#696b3875238338cb186b6c0612bd4901c89a4f1c" + integrity sha512-z08c1l761iKhDFtfXO04C7kTdPBLi41zwOZl00WS8b5eiaebNpY00HKbztwBq+e3vyqWNwWF3mP9YLUeqIrF+Q== + dependencies: + dom-serializer "^2.0.0" + domelementtype "^2.3.0" + domhandler "^5.0.1" + echarts@^5.3.2: version "5.3.3" resolved "https://registry.yarnpkg.com/echarts/-/echarts-5.3.3.tgz#df97b09c4c0e2ffcdfb44acf518d50c50e0b838e" @@ -3354,6 +3425,11 @@ ends-with@^0.2.0: resolved "https://registry.yarnpkg.com/ends-with/-/ends-with-0.2.0.tgz#2f9da98d57a50cfda4571ce4339000500f4e6b8a" integrity sha512-lRppY4dK3VkqBdR242sKcAJeYc8Gf/DhoX9AWvWI2RzccmLnqBQfwm2k4oSDv5MPDjUqawCauXhZkyWxkVhRsg== +entities@^4.2.0, entities@^4.3.0: + version "4.3.1" + resolved "https://registry.yarnpkg.com/entities/-/entities-4.3.1.tgz#c34062a94c865c322f9d67b4384e4169bcede6a4" + integrity sha512-o4q/dYJlmyjP2zfnaWDUC6A3BQFmVTX+tZPezK7k0GLSU9QYCauscf5Y+qcEPzKL+EixVouYDgLQK5H9GrLpkg== + entities@~3.0.1: version "3.0.1" resolved "https://registry.yarnpkg.com/entities/-/entities-3.0.1.tgz#2b887ca62585e96db3903482d336c1006c3001d4" @@ -4174,6 +4250,16 @@ html-tags@^3.2.0: resolved "https://registry.yarnpkg.com/html-tags/-/html-tags-3.2.0.tgz#dbb3518d20b726524e4dd43de397eb0a95726961" integrity sha512-vy7ClnArOZwCnqZgvv+ddgHgJiAFXe3Ge9ML5/mBctVJoUoYPCdxVucOywjDARn6CVoh3dRSFdPHy2sX80L0Wg== +htmlparser2@^8.0.1: + version "8.0.1" + resolved "https://registry.yarnpkg.com/htmlparser2/-/htmlparser2-8.0.1.tgz#abaa985474fcefe269bc761a779b544d7196d010" + integrity sha512-4lVbmc1diZC7GUJQtRQ5yBAeUCL1exyMwmForWkRLnwyzWBFxN633SALPMGYaWZvKe9j1pRZJpauvmxENSp/EA== + dependencies: + domelementtype "^2.3.0" + domhandler "^5.0.2" + domutils "^3.0.1" + entities "^4.3.0" + http-proxy-agent@^5.0.0: version "5.0.0" resolved "https://registry.yarnpkg.com/http-proxy-agent/-/http-proxy-agent-5.0.0.tgz#5129800203520d434f142bc78ff3c170800f2b43" @@ -5256,6 +5342,11 @@ lru-cache@^6.0.0: dependencies: yallist "^4.0.0" +lunr@^2.3.9: + version "2.3.9" + resolved "https://registry.yarnpkg.com/lunr/-/lunr-2.3.9.tgz#18b123142832337dd6e964df1a5a7707b25d35e1" + integrity sha512-zTU3DaZaF3Rt9rhN3uBMGQD3dD2/vFQqnvZCDv4dl5iOzq2IZQqTxu90r4E5J+nP70J3ilqVCrbho2eWaeW8Ow== + magic-string@^0.25.7: version "0.25.9" resolved "https://registry.yarnpkg.com/magic-string/-/magic-string-0.25.9.tgz#de7f9faf91ef8a1c91d02c2e5314c8277dbcdd1c" @@ -5737,11 +5828,26 @@ parse-json@^5.0.0, parse-json@^5.2.0: json-parse-even-better-errors "^2.3.0" lines-and-columns "^1.1.6" +parse5-htmlparser2-tree-adapter@^7.0.0: + version "7.0.0" + resolved "https://registry.yarnpkg.com/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-7.0.0.tgz#23c2cc233bcf09bb7beba8b8a69d46b08c62c2f1" + integrity sha512-B77tOZrqqfUfnVcOrUvfdLbz4pu4RopLD/4vmu3HUPswwTA8OH0EMW9BlWR2B0RCoiZRAHEUu7IxeP1Pd1UU+g== + dependencies: + domhandler "^5.0.2" + parse5 "^7.0.0" + parse5@6.0.1: version "6.0.1" resolved "https://registry.yarnpkg.com/parse5/-/parse5-6.0.1.tgz#e1a1c085c569b3dc08321184f19a39cc27f7c30b" integrity sha512-Ofn/CTFzRGTTxwpNEs9PP93gXShHcTq255nzRYSKe8AkVpZY7e1fpmTfOyoIvjP5HG7Z2ZM7VS9PPhQGW2pOpw== +parse5@^7.0.0: + version "7.0.0" + resolved "https://registry.yarnpkg.com/parse5/-/parse5-7.0.0.tgz#51f74a5257f5fcc536389e8c2d0b3802e1bfa91a" + integrity sha512-y/t8IXSPWTuRZqXc0ajH/UwDj4mnqLEbSttNbThcFhGrZuOyoyvNBO85PBp2jQa55wY9d07PBNjsK8ZP3K5U6g== + dependencies: + entities "^4.3.0" + path-exists@^3.0.0: version "3.0.0" resolved "https://registry.yarnpkg.com/path-exists/-/path-exists-3.0.0.tgz#ce0ebeaa5f78cb18925ea7d810d7b59b010fd515" |