#!/usr/bin/env node

/**
 * @file pages_not_in_nav.js
 * Generates a report of pages which are not included in navigation.yaml.
 *
 * Reads `nanoc.yaml` and `content/_data/navigation.yaml` relative to the current
 * working directory, scans every Markdown file below each data source's
 * `content_dir`, and prints a JSON array of `{ url, stage, group }` objects to
 * stdout for each page whose URL path is not found in the navigation data.
 *
 * Provenance: commit e60378f84bbc7762b935fcae66c91e1d0bf0b9a8,
 * "Add a script to report on pages not in the global nav"
 * (Sarah German, Tue, 8 Nov 2022 19:08:20 +0000).
 */

/* eslint-disable no-console */

const fs = require('fs');
const glob = require('glob');
const yaml = require('js-yaml');
const fm = require('front-matter');

// Base URL used to build the links in the report.
const SITE_URL = 'https://docs.gitlab.com';

// Sections that are intentionally not in the nav; pages under them are never reported.
const EXCLUDED_SECTIONS = [
  '/architecture/blueprints',
  '/user/application_security/dast/checks/',
  '/legal/',
  '/drawers/',
  '/adr/',
];

// Load site data sources from nanoc config, skipping the one mounted at the site root.
const nanocConfig = yaml.load(fs.readFileSync('nanoc.yaml', 'utf8'));
const dataSources = nanocConfig.data_sources.filter((source) => source.items_root !== '/');

// Load the global navigation data file and serialize it so that URL paths can be
// looked up with a plain substring search.
const navYaml = yaml.load(fs.readFileSync('content/_data/navigation.yaml', 'utf8'));
const nav = JSON.stringify(navYaml);

/**
 * Reads a markdown file and extracts the fields needed for the report.
 *
 * @param {string} filename - Path of the markdown file to read.
 * @returns {{filename: string, isRedirect: boolean, stage: *, group: *}}
 *   `stage` and `group` come from the file's front matter and are `undefined`
 *   when the attribute is absent.
 */
const getPageData = (filename) => {
  const contents = fs.readFileSync(filename, 'utf-8');
  // Parse the front matter once and reuse the result for both attributes.
  const { stage, group } = fm(contents).attributes;
  return {
    filename,
    // NOTE(review): this matches "redirect_to" anywhere in the file, not only in
    // the front matter, so a page that merely mentions the string is skipped too.
    isRedirect: contents.includes('redirect_to'),
    stage,
    group,
  };
};

/**
 * Converts a markdown filepath into a string that matches the URL path on the website.
 *
 * The data source's `content_dir` is removed from the filename, a trailing
 * `index.md` path segment is dropped, any other trailing `.md` becomes `.html`,
 * and the result is prefixed with `items_root` stripped of its slashes.
 *
 * @param {string} filename - Markdown file path as returned by glob.
 * @param {{content_dir: string, items_root: string}} source - nanoc data source.
 * @returns {string} URL path without a leading slash.
 */
const getUrlPath = (filename, source) => {
  const relativePath = filename
    .replace(source.content_dir, '')
    // Anchor both rewrites to the end of the path so that names which merely
    // contain "index.md" or ".md" (e.g. "reindex.md") are not mangled.
    .replace(/(^|\/)index\.md$/, '$1')
    .replace(/\.md$/, '.html');

  return `${source.items_root.replaceAll('/', '')}${relativePath}`;
};

/**
 * Tells whether a URL path belongs to a section that is intentionally kept out of the nav.
 *
 * @param {string} path - URL path produced by getUrlPath.
 * @returns {boolean} True when the path contains one of EXCLUDED_SECTIONS.
 */
const isExcluded = (path) => EXCLUDED_SECTIONS.some((section) => path.includes(section));

// Loop through each data source's markdown files.
const lostPages = [];
for (const source of dataSources) {
  for (const filename of glob.sync(`${source.content_dir}/**/*.md`)) {
    const { isRedirect, stage, group } = getPageData(filename);
    if (isRedirect) {
      continue;
    }

    const path = getUrlPath(filename, source);

    // NOTE(review): this is a substring test against the serialized navigation, so
    // a path that is a prefix of some other nav URL also counts as present.
    if (nav.includes(path) || isExcluded(path)) {
      continue;
    }

    lostPages.push({
      url: `${SITE_URL}/${path}`,
      stage,
      group,
    });
  }
}

// Return results as JSON.
console.log(JSON.stringify(lostPages));