From 5fd26da477129af474f6b8eee41e61ffc2bbe188 Mon Sep 17 00:00:00 2001 From: Facundo Tuesca Date: Thu, 9 Jun 2022 14:14:59 -0300 Subject: tools: add script for vulnerability checking This change adds a new script that queries vulnerability databases in order to find if any of Node's dependencies is vulnerable. The `deps/` directory of Node's repo is scanned to gather the currently used version of each dependency, and if any vulnerability is found for that version a message is printed out with its ID and a link to a description of the issue. Refs: nodejs/security-wg#802 PR-URL: https://github.com/nodejs/node/pull/43362 Reviewed-By: Michael Dawson Reviewed-By: Vladimir de Turckheim Reviewed-By: Richard Lau Reviewed-By: Rafael Gonzaga --- tools/dep_checker/README.md | 62 +++++++++++++ tools/dep_checker/dependencies.py | 97 ++++++++++++++++++++ tools/dep_checker/main.py | 168 ++++++++++++++++++++++++++++++++++ tools/dep_checker/requirements.txt | 3 + tools/dep_checker/versions_parser.py | 169 +++++++++++++++++++++++++++++++++++ 5 files changed, 499 insertions(+) create mode 100644 tools/dep_checker/README.md create mode 100644 tools/dep_checker/dependencies.py create mode 100644 tools/dep_checker/main.py create mode 100644 tools/dep_checker/requirements.txt create mode 100644 tools/dep_checker/versions_parser.py (limited to 'tools') diff --git a/tools/dep_checker/README.md b/tools/dep_checker/README.md new file mode 100644 index 00000000000..69dde6badab --- /dev/null +++ b/tools/dep_checker/README.md @@ -0,0 +1,62 @@ +# Node.js dependency vulnerability checker + +This script queries the [National Vulnerability Database (NVD)](https://nvd.nist.gov/) and +the [GitHub Advisory Database](https://github.com/advisories) for vulnerabilities found +in Node's dependencies. 
+ +## How to use + +In order to query the GitHub Advisory Database, +a [Personal Access Token](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token) +has to be created (no permissions need to be given to the token, since it's only used to query the public database). +Once acquired, the script can be run as follows: + +```shell +cd node/tools/dep_checker/ +pip install -r requirements.txt + +# Python >= 3.9 required +python main.py --gh-token=$PERSONAL_ACCESS_TOKEN + +# or to skip querying the GitHub Advisory Database, simply run: +python main.py +``` + +## Example output + +``` +WARNING: New vulnerabilities found +- npm (version 1.2.1) : + - GHSA-v3jv-wrf4-5845: https://github.com/advisories/GHSA-v3jv-wrf4-5845 + - GHSA-93f3-23rq-pjfp: https://github.com/advisories/GHSA-93f3-23rq-pjfp + - GHSA-m6cx-g6qm-p2cx: https://github.com/advisories/GHSA-m6cx-g6qm-p2cx + - GHSA-4328-8hgf-7wjr: https://github.com/advisories/GHSA-4328-8hgf-7wjr + - GHSA-x8qc-rrcw-4r46: https://github.com/advisories/GHSA-x8qc-rrcw-4r46 + - GHSA-m5h6-hr3q-22h5: https://github.com/advisories/GHSA-m5h6-hr3q-22h5 +- acorn (version 6.0.0) : + - GHSA-6chw-6frg-f759: https://github.com/advisories/GHSA-6chw-6frg-f759 + +For each dependency and vulnerability, check the following: +- Check the vulnerability's description to see if it applies to the dependency as +used by Node. If not, the vulnerability ID (either a CVE or a GHSA) can be added to the ignore list in +dependencies.py. IMPORTANT: Only do this if certain that the vulnerability found is a false positive. +- Otherwise, the vulnerability found must be remediated by updating the dependency in the Node repo to a +non-affected version. +``` + +## Implementation details + +- For each dependency in Node's `deps/` folder, the script parses their version number and queries the databases to find + vulnerabilities for that specific version. 
# ---------------------------------------------------------------------------
# Patch context that shares these source lines (kept verbatim, not module code)
#
# Tail of tools/dep_checker/README.md, "Implementation details":
#   - The queries can return false positives (
#     see [this](https://github.com/nodejs/security-wg/issues/802#issuecomment-1144207417) comment for an example). These
#     can be ignored by adding the vulnerability to the `ignore_list` in `dependencies.py`
#   - The script takes a while to finish (~2 min) because queries to the NVD
#     are [rate-limited](https://nvd.nist.gov/developers)
#   - If any vulnerabilities are found, the script returns 1 and prints out a list with the ID and a link to a description
#     of
#     the vulnerability. This is the case except when the ID matches one in the ignore-list (inside `dependencies.py`) in
#     which case the vulnerability is ignored.
#
# diff --git a/tools/dep_checker/dependencies.py b/tools/dep_checker/dependencies.py
# new file mode 100644
# index 00000000000..0951dae5ab5
# --- /dev/null
# +++ b/tools/dep_checker/dependencies.py
# @@ -0,0 +1,97 @@
# ---------------------------------------------------------------------------
"""Registry of Node's dependencies: version, CPE, npm name and search keyword.

The data here drives the queries sent to the different vulnerability
databases; versions are read from the `deps/` tree via `versions_parser`.
"""

from typing import Optional
import versions_parser as vp


class CPE:
    """Vendor/product pair used to build a CPE match string for a dependency."""

    def __init__(self, vendor: str, product: str):
        self.vendor, self.product = vendor, product


class Dependency:
    """One dependency together with the identifiers used to look it up.

    Attributes:
        version: version string of the dependency.
        cpe: vendor/product pair, or None when no CPE is known.
        npm_name: package name in the npm ecosystem, or None.
        keyword: free-text keyword for description searches, or None.
    """

    def __init__(
        self,
        version: str,
        cpe: Optional[CPE] = None,
        npm_name: Optional[str] = None,
        keyword: Optional[str] = None,
    ):
        self.version = version
        self.cpe = cpe
        self.npm_name = npm_name
        self.keyword = keyword

    def get_cpe(self) -> Optional[str]:
        """Return the CPE 2.3 match string for this version, or None without a CPE."""
        cpe = self.cpe
        if not cpe:
            return None
        return f"cpe:2.3:a:{cpe.vendor}:{cpe.product}:{self.version}:*:*:*:*:*:*:*"


# Vulnerability IDs that were reviewed and judged not to apply to Node.
ignore_list: list[str] = [
    "CVE-2018-25032",  # zlib, already fixed in the fork Node uses (Chromium's)
    "CVE-2007-5536",  # openssl, old and only in combination with HP-UX
    "CVE-2019-0190",  # openssl, can be only triggered in combination with Apache HTTP Server version 2.4.37
]

# Display name -> lookup data. Entries without a CPE rely on `keyword` and/or
# `npm_name` instead. Insertion order is the order in which queries are sent.
dependencies: dict[str, Dependency] = {
    "zlib": Dependency(
        version=vp.get_zlib_version(),
        cpe=CPE(vendor="zlib", product="zlib"),
    ),
    # TODO: Add V8
    # "V8": Dependency("cpe:2.3:a:google:chrome:*:*:*:*:*:*:*:*", "v8"),
    "uvwasi": Dependency(version=vp.get_uvwasi_version(), keyword="uvwasi"),
    "libuv": Dependency(
        version=vp.get_libuv_version(),
        cpe=CPE(vendor="libuv_project", product="libuv"),
    ),
    "undici": Dependency(
        version=vp.get_undici_version(),
        npm_name="undici",
        keyword="undici",
    ),
    "OpenSSL": Dependency(
        version=vp.get_openssl_version(),
        cpe=CPE(vendor="openssl", product="openssl"),
    ),
    "npm": Dependency(
        version=vp.get_npm_version(),
        cpe=CPE(vendor="npmjs", product="npm"),
        npm_name="npm",
    ),
    "nghttp3": Dependency(version=vp.get_nghttp3_version(), keyword="nghttp3"),
    "ngtcp2": Dependency(version=vp.get_ngtcp2_version(), keyword="ngtcp2"),
    "nghttp2": Dependency(
        version=vp.get_nghttp2_version(),
        cpe=CPE(vendor="nghttp2", product="nghttp2"),
    ),
    "llhttp": Dependency(
        version=vp.get_llhttp_version(),
        cpe=CPE(vendor="llhttp", product="llhttp"),
        npm_name="llhttp",
    ),
    "ICU": Dependency(
        version=vp.get_icu_version(),
        cpe=CPE(vendor="icu-project", product="international_components_for_unicode"),
    ),
    # Version is pinned by hand here rather than parsed from the deps tree.
    "HdrHistogram": Dependency(version="0.11.2", keyword="hdrhistogram"),
    "corepack": Dependency(
        version=vp.get_corepack_version(),
        npm_name="corepack",
        keyword="corepack",
    ),
    "CJS Module Lexer": Dependency(
        version=vp.get_cjs_lexer_version(),
        npm_name="cjs-module-lexer",
        keyword="cjs-module-lexer",
    ),
    "c-ares": Dependency(
        version=vp.get_c_ares_version(),
        cpe=CPE(vendor="c-ares_project", product="c-ares"),
    ),
    "brotli": Dependency(
        version=vp.get_brotli_version(),
        cpe=CPE(vendor="google", product="brotli"),
    ),
    "acorn": Dependency(version=vp.get_acorn_version(), npm_name="acorn"),
}

# Patch context that follows on the same source line (kept verbatim):
# diff --git a/tools/dep_checker/main.py b/tools/dep_checker/main.py
# new file mode 100644
# index 00000000000..cccb435f838
# ---
# Patch context that shares these source lines (kept verbatim, not module code):
# /dev/null
# +++ b/tools/dep_checker/main.py
# @@ -0,0 +1,168 @@
""" Node.js dependency vulnerability checker

This script queries the National Vulnerability Database (NVD) and the GitHub Advisory Database for vulnerabilities found
in Node's dependencies.

For each dependency in Node's `deps/` folder, the script parses their version number and queries the databases to find
vulnerabilities for that specific version.

If any vulnerabilities are found, the script returns 1 and prints out a list with the ID and a link to a description of
the vulnerability. This is the case except when the ID matches one in the ignore-list (inside `dependencies.py`) in
which case the vulnerability is ignored.
"""

import re
import sys
from argparse import ArgumentParser
from collections import defaultdict
from dependencies import ignore_list, dependencies
from gql import gql, Client
from gql.transport.aiohttp import AIOHTTPTransport
from nvdlib import searchCVE  # type: ignore
from packaging.specifiers import SpecifierSet


class Vulnerability:
    """A single advisory: its identifier (CVE or GHSA) and a link describing it."""

    def __init__(self, id: str, url: str):
        self.id = id
        self.url = url


vulnerability_found_message = """For each dependency and vulnerability, check the following:
- Check that the dependency's version printed by the script corresponds to the version present in the Node repo.
If not, update dependencies.py with the actual version number and run the script again.
- If the version is correct, check the vulnerability's description to see if it applies to the dependency as
used by Node. If not, the vulnerability ID (either a CVE or a GHSA) can be added to the ignore list in
dependencies.py. IMPORTANT: Only do this if certain that the vulnerability found is a false positive.
- Otherwise, the vulnerability found must be remediated by updating the dependency in the Node repo to a
non-affected version, followed by updating dependencies.py with the new version.
"""


# NOTE(review): `last:10` requests at most ten entries per package, so older
# advisories for a package with a longer history are not inspected.
github_vulnerabilities_query = gql(
    """
    query($package_name:String!) {
      securityVulnerabilities(package:$package_name, last:10) {
        nodes {
          vulnerableVersionRange
          advisory {
            ghsaId
            permalink
            withdrawnAt
          }
        }
      }
    }
"""
)

# A lone "=" that is not part of "==", ">=", "<=", "!=" or "~=".
_BARE_EQUALS = re.compile(r"(?<![<>=!~])=(?!=)")


def _in_vulnerable_range(version: str, vulnerable_range: str) -> bool:
    """Return True when *version* falls inside a GitHub `vulnerableVersionRange`.

    GitHub writes an exact-version range as "= 0.2.0", a spelling that
    `SpecifierSet` does not accept, so a bare "=" is rewritten to "==" before
    the range is parsed. Other operators (">=", "<", ...) are left untouched.
    """
    normalized_range = _BARE_EQUALS.sub("==", vulnerable_range)
    return version in SpecifierSet(normalized_range)


def query_ghad(gh_token: str) -> dict[str, list[Vulnerability]]:
    """Queries the GitHub Advisory Database for vulnerabilities reported for Node's dependencies.

    The database supports querying by package name in the NPM ecosystem, so we only send queries for the dependencies
    that are also NPM packages.

    Args:
        gh_token: GitHub token sent as the bearer credential of every request.

    Returns:
        A mapping from dependency name to the advisories that are not
        withdrawn, not in `ignore_list`, and whose vulnerable range contains
        the dependency's current version. Dependencies without such
        advisories are absent from the mapping.
    """

    deps_in_npm = {
        name: dep for name, dep in dependencies.items() if dep.npm_name is not None
    }

    transport = AIOHTTPTransport(
        url="https://api.github.com/graphql",
        headers={"Authorization": f"bearer {gh_token}"},
    )
    client = Client(
        transport=transport,
        fetch_schema_from_transport=True,
        serialize_variables=True,
        parse_results=True,
    )

    found_vulnerabilities: dict[str, list[Vulnerability]] = defaultdict(list)
    for name, dep in deps_in_npm.items():
        variables_package = {
            "package_name": dep.npm_name,
        }
        result = client.execute(
            github_vulnerabilities_query, variable_values=variables_package
        )
        matching_vulns = [
            v
            for v in result["securityVulnerabilities"]["nodes"]
            if v["advisory"]["withdrawnAt"] is None
            and _in_vulnerable_range(dep.version, v["vulnerableVersionRange"])
            and v["advisory"]["ghsaId"] not in ignore_list
        ]
        # Only touch the defaultdict when there is something to report, so
        # that an empty result stays falsy for the caller.
        if matching_vulns:
            found_vulnerabilities[name].extend(
                [
                    Vulnerability(
                        id=vuln["advisory"]["ghsaId"], url=vuln["advisory"]["permalink"]
                    )
                    for vuln in matching_vulns
                ]
            )

    return found_vulnerabilities


def query_nvd() -> dict[str, list[Vulnerability]]:
    """Queries the National Vulnerability Database for vulnerabilities reported for Node's dependencies.

    The database supports querying by CPE (Common Platform Enumeration) or by a keyword present in the CVE's
    description.
    Since some of Node's dependencies don't have an associated CPE, we use their name as a keyword in the query.

    Returns:
        A mapping from dependency name to the CVEs returned for it whose ID
        is not in `ignore_list`. Dependencies without results are absent.
    """
    deps_in_nvd = {
        name: dep
        for name, dep in dependencies.items()
        if dep.cpe is not None or dep.keyword is not None
    }
    found_vulnerabilities: dict[str, list[Vulnerability]] = defaultdict(list)
    for name, dep in deps_in_nvd.items():
        query_results = [
            cve
            for cve in searchCVE(cpeMatchString=dep.get_cpe(), keyword=dep.keyword)
            if cve.id not in ignore_list
        ]
        if query_results:
            found_vulnerabilities[name].extend(
                [Vulnerability(id=cve.id, url=cve.url) for cve in query_results]
            )

    return found_vulnerabilities


def main() -> int:
    """Run all queries and print a report.

    Returns:
        0 when no vulnerability was found, 1 otherwise (used as exit status).
    """
    parser = ArgumentParser(
        description="Query the NVD and the GitHub Advisory Database for new vulnerabilities in Node's dependencies"
    )
    parser.add_argument(
        "--gh-token",
        help="the GitHub authentication token for querying the GH Advisory Database",
    )
    gh_token = parser.parse_args().gh_token
    if gh_token is None:
        print(
            "Warning: GitHub authentication token not provided, skipping GitHub Advisory Database queries"
        )
    ghad_vulnerabilities: dict[str, list[Vulnerability]] = (
        {} if gh_token is None else query_ghad(gh_token)
    )
    nvd_vulnerabilities = query_nvd()

    if not ghad_vulnerabilities and not nvd_vulnerabilities:
        print(f"No new vulnerabilities found ({len(ignore_list)} ignored)")
        return 0

    print("WARNING: New vulnerabilities found")
    for source in (ghad_vulnerabilities, nvd_vulnerabilities):
        for name, vulns in source.items():
            print(f"- {name} (version {dependencies[name].version}) :")
            for v in vulns:
                print(f"\t- {v.id}: {v.url}")
    print(f"\n{vulnerability_found_message}")
    return 1


if __name__ == "__main__":
    # sys.exit rather than the `exit` helper, which only exists when the
    # `site` module has been loaded.
    sys.exit(main())

# Patch context that follows on the same source line (kept verbatim):
# diff --git a/tools/dep_checker/requirements.txt b/tools/dep_checker/requirements.txt
# new file mode 100644
# index 00000000000..894840287e8
# --- /dev/null
# +++ b/tools/dep_checker/requirements.txt
# @@ -0,0 +1,3 @@
# +gql[aiohttp]
# +nvdlib
# +packaging
# diff --git a/tools/dep_checker/versions_parser.py b/tools/dep_checker/versions_parser.py
# new file
# Patch context that shares these source lines (kept verbatim, not module code):
# mode 100644
# index 00000000000..3a385bf1d2d
# --- /dev/null
# +++ b/tools/dep_checker/versions_parser.py
# @@ -0,0 +1,169 @@
"""Utility functions to parse version numbers from each of Node's dependencies

All paths are relative to the current working directory, so the functions
must be called with `tools/dep_checker/` as the working directory.
"""

from pathlib import Path
import re


def _search_file(path, pattern: str, error_message: str) -> dict[str, str]:
    """Return the named groups of the first match of *pattern* in *path*.

    The file is read as UTF-8. Raises RuntimeError carrying *error_message*
    when the pattern does not occur in the file.
    """
    with open(path, "r", encoding="utf-8") as f:
        matches = re.search(pattern, f.read())
    if matches is None:
        raise RuntimeError(error_message)
    return matches.groupdict()


def _dotted_version(path: str, pattern: str, name: str) -> str:
    """Return "major.minor.patch" from a pattern with those three named groups."""
    versions = _search_file(
        path, pattern, f"Error extracting version number for {name}"
    )
    return f"{versions['major']}.{versions['minor']}.{versions['patch']}"


def _quoted_version(path: str, pattern: str, name: str) -> str:
    """Return the `version` group of *pattern*, as found in the file at *path*."""
    return _search_file(
        path, pattern, f"Error extracting version number for {name}"
    )["version"]


def get_package_json_version(path: Path) -> str:
    """Return the first `"version": "..."` value found in a package.json file.

    Raises RuntimeError when the file contains no such entry.
    """
    return _search_file(
        path,
        r'"version": "(?P<version>.*)"',
        f"Error extracting version number from {path}",
    )["version"]


def get_acorn_version() -> str:
    """Return acorn's version from its package.json."""
    return get_package_json_version(Path("../../deps/acorn/acorn/package.json"))


def get_brotli_version() -> str:
    """Return brotli's version decoded from the packed BROTLI_VERSION constant.

    The constant is laid out as (MAJOR << 24) | (MINOR << 12) | PATCH, so the
    minor and patch components are 12 bits wide each.
    """
    hex_version = _quoted_version(
        "../../deps/brotli/c/common/version.h",
        r"#define BROTLI_VERSION (?P<version>.*)",
        "brotli",
    )
    packed = int(hex_version, 16)
    major_version = packed >> 24
    minor_version = (packed >> 12) & 0xFFF
    patch_version = packed & 0xFFF
    return f"{major_version}.{minor_version}.{patch_version}"


def get_c_ares_version() -> str:
    """Return c-ares' version from ARES_VERSION_STR."""
    return _quoted_version(
        "../../deps/cares/include/ares_version.h",
        r'#define ARES_VERSION_STR "(?P<version>.*)"',
        "c-ares",
    )


def get_cjs_lexer_version() -> str:
    """Return cjs-module-lexer's version from its package.json."""
    return get_package_json_version(Path("../../deps/cjs-module-lexer/package.json"))


def get_corepack_version() -> str:
    """Return corepack's version from its package.json."""
    return get_package_json_version(Path("../../deps/corepack/package.json"))


def get_icu_version() -> str:
    """Return ICU's version from U_ICU_VERSION."""
    return _quoted_version(
        "../../deps/icu-small/source/common/unicode/uvernum.h",
        r'#define U_ICU_VERSION "(?P<version>.*)"',
        "ICU",
    )


def get_llhttp_version() -> str:
    """Return llhttp's version from its three consecutive LLHTTP_VERSION_* defines."""
    return _dotted_version(
        "../../deps/llhttp/include/llhttp.h",
        r"#define LLHTTP_VERSION_MAJOR (?P<major>.*)\n"
        r"#define LLHTTP_VERSION_MINOR (?P<minor>.*)\n"
        r"#define LLHTTP_VERSION_PATCH (?P<patch>.*)",
        "llhttp",
    )


def get_nghttp2_version() -> str:
    """Return nghttp2's version from NGHTTP2_VERSION."""
    return _quoted_version(
        "../../deps/nghttp2/lib/includes/nghttp2/nghttp2ver.h",
        r'#define NGHTTP2_VERSION "(?P<version>.*)"',
        "nghttp2",
    )


def get_ngtcp2_version() -> str:
    """Return ngtcp2's version from NGTCP2_VERSION."""
    return _quoted_version(
        "../../deps/ngtcp2/ngtcp2/lib/includes/ngtcp2/version.h",
        r'#define NGTCP2_VERSION "(?P<version>.*)"',
        "ngtcp2",
    )


def get_nghttp3_version() -> str:
    """Return nghttp3's version from NGHTTP3_VERSION."""
    return _quoted_version(
        "../../deps/ngtcp2/nghttp3/lib/includes/nghttp3/version.h",
        r'#define NGHTTP3_VERSION "(?P<version>.*)"',
        "nghttp3",
    )


def get_npm_version() -> str:
    """Return npm's version from its package.json."""
    return get_package_json_version(Path("../../deps/npm/package.json"))


def get_openssl_version() -> str:
    """Return OpenSSL's version from the MAJOR/MINOR/PATCH lines of VERSION.dat."""
    return _dotted_version(
        "../../deps/openssl/openssl/VERSION.dat",
        r"MAJOR=(?P<major>.*)\n" r"MINOR=(?P<minor>.*)\n" r"PATCH=(?P<patch>.*)",
        "openssl",
    )


def get_undici_version() -> str:
    """Return undici's version from its package.json."""
    return get_package_json_version(Path("../../deps/undici/src/package.json"))


def get_libuv_version() -> str:
    """Return libuv's version from its three consecutive UV_VERSION_* defines."""
    return _dotted_version(
        "../../deps/uv/include/uv/version.h",
        r"#define UV_VERSION_MAJOR (?P<major>.*)\n"
        r"#define UV_VERSION_MINOR (?P<minor>.*)\n"
        r"#define UV_VERSION_PATCH (?P<patch>.*)",
        "libuv",
    )


def get_uvwasi_version() -> str:
    """Return uvwasi's version from its three consecutive UVWASI_VERSION_* defines."""
    return _dotted_version(
        "../../deps/uvwasi/include/uvwasi.h",
        r"#define UVWASI_VERSION_MAJOR (?P<major>.*)\n"
        r"#define UVWASI_VERSION_MINOR (?P<minor>.*)\n"
        r"#define UVWASI_VERSION_PATCH (?P<patch>.*)",
        "uvwasi",
    )


def get_v8_version() -> str:
    """Return V8's version as "major.minor.build", plus ".patch" unless it is "0"."""
    versions = _search_file(
        "../../deps/v8/include/v8-version.h",
        r"#define V8_MAJOR_VERSION (?P<major>.*)\n"
        r"#define V8_MINOR_VERSION (?P<minor>.*)\n"
        r"#define V8_BUILD_NUMBER (?P<build>.*)\n"
        r"#define V8_PATCH_LEVEL (?P<patch>.*)\n",
        "Error extracting version number for v8",
    )
    patch_suffix = "" if versions["patch"] == "0" else f".{versions['patch']}"
    return (
        f"{versions['major']}.{versions['minor']}.{versions['build']}{patch_suffix}"
    )


def get_zlib_version() -> str:
    """Return zlib's version from ZLIB_VERSION."""
    return _quoted_version(
        "../../deps/zlib/zlib.h",
        r'#define ZLIB_VERSION "(?P<version>.*)"',
        "zlib",
    )

# Patch trailer that follows on the same source line (kept verbatim):
# -- cgit v1.2.3