diff options
author | Takeshi KOMIYA <i.tkomiya@gmail.com> | 2020-05-31 19:37:15 +0300 |
---|---|---|
committer | Takeshi KOMIYA <i.tkomiya@gmail.com> | 2020-05-31 19:48:46 +0300 |
commit | a7725ad8ca03ede875945cf5aafedb96f84071e6 (patch) | |
tree | 23a52a866df7ce9d0ebeca32f166fe1d84c309a7 /sphinx | |
parent | c063c9c0fe383a20da613448d1f606b9635773b9 (diff) |
Close #7247: linkcheck: Add linkcheck_request_headers
Diffstat (limited to 'sphinx')
-rw-r--r-- | sphinx/builders/linkcheck.py | 31 |
1 files changed, 26 insertions, 5 deletions
diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py
index 9fe689ec9..dd5317087 100644
--- a/sphinx/builders/linkcheck.py
+++ b/sphinx/builders/linkcheck.py
@@ -16,7 +16,7 @@ import threading
 from html.parser import HTMLParser
 from os import path
 from typing import Any, Dict, List, Set, Tuple
-from urllib.parse import unquote
+from urllib.parse import unquote, urlparse
 
 from docutils import nodes
 from docutils.nodes import Node
@@ -36,6 +36,11 @@ from sphinx.util.requests import is_ssl_error
 logger = logging.getLogger(__name__)
 
 
+DEFAULT_REQUEST_HEADERS = {
+    'Accept': 'text/html,application/xhtml+xml;q=0.9,*/*;q=0.8',
+}
+
+
 class AnchorCheckParser(HTMLParser):
     """Specialized HTML parser that looks for a specific anchor."""
 
@@ -107,13 +112,25 @@ class CheckExternalLinksBuilder(Builder):
     def check_thread(self) -> None:
         kwargs = {
             'allow_redirects': True,
-            'headers': {
-                'Accept': 'text/html,application/xhtml+xml;q=0.9,*/*;q=0.8',
-            },
-        }
+        }  # type: Dict
         if self.app.config.linkcheck_timeout:
             kwargs['timeout'] = self.app.config.linkcheck_timeout
 
+        def get_request_headers() -> Dict:
+            url = urlparse(uri)
+            candidates = ["%s://%s" % (url.scheme, url.netloc),
+                          "%s://%s/" % (url.scheme, url.netloc),
+                          uri,
+                          "*"]
+
+            for u in candidates:
+                if u in self.config.linkcheck_request_headers:
+                    headers = dict(DEFAULT_REQUEST_HEADERS)
+                    headers.update(self.config.linkcheck_request_headers[u])
+                    return headers
+
+            return {}
+
         def check_uri() -> Tuple[str, str, int]:
             # split off anchor
             if '#' in uri:
@@ -139,6 +156,9 @@ class CheckExternalLinksBuilder(Builder):
                 else:
                     auth_info = None
 
+            # update request headers for the URL
+            kwargs['headers'] = get_request_headers()
+
             try:
                 if anchor and self.app.config.linkcheck_anchors:
                     # Read the whole document and see if #anchor exists
@@ -337,6 +357,7 @@ def setup(app: Sphinx) -> Dict[str, Any]:
 
     app.add_config_value('linkcheck_ignore', [], None)
     app.add_config_value('linkcheck_auth', [], None)
+    app.add_config_value('linkcheck_request_headers', {}, None)
     app.add_config_value('linkcheck_retries', 1, None)
     app.add_config_value('linkcheck_timeout', None, None, [int])
     app.add_config_value('linkcheck_workers', 5, None)