Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/sphinx-doc/sphinx.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/sphinx
diff options
context:
space:
mode:
author: Takeshi KOMIYA <i.tkomiya@gmail.com> 2020-05-31 19:37:15 +0300
committer: Takeshi KOMIYA <i.tkomiya@gmail.com> 2020-05-31 19:48:46 +0300
commit: a7725ad8ca03ede875945cf5aafedb96f84071e6 (patch)
tree: 23a52a866df7ce9d0ebeca32f166fe1d84c309a7 /sphinx
parent: c063c9c0fe383a20da613448d1f606b9635773b9 (diff)
Close #7247: linkcheck: Add linkcheck_request_headers
Diffstat (limited to 'sphinx')
-rw-r--r-- sphinx/builders/linkcheck.py | 31
1 files changed, 26 insertions, 5 deletions
diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py
index 9fe689ec9..dd5317087 100644
--- a/sphinx/builders/linkcheck.py
+++ b/sphinx/builders/linkcheck.py
@@ -16,7 +16,7 @@ import threading
from html.parser import HTMLParser
from os import path
from typing import Any, Dict, List, Set, Tuple
-from urllib.parse import unquote
+from urllib.parse import unquote, urlparse
from docutils import nodes
from docutils.nodes import Node
@@ -36,6 +36,11 @@ from sphinx.util.requests import is_ssl_error
logger = logging.getLogger(__name__)
+DEFAULT_REQUEST_HEADERS = {
+ 'Accept': 'text/html,application/xhtml+xml;q=0.9,*/*;q=0.8',
+}
+
+
class AnchorCheckParser(HTMLParser):
"""Specialized HTML parser that looks for a specific anchor."""
@@ -107,13 +112,25 @@ class CheckExternalLinksBuilder(Builder):
def check_thread(self) -> None:
kwargs = {
'allow_redirects': True,
- 'headers': {
- 'Accept': 'text/html,application/xhtml+xml;q=0.9,*/*;q=0.8',
- },
- }
+ } # type: Dict
if self.app.config.linkcheck_timeout:
kwargs['timeout'] = self.app.config.linkcheck_timeout
+ def get_request_headers() -> Dict:
+ url = urlparse(uri)
+ candidates = ["%s://%s" % (url.scheme, url.netloc),
+ "%s://%s/" % (url.scheme, url.netloc),
+ uri,
+ "*"]
+
+ for u in candidates:
+ if u in self.config.linkcheck_request_headers:
+ headers = dict(DEFAULT_REQUEST_HEADERS)
+ headers.update(self.config.linkcheck_request_headers[u])
+ return headers
+
+ return {}
+
def check_uri() -> Tuple[str, str, int]:
# split off anchor
if '#' in uri:
@@ -139,6 +156,9 @@ class CheckExternalLinksBuilder(Builder):
else:
auth_info = None
+ # update request headers for the URL
+ kwargs['headers'] = get_request_headers()
+
try:
if anchor and self.app.config.linkcheck_anchors:
# Read the whole document and see if #anchor exists
@@ -337,6 +357,7 @@ def setup(app: Sphinx) -> Dict[str, Any]:
app.add_config_value('linkcheck_ignore', [], None)
app.add_config_value('linkcheck_auth', [], None)
+ app.add_config_value('linkcheck_request_headers', {}, None)
app.add_config_value('linkcheck_retries', 1, None)
app.add_config_value('linkcheck_timeout', None, None, [int])
app.add_config_value('linkcheck_workers', 5, None)