Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/sphinx-doc/sphinx.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Takeshi KOMIYA <i.tkomiya@gmail.com>, 2020-06-04 19:17:31 +0300
committer: GitHub <noreply@github.com>, 2020-06-04 19:17:31 +0300
commit: dce45413e6a1aabd2399244e8eff90695e3393ba (patch)
tree: 446897d1ee18e2d058659c2ba4c0a9e0751c4610
parent: 07fb907feabfd96ef21ac0c4048a0f13789dd4ab (diff)
parent: a7725ad8ca03ede875945cf5aafedb96f84071e6 (diff)
Merge pull request #7762 from tk0miya/7247_linkcheck_request_headers
Close #7247: linkcheck: Add linkcheck_request_headers
-rw-r--r-- CHANGES | 2
-rw-r--r-- doc/usage/configuration.rst | 26
-rw-r--r-- sphinx/builders/linkcheck.py | 31
-rw-r--r-- tests/test_build_linkcheck.py | 33
4 files changed, 87 insertions, 5 deletions
diff --git a/CHANGES b/CHANGES
index ef1d36002..78dc38213 100644
--- a/CHANGES
+++ b/CHANGES
@@ -84,6 +84,8 @@ Features added
of ``foo[=bar]``
* #7582: napoleon: a type for attribute are represented like type annotation
* #7734: napoleon: overescaped trailing underscore on attribute
+* #7247: linkcheck: Add :confval:`linkcheck_request_headers` to send custom HTTP
+ headers for specific host
* #7683: Add ``allowed_exceptions`` parameter to ``Sphinx.emit()`` to allow
handlers to raise specified exceptions
* #7295: C++, parse (trailing) requires clauses.
diff --git a/doc/usage/configuration.rst b/doc/usage/configuration.rst
index bc483fa1c..cdcc2a561 100644
--- a/doc/usage/configuration.rst
+++ b/doc/usage/configuration.rst
@@ -2390,6 +2390,32 @@ Options for the linkcheck builder
.. versionadded:: 1.1
+.. confval:: linkcheck_request_headers
+
+ A dictionary that maps base URLs to HTTP request headers.
+
+ The key is a base URL string like ``"https://sphinx-doc.org/"``. To specify
+ headers for all other hosts, use ``"*"``; it is applied only when the URL
+ does not match any other key.
+
+ The value is a dictionary that maps header name to its value.
+
+ Example:
+
+ .. code-block:: python
+
+ linkcheck_request_headers = {
+ "https://sphinx-doc.org/": {
+ "Accept": "text/html",
+ "Accept-Encoding": "utf-8",
+ },
+ "*": {
+ "Accept": "text/html,application/xhtml+xml",
+ }
+ }
+
+ .. versionadded:: 3.1
+
.. confval:: linkcheck_retries
The number of times the linkcheck builder will attempt to check a URL before
diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py
index 9fe689ec9..dd5317087 100644
--- a/sphinx/builders/linkcheck.py
+++ b/sphinx/builders/linkcheck.py
@@ -16,7 +16,7 @@ import threading
from html.parser import HTMLParser
from os import path
from typing import Any, Dict, List, Set, Tuple
-from urllib.parse import unquote
+from urllib.parse import unquote, urlparse
from docutils import nodes
from docutils.nodes import Node
@@ -36,6 +36,11 @@ from sphinx.util.requests import is_ssl_error
logger = logging.getLogger(__name__)
+DEFAULT_REQUEST_HEADERS = {
+ 'Accept': 'text/html,application/xhtml+xml;q=0.9,*/*;q=0.8',
+}
+
+
class AnchorCheckParser(HTMLParser):
"""Specialized HTML parser that looks for a specific anchor."""
@@ -107,13 +112,25 @@ class CheckExternalLinksBuilder(Builder):
def check_thread(self) -> None:
kwargs = {
'allow_redirects': True,
- 'headers': {
- 'Accept': 'text/html,application/xhtml+xml;q=0.9,*/*;q=0.8',
- },
- }
+ } # type: Dict
if self.app.config.linkcheck_timeout:
kwargs['timeout'] = self.app.config.linkcheck_timeout
+ def get_request_headers() -> Dict:
+ url = urlparse(uri)
+ candidates = ["%s://%s" % (url.scheme, url.netloc),
+ "%s://%s/" % (url.scheme, url.netloc),
+ uri,
+ "*"]
+
+ for u in candidates:
+ if u in self.config.linkcheck_request_headers:
+ headers = dict(DEFAULT_REQUEST_HEADERS)
+ headers.update(self.config.linkcheck_request_headers[u])
+ return headers
+
+ return {}
+
def check_uri() -> Tuple[str, str, int]:
# split off anchor
if '#' in uri:
@@ -139,6 +156,9 @@ class CheckExternalLinksBuilder(Builder):
else:
auth_info = None
+ # update request headers for the URL
+ kwargs['headers'] = get_request_headers()
+
try:
if anchor and self.app.config.linkcheck_anchors:
# Read the whole document and see if #anchor exists
@@ -337,6 +357,7 @@ def setup(app: Sphinx) -> Dict[str, Any]:
app.add_config_value('linkcheck_ignore', [], None)
app.add_config_value('linkcheck_auth', [], None)
+ app.add_config_value('linkcheck_request_headers', {}, None)
app.add_config_value('linkcheck_retries', 1, None)
app.add_config_value('linkcheck_timeout', None, None, [int])
app.add_config_value('linkcheck_workers', 5, None)
diff --git a/tests/test_build_linkcheck.py b/tests/test_build_linkcheck.py
index 54bde6b68..d1fec550f 100644
--- a/tests/test_build_linkcheck.py
+++ b/tests/test_build_linkcheck.py
@@ -124,3 +124,36 @@ def test_auth(app, status, warning):
assert c_kwargs['auth'] == 'authinfo2'
else:
assert not c_kwargs['auth']
+
+
+@pytest.mark.sphinx(
+ 'linkcheck', testroot='linkcheck', freshenv=True,
+ confoverrides={'linkcheck_request_headers': {
+ "https://localhost:7777/": {
+ "Accept": "text/html",
+ },
+ "http://www.sphinx-doc.org": { # no slash at the end
+ "Accept": "application/json",
+ },
+ "*": {
+ "X-Secret": "open sesami",
+ }
+ }})
+def test_linkcheck_request_headers(app, status, warning):
+ mock_req = mock.MagicMock()
+ mock_req.return_value = 'fake-response'
+
+ with mock.patch.multiple('requests', get=mock_req, head=mock_req):
+ app.builder.build_all()
+ for args, kwargs in mock_req.call_args_list:
+ url = args[0]
+ headers = kwargs.get('headers', {})
+ if "https://localhost:7777" in url:
+ assert headers["Accept"] == "text/html"
+ elif 'http://www.sphinx-doc.org' in url:
+ assert headers["Accept"] == "application/json"
+ elif 'https://www.google.com' in url:
+ assert headers["Accept"] == "text/html,application/xhtml+xml;q=0.9,*/*;q=0.8"
+ assert headers["X-Secret"] == "open sesami"
+ else:
+ assert headers["Accept"] == "text/html,application/xhtml+xml;q=0.9,*/*;q=0.8"