Close #7247: linkcheck: Add linkcheck_request_headers

author: Takeshi KOMIYA <i.tkomiya@gmail.com> 2020-05-31 19:37:15 +0300
committer: Takeshi KOMIYA <i.tkomiya@gmail.com> 2020-05-31 19:48:46 +0300
commit: a7725ad8ca03ede875945cf5aafedb96f84071e6 (patch)
tree: 23a52a866df7ce9d0ebeca32f166fe1d84c309a7 /sphinx
parent: c063c9c0fe383a20da613448d1f606b9635773b9 (diff)
1 files changed, 26 insertions, 5 deletions
diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py
index 9fe689ec9..dd5317087 100644
--- a/sphinx/builders/linkcheck.py
+++ b/sphinx/builders/linkcheck.py
@@ -16,7 +16,7 @@ import threading
 from html.parser import HTMLParser
 from os import path
 from typing import Any, Dict, List, Set, Tuple
-from urllib.parse import unquote
+from urllib.parse import unquote, urlparse
 
 from docutils import nodes
 from docutils.nodes import Node
@@ -36,6 +36,11 @@ from sphinx.util.requests import is_ssl_error
 logger = logging.getLogger(__name__)
 
 
+DEFAULT_REQUEST_HEADERS = {  # base headers; per-URL config entries are merged on top
+    'Accept': 'text/html,application/xhtml+xml;q=0.9,*/*;q=0.8',
+}
+
+
 class AnchorCheckParser(HTMLParser):
     """Specialized HTML parser that looks for a specific anchor."""
 
@@ -107,13 +112,25 @@ class CheckExternalLinksBuilder(Builder):
     def check_thread(self) -> None:
         kwargs = {
             'allow_redirects': True,
-            'headers': {
-                'Accept': 'text/html,application/xhtml+xml;q=0.9,*/*;q=0.8',
-            },
-        }
+        }  # type: Dict
         if self.app.config.linkcheck_timeout:
             kwargs['timeout'] = self.app.config.linkcheck_timeout
 
+        def get_request_headers() -> Dict:
+            """Return headers for *uri*: defaults plus first matching override."""
+            url = urlparse(uri)
+            candidates = ["%s://%s" % (url.scheme, url.netloc),
+                          "%s://%s/" % (url.scheme, url.netloc),
+                          uri,
+                          "*"]
+            # Start from the defaults so 'Accept' is still sent when nothing matches.
+            headers = dict(DEFAULT_REQUEST_HEADERS)
+            for u in candidates:
+                if u in self.config.linkcheck_request_headers:
+                    headers.update(self.config.linkcheck_request_headers[u])
+                    break
+            return headers
+
         def check_uri() -> Tuple[str, str, int]:
             # split off anchor
             if '#' in uri:
@@ -139,6 +156,9 @@ class CheckExternalLinksBuilder(Builder):
             else:
                 auth_info = None
 
+            # update request headers for the URL
+            kwargs['headers'] = get_request_headers()
+
             try:
                 if anchor and self.app.config.linkcheck_anchors:
                     # Read the whole document and see if #anchor exists
@@ -337,6 +357,7 @@ def setup(app: Sphinx) -> Dict[str, Any]:
 
     app.add_config_value('linkcheck_ignore', [], None)
     app.add_config_value('linkcheck_auth', [], None)
+    app.add_config_value('linkcheck_request_headers', {}, None)
     app.add_config_value('linkcheck_retries', 1, None)
     app.add_config_value('linkcheck_timeout', None, None, [int])
     app.add_config_value('linkcheck_workers', 5, None)
author	Takeshi KOMIYA <i.tkomiya@gmail.com>	2020-05-31 19:37:15 +0300
committer	Takeshi KOMIYA <i.tkomiya@gmail.com>	2020-05-31 19:48:46 +0300
commit	a7725ad8ca03ede875945cf5aafedb96f84071e6 (patch)
tree	23a52a866df7ce9d0ebeca32f166fe1d84c309a7 /sphinx
parent	c063c9c0fe383a20da613448d1f606b9635773b9 (diff)