get_environment_proxies,
get_logger,
same_origin,
- should_not_be_proxied,
warn_deprecated,
)
if proxies is None:
if trust_env:
return {
- key: Proxy(url=url)
+ key: None if url is None else Proxy(url=url)
for key, url in get_environment_proxies().items()
}
return {}
url = request.url
enforce_http_url(request)
- if self._proxies and not should_not_be_proxied(url):
- for pattern, transport in self._proxies.items():
- if pattern.matches(url):
- return self._transport if transport is None else transport
+ for pattern, transport in self._proxies.items():
+ if pattern.matches(url):
+ return self._transport if transport is None else transport
return self._transport
url = request.url
enforce_http_url(request)
- if self._proxies and not should_not_be_proxied(url):
- for pattern, transport in self._proxies.items():
- if pattern.matches(url):
- return self._transport if transport is None else transport
+ for pattern, transport in self._proxies.items():
+ if pattern.matches(url):
+ return self._transport if transport is None else transport
return self._transport
)
-def should_not_be_proxied(url: "URL") -> bool:
- """
- Return True if url should not be proxied,
- return False otherwise.
- """
- no_proxy = getproxies().get("no")
- if not no_proxy:
- return False
- no_proxy_list = [host.strip() for host in no_proxy.split(",")]
- for name in no_proxy_list:
- if name == "*":
- return True
- if name:
- name = name.lstrip(".") # ignore leading dots
- name = re.escape(name)
- pattern = r"(.+\.)?%s$" % name
- if re.match(pattern, url.host, re.I) or re.match(
- pattern, url.authority, re.I
- ):
- return True
- return False
-
-
-def get_environment_proxies() -> typing.Dict[str, str]:
+def get_environment_proxies() -> typing.Dict[str, typing.Optional[str]]:
"""Gets proxy information from the environment"""
# urllib.request.getproxies() falls back on System
# Registry and Config for proxies on Windows and macOS.
# We don't want to propagate non-HTTP proxies into
# our configuration such as 'TRAVIS_APT_PROXY'.
- supported_proxy_schemes = ("http", "https", "all")
- return {
- key: val
- for key, val in getproxies().items()
- if ("://" in key or key in supported_proxy_schemes)
- }
+ proxy_info = getproxies()
+ mounts: typing.Dict[str, typing.Optional[str]] = {}
+
+ for scheme in ("http", "https", "all"):
+ if proxy_info.get(scheme):
+ mounts[scheme] = proxy_info[scheme]
+
+ no_proxy_hosts = [host.strip() for host in proxy_info.get("no", "").split(",")]
+ for hostname in no_proxy_hosts:
+ # See https://curl.haxx.se/libcurl/c/CURLOPT_NOPROXY.html for details
+ # on how names in `NO_PROXY` are handled.
+ if hostname == "*":
+ # If NO_PROXY=* is used or if "*" occurs as any one of the comma
+ # separated hostnames, then we should just bypass any information
+ # from HTTP_PROXY, HTTPS_PROXY, ALL_PROXY, and always ignore
+ # proxies.
+ return {}
+ elif hostname:
+ # NO_PROXY=.google.com is marked as "all://*.google.com",
+ # which disables "www.google.com" but not "google.com"
+ # NO_PROXY=google.com is marked as "all://*google.com",
+ # which disables "www.google.com" and "google.com".
+ # (But not "wwwgoogle.com")
+ mounts[f"all://*{hostname}"] = None
+
+ return mounts
def to_bytes(value: typing.Union[str, bytes], encoding: str = "utf-8") -> bytes:
url = URL(pattern)
self.pattern = pattern
self.scheme = "" if url.scheme == "all" else url.scheme
- self.host = url.host
+ self.host = "" if url.host == "*" else url.host
self.port = url.port
+ if not url.host or url.host == "*":
+ self.host_regex: typing.Optional[typing.Pattern[str]] = None
+ else:
+ if url.host.startswith("*."):
+ # *.example.com should match "www.example.com", but not "example.com"
+ domain = re.escape(url.host[2:])
+ self.host_regex = re.compile(f"^.+\\.{domain}$")
+ elif url.host.startswith("*"):
+ # *example.com should match "www.example.com" and "example.com"
+ domain = re.escape(url.host[1:])
+ self.host_regex = re.compile(f"^(.+\\.)?{domain}$")
+ else:
+ # example.com should match "example.com" but not "www.example.com"
+ domain = re.escape(url.host)
+ self.host_regex = re.compile(f"^{domain}$")
def matches(self, other: "URL") -> bool:
if self.scheme and self.scheme != other.scheme:
return False
- if self.host and self.host != other.host:
+ if (
+ self.host
+ and self.host_regex is not None
+ and not self.host_regex.match(other.host)
+ ):
return False
if self.port is not None and self.port != other.port:
return False
The priority allows URLPattern instances to be sortable, so that
we can match from most specific to least specific.
"""
- port_priority = -1 if self.port is not None else 0
+ # URLs with a port should take priority over URLs without a port.
+ port_priority = 0 if self.port is not None else 1
+ # Longer hostnames should match first.
host_priority = -len(self.host)
+ # Longer schemes should match first.
scheme_priority = -len(self.scheme)
return (port_priority, host_priority, scheme_priority)
("http://example.com", {}, None),
("http://example.com", {"https": PROXY_URL}, None),
("http://example.com", {"http://example.net": PROXY_URL}, None),
+ # Using "*" should match any domain name.
+ ("http://example.com", {"http://*": PROXY_URL}, PROXY_URL),
+ ("https://example.com", {"http://*": PROXY_URL}, None),
+ # Using "example.com" should match example.com, but not www.example.com
+ ("http://example.com", {"http://example.com": PROXY_URL}, PROXY_URL),
+ ("http://www.example.com", {"http://example.com": PROXY_URL}, None),
+ # Using "*.example.com" should match www.example.com, but not example.com
+ ("http://example.com", {"http://*.example.com": PROXY_URL}, None),
+ ("http://www.example.com", {"http://*.example.com": PROXY_URL}, PROXY_URL),
+ # Using "*example.com" should match example.com and www.example.com
+ ("http://example.com", {"http://*example.com": PROXY_URL}, PROXY_URL),
+ ("http://www.example.com", {"http://*example.com": PROXY_URL}, PROXY_URL),
+ ("http://wwwexample.com", {"http://*example.com": PROXY_URL}, None),
+ # ...
("http://example.com:443", {"http://example.com": PROXY_URL}, PROXY_URL),
("http://example.com", {"all": PROXY_URL}, PROXY_URL),
("http://example.com", {"all": PROXY_URL, "http://example.com": None}, None),
{"HTTP_PROXY": "http://example.com", "NO_PROXY": "google.com"},
None,
),
+ # Everything proxied when NO_PROXY is empty/unset
+ (
+ "http://127.0.0.1",
+ {"ALL_PROXY": "http://localhost:123", "NO_PROXY": ""},
+ "http://localhost:123",
+ ),
+ # Not proxied if NO_PROXY matches URL.
+ (
+ "http://127.0.0.1",
+ {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "127.0.0.1"},
+ None,
+ ),
+ # Proxied if NO_PROXY scheme does not match URL.
+ (
+ "http://127.0.0.1",
+ {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "https://127.0.0.1"},
+ "http://localhost:123",
+ ),
+ # Proxied if NO_PROXY scheme does not match host.
+ (
+ "http://127.0.0.1",
+ {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "1.1.1.1"},
+ "http://localhost:123",
+ ),
+ # Not proxied if NO_PROXY matches host domain suffix.
+ (
+ "http://courses.mit.edu",
+ {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "mit.edu"},
+ None,
+ ),
+ # Proxied even though NO_PROXY matches host domain *prefix*.
+ (
+ "https://mit.edu.info",
+ {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "mit.edu"},
+ "http://localhost:123",
+ ),
+ # Not proxied if one item in NO_PROXY case matches host domain suffix.
+ (
+ "https://mit.edu.info",
+ {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "mit.edu,edu.info"},
+ None,
+ ),
+ # Not proxied if one item in NO_PROXY case matches host domain suffix.
+ # May include whitespace.
+ (
+ "https://mit.edu.info",
+ {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "mit.edu, edu.info"},
+ None,
+ ),
+ # Proxied if no items in NO_PROXY match.
+ (
+ "https://mit.edu.info",
+ {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "mit.edu,mit.info"},
+ "http://localhost:123",
+ ),
+ # Proxied if NO_PROXY domain doesn't match.
+ (
+ "https://foo.example.com",
+ {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "www.example.com"},
+ "http://localhost:123",
+ ),
+ # Not proxied for subdomains matching NO_PROXY, with a leading ".".
+ (
+ "https://www.example1.com",
+ {"ALL_PROXY": "http://localhost:123", "NO_PROXY": ".example1.com"},
+ None,
+ ),
+ # Proxied, because NO_PROXY subdomains only match if "." separated.
+ (
+ "https://www.example2.com",
+ {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "ample2.com"},
+ "http://localhost:123",
+ ),
+ # No requests are proxied if NO_PROXY="*" is set.
+ (
+ "https://www.example3.com",
+ {"ALL_PROXY": "http://localhost:123", "NO_PROXY": "*"},
+ None,
+ ),
],
)
@pytest.mark.parametrize("client_class", [httpx.Client, httpx.AsyncClient])
obfuscate_sensitive_headers,
parse_header_links,
same_origin,
- should_not_be_proxied,
)
from tests.utils import override_log_level
assert list(obfuscate_sensitive_headers(bytes_headers)) == bytes_output
-@pytest.mark.parametrize(
- ["url", "no_proxy", "expected"],
- [
- (
- "http://127.0.0.1",
- {"NO_PROXY": ""},
- False,
- ), # everything proxied when no_proxy is empty/unset
- (
- "http://127.0.0.1",
- {"NO_PROXY": "127.0.0.1"},
- True,
- ), # no_proxy as ip case is matched
- (
- "http://127.0.0.1",
- {"NO_PROXY": "https://127.0.0.1"},
- False,
- ), # no_proxy with scheme is ignored
- (
- "http://127.0.0.1",
- {"NO_PROXY": "1.1.1.1"},
- False,
- ), # different no_proxy means its proxied
- (
- "http://courses.mit.edu",
- {"NO_PROXY": "mit.edu"},
- True,
- ), # no_proxy for sub-domain matches
- (
- "https://mit.edu.info",
- {"NO_PROXY": "mit.edu"},
- False,
- ), # domain is actually edu.info, so should be proxied
- (
- "https://mit.edu.info",
- {"NO_PROXY": "mit.edu,edu.info"},
- True,
- ), # list in no_proxy, matches second domain
- (
- "https://mit.edu.info",
- {"NO_PROXY": "mit.edu, edu.info"},
- True,
- ), # list with spaces in no_proxy
- (
- "https://mit.edu.info",
- {"NO_PROXY": "mit.edu,mit.info"},
- False,
- ), # list in no_proxy, without any domain matching
- (
- "https://foo.example.com",
- {"NO_PROXY": "www.example.com"},
- False,
- ), # different subdomains foo vs www means we still proxy
- (
- "https://www.example1.com",
- {"NO_PROXY": ".example1.com"},
- True,
- ), # no_proxy starting with dot
- (
- "https://www.example2.com",
- {"NO_PROXY": "ample2.com"},
- False,
- ), # whole-domain matching
- (
- "https://www.example3.com",
- {"NO_PROXY": "*"},
- True,
- ), # wildcard * means nothing proxied
- ],
-)
-def test_should_not_be_proxied(url, no_proxy, expected):
- os.environ.update(no_proxy)
- parsed_url = httpx.URL(url)
- assert should_not_be_proxied(parsed_url) == expected
-
-
def test_same_origin():
origin1 = httpx.URL("https://example.com")
origin2 = httpx.URL("HTTPS://EXAMPLE.COM:443")