git.ipfire.org Git - thirdparty/httpx.git/commitdiff
Add logic for determining if a URL should be proxied (#472)
authormariaS210 <maria.sumedre@gmail.com>
Thu, 17 Oct 2019 15:58:11 +0000 (18:58 +0300)
committerSeth Michael Larson <sethmichaellarson@gmail.com>
Thu, 17 Oct 2019 15:58:11 +0000 (10:58 -0500)
httpx/utils.py
tests/test_utils.py

index a3aff3581c585154ee4e0f1d9ac25ce587fb09a9..b66544ef56684abb0e53b31fab82dd0cd435ed44 100644 (file)
@@ -14,6 +14,7 @@ from urllib.request import getproxies
 
 if typing.TYPE_CHECKING:  # pragma: no cover
     from .models import PrimitiveData
+    from .models import URL
 
 
 def normalize_header_key(value: typing.AnyStr, encoding: str = None) -> bytes:
@@ -210,6 +211,28 @@ def kv_format(**kwargs: typing.Any) -> str:
     return " ".join(f"{key}={value!r}" for key, value in kwargs.items())
 
 
def should_not_be_proxied(url: "URL") -> bool:
    """Determine whether *url* is excluded from proxying.

    Consults the ``no_proxy`` entry reported by
    ``urllib.request.getproxies()`` (a comma-separated list of host
    suffixes, or ``*`` for "never proxy") and matches each entry against
    the URL's host and authority, case-insensitively.

    Returns True if the URL should bypass the proxy, False otherwise.
    """
    exclusions = getproxies().get("no")
    if not exclusions:
        # No exclusion list configured: everything goes through the proxy.
        return False
    for entry in (item.strip() for item in exclusions.split(",")):
        if entry == "*":
            return True
        if not entry:
            continue
        # A leading dot (".example.com") is conventional shorthand for
        # "example.com and its subdomains" — drop it before matching.
        suffix = re.escape(entry.lstrip("."))
        # Match the entry as the whole host or as a dot-separated suffix;
        # authority is checked too so "host:port" entries can match.
        regex = r"(.+\.)?%s$" % suffix
        if re.match(regex, url.host, re.I) or re.match(
            regex, url.authority, re.I
        ):
            return True
    return False
+
+
 def get_environment_proxies() -> typing.Dict[str, str]:
     """Gets proxy information from the environment"""
 
index f8295b2911f55aae0c694ac87bfd1443d644a36b..5c83a35eb1370acd0610457c89b985529d11a59b 100644 (file)
@@ -14,6 +14,7 @@ from httpx.utils import (
     guess_json_utf,
     obfuscate_sensitive_headers,
     parse_header_links,
+    should_not_be_proxied,
 )
 
 
@@ -201,3 +202,79 @@ def test_obfuscate_sensitive_headers(headers, output):
     bytes_output = [(k.encode(), v.encode()) for k, v in output]
     assert list(obfuscate_sensitive_headers(headers)) == output
     assert list(obfuscate_sensitive_headers(bytes_headers)) == bytes_output
+
+
@pytest.mark.parametrize(
    ["url", "no_proxy", "expected"],
    [
        (
            "http://127.0.0.1",
            {"NO_PROXY": ""},
            False,
        ),  # everything proxied when no_proxy is empty/unset
        (
            "http://127.0.0.1",
            {"NO_PROXY": "127.0.0.1"},
            True,
        ),  # no_proxy as ip case is matched
        (
            "http://127.0.0.1",
            {"NO_PROXY": "https://127.0.0.1"},
            False,
        ),  # no_proxy with scheme is ignored
        (
            "http://127.0.0.1",
            {"NO_PROXY": "1.1.1.1"},
            False,
        ),  # different no_proxy means its proxied
        (
            "http://courses.mit.edu",
            {"NO_PROXY": "mit.edu"},
            True,
        ),  # no_proxy for sub-domain matches
        (
            "https://mit.edu.info",
            {"NO_PROXY": "mit.edu"},
            False,
        ),  # domain is actually edu.info, so should be proxied
        (
            "https://mit.edu.info",
            {"NO_PROXY": "mit.edu,edu.info"},
            True,
        ),  # list in no_proxy, matches second domain
        (
            "https://mit.edu.info",
            {"NO_PROXY": "mit.edu, edu.info"},
            True,
        ),  # list with spaces in no_proxy
        (
            "https://mit.edu.info",
            {"NO_PROXY": "mit.edu,mit.info"},
            False,
        ),  # list in no_proxy, without any domain matching
        (
            "https://foo.example.com",
            {"NO_PROXY": "www.example.com"},
            False,
        ),  # different subdomains foo vs www means we still proxy
        (
            "https://www.example1.com",
            {"NO_PROXY": ".example1.com"},
            True,
        ),  # no_proxy starting with dot
        (
            "https://www.example2.com",
            {"NO_PROXY": "ample2.com"},
            False,
        ),  # whole-domain matching
        (
            "https://www.example3.com",
            {"NO_PROXY": "*"},
            True,
        ),  # wildcard * means nothing proxied
    ],
)
def test_should_not_be_proxied(url, no_proxy, expected, monkeypatch):
    """`should_not_be_proxied` honours the NO_PROXY environment variable."""
    # Use monkeypatch.setenv instead of os.environ.update so the change is
    # automatically undone after each parametrized case — the original
    # version leaked NO_PROXY into every later test in the session.
    for key, value in no_proxy.items():
        monkeypatch.setenv(key, value)
    parsed_url = httpx.models.URL(url)
    assert should_not_be_proxied(parsed_url) == expected