git.ipfire.org Git - thirdparty/httpx.git/commitdiff
SOCKS proxy support (#2034)
author Tom Christie <tom@tomchristie.com>
Wed, 19 Jan 2022 14:58:19 +0000 (14:58 +0000)
committer GitHub <noreply@github.com>
Wed, 19 Jan 2022 14:58:19 +0000 (14:58 +0000)
README.md
docs/advanced.md
docs/index.md
httpx/_config.py
httpx/_transports/default.py
requirements.txt
setup.py
tests/client/test_proxies.py
tests/test_config.py

index cc819e5a323c3ece5e286fdf36ce4f75a3122ff7..2375c365f158525efa239e6c8db1afdfc1a3f352 100644 (file)
--- a/README.md
+++ b/README.md
@@ -129,12 +129,16 @@ The HTTPX project relies on these excellent libraries:
 
 * `httpcore` - The underlying transport implementation for `httpx`.
   * `h11` - HTTP/1.1 support.
-  * `h2` - HTTP/2 support. *(Optional, with `httpx[http2]`)*
 * `certifi` - SSL certificates.
 * `charset_normalizer` - Charset auto-detection.
 * `rfc3986` - URL parsing & normalization.
   * `idna` - Internationalized domain name support.
 * `sniffio` - Async library autodetection.
+
+As well as these optional installs:
+
+* `h2` - HTTP/2 support. *(Optional, with `httpx[http2]`)*
+* `socksio` - SOCKS proxy support. *(Optional, with `httpx[socks]`)*
 * `rich` - Rich terminal support. *(Optional, with `httpx[cli]`)*
 * `click` - Command line client support. *(Optional, with `httpx[cli]`)*
 * `brotli` or `brotlicffi` - Decoding for "brotli" compressed responses. *(Optional, with `httpx[brotli]`)*
index ad4b6f17069dac0fd1669eb41a57897382ecb662..58eb29dab902fba5723167deba5e0c70c46331c5 100644 (file)
@@ -387,8 +387,6 @@ client = httpx.Client(trust_env=False)
 
 HTTPX supports setting up [HTTP proxies](https://en.wikipedia.org/wiki/Proxy_server#Web_proxy_servers) via the `proxies` parameter to be passed on client initialization or top-level API functions like `httpx.get(..., proxies=...)`.
 
-_Note: SOCKS proxies are not supported yet._
-
 <div align="center">
     <img src="https://upload.wikimedia.org/wikipedia/commons/thumb/2/27/Open_proxy_h2g2bob.svg/480px-Open_proxy_h2g2bob.svg.png"/>
     <figcaption><em>Diagram of how a proxy works (source: Wikipedia). The left hand side "Internet" blob may be your HTTPX client requesting <code>example.com</code> through a proxy.</em></figcaption>
@@ -565,44 +563,34 @@ See documentation on [`HTTP_PROXY`, `HTTPS_PROXY`, `ALL_PROXY`](environment_vari
 In general, the flow for making an HTTP request through a proxy is as follows:
 
 1. The client connects to the proxy (initial connection request).
-1. The proxy somehow transfers data to the server on your behalf.
+2. The proxy transfers data to the server on your behalf.
 
 How exactly step 2 is performed depends on which of two proxying mechanisms is used:
 
 * **Forwarding**: the proxy makes the request for you, and sends back the response it obtained from the server.
-* **Tunneling**: the proxy establishes a TCP connection to the server on your behalf, and the client reuses this connection to send the request and receive the response. This is known as an [HTTP Tunnel](https://en.wikipedia.org/wiki/HTTP_tunnel). This mechanism is how you can access websites that use HTTPS from an HTTP proxy (the client "upgrades" the connection to HTTPS by performing the TLS handshake with the server over the TCP connection provided by the proxy).
+* **Tunnelling**: the proxy establishes a TCP connection to the server on your behalf, and the client reuses this connection to send the request and receive the response. This is known as an [HTTP Tunnel](https://en.wikipedia.org/wiki/HTTP_tunnel). This mechanism is how you can access websites that use HTTPS from an HTTP proxy (the client "upgrades" the connection to HTTPS by performing the TLS handshake with the server over the TCP connection provided by the proxy).
 
-#### Default behavior
+### Troubleshooting proxies
 
-Given the technical definitions above, by default (and regardless of whether you're using an HTTP or HTTPS proxy), HTTPX will:
+If you encounter issues when setting up proxies, please refer to our [Troubleshooting guide](troubleshooting.md#proxies).
 
-* Use forwarding for HTTP requests.
-* Use tunneling for HTTPS requests.
+## SOCKS
 
-This ensures that you can make HTTP and HTTPS requests in all cases (i.e. regardless of which type of proxy you're using).
+In addition to HTTP proxies, HTTPX also supports proxies using the SOCKS protocol.
+This is an optional feature that requires an additional third-party library be installed before use.
 
-#### Forcing the proxy mechanism
+You can install SOCKS support using `pip`:
 
-In most cases, the default behavior should work just fine as well as provide enough security.
+```shell
+$ pip install httpx[socks]
+```
 
-But if you know what you're doing and you want to force which mechanism to use, you can do so by passing an `httpx.Proxy()` instance, setting the `mode` to either `FORWARD_ONLY` or `TUNNEL_ONLY`. For example...
+You can now configure a client to make requests via a proxy using the SOCKS protocol:
 
 ```python
-# Route all requests through an HTTPS proxy, using tunneling only.
-proxies = httpx.Proxy(
-    url="https://localhost:8030",
-    mode="TUNNEL_ONLY",
-)
-
-with httpx.Client(proxies=proxies) as client:
-    # This HTTP request will be tunneled instead of forwarded.
-    r = client.get("http://example.com")
+httpx.Client(proxies='socks5://user:pass@host:port')
 ```
 
-### Troubleshooting proxies
-
-If you encounter issues when setting up proxies, please refer to our [Troubleshooting guide](troubleshooting.md#proxies).
-
 ## Timeout Configuration
 
 HTTPX is careful to enforce timeouts everywhere by default.
index 448247c0d2c13d74f2e4c446a57661f77c826c21..bf0eecfffff1634c45564be78bd03e965a350d17 100644 (file)
@@ -112,12 +112,16 @@ The HTTPX project relies on these excellent libraries:
 
 * `httpcore` - The underlying transport implementation for `httpx`.
   * `h11` - HTTP/1.1 support.
-  * `h2` - HTTP/2 support. *(Optional, with `httpx[http2]`)*
 * `certifi` - SSL certificates.
 * `charset_normalizer` - Charset auto-detection.
 * `rfc3986` - URL parsing & normalization.
   * `idna` - Internationalized domain name support.
 * `sniffio` - Async library autodetection.
+
+As well as these optional installs:
+
+* `h2` - HTTP/2 support. *(Optional, with `httpx[http2]`)*
+* `socksio` - SOCKS proxy support. *(Optional, with `httpx[socks]`)*
 * `rich` - Rich terminal support. *(Optional, with `httpx[cli]`)*
 * `click` - Command line client support. *(Optional, with `httpx[cli]`)*
 * `brotli` or `brotlicffi` - Decoding for "brotli" compressed responses. *(Optional, with `httpx[brotli]`)*
index 927a67c2b168d27c98112596222d4ecbde268b3a..9cf6d1677753e2398886c63d7e94a01fae3f22c0 100644 (file)
@@ -1,7 +1,6 @@
 import os
 import ssl
 import typing
-from base64 import b64encode
 from pathlib import Path
 
 import certifi
@@ -316,32 +315,46 @@ class Limits:
 
 
 class Proxy:
-    def __init__(self, url: URLTypes, *, headers: HeaderTypes = None):
+    def __init__(
+        self,
+        url: URLTypes,
+        *,
+        auth: typing.Tuple[str, str] = None,
+        headers: HeaderTypes = None,
+    ):
         url = URL(url)
         headers = Headers(headers)
 
-        if url.scheme not in ("http", "https"):
+        if url.scheme not in ("http", "https", "socks5"):
             raise ValueError(f"Unknown scheme for proxy URL {url!r}")
 
         if url.username or url.password:
-            headers.setdefault(
-                "Proxy-Authorization",
-                self._build_auth_header(url.username, url.password),
-            )
-            # Remove userinfo from the URL authority, e.g.:
-            # 'username:password@proxy_host:proxy_port' -> 'proxy_host:proxy_port'
+            # Remove any auth credentials from the URL.
+            auth = (url.username, url.password)
             url = url.copy_with(username=None, password=None)
 
         self.url = url
+        self.auth = auth
         self.headers = headers
 
-    def _build_auth_header(self, username: str, password: str) -> str:
-        userpass = (username.encode("utf-8"), password.encode("utf-8"))
-        token = b64encode(b":".join(userpass)).decode()
-        return f"Basic {token}"
+    @property
+    def raw_auth(self) -> typing.Optional[typing.Tuple[bytes, bytes]]:
+        # The proxy authentication as raw bytes.
+        return (
+            None
+            if self.auth is None
+            else (self.auth[0].encode("utf-8"), self.auth[1].encode("utf-8"))
+        )
 
     def __repr__(self) -> str:
-        return f"Proxy(url={str(self.url)!r}, headers={dict(self.headers)!r})"
+        # The authentication is represented with the password component masked.
+        auth = (self.auth[0], "********") if self.auth else None
+
+        # Build a nice concise representation.
+        url_str = f"{str(self.url)!r}"
+        auth_str = f", auth={auth!r}" if auth else ""
+        headers_str = f", headers={dict(self.headers)!r}" if self.headers else ""
+        return f"Proxy({url_str}{auth_str}{headers_str})"
 
 
 DEFAULT_TIMEOUT_CONFIG = Timeout(timeout=5.0)
index bfb0333d4e8c47c3a28eae51aa8d10ad088c70ea..0995c7fa001f91bf6c10d640a57b9426ab3c3b92 100644 (file)
@@ -137,37 +137,51 @@ class HTTPTransport(BaseTransport):
                 local_address=local_address,
                 retries=retries,
             )
-        else:
+        elif proxy.url.scheme in ("http", "https"):
+            self._pool = httpcore.HTTPProxy(
+                proxy_url=httpcore.URL(
+                    scheme=proxy.url.raw_scheme,
+                    host=proxy.url.raw_host,
+                    port=proxy.url.port,
+                    target=proxy.url.raw_path,
+                ),
+                proxy_auth=proxy.raw_auth,
+                proxy_headers=proxy.headers.raw,
+                ssl_context=ssl_context,
+                max_connections=limits.max_connections,
+                max_keepalive_connections=limits.max_keepalive_connections,
+                keepalive_expiry=limits.keepalive_expiry,
+                http1=http1,
+                http2=http2,
+            )
+        elif proxy.url.scheme == "socks5":
             try:
-                self._pool = httpcore.HTTPProxy(
-                    proxy_url=httpcore.URL(
-                        scheme=proxy.url.raw_scheme,
-                        host=proxy.url.raw_host,
-                        port=proxy.url.port,
-                        target=proxy.url.raw_path,
-                    ),
-                    proxy_headers=proxy.headers.raw,
-                    ssl_context=ssl_context,
-                    max_connections=limits.max_connections,
-                    max_keepalive_connections=limits.max_keepalive_connections,
-                    keepalive_expiry=limits.keepalive_expiry,
-                    http1=http1,
-                    http2=http2,
-                )
-            except TypeError:  # pragma: nocover
-                self._pool = httpcore.HTTPProxy(
-                    proxy_url=httpcore.URL(
-                        scheme=proxy.url.raw_scheme,
-                        host=proxy.url.raw_host,
-                        port=proxy.url.port,
-                        target=proxy.url.raw_path,
-                    ),
-                    proxy_headers=proxy.headers.raw,
-                    ssl_context=ssl_context,
-                    max_connections=limits.max_connections,
-                    max_keepalive_connections=limits.max_keepalive_connections,
-                    keepalive_expiry=limits.keepalive_expiry,
-                )
+                import socksio  # noqa
+            except ImportError:  # pragma: nocover
+                raise ImportError(
+                    "Using SOCKS proxy, but the 'socksio' package is not installed. "
+                    "Make sure to install httpx using `pip install httpx[socks]`."
+                ) from None
+
+            self._pool = httpcore.SOCKSProxy(
+                proxy_url=httpcore.URL(
+                    scheme=proxy.url.raw_scheme,
+                    host=proxy.url.raw_host,
+                    port=proxy.url.port,
+                    target=proxy.url.raw_path,
+                ),
+                proxy_auth=proxy.raw_auth,
+                ssl_context=ssl_context,
+                max_connections=limits.max_connections,
+                max_keepalive_connections=limits.max_keepalive_connections,
+                keepalive_expiry=limits.keepalive_expiry,
+                http1=http1,
+                http2=http2,
+            )
+        else:  # pragma: nocover
+            raise ValueError(
+                f"Proxy protocol must be either 'http', 'https', or 'socks5', but got {proxy.url.scheme!r}."
+            )
 
     def __enter__(self: T) -> T:  # Use generics for subclass support.
         self._pool.__enter__()
@@ -258,7 +272,7 @@ class AsyncHTTPTransport(AsyncBaseTransport):
                 local_address=local_address,
                 retries=retries,
             )
-        else:
+        elif proxy.url.scheme in ("http", "https"):
             self._pool = httpcore.AsyncHTTPProxy(
                 proxy_url=httpcore.URL(
                     scheme=proxy.url.raw_scheme,
@@ -266,11 +280,42 @@ class AsyncHTTPTransport(AsyncBaseTransport):
                     port=proxy.url.port,
                     target=proxy.url.raw_path,
                 ),
+                proxy_auth=proxy.raw_auth,
                 proxy_headers=proxy.headers.raw,
                 ssl_context=ssl_context,
                 max_connections=limits.max_connections,
                 max_keepalive_connections=limits.max_keepalive_connections,
                 keepalive_expiry=limits.keepalive_expiry,
+                http1=http1,
+                http2=http2,
+            )
+        elif proxy.url.scheme == "socks5":
+            try:
+                import socksio  # noqa
+            except ImportError:  # pragma: nocover
+                raise ImportError(
+                    "Using SOCKS proxy, but the 'socksio' package is not installed. "
+                    "Make sure to install httpx using `pip install httpx[socks]`."
+                ) from None
+
+            self._pool = httpcore.AsyncSOCKSProxy(
+                proxy_url=httpcore.URL(
+                    scheme=proxy.url.raw_scheme,
+                    host=proxy.url.raw_host,
+                    port=proxy.url.port,
+                    target=proxy.url.raw_path,
+                ),
+                proxy_auth=proxy.raw_auth,
+                ssl_context=ssl_context,
+                max_connections=limits.max_connections,
+                max_keepalive_connections=limits.max_keepalive_connections,
+                keepalive_expiry=limits.keepalive_expiry,
+                http1=http1,
+                http2=http2,
+            )
+        else:  # pragma: nocover
+            raise ValueError(
+                f"Proxy protocol must be either 'http', 'https', or 'socks5', but got {proxy.url.scheme!r}."
             )
 
     async def __aenter__(self: A) -> A:  # Use generics for subclass support.
index bdd34bfa077df11f82c98a4868a3127fe6994319..9d1a88640a59b49be91b06c0f4b23e6a0f485141 100644 (file)
@@ -2,7 +2,7 @@
 # On the other hand, we're not pinning package dependencies, because our tests
 # need to pass with the latest version of the packages.
 # Reference: https://github.com/encode/httpx/pull/1721#discussion_r661241588
--e .[cli,http2,brotli]
+-e .[brotli,cli,http2,socks]
 
 charset-normalizer==2.0.6
 
index e72c5be2ec688cb67111036f1b1d1f2f15e1390e..ba2360c0604165219433ac15e7e4c83e262301a3 100644 (file)
--- a/setup.py
+++ b/setup.py
@@ -65,6 +65,7 @@ setup(
     ],
     extras_require={
         "http2": "h2>=3,<5",
+        "socks": "socksio==1.*",
         "brotli": [
             "brotli; platform_python_implementation == 'CPython'",
             "brotlicffi; platform_python_implementation != 'CPython'"
index 2d9c15884c16112b0e196e179eea55392f87ac8a..2e88f644bb60f2378533ea82a64d86e548014f97 100644 (file)
@@ -47,6 +47,20 @@ def test_proxies_parameter(proxies, expected_proxies):
     assert len(expected_proxies) == len(client._mounts)
 
 
+def test_socks_proxy():
+    url = httpx.URL("http://www.example.com")
+
+    client = httpx.Client(proxies="socks5://localhost/")
+    transport = client._transport_for_url(url)
+    assert isinstance(transport, httpx.HTTPTransport)
+    assert isinstance(transport._pool, httpcore.SOCKSProxy)
+
+    async_client = httpx.AsyncClient(proxies="socks5://localhost/")
+    async_transport = async_client._transport_for_url(url)
+    assert isinstance(async_transport, httpx.AsyncHTTPTransport)
+    assert isinstance(async_transport._pool, httpcore.AsyncSOCKSProxy)
+
+
 PROXY_URL = "http://[::1]"
 
 
index 3a17cf597606401030d8283567a703b0c84bc3b6..56d354197d56f1531b30e1d555abdd6f4d2481f3 100644 (file)
@@ -199,25 +199,22 @@ def test_ssl_config_support_for_keylog_file(tmpdir, monkeypatch):  # pragma: noc
         assert context.keylog_filename is None  # type: ignore
 
 
-@pytest.mark.parametrize(
-    "url,expected_url,expected_headers",
-    [
-        ("https://example.com", "https://example.com", {}),
-        (
-            "https://user:pass@example.com",
-            "https://example.com",
-            {"proxy-authorization": "Basic dXNlcjpwYXNz"},
-        ),
-    ],
-)
-def test_proxy_from_url(url, expected_url, expected_headers):
-    proxy = httpx.Proxy(url)
+def test_proxy_from_url():
+    proxy = httpx.Proxy("https://example.com")
 
-    assert str(proxy.url) == expected_url
-    assert dict(proxy.headers) == expected_headers
-    assert repr(proxy) == "Proxy(url='{}', headers={})".format(
-        expected_url, str(expected_headers)
-    )
+    assert str(proxy.url) == "https://example.com"
+    assert proxy.auth is None
+    assert proxy.headers == {}
+    assert repr(proxy) == "Proxy('https://example.com')"
+
+
+def test_proxy_with_auth_from_url():
+    proxy = httpx.Proxy("https://username:password@example.com")
+
+    assert str(proxy.url) == "https://example.com"
+    assert proxy.auth == ("username", "password")
+    assert proxy.headers == {}
+    assert repr(proxy) == "Proxy('https://example.com', auth=('username', '********'))"
 
 
 def test_invalid_proxy_scheme():