+"""Blocking and non-blocking HTTP client interfaces.
+
+This module defines a common interface shared by two implementations,
+`simple_httpclient` and `curl_httpclient`. Applications may either
+instantiate their chosen implementation class directly or use the
+`AsyncHTTPClient` class from this module, which selects an implementation
+that can be overridden with the `AsyncHTTPClient.configure` method.
+
+The default implementation is `simple_httpclient`, and this is expected
+to be suitable for most users' needs. However, some applications may wish
+to switch to `curl_httpclient` for reasons such as the following:
+
+* `curl_httpclient` is more likely to be compatible with sites that are
+ not-quite-compliant with the HTTP spec, or sites that use little-exercised
+ features of HTTP.
+
+* `simple_httpclient` only supports SSL on Python 2.6 and above.
+
+* `curl_httpclient` is faster.
+
+* `curl_httpclient` was the default prior to Tornado 2.0.
+
+Note that if you are using `curl_httpclient`, it is highly recommended that
+you use a recent version of ``libcurl`` and ``pycurl``. Currently the minimum
+supported version is 7.18.2, and the recommended version is 7.21.1 or newer.
+"""
+
import calendar
import email.utils
import httplib
class HTTPClient(object):
"""A blocking HTTP client.
+ This interface is provided for convenience and testing; most applications
+ that are running an IOLoop will want to use `AsyncHTTPClient` instead.
Typical usage looks like this::
http_client = httpclient.HTTPClient()
print response.body
except httpclient.HTTPError, e:
print "Error:", e
-
- fetch() can take a string URL or an HTTPRequest instance, which offers
- more options, like executing POST/PUT/DELETE requests.
"""
def __init__(self):
self._io_loop = IOLoop()
self._async_client.close()
def fetch(self, request, **kwargs):
- """Executes an HTTPRequest, returning an HTTPResponse.
+ """Executes a request, returning an `HTTPResponse`.
+
+ The request may be either a string URL or an `HTTPRequest` object.
+ If it is a string, we construct an `HTTPRequest` using any additional
+ kwargs: ``HTTPRequest(request, **kwargs)``
- If an error occurs during the fetch, we raise an HTTPError.
+ If an error occurs during the fetch, we raise an `HTTPError`.
"""
def callback(response):
self._response = response
http_client.fetch("http://www.google.com/", handle_request)
ioloop.IOLoop.instance().start()
- fetch() can take a string URL or an HTTPRequest instance, which offers
- more options, like executing POST/PUT/DELETE requests.
-
The constructor for this class is magic in several respects: It actually
creates an instance of an implementation-specific subclass, and instances
are reused as a kind of pseudo-singleton (one per IOLoop). The keyword
del self._async_clients[self.io_loop]
def fetch(self, request, callback, **kwargs):
- """Executes an HTTPRequest, calling callback with an HTTPResponse.
+ """Executes a request, calling callback with an `HTTPResponse`.
+
+ The request may be either a string URL or an `HTTPRequest` object.
+ If it is a string, we construct an `HTTPRequest` using any additional
+ kwargs: ``HTTPRequest(request, **kwargs)``
If an error occurs during the fetch, the HTTPResponse given to the
callback has a non-None error attribute that contains the exception
simultaneous fetch() operations that can execute in parallel
on each IOLoop. Additional arguments may be supported depending
on the implementation class in use.
+
+ Example::
+
+ AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient")
"""
if isinstance(impl, (unicode, bytes_type)):
impl = import_object(impl)
AsyncHTTPClient._impl_kwargs = kwargs
class HTTPRequest(object):
+ """HTTP client request object."""
def __init__(self, url, method="GET", headers=None, body=None,
auth_username=None, auth_password=None,
connect_timeout=20.0, request_timeout=20.0,
proxy_password='', allow_nonstandard_methods=False,
validate_cert=True, ca_certs=None,
allow_ipv6=None):
+ """Creates an `HTTPRequest`.
+
+ All parameters except `url` are optional.
+
+ :arg string url: URL to fetch
+ :arg string method: HTTP method, e.g. "GET" or "POST"
+ :arg headers: Additional HTTP headers to pass on the request
+ :type headers: `~tornado.httputil.HTTPHeaders` or `dict`
+ :arg string auth_username: Username for HTTP "Basic" authentication
+ :arg string auth_password: Password for HTTP "Basic" authentication
+ :arg float connect_timeout: Timeout for initial connection in seconds
+ :arg float request_timeout: Timeout for entire request in seconds
+ :arg datetime if_modified_since: Timestamp for ``If-Modified-Since``
+ header
+ :arg bool follow_redirects: Should redirects be followed automatically
+ or should the 3xx response be returned?
+ :arg int max_redirects: Limit for `follow_redirects`
+ :arg string user_agent: String to send as ``User-Agent`` header
+ :arg bool use_gzip: Request gzip encoding from the server
+ :arg string network_interface: Network interface to use for request
+ :arg callable streaming_callback: If set, `streaming_callback` will
+ be run with each chunk of data as it is received, and
+ `~HTTPResponse.body` and `~HTTPResponse.buffer` will be empty in
+ the final response.
+ :arg callable header_callback: If set, `header_callback` will
+ be run with each header line as it is received, and
+ `~HTTPResponse.headers` will be empty in the final response.
+ :arg callable prepare_curl_callback: If set, will be called with
+ a `pycurl.Curl` object to allow the application to make additional
+ `setopt` calls.
+ :arg string proxy_host: HTTP proxy hostname. To use proxies,
+ `proxy_host` and `proxy_port` must be set; `proxy_username` and
+ `proxy_password` are optional. Proxies are currently only supported
+ with `curl_httpclient`.
+ :arg int proxy_port: HTTP proxy port
+ :arg string proxy_username: HTTP proxy username
+ :arg string proxy_password: HTTP proxy password
+ :arg bool allow_nonstandard_methods: Allow unknown values for `method`
+ argument?
+ :arg bool validate_cert: For HTTPS requests, validate the server's
+ certificate?
+ :arg string ca_certs: filename of CA certificates in PEM format,
+ or None to use defaults. Note that in `curl_httpclient`, if
+ any request uses a custom `ca_certs` file, they all must (they
+ don't have to all use the same `ca_certs`, but it's not possible
+ to mix requests with `ca_certs` and requests that use the defaults).
+ :arg bool allow_ipv6: Use IPv6 when available? Default is false in
+ `simple_httpclient` and true in `curl_httpclient`.
+ """
if headers is None:
headers = httputil.HTTPHeaders()
if if_modified_since:
timestamp = calendar.timegm(if_modified_since.utctimetuple())
headers["If-Modified-Since"] = email.utils.formatdate(
timestamp, localtime=False, usegmt=True)
- # Proxy support: proxy_host and proxy_port must be set to connect via
- # proxy. The username and password credentials are optional.
self.proxy_host = proxy_host
self.proxy_port = proxy_port
self.proxy_username = proxy_username
self.header_callback = header_callback
self.prepare_curl_callback = prepare_curl_callback
self.allow_nonstandard_methods = allow_nonstandard_methods
- # SSL certificate validation:
- # validate_cert: boolean, set to False to disable validation
- # ca_certs: filename of CA certificates in PEM format, or
- # None to use defaults
- # Note that in the curl-based HTTP client, if any request
- # uses a custom ca_certs file, they all must (they don't have to
- # all use the same ca_certs, but it's not possible to mix requests
- # with ca_certs and requests that use the defaults).
- # SimpleAsyncHTTPClient does not have this limitation.
self.validate_cert = validate_cert
self.ca_certs = ca_certs
- # allow_ipv6 may be True, False, or None for default behavior
- # that varies by httpclient implementation.
self.allow_ipv6 = allow_ipv6
self.start_time = time.time()
body = property(_get_body)
def rethrow(self):
+ """If there was an error on the request, raise an `HTTPError`."""
if self.error:
raise self.error