IOLoop and HTTPClient docs

author Ben Darnell <ben@bendarnell.com>

Fri, 17 Jun 2011 07:16:40 +0000 (00:16 -0700)

committer Ben Darnell <ben@bendarnell.com>

Fri, 17 Jun 2011 07:16:40 +0000 (00:16 -0700)
author Ben Darnell <ben@bendarnell.com>
Fri, 17 Jun 2011 07:16:40 +0000 (00:16 -0700)
committer Ben Darnell <ben@bendarnell.com>
Fri, 17 Jun 2011 07:16:40 +0000 (00:16 -0700)
diff --git a/tornado/httpclient.py b/tornado/httpclient.py

index 0b77c480dd7e2d8f695c35e37c7d3a8f7e9eff70..9f45b0a8e39620109d9bc296619e3f45a5d6c987 100644 (file)
--- a/tornado/httpclient.py
+++ b/tornado/httpclient.py
@@ -1,3 +1,30 @@
+"""Blocking and non-blocking HTTP client interfaces.
+
+This module defines a common interface shared by two implementations,
+`simple_httpclient` and `curl_httpclient`.  Applications may either
+instantiate their chosen implementation class directly or use the
+`AsyncHTTPClient` class from this module, which selects an implementation
+that can be overridden with the `AsyncHTTPClient.configure` method.
+
+The default implementation is `simple_httpclient`, and this is expected
+to be suitable for most users' needs.  However, some applications may wish
+to switch to `curl_httpclient` for reasons such as the following:
+
+* `curl_httpclient` is more likely to be compatible with sites that are
+  not-quite-compliant with the HTTP spec, or sites that use little-exercised
+  features of HTTP.
+
+* `simple_httpclient` only supports SSL on Python 2.6 and above.
+
+* `curl_httpclient` is faster.
+
+* `curl_httpclient` was the default prior to Tornado 2.0.
+
+Note that if you are using `curl_httpclient`, it is highly recommended that
+you use a recent version of ``libcurl`` and ``pycurl``.  Currently the minimum
+supported version is 7.18.2, and the recommended version is 7.21.1 or newer.
+"""
+
  import calendar
  import email.utils
  import httplib
@@ -13,6 +40,8 @@ from tornado.util import import_object, bytes_type
  class HTTPClient(object):
      """A blocking HTTP client.
  
+    This interface is provided for convenience and testing; most applications
+    that are running an IOLoop will want to use `AsyncHTTPClient` instead.
      Typical usage looks like this::
  
          http_client = httpclient.HTTPClient()
@@ -21,9 +50,6 @@ class HTTPClient(object):
              print response.body
          except httpclient.HTTPError, e:
              print "Error:", e
-
-    fetch() can take a string URL or an HTTPRequest instance, which offers
-    more options, like executing POST/PUT/DELETE requests.
      """
      def __init__(self):
          self._io_loop = IOLoop()
@@ -34,9 +60,13 @@ class HTTPClient(object):
          self._async_client.close()
  
      def fetch(self, request, **kwargs):
-        """Executes an HTTPRequest, returning an HTTPResponse.
+        """Executes a request, returning an `HTTPResponse`.
+        
+        The request may be either a string URL or an `HTTPRequest` object.
+        If it is a string, we construct an `HTTPRequest` using any additional
+        kwargs: ``HTTPRequest(request, **kwargs)``
  
-        If an error occurs during the fetch, we raise an HTTPError.
+        If an error occurs during the fetch, we raise an `HTTPError`.
          """
          def callback(response):
              self._response = response
@@ -66,9 +96,6 @@ class AsyncHTTPClient(object):
          http_client.fetch("http://www.google.com/", handle_request)
          ioloop.IOLoop.instance().start()
  
-    fetch() can take a string URL or an HTTPRequest instance, which offers
-    more options, like executing POST/PUT/DELETE requests.
-
      The constructor for this class is magic in several respects:  It actually
      creates an instance of an implementation-specific subclass, and instances
      are reused as a kind of pseudo-singleton (one per IOLoop).  The keyword
@@ -114,7 +141,11 @@ class AsyncHTTPClient(object):
              del self._async_clients[self.io_loop]
  
      def fetch(self, request, callback, **kwargs):
-        """Executes an HTTPRequest, calling callback with an HTTPResponse.
+        """Executes a request, calling callback with an `HTTPResponse`.
+
+        The request may be either a string URL or an `HTTPRequest` object.
+        If it is a string, we construct an `HTTPRequest` using any additional
+        kwargs: ``HTTPRequest(request, **kwargs)``
  
          If an error occurs during the fetch, the HTTPResponse given to the
          callback has a non-None error attribute that contains the exception
@@ -138,6 +169,10 @@ class AsyncHTTPClient(object):
          simultaneous fetch() operations that can execute in parallel
          on each IOLoop.  Additional arguments may be supported depending
          on the implementation class in use.
+
+        Example::
+
+           AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient")
          """
          if isinstance(impl, (unicode, bytes_type)):
              impl = import_object(impl)
@@ -147,6 +182,7 @@ class AsyncHTTPClient(object):
          AsyncHTTPClient._impl_kwargs = kwargs
  
  class HTTPRequest(object):
+    """HTTP client request object."""
      def __init__(self, url, method="GET", headers=None, body=None,
                   auth_username=None, auth_password=None,
                   connect_timeout=20.0, request_timeout=20.0,
@@ -158,14 +194,61 @@ class HTTPRequest(object):
                   proxy_password='', allow_nonstandard_methods=False,
                   validate_cert=True, ca_certs=None,
                   allow_ipv6=None):
+        """Creates an `HTTPRequest`.
+
+        All parameters except `url` are optional.
+
+        :arg string url: URL to fetch
+        :arg string method: HTTP method, e.g. "GET" or "POST"
+        :arg headers: Additional HTTP headers to pass on the request
+        :type headers: `~tornado.httputil.HTTPHeaders` or `dict`
+        :arg string auth_username: Username for HTTP "Basic" authentication
+        :arg string auth_password: Password for HTTP "Basic" authentication
+        :arg float connect_timeout: Timeout for initial connection in seconds
+        :arg float request_timeout: Timeout for entire request in seconds
+        :arg datetime if_modified_since: Timestamp for ``If-Modified-Since``
+           header
+        :arg bool follow_redirects: Should redirects be followed automatically
+           or return the 3xx response?
+        :arg int max_redirects: Limit for `follow_redirects`
+        :arg string user_agent: String to send as ``User-Agent`` header
+        :arg bool use_gzip: Request gzip encoding from the server
+        :arg string network_interface: Network interface to use for request
+        :arg callable streaming_callback: If set, `streaming_callback` will
+           be run with each chunk of data as it is received, and 
+           `~HTTPResponse.body` and `~HTTPResponse.buffer` will be empty in 
+           the final response.
+        :arg callable header_callback: If set, `header_callback` will
+           be run with each header line as it is received, and 
+           `~HTTPResponse.headers` will be empty in the final response.
+        :arg callable prepare_curl_callback: If set, will be called with
+           a `pycurl.Curl` object to allow the application to make additional
+           `setopt` calls.
+        :arg string proxy_host: HTTP proxy hostname.  To use proxies,
+           `proxy_host` and `proxy_port` must be set; `proxy_username` and
+           `proxy_password` are optional.  Proxies are currently only
+           supported with `curl_httpclient`.
+        :arg int proxy_port: HTTP proxy port
+        :arg string proxy_username: HTTP proxy username
+        :arg string proxy_password: HTTP proxy password
+        :arg bool allow_nonstandard_methods: Allow unknown values for `method` 
+           argument?
+        :arg bool validate_cert: For HTTPS requests, validate the server's
+           certificate?
+        :arg string ca_certs: filename of CA certificates in PEM format,
+           or None to use defaults.  Note that in `curl_httpclient`, if
+           any request uses a custom `ca_certs` file, they all must (they
+           don't have to all use the same `ca_certs`, but it's not possible
+           to mix requests with ca_certs and requests that use the defaults).
+        :arg bool allow_ipv6: Use IPv6 when available?  Default is false in 
+           `simple_httpclient` and true in `curl_httpclient`
+        """
          if headers is None:
              headers = httputil.HTTPHeaders()
          if if_modified_since:
              timestamp = calendar.timegm(if_modified_since.utctimetuple())
              headers["If-Modified-Since"] = email.utils.formatdate(
                  timestamp, localtime=False, usegmt=True)
-        # Proxy support: proxy_host and proxy_port must be set to connect via
-        # proxy.  The username and password credentials are optional.
          self.proxy_host = proxy_host
          self.proxy_port = proxy_port
          self.proxy_username = proxy_username
@@ -187,19 +270,8 @@ class HTTPRequest(object):
          self.header_callback = header_callback
          self.prepare_curl_callback = prepare_curl_callback
          self.allow_nonstandard_methods = allow_nonstandard_methods
-        # SSL certificate validation:
-        # validate_cert: boolean, set to False to disable validation
-        # ca_certs: filename of CA certificates in PEM format, or
-        #     None to use defaults
-        # Note that in the curl-based HTTP client, if any request
-        # uses a custom ca_certs file, they all must (they don't have to
-        # all use the same ca_certs, but it's not possible to mix requests
-        # with ca_certs and requests that use the defaults).
-        # SimpleAsyncHTTPClient does not have this limitation.
          self.validate_cert = validate_cert
          self.ca_certs = ca_certs
-        # allow_ipv6 may be True, False, or None for default behavior
-        # that varies by httpclient implementation.
          self.allow_ipv6 = allow_ipv6
          self.start_time = time.time()
  
@@ -262,6 +334,7 @@ class HTTPResponse(object):
      body = property(_get_body)
  
      def rethrow(self):
+        """If there was an error on the request, raise an `HTTPError`."""
          if self.error:
              raise self.error
  
diff --git a/tornado/ioloop.py b/tornado/ioloop.py

index 0e0892a83ceaa5a9f650fd56c994355893bc91a7..b92ce1d3f3be61f2c086f66f1236d0061b896c40 100644 (file)
--- a/tornado/ioloop.py
+++ b/tornado/ioloop.py
@@ -14,7 +14,17 @@
  # License for the specific language governing permissions and limitations
  # under the License.
  
-"""A level-triggered I/O loop for non-blocking sockets."""
+"""An I/O event loop for non-blocking sockets.
+
+Typical applications will use a single `IOLoop` object, in the
+`IOLoop.instance` singleton.  The `IOLoop.start` method should usually
+be called at the end of the ``main()`` function.  Atypical applications may
+use more than one `IOLoop`, such as one `IOLoop` per thread, or per `unittest`
+case.
+
+In addition to I/O events, the `IOLoop` can also schedule time-based events.
+`IOLoop.add_timeout` is a non-blocking alternative to `time.sleep`.
+"""
  
  import errno
  import heapq
@@ -44,10 +54,11 @@ except ImportError:
  class IOLoop(object):
      """A level-triggered I/O loop.
  
-    We use epoll if it is available, or else we fall back on select(). If
-    you are implementing a system that needs to handle 1000s of simultaneous
-    connections, you should use Linux and either compile our epoll module or
-    use Python 2.6+ to get epoll support.
+    We use epoll (Linux) or kqueue (BSD and Mac OS X; requires Python
+    2.6+) if they are available, or else we fall back on select(). If
+    you are implementing a system that needs to handle thousands of
+    simultaneous connections, you should use a system that supports either
+    epoll or kqueue.
  
      Example usage for a simple TCP server::
  
diff --git a/tornado/iostream.py b/tornado/iostream.py

index 698c9dda783917cfec2c4f917e4a4abd13cc39ab..bce6e633fc1ac984c4ae211530e5e5b374387416 100644 (file)
--- a/tornado/iostream.py
+++ b/tornado/iostream.py
@@ -411,7 +411,7 @@ class IOStream(object):
  
  
  class SSLIOStream(IOStream):
-    """A utility class to write to and read from a non-blocking socket.
+    """A utility class to write to and read from a non-blocking SSL socket.
  
      If the socket passed to the constructor is already connected,
      it should be wrapped with::
diff --git a/website/sphinx/httpclient.rst b/website/sphinx/httpclient.rst

index b1f28009cca7169521bdc575f688beb6f6c1506b..1477ed6ee1a493f7843d4db5b66d137a0357215d 100644 (file)
--- a/website/sphinx/httpclient.rst
+++ b/website/sphinx/httpclient.rst
@@ -2,4 +2,39 @@
  ===================================================
  
  .. automodule:: tornado.httpclient
-   :members:
+
+   HTTP client interfaces
+   ----------------------
+
+   .. autoclass:: HTTPClient
+      :members:
+
+   .. autoclass:: AsyncHTTPClient
+      :members:
+
+   Request objects
+   ---------------
+   .. autoclass:: HTTPRequest
+      :members:
+   
+   Response objects
+   ----------------
+   .. autoclass:: HTTPResponse
+      :members:
+
+   Exceptions
+   ----------
+   .. autoexception:: HTTPError
+      :members:
+
+   Command-line interface
+   ----------------------
+
+   This module provides a simple command-line interface to fetch a url
+   using Tornado's HTTP client.  Example usage::
+
+      # Fetch the url and print its body
+      python -m tornado.httpclient http://www.google.com
+
+      # Just print the headers
+      python -m tornado.httpclient --print_headers --print_body=false http://www.google.com
diff --git a/website/sphinx/ioloop.rst b/website/sphinx/ioloop.rst

index 1c23171c9ccb172e807a4a2379722cf066d8cf6d..58d6ec4aa848681e76ea90648fed1d9aae237e23 100644 (file)
--- a/website/sphinx/ioloop.rst
+++ b/website/sphinx/ioloop.rst
@@ -2,4 +2,39 @@
  ======================================
  
  .. automodule:: tornado.ioloop
-   :members:
+
+   IOLoop objects
+   --------------
+
+   .. autoclass:: IOLoop
+
+   Running an IOLoop
+   ^^^^^^^^^^^^^^^^^
+
+   .. automethod:: IOLoop.instance
+   .. automethod:: IOLoop.start
+   .. automethod:: IOLoop.stop
+   .. automethod:: IOLoop.running
+
+   I/O events
+   ^^^^^^^^^^
+
+   .. automethod:: IOLoop.add_handler
+   .. automethod:: IOLoop.update_handler
+   .. automethod:: IOLoop.remove_handler
+
+   Timeouts
+   ^^^^^^^^
+
+   .. automethod:: IOLoop.add_callback
+   .. automethod:: IOLoop.add_timeout
+   .. automethod:: IOLoop.remove_timeout
+   .. autoclass:: PeriodicCallback
+
+   Debugging and error handling
+   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+   .. automethod:: IOLoop.handle_callback_exception
+   .. automethod:: IOLoop.set_blocking_signal_threshold
+   .. automethod:: IOLoop.set_blocking_log_threshold
+   .. automethod:: IOLoop.log_stack
diff --git a/website/static/sphinx.css b/website/static/sphinx.css

index af0308a8ad99421303f93ebdd91744f229b511cf..e5c53d9eb70f17dae4237d11ab677b3c7b8eb928 100644 (file)
--- a/website/static/sphinx.css
+++ b/website/static/sphinx.css
@@ -25,4 +25,8 @@ div.body p, div.body dd, div.body li {
  
  .highlight {
      background: #fff !important;
+}
+
+th.field-name {
+    background: #fff;
  }
 \ No newline at end of file
author	Ben Darnell <ben@bendarnell.com>
	Fri, 17 Jun 2011 07:16:40 +0000 (00:16 -0700)
committer	Ben Darnell <ben@bendarnell.com>
	Fri, 17 Jun 2011 07:16:40 +0000 (00:16 -0700)
tornado/httpclient.py		patch \| blob \| blame \| history
tornado/ioloop.py		patch \| blob \| blame \| history
tornado/iostream.py		patch \| blob \| blame \| history
website/sphinx/httpclient.rst		patch \| blob \| blame \| history
website/sphinx/ioloop.rst		patch \| blob \| blame \| history
website/static/sphinx.css		patch \| blob \| blame \| history