From: Ben Darnell Date: Fri, 8 Jun 2012 18:11:28 +0000 (-0700) Subject: Add flush method to GzipDecompressor interface and use it. X-Git-Tag: v2.4.0~74 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=7a2779e958cb5bbb1d95a1a1f9f764c3e1f7dc01;p=thirdparty%2Ftornado.git Add flush method to GzipDecompressor interface and use it. --- diff --git a/tornado/simple_httpclient.py b/tornado/simple_httpclient.py index 24ec5da2a..32ba5c849 100644 --- a/tornado/simple_httpclient.py +++ b/tornado/simple_httpclient.py @@ -417,7 +417,8 @@ class _HTTPConnection(object): self.stream.close() return if self._decompressor: - data = self._decompressor(data) + data = (self._decompressor.decompress(data) + + self._decompressor.flush()) if self.request.streaming_callback: if self.chunks is None: # if chunks is not None, we already called streaming_callback @@ -438,9 +439,21 @@ class _HTTPConnection(object): # TODO: "chunk extensions" http://tools.ietf.org/html/rfc2616#section-3.6.1 length = int(data.strip(), 16) if length == 0: - # all the data has been decompressed, so we don't need to - # decompress again in _on_body - self._decompressor = None + if self._decompressor is not None: + tail = self._decompressor.flush() + if tail: + # I believe the tail will always be empty (i.e. + # decompress will return all it can). The purpose + # of the flush call is to detect errors such + # as truncated input. 
But in case it ever returns + # anything, treat it as an extra chunk + if self.request.streaming_callback is not None: + self.request.streaming_callback(tail) + else: + self.chunks.append(tail) + # all the data has been decompressed, so we don't need to + # decompress again in _on_body + self._decompressor = None self._on_body(b('').join(self.chunks)) else: self.stream.read_bytes(length + 2, # chunk ends with \r\n @@ -450,7 +463,7 @@ class _HTTPConnection(object): assert data[-2:] == b("\r\n") chunk = data[:-2] if self._decompressor: - chunk = self._decompressor(chunk) + chunk = self._decompressor.decompress(chunk) if self.request.streaming_callback is not None: self.request.streaming_callback(chunk) else: diff --git a/tornado/util.py b/tornado/util.py index 83b19f78f..80cab8980 100644 --- a/tornado/util.py +++ b/tornado/util.py @@ -18,14 +18,34 @@ class ObjectDict(dict): class GzipDecompressor(object): + """Streaming gzip decompressor. + + The interface is like that of `zlib.decompressobj` (without the + optional arguments), but it understands gzip headers and checksums. + """ def __init__(self): # Magic parameter makes zlib module understand gzip header # http://stackoverflow.com/questions/1838699/how-can-i-decompress-a-gzip-stream-with-zlib + # This works on cpython and pypy, but not jython. self.decompressobj = zlib.decompressobj(16 + zlib.MAX_WBITS) - def __call__(self, value): + def decompress(self, value): + """Decompress a chunk, returning newly-available data. + + Some data may be buffered for later processing; `flush` must + be called when there is no more input data to ensure that + all data was processed. + """ return self.decompressobj.decompress(value) + def flush(self): + """Return any remaining buffered data not yet returned by decompress. + + Also checks for errors such as truncated input. + No other methods may be called on this object after `flush`. + """ + return self.decompressobj.flush() + def import_object(name): """Imports an object by name.