From: Jeeyoung Kim Date: Mon, 22 Aug 2011 22:14:39 +0000 (-0700) Subject: Implemented read_until_regex in tornado. This is similar to read_until(), except... X-Git-Tag: v2.1.0~39^2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=86404402d4d98e5559346b1533b8566af87978bd;p=thirdparty%2Ftornado.git Implemented read_until_regex in tornado. This is similar to read_until(), except it looks for a specific delimiter. This function is used to parse the header of the website, where it is delimited by '\n' only (like http://news.ycombinator.com/rss) --- diff --git a/tornado/iostream.py b/tornado/iostream.py index f6308d3fd..1a4f4992f 100644 --- a/tornado/iostream.py +++ b/tornado/iostream.py @@ -23,6 +23,7 @@ import errno import logging import socket import sys +import re from tornado import ioloop from tornado import stack_context @@ -89,6 +90,7 @@ class IOStream(object): self._read_buffer_size = 0 self._write_buffer_frozen = False self._read_delimiter = None + self._read_regex = None self._read_bytes = None self._read_until_close = False self._read_callback = None @@ -124,6 +126,20 @@ class IOStream(object): self._connect_callback = stack_context.wrap(callback) self._add_io_state(self.io_loop.WRITE) + def read_until_regex(self, regex, callback): + """Call callback when we read the given regex pattern.""" + assert not self._read_callback, "Already reading" + self._read_regex = re.compile(regex) + self._read_callback = stack_context.wrap(callback) + while True: + # See if we've already got the data from a previous read + if self._read_from_buffer(): + return + self._check_closed() + if self._read_to_buffer() == 0: + break + self._add_io_state(self.io_loop.READ) + def read_until(self, delimiter, callback): """Call callback when we read the given delimiter.""" assert not self._read_callback, "Already reading" @@ -374,6 +390,15 @@ class IOStream(object): self._run_callback(callback, self._consume(loc + delimiter_len)) return True + elif self._read_regex is not None: + _merge_prefix(self._read_buffer, sys.maxint) + m = self._read_regex.search(self._read_buffer[0]) + if m: + callback = self._read_callback + self._read_callback = None + self._read_regex = None + self._run_callback(callback, self._consume(m.end())) + return True return False def _handle_connect(self): diff --git a/tornado/simple_httpclient.py b/tornado/simple_httpclient.py index e1a9ce405..904364fcc 100644 --- a/tornado/simple_httpclient.py +++ b/tornado/simple_httpclient.py @@ -268,7 +268,7 @@ class _HTTPConnection(object): self.stream.write(b("\r\n").join(request_lines) + b("\r\n\r\n")) if self.request.body is not None: self.stream.write(self.request.body) - self.stream.read_until(b("\r\n\r\n"), self._on_headers) + self.stream.read_until_regex(b("\r?\n\r?\n"), self._on_headers) def _release(self): if self.release_callback is not None: @@ -298,7 +298,7 @@ class _HTTPConnection(object): def _on_headers(self, data): data = native_str(data.decode("latin1")) - first_line, _, header_data = data.partition("\r\n") + first_line, _, header_data = data.partition("\n") match = re.match("HTTP/1.[01] ([0-9]+)", first_line) assert match self.code = int(match.group(1))