From: Bernardo Heynemann Date: Wed, 6 Feb 2013 17:35:35 +0000 (-0200) Subject: Pre-compiling regex that removes control characters. X-Git-Tag: v3.0.0~136^2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e3f8aa81d5b288217275d64315eed57136f74e92;p=thirdparty%2Ftornado.git Pre-compiling regex that removes control characters. Given that this regex might be used a lot in the lifetime of a given server (each request that uses get_argument for unicode strings) it seems sensible to store the compiled version of the regex. --- diff --git a/tornado/web.py b/tornado/web.py index 35f4e0982..3cccba564 100644 --- a/tornado/web.py +++ b/tornado/web.py @@ -114,6 +114,7 @@ class RequestHandler(object): _template_loaders = {} # {path: template.BaseLoader} _template_loader_lock = threading.Lock() + _remove_control_chars_regex = re.compile(r"[\x00-\x08\x0e-\x1f]") def __init__(self, application, request, **kwargs): super(RequestHandler, self).__init__() @@ -342,13 +343,14 @@ class RequestHandler(object): The returned values are always unicode. """ + values = [] for v in self.request.arguments.get(name, []): v = self.decode_argument(v, name=name) if isinstance(v, unicode_type): # Get rid of any weird control chars (unless decoding gave # us bytes, in which case leave it alone) - v = re.sub(r"[\x00-\x08\x0e-\x1f]", " ", v) + v = RequestHandler._remove_control_chars_regex.sub(" ", v) if strip: v = v.strip() values.append(v)