From e3f8aa81d5b288217275d64315eed57136f74e92 Mon Sep 17 00:00:00 2001 From: Bernardo Heynemann Date: Wed, 6 Feb 2013 15:35:35 -0200 Subject: [PATCH] Pre-compiling regex that removes control characters. Given that this regex might be used a lot in the lifetime of a given server (each request that uses get_argument for unicode strings) it seems sensible to store the compiled version of the regex. --- tornado/web.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tornado/web.py b/tornado/web.py index 35f4e0982..3cccba564 100644 --- a/tornado/web.py +++ b/tornado/web.py @@ -114,6 +114,7 @@ class RequestHandler(object): _template_loaders = {} # {path: template.BaseLoader} _template_loader_lock = threading.Lock() + _remove_control_chars_regex = re.compile(r"[\x00-\x08\x0e-\x1f]") def __init__(self, application, request, **kwargs): super(RequestHandler, self).__init__() @@ -342,13 +343,14 @@ class RequestHandler(object): The returned values are always unicode. """ + values = [] for v in self.request.arguments.get(name, []): v = self.decode_argument(v, name=name) if isinstance(v, unicode_type): # Get rid of any weird control chars (unless decoding gave # us bytes, in which case leave it alone) - v = re.sub(r"[\x00-\x08\x0e-\x1f]", " ", v) + v = RequestHandler._remove_control_chars_regex.sub(" ", v) if strip: v = v.strip() values.append(v) -- 2.47.2