From: Éric Araujo
Date: Sat, 10 Sep 2011 16:10:58 +0000 (+0200)
Subject: Use bytes regex instead of decoding whole pages
X-Git-Tag: v3.3.0a1~1544^2~4
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=030cfe26a336826d08362cd60c8ea4be7775844b;p=thirdparty%2FPython%2Fcpython.git

Use bytes regex instead of decoding whole pages
---

diff --git a/Lib/packaging/pypi/simple.py b/Lib/packaging/pypi/simple.py
index 710355d6480e..76aad02416d0 100644
--- a/Lib/packaging/pypi/simple.py
+++ b/Lib/packaging/pypi/simple.py
@@ -159,22 +159,20 @@ class Crawler(BaseClient):
 
         Return a list of names.
         """
-        with self._open_url(self.index_url) as index:
-            if '*' in name:
-                name.replace('*', '.*')
-            else:
-                name = "%s%s%s" % ('*.?', name, '*.?')
-            name = name.replace('*', '[^<]*')  # avoid matching end tag
-            projectname = re.compile('<a[^>]*>(%s)</a>' % name, re.I)
-            matching_projects = []
+        if '*' in name:
+            name.replace('*', '.*')
+        else:
+            name = "%s%s%s" % ('*.?', name, '*.?')
+        name = name.replace('*', '[^<]*')  # avoid matching end tag
+        pattern = ('<a[^>]*>(%s)</a>' % name).encode('utf-8')
+        projectname = re.compile(pattern, re.I)
+        matching_projects = []
 
+        with self._open_url(self.index_url) as index:
             index_content = index.read()
 
-        # FIXME should use bytes I/O and regexes instead of decoding
-        index_content = index_content.decode()
-
         for match in projectname.finditer(index_content):
-            project_name = match.group(1)
+            project_name = match.group(1).decode('utf-8')
             matching_projects.append(self._get_project(project_name))
         return matching_projects
 