]> git.ipfire.org Git - thirdparty/openembedded/openembedded-core-contrib.git/commitdiff
bitbake: wget.py: parse only <a> tags
authorAlexander Kanavin <alexander.kanavin@linux.intel.com>
Fri, 4 Dec 2015 11:00:20 +0000 (13:00 +0200)
committerRichard Purdie <richard.purdie@linuxfoundation.org>
Mon, 7 Dec 2015 17:01:21 +0000 (17:01 +0000)
For two reasons:
1) The important one: we hit the following bug when doing upstream version checks
on some webpages:
https://bugs.launchpad.net/beautifulsoup/+bug/1471755

2) Also, documentation for beautifulsoup states that memory usage and
speed is improved that way.

(Bitbake rev: 7546d4aeb3ba8fda9832081b84d93138dc5e58d6)

Signed-off-by: Alexander Kanavin <alexander.kanavin@linux.intel.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
bitbake/lib/bb/fetch2/wget.py

index bd2a8972a760e1ce0f0567cdfb56188b14cef8b8..c185f5b5f49e625b27aa4f1323b8eaa77bed7279 100644 (file)
@@ -38,6 +38,7 @@ from   bb.fetch2 import FetchError
 from   bb.fetch2 import logger
 from   bb.fetch2 import runfetchcmd
 from   bs4 import BeautifulSoup
+from   bs4 import SoupStrainer
 
 class Wget(FetchMethod):
     """Class to fetch urls via 'wget'"""
@@ -367,7 +368,7 @@ class Wget(FetchMethod):
         version = ['', '', '']
 
         bb.debug(3, "VersionURL: %s" % (url))
-        soup = BeautifulSoup(self._fetch_index(url, ud, d))
+        soup = BeautifulSoup(self._fetch_index(url, ud, d), "html.parser", parse_only=SoupStrainer("a"))
         if not soup:
             bb.debug(3, "*** %s NO SOUP" % (url))
             return ""
@@ -417,7 +418,7 @@ class Wget(FetchMethod):
                 ud.path.split(dirver)[0], ud.user, ud.pswd, {}])
         bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package))
 
-        soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d))
+        soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d), "html.parser", parse_only=SoupStrainer("a"))
         if not soup:
             return version[1]