From: Michael Tremer
Date: Wed, 23 Feb 2011 21:20:55 +0000 (+0100)
Subject: python-urlgrabber: Update to 3.9.1 (with fixes from upstream).
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f79f24feacb8be7a26333f36e25c97ef43fefc39;p=ipfire-3.x.git

python-urlgrabber: Update to 3.9.1 (with fixes from upstream).
---

diff --git a/pkgs/core/python-urlgrabber/patches/urlgrabber-HEAD.patch b/pkgs/core/python-urlgrabber/patches/urlgrabber-HEAD.patch
new file mode 100644
index 000000000..6627a1f82
--- /dev/null
+++ b/pkgs/core/python-urlgrabber/patches/urlgrabber-HEAD.patch
@@ -0,0 +1,701 @@
+diff --git a/scripts/urlgrabber b/scripts/urlgrabber
+index 518e512..09cd896 100644
+--- a/scripts/urlgrabber
++++ b/scripts/urlgrabber
+@@ -115,6 +115,7 @@ options:
+                     including quotes in the case of strings.
+                     e.g. --user_agent='"foobar/2.0"'
+ 
++  --output FILE
+   -o FILE          write output to FILE, otherwise the basename of the
+                    url will be used
+   -O               print the names of saved files to STDOUT
+@@ -170,12 +171,17 @@ class client_options:
+         return ug_options, ug_defaults
+ 
+     def process_command_line(self):
+-        short_options = 'vd:hoOpD'
++        short_options = 'vd:ho:OpD'
+         long_options = ['profile', 'repeat=', 'verbose=',
+-                        'debug=', 'help', 'progress']
++                        'debug=', 'help', 'progress', 'output=']
+         ug_long = [ o + '=' for o in self.ug_options ]
+-        optlist, args = getopt.getopt(sys.argv[1:], short_options,
+-                                      long_options + ug_long)
++        try:
++            optlist, args = getopt.getopt(sys.argv[1:], short_options,
++                                          long_options + ug_long)
++        except getopt.GetoptError, e:
++            print >>sys.stderr, "Error:", e
++            self.help([], ret=1)
++
+         self.verbose = 0
+         self.debug = None
+         self.outputfile = None
+@@ -193,6 +199,7 @@ class client_options:
+             if o == '--verbose': self.verbose = v
+             if o == '-v': self.verbose += 1
+             if o == '-o': self.outputfile = v
++            if o == '--output': self.outputfile = v
+             if o == '-p' or o == '--progress': self.progress = 1
+             if o == '-d' or o == '--debug': self.debug = v
+             if o == '--profile': self.profile = 1
+@@ -222,7 +229,7 @@ class client_options:
+             print "ERROR: cannot use -o when grabbing multiple files"
+             sys.exit(1)
+ 
+-    def help(self, args):
++    def help(self, args, ret=0):
+         if not args:
+             print MAINHELP
+         else:
+@@ -234,7 +241,7 @@ class client_options:
+                     self.help_ug_option(a)
+                 else:
+                     print 'ERROR: no help on command "%s"' % a
+-        sys.exit(0)
++        sys.exit(ret)
+ 
+     def help_doc(self):
+         print __doc__
+diff --git a/test/base_test_code.py b/test/base_test_code.py
+index 50c6348..5fb43f9 100644
+--- a/test/base_test_code.py
++++ b/test/base_test_code.py
+@@ -1,6 +1,6 @@
+ from munittest import *
+ 
+-base_http = 'http://www.linux.duke.edu/projects/urlgrabber/test/'
++base_http = 'http://urlgrabber.baseurl.org/test/'
+ base_ftp = 'ftp://localhost/test/'
+ 
+ # set to a proftp server only. we're working around a couple of
+diff --git a/urlgrabber/byterange.py b/urlgrabber/byterange.py
+index 3e5f3b7..8eeaeda 100644
+--- a/urlgrabber/byterange.py
++++ b/urlgrabber/byterange.py
+@@ -68,7 +68,7 @@ class HTTPRangeHandler(urllib2.BaseHandler):
+ 
+     def http_error_416(self, req, fp, code, msg, hdrs):
+         # HTTP's Range Not Satisfiable error
+-        raise RangeError('Requested Range Not Satisfiable')
++        raise RangeError(9, 'Requested Range Not Satisfiable')
+ 
+ class HTTPSRangeHandler(HTTPRangeHandler):
+     """ Range Header support for HTTPS. """
+@@ -208,7 +208,7 @@ class RangeableFileObject:
+                 bufsize = offset - pos
+             buf = self.fo.read(bufsize)
+             if len(buf) != bufsize:
+-                raise RangeError('Requested Range Not Satisfiable')
++                raise RangeError(9, 'Requested Range Not Satisfiable')
+             pos+= bufsize
+ 
+ class FileRangeHandler(urllib2.FileHandler):
+@@ -238,7 +238,7 @@ class FileRangeHandler(urllib2.FileHandler):
+             (fb,lb) = brange
+             if lb == '': lb = size
+             if fb < 0 or fb > size or lb > size:
+-                raise RangeError('Requested Range Not Satisfiable')
++                raise RangeError(9, 'Requested Range Not Satisfiable')
+             size = (lb - fb)
+             fo = RangeableFileObject(fo, (fb,lb))
+         headers = mimetools.Message(StringIO(
+@@ -318,12 +318,12 @@ class FTPRangeHandler(urllib2.FTPHandler):
+                 (fb,lb) = range_tup
+                 if lb == '':
+                     if retrlen is None or retrlen == 0:
+-                        raise RangeError('Requested Range Not Satisfiable due to unobtainable file length.')
++                        raise RangeError(9, 'Requested Range Not Satisfiable due to unobtainable file length.')
+                     lb = retrlen
+                     retrlen = lb - fb
+                     if retrlen < 0:
+                         # beginning of range is larger than file
+-                        raise RangeError('Requested Range Not Satisfiable')
++                        raise RangeError(9, 'Requested Range Not Satisfiable')
+                 else:
+                     retrlen = lb - fb
+                     fp = RangeableFileObject(fp, (0,retrlen))
+@@ -458,6 +458,6 @@ def range_tuple_normalize(range_tup):
+     # check if range is over the entire file
+     if (fb,lb) == (0,''): return None
+     # check that the range is valid
+-    if lb < fb: raise RangeError('Invalid byte range: %s-%s' % (fb,lb))
++    if lb < fb: raise RangeError(9, 'Invalid byte range: %s-%s' % (fb,lb))
+     return (fb,lb)
+ 
+diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
+index e090e90..b2770c5 100644
+--- a/urlgrabber/grabber.py
++++ b/urlgrabber/grabber.py
+@@ -68,14 +68,14 @@ GENERAL ARGUMENTS (kwargs)
+     (which can be set on default_grabber.throttle) is used. See
+     BANDWIDTH THROTTLING for more information.
+ 
+-  timeout = None
++  timeout = 300
+ 
+-    a positive float expressing the number of seconds to wait for socket
+-    operations. If the value is None or 0.0, socket operations will block
+-    forever. Setting this option causes urlgrabber to call the settimeout
+-    method on the Socket object used for the request. See the Python
+-    documentation on settimeout for more information.
+-    http://www.python.org/doc/current/lib/socket-objects.html
++    a positive integer expressing the number of seconds to wait before
++    timing out attempts to connect to a server. If the value is None
++    or 0, connection attempts will not time out. The timeout is passed
++    to the underlying pycurl object as its CONNECTTIMEOUT option, see
++    the curl documentation on CURLOPT_CONNECTTIMEOUT for more information.
++    http://curl.haxx.se/libcurl/c/curl_easy_setopt.html#CURLOPTCONNECTTIMEOUT
+ 
+   bandwidth = 0
+ 
+@@ -198,6 +198,12 @@ GENERAL ARGUMENTS (kwargs)
+     control, you should probably subclass URLParser and pass it in via
+     the 'urlparser' option.
+ 
++  username = None
++    username to use for simple http auth - is automatically quoted for special characters
++
++  password = None
++    password to use for simple http auth - is automatically quoted for special characters
++
+   ssl_ca_cert = None
+ 
+     this option can be used if M2Crypto is available and will be
+@@ -248,6 +254,11 @@ GENERAL ARGUMENTS (kwargs)
+ 
+     Maximum size (in bytes) of the headers.
+ 
++  self.ip_resolve = 'whatever'
++
++     What type of name to IP resolving to use, default is to do both IPV4 and
++     IPV6.
++
+ 
+ RETRY RELATED ARGUMENTS
+ 
+@@ -420,6 +431,7 @@ import time
+ import string
+ import urllib
+ import urllib2
++from httplib import responses
+ import mimetools
+ import thread
+ import types
+@@ -439,6 +451,12 @@ try:
+ except:
+     __version__ = '???'
+ 
++try:
++    # this part isn't going to do much - need to talk to gettext
++    from i18n import _
++except ImportError, msg:
++    def _(st): return st
++
+ ########################################################################
+ # functions for debugging output. These functions are here because they
+ # are also part of the module initialization.
+@@ -527,6 +545,22 @@ def _(st):
+ # END MODULE INITIALIZATION
+ ########################################################################
+ 
++########################################################################
++# UTILITY FUNCTIONS
++########################################################################
++
++# These functions are meant to be utilities for the urlgrabber library to use.
++
++def _to_utf8(obj, errors='replace'):
++    '''convert 'unicode' to an encoded utf-8 byte string '''
++    # stolen from yum.i18n
++    if isinstance(obj, unicode):
++        obj = obj.encode('utf-8', errors)
++    return obj
++
++########################################################################
++# END UTILITY FUNCTIONS
++########################################################################
+ 
+ 
+ class URLGrabError(IOError):
+@@ -662,6 +696,7 @@ class URLParser:
+         opts.quote = 0     --> do not quote it
+         opts.quote = None  --> guess
+         """
++        url = _to_utf8(url)
+         quote = opts.quote
+ 
+         if opts.prefix:
+@@ -800,6 +835,7 @@ class URLGrabberOptions:
+         self.close_connection = 0
+         self.range = None
+         self.user_agent = 'urlgrabber/%s' % __version__
++        self.ip_resolve = None
+         self.keepalive = 1
+         self.proxies = None
+         self.reget = None
+@@ -808,13 +844,15 @@ class URLGrabberOptions:
+         self.prefix = None
+         self.opener = None
+         self.cache_openers = True
+-        self.timeout = None
++        self.timeout = 300
+         self.text = None
+         self.http_headers = None
+         self.ftp_headers = None
+         self.data = None
+         self.urlparser = URLParser()
+         self.quote = None
++        self.username = None
++        self.password = None
+         self.ssl_ca_cert = None # sets SSL_CAINFO - path to certdb
+         self.ssl_context = None # no-op in pycurl
+         self.ssl_verify_peer = True # check peer's cert for authenticityb
+@@ -846,7 +884,7 @@ class URLGrabberOptions:
+         s = s + indent + '}'
+         return s
+ 
+-class URLGrabber:
++class URLGrabber(object):
+     """Provides easy opening of URLs with a variety of options.
+ 
+     All options are specified as kwargs. Options may be specified when
+@@ -931,6 +969,9 @@ class URLGrabber:
+             (scheme, host, path, parm, query, frag) = parts
+         if filename is None:
+             filename = os.path.basename( urllib.unquote(path) )
++            if not filename:
++                # This is better than nothing.
++                filename = 'index.html'
+         if scheme == 'file' and not opts.copy_local:
+             # just return the name of the local file - don't make a
+             # copy currently
+@@ -1030,7 +1071,7 @@ class URLGrabber:
+ 
+ default_grabber = URLGrabber()
+ 
+ 
+-class PyCurlFileObject():
++class PyCurlFileObject(object):
+     def __init__(self, url, filename, opts):
+         self.fo = None
+         self._hdr_dump = ''
+@@ -1052,9 +1093,15 @@ class PyCurlFileObject():
+         self._reget_length = 0
+         self._prog_running = False
+         self._error = (None, None)
+-        self.size = None
++        self.size = 0
++        self._hdr_ended = False
+         self._do_open()
+ 
++
++    def geturl(self):
++        """ Provide the geturl() method, used to be got from
++            urllib.addinfourl, via. urllib.URLopener.* """
++        return self.url
+ 
+     def __getattr__(self, name):
+         """This effectively allows us to wrap at the instance level.
+@@ -1085,9 +1132,14 @@ class PyCurlFileObject():
+             return -1
+ 
+     def _hdr_retrieve(self, buf):
++        if self._hdr_ended:
++            self._hdr_dump = ''
++            self.size = 0
++            self._hdr_ended = False
++
+         if self._over_max_size(cur=len(self._hdr_dump),
+                                max_size=self.opts.max_header_size):
+-            return -1
++             return -1
+         try:
+             self._hdr_dump += buf
+             # we have to get the size before we do the progress obj start
+@@ -1104,7 +1156,17 @@ class PyCurlFileObject():
+                     s = parse150(buf)
+                 if s:
+                     self.size = int(s)
+-
++
++            if buf.lower().find('location') != -1:
++                location = ':'.join(buf.split(':')[1:])
++                location = location.strip()
++                self.scheme = urlparse.urlsplit(location)[0]
++                self.url = location
++
++            if len(self._hdr_dump) != 0 and buf == '\r\n':
++                self._hdr_ended = True
++                if DEBUG: DEBUG.info('header ended:')
++
+             return len(buf)
+         except KeyboardInterrupt:
+             return pycurl.READFUNC_ABORT
+@@ -1113,8 +1175,10 @@ class PyCurlFileObject():
+         if self._parsed_hdr:
+             return self._parsed_hdr
+         statusend = self._hdr_dump.find('\n')
++        statusend += 1 # ridiculous as it may seem.
+         hdrfp = StringIO()
+         hdrfp.write(self._hdr_dump[statusend:])
++        hdrfp.seek(0)
+         self._parsed_hdr = mimetools.Message(hdrfp)
+         return self._parsed_hdr
+ 
+@@ -1136,11 +1200,21 @@ class PyCurlFileObject():
+         self.curl_obj.setopt(pycurl.PROGRESSFUNCTION, self._progress_update)
+         self.curl_obj.setopt(pycurl.FAILONERROR, True)
+         self.curl_obj.setopt(pycurl.OPT_FILETIME, True)
++        self.curl_obj.setopt(pycurl.FOLLOWLOCATION, True)
+ 
+         if DEBUG:
+             self.curl_obj.setopt(pycurl.VERBOSE, True)
+         if opts.user_agent:
+             self.curl_obj.setopt(pycurl.USERAGENT, opts.user_agent)
++        if opts.ip_resolve:
++            # Default is: IPRESOLVE_WHATEVER
++            ipr = opts.ip_resolve.lower()
++            if ipr == 'whatever': # Do we need this?
++                self.curl_obj.setopt(pycurl.IPRESOLVE,pycurl.IPRESOLVE_WHATEVER)
++            if ipr == 'ipv4':
++                self.curl_obj.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V4)
++            if ipr == 'ipv6':
++                self.curl_obj.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V6)
+ 
+         # maybe to be options later
+         self.curl_obj.setopt(pycurl.FOLLOWLOCATION, True)
+@@ -1148,9 +1222,11 @@ class PyCurlFileObject():
+ 
+         # timeouts
+         timeout = 300
+-        if opts.timeout:
+-            timeout = int(opts.timeout)
+-            self.curl_obj.setopt(pycurl.CONNECTTIMEOUT, timeout)
++        if hasattr(opts, 'timeout'):
++            timeout = int(opts.timeout or 0)
++        self.curl_obj.setopt(pycurl.CONNECTTIMEOUT, timeout)
++        self.curl_obj.setopt(pycurl.LOW_SPEED_LIMIT, 1)
++        self.curl_obj.setopt(pycurl.LOW_SPEED_TIME, timeout)
+ 
+         # ssl options
+         if self.scheme == 'https':
+@@ -1203,12 +1279,19 @@ class PyCurlFileObject():
+             if proxy == '_none_': proxy = ""
+             self.curl_obj.setopt(pycurl.PROXY, proxy)
+ 
+-        # FIXME username/password/auth settings
++        if opts.username and opts.password:
++            if self.scheme in ('http', 'https'):
++                self.curl_obj.setopt(pycurl.HTTPAUTH, pycurl.HTTPAUTH_ANY)
++
++        if opts.username and opts.password:
++            # apparently when applying them as curlopts they do not require quoting of any kind
++            userpwd = '%s:%s' % (opts.username, opts.password)
++            self.curl_obj.setopt(pycurl.USERPWD, userpwd)
+ 
+         #posts - simple - expects the fields as they are
+         if opts.data:
+             self.curl_obj.setopt(pycurl.POST, True)
+-            self.curl_obj.setopt(pycurl.POSTFIELDS, self._to_utf8(opts.data))
++            self.curl_obj.setopt(pycurl.POSTFIELDS, _to_utf8(opts.data))
+ 
+         # our url
+         self.curl_obj.setopt(pycurl.URL, self.url)
+@@ -1228,12 +1311,14 @@ class PyCurlFileObject():
+ 
+             code = self.http_code
+             errcode = e.args[0]
++            errurl = urllib.unquote(self.url)
++
+             if self._error[0]:
+                 errcode = self._error[0]
+ 
+             if errcode == 23 and code >= 200 and code < 299:
+-                err = URLGrabError(15, _('User (or something) called abort %s: %s') % (self.url, e))
+-                err.url = self.url
++                err = URLGrabError(15, _('User (or something) called abort %s: %s') % (errurl, e))
++                err.url = errurl
+ 
+                 # this is probably wrong but ultimately this is what happens
+                 # we have a legit http code and a pycurl 'writer failed' code
+@@ -1244,23 +1329,23 @@ class PyCurlFileObject():
+                 raise KeyboardInterrupt
+ 
+             elif errcode == 28:
+-                err = URLGrabError(12, _('Timeout on %s: %s') % (self.url, e))
+-                err.url = self.url
++                err = URLGrabError(12, _('Timeout on %s: %s') % (errurl, e))
++                err.url = errurl
+                 raise err
+             elif errcode == 35:
+                 msg = _("problem making ssl connection")
+                 err = URLGrabError(14, msg)
+-                err.url = self.url
++                err.url = errurl
+                 raise err
+             elif errcode == 37:
+-                msg = _("Could not open/read %s") % (self.url)
++                msg = _("Could not open/read %s") % (errurl)
+                 err = URLGrabError(14, msg)
+-                err.url = self.url
++                err.url = errurl
+                 raise err
+ 
+             elif errcode == 42:
+-                err = URLGrabError(15, _('User (or something) called abort %s: %s') % (self.url, e))
+-                err.url = self.url
++                err = URLGrabError(15, _('User (or something) called abort %s: %s') % (errurl, e))
++                err.url = errurl
+                 # this is probably wrong but ultimately this is what happens
+                 # we have a legit http code and a pycurl 'writer failed' code
+                 # which almost always means something aborted it from outside
+@@ -1272,33 +1357,93 @@ class PyCurlFileObject():
+             elif errcode == 58:
+                 msg = _("problem with the local client certificate")
+                 err = URLGrabError(14, msg)
+-                err.url = self.url
++                err.url = errurl
+                 raise err
+ 
+             elif errcode == 60:
+-                msg = _("client cert cannot be verified or client cert incorrect")
++                msg = _("Peer cert cannot be verified or peer cert invalid")
+                 err = URLGrabError(14, msg)
+-                err.url = self.url
++                err.url = errurl
+                 raise err
+ 
+             elif errcode == 63:
+                 if self._error[1]:
+                     msg = self._error[1]
+                 else:
+-                    msg = _("Max download size exceeded on %s") % (self.url)
++                    msg = _("Max download size exceeded on %s") % (errurl)
+                 err = URLGrabError(14, msg)
+-                err.url = self.url
++                err.url = errurl
+                 raise err
+ 
+             elif str(e.args[1]) == '' and self.http_code != 0: # fake it until you make it
+-                msg = 'HTTP Error %s : %s ' % (self.http_code, self.url)
++                if self.scheme in ['http', 'https']:
++                    if self.http_code in responses:
++                        resp = responses[self.http_code]
++                        msg = 'HTTP Error %s - %s : %s' % (self.http_code, resp, errurl)
++                    else:
++                        msg = 'HTTP Error %s : %s ' % (self.http_code, errurl)
++                elif self.scheme in ['ftp']:
++                    msg = 'FTP Error %s : %s ' % (self.http_code, errurl)
++                else:
++                    msg = "Unknown Error: URL=%s , scheme=%s" % (errurl, self.scheme)
+             else:
+-                msg = 'PYCURL ERROR %s - "%s"' % (errcode, str(e.args[1]))
++                pyerr2str = { 5 : _("Couldn't resolve proxy"),
++                              6 : _("Couldn't resolve host"),
++                              7 : _("Couldn't connect"),
++                              8 : _("Bad reply to FTP server"),
++                              9 : _("Access denied"),
++                             11 : _("Bad reply to FTP pass"),
++                             13 : _("Bad reply to FTP pasv"),
++                             14 : _("Bad reply to FTP 227"),
++                             15 : _("Couldn't get FTP host"),
++                             17 : _("Couldn't set FTP type"),
++                             18 : _("Partial file"),
++                             19 : _("FTP RETR command failed"),
++                             22 : _("HTTP returned error"),
++                             23 : _("Write error"),
++                             25 : _("Upload failed"),
++                             26 : _("Read error"),
++                             27 : _("Out of Memory"),
++                             28 : _("Operation timed out"),
++                             30 : _("FTP PORT command failed"),
++                             31 : _("FTP REST command failed"),
++                             33 : _("Range failed"),
++                             34 : _("HTTP POST failed"),
++                             35 : _("SSL CONNECT failed"),
++                             36 : _("Couldn't resume download"),
++                             37 : _("Couldn't read file"),
++                             42 : _("Aborted by callback"),
++                             47 : _("Too many redirects"),
++                             51 : _("Peer certificate failed verification"),
++                             53 : _("SSL engine not found"),
++                             54 : _("SSL engine set failed"),
++                             55 : _("Network error send()"),
++                             56 : _("Network error recv()"),
++                             58 : _("Local certificate failed"),
++                             59 : _("SSL set cipher failed"),
++                             60 : _("Local CA certificate failed"),
++                             61 : _("HTTP bad transfer encoding"),
++                             63 : _("Maximum file size exceeded"),
++                             64 : _("FTP SSL failed"),
++                             67 : _("Authentication failure"),
++                             70 : _("Out of disk space on server"),
++                             73 : _("Remote file exists"),
++                            }
++                errstr = str(e.args[1])
++                if not errstr:
++                    errstr = pyerr2str.get(errcode, '<Unknown>')
++                msg = 'curl#%s - "%s"' % (errcode, errstr)
+                 code = errcode
+             err = URLGrabError(14, msg)
+             err.code = code
+             err.exception = e
+             raise err
++        else:
++            if self._error[1]:
++                msg = self._error[1]
++                err = URLGrabError(14, msg)
++                err.url = urllib.unquote(self.url)
++                raise err
+ 
+     def _do_open(self):
+         self.curl_obj = _curl_cache
+@@ -1333,7 +1478,11 @@ class PyCurlFileObject():
+ 
+         if self.opts.range:
+             rt = self.opts.range
+-            if rt[0]: rt = (rt[0] + reget_length, rt[1])
++
++            if rt[0] is None:
++                rt = (0, rt[1])
++            rt = (rt[0] + reget_length, rt[1])
++
+ 
+         if rt:
+             header = range_tuple_to_header(rt)
+@@ -1434,9 +1583,13 @@ class PyCurlFileObject():
+             #fh, self._temp_name = mkstemp()
+             #self.fo = open(self._temp_name, 'wb')
+ 
+-
+-        self._do_perform()
+-
++        try:
++            self._do_perform()
++        except URLGrabError, e:
++            self.fo.flush()
++            self.fo.close()
++            raise e
++
+ 
+ 
+         if _was_filename:
+@@ -1446,9 +1599,23 @@ class PyCurlFileObject():
+             # set the time
+             mod_time = self.curl_obj.getinfo(pycurl.INFO_FILETIME)
+             if mod_time != -1:
+-                os.utime(self.filename, (mod_time, mod_time))
++                try:
++                    os.utime(self.filename, (mod_time, mod_time))
++                except OSError, e:
++                    err = URLGrabError(16, _(\
++                      'error setting timestamp on file %s from %s, OSError: %s')
++                              % (self.filename, self.url, e))
++                    err.url = self.url
++                    raise err
+             # re open it
+-            self.fo = open(self.filename, 'r')
++            try:
++                self.fo = open(self.filename, 'r')
++            except IOError, e:
++                err = URLGrabError(16, _(\
++                  'error opening file from %s, IOError: %s') % (self.url, e))
++                err.url = self.url
++                raise err
++
+         else:
+             #self.fo = open(self._temp_name, 'r')
+             self.fo.seek(0)
+@@ -1532,11 +1699,14 @@ class PyCurlFileObject():
+     def _over_max_size(self, cur, max_size=None):
+ 
+         if not max_size:
+-            max_size = self.size
+-            if self.opts.size: # if we set an opts size use that, no matter what
+-                max_size = self.opts.size
++            if not self.opts.size:
++                max_size = self.size
++            else:
++                max_size = self.opts.size
++
+         if not max_size: return False # if we have None for all of the Max then this is dumb
+-        if cur > max_size + max_size*.10:
+-
++
++        if cur > int(float(max_size) * 1.10):
++
+             msg = _("Downloaded more than max size for %s: %s > %s") \
+                         % (self.url, cur, max_size)
+@@ -1544,13 +1714,6 @@ class PyCurlFileObject():
+             return True
+         return False
+ 
+-    def _to_utf8(self, obj, errors='replace'):
+-        '''convert 'unicode' to an encoded utf-8 byte string '''
+-        # stolen from yum.i18n
+-        if isinstance(obj, unicode):
+-            obj = obj.encode('utf-8', errors)
+-        return obj
+-
+     def read(self, amt=None):
+         self._fill_buffer(amt)
+         if amt is None:
+@@ -1582,9 +1745,21 @@ class PyCurlFileObject():
+             self.opts.progress_obj.end(self._amount_read)
+         self.fo.close()
+ 
+-
++    def geturl(self):
++        """ Provide the geturl() method, used to be got from
++            urllib.addinfourl, via. urllib.URLopener.* """
++        return self.url
++
+ _curl_cache = pycurl.Curl() # make one and reuse it over and over and over
+ 
++def reset_curl_obj():
++    """To make sure curl has reread the network/dns info we force a reload"""
++    global _curl_cache
++    _curl_cache.close()
++    _curl_cache = pycurl.Curl()
++
++
++
+ 
+ #####################################################################
+ # DEPRECATED FUNCTIONS
+diff --git a/urlgrabber/mirror.py b/urlgrabber/mirror.py
+index dad410b..8731aed 100644
+--- a/urlgrabber/mirror.py
++++ b/urlgrabber/mirror.py
+@@ -90,7 +90,7 @@ CUSTOMIZATION
+ import random
+ import thread  # needed for locking to make this threadsafe
+ 
+-from grabber import URLGrabError, CallbackObject, DEBUG
++from grabber import URLGrabError, CallbackObject, DEBUG, _to_utf8
+ 
+ def _(st):
+     return st
+@@ -263,7 +263,8 @@ class MirrorGroup:
+     def _parse_mirrors(self, mirrors):
+         parsed_mirrors = []
+         for m in mirrors:
+-            if type(m) == type(''): m = {'mirror': m}
++            if isinstance(m, basestring):
++                m = {'mirror': _to_utf8(m)}
+             parsed_mirrors.append(m)
+         return parsed_mirrors
+ 
+diff --git a/urlgrabber/progress.py b/urlgrabber/progress.py
+index dd07c6a..45eb248 100644
+--- a/urlgrabber/progress.py
++++ b/urlgrabber/progress.py
+@@ -658,6 +658,8 @@ def format_time(seconds, use_hours=0):
+     if seconds is None or seconds < 0:
+         if use_hours: return '--:--:--'
+         else:         return '--:--'
++    elif seconds == float('inf'):
++        return 'Infinite'
+     else:
+         seconds = int(seconds)
+         minutes = seconds / 60
diff --git a/pkgs/core/python-urlgrabber/python-urlgrabber.nm b/pkgs/core/python-urlgrabber/python-urlgrabber.nm
index 358de7f0e..b5beca805 100644
--- a/pkgs/core/python-urlgrabber/python-urlgrabber.nm
+++ b/pkgs/core/python-urlgrabber/python-urlgrabber.nm
@@ -25,7 +25,7 @@ include $(PKGROOT)/Include
 
 PKG_NAME   = urlgrabber
-PKG_VER    = 3.1.0
+PKG_VER    = 3.9.1
 PKG_REL    = 1
 
 PKG_ARCH   = noarch
@@ -35,7 +35,7 @@ PKG_URL    = http://urlgrabber.baseurl.org/
 PKG_LICENSE = LGPLv2+
 PKG_SUMMARY = A high-level cross-protocol url-grabber.
 
-PKG_BUILD_DEPS+= python-devel
+PKG_BUILD_DEPS+= python-devel python-pycurl
 
 define PKG_DESCRIPTION
	A high-level cross-protocol url-grabber for python supporting HTTP, \
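
For reviewers who want the net effect of the grabber.py hunks from the caller's
side, here is a minimal sketch (Python 2, matching upstream). The URL and the
credentials are made up for illustration; the option names (timeout,
ip_resolve, username, password) and the error numbers come from the patch
above.

    # Assumes the patched urlgrabber 3.9.1 is importable.
    from urlgrabber.grabber import URLGrabber, URLGrabError

    g = URLGrabber(
        timeout=300,        # now the default; also drives LOW_SPEED_LIMIT/TIME
        ip_resolve='ipv4',  # 'whatever' (default), 'ipv4' or 'ipv6'
        username='user',    # simple HTTP auth, handed to curl as USERPWD
        password='secret',
    )

    try:
        path = g.urlgrab('http://example.com/some/file.txt')
    except URLGrabError, e:
        # 12 = timeout, 14 = HTTP/curl error, 16 = local file error, all
        # raised in the patched PyCurlFileObject; e.url carries the URL.
        print 'download failed:', e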
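The patch also keeps a single pycurl handle cached at module level
(_curl_cache) for connection reuse, and adds reset_curl_obj() to discard it.
A plausible use, sketched here, is forcing curl to re-read resolver
configuration after a network change:

    from urlgrabber import grabber

    # Drop the cached pycurl handle; the next request builds a fresh one.
    grabber.reset_curl_obj()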
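Finally, the progress.py hunk makes format_time() tolerate an infinite ETA
(which a stalled transfer rate can produce) instead of handing float('inf')
to int(). A small illustration of the patched behaviour:

    from urlgrabber.progress import format_time

    print format_time(90)            # -> '01:30'
    print format_time(float('inf'))  # -> 'Infinite'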