From: Michael Tremer
Date: Wed, 23 Feb 2011 21:20:55 +0000 (+0100)
Subject: python-urlgrabber: Update to 3.9.1 (with fixes from upstream).
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f79f24feacb8be7a26333f36e25c97ef43fefc39;p=ipfire-3.x.git

python-urlgrabber: Update to 3.9.1 (with fixes from upstream).
---

diff --git a/pkgs/core/python-urlgrabber/patches/urlgrabber-HEAD.patch b/pkgs/core/python-urlgrabber/patches/urlgrabber-HEAD.patch
new file mode 100644
index 000000000..6627a1f82
--- /dev/null
+++ b/pkgs/core/python-urlgrabber/patches/urlgrabber-HEAD.patch
@@ -0,0 +1,701 @@
+diff --git a/scripts/urlgrabber b/scripts/urlgrabber
+index 518e512..09cd896 100644
+--- a/scripts/urlgrabber
++++ b/scripts/urlgrabber
+@@ -115,6 +115,7 @@ options:
+                     including quotes in the case of strings.
+                     e.g. --user_agent='"foobar/2.0"'
+ 
++  --output FILE
+   -o FILE          write output to FILE, otherwise the basename of the
+                    url will be used
+   -O               print the names of saved files to STDOUT
+@@ -170,12 +171,17 @@ class client_options:
+         return ug_options, ug_defaults
+ 
+     def process_command_line(self):
+-        short_options = 'vd:hoOpD'
++        short_options = 'vd:ho:OpD'
+         long_options = ['profile', 'repeat=', 'verbose=',
+-                        'debug=', 'help', 'progress']
++                        'debug=', 'help', 'progress', 'output=']
+         ug_long = [ o + '=' for o in self.ug_options ]
+-        optlist, args = getopt.getopt(sys.argv[1:], short_options,
+-                                      long_options + ug_long)
++        try:
++            optlist, args = getopt.getopt(sys.argv[1:], short_options,
++                                          long_options + ug_long)
++        except getopt.GetoptError, e:
++            print >>sys.stderr, "Error:", e
++            self.help([], ret=1)
++
+         self.verbose = 0
+         self.debug = None
+         self.outputfile = None
+@@ -193,6 +199,7 @@ class client_options:
+             if o == '--verbose': self.verbose = v
+             if o == '-v': self.verbose += 1
+             if o == '-o': self.outputfile = v
++            if o == '--output': self.outputfile = v
+             if o == '-p' or o == '--progress': self.progress = 1
+             if o == '-d' or o == '--debug': self.debug = v
+             if o == '--profile': self.profile = 1
+@@ -222,7 +229,7 @@ class client_options:
+             print "ERROR: cannot use -o when grabbing multiple files"
+             sys.exit(1)
+ 
+-    def help(self, args):
++    def help(self, args, ret=0):
+         if not args:
+             print MAINHELP
+         else:
+@@ -234,7 +241,7 @@ class client_options:
+                     self.help_ug_option(a)
+                 else:
+                     print 'ERROR: no help on command "%s"' % a
+-        sys.exit(0)
++        sys.exit(ret)
+ 
+     def help_doc(self):
+         print __doc__
+diff --git a/test/base_test_code.py b/test/base_test_code.py
+index 50c6348..5fb43f9 100644
+--- a/test/base_test_code.py
++++ b/test/base_test_code.py
+@@ -1,6 +1,6 @@
+ from munittest import *
+ 
+-base_http = 'http://www.linux.duke.edu/projects/urlgrabber/test/'
++base_http = 'http://urlgrabber.baseurl.org/test/'
+ base_ftp = 'ftp://localhost/test/'
+ 
+ # set to a proftp server only. we're working around a couple of
+diff --git a/urlgrabber/byterange.py b/urlgrabber/byterange.py
+index 3e5f3b7..8eeaeda 100644
+--- a/urlgrabber/byterange.py
++++ b/urlgrabber/byterange.py
+@@ -68,7 +68,7 @@ class HTTPRangeHandler(urllib2.BaseHandler):
+ 
+     def http_error_416(self, req, fp, code, msg, hdrs):
+         # HTTP's Range Not Satisfiable error
+-        raise RangeError('Requested Range Not Satisfiable')
++        raise RangeError(9, 'Requested Range Not Satisfiable')
+ 
+ class HTTPSRangeHandler(HTTPRangeHandler):
+     """ Range Header support for HTTPS. """
+@@ -208,7 +208,7 @@ class RangeableFileObject:
+                 bufsize = offset - pos
+             buf = self.fo.read(bufsize)
+             if len(buf) != bufsize:
+-                raise RangeError('Requested Range Not Satisfiable')
++                raise RangeError(9, 'Requested Range Not Satisfiable')
+             pos+= bufsize
+ 
+ class FileRangeHandler(urllib2.FileHandler):
+@@ -238,7 +238,7 @@ class FileRangeHandler(urllib2.FileHandler):
+             (fb,lb) = brange
+             if lb == '': lb = size
+             if fb < 0 or fb > size or lb > size:
+-                raise RangeError('Requested Range Not Satisfiable')
++                raise RangeError(9, 'Requested Range Not Satisfiable')
+             size = (lb - fb)
+             fo = RangeableFileObject(fo, (fb,lb))
+         headers = mimetools.Message(StringIO(
+@@ -318,12 +318,12 @@ class FTPRangeHandler(urllib2.FTPHandler):
+                 (fb,lb) = range_tup
+                 if lb == '':
+                     if retrlen is None or retrlen == 0:
+-                        raise RangeError('Requested Range Not Satisfiable due to unobtainable file length.')
++                        raise RangeError(9, 'Requested Range Not Satisfiable due to unobtainable file length.')
+                     lb = retrlen
+                     retrlen = lb - fb
+                     if retrlen < 0:
+                         # beginning of range is larger than file
+-                        raise RangeError('Requested Range Not Satisfiable')
++                        raise RangeError(9, 'Requested Range Not Satisfiable')
+                 else:
+                     retrlen = lb - fb
+                     fp = RangeableFileObject(fp, (0,retrlen))
+@@ -458,6 +458,6 @@ def range_tuple_normalize(range_tup):
+     # check if range is over the entire file
+     if (fb,lb) == (0,''): return None
+     # check that the range is valid
+-    if lb < fb: raise RangeError('Invalid byte range: %s-%s' % (fb,lb))
++    if lb < fb: raise RangeError(9, 'Invalid byte range: %s-%s' % (fb,lb))
+     return (fb,lb)
+ 
+diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
+index e090e90..b2770c5 100644
+--- a/urlgrabber/grabber.py
++++ b/urlgrabber/grabber.py
+@@ -68,14 +68,14 @@ GENERAL ARGUMENTS (kwargs)
+     (which can be set on default_grabber.throttle) is used. See
+     BANDWIDTH THROTTLING for more information.
+ 
+-  timeout = None
++  timeout = 300
+ 
+-    a positive float expressing the number of seconds to wait for socket
+-    operations. If the value is None or 0.0, socket operations will block
+-    forever. Setting this option causes urlgrabber to call the settimeout
+-    method on the Socket object used for the request. See the Python
+-    documentation on settimeout for more information.
+-    http://www.python.org/doc/current/lib/socket-objects.html
++    a positive integer expressing the number of seconds to wait before
++    timing out attempts to connect to a server. If the value is None
++    or 0, connection attempts will not time out. The timeout is passed
++    to the underlying pycurl object as its CONNECTTIMEOUT option, see
++    the curl documentation on CURLOPT_CONNECTTIMEOUT for more information.
++    http://curl.haxx.se/libcurl/c/curl_easy_setopt.html#CURLOPTCONNECTTIMEOUT
+ 
+   bandwidth = 0
+ 
+@@ -198,6 +198,12 @@ GENERAL ARGUMENTS (kwargs)
+     control, you should probably subclass URLParser and pass it in via
+     the 'urlparser' option.
+ 
++  username = None
++    username to use for simple http auth - is automatically quoted for special characters
++
++  password = None
++    password to use for simple http auth - is automatically quoted for special characters
++
+   ssl_ca_cert = None
+ 
+     this option can be used if M2Crypto is available and will be
+@@ -248,6 +254,11 @@ GENERAL ARGUMENTS (kwargs)
+ 
+     Maximum size (in bytes) of the headers.
+ 
++  self.ip_resolve = 'whatever'
++
++     What type of name to IP resolving to use, default is to do both IPV4 and
++     IPV6.
++
+ 
+ RETRY RELATED ARGUMENTS
+ 
+@@ -420,6 +431,7 @@ import time
+ import string
+ import urllib
+ import urllib2
++from httplib import responses
+ import mimetools
+ import thread
+ import types
+@@ -439,6 +451,12 @@ try:
+ except:
+     __version__ = '???'
+ 
++try:
++    # this part isn't going to do much - need to talk to gettext
++    from i18n import _
++except ImportError, msg:
++    def _(st): return st
++
+ ########################################################################
+ # functions for debugging output. These functions are here because they
+ # are also part of the module initialization.
+@@ -527,6 +545,22 @@ def _(st):
+ # END MODULE INITIALIZATION
+ ########################################################################
+ 
++########################################################################
++# UTILITY FUNCTIONS
++########################################################################
++
++# These functions are meant to be utilities for the urlgrabber library to use.
++
++def _to_utf8(obj, errors='replace'):
++    '''convert 'unicode' to an encoded utf-8 byte string '''
++    # stolen from yum.i18n
++    if isinstance(obj, unicode):
++        obj = obj.encode('utf-8', errors)
++    return obj
++
++########################################################################
++# END UTILITY FUNCTIONS
++########################################################################
+ 
+ 
+ class URLGrabError(IOError):
+@@ -662,6 +696,7 @@ class URLParser:
+         opts.quote = 0     --> do not quote it
+         opts.quote = None  --> guess
+         """
++        url = _to_utf8(url)
+         quote = opts.quote
+ 
+         if opts.prefix:
+@@ -800,6 +835,7 @@ class URLGrabberOptions:
+         self.close_connection = 0
+         self.range = None
+         self.user_agent = 'urlgrabber/%s' % __version__
++        self.ip_resolve = None
+         self.keepalive = 1
+         self.proxies = None
+         self.reget = None
+@@ -808,13 +844,15 @@ class URLGrabberOptions:
+         self.prefix = None
+         self.opener = None
+         self.cache_openers = True
+-        self.timeout = None
++        self.timeout = 300
+         self.text = None
+         self.http_headers = None
+         self.ftp_headers = None
+         self.data = None
+         self.urlparser = URLParser()
+         self.quote = None
++        self.username = None
++        self.password = None
+         self.ssl_ca_cert = None # sets SSL_CAINFO - path to certdb
+         self.ssl_context = None # no-op in pycurl
+         self.ssl_verify_peer = True # check peer's cert for authenticityb
+@@ -846,7 +884,7 @@ class URLGrabberOptions:
+         s = s + indent + '}'
+         return s
+ 
+-class URLGrabber:
++class URLGrabber(object):
+     """Provides easy opening of URLs with a variety of options.
+ 
+     All options are specified as kwargs. Options may be specified when
+@@ -931,6 +969,9 @@ class URLGrabber:
+             (scheme, host, path, parm, query, frag) = parts
+         if filename is None:
+             filename = os.path.basename( urllib.unquote(path) )
++            if not filename:
++                # This is better than nothing.
++                filename = 'index.html'
+         if scheme == 'file' and not opts.copy_local:
+             # just return the name of the local file - don't make a
+             # copy currently
+@@ -1030,7 +1071,7 @@ class URLGrabber:
+ 
+ default_grabber = URLGrabber()
+ 
+ 
+-class PyCurlFileObject():
++class PyCurlFileObject(object):
+     def __init__(self, url, filename, opts):
+         self.fo = None
+         self._hdr_dump = ''
+@@ -1052,9 +1093,15 @@ class PyCurlFileObject():
+         self._reget_length = 0
+         self._prog_running = False
+         self._error = (None, None)
+-        self.size = None
++        self.size = 0
++        self._hdr_ended = False
+         self._do_open()
+ 
++
++    def geturl(self):
++        """ Provide the geturl() method, used to be got from
++            urllib.addinfourl, via. urllib.URLopener.* """
++        return self.url
+ 
+     def __getattr__(self, name):
+         """This effectively allows us to wrap at the instance level.
+@@ -1085,9 +1132,14 @@ class PyCurlFileObject():
+             return -1
+ 
+     def _hdr_retrieve(self, buf):
++        if self._hdr_ended:
++            self._hdr_dump = ''
++            self.size = 0
++            self._hdr_ended = False
++
+         if self._over_max_size(cur=len(self._hdr_dump),
+                                max_size=self.opts.max_header_size):
+-            return -1
++             return -1
+         try:
+             self._hdr_dump += buf
+             # we have to get the size before we do the progress obj start
+@@ -1104,7 +1156,17 @@ class PyCurlFileObject():
+                     s = parse150(buf)
+                 if s:
+                     self.size = int(s)
+-
++
++            if buf.lower().find('location') != -1:
++                location = ':'.join(buf.split(':')[1:])
++                location = location.strip()
++                self.scheme = urlparse.urlsplit(location)[0]
++                self.url = location
++
++            if len(self._hdr_dump) != 0 and buf == '\r\n':
++                self._hdr_ended = True
++                if DEBUG: DEBUG.info('header ended:')
++
+             return len(buf)
+         except KeyboardInterrupt:
+             return pycurl.READFUNC_ABORT
+@@ -1113,8 +1175,10 @@ class PyCurlFileObject():
+         if self._parsed_hdr:
+             return self._parsed_hdr
+         statusend = self._hdr_dump.find('\n')
++        statusend += 1 # ridiculous as it may seem.
+         hdrfp = StringIO()
+         hdrfp.write(self._hdr_dump[statusend:])
++        hdrfp.seek(0)
+         self._parsed_hdr = mimetools.Message(hdrfp)
+         return self._parsed_hdr
+ 
+@@ -1136,11 +1200,21 @@ class PyCurlFileObject():
+         self.curl_obj.setopt(pycurl.PROGRESSFUNCTION, self._progress_update)
+         self.curl_obj.setopt(pycurl.FAILONERROR, True)
+         self.curl_obj.setopt(pycurl.OPT_FILETIME, True)
++        self.curl_obj.setopt(pycurl.FOLLOWLOCATION, True)
+ 
+         if DEBUG:
+             self.curl_obj.setopt(pycurl.VERBOSE, True)
+         if opts.user_agent:
+             self.curl_obj.setopt(pycurl.USERAGENT, opts.user_agent)
++        if opts.ip_resolve:
++            # Default is: IPRESOLVE_WHATEVER
++            ipr = opts.ip_resolve.lower()
++            if ipr == 'whatever': # Do we need this?
++                self.curl_obj.setopt(pycurl.IPRESOLVE,pycurl.IPRESOLVE_WHATEVER)
++            if ipr == 'ipv4':
++                self.curl_obj.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V4)
++            if ipr == 'ipv6':
++                self.curl_obj.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V6)
+ 
+         # maybe to be options later
+         self.curl_obj.setopt(pycurl.FOLLOWLOCATION, True)
+@@ -1148,9 +1222,11 @@ class PyCurlFileObject():
+ 
+         # timeouts
+         timeout = 300
+-        if opts.timeout:
+-            timeout = int(opts.timeout)
+-            self.curl_obj.setopt(pycurl.CONNECTTIMEOUT, timeout)
++        if hasattr(opts, 'timeout'):
++            timeout = int(opts.timeout or 0)
++        self.curl_obj.setopt(pycurl.CONNECTTIMEOUT, timeout)
++        self.curl_obj.setopt(pycurl.LOW_SPEED_LIMIT, 1)
++        self.curl_obj.setopt(pycurl.LOW_SPEED_TIME, timeout)
+ 
+         # ssl options
+         if self.scheme == 'https':
+@@ -1203,12 +1279,19 @@ class PyCurlFileObject():
+             if proxy == '_none_': proxy = ""
+             self.curl_obj.setopt(pycurl.PROXY, proxy)
+ 
+-        # FIXME username/password/auth settings
++        if opts.username and opts.password:
++            if self.scheme in ('http', 'https'):
++                self.curl_obj.setopt(pycurl.HTTPAUTH, pycurl.HTTPAUTH_ANY)
++
++        if opts.username and opts.password:
++            # apparently when applying them as curlopts they do not require quoting of any kind
++            userpwd = '%s:%s' % (opts.username, opts.password)
++            self.curl_obj.setopt(pycurl.USERPWD, userpwd)
+ 
+         #posts - simple - expects the fields as they are
+         if opts.data:
+             self.curl_obj.setopt(pycurl.POST, True)
+-            self.curl_obj.setopt(pycurl.POSTFIELDS, self._to_utf8(opts.data))
++            self.curl_obj.setopt(pycurl.POSTFIELDS, _to_utf8(opts.data))
+ 
+         # our url
+         self.curl_obj.setopt(pycurl.URL, self.url)
+@@ -1228,12 +1311,14 @@ class PyCurlFileObject():
+ 
+             code = self.http_code
+             errcode = e.args[0]
++            errurl = urllib.unquote(self.url)
++
+             if self._error[0]:
+                 errcode = self._error[0]
+ 
+             if errcode == 23 and code >= 200 and code < 299:
+-                err = URLGrabError(15, _('User (or something) called abort %s: %s') % (self.url, e))
+-                err.url = self.url
++                err = URLGrabError(15, _('User (or something) called abort %s: %s') % (errurl, e))
++                err.url = errurl
+ 
+                 # this is probably wrong but ultimately this is what happens
+                 # we have a legit http code and a pycurl 'writer failed' code
+@@ -1244,23 +1329,23 @@ class PyCurlFileObject():
+                 raise KeyboardInterrupt
+ 
+             elif errcode == 28:
+-                err = URLGrabError(12, _('Timeout on %s: %s') % (self.url, e))
+-                err.url = self.url
++                err = URLGrabError(12, _('Timeout on %s: %s') % (errurl, e))
++                err.url = errurl
+                 raise err
+             elif errcode == 35:
+                 msg = _("problem making ssl connection")
+                 err = URLGrabError(14, msg)
+-                err.url = self.url
++                err.url = errurl
+                 raise err
+             elif errcode == 37:
+-                msg = _("Could not open/read %s") % (self.url)
++                msg = _("Could not open/read %s") % (errurl)
+                 err = URLGrabError(14, msg)
+-                err.url = self.url
++                err.url = errurl
+                 raise err
+ 
+             elif errcode == 42:
+-                err = URLGrabError(15, _('User (or something) called abort %s: %s') % (self.url, e))
+-                err.url = self.url
++                err = URLGrabError(15, _('User (or something) called abort %s: %s') % (errurl, e))
++                err.url = errurl
+                 # this is probably wrong but ultimately this is what happens
+                 # we have a legit http code and a pycurl 'writer failed' code
+                 # which almost always means something aborted it from outside
+@@ -1272,33 +1357,93 @@ class PyCurlFileObject():
+             elif errcode == 58:
+                 msg = _("problem with the local client certificate")
+                 err = URLGrabError(14, msg)
+-                err.url = self.url
++                err.url = errurl
+                 raise err
+ 
+             elif errcode == 60:
+-                msg = _("client cert cannot be verified or client cert incorrect")
++                msg = _("Peer cert cannot be verified or peer cert invalid")
+                 err = URLGrabError(14, msg)
+-                err.url = self.url
++                err.url = errurl
+                 raise err
+ 
+             elif errcode == 63:
+                 if self._error[1]:
+                     msg = self._error[1]
+                 else:
+-                    msg = _("Max download size exceeded on %s") % (self.url)
++                    msg = _("Max download size exceeded on %s") % (errurl)
+                 err = URLGrabError(14, msg)
+-                err.url = self.url
++                err.url = errurl
+                 raise err
+ 
+             elif str(e.args[1]) == '' and self.http_code != 0: # fake it until you make it
+-                msg = 'HTTP Error %s : %s ' % (self.http_code, self.url)
++                if self.scheme in ['http', 'https']:
++                    if self.http_code in responses:
++                        resp = responses[self.http_code]
++                        msg = 'HTTP Error %s - %s : %s' % (self.http_code, resp, errurl)
++                    else:
++                        msg = 'HTTP Error %s : %s ' % (self.http_code, errurl)
++                elif self.scheme in ['ftp']:
++                    msg = 'FTP Error %s : %s ' % (self.http_code, errurl)
++                else:
++                    msg = "Unknown Error: URL=%s , scheme=%s" % (errurl, self.scheme)
+             else:
+-                msg = 'PYCURL ERROR %s - "%s"' % (errcode, str(e.args[1]))
++                pyerr2str = { 5 : _("Couldn't resolve proxy"),
++                              6 : _("Couldn't resolve host"),
++                              7 : _("Couldn't connect"),
++                              8 : _("Bad reply to FTP server"),
++                              9 : _("Access denied"),
++                             11 : _("Bad reply to FTP pass"),
++                             13 : _("Bad reply to FTP pasv"),
++                             14 : _("Bad reply to FTP 227"),
++                             15 : _("Couldn't get FTP host"),
++                             17 : _("Couldn't set FTP type"),
++                             18 : _("Partial file"),
++                             19 : _("FTP RETR command failed"),
++                             22 : _("HTTP returned error"),
++                             23 : _("Write error"),
++                             25 : _("Upload failed"),
++                             26 : _("Read error"),
++                             27 : _("Out of Memory"),
++                             28 : _("Operation timed out"),
++                             30 : _("FTP PORT command failed"),
++                             31 : _("FTP REST command failed"),
++                             33 : _("Range failed"),
++                             34 : _("HTTP POST failed"),
++                             35 : _("SSL CONNECT failed"),
++                             36 : _("Couldn't resume download"),
++                             37 : _("Couldn't read file"),
++                             42 : _("Aborted by callback"),
++                             47 : _("Too many redirects"),
++                             51 : _("Peer certificate failed verification"),
++                             53 : _("SSL engine not found"),
++                             54 : _("SSL engine set failed"),
++                             55 : _("Network error send()"),
++                             56 : _("Network error recv()"),
++                             58 : _("Local certificate failed"),
++                             59 : _("SSL set cipher failed"),
++                             60 : _("Local CA certificate failed"),
++                             61 : _("HTTP bad transfer encoding"),
++                             63 : _("Maximum file size exceeded"),
++                             64 : _("FTP SSL failed"),
++                             67 : _("Authentication failure"),
++                             70 : _("Out of disk space on server"),
++                             73 : _("Remote file exists"),
++                            }
++                errstr = str(e.args[1])
++                if not errstr:
++                    errstr = pyerr2str.get(errcode, '<Unknown>')
++                msg = 'curl#%s - "%s"' % (errcode, errstr)
+                 code = errcode
+             err = URLGrabError(14, msg)
+             err.code = code
+             err.exception = e
+             raise err
++        else:
++            if self._error[1]:
++                msg = self._error[1]
++                err = URLGrabError(14, msg)
++                err.url = urllib.unquote(self.url)
++                raise err
+ 
+     def _do_open(self):
+         self.curl_obj = _curl_cache
+@@ -1333,7 +1478,11 @@ class PyCurlFileObject():
+ 
+         if self.opts.range:
+             rt = self.opts.range
+-            if rt[0]: rt = (rt[0] + reget_length, rt[1])
++
++            if rt[0] is None:
++                rt = (0, rt[1])
++            rt = (rt[0] + reget_length, rt[1])
++
+ 
+         if rt:
+             header = range_tuple_to_header(rt)
+@@ -1434,9 +1583,13 @@ class PyCurlFileObject():
+             #fh, self._temp_name = mkstemp()
+             #self.fo = open(self._temp_name, 'wb')
+ 
+-
+-        self._do_perform()
+-
++        try:
++            self._do_perform()
++        except URLGrabError, e:
++            self.fo.flush()
++            self.fo.close()
++            raise e
++
+ 
+ 
+         if _was_filename:
+@@ -1446,9 +1599,23 @@ class PyCurlFileObject():
+             # set the time
+             mod_time = self.curl_obj.getinfo(pycurl.INFO_FILETIME)
+             if mod_time != -1:
+-                os.utime(self.filename, (mod_time, mod_time))
++                try:
++                    os.utime(self.filename, (mod_time, mod_time))
++                except OSError, e:
++                    err = URLGrabError(16, _(\
++                      'error setting timestamp on file %s from %s, OSError: %s')
++                              % (self.filename, self.url, e))
++                    err.url = self.url
++                    raise err
+             # re open it
+-            self.fo = open(self.filename, 'r')
++            try:
++                self.fo = open(self.filename, 'r')
++            except IOError, e:
++                err = URLGrabError(16, _(\
++                  'error opening file from %s, IOError: %s') % (self.url, e))
++                err.url = self.url
++                raise err
++
+         else:
+             #self.fo = open(self._temp_name, 'r')
+             self.fo.seek(0)
+@@ -1532,11 +1699,14 @@ class PyCurlFileObject():
+     def _over_max_size(self, cur, max_size=None):
+ 
+         if not max_size:
+-            max_size = self.size
+-            if self.opts.size: # if we set an opts size use that, no matter what
+-                max_size = self.opts.size
++            if not self.opts.size:
++                max_size = self.size
++            else:
++                max_size = self.opts.size
++
+         if not max_size: return False # if we have None for all of the Max then this is dumb
+-        if cur > max_size + max_size*.10:
+-
++
++        if cur > int(float(max_size) * 1.10):
++
+             msg = _("Downloaded more than max size for %s: %s > %s") \
+                         % (self.url, cur, max_size)
+@@ -1544,13 +1714,6 @@ class PyCurlFileObject():
+             return True
+         return False
+ 
+-    def _to_utf8(self, obj, errors='replace'):
+-        '''convert 'unicode' to an encoded utf-8 byte string '''
+-        # stolen from yum.i18n
+-        if isinstance(obj, unicode):
+-            obj = obj.encode('utf-8', errors)
+-        return obj
+-
+     def read(self, amt=None):
+         self._fill_buffer(amt)
+         if amt is None:
+@@ -1582,9 +1745,21 @@ class PyCurlFileObject():
+             self.opts.progress_obj.end(self._amount_read)
+         self.fo.close()
+ 
+-
++    def geturl(self):
++        """ Provide the geturl() method, used to be got from
++            urllib.addinfourl, via. urllib.URLopener.* """
++        return self.url
++
+ _curl_cache = pycurl.Curl() # make one and reuse it over and over and over
+ 
++def reset_curl_obj():
++    """To make sure curl has reread the network/dns info we force a reload"""
++    global _curl_cache
++    _curl_cache.close()
++    _curl_cache = pycurl.Curl()
++
++
++
+ 
+ #####################################################################
+ # DEPRECATED FUNCTIONS
+diff --git a/urlgrabber/mirror.py b/urlgrabber/mirror.py
+index dad410b..8731aed 100644
+--- a/urlgrabber/mirror.py
++++ b/urlgrabber/mirror.py
+@@ -90,7 +90,7 @@ CUSTOMIZATION
+ import random
+ import thread  # needed for locking to make this threadsafe
+ 
+-from grabber import URLGrabError, CallbackObject, DEBUG
++from grabber import URLGrabError, CallbackObject, DEBUG, _to_utf8
+ 
+ def _(st):
+     return st
+@@ -263,7 +263,8 @@ class MirrorGroup:
+     def _parse_mirrors(self, mirrors):
+         parsed_mirrors = []
+         for m in mirrors:
+-            if type(m) == type(''): m = {'mirror': m}
++            if isinstance(m, basestring):
++                m = {'mirror': _to_utf8(m)}
+             parsed_mirrors.append(m)
+         return parsed_mirrors
+ 
+diff --git a/urlgrabber/progress.py b/urlgrabber/progress.py
+index dd07c6a..45eb248 100644
+--- a/urlgrabber/progress.py
++++ b/urlgrabber/progress.py
+@@ -658,6 +658,8 @@ def format_time(seconds, use_hours=0):
+     if seconds is None or seconds < 0:
+         if use_hours: return '--:--:--'
+         else:         return '--:--'
++    elif seconds == float('inf'):
++        return 'Infinite'
+     else:
+         seconds = int(seconds)
+         minutes = seconds / 60
diff --git a/pkgs/core/python-urlgrabber/python-urlgrabber.nm b/pkgs/core/python-urlgrabber/python-urlgrabber.nm
index 358de7f0e..b5beca805 100644
--- a/pkgs/core/python-urlgrabber/python-urlgrabber.nm
+++ b/pkgs/core/python-urlgrabber/python-urlgrabber.nm
@@ -25,7 +25,7 @@ include $(PKGROOT)/Include
 
 PKG_NAME   = urlgrabber
-PKG_VER    = 3.1.0
+PKG_VER    = 3.9.1
 PKG_REL    = 1
 
 PKG_ARCH   = noarch
@@ -35,7 +35,7 @@ PKG_URL    = http://urlgrabber.baseurl.org/
 PKG_LICENSE = LGPLv2+
 PKG_SUMMARY = A high-level cross-protocol url-grabber.
 
-PKG_BUILD_DEPS+= python-devel
+PKG_BUILD_DEPS+= python-devel python-pycurl
 
 define PKG_DESCRIPTION
	A high-level cross-protocol url-grabber for python supporting HTTP, \
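
For reviewers who want the net effect of the grabber.py hunks from the caller's
side, here is a minimal sketch (Python 2, matching upstream). The URL and the
credentials are made up for illustration; the option names (timeout,
ip_resolve, username, password) and the error numbers come from the patch
above.

    # Assumes the patched urlgrabber 3.9.1 is importable.
    from urlgrabber.grabber import URLGrabber, URLGrabError

    g = URLGrabber(
        timeout=300,        # now the default; also drives LOW_SPEED_LIMIT/TIME
        ip_resolve='ipv4',  # 'whatever' (default), 'ipv4' or 'ipv6'
        username='user',    # simple HTTP auth, handed to curl as USERPWD
        password='secret',
    )

    try:
        path = g.urlgrab('http://example.com/some/file.txt')
    except URLGrabError, e:
        # 12 = timeout, 14 = HTTP/curl error, 16 = local file error, all
        # raised in the patched PyCurlFileObject; e.url carries the URL.
        print 'download failed:', e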
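The patch also keeps a single pycurl handle cached at module level
(_curl_cache) for connection reuse, and adds reset_curl_obj() to discard it.
A plausible use, sketched here, is forcing curl to re-read resolver
configuration after a network change:

    from urlgrabber import grabber

    # Drop the cached pycurl handle; the next request builds a fresh one.
    grabber.reset_curl_obj()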
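Finally, the progress.py hunk makes format_time() tolerate an infinite ETA
(which a stalled transfer rate can produce) instead of handing float('inf')
to int(). A small illustration of the patched behaviour:

    from urlgrabber.progress import format_time

    print format_time(90)            # -> '01:30'
    print format_time(float('inf'))  # -> 'Infinite'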