From: Michael Tremer
Date: Fri, 1 Mar 2013 12:15:24 +0000 (+0100)
Subject: python-urlgrabber: Import latest fixes from upstream.
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4fa42d3f4a0faba8459d3c49b43ce895fc6d8a2a;p=ipfire-3.x.git

python-urlgrabber: Import latest fixes from upstream.

Basically this updates the patch against upstream HEAD.
---

diff --git a/python-urlgrabber/patches/urlgrabber-HEAD.patch b/python-urlgrabber/patches/urlgrabber-HEAD.patch
index 6627a1f82..aaf9cbc60 100644
--- a/python-urlgrabber/patches/urlgrabber-HEAD.patch
+++ b/python-urlgrabber/patches/urlgrabber-HEAD.patch
@@ -1,3 +1,16 @@
+diff --git a/.gitignore b/.gitignore
+new file mode 100644
+index 0000000..1ffe416
+--- /dev/null
++++ b/.gitignore
+@@ -0,0 +1,7 @@
++*.py[co]
++MANIFEST
++dist
++build
++*.kdev*
++*.kateproject
++ipython.log*
 diff --git a/scripts/urlgrabber b/scripts/urlgrabber
 index 518e512..09cd896 100644
 --- a/scripts/urlgrabber
@@ -58,6 +71,104 @@ index 518e512..09cd896 100644
     def help_doc(self):
         print __doc__
+diff --git a/scripts/urlgrabber-ext-down b/scripts/urlgrabber-ext-down
+new file mode 100755
+index 0000000..3dafb12
+--- /dev/null
++++ b/scripts/urlgrabber-ext-down
+@@ -0,0 +1,75 @@
++#! /usr/bin/python
++# A very simple external downloader
++# Copyright 2011-2012 Zdenek Pavlas
++
++# This library is free software; you can redistribute it and/or
++# modify it under the terms of the GNU Lesser General Public
++# License as published by the Free Software Foundation; either
++# version 2.1 of the License, or (at your option) any later version.
++#
++# This library is distributed in the hope that it will be useful,
++# but WITHOUT ANY WARRANTY; without even the implied warranty of
++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++# Lesser General Public License for more details.
++# ++# You should have received a copy of the GNU Lesser General Public ++# License along with this library; if not, write to the ++# Free Software Foundation, Inc., ++# 59 Temple Place, Suite 330, ++# Boston, MA 02111-1307 USA ++ ++import time, os, errno, sys ++from urlgrabber.grabber import \ ++ _readlines, URLGrabberOptions, _loads, \ ++ PyCurlFileObject, URLGrabError ++ ++def write(fmt, *arg): ++ try: os.write(1, fmt % arg) ++ except OSError, e: ++ if e.args[0] != errno.EPIPE: raise ++ sys.exit(1) ++ ++class ProxyProgress: ++ def start(self, *d1, **d2): ++ self.next_update = 0 ++ def update(self, _amount_read): ++ t = time.time() ++ if t < self.next_update: return ++ self.next_update = t + 0.31 ++ write('%d %d\n', self._id, _amount_read) ++ ++def main(): ++ import signal ++ signal.signal(signal.SIGINT, lambda n, f: sys.exit(1)) ++ cnt = 0 ++ while True: ++ lines = _readlines(0) ++ if not lines: break ++ for line in lines: ++ cnt += 1 ++ opts = URLGrabberOptions() ++ opts._id = cnt ++ for k in line.split(' '): ++ k, v = k.split('=', 1) ++ setattr(opts, k, _loads(v)) ++ if opts.progress_obj: ++ opts.progress_obj = ProxyProgress() ++ opts.progress_obj._id = cnt ++ ++ dlsz = dltm = 0 ++ try: ++ fo = PyCurlFileObject(opts.url, opts.filename, opts) ++ fo._do_grab() ++ fo.fo.close() ++ size = fo._amount_read ++ if fo._tm_last: ++ dlsz = fo._tm_last[0] - fo._tm_first[0] ++ dltm = fo._tm_last[1] - fo._tm_first[1] ++ ug_err = 'OK' ++ except URLGrabError, e: ++ size = 0 ++ ug_err = '%d %s' % e.args ++ write('%d %d %d %.3f %s\n', opts._id, size, dlsz, dltm, ug_err) ++ ++if __name__ == '__main__': ++ main() +diff --git a/setup.py b/setup.py +index d0b87b8..bfa4a18 100644 +--- a/setup.py ++++ b/setup.py +@@ -15,8 +15,10 @@ url = _urlgrabber.__url__ + packages = ['urlgrabber'] + package_dir = {'urlgrabber':'urlgrabber'} + scripts = ['scripts/urlgrabber'] +-data_files = [('share/doc/' + name + '-' + version, +- ['README','LICENSE', 'TODO', 'ChangeLog'])] ++data_files = [ ++ ('share/doc/' + name + '-' + version, ['README','LICENSE', 'TODO', 'ChangeLog']), ++ ('libexec', ['scripts/urlgrabber-ext-down']), ++] + options = { 'clean' : { 'all' : 1 } } + classifiers = [ + 'Development Status :: 4 - Beta', diff --git a/test/base_test_code.py b/test/base_test_code.py index 50c6348..5fb43f9 100644 --- a/test/base_test_code.py @@ -125,10 +236,38 @@ index 3e5f3b7..8eeaeda 100644 return (fb,lb) diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py -index e090e90..b2770c5 100644 +index e090e90..6ce9861 100644 --- a/urlgrabber/grabber.py +++ b/urlgrabber/grabber.py -@@ -68,14 +68,14 @@ GENERAL ARGUMENTS (kwargs) +@@ -49,11 +49,26 @@ GENERAL ARGUMENTS (kwargs) + progress_obj = None + + a class instance that supports the following methods: +- po.start(filename, url, basename, length, text) ++ po.start(filename, url, basename, size, now, text) + # length will be None if unknown + po.update(read) # read == bytes read so far + po.end() + ++ multi_progress_obj = None ++ ++ a class instance that supports the following methods: ++ mo.start(total_files, total_size) ++ mo.newMeter() => meter ++ mo.removeMeter(meter) ++ mo.end() ++ ++ The 'meter' object is similar to progress_obj, but multiple ++ instances may be created and updated at the same time. ++ ++ When downloading multiple files in parallel and multi_progress_obj ++ is None progress_obj is used in compatibility mode: finished files ++ are shown but there's no in-progress display. 
++ + text = None + + specifies alternative text to be passed to the progress meter +@@ -68,14 +83,14 @@ GENERAL ARGUMENTS (kwargs) (which can be set on default_grabber.throttle) is used. See BANDWIDTH THROTTLING for more information. @@ -150,7 +289,22 @@ index e090e90..b2770c5 100644 bandwidth = 0 -@@ -198,6 +198,12 @@ GENERAL ARGUMENTS (kwargs) +@@ -143,8 +158,12 @@ GENERAL ARGUMENTS (kwargs) + note that proxy authentication information may be provided using + normal URL constructs: + proxies={ 'http' : 'http://user:host@foo:3128' } +- Lastly, if proxies is None, the default environment settings will +- be used. ++ ++ libproxy = False ++ ++ Use the libproxy module (if installed) to find proxies. ++ The libproxy code is only used if the proxies dictionary ++ does not provide any proxies. + + prefix = None + +@@ -198,6 +217,12 @@ GENERAL ARGUMENTS (kwargs) control, you should probably subclass URLParser and pass it in via the 'urlparser' option. @@ -163,19 +317,108 @@ index e090e90..b2770c5 100644 ssl_ca_cert = None this option can be used if M2Crypto is available and will be -@@ -248,6 +254,11 @@ GENERAL ARGUMENTS (kwargs) +@@ -211,43 +236,75 @@ GENERAL ARGUMENTS (kwargs) + No-op when using the curl backend (default) + + +- self.ssl_verify_peer = True ++ ssl_verify_peer = True + + Check the server's certificate to make sure it is valid with what our CA validates + +- self.ssl_verify_host = True ++ ssl_verify_host = True + + Check the server's hostname to make sure it matches the certificate DN + +- self.ssl_key = None ++ ssl_key = None + + Path to the key the client should use to connect/authenticate with + +- self.ssl_key_type = 'PEM' ++ ssl_key_type = 'PEM' + + PEM or DER - format of key + +- self.ssl_cert = None ++ ssl_cert = None + + Path to the ssl certificate the client should use to to authenticate with + +- self.ssl_cert_type = 'PEM' ++ ssl_cert_type = 'PEM' + + PEM or DER - format of certificate + +- self.ssl_key_pass = None ++ ssl_key_pass = None + + password to access the ssl_key + +- self.size = None ++ size = None + + size (in bytes) or Maximum size of the thing being downloaded. + This is mostly to keep us from exploding with an endless datastream + +- self.max_header_size = 2097152 ++ max_header_size = 2097152 Maximum size (in bytes) of the headers. -+ self.ip_resolve = 'whatever' ++ ip_resolve = 'whatever' + + What type of name to IP resolving to use, default is to do both IPV4 and + IPV6. ++ ++ async = (key, limit) ++ ++ When this option is set, the urlgrab() is not processed immediately ++ but queued. parallel_wait() then processes grabs in parallel, limiting ++ the numer of connections in each 'key' group to at most 'limit'. ++ ++ max_connections ++ ++ The global connection limit. ++ ++ timedhosts ++ ++ The filename of the host download statistics. If defined, urlgrabber ++ will update the stats at the end of every download. At the end of ++ parallel_wait(), the updated stats are saved. If synchronous grabs ++ are used, you should call th_save(). ++ ++ default_speed, half_life ++ ++ These options only affect the async mirror selection code. ++ The default_speed option sets the speed estimate for mirrors ++ we have never downloaded from, and defaults to 1 MBps. ++ ++ The speed estimate also drifts exponentially from the speed ++ actually measured to the default speed, with default ++ period of 30 days. 
+ RETRY RELATED ARGUMENTS -@@ -420,6 +431,7 @@ import time +@@ -328,6 +385,15 @@ RETRY RELATED ARGUMENTS + but it cannot (without severe trickiness) prevent the exception + from being raised. + ++ failfunc = None ++ ++ The callback that gets called when urlgrab request fails. ++ If defined, urlgrab() calls it instead of raising URLGrabError. ++ Callback syntax is identical to failure_callback. ++ ++ Contrary to failure_callback, it's called only once. It's primary ++ purpose is to use urlgrab() without a try/except block. ++ + interrupt_callback = None + + This callback is called if KeyboardInterrupt is received at any +@@ -420,6 +486,7 @@ import time import string import urllib import urllib2 @@ -183,7 +426,26 @@ index e090e90..b2770c5 100644 import mimetools import thread import types -@@ -439,6 +451,12 @@ try: +@@ -428,9 +495,17 @@ import pycurl + from ftplib import parse150 + from StringIO import StringIO + from httplib import HTTPException +-import socket ++import socket, select, fcntl + from byterange import range_tuple_normalize, range_tuple_to_header, RangeError + ++try: ++ import xattr ++ if not hasattr(xattr, 'set'): ++ xattr = None # This is a "newer" API. ++except ImportError: ++ xattr = None ++ ++ + ######################################################################## + # MODULE INITIALIZATION + ######################################################################## +@@ -439,6 +514,12 @@ try: except: __version__ = '???' @@ -196,7 +458,26 @@ index e090e90..b2770c5 100644 ######################################################################## # functions for debugging output. These functions are here because they # are also part of the module initialization. -@@ -527,6 +545,22 @@ def _(st): +@@ -504,6 +585,7 @@ def _init_default_logger(logspec=None): + else: handler = logging.FileHandler(filename) + handler.setFormatter(formatter) + DBOBJ = logging.getLogger('urlgrabber') ++ DBOBJ.propagate = False + DBOBJ.addHandler(handler) + DBOBJ.setLevel(level) + except (KeyError, ImportError, ValueError): +@@ -512,8 +594,8 @@ def _init_default_logger(logspec=None): + + def _log_package_state(): + if not DEBUG: return +- DEBUG.info('urlgrabber version = %s' % __version__) +- DEBUG.info('trans function "_" = %s' % _) ++ DEBUG.debug('urlgrabber version = %s' % __version__) ++ DEBUG.debug('trans function "_" = %s' % _) + + _init_default_logger() + _log_package_state() +@@ -527,6 +609,29 @@ def _(st): # END MODULE INITIALIZATION ######################################################################## @@ -213,13 +494,20 @@ index e090e90..b2770c5 100644 + obj = obj.encode('utf-8', errors) + return obj + ++def exception2msg(e): ++ try: ++ return str(e) ++ except UnicodeEncodeError: ++ # always use byte strings ++ return unicode(e).encode('utf8') ++ +######################################################################## +# END UTILITY FUNCTIONS +######################################################################## class URLGrabError(IOError): -@@ -662,6 +696,7 @@ class URLParser: +@@ -662,6 +767,7 @@ class URLParser: opts.quote = 0 --> do not quote it opts.quote = None --> guess """ @@ -227,15 +515,71 @@ index e090e90..b2770c5 100644 quote = opts.quote if opts.prefix: -@@ -800,6 +835,7 @@ class URLGrabberOptions: +@@ -768,6 +874,41 @@ class URLGrabberOptions: + else: # throttle is a float + return self.bandwidth * self.throttle + ++ def find_proxy(self, url, scheme): ++ """Find the proxy to use for this URL. ++ Use the proxies dictionary first, then libproxy. 
++ """ ++ self.proxy = None ++ if scheme not in ('ftp', 'http', 'https'): ++ return ++ ++ if self.proxies: ++ proxy = self.proxies.get(scheme) ++ if proxy is None: ++ if scheme == 'http': ++ proxy = self.proxies.get('https') ++ elif scheme == 'https': ++ proxy = self.proxies.get('http') ++ if proxy == '_none_': ++ proxy = '' ++ self.proxy = proxy ++ return ++ ++ if self.libproxy: ++ global _libproxy_cache ++ if _libproxy_cache is None: ++ try: ++ import libproxy ++ _libproxy_cache = libproxy.ProxyFactory() ++ except: ++ _libproxy_cache = False ++ if _libproxy_cache: ++ for proxy in _libproxy_cache.getProxies(url): ++ if proxy.startswith('http://'): ++ if DEBUG: DEBUG.info('using proxy "%s" for url %s' % (proxy, url)) ++ self.proxy = proxy ++ break ++ + def derive(self, **kwargs): + """Create a derived URLGrabberOptions instance. + This method creates a new instance and overrides the +@@ -791,30 +932,37 @@ class URLGrabberOptions: + provided here. + """ + self.progress_obj = None ++ self.multi_progress_obj = None + self.throttle = 1.0 + self.bandwidth = 0 + self.retry = None + self.retrycodes = [-1,2,4,5,6,7] + self.checkfunc = None ++ self.failfunc = _do_raise + self.copy_local = 0 self.close_connection = 0 self.range = None self.user_agent = 'urlgrabber/%s' % __version__ + self.ip_resolve = None self.keepalive = 1 self.proxies = None ++ self.libproxy = False ++ self.proxy = None self.reget = None -@@ -808,13 +844,15 @@ class URLGrabberOptions: + self.failure_callback = None + self.interrupt_callback = None self.prefix = None self.opener = None self.cache_openers = True @@ -252,17 +596,109 @@ index e090e90..b2770c5 100644 self.ssl_ca_cert = None # sets SSL_CAINFO - path to certdb self.ssl_context = None # no-op in pycurl self.ssl_verify_peer = True # check peer's cert for authenticityb -@@ -846,7 +884,7 @@ class URLGrabberOptions: +@@ -827,6 +975,12 @@ class URLGrabberOptions: + self.size = None # if we know how big the thing we're getting is going + # to be. this is ultimately a MAXIMUM size for the file + self.max_header_size = 2097152 #2mb seems reasonable for maximum header size ++ self.async = None # blocking by default ++ self.mirror_group = None ++ self.max_connections = 5 ++ self.timedhosts = None ++ self.half_life = 30*24*60*60 # 30 days ++ self.default_speed = 1e6 # 1 MBit + + def __repr__(self): + return self.format() +@@ -846,7 +1000,18 @@ class URLGrabberOptions: s = s + indent + '}' return s -class URLGrabber: ++def _do_raise(obj): ++ raise obj.exception ++ ++def _run_callback(cb, obj): ++ if not cb: ++ return ++ if callable(cb): ++ return cb(obj) ++ cb, arg, karg = cb ++ return cb(obj, *arg, **karg) ++ +class URLGrabber(object): """Provides easy opening of URLs with a variety of options. All options are specified as kwargs. 
Options may be specified when -@@ -931,6 +969,9 @@ class URLGrabber: +@@ -872,7 +1037,6 @@ class URLGrabber: + # beware of infinite loops :) + tries = tries + 1 + exception = None +- retrycode = None + callback = None + if DEBUG: DEBUG.info('attempt %i/%s: %s', + tries, opts.retry, args[0]) +@@ -883,54 +1047,62 @@ class URLGrabber: + except URLGrabError, e: + exception = e + callback = opts.failure_callback +- retrycode = e.errno + except KeyboardInterrupt, e: + exception = e + callback = opts.interrupt_callback ++ if not callback: ++ raise + + if DEBUG: DEBUG.info('exception: %s', exception) + if callback: + if DEBUG: DEBUG.info('calling callback: %s', callback) +- cb_func, cb_args, cb_kwargs = self._make_callback(callback) + obj = CallbackObject(exception=exception, url=args[0], + tries=tries, retry=opts.retry) +- cb_func(obj, *cb_args, **cb_kwargs) ++ _run_callback(callback, obj) + + if (opts.retry is None) or (tries == opts.retry): + if DEBUG: DEBUG.info('retries exceeded, re-raising') + raise + ++ retrycode = getattr(exception, 'errno', None) + if (retrycode is not None) and (retrycode not in opts.retrycodes): + if DEBUG: DEBUG.info('retrycode (%i) not in list %s, re-raising', + retrycode, opts.retrycodes) + raise + +- def urlopen(self, url, **kwargs): ++ def urlopen(self, url, opts=None, **kwargs): + """open the url and return a file object + If a progress object or throttle value specified when this + object was created, then a special file object will be + returned that supports them. The file object can be treated + like any other file object. + """ +- opts = self.opts.derive(**kwargs) ++ url = _to_utf8(url) ++ opts = (opts or self.opts).derive(**kwargs) + if DEBUG: DEBUG.debug('combined options: %s' % repr(opts)) + (url,parts) = opts.urlparser.parse(url, opts) ++ opts.find_proxy(url, parts[0]) + def retryfunc(opts, url): + return PyCurlFileObject(url, filename=None, opts=opts) + return self._retry(opts, retryfunc, url) + +- def urlgrab(self, url, filename=None, **kwargs): ++ def urlgrab(self, url, filename=None, opts=None, **kwargs): + """grab the file at and make a local copy at + If filename is none, the basename of the url is used. + urlgrab returns the filename of the local file, which may be + different from the passed-in filename if copy_local == 0. 
+ """ +- opts = self.opts.derive(**kwargs) ++ url = _to_utf8(url) ++ opts = (opts or self.opts).derive(**kwargs) + if DEBUG: DEBUG.debug('combined options: %s' % repr(opts)) + (url,parts) = opts.urlparser.parse(url, opts) (scheme, host, path, parm, query, frag) = parts ++ opts.find_proxy(url, scheme) if filename is None: filename = os.path.basename( urllib.unquote(path) ) + if not filename: @@ -271,7 +707,97 @@ index e090e90..b2770c5 100644 if scheme == 'file' and not opts.copy_local: # just return the name of the local file - don't make a # copy currently -@@ -1030,7 +1071,7 @@ class URLGrabber: +@@ -950,41 +1122,51 @@ class URLGrabber: + + elif not opts.range: + if not opts.checkfunc is None: +- cb_func, cb_args, cb_kwargs = \ +- self._make_callback(opts.checkfunc) +- obj = CallbackObject() +- obj.filename = path +- obj.url = url +- apply(cb_func, (obj, )+cb_args, cb_kwargs) ++ obj = CallbackObject(filename=path, url=url) ++ _run_callback(opts.checkfunc, obj) + return path + ++ if opts.async: ++ opts.url = url ++ opts.filename = filename ++ opts.size = int(opts.size or 0) ++ _async_queue.append(opts) ++ return filename ++ + def retryfunc(opts, url, filename): + fo = PyCurlFileObject(url, filename, opts) + try: + fo._do_grab() ++ if fo._tm_last: ++ dlsz = fo._tm_last[0] - fo._tm_first[0] ++ dltm = fo._tm_last[1] - fo._tm_first[1] ++ _TH.update(url, dlsz, dltm, None) + if not opts.checkfunc is None: +- cb_func, cb_args, cb_kwargs = \ +- self._make_callback(opts.checkfunc) +- obj = CallbackObject() +- obj.filename = filename +- obj.url = url +- apply(cb_func, (obj, )+cb_args, cb_kwargs) ++ obj = CallbackObject(filename=filename, url=url) ++ _run_callback(opts.checkfunc, obj) + finally: + fo.close() + return filename + +- return self._retry(opts, retryfunc, url, filename) ++ try: ++ return self._retry(opts, retryfunc, url, filename) ++ except URLGrabError, e: ++ _TH.update(url, 0, 0, e) ++ opts.exception = e ++ return _run_callback(opts.failfunc, opts) + +- def urlread(self, url, limit=None, **kwargs): ++ def urlread(self, url, limit=None, opts=None, **kwargs): + """read the url into a string, up to 'limit' bytes + If the limit is exceeded, an exception will be thrown. 
Note + that urlread is NOT intended to be used as a way of saying + "I want the first N bytes" but rather 'read the whole file + into memory, but don't use too much' + """ +- opts = self.opts.derive(**kwargs) ++ url = _to_utf8(url) ++ opts = (opts or self.opts).derive(**kwargs) + if DEBUG: DEBUG.debug('combined options: %s' % repr(opts)) + (url,parts) = opts.urlparser.parse(url, opts) ++ opts.find_proxy(url, parts[0]) + if limit is not None: + limit = limit + 1 + +@@ -1000,12 +1182,8 @@ class URLGrabber: + else: s = fo.read(limit) + + if not opts.checkfunc is None: +- cb_func, cb_args, cb_kwargs = \ +- self._make_callback(opts.checkfunc) +- obj = CallbackObject() +- obj.data = s +- obj.url = url +- apply(cb_func, (obj, )+cb_args, cb_kwargs) ++ obj = CallbackObject(data=s, url=url) ++ _run_callback(opts.checkfunc, obj) + finally: + fo.close() + return s +@@ -1020,6 +1198,7 @@ class URLGrabber: + return s + + def _make_callback(self, callback_obj): ++ # not used, left for compatibility + if callable(callback_obj): + return callback_obj, (), {} + else: +@@ -1030,7 +1209,7 @@ class URLGrabber: default_grabber = URLGrabber() @@ -280,24 +806,47 @@ index e090e90..b2770c5 100644 def __init__(self, url, filename, opts): self.fo = None self._hdr_dump = '' -@@ -1052,9 +1093,15 @@ class PyCurlFileObject(): +@@ -1052,10 +1231,13 @@ class PyCurlFileObject(): self._reget_length = 0 self._prog_running = False self._error = (None, None) - self.size = None + self.size = 0 + self._hdr_ended = False ++ self._tm_first = None ++ self._tm_last = None self._do_open() +- + -+ def geturl(self): -+ """ Provide the geturl() method, used to be got from -+ urllib.addinfourl, via. urllib.URLopener.* """ -+ return self.url - def __getattr__(self, name): """This effectively allows us to wrap at the instance level. 
-@@ -1085,9 +1132,14 @@ class PyCurlFileObject(): + Any attribute not found in _this_ object will be searched for +@@ -1067,6 +1249,12 @@ class PyCurlFileObject(): + + def _retrieve(self, buf): + try: ++ tm = self._amount_read + len(buf), time.time() ++ if self._tm_first is None: ++ self._tm_first = tm ++ else: ++ self._tm_last = tm ++ + if not self._prog_running: + if self.opts.progress_obj: + size = self.size + self._reget_length +@@ -1079,15 +1267,24 @@ class PyCurlFileObject(): + self.opts.progress_obj.update(self._amount_read) + + self._amount_read += len(buf) +- self.fo.write(buf) ++ try: ++ self.fo.write(buf) ++ except IOError, e: ++ self._cb_error = URLGrabError(16, exception2msg(e)) ++ return -1 + return len(buf) + except KeyboardInterrupt: return -1 def _hdr_retrieve(self, buf): @@ -313,7 +862,7 @@ index e090e90..b2770c5 100644 try: self._hdr_dump += buf # we have to get the size before we do the progress obj start -@@ -1104,7 +1156,17 @@ class PyCurlFileObject(): +@@ -1104,7 +1301,17 @@ class PyCurlFileObject(): s = parse150(buf) if s: self.size = int(s) @@ -327,12 +876,12 @@ index e090e90..b2770c5 100644 + + if len(self._hdr_dump) != 0 and buf == '\r\n': + self._hdr_ended = True -+ if DEBUG: DEBUG.info('header ended:') ++ if DEBUG: DEBUG.debug('header ended:') + return len(buf) except KeyboardInterrupt: return pycurl.READFUNC_ABORT -@@ -1113,8 +1175,10 @@ class PyCurlFileObject(): +@@ -1113,8 +1320,10 @@ class PyCurlFileObject(): if self._parsed_hdr: return self._parsed_hdr statusend = self._hdr_dump.find('\n') @@ -343,13 +892,24 @@ index e090e90..b2770c5 100644 self._parsed_hdr = mimetools.Message(hdrfp) return self._parsed_hdr -@@ -1136,11 +1200,21 @@ class PyCurlFileObject(): +@@ -1127,6 +1336,9 @@ class PyCurlFileObject(): + if not opts: + opts = self.opts + ++ # keepalives ++ if not opts.keepalive: ++ self.curl_obj.setopt(pycurl.FORBID_REUSE, 1) + + # defaults we're always going to set + self.curl_obj.setopt(pycurl.NOPROGRESS, False) +@@ -1136,11 +1348,21 @@ class PyCurlFileObject(): self.curl_obj.setopt(pycurl.PROGRESSFUNCTION, self._progress_update) self.curl_obj.setopt(pycurl.FAILONERROR, True) self.curl_obj.setopt(pycurl.OPT_FILETIME, True) + self.curl_obj.setopt(pycurl.FOLLOWLOCATION, True) - if DEBUG: +- if DEBUG: ++ if DEBUG and DEBUG.level <= 10: self.curl_obj.setopt(pycurl.VERBOSE, True) if opts.user_agent: self.curl_obj.setopt(pycurl.USERAGENT, opts.user_agent) @@ -365,7 +925,7 @@ index e090e90..b2770c5 100644 # maybe to be options later self.curl_obj.setopt(pycurl.FOLLOWLOCATION, True) -@@ -1148,9 +1222,11 @@ class PyCurlFileObject(): +@@ -1148,9 +1370,11 @@ class PyCurlFileObject(): # timeouts timeout = 300 @@ -375,16 +935,57 @@ index e090e90..b2770c5 100644 + if hasattr(opts, 'timeout'): + timeout = int(opts.timeout or 0) + self.curl_obj.setopt(pycurl.CONNECTTIMEOUT, timeout) -+ self.curl_obj.setopt(pycurl.LOW_SPEED_LIMIT, 1) ++ self.curl_obj.setopt(pycurl.LOW_SPEED_LIMIT, 1000) + self.curl_obj.setopt(pycurl.LOW_SPEED_TIME, timeout) # ssl options if self.scheme == 'https': -@@ -1203,12 +1279,19 @@ class PyCurlFileObject(): - if proxy == '_none_': proxy = "" - self.curl_obj.setopt(pycurl.PROXY, proxy) +@@ -1158,13 +1382,16 @@ class PyCurlFileObject(): + self.curl_obj.setopt(pycurl.CAPATH, opts.ssl_ca_cert) + self.curl_obj.setopt(pycurl.CAINFO, opts.ssl_ca_cert) + self.curl_obj.setopt(pycurl.SSL_VERIFYPEER, opts.ssl_verify_peer) +- self.curl_obj.setopt(pycurl.SSL_VERIFYHOST, opts.ssl_verify_host) ++ if opts.ssl_verify_host: # 1 is meaningless to curl ++ 
self.curl_obj.setopt(pycurl.SSL_VERIFYHOST, 2) + if opts.ssl_key: + self.curl_obj.setopt(pycurl.SSLKEY, opts.ssl_key) + if opts.ssl_key_type: + self.curl_obj.setopt(pycurl.SSLKEYTYPE, opts.ssl_key_type) + if opts.ssl_cert: + self.curl_obj.setopt(pycurl.SSLCERT, opts.ssl_cert) ++ # if we have a client side cert - turn off reuse b/c nss is odd ++ self.curl_obj.setopt(pycurl.FORBID_REUSE, 1) + if opts.ssl_cert_type: + self.curl_obj.setopt(pycurl.SSLCERTTYPE, opts.ssl_cert_type) + if opts.ssl_key_pass: +@@ -1187,28 +1414,26 @@ class PyCurlFileObject(): + if hasattr(opts, 'raw_throttle') and opts.raw_throttle(): + self.curl_obj.setopt(pycurl.MAX_RECV_SPEED_LARGE, int(opts.raw_throttle())) +- # proxy settings +- if opts.proxies: +- for (scheme, proxy) in opts.proxies.items(): +- if self.scheme in ('ftp'): # only set the ftp proxy for ftp items +- if scheme not in ('ftp'): +- continue +- else: +- if proxy == '_none_': proxy = "" +- self.curl_obj.setopt(pycurl.PROXY, proxy) +- elif self.scheme in ('http', 'https'): +- if scheme not in ('http', 'https'): +- continue +- else: +- if proxy == '_none_': proxy = "" +- self.curl_obj.setopt(pycurl.PROXY, proxy) +- - # FIXME username/password/auth settings ++ # proxy ++ if opts.proxy is not None: ++ self.curl_obj.setopt(pycurl.PROXY, opts.proxy) ++ self.curl_obj.setopt(pycurl.PROXYAUTH, ++ # All but Kerberos. BZ 769254 ++ pycurl.HTTPAUTH_ANY - pycurl.HTTPAUTH_GSSNEGOTIATE) ++ + if opts.username and opts.password: + if self.scheme in ('http', 'https'): + self.curl_obj.setopt(pycurl.HTTPAUTH, pycurl.HTTPAUTH_ANY) @@ -402,7 +1003,7 @@ index e090e90..b2770c5 100644 # our url self.curl_obj.setopt(pycurl.URL, self.url) -@@ -1228,12 +1311,14 @@ class PyCurlFileObject(): +@@ -1228,39 +1453,36 @@ class PyCurlFileObject(): code = self.http_code errcode = e.args[0] @@ -411,16 +1012,19 @@ index e090e90..b2770c5 100644 if self._error[0]: errcode = self._error[0] - if errcode == 23 and code >= 200 and code < 299: +- if errcode == 23 and code >= 200 and code < 299: - err = URLGrabError(15, _('User (or something) called abort %s: %s') % (self.url, e)) - err.url = self.url -+ err = URLGrabError(15, _('User (or something) called abort %s: %s') % (errurl, e)) -+ err.url = errurl - +- ++ if errcode == 23 and 200 <= code <= 299: # this is probably wrong but ultimately this is what happens # we have a legit http code and a pycurl 'writer failed' code -@@ -1244,23 +1329,23 @@ class PyCurlFileObject(): - raise KeyboardInterrupt + # which almost always means something aborted it from outside + # since we cannot know what it is -I'm banking on it being + # a ctrl-c. 
XXXX - if there's a way of going back two raises to + # figure out what aborted the pycurl process FIXME +- raise KeyboardInterrupt ++ raise getattr(self, '_cb_error', KeyboardInterrupt) elif errcode == 28: - err = URLGrabError(12, _('Timeout on %s: %s') % (self.url, e)) @@ -445,12 +1049,10 @@ index e090e90..b2770c5 100644 elif errcode == 42: - err = URLGrabError(15, _('User (or something) called abort %s: %s') % (self.url, e)) - err.url = self.url -+ err = URLGrabError(15, _('User (or something) called abort %s: %s') % (errurl, e)) -+ err.url = errurl # this is probably wrong but ultimately this is what happens # we have a legit http code and a pycurl 'writer failed' code # which almost always means something aborted it from outside -@@ -1272,33 +1357,93 @@ class PyCurlFileObject(): +@@ -1272,33 +1494,94 @@ class PyCurlFileObject(): elif errcode == 58: msg = _("problem with the local client certificate") err = URLGrabError(14, msg) @@ -477,8 +1079,9 @@ index e090e90..b2770c5 100644 + err.url = errurl raise err - elif str(e.args[1]) == '' and self.http_code != 0: # fake it until you make it +- elif str(e.args[1]) == '' and self.http_code != 0: # fake it until you make it - msg = 'HTTP Error %s : %s ' % (self.http_code, self.url) ++ elif str(e.args[1]) == '' and code and not 200 <= code <= 299: + if self.scheme in ['http', 'https']: + if self.http_code in responses: + resp = responses[self.http_code] @@ -519,6 +1122,7 @@ index e090e90..b2770c5 100644 + 42 : _("Aborted by callback"), + 47 : _("Too many redirects"), + 51 : _("Peer certificate failed verification"), ++ 52 : _("Got nothing: SSL certificate expired?"), + 53 : _("SSL engine not found"), + 54 : _("SSL engine set failed"), + 55 : _("Network error send()"), @@ -545,13 +1149,13 @@ index e090e90..b2770c5 100644 + else: + if self._error[1]: + msg = self._error[1] -+ err = URLGRabError(14, msg) ++ err = URLGrabError(14, msg) + err.url = urllib.unquote(self.url) + raise err def _do_open(self): self.curl_obj = _curl_cache -@@ -1333,7 +1478,11 @@ class PyCurlFileObject(): +@@ -1333,7 +1616,11 @@ class PyCurlFileObject(): if self.opts.range: rt = self.opts.range @@ -564,13 +1168,15 @@ index e090e90..b2770c5 100644 if rt: header = range_tuple_to_header(rt) -@@ -1434,9 +1583,13 @@ class PyCurlFileObject(): +@@ -1434,21 +1721,46 @@ class PyCurlFileObject(): #fh, self._temp_name = mkstemp() #self.fo = open(self._temp_name, 'wb') - - self._do_perform() - +- +- + try: + self._do_perform() + except URLGrabError, e: @@ -578,10 +1184,19 @@ index e090e90..b2770c5 100644 + self.fo.close() + raise e + - - if _was_filename: -@@ -1446,9 +1599,23 @@ class PyCurlFileObject(): + # close it up + self.fo.flush() + self.fo.close() ++ ++ # Set the URL where we got it from: ++ if xattr is not None: ++ # See: http://www.freedesktop.org/wiki/CommonExtendedAttributes ++ try: ++ xattr.set(self.filename, 'user.xdg.origin.url', self.url) ++ except: ++ pass # URL too long. = IOError ... ignore everything. 
++ # set the time mod_time = self.curl_obj.getinfo(pycurl.INFO_FILETIME) if mod_time != -1: @@ -607,7 +1222,14 @@ index e090e90..b2770c5 100644 else: #self.fo = open(self._temp_name, 'r') self.fo.seek(0) -@@ -1532,11 +1699,14 @@ class PyCurlFileObject(): +@@ -1526,17 +1838,20 @@ class PyCurlFileObject(): + if self._prog_running: + downloaded += self._reget_length + self.opts.progress_obj.update(downloaded) +- except KeyboardInterrupt: ++ except (KeyboardInterrupt, IOError): + return -1 + def _over_max_size(self, cur, max_size=None): if not max_size: @@ -626,7 +1248,7 @@ index e090e90..b2770c5 100644 msg = _("Downloaded more than max size for %s: %s > %s") \ % (self.url, cur, max_size) -@@ -1544,13 +1714,6 @@ class PyCurlFileObject(): +@@ -1544,13 +1859,6 @@ class PyCurlFileObject(): return True return False @@ -640,7 +1262,7 @@ index e090e90..b2770c5 100644 def read(self, amt=None): self._fill_buffer(amt) if amt is None: -@@ -1582,9 +1745,21 @@ class PyCurlFileObject(): +@@ -1582,9 +1890,21 @@ class PyCurlFileObject(): self.opts.progress_obj.end(self._amount_read) self.fo.close() @@ -658,25 +1280,568 @@ index e090e90..b2770c5 100644 + _curl_cache.close() + _curl_cache = pycurl.Curl() + -+ ++_libproxy_cache = None + ##################################################################### # DEPRECATED FUNCTIONS +@@ -1621,6 +1941,478 @@ def retrygrab(url, filename=None, copy_local=0, close_connection=0, + + + ##################################################################### ++# Serializer + parser: A replacement of the rather bulky Json code. ++# ++# - handles basic python literals, lists and tuples. ++# - serialized strings never contain ' ' or '\n' ++# ++##################################################################### ++ ++_quoter_map = {} ++for c in '%[(,)] \n': ++ _quoter_map[c] = '%%%02x' % ord(c) ++del c ++ ++def _dumps(v): ++ if v is None: return 'None' ++ if v is True: return 'True' ++ if v is False: return 'False' ++ if type(v) in (int, long, float): ++ return str(v) ++ if type(v) == unicode: ++ v = v.encode('UTF8') ++ if type(v) == str: ++ def quoter(c): return _quoter_map.get(c, c) ++ return "'%s'" % ''.join(map(quoter, v)) ++ if type(v) == tuple: ++ return "(%s)" % ','.join(map(_dumps, v)) ++ if type(v) == list: ++ return "[%s]" % ','.join(map(_dumps, v)) ++ raise TypeError, 'Can\'t serialize %s' % v ++ ++def _loads(s): ++ def decode(v): ++ if v == 'None': return None ++ if v == 'True': return True ++ if v == 'False': return False ++ try: return int(v) ++ except ValueError: pass ++ try: return float(v) ++ except ValueError: pass ++ if len(v) >= 2 and v[0] == v[-1] == "'": ++ ret = []; i = 1 ++ while True: ++ j = v.find('%', i) ++ ret.append(v[i:j]) # skips the final "'" ++ if j == -1: break ++ ret.append(chr(int(v[j + 1:j + 3], 16))) ++ i = j + 3 ++ v = ''.join(ret) ++ return v ++ stk = None ++ l = [] ++ i = j = 0 ++ while True: ++ if j == len(s) or s[j] in ',)]': ++ if j > i: ++ l.append(decode(s[i:j])) ++ if j == len(s): break ++ if s[j] in ')]': ++ if s[j] == ')': ++ l = tuple(l) ++ stk[0].append(l) ++ l, stk = stk ++ i = j = j + 1 ++ elif s[j] in '[(': ++ stk = l, stk ++ l = [] ++ i = j = j + 1 ++ else: ++ j += 1 # safe because '[(,)]' are quoted ++ if stk: raise ValueError ++ if len(l) == 1: l = l[0] ++ return l ++ ++ ++##################################################################### ++# External downloader process ++##################################################################### ++ ++def _readlines(fd): ++ buf = os.read(fd, 4096) ++ if not buf: return None 
++ # whole lines only, no buffering ++ while buf[-1] != '\n': ++ buf += os.read(fd, 4096) ++ return buf[:-1].split('\n') ++ ++import subprocess ++ ++class _ExternalDownloader: ++ def __init__(self): ++ self.popen = subprocess.Popen( ++ '/usr/libexec/urlgrabber-ext-down', ++ stdin = subprocess.PIPE, ++ stdout = subprocess.PIPE, ++ ) ++ self.stdin = self.popen.stdin.fileno() ++ self.stdout = self.popen.stdout.fileno() ++ self.running = {} ++ self.cnt = 0 ++ ++ # list of options we pass to downloader ++ _options = ( ++ 'url', 'filename', ++ 'timeout', 'close_connection', 'keepalive', ++ 'throttle', 'bandwidth', 'range', 'reget', ++ 'user_agent', 'http_headers', 'ftp_headers', ++ 'proxy', 'prefix', 'username', 'password', ++ 'ssl_ca_cert', ++ 'ssl_cert', 'ssl_cert_type', ++ 'ssl_key', 'ssl_key_type', ++ 'ssl_key_pass', ++ 'ssl_verify_peer', 'ssl_verify_host', ++ 'size', 'max_header_size', 'ip_resolve', ++ ) ++ ++ def start(self, opts): ++ arg = [] ++ for k in self._options: ++ v = getattr(opts, k) ++ if v is None: continue ++ arg.append('%s=%s' % (k, _dumps(v))) ++ if opts.progress_obj and opts.multi_progress_obj: ++ arg.append('progress_obj=True') ++ arg = ' '.join(arg) ++ if DEBUG: DEBUG.info('attempt %i/%s: %s', opts.tries, opts.retry, opts.url) ++ ++ self.cnt += 1 ++ self.running[self.cnt] = opts ++ os.write(self.stdin, arg +'\n') ++ ++ def perform(self): ++ ret = [] ++ lines = _readlines(self.stdout) ++ if not lines: ++ if DEBUG: DEBUG.info('downloader died') ++ raise KeyboardInterrupt ++ for line in lines: ++ # parse downloader output ++ line = line.split(' ', 5) ++ _id, size = map(int, line[:2]) ++ if len(line) == 2: ++ self.running[_id]._progress.update(size) ++ continue ++ # job done ++ opts = self.running.pop(_id) ++ if line[4] == 'OK': ++ ug_err = None ++ if DEBUG: DEBUG.info('success') ++ else: ++ ug_err = URLGrabError(int(line[4]), line[5]) ++ if DEBUG: DEBUG.info('failure: %s', ug_err) ++ _TH.update(opts.url, int(line[2]), float(line[3]), ug_err, opts.async[0]) ++ ret.append((opts, size, ug_err)) ++ return ret ++ ++ def abort(self): ++ self.popen.stdin.close() ++ self.popen.stdout.close() ++ self.popen.wait() ++ ++class _ExternalDownloaderPool: ++ def __init__(self): ++ self.epoll = select.epoll() ++ self.running = {} ++ self.cache = {} ++ ++ def start(self, opts): ++ host = urlparse.urlsplit(opts.url).netloc ++ dl = self.cache.pop(host, None) ++ if not dl: ++ dl = _ExternalDownloader() ++ fl = fcntl.fcntl(dl.stdin, fcntl.F_GETFD) ++ fcntl.fcntl(dl.stdin, fcntl.F_SETFD, fl | fcntl.FD_CLOEXEC) ++ self.epoll.register(dl.stdout, select.EPOLLIN) ++ self.running[dl.stdout] = dl ++ dl.start(opts) ++ ++ def perform(self): ++ ret = [] ++ for fd, event in self.epoll.poll(): ++ if event & select.EPOLLHUP: ++ if DEBUG: DEBUG.info('downloader died') ++ raise KeyboardInterrupt ++ assert event & select.EPOLLIN ++ done = self.running[fd].perform() ++ if not done: continue ++ assert len(done) == 1 ++ ret.extend(done) ++ ++ # dl finished, move it to the cache ++ host = urlparse.urlsplit(done[0][0].url).netloc ++ if host in self.cache: self.cache[host].abort() ++ self.epoll.unregister(fd) ++ self.cache[host] = self.running.pop(fd) ++ return ret ++ ++ def abort(self): ++ for dl in self.running.values(): ++ self.epoll.unregister(dl.stdout) ++ dl.abort() ++ for dl in self.cache.values(): ++ dl.abort() ++ ++ ++##################################################################### ++# High level async API ++##################################################################### ++ ++_async_queue = [] ++ 
++def parallel_wait(meter=None): ++ '''Process queued requests in parallel. ++ ''' ++ ++ # calculate total sizes ++ meters = {} ++ for opts in _async_queue: ++ if opts.progress_obj and opts.multi_progress_obj: ++ count, total = meters.get(opts.multi_progress_obj) or (0, 0) ++ meters[opts.multi_progress_obj] = count + 1, total + opts.size ++ ++ # start multi-file meters ++ for meter in meters: ++ count, total = meters[meter] ++ meter.start(count, total) ++ ++ dl = _ExternalDownloaderPool() ++ host_con = {} # current host connection counts ++ ++ def start(opts, tries): ++ opts.tries = tries ++ try: ++ dl.start(opts) ++ except OSError, e: ++ # can't spawn downloader, give up immediately ++ opts.exception = URLGrabError(5, exception2msg(e)) ++ _run_callback(opts.failfunc, opts) ++ return ++ ++ key, limit = opts.async ++ host_con[key] = host_con.get(key, 0) + 1 ++ if opts.progress_obj: ++ if opts.multi_progress_obj: ++ opts._progress = opts.multi_progress_obj.newMeter() ++ opts._progress.start(text=opts.text) ++ else: ++ opts._progress = time.time() # no updates ++ ++ def perform(): ++ for opts, size, ug_err in dl.perform(): ++ key, limit = opts.async ++ host_con[key] -= 1 ++ ++ if ug_err is None: ++ if opts.checkfunc: ++ try: _run_callback(opts.checkfunc, opts) ++ except URLGrabError, ug_err: pass ++ ++ if opts.progress_obj: ++ if opts.multi_progress_obj: ++ if ug_err: ++ opts._progress.failure(None) ++ else: ++ opts.multi_progress_obj.re.total += size - opts.size # correct totals ++ opts._progress.end(size) ++ opts.multi_progress_obj.removeMeter(opts._progress) ++ else: ++ opts.progress_obj.start(text=opts.text, now=opts._progress) ++ opts.progress_obj.update(size) ++ opts.progress_obj.end(size) ++ del opts._progress ++ ++ if ug_err is None: ++ continue ++ ++ retry = opts.retry or 0 ++ if opts.failure_callback: ++ opts.exception = ug_err ++ try: _run_callback(opts.failure_callback, opts) ++ except URLGrabError, ug_err: ++ retry = 0 # no retries ++ if opts.tries < retry and ug_err.errno in opts.retrycodes: ++ start(opts, opts.tries + 1) # simple retry ++ continue ++ ++ if opts.mirror_group: ++ mg, errors, failed, removed = opts.mirror_group ++ errors.append((opts.url, exception2msg(ug_err))) ++ failed[key] = failed.get(key, 0) + 1 ++ opts.mirror = key ++ opts.exception = ug_err ++ action = mg.default_action or {} ++ if mg.failure_callback: ++ opts.tries = len(errors) ++ action = dict(action) # update only the copy ++ action.update(_run_callback(mg.failure_callback, opts)) ++ if not action.get('fail', 0): ++ # mask this mirror and retry ++ if action.get('remove', 1): ++ removed.add(key) ++ _async_queue.append(opts) ++ continue ++ # fail=1 from callback ++ ug_err.errors = errors ++ ++ # urlgrab failed ++ opts.exception = ug_err ++ _run_callback(opts.failfunc, opts) ++ ++ try: ++ idx = 0 ++ while True: ++ if idx >= len(_async_queue): ++ # the queue is empty ++ if not dl.running: break ++ # pending dl may extend it ++ perform() ++ continue ++ ++ # handle next request ++ opts = _async_queue[idx] ++ idx += 1 ++ ++ # check global limit ++ while len(dl.running) >= default_grabber.opts.max_connections: ++ perform() ++ if DEBUG: ++ DEBUG.info('max_connections: %d/%d', len(dl.running), default_grabber.opts.max_connections) ++ ++ if opts.mirror_group: ++ mg, errors, failed, removed = opts.mirror_group ++ ++ # find the best mirror ++ best = None ++ best_speed = None ++ for mirror in mg.mirrors: ++ key = mirror['mirror'] ++ if key in removed: continue ++ ++ # estimate mirror speed ++ speed, fail = 
_TH.estimate(key) ++ speed /= 1 + host_con.get(key, 0) ++ ++ # order by: least failures, private flag, best speed ++ # ignore 'private' flag if there were failures ++ private = not fail and mirror.get('kwargs', {}).get('private', False) ++ speed = -failed.get(key, 0), private, speed ++ if best is None or speed > best_speed: ++ best = mirror ++ best_speed = speed ++ ++ if best is None: ++ opts.exception = URLGrabError(256, _('No more mirrors to try.')) ++ opts.exception.errors = errors ++ _run_callback(opts.failfunc, opts) ++ continue ++ ++ # update the grabber object, apply mirror kwargs ++ grabber = best.get('grabber') or mg.grabber ++ opts.delegate = grabber.opts.derive(**best.get('kwargs', {})) ++ ++ # update the current mirror and limit ++ key = best['mirror'] ++ limit = best.get('kwargs', {}).get('max_connections', 2) ++ opts.async = key, limit ++ ++ # update URL and proxy ++ url = mg._join_url(key, opts.relative_url) ++ url, parts = opts.urlparser.parse(url, opts) ++ opts.find_proxy(url, parts[0]) ++ opts.url = url ++ ++ # check host limit, then start ++ key, limit = opts.async ++ while host_con.get(key, 0) >= limit: ++ perform() ++ if DEBUG: ++ DEBUG.info('max_connections(%s): %d/%d', key, host_con.get(key, 0), limit) ++ ++ start(opts, 1) ++ except IOError, e: ++ if e.errno != 4: raise ++ raise KeyboardInterrupt ++ ++ finally: ++ dl.abort() ++ for meter in meters: ++ meter.end() ++ del _async_queue[:] ++ _TH.save() ++ ++ ++##################################################################### ++# Host bandwidth estimation ++##################################################################### ++ ++class _TH: ++ hosts = {} ++ dirty = None ++ ++ @staticmethod ++ def load(): ++ filename = default_grabber.opts.timedhosts ++ if filename and _TH.dirty is None: ++ try: ++ for line in open(filename): ++ host, speed, fail, ts = line.rsplit(' ', 3) ++ _TH.hosts[host] = int(speed), int(fail), int(ts) ++ except IOError: pass ++ _TH.dirty = False ++ ++ @staticmethod ++ def save(): ++ filename = default_grabber.opts.timedhosts ++ if filename and _TH.dirty is True: ++ tmp = '%s.%d' % (filename, os.getpid()) ++ try: ++ f = open(tmp, 'w') ++ for host in _TH.hosts: ++ f.write(host + ' %d %d %d\n' % _TH.hosts[host]) ++ f.close() ++ os.rename(tmp, filename) ++ except IOError: pass ++ _TH.dirty = False ++ ++ @staticmethod ++ def update(url, dl_size, dl_time, ug_err, baseurl=None): ++ # Use hostname from URL. If it's a file:// URL, use baseurl. ++ # If no baseurl, do not update timedhosts. ++ host = urlparse.urlsplit(url).netloc.split('@')[-1] or baseurl ++ if not host: return ++ ++ _TH.load() ++ speed, fail, ts = _TH.hosts.get(host) or (0, 0, 0) ++ now = time.time() ++ ++ if ug_err is None: ++ # defer first update if the file was small. BZ 851178. ++ if not ts and dl_size < 1e6: return ++ ++ # k1: the older, the less useful ++ # k2: <500ms readings are less reliable ++ # speeds vary, use 10:1 smoothing ++ k1 = 2**((ts - now) / default_grabber.opts.half_life) ++ k2 = min(dl_time / .500, 1.0) / 10 ++ if k2 > 0: ++ speed = (k1 * speed + k2 * dl_size / dl_time) / (k1 + k2) ++ fail = 0 ++ elif getattr(ug_err, 'code', None) == 404: ++ fail = 0 # alive, at least ++ else: ++ fail += 1 # seems dead ++ ++ _TH.hosts[host] = speed, fail, now ++ _TH.dirty = True ++ ++ @staticmethod ++ def estimate(baseurl): ++ _TH.load() ++ ++ # Use just the hostname, unless it's a file:// baseurl. 
++ host = urlparse.urlsplit(baseurl).netloc.split('@')[-1] or baseurl ++ ++ default_speed = default_grabber.opts.default_speed ++ try: speed, fail, ts = _TH.hosts[host] ++ except KeyError: return default_speed, 0 ++ ++ speed *= 2**-fail ++ k = 2**((ts - time.time()) / default_grabber.opts.half_life) ++ speed = k * speed + (1 - k) * default_speed ++ return speed, fail ++ ++##################################################################### + # TESTING + def _main_test(): + try: url, filename = sys.argv[1:3] diff --git a/urlgrabber/mirror.py b/urlgrabber/mirror.py -index dad410b..8731aed 100644 +index dad410b..7975f1b 100644 --- a/urlgrabber/mirror.py +++ b/urlgrabber/mirror.py -@@ -90,7 +90,7 @@ CUSTOMIZATION +@@ -76,6 +76,9 @@ CUSTOMIZATION + 'grabber' is omitted, the default grabber will be used. If + kwargs are omitted, then (duh) they will not be used. + ++ kwarg 'max_connections' limits the number of concurrent ++ connections to this mirror. ++ + 3) Pass keyword arguments when instantiating the mirror group. + See, for example, the failure_callback argument. + +@@ -87,10 +90,14 @@ CUSTOMIZATION + """ + + ++import sys import random import thread # needed for locking to make this threadsafe -from grabber import URLGrabError, CallbackObject, DEBUG +from grabber import URLGrabError, CallbackObject, DEBUG, _to_utf8 ++from grabber import _run_callback, _do_raise ++from grabber import exception2msg ++from grabber import _TH def _(st): return st -@@ -263,7 +263,8 @@ class MirrorGroup: +@@ -126,7 +133,9 @@ class MirrorGroup: + files) + + * if the local list is ever exhausted, a URLGrabError will be +- raised (errno=256, no more mirrors) ++ raised (errno=256, No more mirrors). The 'errors' attribute ++ holds a list of (full_url, errmsg) tuples. This contains ++ all URLs tried and the corresponding error messages. + + OPTIONS + +@@ -153,7 +162,8 @@ class MirrorGroup: + + The 'fail' option will cause immediate failure by re-raising + the exception and no further attempts to get the current +- download. ++ download. As in the "No more mirrors" case, the 'errors' ++ attribute is set in the exception object. + + This dict can be set at instantiation time, + mg = MirrorGroup(grabber, mirrors, default_action={'fail':1}) +@@ -184,6 +194,7 @@ class MirrorGroup: + + obj.exception = < exception that was raised > + obj.mirror = < the mirror that was tried > ++ obj.tries = < the number of mirror tries so far > + obj.relative_url = < url relative to the mirror > + obj.url = < full url that failed > + # .url is just the combination of .mirror +@@ -251,6 +262,17 @@ class MirrorGroup: + self.default_action = None + self._process_kwargs(kwargs) + ++ # use the same algorithm as parallel downloader to initially sort ++ # the mirror list (sort by speed, but prefer live private mirrors) ++ def estimate(m): ++ speed, fail = _TH.estimate(m['mirror']) ++ private = not fail and m.get('kwargs', {}).get('private', False) ++ return private, speed ++ ++ # update the initial order. since sorting is stable, the relative ++ # order of unknown (not used yet) hosts is retained. 
++ self.mirrors.sort(key=estimate, reverse=True) ++ + # if these values are found in **kwargs passed to one of the urlXXX + # methods, they will be stripped before getting passed on to the + # grabber +@@ -263,7 +285,8 @@ class MirrorGroup: def _parse_mirrors(self, mirrors): parsed_mirrors = [] for m in mirrors: @@ -686,11 +1851,410 @@ index dad410b..8731aed 100644 parsed_mirrors.append(m) return parsed_mirrors +@@ -280,7 +303,9 @@ class MirrorGroup: + # return a random mirror so that multiple mirrors get used + # even without failures. + if not gr.mirrors: +- raise URLGrabError(256, _('No more mirrors to try.')) ++ e = URLGrabError(256, _('No more mirrors to try.')) ++ e.errors = gr.errors ++ raise e + return gr.mirrors[gr._next] + + def _failure(self, gr, cb_obj): +@@ -307,7 +332,9 @@ class MirrorGroup: + a.update(action) + action = a + self.increment_mirror(gr, action) +- if action and action.get('fail', 0): raise ++ if action and action.get('fail', 0): ++ sys.exc_info()[1].errors = gr.errors ++ raise + + def increment_mirror(self, gr, action={}): + """Tell the mirror object increment the mirror index +@@ -377,35 +404,50 @@ class MirrorGroup: + gr.url = url + gr.kw = dict(kw) + self._load_gr(gr) ++ gr.errors = [] + + for k in self.options: + try: del kw[k] + except KeyError: pass + ++ tries = 0 + while 1: ++ tries += 1 + mirrorchoice = self._get_mirror(gr) + fullurl = self._join_url(mirrorchoice['mirror'], gr.url) +- kwargs = dict(mirrorchoice.get('kwargs', {})) +- kwargs.update(kw) + grabber = mirrorchoice.get('grabber') or self.grabber ++ # apply mirrorchoice kwargs on top of grabber.opts ++ opts = grabber.opts.derive(**mirrorchoice.get('kwargs', {})) + func_ref = getattr(grabber, func) + if DEBUG: DEBUG.info('MIRROR: trying %s -> %s', url, fullurl) + try: +- return func_ref( *(fullurl,), **kwargs ) ++ return func_ref( *(fullurl,), opts=opts, **kw ) + except URLGrabError, e: + if DEBUG: DEBUG.info('MIRROR: failed') ++ gr.errors.append((fullurl, exception2msg(e))) + obj = CallbackObject() + obj.exception = e + obj.mirror = mirrorchoice['mirror'] + obj.relative_url = gr.url + obj.url = fullurl ++ obj.tries = tries + self._failure(gr, obj) + + def urlgrab(self, url, filename=None, **kwargs): + kw = dict(kwargs) + kw['filename'] = filename ++ if kw.get('async'): ++ # enable mirror failovers in async path ++ kw['mirror_group'] = self, [], {}, set() ++ kw['relative_url'] = url ++ else: ++ kw.pop('failfunc', None) + func = 'urlgrab' +- return self._mirror_try(func, url, kw) ++ try: ++ return self._mirror_try(func, url, kw) ++ except URLGrabError, e: ++ obj = CallbackObject(url=url, filename=filename, exception=e, **kwargs) ++ return _run_callback(kwargs.get('failfunc', _do_raise), obj) + + def urlopen(self, url, **kwargs): + kw = dict(kwargs) diff --git a/urlgrabber/progress.py b/urlgrabber/progress.py -index dd07c6a..45eb248 100644 +index dd07c6a..077fd99 100644 --- a/urlgrabber/progress.py +++ b/urlgrabber/progress.py -@@ -658,6 +658,8 @@ def format_time(seconds, use_hours=0): +@@ -133,8 +133,8 @@ class BaseMeter: + # for a real gui, you probably want to override and put a call + # to your mainloop iteration function here + if now is None: now = time.time() +- if (now >= self.last_update_time + self.update_period) or \ +- not self.last_update_time: ++ if (not self.last_update_time or ++ (now >= self.last_update_time + self.update_period)): + self.re.update(amount_read, now) + self.last_amount_read = amount_read + self.last_update_time = now +@@ -211,6 +211,21 @@ def 
text_meter_total_size(size, downloaded=0): + # 4. + ( 5, total: 32) + # + ++def _term_add_bar(tl, bar_max_length, pc): ++ blen = bar_max_length ++ bar = '='*int(blen * pc) ++ if (blen * pc) - int(blen * pc) >= 0.5: ++ bar += '-' ++ return tl.add(' [%-*.*s]' % (blen, blen, bar)) ++ ++def _term_add_end(tl, osize, size): ++ if osize is not None: ++ if size > osize: # Is ??? better? Really need something to say < vs >. ++ return tl.add(' !!! '), True ++ elif size != osize: ++ return tl.add(' ... '), True ++ return tl.add(' ' * 5), False ++ + class TextMeter(BaseMeter): + def __init__(self, fo=sys.stderr): + BaseMeter.__init__(self) +@@ -218,7 +233,6 @@ class TextMeter(BaseMeter): + + def _do_update(self, amount_read, now=None): + etime = self.re.elapsed_time() +- fetime = format_time(etime) + fread = format_number(amount_read) + #self.size = None + if self.text is not None: +@@ -234,16 +248,20 @@ class TextMeter(BaseMeter): + + # Include text + ui_rate in minimal + tl = TerminalLine(8, 8+1+8) ++ if tl._llen > 80: ++ use_hours = True # For big screens, make it more readable. ++ else: ++ use_hours = False + ui_size = tl.add(' | %5sB' % fread) + if self.size is None: +- ui_time = tl.add(' %9s' % fetime) ++ ui_time = tl.add(' %9s' % format_time(etime, use_hours)) + ui_end = tl.add(' ' * 5) + ui_rate = tl.add(' %5sB/s' % ave_dl) + out = '%-*.*s%s%s%s%s\r' % (tl.rest(), tl.rest(), text, + ui_rate, ui_size, ui_time, ui_end) + else: + rtime = self.re.remaining_time() +- frtime = format_time(rtime) ++ frtime = format_time(rtime, use_hours) + frac = self.re.fraction_read() + + ui_time = tl.add(' %9s' % frtime) +@@ -259,13 +277,10 @@ class TextMeter(BaseMeter): + ui_rate = tl.add(' %5sB/s' % ave_dl) + # Make text grow a bit before we start growing the bar too + blen = 4 + tl.rest_split(8 + 8 + 4) +- bar = '='*int(blen * frac) +- if (blen * frac) - int(blen * frac) >= 0.5: +- bar += '-' +- ui_bar = tl.add(' [%-*.*s]' % (blen, blen, bar)) +- out = '%-*.*s%s%s%s%s%s%s%s\r' % (tl.rest(), tl.rest(), text, +- ui_sofar_pc, ui_pc, ui_bar, +- ui_rate, ui_size, ui_time, ui_end) ++ ui_bar = _term_add_bar(tl, blen, frac) ++ out = '\r%-*.*s%s%s%s%s%s%s%s\r' % (tl.rest(), tl.rest(), text, ++ ui_sofar_pc, ui_pc, ui_bar, ++ ui_rate,ui_size,ui_time, ui_end) + + self.fo.write(out) + self.fo.flush() +@@ -274,7 +289,6 @@ class TextMeter(BaseMeter): + global _text_meter_total_size + global _text_meter_sofar_size + +- total_time = format_time(self.re.elapsed_time()) + total_size = format_number(amount_read) + if self.text is not None: + text = self.text +@@ -282,14 +296,13 @@ class TextMeter(BaseMeter): + text = self.basename + + tl = TerminalLine(8) +- ui_size = tl.add(' | %5sB' % total_size) +- ui_time = tl.add(' %9s' % total_time) +- not_done = self.size is not None and amount_read != self.size +- if not_done: +- ui_end = tl.add(' ... ') ++ if tl._llen > 80: ++ use_hours = True # For big screens, make it more readable. 
+ else: +- ui_end = tl.add(' ' * 5) +- ++ use_hours = False ++ ui_size = tl.add(' | %5sB' % total_size) ++ ui_time = tl.add(' %9s' % format_time(self.re.elapsed_time(),use_hours)) ++ ui_end, not_done = _term_add_end(tl, self.size, amount_read) + out = '\r%-*.*s%s%s%s\n' % (tl.rest(), tl.rest(), text, + ui_size, ui_time, ui_end) + self.fo.write(out) +@@ -331,12 +344,21 @@ class MultiFileHelper(BaseMeter): + def message(self, message): + self.master.message_meter(self, message) + ++class _FakeLock: ++ def acquire(self): ++ pass ++ def release(self): ++ pass ++ + class MultiFileMeter: + helperclass = MultiFileHelper +- def __init__(self): ++ def __init__(self, threaded=True): + self.meters = [] + self.in_progress_meters = [] +- self._lock = thread.allocate_lock() ++ if threaded: ++ self._lock = thread.allocate_lock() ++ else: ++ self._lock = _FakeLock() + self.update_period = 0.3 # seconds + + self.numfiles = None +@@ -369,6 +391,7 @@ class MultiFileMeter: + + def end(self, now=None): + if now is None: now = time.time() ++ self.re.update(self._amount_read(), now) + self._do_end(now) + + def _do_end(self, now): +@@ -407,8 +430,8 @@ class MultiFileMeter: + def update_meter(self, meter, now): + if not meter in self.meters: + raise ValueError('attempt to use orphaned meter') +- if (now >= self.last_update_time + self.update_period) or \ +- not self.last_update_time: ++ if (not self.last_update_time or ++ (now >= self.last_update_time + self.update_period)): + self.re.update(self._amount_read(), now) + self.last_update_time = now + self._do_update_meter(meter, now) +@@ -466,34 +489,87 @@ class MultiFileMeter: + + + class TextMultiFileMeter(MultiFileMeter): +- def __init__(self, fo=sys.stderr): ++ def __init__(self, fo=sys.stderr, threaded=True): + self.fo = fo +- MultiFileMeter.__init__(self) ++ MultiFileMeter.__init__(self, threaded) ++ self.index_time = self.index = 0 + + # files: ###/### ###% data: ######/###### ###% time: ##:##:##/##:##:## ++# New output, like TextMeter output... ++# update: No size (minimal: 17 chars) ++# ----------------------------------- ++# (<#file>/<#tot files>): | ++# 8-48 1 8 3 6 1 7-9 5 ++# ++# update: Size, All files ++# ----------------------- ++# (<#file>/<#tot files>): | ETA ++# 8-22 1 3-4 1 6-12 1 8 3 6 1 7-9 1 3 1 ++# end ++# --- ++# | ++# 8-56 3 6 1 9 5 + def _do_update_meter(self, meter, now): + self._lock.acquire() + try: +- format = "files: %3i/%-3i %3i%% data: %6.6s/%-6.6s %3i%% " \ +- "time: %8.8s/%8.8s" + df = self.finished_files + tf = self.numfiles or 1 +- pf = 100 * float(df)/tf + 0.49 ++ # Don't use "percent of files complete" ... 
++ # pf = 100 * float(df)/tf + 0.49 + dd = self.re.last_amount_read +- td = self.total_size ++ td = self.re.total + pd = 100 * (self.re.fraction_read() or 0) + 0.49 + dt = self.re.elapsed_time() + rt = self.re.remaining_time() +- if rt is None: tt = None +- else: tt = dt + rt + +- fdd = format_number(dd) + 'B' +- ftd = format_number(td) + 'B' +- fdt = format_time(dt, 1) +- ftt = format_time(tt, 1) +- +- out = '%-79.79s' % (format % (df, tf, pf, fdd, ftd, pd, fdt, ftt)) +- self.fo.write('\r' + out) ++ frac = self.re.fraction_read() or 0 ++ pf = 100 * frac ++ ave_dl = format_number(self.re.average_rate()) ++ ++ # cycle through active meters ++ if now > self.index_time: ++ self.index_time = now + 1.0 ++ self.index += 1 ++ if self.index >= len(self.meters): ++ self.index = 0 ++ meter = self.meters[self.index] ++ text = meter.text or meter.basename ++ if tf > 1: ++ text = '(%u/%u): %s' % (df+1+self.index, tf, text) ++ ++ # Include text + ui_rate in minimal ++ tl = TerminalLine(8, 8+1+8) ++ if tl._llen > 80: ++ use_hours = True # For big screens, make it more readable. ++ time_len = 9 ++ else: ++ use_hours = False ++ time_len = 7 ++ ++ ui_size = tl.add(' | %5sB' % format_number(dd)) ++ ++ if not self.re.total: ++ ui_time = tl.add(' %*s' % (time_len,format_time(dt, use_hours))) ++ ui_end = tl.add(' ' * 5) ++ ui_rate = tl.add(' %5sB/s' % ave_dl) ++ out = '\r%-*.*s%s%s%s%s\r' % (tl.rest(), tl.rest(), text, ++ ui_rate, ui_size, ui_time, ui_end) ++ else: ++ ui_time = tl.add(' %*s' % (time_len,format_time(rt, use_hours))) ++ ui_end = tl.add(' ETA ') ++ ++ ui_sofar_pc = tl.add(' %i%%' % pf, ++ full_len=len(" (100%)")) ++ ui_rate = tl.add(' %5sB/s' % ave_dl) ++ ++ # Make text grow a bit before we start growing the bar too ++ blen = 4 + tl.rest_split(8 + 8 + 4) ++ ui_bar = _term_add_bar(tl, blen, frac) ++ out = '\r%-*.*s%s%s%s%s%s%s\r' % (tl.rest(), tl.rest(), text, ++ ui_sofar_pc, ui_bar, ++ ui_rate, ui_size, ui_time, ++ ui_end) ++ self.fo.write(out) + self.fo.flush() + finally: + self._lock.release() +@@ -502,24 +578,40 @@ class TextMultiFileMeter(MultiFileMeter): + self._lock.acquire() + try: + format = "%-30.30s %6.6s %8.8s %9.9s" +- fn = meter.basename ++ fn = meter.text or meter.basename + size = meter.last_amount_read + fsize = format_number(size) + 'B' + et = meter.re.elapsed_time() +- fet = format_time(et, 1) +- frate = format_number(size / et) + 'B/s' +- +- out = '%-79.79s' % (format % (fn, fsize, fet, frate)) +- self.fo.write('\r' + out + '\n') ++ frate = format_number(et and size / et) + 'B/s' ++ df = self.finished_files ++ tf = self.numfiles or 1 ++ ++ total_size = format_number(size) ++ text = meter.text or meter.basename ++ if tf > 1: ++ text = '(%u/%u): %s' % (df, tf, text) ++ ++ tl = TerminalLine(8) ++ if tl._llen > 80: ++ use_hours = True # For big screens, make it more readable. 
++ time_len = 9 ++ else: ++ use_hours = False ++ time_len = 7 ++ ui_size = tl.add(' | %5sB' % total_size) ++ ui_time = tl.add(' %*s' % (time_len, format_time(et, use_hours))) ++ ui_end, not_done = _term_add_end(tl, meter.size, size) ++ out = '\r%-*.*s%s%s%s\n' % (tl.rest(), tl.rest(), text, ++ ui_size, ui_time, ui_end) ++ self.fo.write(out) + finally: + self._lock.release() +- self._do_update_meter(meter, now) + + def _do_failure_meter(self, meter, message, now): + self._lock.acquire() + try: + format = "%-30.30s %6.6s %s" +- fn = meter.basename ++ fn = meter.text or meter.basename + if type(message) in (type(''), type(u'')): + message = message.splitlines() + if not message: message = [''] +@@ -536,15 +628,6 @@ class TextMultiFileMeter(MultiFileMeter): + pass + finally: + self._lock.release() +- +- def _do_end(self, now): +- self._do_update_meter(None, now) +- self._lock.acquire() +- try: +- self.fo.write('\n') +- self.fo.flush() +- finally: +- self._lock.release() + + ###################################################################### + # support classes and functions +@@ -658,6 +741,8 @@ def format_time(seconds, use_hours=0): if seconds is None or seconds < 0: if use_hours: return '--:--:--' else: return '--:--' @@ -699,3 +2263,81 @@ index dd07c6a..45eb248 100644 else: seconds = int(seconds) minutes = seconds / 60 +@@ -722,9 +807,77 @@ def _tst(fn, cur, tot, beg, size, *args): + time.sleep(delay) + tm.end(size) + ++def _mtst(datas, *args): ++ print '-' * 79 ++ tm = TextMultiFileMeter(threaded=False) ++ ++ dl_sizes = {} ++ ++ num = 0 ++ total_size = 0 ++ dl_total_size = 0 ++ for data in datas: ++ dl_size = None ++ if len(data) == 2: ++ fn, size = data ++ dl_size = size ++ if len(data) == 3: ++ fn, size, dl_size = data ++ nm = tm.newMeter() ++ nm.start(fn, "http://www.example.com/path/to/fn/" + fn, fn, size, ++ text=fn) ++ num += 1 ++ assert dl_size is not None ++ dl_total_size += dl_size ++ dl_sizes[nm] = dl_size ++ if size is None or total_size is None: ++ total_size = None ++ else: ++ total_size += size ++ tm.start(num, total_size) ++ ++ num = 0 ++ off = 0 ++ for (inc, delay) in args: ++ off += 1 ++ while num < ((dl_total_size * off) / len(args)): ++ num += inc ++ for nm in tm.meters[:]: ++ if dl_sizes[nm] <= num: ++ nm.end(dl_sizes[nm]) ++ tm.removeMeter(nm) ++ else: ++ nm.update(num) ++ time.sleep(delay) ++ assert not tm.meters ++ + if __name__ == "__main__": + # (1/2): subversion-1.4.4-7.x86_64.rpm 2.4 MB / 85 kB/s 00:28 + # (2/2): mercurial-0.9.5-6.fc8.x86_64.rpm 924 kB / 106 kB/s 00:08 ++ if len(sys.argv) >= 2 and sys.argv[1] == 'multi': ++ _mtst((("sm-1.0.0-1.fc8.i386.rpm", 1000), ++ ("s-1.0.1-1.fc8.i386.rpm", 5000), ++ ("m-1.0.1-2.fc8.i386.rpm", 10000)), ++ (100, 0.33), (500, 0.25), (1000, 0.1)) ++ ++ _mtst((("sm-1.0.0-1.fc8.i386.rpm", 1000), ++ ("s-1.0.1-1.fc8.i386.rpm", 5000), ++ ("m-1.0.1-2.fc8.i386.rpm", None, 10000)), ++ (100, 0.33), (500, 0.25), (1000, 0.1)) ++ ++ _mtst((("sm-1.0.0-1.fc8.i386.rpm", 1000), ++ ("s-1.0.1-1.fc8.i386.rpm", 2500000), ++ ("m-1.0.1-2.fc8.i386.rpm", 10000)), ++ (10, 0.2), (50, 0.1), (1000, 0.1)) ++ ++ _mtst((("sm-1.0.0-1.fc8.i386.rpm", 1000), ++ ("s-1.0.1-1.fc8.i386.rpm", None, 2500000), ++ ("m-1.0.1-2.fc8.i386.rpm", None, 10000)), ++ (10, 0.2), (50, 0.1), (1000, 0.1)) ++ # (10, 0.2), (100, 0.1), (100, 0.1), (100, 0.25)) ++ # (10, 0.2), (100, 0.1), (100, 0.1), (100, 0.25)) ++ sys.exit(0) ++ + if len(sys.argv) >= 2 and sys.argv[1] == 'total': + text_meter_total_size(1000 + 10000 + 10000 + 1000000 + 1000000 + + 1000000 + 10000 + 10000 + 10000 + 
1000000)
diff --git a/python-urlgrabber/python-urlgrabber.nm b/python-urlgrabber/python-urlgrabber.nm
index a7cbc7682..1fe108373 100644
--- a/python-urlgrabber/python-urlgrabber.nm
+++ b/python-urlgrabber/python-urlgrabber.nm
@@ -5,7 +5,7 @@
 name = python-urlgrabber
 version = 3.9.1
-release = 3
+release = 4
 arch = noarch
 thisapp = urlgrabber-%{version}
@@ -28,6 +28,11 @@ build
 		python-pycurl
 	end
+	prepare_cmds
+		# Move everything from /usr/libexec to /usr/lib/urlgrabber.
+		find . -type f | xargs sed -i "s@libexec@lib/urlgrabber@g"
+	end
+
 	build = # Nothing to do
 	install