diff --git a/scripts/urlgrabber b/scripts/urlgrabber
index 518e512..09cd896 100644
--- a/scripts/urlgrabber
+++ b/scripts/urlgrabber
@@ -115,6 +115,7 @@ options:
                     including quotes in the case of strings.
                     e.g. --user_agent='"foobar/2.0"'
 
+  --output FILE
   -o FILE         write output to FILE, otherwise the basename of the
                   url will be used
   -O              print the names of saved files to STDOUT
@@ -170,12 +171,17 @@ class client_options:
         return ug_options, ug_defaults
 
     def process_command_line(self):
-        short_options = 'vd:hoOpD'
+        short_options = 'vd:ho:OpD'
         long_options = ['profile', 'repeat=', 'verbose=',
-                        'debug=', 'help', 'progress']
+                        'debug=', 'help', 'progress', 'output=']
         ug_long = [ o + '=' for o in self.ug_options ]
-        optlist, args = getopt.getopt(sys.argv[1:], short_options,
-                                      long_options + ug_long)
+        try:
+            optlist, args = getopt.getopt(sys.argv[1:], short_options,
+                                          long_options + ug_long)
+        except getopt.GetoptError, e:
+            print >>sys.stderr, "Error:", e
+            self.help([], ret=1)
+
         self.verbose = 0
         self.debug = None
         self.outputfile = None
@@ -193,6 +199,7 @@ class client_options:
             if o == '--verbose': self.verbose = v
             if o == '-v': self.verbose += 1
             if o == '-o': self.outputfile = v
+            if o == '--output': self.outputfile = v
             if o == '-p' or o == '--progress': self.progress = 1
             if o == '-d' or o == '--debug': self.debug = v
             if o == '--profile': self.profile = 1
@@ -222,7 +229,7 @@ class client_options:
             print "ERROR: cannot use -o when grabbing multiple files"
             sys.exit(1)
 
-    def help(self, args):
+    def help(self, args, ret=0):
         if not args:
             print MAINHELP
         else:
@@ -234,7 +241,7 @@ class client_options:
                     self.help_ug_option(a)
                 else:
                     print 'ERROR: no help on command "%s"' % a
-        sys.exit(0)
+        sys.exit(ret)
 
     def help_doc(self):
         print __doc__
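
The try/except above is the whole fix for unknown options: instead of an unhandled getopt.GetoptError traceback, the script now reports the problem on stderr and exits non-zero through help([], ret=1). A minimal standalone sketch of the same Python 2 pattern (option names shortened, behavior otherwise the same):

    import sys
    import getopt

    try:
        optlist, args = getopt.getopt(sys.argv[1:], 'ho:', ['help', 'output='])
    except getopt.GetoptError, e:
        # report the bad option and fail loudly, as the patched script does
        print >>sys.stderr, "Error:", e
        sys.exit(1)
    for o, v in optlist:
        if o in ('-o', '--output'):
            print 'would save to %s' % v
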
diff --git a/test/base_test_code.py b/test/base_test_code.py
index 50c6348..5fb43f9 100644
--- a/test/base_test_code.py
+++ b/test/base_test_code.py
@@ -1,6 +1,6 @@
 from munittest import *
 
-base_http = 'http://www.linux.duke.edu/projects/urlgrabber/test/'
+base_http = 'http://urlgrabber.baseurl.org/test/'
 base_ftp = 'ftp://localhost/test/'
 
 # set to a proftp server only. we're working around a couple of
diff --git a/urlgrabber/byterange.py b/urlgrabber/byterange.py
index 3e5f3b7..8eeaeda 100644
--- a/urlgrabber/byterange.py
+++ b/urlgrabber/byterange.py
@@ -68,7 +68,7 @@ class HTTPRangeHandler(urllib2.BaseHandler):
 
     def http_error_416(self, req, fp, code, msg, hdrs):
         # HTTP's Range Not Satisfiable error
-        raise RangeError('Requested Range Not Satisfiable')
+        raise RangeError(9, 'Requested Range Not Satisfiable')
 
 class HTTPSRangeHandler(HTTPRangeHandler):
     """ Range Header support for HTTPS. """
@@ -208,7 +208,7 @@ class RangeableFileObject:
             bufsize = offset - pos
             buf = self.fo.read(bufsize)
             if len(buf) != bufsize:
-                raise RangeError('Requested Range Not Satisfiable')
+                raise RangeError(9, 'Requested Range Not Satisfiable')
             pos+= bufsize
 
 class FileRangeHandler(urllib2.FileHandler):
@@ -238,7 +238,7 @@ class FileRangeHandler(urllib2.FileHandler):
             (fb,lb) = brange
             if lb == '': lb = size
             if fb < 0 or fb > size or lb > size:
-                raise RangeError('Requested Range Not Satisfiable')
+                raise RangeError(9, 'Requested Range Not Satisfiable')
             size = (lb - fb)
             fo = RangeableFileObject(fo, (fb,lb))
         headers = mimetools.Message(StringIO(
@@ -318,12 +318,12 @@ class FTPRangeHandler(urllib2.FTPHandler):
             (fb,lb) = range_tup
             if lb == '':
                 if retrlen is None or retrlen == 0:
-                    raise RangeError('Requested Range Not Satisfiable due to unobtainable file length.')
+                    raise RangeError(9, 'Requested Range Not Satisfiable due to unobtainable file length.')
                 lb = retrlen
                 retrlen = lb - fb
                 if retrlen < 0:
                     # beginning of range is larger than file
-                    raise RangeError('Requested Range Not Satisfiable')
+                    raise RangeError(9, 'Requested Range Not Satisfiable')
             else:
                 retrlen = lb - fb
                 fp = RangeableFileObject(fp, (0,retrlen))
@@ -458,6 +458,6 @@ def range_tuple_normalize(range_tup):
     # check if range is over the entire file
     if (fb,lb) == (0,''): return None
     # check that the range is valid
-    if lb < fb: raise RangeError('Invalid byte range: %s-%s' % (fb,lb))
+    if lb < fb: raise RangeError(9, 'Invalid byte range: %s-%s' % (fb,lb))
     return (fb,lb)
 
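
Giving RangeError a leading numeric argument matters because the exception flows through the IOError machinery: with two arguments, the first lands in .errno and the second in .strerror, so callers can test a stable code instead of parsing the message. A minimal sketch (Python 2; assuming RangeError subclasses IOError as byterange.py defines it):

    from urlgrabber.byterange import RangeError

    try:
        raise RangeError(9, 'Requested Range Not Satisfiable')
    except RangeError, e:
        print e.errno      # 9
        print e.strerror   # 'Requested Range Not Satisfiable'
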
diff --git a/urlgrabber/grabber.py b/urlgrabber/grabber.py
index e090e90..b2770c5 100644
--- a/urlgrabber/grabber.py
+++ b/urlgrabber/grabber.py
@@ -68,14 +68,14 @@ GENERAL ARGUMENTS (kwargs)
     (which can be set on default_grabber.throttle) is used. See
     BANDWIDTH THROTTLING for more information.
 
-  timeout = None
+  timeout = 300
 
-    a positive float expressing the number of seconds to wait for socket
-    operations. If the value is None or 0.0, socket operations will block
-    forever. Setting this option causes urlgrabber to call the settimeout
-    method on the Socket object used for the request. See the Python
-    documentation on settimeout for more information.
-    http://www.python.org/doc/current/lib/socket-objects.html
+    a positive integer expressing the number of seconds to wait before
+    timing out attempts to connect to a server. If the value is None
+    or 0, connection attempts will not time out. The timeout is passed
+    to the underlying pycurl object as its CONNECTTIMEOUT option, see
+    the curl documentation on CURLOPT_CONNECTTIMEOUT for more information.
+    http://curl.haxx.se/libcurl/c/curl_easy_setopt.html#CURLOPTCONNECTTIMEOUT
 
   bandwidth = 0
 
@@ -198,6 +198,12 @@ GENERAL ARGUMENTS (kwargs)
     control, you should probably subclass URLParser and pass it in via
     the 'urlparser' option.
 
+  username = None
+    username to use for simple http auth - is automatically quoted for special characters
+
+  password = None
+    password to use for simple http auth - is automatically quoted for special characters
+
   ssl_ca_cert = None
 
     this option can be used if M2Crypto is available and will be
@@ -248,6 +254,11 @@ GENERAL ARGUMENTS (kwargs)
 
     Maximum size (in bytes) of the headers.
 
+  self.ip_resolve = 'whatever'
+
+    What type of name to IP resolving to use, default is to do both IPV4 and
+    IPV6.
+
 
 RETRY RELATED ARGUMENTS
 
@@ -420,6 +431,7 @@ import time
 import string
 import urllib
 import urllib2
+from httplib import responses
 import mimetools
 import thread
 import types
@@ -439,6 +451,12 @@ try:
 except:
     __version__ = '???'
 
+try:
+    # this part isn't going to do much - need to talk to gettext
+    from i18n import _
+except ImportError, msg:
+    def _(st): return st
+
 ########################################################################
 # functions for debugging output. These functions are here because they
 # are also part of the module initialization.
@@ -527,6 +545,22 @@ def _(st):
 # END MODULE INITIALIZATION
 ########################################################################
 
+########################################################################
+# UTILITY FUNCTIONS
+########################################################################
+
+# These functions are meant to be utilities for the urlgrabber library to use.
+
+def _to_utf8(obj, errors='replace'):
+    '''convert 'unicode' to an encoded utf-8 byte string '''
+    # stolen from yum.i18n
+    if isinstance(obj, unicode):
+        obj = obj.encode('utf-8', errors)
+    return obj
+
+########################################################################
+# END UTILITY FUNCTIONS
+########################################################################
 
 
 class URLGrabError(IOError):
@@ -662,6 +696,7 @@ class URLParser:
         opts.quote = 0     --> do not quote it
         opts.quote = None  --> guess
         """
+        url = _to_utf8(url)
         quote = opts.quote
 
         if opts.prefix:
@@ -800,6 +835,7 @@ class URLGrabberOptions:
         self.close_connection = 0
         self.range = None
         self.user_agent = 'urlgrabber/%s' % __version__
+        self.ip_resolve = None
         self.keepalive = 1
         self.proxies = None
         self.reget = None
@@ -808,13 +844,15 @@ class URLGrabberOptions:
         self.prefix = None
         self.opener = None
         self.cache_openers = True
-        self.timeout = None
+        self.timeout = 300
         self.text = None
         self.http_headers = None
         self.ftp_headers = None
         self.data = None
         self.urlparser = URLParser()
         self.quote = None
+        self.username = None
+        self.password = None
         self.ssl_ca_cert = None # sets SSL_CAINFO - path to certdb
         self.ssl_context = None # no-op in pycurl
         self.ssl_verify_peer = True # check peer's cert for authenticityb
@@ -846,7 +884,7 @@ class URLGrabberOptions:
         s = s + indent + '}'
         return s
 
-class URLGrabber:
+class URLGrabber(object):
     """Provides easy opening of URLs with a variety of options.
 
     All options are specified as kwargs. Options may be specified when
@@ -931,6 +969,9 @@ class URLGrabber:
         (scheme, host, path, parm, query, frag) = parts
         if filename is None:
             filename = os.path.basename( urllib.unquote(path) )
+            if not filename:
+                # This is better than nothing.
+                filename = 'index.html'
         if scheme == 'file' and not opts.copy_local:
             # just return the name of the local file - don't make a
             # copy currently
@@ -1030,7 +1071,7 @@ class URLGrabber:
 default_grabber = URLGrabber()
 
 
-class PyCurlFileObject():
+class PyCurlFileObject(object):
     def __init__(self, url, filename, opts):
         self.fo = None
         self._hdr_dump = ''
@@ -1052,9 +1093,15 @@ class PyCurlFileObject():
         self._reget_length = 0
         self._prog_running = False
         self._error = (None, None)
-        self.size = None
+        self.size = 0
+        self._hdr_ended = False
         self._do_open()
 
+
+    def geturl(self):
+        """ Provide the geturl() method, used to be got from
+            urllib.addinfourl, via. urllib.URLopener.* """
+        return self.url
 
     def __getattr__(self, name):
         """This effectively allows us to wrap at the instance level.
@@ -1085,9 +1132,14 @@ class PyCurlFileObject():
             return -1
 
     def _hdr_retrieve(self, buf):
+        if self._hdr_ended:
+            self._hdr_dump = ''
+            self.size = 0
+            self._hdr_ended = False
+
         if self._over_max_size(cur=len(self._hdr_dump),
                                max_size=self.opts.max_header_size):
-            return -1
+            return -1
         try:
             self._hdr_dump += buf
             # we have to get the size before we do the progress obj start
@@ -1104,7 +1156,17 @@ class PyCurlFileObject():
                 s = parse150(buf)
             if s:
                 self.size = int(s)
-
+
+            if buf.lower().find('location') != -1:
+                location = ':'.join(buf.split(':')[1:])
+                location = location.strip()
+                self.scheme = urlparse.urlsplit(location)[0]
+                self.url = location
+
+            if len(self._hdr_dump) != 0 and buf == '\r\n':
+                self._hdr_ended = True
+                if DEBUG: DEBUG.info('header ended:')
+
             return len(buf)
         except KeyboardInterrupt:
             return pycurl.READFUNC_ABORT
@@ -1113,8 +1175,10 @@ class PyCurlFileObject():
         if self._parsed_hdr:
             return self._parsed_hdr
         statusend = self._hdr_dump.find('\n')
+        statusend += 1 # ridiculous as it may seem.
         hdrfp = StringIO()
         hdrfp.write(self._hdr_dump[statusend:])
+        hdrfp.seek(0)
         self._parsed_hdr = mimetools.Message(hdrfp)
         return self._parsed_hdr
 
@@ -1136,11 +1200,21 @@ class PyCurlFileObject():
         self.curl_obj.setopt(pycurl.PROGRESSFUNCTION, self._progress_update)
         self.curl_obj.setopt(pycurl.FAILONERROR, True)
         self.curl_obj.setopt(pycurl.OPT_FILETIME, True)
+        self.curl_obj.setopt(pycurl.FOLLOWLOCATION, True)
 
         if DEBUG:
             self.curl_obj.setopt(pycurl.VERBOSE, True)
         if opts.user_agent:
             self.curl_obj.setopt(pycurl.USERAGENT, opts.user_agent)
+        if opts.ip_resolve:
+            # Default is: IPRESOLVE_WHATEVER
+            ipr = opts.ip_resolve.lower()
+            if ipr == 'whatever': # Do we need this?
+                self.curl_obj.setopt(pycurl.IPRESOLVE,pycurl.IPRESOLVE_WHATEVER)
+            if ipr == 'ipv4':
+                self.curl_obj.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V4)
+            if ipr == 'ipv6':
+                self.curl_obj.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V6)
 
         # maybe to be options later
         self.curl_obj.setopt(pycurl.FOLLOWLOCATION, True)
@@ -1148,9 +1222,11 @@ class PyCurlFileObject():
 
         # timeouts
         timeout = 300
-        if opts.timeout:
-            timeout = int(opts.timeout)
-            self.curl_obj.setopt(pycurl.CONNECTTIMEOUT, timeout)
+        if hasattr(opts, 'timeout'):
+            timeout = int(opts.timeout or 0)
+        self.curl_obj.setopt(pycurl.CONNECTTIMEOUT, timeout)
+        self.curl_obj.setopt(pycurl.LOW_SPEED_LIMIT, 1)
+        self.curl_obj.setopt(pycurl.LOW_SPEED_TIME, timeout)
 
         # ssl options
         if self.scheme == 'https':
@@ -1203,12 +1279,19 @@ class PyCurlFileObject():
             if proxy == '_none_': proxy = ""
             self.curl_obj.setopt(pycurl.PROXY, proxy)
 
-        # FIXME username/password/auth settings
+        if opts.username and opts.password:
+            if self.scheme in ('http', 'https'):
+                self.curl_obj.setopt(pycurl.HTTPAUTH, pycurl.HTTPAUTH_ANY)
+
+            if opts.username and opts.password:
+                # apparently when applying them as curlopts they do not require quoting of any kind
+                userpwd = '%s:%s' % (opts.username, opts.password)
+                self.curl_obj.setopt(pycurl.USERPWD, userpwd)
 
         #posts - simple - expects the fields as they are
         if opts.data:
             self.curl_obj.setopt(pycurl.POST, True)
-            self.curl_obj.setopt(pycurl.POSTFIELDS, self._to_utf8(opts.data))
+            self.curl_obj.setopt(pycurl.POSTFIELDS, _to_utf8(opts.data))
 
         # our url
         self.curl_obj.setopt(pycurl.URL, self.url)
@@ -1228,12 +1311,14 @@
 
             code = self.http_code
             errcode = e.args[0]
+            errurl = urllib.unquote(self.url)
+
             if self._error[0]:
                 errcode = self._error[0]
 
             if errcode == 23 and code >= 200 and code < 299:
-                err = URLGrabError(15, _('User (or something) called abort %s: %s') % (self.url, e))
-                err.url = self.url
+                err = URLGrabError(15, _('User (or something) called abort %s: %s') % (errurl, e))
+                err.url = errurl
 
                 # this is probably wrong but ultimately this is what happens
                 # we have a legit http code and a pycurl 'writer failed' code
@@ -1244,23 +1329,23 @@ class PyCurlFileObject():
                 raise KeyboardInterrupt
 
             elif errcode == 28:
-                err = URLGrabError(12, _('Timeout on %s: %s') % (self.url, e))
-                err.url = self.url
+                err = URLGrabError(12, _('Timeout on %s: %s') % (errurl, e))
+                err.url = errurl
                 raise err
             elif errcode == 35:
                 msg = _("problem making ssl connection")
                 err = URLGrabError(14, msg)
-                err.url = self.url
+                err.url = errurl
                 raise err
             elif errcode == 37:
-                msg = _("Could not open/read %s") % (self.url)
+                msg = _("Could not open/read %s") % (errurl)
                 err = URLGrabError(14, msg)
-                err.url = self.url
+                err.url = errurl
                 raise err
 
             elif errcode == 42:
-                err = URLGrabError(15, _('User (or something) called abort %s: %s') % (self.url, e))
-                err.url = self.url
+                err = URLGrabError(15, _('User (or something) called abort %s: %s') % (errurl, e))
+                err.url = errurl
                 # this is probably wrong but ultimately this is what happens
                 # we have a legit http code and a pycurl 'writer failed' code
                 # which almost always means something aborted it from outside
@@ -1272,33 +1357,93 @@ class PyCurlFileObject():
             elif errcode == 58:
                 msg = _("problem with the local client certificate")
                 err = URLGrabError(14, msg)
-                err.url = self.url
+                err.url = errurl
                 raise err
 
             elif errcode == 60:
-                msg = _("client cert cannot be verified or client cert incorrect")
+                msg = _("Peer cert cannot be verified or peer cert invalid")
                 err = URLGrabError(14, msg)
-                err.url = self.url
+                err.url = errurl
                 raise err
 
             elif errcode == 63:
                 if self._error[1]:
                     msg = self._error[1]
                 else:
-                    msg = _("Max download size exceeded on %s") % (self.url)
+                    msg = _("Max download size exceeded on %s") % (errurl)
                 err = URLGrabError(14, msg)
-                err.url = self.url
+                err.url = errurl
                 raise err
 
             elif str(e.args[1]) == '' and self.http_code != 0: # fake it until you make it
-                msg = 'HTTP Error %s : %s ' % (self.http_code, self.url)
+                if self.scheme in ['http', 'https']:
+                    if self.http_code in responses:
+                        resp = responses[self.http_code]
+                        msg = 'HTTP Error %s - %s : %s' % (self.http_code, resp, errurl)
+                    else:
+                        msg = 'HTTP Error %s : %s ' % (self.http_code, errurl)
+                elif self.scheme in ['ftp']:
+                    msg = 'FTP Error %s : %s ' % (self.http_code, errurl)
+                else:
+                    msg = "Unknown Error: URL=%s , scheme=%s" % (errurl, self.scheme)
             else:
-                msg = 'PYCURL ERROR %s - "%s"' % (errcode, str(e.args[1]))
+                pyerr2str = { 5 : _("Couldn't resolve proxy"),
+                              6 : _("Couldn't resolve host"),
+                              7 : _("Couldn't connect"),
+                              8 : _("Bad reply to FTP server"),
+                              9 : _("Access denied"),
+                             11 : _("Bad reply to FTP pass"),
+                             13 : _("Bad reply to FTP pasv"),
+                             14 : _("Bad reply to FTP 227"),
+                             15 : _("Couldn't get FTP host"),
+                             17 : _("Couldn't set FTP type"),
+                             18 : _("Partial file"),
+                             19 : _("FTP RETR command failed"),
+                             22 : _("HTTP returned error"),
+                             23 : _("Write error"),
+                             25 : _("Upload failed"),
+                             26 : _("Read error"),
+                             27 : _("Out of Memory"),
+                             28 : _("Operation timed out"),
+                             30 : _("FTP PORT command failed"),
+                             31 : _("FTP REST command failed"),
+                             33 : _("Range failed"),
+                             34 : _("HTTP POST failed"),
+                             35 : _("SSL CONNECT failed"),
+                             36 : _("Couldn't resume download"),
+                             37 : _("Couldn't read file"),
+                             42 : _("Aborted by callback"),
+                             47 : _("Too many redirects"),
+                             51 : _("Peer certificate failed verification"),
+                             53 : _("SSL engine not found"),
+                             54 : _("SSL engine set failed"),
+                             55 : _("Network error send()"),
+                             56 : _("Network error recv()"),
+                             58 : _("Local certificate failed"),
+                             59 : _("SSL set cipher failed"),
+                             60 : _("Local CA certificate failed"),
+                             61 : _("HTTP bad transfer encoding"),
+                             63 : _("Maximum file size exceeded"),
+                             64 : _("FTP SSL failed"),
+                             67 : _("Authentication failure"),
+                             70 : _("Out of disk space on server"),
+                             73 : _("Remove file exists"),
+                             }
+                errstr = str(e.args[1])
+                if not errstr:
+                    errstr = pyerr2str.get(errcode, '<Unknown>')
+                msg = 'curl#%s - "%s"' % (errcode, errstr)
                 code = errcode
             err = URLGrabError(14, msg)
             err.code = code
             err.exception = e
             raise err
+        else:
+            if self._error[1]:
+                msg = self._error[1]
+                err = URLGrabError(14, msg)
+                err.url = urllib.unquote(self.url)
+                raise err
 
     def _do_open(self):
         self.curl_obj = _curl_cache
@@ -1333,7 +1478,11 @@ class PyCurlFileObject():
 
         if self.opts.range:
             rt = self.opts.range
-            if rt[0]: rt = (rt[0] + reget_length, rt[1])
+
+            if rt[0] is None:
+                rt = (0, rt[1])
+            rt = (rt[0] + reget_length, rt[1])
+
 
         if rt:
             header = range_tuple_to_header(rt)
@@ -1434,9 +1583,13 @@ class PyCurlFileObject():
             #fh, self._temp_name = mkstemp()
             #self.fo = open(self._temp_name, 'wb')
 
-
-        self._do_perform()
-
+        try:
+            self._do_perform()
+        except URLGrabError, e:
+            self.fo.flush()
+            self.fo.close()
+            raise e
+
 
 
         if _was_filename:
@@ -1446,9 +1599,23 @@ class PyCurlFileObject():
             # set the time
             mod_time = self.curl_obj.getinfo(pycurl.INFO_FILETIME)
             if mod_time != -1:
-                os.utime(self.filename, (mod_time, mod_time))
+                try:
+                    os.utime(self.filename, (mod_time, mod_time))
+                except OSError, e:
+                    err = URLGrabError(16, _(\
+                      'error setting timestamp on file %s from %s, OSError: %s')
+                              % (self.filename, self.url, e))
+                    err.url = self.url
+                    raise err
             # re open it
-            self.fo = open(self.filename, 'r')
+            try:
+                self.fo = open(self.filename, 'r')
+            except IOError, e:
+                err = URLGrabError(16, _(\
+                  'error opening file from %s, IOError: %s') % (self.url, e))
+                err.url = self.url
+                raise err
+
         else:
             #self.fo = open(self._temp_name, 'r')
             self.fo.seek(0)
@@ -1532,11 +1699,14 @@ class PyCurlFileObject():
     def _over_max_size(self, cur, max_size=None):
 
         if not max_size:
-            max_size = self.size
-            if self.opts.size: # if we set an opts size use that, no matter what
-                max_size = self.opts.size
+            if not self.opts.size:
+                max_size = self.size
+            else:
+                max_size = self.opts.size
+
         if not max_size: return False # if we have None for all of the Max then this is dumb
-        if cur > max_size + max_size*.10:
+
+        if cur > int(float(max_size) * 1.10):
 
             msg = _("Downloaded more than max size for %s: %s > %s") \
                         % (self.url, cur, max_size)
@@ -1544,13 +1714,6 @@ class PyCurlFileObject():
             return True
         return False
 
-    def _to_utf8(self, obj, errors='replace'):
-        '''convert 'unicode' to an encoded utf-8 byte string '''
-        # stolen from yum.i18n
-        if isinstance(obj, unicode):
-            obj = obj.encode('utf-8', errors)
-        return obj
-
     def read(self, amt=None):
         self._fill_buffer(amt)
         if amt is None:
@@ -1582,9 +1745,21 @@ class PyCurlFileObject():
             self.opts.progress_obj.end(self._amount_read)
         self.fo.close()
 
-
+    def geturl(self):
+        """ Provide the geturl() method, used to be got from
+            urllib.addinfourl, via. urllib.URLopener.* """
+        return self.url
+
 _curl_cache = pycurl.Curl() # make one and reuse it over and over and over
 
+def reset_curl_obj():
+    """To make sure curl has reread the network/dns info we force a reload"""
+    global _curl_cache
+    _curl_cache.close()
+    _curl_cache = pycurl.Curl()
+
+
+
 
 #####################################################################
 # DEPRECATED FUNCTIONS
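
The grabber.py changes are easiest to see from the caller's side: timeout now drives pycurl's CONNECTTIMEOUT and LOW_SPEED_TIME, ip_resolve pins name resolution to one address family, username/password enable simple HTTP auth, and failures surface as URLGrabError carrying the friendlier curl#NN strings above. A minimal sketch (untested; the host and credentials are placeholders):

    from urlgrabber.grabber import URLGrabber, URLGrabError

    g = URLGrabber(timeout=30,          # seconds; None or 0 disables the connect timeout
                   ip_resolve='ipv4',   # 'whatever' (default), 'ipv4' or 'ipv6'
                   username='user',     # placeholder credentials for simple HTTP auth
                   password='secret')
    try:
        path = g.urlgrab('http://example.com/some/file')
    except URLGrabError, e:
        # errno 12 = timeout, 14 = fatal pycurl/HTTP error (see the curl#NN map)
        print 'download failed: [%d] %s' % (e.errno, e.strerror)
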
diff --git a/urlgrabber/mirror.py b/urlgrabber/mirror.py
index dad410b..8731aed 100644
--- a/urlgrabber/mirror.py
+++ b/urlgrabber/mirror.py
@@ -90,7 +90,7 @@ CUSTOMIZATION
 import random
 import thread  # needed for locking to make this threadsafe
 
-from grabber import URLGrabError, CallbackObject, DEBUG
+from grabber import URLGrabError, CallbackObject, DEBUG, _to_utf8
 
 def _(st):
     return st
@@ -263,7 +263,8 @@ class MirrorGroup:
     def _parse_mirrors(self, mirrors):
         parsed_mirrors = []
         for m in mirrors:
-            if type(m) == type(''): m = {'mirror': m}
+            if isinstance(m, basestring):
+                m = {'mirror': _to_utf8(m)}
             parsed_mirrors.append(m)
         return parsed_mirrors
 
diff --git a/urlgrabber/progress.py b/urlgrabber/progress.py
index dd07c6a..45eb248 100644
--- a/urlgrabber/progress.py
+++ b/urlgrabber/progress.py
@@ -658,6 +658,8 @@ def format_time(seconds, use_hours=0):
     if seconds is None or seconds < 0:
         if use_hours: return '--:--:--'
         else:         return '--:--'
+    elif seconds == float('inf'):
+        return 'Infinite'
     else:
         seconds = int(seconds)
         minutes = seconds / 60
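
The new branch guards the seconds = int(seconds) conversion below it, which raises OverflowError for float('inf'), an ETA that legitimately occurs when the measured transfer rate is zero. In short:

    from urlgrabber.progress import format_time

    print format_time(90)            # '01:30'
    print format_time(None)          # '--:--'
    print format_time(float('inf'))  # 'Infinite' (previously an OverflowError)
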