# This file is part of urlgrabber, a high-level cross-protocol url-grabber
# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
-# Copyright 2009 Red Hat inc, pycurl code written by Seth Vidal
"""A high-level cross-protocol url-grabber.
text = None
- specifies alternative text to be passed to the progress meter
- object. If not given, the default progress meter will use the
- basename of the file.
+ specifies alternative text to be shown at the beginning of the
+ progress bar line. If not given, the basename of the file is used.
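+
+    For example (an illustrative call; the URL and filename are made up):
+
+      urlgrab('http://foo.com/bar.rpm', 'bar.rpm', text='bar.rpm (1/3)')
+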
throttle = 1.0
chain integrity. You are responsible for ensuring that any
extension handlers are present if said features are required.
- cache_openers = True
-
- controls whether urllib2 openers should be cached and reused, or
- whether they should be created each time. There's a modest
- overhead in recreating them, but it's slightly safer to do so if
- you're modifying the handlers between calls.
-
data = None
Only relevant for the HTTP family (and ignored for other
badly and if you do not use the proper case (shown here), your
values will be overridden with the defaults.
- urlparser = URLParser()
-
- The URLParser class handles pre-processing of URLs, including
- auth-handling for user/pass encoded in http urls, file handing
- (that is, filenames not sent as a URL), and URL quoting. If you
- want to override any of this behavior, you can pass in a
- replacement instance. See also the 'quote' option.
-
- quote = None
-
- Whether or not to quote the path portion of a url.
- quote = 1 -> quote the URLs (they're not quoted yet)
- quote = 0 -> do not quote them (they're already quoted)
- quote = None -> guess what to do
-
- This option only affects proper urls like 'file:///etc/passwd'; it
- does not affect 'raw' filenames like '/etc/passwd'. The latter
- will always be quoted as they are converted to URLs. Also, only
- the path part of a url is quoted. If you need more fine-grained
- control, you should probably subclass URLParser and pass it in via
- the 'urlparser' option.
-
- ssl_ca_cert = None
-
- this option can be used if M2Crypto is available and will be
- ignored otherwise. If provided, it will be used to create an SSL
- context. If both ssl_ca_cert and ssl_context are provided, then
- ssl_context will be ignored and a new context will be created from
- ssl_ca_cert.
-
- ssl_context = None
-
- this option can be used if M2Crypto is available and will be
- ignored otherwise. If provided, this SSL context will be used.
- If both ssl_ca_cert and ssl_context are provided, then ssl_context
- will be ignored and a new context will be created from
- ssl_ca_cert.
-
RETRY RELATED ARGUMENTS
passed the same arguments, so you could use the same function for
both.
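
    As a sketch of the callback convention used in this module (the names
    below are invented): callbacks may be given either as a bare callable
    or as a (callable, args, kwargs) tuple, and checkfunc callbacks receive
    a CallbackObject with attributes such as .url and .filename.

      def my_check(cb_obj, expected_size):
          if os.path.getsize(cb_obj.filename) != expected_size:
              raise URLGrabError(-1, 'size mismatch for %s' % cb_obj.url)

      urlgrab(url, filename, checkfunc=(my_check, (1234,), {}))
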
+ urlparser = URLParser()
+
+ The URLParser class handles pre-processing of URLs, including
+ auth-handling for user/pass encoded in http urls, file handling
+ (that is, filenames not sent as a URL), and URL quoting. If you
+ want to override any of this behavior, you can pass in a
+ replacement instance. See also the 'quote' option.
+
+ quote = None
+
+ Whether or not to quote the path portion of a url.
+ quote = 1 -> quote the URLs (they're not quoted yet)
+ quote = 0 -> do not quote them (they're already quoted)
+ quote = None -> guess what to do
+
+ This option only affects proper urls like 'file:///etc/passwd'; it
+ does not affect 'raw' filenames like '/etc/passwd'. The latter
+ will always be quoted as they are converted to URLs. Also, only
+ the path part of a url is quoted. If you need more fine-grained
+ control, you should probably subclass URLParser and pass it in via
+ the 'urlparser' option.
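+
+    For example (illustrative): the path below contains a space and has
+    not been quoted yet, so quote=1 asks urlgrabber to quote it.
+
+      urlgrab('http://foo.com/some dir/bar.rpm', 'bar.rpm', quote=1)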
+
BANDWIDTH THROTTLING
urlgrabber supports throttling via two values: throttle and
"""
-# $Id: grabber.py,v 1.52 2006/12/12 19:08:46 mstenner Exp $
+# $Id: grabber.py,v 1.48 2006/09/22 00:58:05 mstenner Exp $
import os
import os.path
import string
import urllib
import urllib2
-import mimetools
-import thread
from stat import * # S_* and ST_*
-import pycurl
-from ftplib import parse150
-from StringIO import StringIO
-from tempfile import mkstemp
########################################################################
# MODULE INITIALIZATION
import keepalive
from keepalive import HTTPHandler, HTTPSHandler
have_keepalive = True
- keepalive_http_handler = HTTPHandler()
except ImportError, msg:
have_keepalive = False
- keepalive_http_handler = None
try:
# add in range support conditionally too
if sslfactory.DEBUG is None:
sslfactory.DEBUG = DBOBJ
-def _init_default_logger(logspec=None):
+def _init_default_logger():
'''Examines the environment variable URLGRABBER_DEBUG and creates
a logging object (logging.logger) based on the contents. It takes
the form
collect the code into a nice block.'''
try:
- if logspec is None:
- logspec = os.environ['URLGRABBER_DEBUG']
- dbinfo = logspec.split(',')
+ dbinfo = os.environ['URLGRABBER_DEBUG'].split(',')
import logging
- level = logging._levelNames.get(dbinfo[0], None)
- if level is None: level = int(dbinfo[0])
+ level = logging._levelNames.get(dbinfo[0], int(dbinfo[0]))
if level < 1: raise ValueError()
formatter = logging.Formatter('%(asctime)s %(message)s')
DBOBJ = None
set_logger(DBOBJ)
-def _log_package_state():
- if not DEBUG: return
- DEBUG.info('urlgrabber version = %s' % __version__)
- DEBUG.info('have_m2crypto = %s' % sslfactory.have_m2crypto)
- DEBUG.info('trans function "_" = %s' % _)
- DEBUG.info('have_keepalive = %s' % have_keepalive)
- DEBUG.info('have_range = %s' % have_range)
- DEBUG.info('have_socket_timeout = %s' % have_socket_timeout)
-
_init_default_logger()
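
# Example (shell usage sketch; the full format of URLGRABBER_DEBUG is given in
# the docstring above, which is truncated here -- the first comma-separated
# field is a logging level name or number, the remainder selects the output):
#
#   URLGRABBER_DEBUG=INFO,urlgrabber.log python myscript.py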
-_log_package_state()
########################################################################
# END MODULE INITIALIZATION
########################################################################
13 - malformed proxy url
14 - HTTPError (includes .code and .exception attributes)
15 - user abort
- 16 - error writing to local file
MirrorGroup error codes (256 -- 511)
256 - No more mirrors left to try
# or simply
print e #### print '[Errno %i] %s' % (e.errno, e.strerror)
"""
- def __init__(self, *args):
- IOError.__init__(self, *args)
- self.url = "No url specified"
+ pass
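
# A handling sketch (illustrative): URLGrabError subclasses IOError, so the
# (errno, strerror) pair documented above is available on the exception, and
# errno 14 additionally carries .code and .exception (set where HTTPError is
# caught below).
#
#   try:
#       urlgrab(url, filename)
#   except URLGrabError, e:
#       if e.errno == 14:
#           print 'HTTP %s: %s' % (e.code, e.strerror)
#       else:
#           print '[Errno %i] %s' % (e.errno, e.strerror)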
class CallbackObject:
"""Container for returned callback data.
if ':' in user_pass:
user, password = user_pass.split(':', 1)
except ValueError, e:
- err = URLGrabError(1, _('Bad URL: %s') % url)
- err.url = url
- raise err
+ raise URLGrabError(1, _('Bad URL: %s') % url)
if DEBUG: DEBUG.info('adding HTTP auth: %s, %s', user, password)
auth_handler.add_password(None, host, user, password)
self.ssl_ca_cert = None
self.ssl_context = None
- def __repr__(self):
- return self.format()
-
- def format(self, indent=' '):
- keys = self.__dict__.keys()
- if self.delegate is not None:
- keys.remove('delegate')
- keys.sort()
- s = '{\n'
- for k in keys:
- s = s + indent + '%-15s: %s,\n' % \
- (repr(k), repr(self.__dict__[k]))
- if self.delegate:
- df = self.delegate.format(indent + ' ')
- s = s + indent + '%-15s: %s\n' % ("'delegate'", df)
- s = s + indent + '}'
- return s
-
class URLGrabber:
"""Provides easy opening of URLs with a variety of options.
like any other file object.
"""
opts = self.opts.derive(**kwargs)
- if DEBUG: DEBUG.debug('combined options: %s' % repr(opts))
(url,parts) = opts.urlparser.parse(url, opts)
def retryfunc(opts, url):
- return PyCurlFileObject(url, filename=None, opts=opts)
+ return URLGrabberFileObject(url, filename=None, opts=opts)
return self._retry(opts, retryfunc, url)
def urlgrab(self, url, filename=None, **kwargs):
different from the passed-in filename if copy_local == 0.
"""
opts = self.opts.derive(**kwargs)
- if DEBUG: DEBUG.debug('combined options: %s' % repr(opts))
(url,parts) = opts.urlparser.parse(url, opts)
(scheme, host, path, parm, query, frag) = parts
if filename is None:
if host:
path = os.path.normpath('//' + host + path)
if not os.path.exists(path):
- err = URLGrabError(2,
+ raise URLGrabError(2,
_('Local file does not exist: %s') % (path, ))
- err.url = url
- raise err
elif not os.path.isfile(path):
- err = URLGrabError(3,
- _('Not a normal file: %s') % (path, ))
- err.url = url
- raise err
-
+ raise URLGrabError(3,
+ _('Not a normal file: %s') % (path, ))
elif not opts.range:
- if not opts.checkfunc is None:
- cb_func, cb_args, cb_kwargs = \
- self._make_callback(opts.checkfunc)
- obj = CallbackObject()
- obj.filename = path
- obj.url = url
- apply(cb_func, (obj, )+cb_args, cb_kwargs)
return path
def retryfunc(opts, url, filename):
- fo = PyCurlFileObject(url, filename, opts)
+ fo = URLGrabberFileObject(url, filename, opts)
try:
fo._do_grab()
if not opts.checkfunc is None:
into memory, but don't use too much'
"""
opts = self.opts.derive(**kwargs)
- if DEBUG: DEBUG.debug('combined options: %s' % repr(opts))
(url,parts) = opts.urlparser.parse(url, opts)
if limit is not None:
limit = limit + 1
def retryfunc(opts, url, limit):
- fo = PyCurlFileObject(url, filename=None, opts=opts)
+ fo = URLGrabberFileObject(url, filename=None, opts=opts)
s = ''
try:
# this is an unfortunate thing. Some file-like objects
s = self._retry(opts, retryfunc, url, limit)
if limit and len(s) > limit:
- err = URLGrabError(8,
- _('Exceeded limit (%i): %s') % (limit, url))
- err.url = url
- raise err
-
+ raise URLGrabError(8,
+ _('Exceeded limit (%i): %s') % (limit, url))
return s
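
    # Usage sketch (illustrative): read at most 64 kB into a string; a longer
    # document raises URLGrabError(8) as above.
    #
    #   text = URLGrabber().urlread('http://foo.com/mirrors.txt', limit=64*1024)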
def _make_callback(self, callback_obj):
# it _must_ come before all other handlers in the list or urllib2
# chokes.
if self.opts.proxies:
- handlers.append( _proxy_handler_cache.get(self.opts.proxies) )
+ handlers.append( CachedProxyHandler(self.opts.proxies) )
# -------------------------------------------------------
# OK, these next few lines are a serious kludge to get
handlers.append( urllib2.FTPHandler() )
# -------------------------------------------------------
+ ssl_factory = sslfactory.get_factory(self.opts.ssl_ca_cert,
+ self.opts.ssl_context)
- ssl_factory = _ssl_factory_cache.get( (self.opts.ssl_ca_cert,
- self.opts.ssl_context) )
if need_keepalive_handler:
- handlers.append(keepalive_http_handler)
- handlers.append(_https_handler_cache.get(ssl_factory))
+ handlers.append(HTTPHandler())
+ handlers.append(HTTPSHandler(ssl_factory))
if need_range_handler:
handlers.extend( range_handlers )
handlers.append( auth_handler )
if self.opts.cache_openers:
- self._opener = _opener_cache.get([ssl_factory,] + handlers)
+ self._opener = CachedOpenerDirector(ssl_factory, *handlers)
else:
- self._opener = _opener_cache.create([ssl_factory,] + handlers)
+ self._opener = ssl_factory.create_opener(*handlers)
# OK, I don't like to do this, but otherwise, we end up with
# TWO user-agent headers.
self._opener.addheaders = []
if self.opts.range:
if not have_range:
- err = URLGrabError(10, _('Byte range requested but range '\
- 'support unavailable %s') % self.url)
- err.url = self.url
- raise err
-
+ raise URLGrabError(10, _('Byte range requested but range '\
+ 'support unavailable'))
rt = self.opts.range
if rt[0]: rt = (rt[0] + reget_length, rt[1])
fo = opener.open(req)
hdr = fo.info()
except ValueError, e:
- err = URLGrabError(1, _('Bad URL: %s : %s') % (self.url, e, ))
- err.url = self.url
- raise err
-
+ raise URLGrabError(1, _('Bad URL: %s') % (e, ))
except RangeError, e:
- err = URLGrabError(9, _('%s on %s') % (e, self.url))
- err.url = self.url
- raise err
+ raise URLGrabError(9, str(e))
except urllib2.HTTPError, e:
- new_e = URLGrabError(14, _('%s on %s') % (e, self.url))
+ new_e = URLGrabError(14, str(e))
new_e.code = e.code
new_e.exception = e
- new_e.url = self.url
raise new_e
except IOError, e:
if hasattr(e, 'reason') and have_socket_timeout and \
isinstance(e.reason, TimeoutError):
- err = URLGrabError(12, _('Timeout on %s: %s') % (self.url, e))
- err.url = self.url
- raise err
+ raise URLGrabError(12, _('Timeout: %s') % (e, ))
else:
- err = URLGrabError(4, _('IOError on %s: %s') % (self.url, e))
- err.url = self.url
- raise err
-
+ raise URLGrabError(4, _('IOError: %s') % (e, ))
except OSError, e:
- err = URLGrabError(5, _('%s on %s') % (e, self.url))
- err.url = self.url
- raise err
-
+ raise URLGrabError(5, _('OSError: %s') % (e, ))
except HTTPException, e:
- err = URLGrabError(7, _('HTTP Exception (%s) on %s: %s') % \
- (e.__class__.__name__, self.url, e))
- err.url = self.url
- raise err
-
+ raise URLGrabError(7, _('HTTP Exception (%s): %s') % \
+ (e.__class__.__name__, e))
else:
return (fo, hdr)
def _do_grab(self):
"""dump the file to self.filename."""
- if self.append: mode = 'ab'
- else: mode = 'wb'
- if DEBUG: DEBUG.info('opening local file "%s" with mode %s' % \
- (self.filename, mode))
- try:
- new_fo = open(self.filename, mode)
- except IOError, e:
- err = URLGrabError(16, _(\
- 'error opening local file from %s, IOError: %s') % (self.url, e))
- err.url = self.url
- raise err
-
- try:
- # if we have a known range, only try to read that much.
- (low, high) = self.opts.range
- amount = high - low
- except TypeError, ValueError:
- amount = None
+ if self.append: new_fo = open(self.filename, 'ab')
+ else: new_fo = open(self.filename, 'wb')
bs = 1024*8
size = 0
- if amount is not None: bs = min(bs, amount - size)
block = self.read(bs)
size = size + len(block)
while block:
- try:
- new_fo.write(block)
- except IOError, e:
- err = URLGrabError(16, _(\
- 'error writing to local file from %s, IOError: %s') % (self.url, e))
- err.url = self.url
- raise err
- if amount is not None: bs = min(bs, amount - size)
+ new_fo.write(block)
block = self.read(bs)
size = size + len(block)
try:
new = self.fo.read(readamount)
except socket.error, e:
- err = URLGrabError(4, _('Socket Error on %s: %s') % (self.url, e))
- err.url = self.url
- raise err
-
+ raise URLGrabError(4, _('Socket Error: %s') % (e, ))
except TimeoutError, e:
- raise URLGrabError(12, _('Timeout on %s: %s') % (self.url, e))
- err.url = self.url
- raise err
-
+ raise URLGrabError(12, _('Timeout: %s') % (e, ))
except IOError, e:
- raise URLGrabError(4, _('IOError on %s: %s') %(self.url, e))
- err.url = self.url
- raise err
-
+ raise URLGrabError(4, _('IOError: %s') %(e,))
newsize = len(new)
if not newsize: break # no more to read
try: self.fo.close_connection()
except: pass
-
-class PyCurlFileObject():
- def __init__(self, url, filename, opts):
- self.fo = None
- self._hdr_dump = ''
- self._parsed_hdr = None
- self.url = url
- self.scheme = urlparse.urlsplit(self.url)[0]
- self.filename = filename
- self.append = False
- self.opts = opts
- self._complete = False
- self.reget_time = None
- self._rbuf = ''
- self._rbufsize = 1024*8
- self._ttime = time.time()
- self._tsize = 0
- self._amount_read = 0
- self._reget_length = 0
- self._prog_running = False
- self.size = 0
- self._do_open()
-
-
- def __getattr__(self, name):
- """This effectively allows us to wrap at the instance level.
- Any attribute not found in _this_ object will be searched for
- in self.fo. This includes methods."""
-
- if hasattr(self.fo, name):
- return getattr(self.fo, name)
- raise AttributeError, name
-
- def _retrieve(self, buf):
- if not self._prog_running:
- if self.opts.progress_obj:
- size = self.size + self._reget_length
- self.opts.progress_obj.start(self._prog_reportname,
- urllib.unquote(self.url),
- self._prog_basename,
- size=size,
- text=self.opts.text)
- self._prog_running = True
- self.opts.progress_obj.update(self._amount_read)
-
- self._amount_read += len(buf)
- self.fo.write(buf)
- return len(buf)
-
- def _hdr_retrieve(self, buf):
- self._hdr_dump += buf
- # we have to get the size before we do the progress obj start
- # but we can't do that w/o making it do 2 connects, which sucks
- # so we cheat and stuff it in here in the hdr_retrieve
- if self.scheme in ['http','https'] and buf.lower().find('content-length') != -1:
- length = buf.split(':')[1]
- self.size = int(length)
- elif self.scheme in ['ftp']:
- s = None
- if buf.startswith('213 '):
- s = buf[3:].strip()
- elif buf.startswith('150 '):
- s = parse150(buf)
- if s:
- self.size = s
-
- return len(buf)
-
- def _return_hdr_obj(self):
- if self._parsed_hdr:
- return self._parsed_hdr
- statusend = self._hdr_dump.find('\n')
- hdrfp = StringIO()
- hdrfp.write(self._hdr_dump[statusend:])
- self._parsed_hdr = mimetools.Message(hdrfp)
- return self._parsed_hdr
-
- hdr = property(_return_hdr_obj)
- http_code = property(fget=
- lambda self: self.curl_obj.getinfo(pycurl.RESPONSE_CODE))
-
- def _set_opts(self, opts={}):
- # XXX
- if not opts:
- opts = self.opts
-
-
- # defaults we're always going to set
- self.curl_obj.setopt(pycurl.NOPROGRESS, 0)
- self.curl_obj.setopt(pycurl.WRITEFUNCTION, self._retrieve)
- self.curl_obj.setopt(pycurl.HEADERFUNCTION, self._hdr_retrieve)
- self.curl_obj.setopt(pycurl.PROGRESSFUNCTION, self._progress_update)
- self.curl_obj.setopt(pycurl.FAILONERROR, 1)
-
- if DEBUG:
- self.curl_obj.setopt(pycurl.VERBOSE, True)
- if opts.user_agent:
- self.curl_obj.setopt(pycurl.USERAGENT, opts.user_agent)
-
- # maybe to be options later
- self.curl_obj.setopt(pycurl.FOLLOWLOCATION, 1)
- self.curl_obj.setopt(pycurl.MAXREDIRS, 5)
- self.curl_obj.setopt(pycurl.CONNECTTIMEOUT, 30)
-
- # timeouts
- timeout = 300
- if opts.timeout:
- timeout = int(opts.timeout)
- self.curl_obj.setopt(pycurl.TIMEOUT, timeout)
- # ssl options
- if self.scheme == 'https':
- if opts.ssl_ca_cert: # this may do ZERO with nss according to curl docs
- self.curl_obj.setopt(pycurl.CAPATH, opts.ssl_ca_cert)
-
- #headers:
- if opts.http_headers and self.scheme in ('http', 'https'):
- headers = []
- for (tag, content) in opts.http_headers:
- headers.append('%s:%s' % (tag, content))
- self.curl_obj.setopt(pycurl.HTTPHEADER, headers)
-
- # ranges:
- if opts.range or opts.reget:
- range_str = self._build_range()
- if range_str:
- self.curl_obj.setopt(pycurl.RANGE, range_str)
-
- # throttle/bandwidth
- if hasattr(opts, 'raw_throttle') and opts.raw_throttle():
- self.curl_obj.setopt(pycurl.MAX_RECV_SPEED_LARGE, int(opts.raw_throttle()))
-
- # proxy settings
- if opts.proxies:
- for (scheme, proxy) in opts.proxies.items():
- if self.scheme in ('ftp'): # only set the ftp proxy for ftp items
- if scheme not in ('ftp'):
- continue
- else:
- self.curl_obj.setopt(pycurl.PROXY, proxy)
- elif self.scheme in ('http', 'https'):
- if scheme not in ('http', 'https'):
- continue
- else:
- self.curl_obj.setopt(pycurl.PROXY, proxy)
-
- # username/password/auth settings
-
- #posts - simple - expects the fields as they are
- if opts.data:
- self.curl_obj.setopt(pycurl.POST, True)
- self.curl_obj.setopt(pycurl.POSTFIELDS, opts.data)
-
- # our url
- self.curl_obj.setopt(pycurl.URL, self.url)
-
-
- def _do_perform(self):
- if self._complete:
- return
-
- try:
- self.curl_obj.perform()
- except pycurl.error, e:
- # XXX - break some of these out a bit more clearly
- # to other URLGrabErrors from
- # http://curl.haxx.se/libcurl/c/libcurl-errors.html
- # this covers e.args[0] == 22 pretty well - which will be common
- if str(e.args[1]) == '': # fake it until you make it
- msg = 'HTTP Error %s : %s ' % (self.http_code, self.url)
- else:
- msg = str(e.args[1])
- err = URLGrabError(14, msg)
- err.code = self.http_code
- err.exception = e
- raise err
-
- def _do_open(self):
- self.append = False
- self.reget_time = None
- self.curl_obj = _curl_cache
- self.curl_obj.reset() # reset all old settings away, just in case
- # setup any ranges
- self._set_opts()
- self._do_grab()
- return self.fo
-
- def _add_headers(self):
- pass
-
- def _build_range(self):
- self.reget_time = None
- self.append = False
- reget_length = 0
- rt = None
- if self.opts.reget and type(self.filename) == type(''):
- # we have reget turned on and we're dumping to a file
- try:
- s = os.stat(self.filename)
- except OSError:
- pass
- else:
- self.reget_time = s[ST_MTIME]
- reget_length = s[ST_SIZE]
-
- # Set initial length when regetting
- self._amount_read = reget_length
- self._reget_length = reget_length # set where we started from, too
-
- rt = reget_length, ''
- self.append = 1
-
- if self.opts.range:
- rt = self.opts.range
- if rt[0]: rt = (rt[0] + reget_length, rt[1])
-
- if rt:
- header = range_tuple_to_header(rt)
- if header:
- return header.split('=')[1]
-
-
-
- def _make_request(self, req, opener):
- #XXXX
- # This doesn't do anything really, but we could use this
- # instead of do_open() to catch a lot of crap errors as
- # mstenner did before here
- return (self.fo, self.hdr)
-
- try:
- if have_socket_timeout and self.opts.timeout:
- old_to = socket.getdefaulttimeout()
- socket.setdefaulttimeout(self.opts.timeout)
- try:
- fo = opener.open(req)
- finally:
- socket.setdefaulttimeout(old_to)
- else:
- fo = opener.open(req)
- hdr = fo.info()
- except ValueError, e:
- err = URLGrabError(1, _('Bad URL: %s : %s') % (self.url, e, ))
- err.url = self.url
- raise err
-
- except RangeError, e:
- err = URLGrabError(9, _('%s on %s') % (e, self.url))
- err.url = self.url
- raise err
- except urllib2.HTTPError, e:
- new_e = URLGrabError(14, _('%s on %s') % (e, self.url))
- new_e.code = e.code
- new_e.exception = e
- new_e.url = self.url
- raise new_e
- except IOError, e:
- if hasattr(e, 'reason') and have_socket_timeout and \
- isinstance(e.reason, TimeoutError):
- err = URLGrabError(12, _('Timeout on %s: %s') % (self.url, e))
- err.url = self.url
- raise err
- else:
- err = URLGrabError(4, _('IOError on %s: %s') % (self.url, e))
- err.url = self.url
- raise err
-
- except OSError, e:
- err = URLGrabError(5, _('%s on %s') % (e, self.url))
- err.url = self.url
- raise err
-
- except HTTPException, e:
- err = URLGrabError(7, _('HTTP Exception (%s) on %s: %s') % \
- (e.__class__.__name__, self.url, e))
- err.url = self.url
- raise err
-
- else:
- return (fo, hdr)
-
- def _do_grab(self):
- """dump the file to a filename or StringIO buffer"""
-
- if self._complete:
- return
-
- if self.filename:
- self._prog_reportname = str(self.filename)
- self._prog_basename = os.path.basename(self.filename)
-
- if self.append: mode = 'ab'
- else: mode = 'wb'
-
- if DEBUG: DEBUG.info('opening local file "%s" with mode %s' % \
- (self.filename, mode))
- try:
- self.fo = open(self.filename, mode)
- except IOError, e:
- err = URLGrabError(16, _(\
- 'error opening local file from %s, IOError: %s') % (self.url, e))
- err.url = self.url
- raise err
-
- else:
- self._prog_reportname = 'MEMORY'
- self._prog_basename = 'MEMORY'
- fh, self._temp_name = mkstemp()
-
- self.fo = open(self._temp_name, 'wb')
-
-
- self._do_perform()
-
-
- # close it up
- self.fo.flush()
- self.fo.close()
-
- if self.filename:
- # set the time
- mod_time = self.curl_obj.getinfo(pycurl.INFO_FILETIME)
- if mod_time != -1:
- os.utime(self.filename, (mod_time, mod_time))
- # re open it
- self.fo = open(self.filename, 'r')
- else:
- self.fo = open(self._temp_name, 'r')
-
- self._complete = True
-
- def _fill_buffer(self, amt=None):
- """fill the buffer to contain at least 'amt' bytes by reading
- from the underlying file object. If amt is None, then it will
- read until it gets nothing more. It updates the progress meter
- and throttles after every self._rbufsize bytes."""
- # the _rbuf test is only in this first 'if' for speed. It's not
- # logically necessary
- if self._rbuf and not amt is None:
- L = len(self._rbuf)
- if amt > L:
- amt = amt - L
- else:
- return
-
- # if we've made it here, then we don't have enough in the buffer
- # and we need to read more.
-
- if not self._complete: self._do_grab() #XXX cheater - change on ranges
-
- buf = [self._rbuf]
- bufsize = len(self._rbuf)
- while amt is None or amt:
- # first, delay if necessary for throttling reasons
- if self.opts.raw_throttle():
- diff = self._tsize/self.opts.raw_throttle() - \
- (time.time() - self._ttime)
- if diff > 0: time.sleep(diff)
- self._ttime = time.time()
-
- # now read some data, up to self._rbufsize
- if amt is None: readamount = self._rbufsize
- else: readamount = min(amt, self._rbufsize)
- try:
- new = self.fo.read(readamount)
- except socket.error, e:
- err = URLGrabError(4, _('Socket Error on %s: %s') % (self.url, e))
- err.url = self.url
- raise err
-
- except TimeoutError, e:
- raise URLGrabError(12, _('Timeout on %s: %s') % (self.url, e))
- err.url = self.url
- raise err
-
- except IOError, e:
- raise URLGrabError(4, _('IOError on %s: %s') %(self.url, e))
- err.url = self.url
- raise err
-
- newsize = len(new)
- if not newsize: break # no more to read
-
- if amt: amt = amt - newsize
- buf.append(new)
- bufsize = bufsize + newsize
- self._tsize = newsize
- self._amount_read = self._amount_read + newsize
- #if self.opts.progress_obj:
- # self.opts.progress_obj.update(self._amount_read)
-
- self._rbuf = string.join(buf, '')
- return
-
- def _progress_update(self, download_total, downloaded, upload_total, uploaded):
- if self._prog_running:
- downloaded += self._reget_length
- self.opts.progress_obj.update(downloaded)
-
- def read(self, amt=None):
- self._fill_buffer(amt)
- if amt is None:
- s, self._rbuf = self._rbuf, ''
- else:
- s, self._rbuf = self._rbuf[:amt], self._rbuf[amt:]
- return s
-
- def readline(self, limit=-1):
- if not self._complete: self._do_grab()
- return self.fo.readline()
-
- i = string.find(self._rbuf, '\n')
- while i < 0 and not (0 < limit <= len(self._rbuf)):
- L = len(self._rbuf)
- self._fill_buffer(L + self._rbufsize)
- if not len(self._rbuf) > L: break
- i = string.find(self._rbuf, '\n', L)
-
- if i < 0: i = len(self._rbuf)
- else: i = i+1
- if 0 <= limit < len(self._rbuf): i = limit
-
- s, self._rbuf = self._rbuf[:i], self._rbuf[i:]
- return s
-
- def close(self):
- if self._prog_running:
- self.opts.progress_obj.end(self._amount_read)
- self.fo.close()
-
- # XXX - confident that this does nothing for pycurl
- #if self.opts.close_connection:
- # try: self.fo.close_connection()
- # except: pass
-
-
-
-#####################################################################
-
-
-
-class NoDefault: pass
-class ObjectCache:
- def __init__(self, name=None):
- self.name = name or self.__class__.__name__
- self._lock = thread.allocate_lock()
- self._cache = []
-
- def lock(self):
- self._lock.acquire()
-
- def unlock(self):
- self._lock.release()
-
- def get(self, key, create=None, found=None):
- for (k, v) in self._cache:
- if k == key:
- if DEBUG:
- DEBUG.debug('%s: found key' % self.name)
- DEBUG.debug('%s: key = %s' % (self.name, key))
- DEBUG.debug('%s: val = %s' % (self.name, v))
- found = found or getattr(self, 'found', None)
- if found: v = found(key, v)
- return v
- if DEBUG:
- DEBUG.debug('%s: no key found' % self.name)
- DEBUG.debug('%s: key = %s' % (self.name, key))
- create = create or getattr(self, 'create', None)
- if create:
- value = create(key)
- if DEBUG:
- DEBUG.info('%s: new value created' % self.name)
- DEBUG.debug('%s: val = %s' % (self.name, value))
- self._cache.append( (key, value) )
- return value
- else:
- raise KeyError('key not found: %s' % key)
-
- def set(self, key, value):
- if DEBUG:
- DEBUG.info('%s: inserting key' % self.name)
- DEBUG.debug('%s: key = %s' % (self.name, key))
- DEBUG.debug('%s: val = %s' % (self.name, value))
- self._cache.append( (key, value) )
-
- def ts_get(self, key, create=None, found=None):
- self._lock.acquire()
- try:
- self.get(key, create, found)
- finally:
- self._lock.release()
-
- def ts_set(self, key, value):
- self._lock.acquire()
- try:
- self.set(key, value)
- finally:
- self._lock.release()
-
-class OpenerCache(ObjectCache):
- def found(self, factory_and_handlers, opener):
- for handler in factory_and_handlers[1:]:
- handler.add_parent(opener)
- return opener
- def create(self, factory_and_handlers):
- factory = factory_and_handlers[0]
- handlers = factory_and_handlers[1:]
- return factory.create_opener(*handlers)
-_opener_cache = OpenerCache()
-
-_curl_cache = pycurl.Curl() # make one and reuse it over and over and over
-
-class ProxyHandlerCache(ObjectCache):
- def create(self, proxies):
+_handler_cache = []
+def CachedOpenerDirector(ssl_factory = None, *handlers):
+ for (cached_handlers, opener) in _handler_cache:
+ if cached_handlers == handlers:
+ for handler in opener.handlers:
+ handler.add_parent(opener)
+ return opener
+ if not ssl_factory:
+ ssl_factory = sslfactory.get_factory()
+ opener = ssl_factory.create_opener(*handlers)
+ _handler_cache.append( (handlers, opener) )
+ return opener
+
+_proxy_cache = []
+def CachedProxyHandler(proxies):
+ for (pdict, handler) in _proxy_cache:
+ if pdict == proxies:
+ if DEBUG: DEBUG.debug('re-using proxy settings: %s', proxies)
+ break
+ else:
for k, v in proxies.items():
utype, url = urllib.splittype(v)
host, other = urllib.splithost(url)
if (utype is None) or (host is None):
- err = URLGrabError(13, _('Bad proxy URL: %s') % v)
- err.url = url
- raise err
- return urllib2.ProxyHandler(proxies)
-_proxy_handler_cache = ProxyHandlerCache()
-
-class HTTPSHandlerCache(ObjectCache):
- def create(self, ssl_factory):
- return HTTPSHandler(ssl_factory)
-_https_handler_cache = HTTPSHandlerCache()
-
-class SSLFactoryCache(ObjectCache):
- def create(self, cert_and_context):
- return sslfactory.get_factory(*cert_and_context)
-_ssl_factory_cache = SSLFactoryCache()
+ raise URLGrabError(13, _('Bad proxy URL: %s') % v)
+
+ if DEBUG: DEBUG.info('creating new proxy handler: %s', proxies)
+ handler = urllib2.ProxyHandler(proxies)
+ _proxy_cache.append( (proxies, handler) )
+ return handler
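
# Illustrative proxy mapping (hypothetical host): keys are URL schemes and the
# values must be complete proxy URLs; anything urllib.splittype/splithost
# cannot parse raises URLGrabError(13) above.
#
#   urlgrab(url, filename, proxies={'http': 'http://proxy.example.com:3128'})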
#####################################################################
# DEPRECATED FUNCTIONS
_test_file_object_readlines]:
fo_input = cStringIO.StringIO(s_input)
fo_output = cStringIO.StringIO()
- wrapper = PyCurlFileObject(fo_input, None, 0)
+ wrapper = URLGrabberFileObject(fo_input, None, 0)
print 'testing %-30s ' % testfunc.__name__,
testfunc(wrapper, fo_output)
s_output = fo_output.getvalue()
import time
import math
import thread
-import types
-import fcntl
-import struct
-import termios
-
-# Code from http://mail.python.org/pipermail/python-list/2000-May/033365.html
-def terminal_width(fd=1):
- """ Get the real terminal width """
- try:
- buf = 'abcdefgh'
- buf = fcntl.ioctl(fd, termios.TIOCGWINSZ, buf)
- ret = struct.unpack('hhhh', buf)[1]
- if ret == 0:
- return 80
- # Add minimum too?
- return ret
- except: # IOError
- return 80
-
-_term_width_val = None
-_term_width_last = None
-def terminal_width_cached(fd=1, cache_timeout=1.000):
- """ Get the real terminal width, but cache it for a bit. """
- global _term_width_val
- global _term_width_last
-
- now = time.time()
- if _term_width_val is None or (now - _term_width_last) > cache_timeout:
- _term_width_val = terminal_width(fd)
- _term_width_last = now
- return _term_width_val
-
-class TerminalLine:
- """ Help create dynamic progress bars, uses terminal_width_cached(). """
-
- def __init__(self, min_rest=0, beg_len=None, fd=1, cache_timeout=1.000):
- if beg_len is None:
- beg_len = min_rest
- self._min_len = min_rest
- self._llen = terminal_width_cached(fd, cache_timeout)
- if self._llen < beg_len:
- self._llen = beg_len
- self._fin = False
-
- def __len__(self):
- """ Usable length for elements. """
- return self._llen - self._min_len
-
- def rest_split(self, fixed, elements=2):
- """ After a fixed length, split the rest of the line length among
- a number of different elements (default=2). """
- if self._llen < fixed:
- return 0
- return (self._llen - fixed) / elements
-
- def add(self, element, full_len=None):
- """ If there is room left in the line, above min_len, add element.
- Note that as soon as one add fails all the rest will fail too. """
-
- if full_len is None:
- full_len = len(element)
- if len(self) < full_len:
- self._fin = True
- if self._fin:
- return ''
-
- self._llen -= len(element)
- return element
-
- def rest(self):
- """ Current rest of line, same as .rest_split(fixed=0, elements=1). """
- return self._llen
-
+
class BaseMeter:
def __init__(self):
self.update_period = 0.3 # seconds
def _do_end(self, amount_read, now=None):
pass
-# This is kind of a hack, but progress is gotten from grabber which doesn't
-# know about the total size to download. So we do this so we can get the data
-# out of band here. This will be "fixed" one way or anther soon.
-_text_meter_total_size = 0
-_text_meter_sofar_size = 0
-def text_meter_total_size(size, downloaded=0):
- global _text_meter_total_size
- global _text_meter_sofar_size
- _text_meter_total_size = size
- _text_meter_sofar_size = downloaded
-
-#
-# update: No size (minimal: 17 chars)
-# -----------------------------------
-# <text> <rate> | <current size> <elapsed time>
-# 8-48 1 8 3 6 1 9 5
-#
-# Order: 1. <text>+<current size> (17)
-# 2. +<elapsed time> (10, total: 27)
-# 3. + ( 5, total: 32)
-# 4. +<rate> ( 9, total: 41)
-#
-# update: Size, Single file
-# -------------------------
-# <text> <pc> <bar> <rate> | <current size> <eta time> ETA
-# 8-25 1 3-4 1 6-16 1 8 3 6 1 9 1 3 1
-#
-# Order: 1. <text>+<current size> (17)
-# 2. +<eta time> (10, total: 27)
-# 3. +ETA ( 5, total: 32)
-# 4. +<pc> ( 4, total: 36)
-# 5. +<rate> ( 9, total: 45)
-# 6. +<bar> ( 7, total: 52)
-#
-# update: Size, All files
-# -----------------------
-# <text> <total pc> <pc> <bar> <rate> | <current size> <eta time> ETA
-# 8-22 1 5-7 1 3-4 1 6-12 1 8 3 6 1 9 1 3 1
-#
-# Order: 1. <text>+<current size> (17)
-# 2. +<eta time> (10, total: 27)
-# 3. +ETA ( 5, total: 32)
-# 4. +<total pc> ( 5, total: 37)
-# 4. +<pc> ( 4, total: 41)
-# 5. +<rate> ( 9, total: 50)
-# 6. +<bar> ( 7, total: 57)
-#
-# end
-# ---
-# <text> | <current size> <elapsed time>
-# 8-56 3 6 1 9 5
-#
-# Order: 1. <text> ( 8)
-# 2. +<current size> ( 9, total: 17)
-# 3. +<elapsed time> (10, total: 27)
-# 4. + ( 5, total: 32)
-#
-
class TextMeter(BaseMeter):
def __init__(self, fo=sys.stderr):
BaseMeter.__init__(self)
text = self.text
else:
text = self.basename
-
- ave_dl = format_number(self.re.average_rate())
- sofar_size = None
- if _text_meter_total_size:
- sofar_size = _text_meter_sofar_size + amount_read
- sofar_pc = (sofar_size * 100) / _text_meter_total_size
-
- # Include text + ui_rate in minimal
- tl = TerminalLine(8, 8+1+8)
- ui_size = tl.add(' | %5sB' % fread)
if self.size is None:
- ui_time = tl.add(' %9s' % fetime)
- ui_end = tl.add(' ' * 5)
- ui_rate = tl.add(' %5sB/s' % ave_dl)
- out = '%-*.*s%s%s%s%s\r' % (tl.rest(), tl.rest(), text,
- ui_rate, ui_size, ui_time, ui_end)
+ out = '\r%-60.60s %5sB %s ' % \
+ (text, fread, fetime)
else:
rtime = self.re.remaining_time()
frtime = format_time(rtime)
frac = self.re.fraction_read()
+ bar = '='*int(25 * frac)
- ui_time = tl.add(' %9s' % frtime)
- ui_end = tl.add(' ETA ')
-
- if sofar_size is None:
- ui_sofar_pc = ''
- else:
- ui_sofar_pc = tl.add(' (%i%%)' % sofar_pc,
- full_len=len(" (100%)"))
-
- ui_pc = tl.add(' %2i%%' % (frac*100))
- ui_rate = tl.add(' %5sB/s' % ave_dl)
- # Make text grow a bit before we start growing the bar too
- blen = 4 + tl.rest_split(8 + 8 + 4)
- bar = '='*int(blen * frac)
- if (blen * frac) - int(blen * frac) >= 0.5:
- bar += '-'
- ui_bar = tl.add(' [%-*.*s]' % (blen, blen, bar))
- out = '%-*.*s%s%s%s%s%s%s%s\r' % (tl.rest(), tl.rest(), text,
- ui_sofar_pc, ui_pc, ui_bar,
- ui_rate, ui_size, ui_time, ui_end)
+ out = '\r%-25.25s %3i%% |%-25.25s| %5sB %8s ETA ' % \
+ (text, frac*100, bar, fread, frtime)
self.fo.write(out)
self.fo.flush()
def _do_end(self, amount_read, now=None):
- global _text_meter_total_size
- global _text_meter_sofar_size
-
total_time = format_time(self.re.elapsed_time())
total_size = format_number(amount_read)
if self.text is not None:
text = self.text
else:
text = self.basename
-
- tl = TerminalLine(8)
- ui_size = tl.add(' | %5sB' % total_size)
- ui_time = tl.add(' %9s' % total_time)
- not_done = self.size is not None and amount_read != self.size
- if not_done:
- ui_end = tl.add(' ... ')
+ if self.size is None:
+ out = '\r%-60.60s %5sB %s ' % \
+ (text, total_size, total_time)
else:
- ui_end = tl.add(' ' * 5)
-
- out = '\r%-*.*s%s%s%s\n' % (tl.rest(), tl.rest(), text,
- ui_size, ui_time, ui_end)
- self.fo.write(out)
+ bar = '='*25
+ out = '\r%-25.25s %3i%% |%-25.25s| %5sB %8s ' % \
+ (text, 100, bar, total_size, total_time)
+ self.fo.write(out + '\n')
self.fo.flush()
- # Don't add size to the sofar size until we have all of it.
- # If we don't have a size, then just pretend/hope we got all of it.
- if not_done:
- return
-
- if _text_meter_total_size:
- _text_meter_sofar_size += amount_read
- if _text_meter_total_size <= _text_meter_sofar_size:
- _text_meter_total_size = 0
- _text_meter_sofar_size = 0
-
text_progress_meter = TextMeter
class MultiFileHelper(BaseMeter):
#print 'times', now, self.last_update_time
time_diff = now - self.last_update_time
read_diff = amount_read - self.last_amount_read
- # First update, on reget is the file size
- if self.last_amount_read:
- self.last_update_time = now
- self.ave_rate = self._temporal_rolling_ave(\
- time_diff, read_diff, self.ave_rate, self.timescale)
+ self.last_update_time = now
self.last_amount_read = amount_read
+ self.ave_rate = self._temporal_rolling_ave(\
+ time_diff, read_diff, self.ave_rate, self.timescale)
#print 'results', time_diff, read_diff, self.ave_rate
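        # _temporal_rolling_ave (defined elsewhere in this class, not shown in
        # this hunk) exponentially smooths the instantaneous rate.  A plausible
        # sketch, not necessarily the exact implementation:
        #
        #   epsilon  = min(time_diff / timescale, 1.0)
        #   new_rate = epsilon * (read_diff / time_diff) + (1 - epsilon) * last_rate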
#####################################################################
format = '%.0f%s%s'
return(format % (float(number or 0), space, symbols[depth]))
-
-def _tst(fn, cur, tot, beg, size, *args):
- tm = TextMeter()
- text = "(%d/%d): %s" % (cur, tot, fn)
- tm.start(fn, "http://www.example.com/path/to/fn/" + fn, fn, size, text=text)
- num = beg
- off = 0
- for (inc, delay) in args:
- off += 1
- while num < ((size * off) / len(args)):
- num += inc
- tm.update(num)
- time.sleep(delay)
- tm.end(size)
-
-if __name__ == "__main__":
- # (1/2): subversion-1.4.4-7.x86_64.rpm 2.4 MB / 85 kB/s 00:28
- # (2/2): mercurial-0.9.5-6.fc8.x86_64.rpm 924 kB / 106 kB/s 00:08
- if len(sys.argv) >= 2 and sys.argv[1] == 'total':
- text_meter_total_size(1000 + 10000 + 10000 + 1000000 + 1000000 +
- 1000000 + 10000 + 10000 + 10000 + 1000000)
- _tst("sm-1.0.0-1.fc8.i386.rpm", 1, 10, 0, 1000,
- (10, 0.2), (10, 0.1), (100, 0.25))
- _tst("s-1.0.1-1.fc8.i386.rpm", 2, 10, 0, 10000,
- (10, 0.2), (100, 0.1), (100, 0.1), (100, 0.25))
- _tst("m-1.0.1-2.fc8.i386.rpm", 3, 10, 5000, 10000,
- (10, 0.2), (100, 0.1), (100, 0.1), (100, 0.25))
- _tst("large-file-name-Foo-11.8.7-4.5.6.1.fc8.x86_64.rpm", 4, 10, 0, 1000000,
- (1000, 0.2), (1000, 0.1), (10000, 0.1))
- _tst("large-file-name-Foo2-11.8.7-4.5.6.2.fc8.x86_64.rpm", 5, 10,
- 500001, 1000000, (1000, 0.2), (1000, 0.1), (10000, 0.1))
- _tst("large-file-name-Foo3-11.8.7-4.5.6.3.fc8.x86_64.rpm", 6, 10,
- 750002, 1000000, (1000, 0.2), (1000, 0.1), (10000, 0.1))
- _tst("large-file-name-Foo4-10.8.7-4.5.6.1.fc8.x86_64.rpm", 7, 10, 0, 10000,
- (100, 0.1))
- _tst("large-file-name-Foo5-10.8.7-4.5.6.2.fc8.x86_64.rpm", 8, 10,
- 5001, 10000, (100, 0.1))
- _tst("large-file-name-Foo6-10.8.7-4.5.6.3.fc8.x86_64.rpm", 9, 10,
- 7502, 10000, (1, 0.1))
- _tst("large-file-name-Foox-9.8.7-4.5.6.1.fc8.x86_64.rpm", 10, 10,
- 0, 1000000, (10, 0.5),
- (100000, 0.1), (10000, 0.1), (10000, 0.1), (10000, 0.1),
- (100000, 0.1), (10000, 0.1), (10000, 0.1), (10000, 0.1),
- (100000, 0.1), (10000, 0.1), (10000, 0.1), (10000, 0.1),
- (100000, 0.1), (10000, 0.1), (10000, 0.1), (10000, 0.1),
- (100000, 0.1), (1, 0.1))