]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
Redesigned as a class
authorGuido van Rossum <guido@python.org>
Tue, 22 Feb 1994 19:03:38 +0000 (19:03 +0000)
committerGuido van Rossum <guido@python.org>
Tue, 22 Feb 1994 19:03:38 +0000 (19:03 +0000)
Lib/urlopen.py

index c24b3f94faaa814898cc5f14804de755ade25eae..2b2729df099b1f7411414329ee9943a4775425d3 100755 (executable)
 #     IETF URL Working Group                    14 July 1993
 #     draft-ietf-uri-url-01.txt
 #
-# The object returned by urlopen() will differ per protocol.
-# All you know is that is has methods read(), fileno(), close() and info().
-# The read(), fileno() and close() methods work like those of open files.
+# The object returned by URLopener().open(file) will differ per
+# protocol.  All you know is that is has methods read(), readline(),
+# readlines(), fileno(), close() and info().  The read*(), fileno()
+# and close() methods work like those of open files. 
 # The info() method returns an rfc822.Message object which can be
 # used to query various info about the object, if available.
 # (rfc822.Message objects are queried with the getheader() method.)
 
 import socket
 import regex
-import regsub
-import string
-import rfc822
-import ftplib
 
 
-# External interface -- use urlopen(file) as if it were open(file, 'r')
+# This really consists of two pieces:
+# (1) a class which handles opening of all sorts of URLs
+#     (plus assorted utilities etc.)
+# (2) a set of functions for parsing URLs
+# XXX Should these be separated out into different modules?
+
+
+# Shortcut for basic usage
+_urlopener = None
 def urlopen(url):
-       url = string.strip(url)
-       if url[:1] == '<' and url[-1:] == '>': url = string.strip(url[1:-1])
-       if url[:4] == 'URL:': url = string.strip(url[4:])
-       type, url = splittype(url)
-       if not type: type = 'file'
-       type = regsub.gsub('-', '_', type)
-       try:
-               func = eval('open_' + type)
-       except NameError:
-               raise IOError, ('url error', 'unknown url type', type)
-       try:
-               return func(url)
-       except socket.error, msg:
-               raise IOError, ('socket error', msg)
-
-
-# Each routine of the form open_<type> knows how to open that type of URL
-
-# Use HTTP protocol
-def open_http(url):
-       import httplib
-       host, selector = splithost(url)
-       h = httplib.HTTP(host)
-       h.putrequest('GET', selector)
-       errcode, errmsg, headers = h.getreply()
-       if errcode == 200: return makefile(h.getfile(), headers)
-       else: raise IOError, ('http error', errcode, errmsg, headers)
-
-# Empty rfc822.Message object
-noheaders = rfc822.Message(open('/dev/null', 'r'))
-noheaders.fp.close()                   # Recycle file descriptor
-
-# Use Gopher protocol
-def open_gopher(url):
-       import gopherlib
-       host, selector = splithost(url)
-       type, selector = splitgophertype(selector)
-       selector, query = splitquery(selector)
-       if query: fp = gopherlib.send_query(selector, query, host)
-       else: fp = gopherlib.send_selector(selector, host)
-       return makefile(fp, noheaders)
-
-# Use local file or FTP depending on form of URL
-localhost = socket.gethostbyname('localhost')
-thishost = socket.gethostbyname(socket.gethostname())
-def open_file(url):
-       host, file = splithost(url)
-       if not host: return makefile(open(file, 'r'), noheaders)
-       host, port = splitport(host)
-       if not port and socket.gethostbyname(host) in (localhost, thishost):
-               try: fp = open(file, 'r')
-               except IOError: fp = None
-               if fp: return makefile(fp, noheaders)
-       return open_ftp(url)
-
-# Use FTP protocol
+       global _urlopener
+       if not _urlopener:
+               _urlopener = URLopener()
+       return _urlopener.open(url)
+
+
+# Class to open URLs.
+# This is a class rather than just a subroutine because we may need
+# more than one set of global protocol-specific options.
 ftpcache = {}
-ftperrors = (ftplib.error_reply,
-            ftplib.error_temp,
-            ftplib.error_perm,
-            ftplib.error_proto)
-def open_ftp(url):
-       host, file = splithost(url)
-       host, port = splitport(host)
-       host = socket.gethostbyname(host)
-       if not port: port = ftplib.FTP_PORT
-       key = (host, port)
-       try:
-               if not ftpcache.has_key(key):
-                       ftpcache[key] = ftpwrapper(host, port)
-               return makefile(ftpcache[key].retrfile(file), noheaders)
-       except ftperrors, msg:
-               raise IOError, ('ftp error', msg)
+class URLopener:
 
+       # Constructor
+       def __init__(self):
+               self.addheaders = []
+               self.ftpcache = ftpcache
+               # Undocumented feature: you can use a different
+               # ftp cache by assigning to the .ftpcache member;
+               # in case you want logically independent URL openers
 
-# Utility classes
+       # Add a header to be used by the HTTP interface only
+       # e.g. u.addheader('Accept', 'sound/basic')
+       def addheader(self, *args):
+               self.addheaders.append(args)
 
-# Class used to add an info() method to a file object
-class makefile:
-       def __init__(self, fp, headers):
-               self.fp = fp
-               self.headers = headers
-               self.read = self.fp.read
-               self.fileno = self.fp.fileno
-               self.close = self.fp.close
-       def info(self):
-               return self.headers
+       # External interface
+       # Use URLopener().open(file) instead of open(file, 'r')
+       def open(self, url):
+               import string
+               url = string.strip(url)
+               if url[:1] == '<' and url[-1:] == '>':
+                       url = string.strip(url[1:-1])
+               if url[:4] == 'URL:': url = string.strip(url[4:])
+               type, url = splittype(url)
+               if not type: type = 'file'
+               name = 'open_' + type
+               if '-' in name:
+                       import regsub
+                       name = regsub.gsub('-', '_', name)
+               if not hasattr(self, name):
+                       raise IOError, ('url error', 'unknown url type', type)
+               meth = getattr(self, name)
+               try:
+                       return meth(url)
+               except socket.error, msg:
+                       raise IOError, ('socket error', msg)
+
+       # Each method named open_<type> knows how to open that type of URL
+
+       # Use HTTP protocol
+       def open_http(self, url):
+               import httplib
+               host, selector = splithost(url)
+               h = httplib.HTTP(host)
+               h.putrequest('GET', selector)
+               for args in self.addheaders: apply(h.putheader, args)
+               errcode, errmsg, headers = h.getreply()
+               if errcode == 200: return addinfo(h.getfile(), headers)
+               else: raise IOError, ('http error', errcode, errmsg, headers)
+
+       # Use Gopher protocol
+       def open_gopher(self, url):
+               import gopherlib
+               host, selector = splithost(url)
+               type, selector = splitgophertype(selector)
+               selector, query = splitquery(selector)
+               if query: fp = gopherlib.send_query(selector, query, host)
+               else: fp = gopherlib.send_selector(selector, host)
+               return addinfo(fp, noheaders())
+
+       # Use local file or FTP depending on form of URL
+       def open_file(self, url):
+               host, file = splithost(url)
+               if not host: return addinfo(open(file, 'r'), noheaders())
+               host, port = splitport(host)
+               if not port and socket.gethostbyname(host) in (
+                         localhost(), thishost()):
+                       try: fp = open(file, 'r')
+                       except IOError: fp = None
+                       if fp: return addinfo(fp, noheaders())
+               return self.open_ftp(url)
+
+       # Use FTP protocol
+       def open_ftp(self, url):
+               host, file = splithost(url)
+               host, port = splitport(host)
+               host = socket.gethostbyname(host)
+               if not port:
+                       import ftplib
+                       port = ftplib.FTP_PORT
+               key = (host, port)
+               try:
+                       if not self.ftpcache.has_key(key):
+                               self.ftpcache[key] = ftpwrapper(host, port)
+                       return addinfo(self.ftpcache[key].retrfile(file),
+                                 noheaders())
+               except ftperrors(), msg:
+                       raise IOError, ('ftp error', msg)
+
+
+# Utility functions
+
+# Return the IP address of the magic hostname 'localhost'
+_localhost = None
+def localhost():
+       global _localhost
+       if not _localhost:
+               _localhost = socket.gethostbyname('localhost')
+       return _localhost
+
+# Return the IP address of the current host
+_thishost = None
+def thishost():
+       global _thishost
+       if not _thishost:
+               _thishost = socket.gethostbyname(socket.gethostname())
+       return _thishost
+
+# Return the set of errors raised by the FTP class
+_ftperrors = None
+def ftperrors():
+       global _ftperrors
+       if not _ftperrors:
+               import ftplib
+               _ftperrors = (ftplib.error_reply,
+                             ftplib.error_temp,
+                             ftplib.error_perm,
+                             ftplib.error_proto)
+       return _ftperrors
+
+# Return an empty rfc822.Message object
+_noheaders = None
+def noheaders():
+       global _noheaders
+       if not _noheaders:
+               import rfc822
+               _noheaders = rfc822.Message(open('/dev/null', 'r'))
+               _noheaders.fp.close()   # Recycle file descriptor
+       return _noheaders
+
+
+# Utility classes
 
 # Class used by open_ftp() for cache of open FTP connections
 class ftpwrapper:
@@ -118,10 +178,12 @@ class ftpwrapper:
                self.port = port
                self.init()
        def init(self):
+               import ftplib
                self.ftp = ftplib.FTP()
                self.ftp.connect(self.host, self.port)
                self.ftp.login()
        def retrfile(self, file):
+               import ftplib
                try:
                        self.ftp.voidcmd('TYPE I')
                except ftplib.all_errors:
@@ -140,27 +202,43 @@ class ftpwrapper:
                        if file: cmd = 'NLST ' + file
                        else: cmd = 'NLST'
                        conn = self.ftp.transfercmd(cmd)
-               return fakefile(self.ftp, conn)
-
-# Class used by ftpwrapper to handle response when transfer is complete
-class fakefile:
-       def __init__(self, ftp, conn):
-               self.ftp = ftp
-               self.conn = conn
-               self.fp = self.conn.makefile('r')
+               return addclosehook(conn.makefile('r'), self.ftp.voidresp)
+
+# Base class for addinfo and addclosehook
+class addbase:
+       def __init__(self, fp):
+               self.fp = fp
                self.read = self.fp.read
+               self.readline = self.fp.readline
+               self.readlines = self.fp.readlines
                self.fileno = self.fp.fileno
        def __del__(self):
                self.close()
        def close(self):
-               self.conn = None
                self.fp = None
-               self.read = None
-               if self.ftp: self.ftp.voidresp()
-               self.ftp = None
+
+# Class to add a close hook to an open file
+class addclosehook(addbase):
+       def __init__(self, fp, closehook, *hookargs):
+               addbase.__init__(self, fp)
+               self.closehook = closehook
+               self.hookargs = hookargs
+       def close(self):
+               if self.closehook:
+                       apply(self.closehook, self.hookargs)
+                       self.closehook = None
+               self.fp = None
+
+# class to add an info() method to an open file
+class addinfo(addbase):
+       def __init__(self, fp, headers):
+               addbase.__init__(self, fp)
+               self.headers = headers
+       def info(self):
+               return self.headers
 
 
-# Utilities to split url parts into components:
+# Utilities to parse URLs:
 # splittype('type:opaquestring') --> 'type', 'opaquestring'
 # splithost('//host[:port]/path') --> 'host[:port]', '/path'
 # splitport('host:port') --> 'host', 'port'
@@ -168,29 +246,29 @@ class fakefile:
 # splittag('/path#tag') --> '/path', 'tag'
 # splitgophertype('/Xselector') --> 'X', 'selector'
 
-typeprog = regex.compile('^\([^/:]+\):\(.*\)$')
+_typeprog = regex.compile('^\([^/:]+\):\(.*\)$')
 def splittype(url):
-       if typeprog.match(url) >= 0: return typeprog.group(1, 2)
+       if _typeprog.match(url) >= 0: return _typeprog.group(1, 2)
        return None, url
 
-hostprog = regex.compile('^//\([^/]+\)\(.*\)$')
+_hostprog = regex.compile('^//\([^/]+\)\(.*\)$')
 def splithost(url):
-       if hostprog.match(url) >= 0: return hostprog.group(1, 2)
+       if _hostprog.match(url) >= 0: return _hostprog.group(1, 2)
        return None, url
 
-portprog = regex.compile('^\(.*\):\([0-9]+\)$')
+_portprog = regex.compile('^\(.*\):\([0-9]+\)$')
 def splitport(host):
-       if portprog.match(host) >= 0: return portprog.group(1, 2)
+       if _portprog.match(host) >= 0: return _portprog.group(1, 2)
        return host, None
 
-queryprog = regex.compile('^\(.*\)\?\([^?]*\)$')
+_queryprog = regex.compile('^\(.*\)\?\([^?]*\)$')
 def splitquery(url):
-       if queryprog.match(url) >= 0: return queryprog.group(1, 2)
+       if _queryprog.match(url) >= 0: return _queryprog.group(1, 2)
        return url, None
 
-tagprog = regex.compile('^\(.*\)#\([^#]*\)$')
+_tagprog = regex.compile('^\(.*\)#\([^#]*\)$')
 def splittag(url):
-       if tagprog.match(url) >= 0: return tagprog.group(1, 2)
+       if _tagprog.match(url) >= 0: return _tagprog.group(1, 2)
        return url, None
 
 def splitgophertype(selector):
@@ -202,6 +280,7 @@ def splitgophertype(selector):
 # Test program
 def test():
        import sys
+       import regsub
        args = sys.argv[1:]
        if not args:
                args = [