From bd40bbe7edc908b72dd886c939a25d32b0063e6a Mon Sep 17 00:00:00 2001 From: cvs2svn Date: Sun, 22 Sep 2002 09:03:03 +0000 Subject: [PATCH] This commit was manufactured by cvs2svn to create branch 'release22-maint'. --- Doc/lib/libasynchat.tex | 254 +++++++++++++++++++++++++++ Lib/email/_compat21.py | 63 +++++++ Lib/email/_compat22.py | 60 +++++++ Lib/email/test/data/msg_31.txt | 15 ++ Lib/email/test/test_email_torture.py | 136 ++++++++++++++ Lib/test/test_multifile.py | 66 +++++++ 6 files changed, 594 insertions(+) create mode 100644 Doc/lib/libasynchat.tex create mode 100644 Lib/email/_compat21.py create mode 100644 Lib/email/_compat22.py create mode 100644 Lib/email/test/data/msg_31.txt create mode 100644 Lib/email/test/test_email_torture.py create mode 100644 Lib/test/test_multifile.py diff --git a/Doc/lib/libasynchat.tex b/Doc/lib/libasynchat.tex new file mode 100644 index 000000000000..223bfed746a4 --- /dev/null +++ b/Doc/lib/libasynchat.tex @@ -0,0 +1,254 @@ +\section{\module{asynchat} --- + Asynchronous socket command/response handler} + +\declaremodule{standard}{asynchat} +\modulesynopsis{Support for asynchronous command/response protocols.} +\moduleauthor{Sam Rushing}{rushing@nightmare.com} +\sectionauthor{Steve Holden}{sholden@holdenweb.com} + +This module builds on the \refmodule{asyncore} infrastructure, +simplifying asynchronous clients and servers and making it easier to +handle protocols whose elements are terminated by arbitrary strings, or +are of variable length. \refmodule{asynchat} defines the abstract class +\class{async_chat} that you subclass, providing implementations of the +\method{collect_incoming_data()} and \method{found_terminator()} +methods. It uses the same asynchronous loop as \refmodule{asyncore}, and +the two types of channel, \class{asyncore.dispatcher} and +\class{asynchat.async_chat}, can freely be mixed in the channel map. 
+Typically an \class{asyncore.dispatcher} server channel generates new +\class{asynchat.async_chat} channel objects as it receives incoming +connection requests. + +\begin{classdesc}{async_chat}{} + This class is an abstract subclass of \class{asyncore.dispatcher}. To make + practical use of the code you must subclass \class{async_chat}, providing + meaningful \method{collect_incoming_data()} and \method{found_terminator()} + methods. The \class{asyncore.dispatcher} methods can be + used, although not all make sense in a message/response context. + + Like \class{asyncore.dispatcher}, \class{async_chat} defines a set of events + that are generated by an analysis of socket conditions after a + \cfunction{select()} call. Once the polling loop has been started the + \class{async_chat} object's methods are called by the event-processing + framework with no action on the part of the programmer. + + Unlike \class{asyncore.dispatcher}, \class{async_chat} allows you to define + a first-in-first-out queue (fifo) of \emph{producers}. A producer need have + only one method, \method{more()}, which should return data to be transmitted + on the channel. The producer indicates exhaustion (\emph{i.e.} that it contains + no more data) by having its \method{more()} method return the empty string. At + this point the \class{async_chat} object removes the producer from the fifo + and starts using the next producer, if any. When the producer fifo is empty + the \method{handle_write()} method does nothing. You use the channel object's + \method{set_terminator()} method to describe how to recognize the end + of, or an important breakpoint in, an incoming transmission from the + remote endpoint. + + To build a functioning \class{async_chat} subclass your + input methods \method{collect_incoming_data()} and + \method{found_terminator()} must handle the data that the channel receives + asynchronously. The methods are described below. 
+\end{classdesc} + +\begin{methoddesc}{close_when_done}{} + Pushes a \code{None} on to the producer fifo. When this producer is + popped off the fifo it causes the channel to be closed. +\end{methoddesc} + +\begin{methoddesc}{collect_incoming_data}{data} + Called with \var{data} holding an arbitrary amount of received data. + The default method, which must be overridden, raises a \exception{NotImplementedError} exception. +\end{methoddesc} + +\begin{methoddesc}{discard_buffers}{} + In emergencies this method will discard any data held in the input and/or + output buffers and the producer fifo. +\end{methoddesc} + +\begin{methoddesc}{found_terminator}{} + Called when the incoming data stream matches the termination condition + set by \method{set_terminator()}. The default method, which must be overridden, + raises a \exception{NotImplementedError} exception. The buffered input data should + be available via an instance attribute. +\end{methoddesc} + +\begin{methoddesc}{get_terminator}{} + Returns the current terminator for the channel. +\end{methoddesc} + +\begin{methoddesc}{handle_close}{} + Called when the channel is closed. The default method silently closes + the channel's socket. +\end{methoddesc} + +\begin{methoddesc}{handle_read}{} + Called when a read event fires on the channel's socket in the + asynchronous loop. The default method checks for the termination + condition established by \method{set_terminator()}, which can be either + the appearance of a particular string in the input stream or the receipt + of a particular number of characters. When the terminator is found, + \method{handle_read()} calls the \method{found_terminator()} method after + calling \method{collect_incoming_data()} with any data preceding the + terminating condition. +\end{methoddesc} + +\begin{methoddesc}{handle_write}{} + Called when the application may write data to the channel. 
+ The default method calls the \method{initiate_send()} method, which in turn + will call \method{refill_buffer()} to collect data from the producer + fifo associated with the channel. +\end{methoddesc} + +\begin{methoddesc}{push}{data} + Creates a \class{simple_producer} object (\emph{see below}) containing the data and + pushes it on to the channel's \code{producer_fifo} to ensure its + transmission. This is all you need to do to have the channel write + the data out to the network, although it is possible to use your + own producers in more complex schemes to implement encryption and + chunking, for example. +\end{methoddesc} + +\begin{methoddesc}{push_with_producer}{producer} + Takes a producer object and adds it to the producer fifo associated with + the channel. When all currently-pushed producers have been exhausted + the channel will consume this producer's data by calling its + \method{more()} method and send the data to the remote endpoint. +\end{methoddesc} + +\begin{methoddesc}{readable}{} + Should return \code{True} for the channel to be included in the set of + channels tested by the \cfunction{select()} loop for readability. +\end{methoddesc} + +\begin{methoddesc}{refill_buffer}{} + Refills the output buffer by calling the \method{more()} method of the + producer at the head of the fifo. If it is exhausted then the + producer is popped off the fifo and the next producer is activated. + If the current producer is, or becomes, \code{None} then the channel + is closed. +\end{methoddesc} + +\begin{methoddesc}{set_terminator}{term} + Sets the terminating condition to be recognised on the channel. \code{term} + may be any of three types of value, corresponding to three different ways + to handle incoming protocol data. 
+ + \begin{tableii}{l|l}{}{term}{Description} + \lineii{\emph{string}}{Will call \method{found_terminator()} when the + string is found in the input stream} + \lineii{\emph{integer}}{Will call \method{found_terminator()} when the + indicated number of characters have been received} + \lineii{\code{None}}{The channel continues to collect data forever} + \end{tableii} + + Note that any data following the terminator will be available for reading by + the channel after \method{found_terminator()} is called. +\end{methoddesc} + +\begin{methoddesc}{writable}{} + Should return \code{True} as long as items remain on the producer fifo, + or the channel is connected and the channel's output buffer is non-empty. +\end{methoddesc} + +\subsection{asynchat - Auxiliary Classes and Functions} + +\begin{classdesc}{simple_producer}{data\optional{, buffer_size=512}} + A \class{simple_producer} takes a chunk of data and an optional buffer size. + Repeated calls to its \method{more()} method yield successive chunks of the + data no larger than \var{buffer_size}. +\end{classdesc} + +\begin{methoddesc}{more}{} + Produces the next chunk of information from the producer, or returns the empty string once the producer is exhausted. +\end{methoddesc} + +\begin{classdesc}{fifo}{\optional{list=None}} + Each channel maintains a \class{fifo} holding data which has been pushed by the + application but not yet popped for writing to the channel. + A \class{fifo} is a list used to hold data and/or producers until they are required. + If the \var{list} argument is provided then it should contain producers or + data items to be written to the channel. +\end{classdesc} + +\begin{methoddesc}{is_empty}{} + Returns \code{True} if and only if the fifo is empty. +\end{methoddesc} + +\begin{methoddesc}{first}{} + Returns the least-recently \method{push()}ed item from the fifo. +\end{methoddesc} + +\begin{methoddesc}{push}{data} + Adds the given data (which may be a string or a producer object) to the + producer fifo. 
+\end{methoddesc} + +\begin{methoddesc}{pop}{} + If the fifo is not empty, returns \code{True, first()}, deleting the popped + item. Returns \code{False, None} for an empty fifo. +\end{methoddesc} + +The \module{asynchat} module also defines one utility function, which may be +of use in network and textual analysis operations. + +\begin{funcdesc}{find_prefix_at_end}{haystack, needle} + Returns \code{True} if string \var{haystack} ends with any non-empty + prefix of string \var{needle}. +\end{funcdesc} + +\subsection{asynchat Example \label{asynchat-example}} + +The following partial example shows how HTTP requests can be read with +\class{async_chat}. A web server might create an \class{http_request_handler} object for +each incoming client connection. Notice that initially the +channel terminator is set to match the blank line at the end of the HTTP +headers, and a flag indicates that the headers are being read. + +Once the headers have been read, if the request is of type POST +(indicating that further data are present in the input stream) then the +\code{Content-Length:} header is used to set a numeric terminator to +read the right amount of data from the channel. + +The \method{handle_request()} method is called once all relevant input +has been marshalled, after setting the channel terminator to \code{None} +to ensure that any extraneous data sent by the web client are ignored. 
+ +\begin{verbatim} +class http_request_handler(asynchat.async_chat): + + def __init__(self, conn, addr, sessions, log): + asynchat.async_chat.__init__(self, conn=conn) + self.addr = addr + self.sessions = sessions + self.ibuffer = [] + self.obuffer = "" + self.set_terminator("\r\n\r\n") + self.reading_headers = True + self.handling = False + self.cgi_data = None + self.log = log + + def collect_incoming_data(self, data): + """Buffer the data""" + self.ibuffer.append(data) + + def found_terminator(self): + if self.reading_headers: + self.reading_headers = False + self.parse_headers("".join(self.ibuffer)) + self.ibuffer = [] + if self.op.upper() == "POST": + clen = self.headers.getheader("content-length") + self.set_terminator(int(clen)) + else: + self.handling = True + self.set_terminator(None) + self.handle_request() + elif not self.handling: + self.set_terminator(None) # browsers sometimes over-send + self.cgi_data = parse(self.headers, "".join(self.ibuffer)) + self.handling = True + self.ibuffer = [] + self.handle_request() +\end{verbatim} + diff --git a/Lib/email/_compat21.py b/Lib/email/_compat21.py new file mode 100644 index 000000000000..de8c44753de4 --- /dev/null +++ b/Lib/email/_compat21.py @@ -0,0 +1,63 @@ +# Copyright (C) 2002 Python Software Foundation +# Author: barry@zope.com + +"""Module containing compatibility functions for Python 2.1. +""" + +from cStringIO import StringIO +from types import StringType, UnicodeType + + + +# This function will become a method of the Message class +def walk(self): + """Walk over the message tree, yielding each subpart. + + The walk is performed in depth-first order. This method is a + generator. 
+ """ + parts = [] + parts.append(self) + if self.is_multipart(): + for subpart in self.get_payload(): + parts.extend(subpart.walk()) + return parts + + +# Python 2.2 spells floor division // +def _floordiv(i, j): + """Do a floor division, i/j.""" + return i / j + + +def _isstring(obj): + return isinstance(obj, StringType) or isinstance(obj, UnicodeType) + + + +# These two functions are imported into the Iterators.py interface module. +# The Python 2.2 version uses generators for efficiency. +def body_line_iterator(msg): + """Iterate over the parts, returning string payloads line-by-line.""" + lines = [] + for subpart in msg.walk(): + payload = subpart.get_payload() + if _isstring(payload): + for line in StringIO(payload).readlines(): + lines.append(line) + return lines + + +def typed_subpart_iterator(msg, maintype='text', subtype=None): + """Iterate over the subparts with a given MIME type. + + Use `maintype' as the main MIME type to match against; this defaults to + "text". Optional `subtype' is the MIME subtype to match against; if + omitted, only the main type is matched. + """ + parts = [] + for subpart in msg.walk(): + if subpart.get_main_type('text') == maintype: + if subtype is None or subpart.get_subtype('plain') == subtype: + parts.append(subpart) + return parts diff --git a/Lib/email/_compat22.py b/Lib/email/_compat22.py new file mode 100644 index 000000000000..a05451f25d96 --- /dev/null +++ b/Lib/email/_compat22.py @@ -0,0 +1,60 @@ +# Copyright (C) 2002 Python Software Foundation +# Author: barry@zope.com + +"""Module containing compatibility functions for Python 2.2. +""" + +from __future__ import generators +from __future__ import division +from cStringIO import StringIO +from types import StringTypes + + + +# This function will become a method of the Message class +def walk(self): + """Walk over the message tree, yielding each subpart. + + The walk is performed in depth-first order. This method is a + generator. 
+ """ + yield self + if self.is_multipart(): + for subpart in self.get_payload(): + for subsubpart in subpart.walk(): + yield subsubpart + + +# Python 2.2 spells floor division // +def _floordiv(i, j): + """Do a floor division, i/j.""" + return i // j + + +def _isstring(obj): + return isinstance(obj, StringTypes) + + + +# These two functions are imported into the Iterators.py interface module. +# The Python 2.2 version uses generators for efficiency. +def body_line_iterator(msg): + """Iterate over the parts, returning string payloads line-by-line.""" + for subpart in msg.walk(): + payload = subpart.get_payload() + if _isstring(payload): + for line in StringIO(payload): + yield line + + +def typed_subpart_iterator(msg, maintype='text', subtype=None): + """Iterate over the subparts with a given MIME type. + + Use `maintype' as the main MIME type to match against; this defaults to + "text". Optional `subtype' is the MIME subtype to match against; if + omitted, only the main type is matched. + """ + for subpart in msg.walk(): + if subpart.get_main_type('text') == maintype: + if subtype is None or subpart.get_subtype('plain') == subtype: + yield subpart diff --git a/Lib/email/test/data/msg_31.txt b/Lib/email/test/data/msg_31.txt new file mode 100644 index 000000000000..1e58e56cf52e --- /dev/null +++ b/Lib/email/test/data/msg_31.txt @@ -0,0 +1,15 @@ +From: aperson@dom.ain +MIME-Version: 1.0 +Content-Type: multipart/mixed; boundary=BOUNDARY_ + +--BOUNDARY +Content-Type: text/plain + +message 1 + +--BOUNDARY +Content-Type: text/plain + +message 2 + +--BOUNDARY-- diff --git a/Lib/email/test/test_email_torture.py b/Lib/email/test/test_email_torture.py new file mode 100644 index 000000000000..76d66a9ba71e --- /dev/null +++ b/Lib/email/test/test_email_torture.py @@ -0,0 +1,136 @@ +# Copyright (C) 2002 Python Software Foundation +# +# A torture test of the email package. 
This should not be run as part of the +# standard Python test suite since it requires several meg of email messages +# collected in the wild. These source messages are not checked into the +# Python distro, but are available as part of the standalone email package at +# http://sf.net/projects/mimelib + +import sys +import os +import unittest +from cStringIO import StringIO +from types import ListType + +from email.test.test_email import TestEmailBase +from test.test_support import TestSkipped, run_unittest + +import email +from email import __file__ as testfile +from email.Iterators import _structure + +def openfile(filename): + from os.path import join, dirname, abspath + path = abspath(join(dirname(testfile), os.pardir, 'moredata', filename)) + return open(path, 'rb') + +# Prevent this test from running in the Python distro +try: + openfile('crispin-torture.txt') +except IOError: + raise TestSkipped + + + +class TortureBase(TestEmailBase): + def _msgobj(self, filename): + fp = openfile(filename) + try: + msg = email.message_from_file(fp) + finally: + fp.close() + return msg + + + +class TestCrispinTorture(TortureBase): + # Mark Crispin's torture test from the SquirrelMail project + def test_mondo_message(self): + eq = self.assertEqual + neq = self.ndiffAssertEqual + msg = self._msgobj('crispin-torture.txt') + payload = msg.get_payload() + eq(type(payload), ListType) + eq(len(payload), 12) + eq(msg.preamble, None) + eq(msg.epilogue, '\n\n') + # Probably the best way to verify the message is parsed correctly is to + # dump its structure and compare it against the known structure. 
+ fp = StringIO() + _structure(msg, fp=fp) + neq(fp.getvalue(), """\ +multipart/mixed + text/plain + message/rfc822 + multipart/alternative + text/plain + multipart/mixed + text/richtext + application/andrew-inset + message/rfc822 + audio/basic + audio/basic + image/pbm + message/rfc822 + multipart/mixed + multipart/mixed + text/plain + audio/x-sun + multipart/mixed + image/gif + image/gif + application/x-be2 + application/atomicmail + audio/x-sun + message/rfc822 + multipart/mixed + text/plain + image/pgm + text/plain + message/rfc822 + multipart/mixed + text/plain + image/pbm + message/rfc822 + application/postscript + image/gif + message/rfc822 + multipart/mixed + audio/basic + audio/basic + message/rfc822 + multipart/mixed + application/postscript + text/plain + message/rfc822 + multipart/mixed + text/plain + multipart/parallel + image/gif + audio/basic + application/atomicmail + message/rfc822 + audio/x-sun +""") + + +def _testclasses(): + mod = sys.modules[__name__] + return [getattr(mod, name) for name in dir(mod) if name.startswith('Test')] + + +def suite(): + suite = unittest.TestSuite() + for testclass in _testclasses(): + suite.addTest(unittest.makeSuite(testclass)) + return suite + + +def test_main(): + for testclass in _testclasses(): + run_unittest(testclass) + + + +if __name__ == '__main__': + unittest.main(defaultTest='suite') diff --git a/Lib/test/test_multifile.py b/Lib/test/test_multifile.py new file mode 100644 index 000000000000..8f703477f346 --- /dev/null +++ b/Lib/test/test_multifile.py @@ -0,0 +1,66 @@ +import mimetools +import multifile +import cStringIO + +msg = """Mime-Version: 1.0 +Content-Type: multipart/mixed; + boundary="=====================_590453667==_" +X-OriginalArrivalTime: 05 Feb 2002 03:43:23.0310 (UTC) FILETIME=[42D88CE0:01C1ADF7] + +--=====================_590453667==_ +Content-Type: multipart/alternative; + boundary="=====================_590453677==_.ALT" + +--=====================_590453677==_.ALT 
+Content-Type: text/plain; charset="us-ascii"; format=flowed + +test A +--=====================_590453677==_.ALT +Content-Type: text/html; charset="us-ascii" + + +test B + +--=====================_590453677==_.ALT-- + +--=====================_590453667==_ +Content-Type: text/plain; charset="us-ascii" +Content-Disposition: attachment; filename="att.txt" + +Attached Content. +Attached Content. +Attached Content. +Attached Content. + +--=====================_590453667==_-- + +""" + +boundaries = 0 +linecount = 0 + +def getMIMEMsg(mf): + global boundaries, linecount + msg = mimetools.Message(mf) + + #print "TYPE: %s" % msg.gettype() + if msg.getmaintype() == 'multipart': + boundary = msg.getparam("boundary") + boundaries += 1 + + mf.push(boundary) + while mf.next(): + getMIMEMsg(mf) + mf.pop() + else: + lines = mf.readlines() + linecount += len(lines) + +def main(): + f = cStringIO.StringIO(msg) + getMIMEMsg(multifile.MultiFile(f)) + assert boundaries == 2 + assert linecount == 9 + +if __name__ == '__main__': + main() -- 2.47.3