git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
Initial revision
author Guido van Rossum <guido@python.org>
Tue, 14 Dec 1993 10:08:02 +0000 (10:08 +0000)
committer Guido van Rossum <guido@python.org>
Tue, 14 Dec 1993 10:08:02 +0000 (10:08 +0000)
Demo/scripts/markov.py [new file with mode: 0755]

diff --git a/Demo/scripts/markov.py b/Demo/scripts/markov.py
new file mode 100755 (executable)
index 0000000..6a6d2b2
--- /dev/null
@@ -0,0 +1,116 @@
+#! /usr/local/bin/python
+
+class Markov:
+       # Markov-chain model over sequences (strings or tuples).
+       #
+       # self.trans maps a history -- the last `histsize' items seen,
+       # or fewer near the start of a sequence -- to the list of
+       # one-item slices that followed that history in the input.
+       # The key None holds the (empty) starting sequences, and a None
+       # entry in a list of successors marks the end of a sequence.
+       def __init__(self, histsize, choice):
+               # histsize: maximum number of items used as history.
+               # choice: function that picks one element from a list.
+               self.histsize = histsize
+               self.choice = choice
+               self.trans = {}
+       def add(self, state, next):
+               # Record that `next' was seen following `state'.
+               try:
+                       self.trans[state].append(next)
+               except KeyError:
+                       self.trans[state] = [next]
+       def put(self, seq):
+               # Feed one complete sequence into the model.
+               n = self.histsize
+               add = self.add
+               size = len(seq)
+               add(None, seq[:0])
+               for i in range(size):
+                       add(seq[max(0, i-n):i], seq[i:i+1])
+               # Clamp the start index at 0.  When the sequence is shorter
+               # than the history size a negative index would slice off
+               # only its tail, while get() -- which does clamp -- would
+               # look up the whole sequence and fail with a KeyError.
+               add(seq[max(0, size-n):], None)
+       def get(self):
+               # Generate a random sequence: start from one of the empty
+               # starting sequences and keep appending a successor chosen
+               # for the current history until an end marker is drawn.
+               choice = self.choice
+               trans = self.trans
+               n = self.histsize
+               seq = choice(trans[None])
+               while 1:
+                       subseq = seq[max(0, len(seq)-n):]
+                       next = choice(trans[subseq])
+                       if not next: break
+                       seq = seq + next
+               return seq
+
+def test():
+       # Command-line driver: build a Markov model from the input files
+       # (or stdin) and print randomly generated paragraphs until the
+       # program is interrupted.
+       import sys, string, whrandom, getopt
+       args = sys.argv[1:]
+       try:
+               # 'q' has to be listed here; otherwise the -q option that
+               # is handled below is rejected as unknown.
+               opts, args = getopt.getopt(args, '0123456789cdqw')
+       except getopt.error:
+               print 'Usage: markov [-#] [-cddqw] [file] ...'
+               print 'Options:'
+               print '-#: 1-digit history size (default 2)'
+               print '-c: characters (default)'
+               print '-w: words'
+               print '-d: more debugging output'
+               print '-q: no debugging output'
+               print 'Input files (default stdin) are split in paragraphs'
+               print 'separated by blank lines and each paragraph is split'
+               print 'in words by whitespace, then reconcatenated with'
+               print 'exactly one space separating words.'
+               print 'Output consists of paragraphs separated by blank'
+               print 'lines, where lines are no longer than 72 characters.'
+               # opts is unbound after a failed parse, so stop here
+               # instead of falling through to the option loop.
+               sys.exit(2)
+       histsize = 2
+       do_words = 0
+       debug = 1
+       for o, a in opts:
+               # NOTE: eval() is only reached for the options '-0'..'-9',
+               # so its argument is always a single digit.
+               if '-0' <= o <= '-9': histsize = eval(o[1:])
+               if o == '-c': do_words = 0
+               if o == '-d': debug = debug + 1
+               if o == '-q': debug = 0
+               if o == '-w': do_words = 1
+       if not args: args = ['-']
+       m = Markov(histsize, whrandom.choice)
+       try:
+               for filename in args:
+                       if filename == '-':
+                               f = sys.stdin
+                               if f.isatty():
+                                       print 'Sorry, need stdin from file'
+                                       continue
+                       else:
+                               f = open(filename, 'r')
+                       if debug: print 'processing', filename, '...'
+                       # Close the file even when reading fails.
+                       try:
+                               text = f.read()
+                       finally:
+                               f.close()
+                       # Paragraphs are separated by blank lines.
+                       paralist = string.splitfields(text, '\n\n')
+                       for para in paralist:
+                               if debug > 1: print 'feeding ...'
+                               words = string.split(para)
+                               if words:
+                                       # Word mode feeds a tuple of words;
+                                       # character mode feeds one string
+                                       # with single spaces between words.
+                                       if do_words: data = tuple(words)
+                                       else: data = string.joinfields(words, ' ')
+                                       m.put(data)
+       except KeyboardInterrupt:
+               print 'Interrupted -- continue with data read so far'
+       if not m.trans:
+               print 'No valid input files'
+               return
+       if debug: print 'done.'
+       if debug > 1:
+               # Dump the transitions for the start state and for the
+               # histories shorter than the full history size.
+               for key in m.trans.keys():
+                       if key is None or len(key) < histsize:
+                               print `key`, m.trans[key]
+               if histsize == 0:
+                       # With no history every item is filed under the
+                       # empty sequence, which is () in word mode and ''
+                       # in character mode.
+                       if do_words: empty = ()
+                       else: empty = ''
+                       print `empty`, m.trans[empty]
+               print
+       # Generate paragraphs forever; the user stops the program.
+       while 1:
+               data = m.get()
+               if do_words: words = data
+               else: words = string.split(data)
+               n = 0
+               limit = 72
+               for w in words:
+                       # Start a new line when this word would pass the limit.
+                       if n + len(w) > limit:
+                               print
+                               n = 0
+                       print w,
+                       n = n + len(w) + 1
+               print
+               print
+
+def tuple(list):
+       # Convert a list to a tuple by splitting it in halves, converting
+       # each half recursively and concatenating the results.
+       size = len(list)
+       if size > 1:
+               half = size/2
+               head = tuple(list[:half])
+               tail = tuple(list[half:])
+               return head + tail
+       if size == 1:
+               return (list[0],)
+       return ()
+
+test()         # run the command-line driver when the file is executed