git.ipfire.org Git - thirdparty/vectorscan.git/commitdiff
tools/fuzz: add test scripts for synthetic pattern generation.
author: Hong, Yang A <yang.a.hong@intel.com>
Mon, 25 May 2020 14:22:18 +0000 (14:22 +0000)
committer: Hong, Yang A <yang.a.hong@intel.com>
Mon, 25 May 2020 14:22:18 +0000 (14:22 +0000)
tools/fuzz/aristocrats.py [new file with mode: 0755]
tools/fuzz/completocrats.py [new file with mode: 0755]
tools/fuzz/heuristocrats.py [new file with mode: 0755]
tools/fuzz/limited_dict.txt [new file with mode: 0644]

diff --git a/tools/fuzz/aristocrats.py b/tools/fuzz/aristocrats.py
new file mode 100755 (executable)
index 0000000..7b6ff2b
--- /dev/null
@@ -0,0 +1,45 @@
+#!/usr/bin/env python
+
+from random import choice,randint
+from optparse import OptionParser
+
+# Build a random flag string for one expression.  Each candidate flag
+# character is independently kept or dropped with equal probability, and
+# the kept flags stay in the order of the candidate list.  Reads the
+# module-level 'options' produced by the option parser.
+def generateRandomOptions():
+    if options.hybrid:
+        # -H/--hybrid draws from a smaller candidate set.
+        allflags = "smiH8W"
+    else:
+        # Maintain an ordering for consistency.
+        allflags = "smiHV8WLP"
+    flags = ""
+    for f in allflags:
+        flags += choice(['', f])
+    return flags
+
+parser = OptionParser()
+parser.add_option("-d", "--depth",
+                  action="store", type="int", dest="depth", default=200,
+                  help="Depth of generation (akin to maximum length)")
+parser.add_option("-c", "--count",
+                  action="store", type="int", dest="count", default=1000,
+                  help="Number of expressions to generate")
+parser.add_option("-f", "--full",
+                  action="store_true", dest="full", default=False,
+                  help="Use a full character set including unprintables")
+parser.add_option("-H", "--hybrid",
+                  action="store_true", dest="hybrid",
+                  help="Generate random flags for hybrid mode")
+
+(options, args) = parser.parse_args()
+if len(args) != 0:
+    parser.error("incorrect number of arguments")
+
+if (options.full):
+    crange = range(0,256)
+    crange.remove(ord('\n'))
+else:
+    crange = range(32, 127)
+
+for i in xrange(0, options.count):
+    len = randint(1, options.depth)
+    s = [ chr(choice(crange)) for x in xrange(len) ]
+    line = str(i) + ":/" + "".join(s) + "/" + generateRandomOptions()
+    print line
diff --git a/tools/fuzz/completocrats.py b/tools/fuzz/completocrats.py
new file mode 100755 (executable)
index 0000000..60ac4d7
--- /dev/null
@@ -0,0 +1,39 @@
+#!/usr/bin/env python
+
+from itertools import *
+from optparse import OptionParser
+
+LIMITED_ALPHABET = "abc[](){}*?+^$|:=.\\-"
+
+parser = OptionParser()
+parser.add_option("-d", "--depth",
+                  action="store", type="int", dest="depth", default=200,
+                  help="Depth of generation (akin to maximum length)")
+
+parser.add_option("-f", "--full",
+                  action="store_true", dest="full", default=False,
+                  help="Use a full character set including unprintables")
+
+parser.add_option("-l", "--limited",
+                  action="store_true", dest="limited", default=False,
+                  help="Use a very limited character set: just " + LIMITED_ALPHABET)
+
+(options, args) = parser.parse_args()
+if len(args) != 0:
+    parser.error("incorrect number of arguments")
+
+if (options.full):
+    crange = range(0,256)
+    crange.remove(ord('\n'))
+elif (options.limited):
+    crange = [ ord(c) for c in LIMITED_ALPHABET ]
+else:
+    crange = range(32, 127)
+
+srange = [ chr(c) for c in crange ]
+
+i = 0
+for x in product(srange, repeat = options.depth):
+    line = str(i) + ":/" + "".join(x) + "/"
+    print line
+    i += 1
diff --git a/tools/fuzz/heuristocrats.py b/tools/fuzz/heuristocrats.py
new file mode 100755 (executable)
index 0000000..49c7acb
--- /dev/null
@@ -0,0 +1,259 @@
+#!/usr/bin/env python
+
+from optparse import OptionParser
+from random import *
+import string
+import sys
+
+# Return a random partition of nChildren: a list of positive integers that
+# sums to nChildren.  Between 1 and 5 distinct cut points are drawn from
+# 1..nChildren-1 (capped at nChildren-1 of them), so for nChildren >= 2 the
+# result always has at least two parts, i.e. it is never the degenerate
+# single-part partition such as [10] for nChildren == 10.  For
+# nChildren == 1 no cut point exists and the result is [1].
+def chooseLeafWidth(nChildren):
+    width = randint(1, 5)
+    width = min(width, nChildren-1)
+    s = sample(range(1, nChildren), width)
+    s.sort()
+    # Add both end points, then take the gaps between consecutive cuts.
+    s = [0] + s + [nChildren]
+    v = [ s[i+1] - s[i] for i in range(0, len(s)-1) if s[i+1] != s[i] ]
+    return v
+
+def generateConcat(nChildren, atTopIgnored):
+    v = [ generateRE(w, atTop = False) for w in chooseLeafWidth(nChildren) ]
+    v = [ r for r in v if r != '' ]    
+    return string.join(v, "")
+
+# Wrap s in a group, choosing with equal probability between a capturing
+# group "(s)" and a non-capturing group "(?:s)".
+def makeGroup(s):
+    # Parenthesise either in normal parens or a non-capturing group.
+    if randint(0, 1) == 0:
+        return "(" + s + ")"
+    else:
+        return "(?:" + s + ")"
+
+def generateAlt(nChildren, atTop):
+    v = [ generateRE(w, [generateAlt], atTop) for w in chooseLeafWidth(nChildren) ]
+    v = [ r for r in v if r != '' ]    
+    s = string.join(v, "|")
+    if len(v) == 1:
+           return s
+    else:
+        return makeGroup(s)
+
+# Quantifier node: generate a sub-expression and append a random
+# quantifier.  The second parameter is ignored; the sub-expression is
+# always generated with atTop = False.
+def generateQuant(nChildren, atTopIgnored):
+    # Bounds are rounded draws from an exponential distribution with rate
+    # 0.2; hi is lo plus a second draw, so hi >= lo always holds.
+    lo = int(round(expovariate(0.2)))
+    hi = lo + int(round(expovariate(0.2)))
+    q = choice(["*", "?", "+", "{%d}"%lo, "{%d,}"%lo, "{%d,%d}"%(lo,hi)])
+    # generateQuant is suppressed so a quantifier is never applied directly
+    # to another quantifier node.
+    r = generateRE(nChildren, [generateQuant], atTop = False)
+    # Append directly when r is one character, or when it neither starts
+    # with "(" nor ends with ")"; otherwise wrap r in a group first.
+    if (len(r) == 1) or (r[0] != '(' and r[-1] != ")"):  
+        return r + q
+    else:
+        return makeGroup(r) + q
+
+# Leaf: one random character from the module-level 'alphabet' list of
+# character codes.  Both parameters are unused; they are present so every
+# generator can be called the same way.
+def generateChar(nChildren, atTop = False):
+    return chr(choice(alphabet))
+
+def generateNocaseChar(nChildren, atTop = False):
+    'Either generate an uppercase char from the alphabet or a nocase class [Aa]'
+    c = generateChar(nChildren, atTop)
+    # Equal odds: the bare upper-case character, or a two-character class
+    # holding its upper-case and lower-case forms.
+    if random() < 0.5:
+        return c.upper()
+    else:
+        return '[' + c.upper() + c.lower() + ']'
+
+# Leaf: the regex dot metacharacter.  Both parameters are unused.
+def generateDot(nChildren, atTop = False):
+    return "."
+
+# Leaf: a grouped word-boundary assertion, \b or \B with equal odds.
+# Both parameters are unused.
+def generateBoundary(nChildren, atTop = False):
+    # \b, \B in parens so that we can repeat them and still be accepted by
+    # libpcre
+    return makeGroup('\\' + choice('bB'))
+
+# Leaf: a bracketed character class of random alphabet characters.  With
+# probability 0.2 the class is negated ("^" first) and holds 1 to 4
+# characters; otherwise it holds 2 to 4.  Characters are drawn
+# independently, so repeats are possible.  Both parameters are unused.
+def generateCharClass(nChildren, atTop = False):
+    s = ""
+    if random() < 0.2:
+        s = "^"
+        nChars = randint(1,4)
+    else:
+        nChars = randint(2,4)
+
+    for i in xrange(nChars):
+        s += generateChar(1)
+    return "[" + s + "]"
+
+# Leaf: an inline option-setting group of the form (?<on>-<off>).  The
+# "on" and "off" lists are each a non-empty random sample, without
+# repetition, of the letters s, m, i, x.  They are drawn independently, so
+# the same letter can appear on both sides.  Both parameters are unused.
+def generateOptionsFlags(nChildren, atTop = False):
+    allflags = "smix"
+    pos_flags = sample(allflags, randint(1, len(allflags)))
+    neg_flags = sample(allflags, randint(1, len(allflags)))
+    s = '(?' + ''.join(pos_flags) + '-' + ''.join(neg_flags) + ')'
+    return s 
+
+# Logical leaf: a random expression ID as a decimal string, drawn from
+# 0..options.count inclusive.  Both parameters are unused.
+# NOTE(review): the main loop numbers regular expressions 0..count-1 and
+# starts the combination expressions at count, so the value count refers
+# to the first combination rather than a regular expression - confirm
+# whether the inclusive upper bound is intended.
+def generateLogicalId(nChildren, atTop = False):
+    return str(randint(0, options.count))
+
+# Wrap a logical sub-expression in plain parentheses.
+def makeLogicalGroup(s):
+    return "(" + s + ")"
+
+# Logical NOT node: "!" followed by a parenthesised sub-combination.
+# generateLogicalNot is suppressed for the operand, so a NOT is never the
+# direct child of another NOT.  The atTop argument is not used; the
+# operand is always generated with atTop = False.
+def generateLogicalNot(nChildren, atTop):
+    r = generateCombination(nChildren, [generateLogicalNot], atTop = False)
+    return "!" + makeLogicalGroup(r)
+
+def generateLogicalAnd(nChildren, atTop):
+    v = [ generateCombination(w, [generateLogicalAnd], atTop = False) for w in chooseLeafWidth(nChildren) ]
+    v = [ r for r in v if r != '' ]
+    s = string.join(v, "&")
+    if len(v) == 1:
+           return s
+    else:
+        return makeLogicalGroup(s)
+
+def generateLogicalOr(nChildren, atTop):
+    v = [ generateCombination(w, [generateLogicalOr], atTop = False) for w in chooseLeafWidth(nChildren) ]
+    v = [ r for r in v if r != '' ]
+    s = string.join(v, "|")
+    if len(v) == 1:
+           return s
+    else:
+        return makeLogicalGroup(s)
+
+# Weight tables: (generator function, integer weight) pairs.  genChoices()
+# expands each table into a list in which every function appears 'weight'
+# times, so a uniform choice() over that list picks a function with
+# probability proportional to its weight.
+
+# Interior nodes of a regex.
+weightsTree = [
+    (generateConcat, 10),
+    (generateAlt, 3),
+    (generateQuant, 2),
+    ]
+
+# Leaves of a regex.
+weightsLeaf = [
+    (generateChar, 30),
+    (generateCharClass, 5),
+    (generateDot, 5),
+    (generateNocaseChar, 2),
+    (generateBoundary, 1),
+    (generateOptionsFlags, 1)
+    ]
+
+# Interior nodes of a logical combination.
+weightsLogicalTree = [
+    (generateLogicalNot, 1),
+    (generateLogicalAnd, 5),
+    (generateLogicalOr, 5),
+    ]
+
+# Leaves of a logical combination: expression IDs only.
+weightsLogicalLeaf = [
+    (generateLogicalId, 1),
+    ]
+
+# Expand a list of (item, weight) pairs into a flat list that contains
+# each item 'weight' times, in table order.  A uniform random choice over
+# the result is then a weighted choice over the items.
+def genChoices(weighted):
+    r = []
+    for (f, w) in weighted:
+        r = r + [f] * w
+    return r
+
+# Expanded, directly samplable forms of the weight tables.
+choicesTree = genChoices(weightsTree)
+choicesLeaf = genChoices(weightsLeaf)
+choicesLogicalTree = genChoices(weightsLogicalTree)
+choicesLogicalLeaf = genChoices(weightsLogicalLeaf)
+
+# Anchor templates applied by generateRE() when atTop is true.  Each is a
+# %-format string whose single %s receives the generated expression; the
+# plain "%s" entry (weight 25 out of 42) adds no anchor.
+weightsAnchor = [
+    ("\\A%s\\Z", 1),
+    ("\\A%s\\z", 1),
+    ("\\A%s",  4),
+    ("%s\\Z", 2),
+    ("%s\\z", 2),
+    ("^%s$", 1),
+    ("^%s",  4),
+    ("%s$", 2),
+    ("%s", 25)
+    ]
+choicesAnchor = genChoices(weightsAnchor)
+
+# Generate a random regular expression.
+#   nChildren    - size budget; decremented by one here.  When it reaches
+#                  zero a leaf is generated, otherwise an interior node
+#                  that receives the remaining budget.
+#   suppressList - interior generator functions that must not be chosen
+#                  for this node.  The default list is only read, never
+#                  modified.
+#   atTop        - when true, a random anchor template from choicesAnchor
+#                  is wrapped around the result; the flag is also handed
+#                  on to the chosen generator.
+# Returns the expression as a string.
+def generateRE(nChildren, suppressList = [], atTop = False):
+    if atTop:
+        anchorSubstituteString = choice(choicesAnchor)
+    else:
+        anchorSubstituteString = "%s"
+
+    nChildren -= 1
+    if nChildren == 0:
+        res = choice(choicesLeaf)(nChildren, atTop)
+    else:
+        c = [ ch for ch in choicesTree if ch not in suppressList ]
+        res = choice(c)(nChildren, atTop)
+
+    return anchorSubstituteString % res
+
+# Generate a random logical combination expression.  Uses the same budget
+# scheme as generateRE(): nChildren is decremented by one; zero yields a
+# leaf (an expression ID), anything else yields an interior node (NOT, AND
+# or OR) that is not listed in suppressList.  No anchors are added; atTop
+# is only handed on to the chosen generator.
+def generateCombination(nChildren, suppressList = [], atTop = False):
+    nChildren -= 1
+    if nChildren == 0:
+        res = choice(choicesLogicalLeaf)(nChildren, atTop)
+    else:
+        c = [ ch for ch in choicesLogicalTree if ch not in suppressList ]
+        res = choice(c)(nChildren, atTop)
+
+    return res
+
+# Build a random flag string for one expression.  Each candidate flag
+# character is independently kept or dropped with equal probability, and
+# the kept flags stay in the order of the candidate list.  Reads the
+# module-level 'options' produced by the option parser.
+def generateRandomOptions():
+    if options.hybrid:
+        # -H/--hybrid draws from a smaller candidate set.
+        allflags = "smiH8W"
+    else:
+        # Maintain an ordering for consistency.
+        allflags = "smiHV8WLP"
+    flags = ""
+    for f in allflags:
+        flags += choice(['', f])
+    # With -l/--logical, a trailing Q flag is appended half of the time.
+    if options.logical:
+        flags += choice(['', 'Q'])
+    return flags
+
+# Build a random extended-parameter suffix for one expression.
+#   depth    - upper bound used for the random values.
+#   extparam - when false, no parameters are generated at all.
+# Each of four parameter slots is included independently with probability
+# 1/2: min_length and min_offset in 1..depth, max_offset in 1..3*depth,
+# and a distance in 1..3 that is emitted as either edit_distance or
+# hamming_distance, never both.  The bounds are drawn independently, so
+# min_offset may exceed max_offset.
+# Returns "{k=v,...}", or "" when nothing was selected.
+def generateRandomExtParam(depth, extparam):
+    if not extparam:
+        return ""
+    params = []
+    if choice((False, True)):
+        params.append("min_length=%u" % randint(1, depth))
+    if choice((False, True)):
+        params.append("min_offset=%u" % randint(1, depth))
+    if choice((False, True)):
+        params.append("max_offset=%u" % randint(1, depth*3))
+    if choice((False, True)):
+        dist = randint(1, 3)
+        if choice((False, True)):
+            params.append("edit_distance=%u" % dist)
+        else:
+            params.append("hamming_distance=%u" % dist)
+    if params:
+        return "{" + ",".join(params) + "}"
+    else:
+        return ""
+
+# Command line: generation depth, expression count, alphabet size and the
+# optional nocase / extparam / logical / hybrid modes.  No positional
+# arguments are accepted.
+parser = OptionParser()
+parser.add_option("-d", "--depth",
+                  action="store", type="int", dest="depth", default=200,
+                  help="Depth of generation (akin to maximum length)")
+parser.add_option("-c", "--count",
+                  action="store", type="int", dest="count", default=1000,
+                  help="Number of expressions to generate")
+parser.add_option("-a", "--alphabet",
+                  action="store", type="int", dest="alphabet", default=26,
+                  help="Size of alphabet to generate character expressions over (starting with lowercase 'a')")
+parser.add_option("-i", "--nocase",
+                  action="store_true", dest="nocase",
+                  help="Use a caseless alphabet for character generation")
+parser.add_option("-x", "--extparam",
+                  action="store_true", dest="extparam",
+                  help="Generate random extended parameters")
+parser.add_option("-l", "--logical",
+                  action="store_true", dest="logical",
+                  help="Generate logical combination expressions")
+parser.add_option("-H", "--hybrid",
+                  action="store_true", dest="hybrid",
+                  help="Generate random flags for hybrid mode")
+
+(options, args) = parser.parse_args()
+if len(args) != 0:
+    parser.error("incorrect number of arguments")
+
+# Character codes that generateChar() draws from: the first
+# options.alphabet codes starting at 'a', plus the same number of codes
+# starting at 'A' when --nocase is given.
+alphabet = range(ord('a'), ord('a') + options.alphabet)
+if options.nocase:
+    alphabet += range(ord('A'), ord('A') + options.alphabet)
+    
+# Emit options.count expressions as  ID:/regex/flags{extparams} , with IDs
+# 0..count-1 zero-padded to eight digits.
+for i in xrange(0, options.count):
+    print "%08d:/%s/%s%s" % (i, generateRE(randint(1, options.depth), atTop = True), generateRandomOptions(), generateRandomExtParam(options.depth, options.extparam))
+
+# With --logical, also emit a fixed 3000 combination expressions (flag C),
+# numbered upward from options.count.
+if options.logical:
+    for i in xrange(options.count, options.count + 3000):
+        print "%08d:/%s/C" % (i, generateCombination(randint(1, options.depth), atTop = True))
diff --git a/tools/fuzz/limited_dict.txt b/tools/fuzz/limited_dict.txt
new file mode 100644 (file)
index 0000000..7c3daf4
--- /dev/null
@@ -0,0 +1,9 @@
+hatstand
+teakettle
+badgerbrush
+mnemosyne
+rapscallion
+acerbic
+blackhat
+rufous
+echolalia