git.ipfire.org Git - thirdparty/vectorscan.git/commitdiff
prefilter: workaround for \b in UCP and !UTF8 mode
author: Justin Viiret <justin.viiret@intel.com>
Wed, 18 Jan 2017 00:33:57 +0000 (11:33 +1100)
committer: Matthew Barr <matthew.barr@intel.com>
Thu, 19 Jan 2017 22:19:51 +0000 (09:19 +1100)
For now, just drop the \b word-boundary assertion in this mode (the pattern
will still return a superset of matches, as per prefiltering semantics).

src/parser/prefilter.cpp

index ea58a134fbfede7a836287f8b4f1cdb9cbb0dcae..f69362e4e330557a97d652afdea32abc4f5a243f 100644 (file)
@@ -295,6 +295,16 @@ public:
 
     Component *visit(ComponentWordBoundary *c) override {
         assert(c);
+
+        // TODO: We do not yet have correct code for resolving word boundaries
+        // when prefiltering with UCP on and UTF-8 *off*. Until then, replace
+        // the assertion with a new, empty sequence; dropping it can only
+        // widen the match set (a superset), which prefiltering permits.
+        if (mode.ucp && !mode.utf8) {
+            return new ComponentSequence();
+        }
+
+        // All other cases can be prefiltered: flag the boundary and keep it.
         c->setPrefilter(true);
         return c;
     }