git.ipfire.org Git - thirdparty/vectorscan.git/commitdiff
hscollider: fix UTF8 check for patterns
author Wang Xiang W <xiang.w.wang@intel.com>
Wed, 8 Apr 2020 15:26:05 +0000 (11:26 -0400)
committer Hong, Yang A <yang.a.hong@intel.com>
Mon, 25 May 2020 13:47:53 +0000 (13:47 +0000)
tools/hscollider/GroundTruth.cpp

index f30a8f5eb71dddf06cee5c689702b92e8bcb55d7..a2673063c153483e5f5562ec74ae3884ecd5a5b3 100644 (file)
@@ -241,6 +241,13 @@ void addCallout(string &re) {
     re.append("\\E)(?C)");
 }
 
+static
+bool isUtf8(const CompiledPcre &compiled) {
+    unsigned long int options = 0;
+    pcre_fullinfo(compiled.bytecode, NULL, PCRE_INFO_OPTIONS, &options);
+    return options & PCRE_UTF8;
+}
+
 unique_ptr<CompiledPcre>
 GroundTruth::compile(unsigned id, bool no_callouts) {
     bool highlander = false;
@@ -380,6 +387,8 @@ GroundTruth::compile(unsigned id, bool no_callouts) {
         throw PcreCompileFailure(oss.str());
     }
 
+    compiled->utf8 |= isUtf8(*compiled);
+
     return compiled;
 }
 
@@ -451,13 +460,6 @@ int scanBasic(const CompiledPcre &compiled, const string &buffer,
     return ret;
 }
 
-static
-bool isUtf8(const CompiledPcre &compiled) {
-    unsigned long int options = 0;
-    pcre_fullinfo(compiled.bytecode, NULL, PCRE_INFO_OPTIONS, &options);
-    return options & PCRE_UTF8;
-}
-
 static
 CaptureVec makeCaptureVec(const vector<int> &ovector, int ret) {
     assert(ret > 0);