git.ipfire.org Git - thirdparty/vectorscan.git/commitdiff
Introduce copy_bytes for writing into bytecode
author: Justin Viiret <justin.viiret@intel.com>
Thu, 12 Nov 2015 04:27:11 +0000 (15:27 +1100)
committer: Matthew Barr <matthew.barr@intel.com>
Wed, 18 Nov 2015 04:26:16 +0000 (15:26 +1100)
Protects memcpy from nullptr sources, which trigger failures in GCC's
UB sanitizer.

src/nfa/goughcompile.cpp
src/nfa/limex_compile.cpp
src/nfagraph/ng_lbr.cpp
src/rose/rose_build_bytecode.cpp
src/util/container.h

index d2de7b95b74ea0515bdf9b0db4f18165b2e71440..d735c80acc2d9bd91ae6dcc645caabee734e844f 100644 (file)
@@ -1136,16 +1136,11 @@ aligned_unique_ptr<NFA> goughCompile(raw_som_dfa &raw, u8 somPrecision,
     gough_dfa->length = gough_size;
 
     /* copy in blocks */
-    memcpy((u8 *)gough_dfa.get() + edge_prog_offset, &edge_blocks[0],
-           byte_length(edge_blocks));
+    copy_bytes((u8 *)gough_dfa.get() + edge_prog_offset, edge_blocks);
     if (top_prog_offset) {
-        memcpy((u8 *)gough_dfa.get() + top_prog_offset, &top_blocks[0],
-               byte_length(top_blocks));
-    }
-    if (!temp_blocks.empty()) {
-        memcpy((u8 *)gough_dfa.get() + prog_base_offset, &temp_blocks[0],
-               byte_length(temp_blocks));
+        copy_bytes((u8 *)gough_dfa.get() + top_prog_offset, top_blocks);
     }
+    copy_bytes((u8 *)gough_dfa.get() + prog_base_offset, temp_blocks);
 
     return gough_dfa;
 }
index 5cf46334e65b4b797df92daa97af38e24a95d83c..a6c34cb6625cd1adf883e1689eb3d4a6955fa11b 100644 (file)
@@ -1397,8 +1397,7 @@ struct Factory {
             repeat->horizon = rsi.horizon;
             repeat->packedCtrlSize = rsi.packedCtrlSize;
             repeat->stateSize = rsi.stateSize;
-            memcpy(repeat->packedFieldSizes, rsi.packedFieldSizes.data(),
-                   byte_length(rsi.packedFieldSizes));
+            copy_bytes(repeat->packedFieldSizes, rsi.packedFieldSizes);
             repeat->patchCount = rsi.patchCount;
             repeat->patchSize = rsi.patchSize;
             repeat->encodingSize = rsi.encodingSize;
@@ -1413,8 +1412,7 @@ struct Factory {
             // Copy in the sparse lookup table.
             if (br.type == REPEAT_SPARSE_OPTIMAL_P) {
                 assert(!rsi.table.empty());
-                memcpy(info_ptr + tableOffset, rsi.table.data(),
-                       byte_length(rsi.table));
+                copy_bytes(info_ptr + tableOffset, rsi.table);
             }
 
             // Fill the tug mask.
@@ -1702,6 +1700,7 @@ struct Factory {
 
         for (u32 i = 0; i < num_repeats; i++) {
             repeatOffsets[i] = offset;
+            assert(repeats[i].first);
             memcpy((char *)limex + offset, repeats[i].first.get(),
                    repeats[i].second);
             offset += repeats[i].second;
@@ -1709,8 +1708,7 @@ struct Factory {
 
         // Write repeat offset lookup table.
         assert(ISALIGNED_N((char *)limex + repeatOffsetsOffset, alignof(u32)));
-        memcpy((char *)limex + repeatOffsetsOffset, repeatOffsets.data(),
-               byte_length(repeatOffsets));
+        copy_bytes((char *)limex + repeatOffsetsOffset, repeatOffsets);
 
         limex->repeatOffset = repeatOffsetsOffset;
         limex->repeatCount = num_repeats;
@@ -1725,8 +1723,7 @@ struct Factory {
         limex->exReportOffset = exceptionReportsOffset;
         assert(ISALIGNED_N((char *)limex + exceptionReportsOffset,
                            alignof(ReportID)));
-        memcpy((char *)limex + exceptionReportsOffset, reports.data(),
-               byte_length(reports));
+        copy_bytes((char *)limex + exceptionReportsOffset, reports);
     }
 
     static
index 11eded69fff603ce431e6152fc32757675f5ce60..b9cacaa753528d31240bc4ee6c297beda2e891a9 100644 (file)
@@ -98,8 +98,7 @@ void fillNfa(NFA *nfa, lbr_common *c, ReportID report, const depth &repeatMin,
     info->packedCtrlSize = rsi.packedCtrlSize;
     info->horizon = rsi.horizon;
     info->minPeriod = minPeriod;
-    memcpy(&info->packedFieldSizes, rsi.packedFieldSizes.data(),
-           byte_length(rsi.packedFieldSizes));
+    copy_bytes(&info->packedFieldSizes, rsi.packedFieldSizes);
     info->patchCount = rsi.patchCount;
     info->patchSize = rsi.patchSize;
     info->encodingSize = rsi.encodingSize;
@@ -122,7 +121,7 @@ void fillNfa(NFA *nfa, lbr_common *c, ReportID report, const depth &repeatMin,
         nfa->length = verify_u32(len);
         info->length = verify_u32(sizeof(RepeatInfo)
                                   + sizeof(u64a) * (rsi.patchSize + 1));
-        memcpy(table, rsi.table.data(), byte_length(rsi.table));
+        copy_bytes(table, rsi.table);
     }
 }
 
index bbc8644eeeb82c9afe938e688383cbadce9b46f6..e17953aad598e6238d7f361d4475be5ee6216f5c 100644 (file)
@@ -2687,12 +2687,6 @@ void fillInReportInfo(RoseEngine *engine, u32 reportOffset,
                  sizeof(internal_report));
 }
 
-static
-void populateInvDkeyTable(char *ptr, const ReportManager &rm) {
-    vector<ReportID> table = rm.getDkeyToReportTable();
-    memcpy(ptr, table.data(), byte_length(table));
-}
-
 static
 bool hasSimpleReports(const vector<Report> &reports) {
     auto it = find_if(reports.begin(), reports.end(), isComplexReport);
@@ -4154,7 +4148,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
     engine->ekeyCount = rm.numEkeys();
     engine->dkeyCount = rm.numDkeys();
     engine->invDkeyOffset = dkeyOffset;
-    populateInvDkeyTable(ptr + dkeyOffset, rm);
+    copy_bytes(ptr + dkeyOffset, rm.getDkeyToReportTable());
 
     engine->somHorizon = ssm.somPrecision();
     engine->somLocationCount = ssm.numSomSlots();
@@ -4314,33 +4308,22 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
     buildLitBenefits(*this, engine.get(), base_lits_benefits_offset);
 
     // Copy in other tables
-    memcpy(ptr + bc.engine_blob_base, bc.engine_blob.data(),
-           byte_length(bc.engine_blob));
-
-    memcpy(ptr + engine->literalOffset, literalTable.data(),
-           byte_length(literalTable));
-    memcpy(ptr + engine->roleOffset, bc.roleTable.data(),
-           byte_length(bc.roleTable));
-    copy(leftInfoTable.begin(), leftInfoTable.end(),
-         (LeftNfaInfo *)(ptr + engine->leftOffset));
+    copy_bytes(ptr + bc.engine_blob_base, bc.engine_blob);
+    copy_bytes(ptr + engine->literalOffset, literalTable);
+    copy_bytes(ptr + engine->roleOffset, bc.roleTable);
+    copy_bytes(ptr + engine->leftOffset, leftInfoTable);
 
     fillLookaroundTables(ptr + lookaroundTableOffset,
                          ptr + lookaroundReachOffset, bc.lookaround);
 
     fillInSomRevNfas(engine.get(), ssm, rev_nfa_table_offset, rev_nfa_offsets);
-    memcpy(ptr + engine->predOffset, predTable.data(), byte_length(predTable));
-    memcpy(ptr + engine->rootRoleOffset, rootRoleTable.data(),
-           byte_length(rootRoleTable));
-    memcpy(ptr + engine->anchoredReportMapOffset, art.data(), byte_length(art));
-    memcpy(ptr + engine->anchoredReportInverseMapOffset, arit.data(),
-           byte_length(arit));
-    memcpy(ptr + engine->multidirectOffset, mdr_reports.data(),
-           byte_length(mdr_reports));
-
-    copy(activeLeftIter.begin(), activeLeftIter.end(),
-         (mmbit_sparse_iter *)(ptr + engine->activeLeftIterOffset));
-
-    memcpy(ptr + engine->sideOffset, sideTable.data(), byte_length(sideTable));
+    copy_bytes(ptr + engine->predOffset, predTable);
+    copy_bytes(ptr + engine->rootRoleOffset, rootRoleTable);
+    copy_bytes(ptr + engine->anchoredReportMapOffset, art);
+    copy_bytes(ptr + engine->anchoredReportInverseMapOffset, arit);
+    copy_bytes(ptr + engine->multidirectOffset, mdr_reports);
+    copy_bytes(ptr + engine->activeLeftIterOffset, activeLeftIter);
+    copy_bytes(ptr + engine->sideOffset, sideTable);
 
     DEBUG_PRINTF("rose done %p\n", engine.get());
     return engine;
index b4a10c8916508c66cca30d44c0d81e2a50bbc7bb..62e841c14e529708558989e57dc5c59b5596a2f8 100644 (file)
 #ifndef UTIL_CONTAINER_H
 #define UTIL_CONTAINER_H
 
+#include "ue2common.h"
+
 #include <algorithm>
+#include <cassert>
+#include <cstring>
 #include <set>
+#include <type_traits>
 #include <utility>
 
 namespace ue2 {
@@ -92,11 +97,35 @@ std::set<typename C::key_type> assoc_keys(const C &container) {
     return keys;
 }
 
+/**
+ * \brief Return the length in bytes of the given vector of (POD) objects.
+ */
 template<typename T>
 typename std::vector<T>::size_type byte_length(const std::vector<T> &vec) {
+    static_assert(std::is_pod<T>::value, "should be pod");
     return vec.size() * sizeof(T);
 }
 
+/**
+ * \brief Copy the given vector of POD objects to the given location in memory.
+ * It is safe to give this function an empty vector.
+ */
+template<typename T>
+void *copy_bytes(void *dest, const std::vector<T> &vec) {
+    static_assert(std::is_pod<T>::value, "should be pod");
+    assert(dest);
+
+    // Since we're generally using this function to write into the bytecode,
+    // dest should be appropriately aligned for T.
+    assert(ISALIGNED_N(dest, alignof(T)));
+
+    if (vec.empty()) {
+        return dest; // Protect memcpy against null pointers.
+    }
+    assert(vec.data() != nullptr);
+    return std::memcpy(dest, vec.data(), byte_length(vec));
+}
+
 template<typename OrderedContainer1, typename OrderedContainer2>
 bool is_subset_of(const OrderedContainer1 &small, const OrderedContainer2 &big) {
     static_assert(std::is_same<typename OrderedContainer1::value_type,