From: Jason Ish
Date: Tue, 12 Mar 2024 19:02:26 +0000 (-0600)
Subject: filehashes: fix for unique filehash filenames
X-Git-Tag: 1.3.2~3
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=344802255773ea952cdbecf71b51d93573be2e4e;p=thirdparty%2Fsuricata-update.git

filehashes: fix for unique filehash filenames

Commit 8725e565a78caffae79584c6ec48670ca71d6618 gave each downloaded
file a unique name so dataset files from different sources wouldn't
clobber each other, but this was applied to all files, breaking file
hash lists, as that code wasn't updated for the new filename scheme.

Update the file hash handling code to find files based on the filename
prefix of the rule that references them.

Bug: #6854
---

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5f1902f..2c53882 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,10 @@
 # Change Log
 
+## 1.3.2 - unreleased
+- Fix copying of file hash lists which was broken in the dataset fix
+  as part of ticket #6833:
+  https://redmine.openinfosecfoundation.org/issues/6854
+
 ## 1.3.1 - 2024-03-11
 - Fix detecting dataset "load" when preceded by a space:
   https://redmine.openinfosecfoundation.org/issues/6777
diff --git a/suricata/update/main.py b/suricata/update/main.py
index a1e9e70..18af7a8 100644
--- a/suricata/update/main.py
+++ b/suricata/update/main.py
@@ -447,7 +447,7 @@ def handle_dataset_files(rule, dep_files):
     prefix = os.path.dirname(rule.group)
 
     # Construct the source filename.
-    source_filename = "{}/{}".format(prefix, dataset_filename)
+    source_filename = os.path.join(prefix, dataset_filename)
 
     # If a source filename starts with a "/", look for it on the filesystem. The archive
     # unpackers will take care of removing a leading / so this shouldn't happen for
@@ -483,10 +483,19 @@ def handle_filehash_files(rule, dep_files, fhash):
     if not rule.enabled:
         return
     filehash_fname = rule.get(fhash)
-    filename = [fname for fname, content in dep_files.items() if os.path.join(*(fname.split(os.path.sep)[1:])) == filehash_fname]
-    if filename:
+
+    # Get the directory name the rule is from.
+    prefix = os.path.dirname(rule.group)
+
+    source_filename = os.path.join(prefix, filehash_fname)
+    dest_filename = source_filename[len(prefix) + len(os.path.sep):]
+    logger.debug("dest_filename={}".format(dest_filename))
+
+    if source_filename not in dep_files:
+        logger.error("{} file {} was not found".format(fhash, filehash_fname))
+    else:
         logger.debug("Copying %s file %s to output directory" % (fhash, filehash_fname))
-        filepath = os.path.join(config.get_state_dir(), os.path.dirname(filename[0]))
+        filepath = os.path.join(config.get_output_dir(), os.path.dirname(dest_filename))
         logger.debug("filepath: %s" % filepath)
         try:
             os.makedirs(filepath)
@@ -494,11 +503,10 @@
             if oserr.errno != errno.EEXIST:
                 logger.error(oserr)
                 sys.exit(1)
-        logger.debug("output fname: %s" % os.path.join(filepath, os.path.basename(filehash_fname)))
-        with open(os.path.join(filepath, os.path.basename(filehash_fname)), "w+") as fp:
-            fp.write(dep_files[os.path.join("rules", filehash_fname)].decode("utf-8"))
-    else:
-        logger.error("{} file {} was not found".format(fhash, filehash_fname))
+        output_filename = os.path.join(filepath, os.path.basename(filehash_fname))
+        logger.debug("output fname: %s" % output_filename)
+        with open(output_filename, "w") as fp:
+            fp.write(dep_files[source_filename].decode("utf-8"))
 
 
 def write_merged(filename, rulemap, dep_files):
@@ -991,7 +999,7 @@ def load_sources(suricata_version):
             source_files = Fetch().run(url)
             for key in source_files:
                 content = source_files[key]
-                key = format("{}/{}".format(prefix, key))
+                key = os.path.join(prefix, key)
                 files.append(SourceFile(key, content))
 
     # Now load local rules.
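---

For reference, a minimal standalone sketch of the new prefix-based
lookup. The source prefix, rule group, and file names below are
hypothetical; in suricata-update itself, dep_files is built from the
downloaded sources and rule.group carries the per-source prefix
introduced in 8725e56:

    import os

    # Hypothetical dependency map: keys are paths as unpacked from a
    # source, prefixed with that source's unique directory name.
    dep_files = {
        os.path.join("0123abc", "md5-blocklist"): b"44d88612fea8a8f36de82e1278abb02f\n",
    }

    rule_group = os.path.join("0123abc", "example.rules")  # hypothetical rule.group
    filehash_fname = "md5-blocklist"  # filename referenced by the rule's filemd5 option

    # Look the file up under the same per-source prefix as the rule that
    # references it, instead of assuming a fixed "rules/" prefix.
    prefix = os.path.dirname(rule_group)
    source_filename = os.path.join(prefix, filehash_fname)

    # Strip the prefix again to get the path relative to the output directory.
    dest_filename = source_filename[len(prefix) + len(os.path.sep):]

    assert source_filename in dep_files
    assert dest_filename == filehash_fname

Using os.path.join throughout, rather than "{}/{}".format(...), keeps
the dep_files keys and this lookup consistent with the platform path
separator, which is why the dataset and load_sources() paths were
switched over in the same change.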