]> git.ipfire.org Git - thirdparty/suricata-update.git/commitdiff
datasets: base the dataset filename on the source filename, not its content
authorJason Ish <jason.ish@oisf.net>
Tue, 5 Mar 2024 22:54:13 +0000 (16:54 -0600)
committerJason Ish <jason.ish@oisf.net>
Tue, 5 Mar 2024 23:11:17 +0000 (17:11 -0600)
By using a hash of the content, a new file was created every time the
dataset was updated and never cleaned up. To address this, use a
filename that doesn't change based on the content.

Bug: #6763

CHANGELOG.md
suricata/update/main.py

index 71f7d7f8eab3052ab0bec64d6a53ebc6fa71861a..a231f002c1655a5f29afc2836a14dbe479f44c01 100644 (file)
@@ -7,6 +7,9 @@
   instead of 4.0.0.
 - Handle URLs of bare files that don't end in .rules:
   https://redmine.openinfosecfoundation.org/issues/3664
+- Don't base dataset filenames on the contents of the file, but
+  instead the filename path:
+  https://redmine.openinfosecfoundation.org/issues/6763
 
 ## 1.3.0 - 2023-07-07
 
index 9d2e36d1adf164a467b9d7a1da4f852b8d80ae42..d41944e2049ca96a7f2bac6cbf6f9bde7d18a6ae 100644 (file)
@@ -465,9 +465,9 @@ def handle_dataset_files(rule, dep_files):
             return
         dataset_contents = dep_files[source_filename]
 
-    content_hash = hashlib.md5(dataset_contents).hexdigest()
-    new_rule = re.sub(r"(dataset.*?load\s+){}".format(dataset_filename), r"\g<1>datasets/{}".format(content_hash), rule.format())
-    dest_filename = os.path.join(config.get_output_dir(), "datasets", content_hash)
+    source_filename_hash = hashlib.md5(source_filename.encode()).hexdigest()
+    new_rule = re.sub(r"(dataset.*?load\s+){}".format(dataset_filename), r"\g<1>datasets/{}".format(source_filename_hash), rule.format())
+    dest_filename = os.path.join(config.get_output_dir(), "datasets", source_filename_hash)
     dest_dir = os.path.dirname(dest_filename)
     logger.debug("Copying dataset file {} to {}".format(dataset_filename, dest_filename))
     try: