From 8725e565a78caffae79584c6ec48670ca71d6618 Mon Sep 17 00:00:00 2001
From: Jason Ish
Date: Tue, 5 Mar 2024 17:12:55 -0600
Subject: [PATCH] sources: give each file from a source a unique filename

To prevent dataset files from different sources from overwriting each
other, give each downloaded and extracted file a prefix based on a
hash of the URL. This ensures unique filenames across all rulesets.

This mostly matters for datasets: by the time datasets are processed
we are working with a merged set of filenames, unlike rules, which are
parsed much earlier while we still have a per-source list of files.

Not the most elegant solution, but it saves a rather large refactor.

Bug: #6833
---
 CHANGELOG.md            | 4 ++++
 suricata/update/main.py | 7 ++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a231f00..738c000 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,10 @@
 - Don't base dataset filenames on the contents of the file, but
   instead the filename path:
   https://redmine.openinfosecfoundation.org/issues/6763
+- Give each file in a source a unique filename by prefixing the files
+  with a hash of the URL to prevent duplicate filenames from
+  clobbering each other, in particular dataset files:
+  https://redmine.openinfosecfoundation.org/issues/6833
 
 ## 1.3.0 - 2023-07-07
 
diff --git a/suricata/update/main.py b/suricata/update/main.py
index d41944e..a1e9e70 100644
--- a/suricata/update/main.py
+++ b/suricata/update/main.py
@@ -985,9 +985,14 @@ def load_sources(suricata_version):
     # Now download each URL.
     files = []
     for url in urls:
+
+        # To de-duplicate filenames, add a prefix that is a hash of the URL.
+        prefix = hashlib.md5(url[0].encode()).hexdigest()
         source_files = Fetch().run(url)
         for key in source_files:
-            files.append(SourceFile(key, source_files[key]))
+            content = source_files[key]
+            key = format("{}/{}".format(prefix, key))
+            files.append(SourceFile(key, content))
 
     # Now load local rules.
     if config.get("local") is not None:
-- 
2.47.3
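
As a minimal sketch of the naming scheme the patch implements: each file
from a source is keyed as "<md5-of-source-url>/<original-name>", so two
sources that ship the same dataset filename no longer collide. The
prefixed_name helper and the example URLs below are illustrative only,
not part of the patch.

    import hashlib

    def prefixed_name(url, filename):
        # Hash the source URL so identically named files from different
        # sources land under different keys (illustrative helper).
        prefix = hashlib.md5(url.encode()).hexdigest()
        return "{}/{}".format(prefix, filename)

    # Same filename from two different sources -> two distinct keys.
    print(prefixed_name("https://example.com/ruleset-a.tar.gz", "mydata.lst"))
    print(prefixed_name("https://example.org/ruleset-b.tar.gz", "mydata.lst"))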