From 8725e565a78caffae79584c6ec48670ca71d6618 Mon Sep 17 00:00:00 2001
From: Jason Ish
Date: Tue, 5 Mar 2024 17:12:55 -0600
Subject: [PATCH] sources: give each file from a source a unique filename

To prevent dataset files from different sources from overwriting each
other, give each downloaded and extracted file a prefix based on a
hash of the URL. This ensures unique filenames across all rulesets.

This mostly matters for datasets: by the time datasets are processed
we are working with a merged set of filenames, unlike rules, which are
parsed much earlier while we still have a per-source list of files.

Not the most elegant solution, but it saves a rather large refactor.

Bug: #6833
---
 CHANGELOG.md            | 4 ++++
 suricata/update/main.py | 7 ++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a231f00..738c000 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,10 @@
 - Don't base dataset filenames on the contents of the file, but
   instead the filename path:
   https://redmine.openinfosecfoundation.org/issues/6763
+- Give each file in a source a unique filename by prefixing the files
+  with a hash of the URL to prevent duplicate filenames from
+  clobbering each other, in particular dataset files:
+  https://redmine.openinfosecfoundation.org/issues/6833
 
 ## 1.3.0 - 2023-07-07
 
diff --git a/suricata/update/main.py b/suricata/update/main.py
index d41944e..a1e9e70 100644
--- a/suricata/update/main.py
+++ b/suricata/update/main.py
@@ -985,9 +985,14 @@ def load_sources(suricata_version):
     # Now download each URL.
     files = []
     for url in urls:
+
+        # To de-duplicate filenames, add a prefix that is a hash of the URL.
+        prefix = hashlib.md5(url[0].encode()).hexdigest()
         source_files = Fetch().run(url)
         for key in source_files:
-            files.append(SourceFile(key, source_files[key]))
+            content = source_files[key]
+            key = format("{}/{}".format(prefix, key))
+            files.append(SourceFile(key, content))
 
     # Now load local rules.
     if config.get("local") is not None:
-- 
2.47.3
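
As a minimal sketch of the naming scheme the patch implements: each file
from a source is keyed as "<md5-of-source-url>/<original-name>", so two
sources that ship the same dataset filename no longer collide. The
prefixed_name helper and the example URLs below are illustrative only,
not part of the patch.

    import hashlib

    def prefixed_name(url, filename):
        # Hash the source URL so identically named files from different
        # sources land under different keys (illustrative helper).
        prefix = hashlib.md5(url.encode()).hexdigest()
        return "{}/{}".format(prefix, filename)

    # Same filename from two different sources -> two distinct keys.
    print(prefixed_name("https://example.com/ruleset-a.tar.gz", "mydata.lst"))
    print(prefixed_name("https://example.org/ruleset-b.tar.gz", "mydata.lst"))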