From: Jason Ish
Date: Mon, 11 Apr 2022 19:53:24 +0000 (-0600)
Subject: datasets: dataset handling for --local files
X-Git-Tag: 1.3.0rc1~24
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c2926608523531e6eb38b6f50e99f9453bf2a29d;p=thirdparty%2Fsuricata-update.git

datasets: dataset handling for --local files

Look for datasets on the filesystem when referenced from rules loaded
with "--local".
---

diff --git a/suricata/update/extract.py b/suricata/update/extract.py
index 65e7b3e..20e4156 100644
--- a/suricata/update/extract.py
+++ b/suricata/update/extract.py
@@ -34,7 +34,9 @@ def extract_tar(filename):
                 continue
             fileobj = tf.extractfile(member)
             if fileobj:
-                files[member.name] = fileobj.read()
+                # Remove leading /.
+                member_name = member.name.lstrip("/")
+                files[member_name] = fileobj.read()
     finally:
         tf.close()
 
@@ -47,7 +49,8 @@ def extract_zip(filename):
         for name in reader.namelist():
             if name.endswith("/"):
                 continue
-            files[name] = reader.read(name)
+            fixed_name = name.lstrip("/")
+            files[fixed_name] = reader.read(name)
 
     return files
 
diff --git a/suricata/update/main.py b/suricata/update/main.py
index 29203fc..ef4c438 100644
--- a/suricata/update/main.py
+++ b/suricata/update/main.py
@@ -297,6 +297,7 @@ def load_local(local, files):
         if len(local_files) == 0:
             local_files.append(local)
     for filename in local_files:
+        filename = os.path.realpath(filename)
         logger.info("Loading local file %s" % (filename))
         if filename in files:
             logger.warn(
@@ -437,23 +438,35 @@ def handle_dataset_files(rule, dep_files):
     # Construct the source filename.
     source_filename = "{}/{}".format(prefix, dataset_filename)
 
-    if source_filename in dep_files:
-        content_hash = hashlib.md5(dep_files[source_filename]).hexdigest()
-        new_rule = re.sub("(dataset.*?load\s+){}".format(dataset_filename), "\g<1>datasets/{}".format(content_hash), rule.format())
-        dest_filename = os.path.join(config.get_output_dir(), "datasets", content_hash)
-        dest_dir = os.path.dirname(dest_filename)
-        logger.debug("Copying dataset file {} to {}".format(dataset_filename, dest_filename))
-        try:
-            os.makedirs(dest_dir, exist_ok=True)
-        except Exception as err:
-            logger.error("Failed to create directory {}: {}".format(dest_dir, err))
+    # If a source filename starts with a "/", look for it on the filesystem. The archive
+    # unpackers will take care of removing a leading / so this shouldn't happen for
+    # downloaded rulesets.
+    if source_filename.startswith("/"):
+        if not os.path.exists(source_filename):
+            logger.warn("Local dataset file '{}' was not found for rule {}, rule will be disabled".format(source_filename, rule.idstr))
+            rule.enabled = False
             return
-        with open(dest_filename, "w") as fp:
-            fp.write(dep_files[source_filename].decode("utf-8"))
-        return new_rule
+        dataset_contents = open(source_filename, "rb").read()
     else:
-        logger.warn("Dataset file '{}' was not found for rule {}, rule will be disabled".format(dataset_filename, rule.idstr))
-        rule.enabled = False
+        if not source_filename in dep_files:
+            logger.warn("Dataset file '{}' was not found for rule {}, rule will be disabled".format(dataset_filename, rule.idstr))
+            rule.enabled = False
+            return
+        dataset_contents = dep_files[source_filename]
+
+    content_hash = hashlib.md5(dataset_contents).hexdigest()
+    new_rule = re.sub("(dataset.*?load\s+){}".format(dataset_filename), "\g<1>datasets/{}".format(content_hash), rule.format())
+    dest_filename = os.path.join(config.get_output_dir(), "datasets", content_hash)
+    dest_dir = os.path.dirname(dest_filename)
+    logger.debug("Copying dataset file {} to {}".format(dataset_filename, dest_filename))
+    try:
+        os.makedirs(dest_dir, exist_ok=True)
+    except Exception as err:
+        logger.error("Failed to create directory {}: {}".format(dest_dir, err))
+        return
+    with open(dest_filename, "w") as fp:
+        fp.write(dataset_contents.decode("utf-8"))
+    return new_rule
 
 def handle_filehash_files(rule, dep_files, fhash):
     if not rule.enabled:
@@ -570,22 +583,20 @@ def write_to_directory(directory, files, rulemap, dep_files):
             content = []
             for line in io.StringIO(file.content.decode("utf-8")):
                 rule = rule_mod.parse(line)
-                if not rule:
+                if not rule or rule.id not in rulemap:
                     content.append(line.strip())
                 else:
+                    reformatted = None
                     for kw in file_kw:
                         if kw in rule:
                             if "dataset" == kw:
-                                handle_dataset_files(rule, dep_files)
+                                reformatted = handle_dataset_files(rulemap[rule.id], dep_files)
                             else:
-                                handle_filehash_files(rule, dep_files, kw)
-                    if rule.id in rulemap:
-                        content.append(rulemap[rule.id].format())
+                                handle_filehash_files(rulemap[rule.id], dep_files, kw)
+                    if reformatted:
+                        content.append(reformatted)
                     else:
-                        # Just pass the input through. Most likey a
-                        # rule from a file that was ignored, but we'll
-                        # still pass it through.
-                        content.append(line.strip())
+                        content.append(rulemap[rule.id].format())
         tmp_filename = ".".join([outpath, "tmp"])
         io.open(tmp_filename, encoding="utf-8", mode="w").write(
             u"\n".join(content))
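
The lstrip("/") added to both archive unpackers keeps every key in the
dep_files dict free of a leading "/", so a source_filename that starts with
"/" can only refer to the local filesystem. A minimal standalone sketch of
the normalized tar walk, assuming the same tarfile-based approach as
extract.py (the "r:*" mode here is a simplification; the real code derives
the mode from the file extension):

    import tarfile

    def extract_tar(filename):
        files = {}
        # "r:*" lets tarfile pick the compression transparently.
        with tarfile.open(filename, mode="r:*") as tf:
            for member in tf.getmembers():
                if not member.isfile():
                    continue
                fileobj = tf.extractfile(member)
                if fileobj:
                    # Drop a leading "/" so archive member names can never
                    # be mistaken for the absolute paths that mark --local
                    # dataset files.
                    files[member.name.lstrip("/")] = fileobj.read()
        return files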
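
Together with the os.path.realpath() call in load_local(), the new branch in
handle_dataset_files() lets a rule loaded via --local name a dataset by
absolute path, then relocates the file under an MD5 content hash. Below is a
standalone sketch of that flow; the helper name rewrite_local_dataset, the
rule text, and the paths are illustrative rather than part of the patch, and
re.escape() is added here for safety even though the patch interpolates the
filename into the pattern unescaped:

    import hashlib
    import os
    import re

    def rewrite_local_dataset(rule_text, dataset_filename, output_dir):
        # Absolute dataset paths are read straight from the filesystem;
        # the patch disables the rule instead when the file is missing.
        if not os.path.exists(dataset_filename):
            return None
        with open(dataset_filename, "rb") as fp:
            dataset_contents = fp.read()

        # Copy the dataset to <output_dir>/datasets/<md5-of-contents>,
        # mirroring what handle_dataset_files() does.
        content_hash = hashlib.md5(dataset_contents).hexdigest()
        dest_filename = os.path.join(output_dir, "datasets", content_hash)
        os.makedirs(os.path.dirname(dest_filename), exist_ok=True)
        with open(dest_filename, "wb") as fp:
            fp.write(dataset_contents)

        # Point the rule's "load" argument at the relocated copy.
        return re.sub(r"(dataset.*?load\s+){}".format(re.escape(dataset_filename)),
                      r"\g<1>datasets/{}".format(content_hash), rule_text)

For a rule containing "dataset:isset,users,type string,load
/var/lib/users.lst", the load argument would be rewritten to
"datasets/<md5-of-contents>", a path relative to the output directory, so
identical dataset files collapse to a single copy.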