From: Jason Ish Date: Mon, 14 Jul 2025 15:52:44 +0000 (-0600) Subject: rules: skip rules with utf8 decoding errors X-Git-Tag: 1.3.7~6 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=6d21c64ac7f3f510d63e7d676a02f8e98e6683f7;p=thirdparty%2Fsuricata-update.git rules: skip rules with utf8 decoding errors If a rule can't be decoded as utf-8, we can't further parse it. So skip it with a warning. The previous behavior was to raise an exception and abort. Ticket: #7812 --- diff --git a/suricata/update/rule.py b/suricata/update/rule.py index 20ff868..6542704 100644 --- a/suricata/update/rule.py +++ b/suricata/update/rule.py @@ -321,11 +321,16 @@ def parse_fileobj(fileobj, group=None): rules = [] buf = "" for line in fileobj: - try: - if type(line) == type(b""): - line = line.decode() - except: - pass + if type(line) == type(b""): + try: + line = line.decode("utf-8", "strict") + except UnicodeDecodeError: + logger.warning("Skipping rule due to encoding issue: %s", repr(line)) + # Skip this line and reset buffer if we were accumulating a multi-line rule + if buf: + logger.warning("Discarding incomplete multi-line rule due to encoding issue") + buf = "" + continue if line.rstrip().endswith("\\"): buf = "%s%s " % (buf, line.rstrip()[0:-1]) continue diff --git a/tests/rules-with-encoding-issues.rules b/tests/rules-with-encoding-issues.rules new file mode 100644 index 0000000..934faba --- /dev/null +++ b/tests/rules-with-encoding-issues.rules @@ -0,0 +1,11 @@ +alert tcp any any -> any any (msg:"Valid rule 1"; sid:1001; rev:1;) +alert tcp any any -> any any (msg:"Bad encoding ÿþ"; sid:1002; rev:1;) +alert tcp any any -> any any (msg:"Latin1 éè"; sid:1003; rev:1;) +alert tcp any any -> any any (msg:"Valid rule 2"; sid:1004; rev:1;) +alert tcp any any -> any any (msg:"Valid multiline"; \ + content:"test"; \ + sid:2001; rev:1;) +alert tcp any any -> any any (msg:"Bad multiline"; \ + content:"badÿþ"; \ + sid:2002; rev:1;) +alert tcp any any -> any any (msg:"Valid after bad"; sid:2003; rev:1;) diff --git a/tests/test_rule.py b/tests/test_rule.py index a034117..539cf8d 100644 --- a/tests/test_rule.py +++ b/tests/test_rule.py @@ -262,3 +262,30 @@ alert dnp3 any any -> any any (msg:"SURICATA DNP3 Request flood detected"; \ self.assertEqual(rule["source_port"], "any") self.assertEqual(rule["dest_addr"], "[any,![$EXTERNAL_IP,$REVERSE_PROXY_HOSTS,$ODD_HTTP_HOSTS]]") self.assertEqual(rule["dest_port"], "80") + + def test_rule_with_encoding_issues(self): + """Test that rules with encoding issues are skipped (Issue #7812)""" + # Parse the file with encoding issues + with open('tests/rules-with-encoding-issues.rules', 'rb') as fileobj: + rules = suricata.update.rule.parse_fileobj(fileobj) + + # Should have parsed: + # - Valid single-line rules (1001 and 1004) + # - Valid multiline rule (2001) + # - Valid rule after bad multiline (2003) + # Should have skipped: + # - Rules with bad encoding (1002, 1003) + # - Bad multiline rule (2002) + self.assertEqual(len(rules), 4) + + # Check single-line rules + self.assertEqual(rules[0].sid, 1001) + self.assertEqual(rules[0].msg, "Valid rule 1") + self.assertEqual(rules[1].sid, 1004) + self.assertEqual(rules[1].msg, "Valid rule 2") + + # Check multiline rules + self.assertEqual(rules[2].sid, 2001) + self.assertEqual(rules[2].msg, "Valid multiline") + self.assertEqual(rules[3].sid, 2003) + self.assertEqual(rules[3].msg, "Valid after bad")