From: Shivani Bhardwaj <shivani@oisf.net>
Date: Wed, 5 Oct 2022 11:10:07 +0000 (+0530)
Subject: base64: add new mode as per RFC 4648
X-Git-Tag: suricata-7.0.0-beta1~81
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=dad52f133d871690b29e1415b40827cac4fa418c;p=thirdparty%2Fsuricata.git

base64: add new mode as per RFC 4648

RFC 4648 (section 3.3) states:

  "Implementations MUST reject the encoded data if it contains characters
  outside the base alphabet when interpreting base-encoded data, unless
  the specification referring to this document explicitly states
  otherwise."

Add a new mode BASE64_MODE_RFC4648, and handle input strictly as per the
specification.

Bug 5223
---

diff --git a/src/detect-base64-decode.c b/src/detect-base64-decode.c
index 1fe4904793..d12d48b8fb 100644
--- a/src/detect-base64-decode.c
+++ b/src/detect-base64-decode.c
@@ -96,7 +96,7 @@ int DetectBase64DecodeDoMatch(DetectEngineThreadCtx *det_ctx, const Signature *s
 
     uint32_t consumed = 0, num_decoded = 0;
     (void)DecodeBase64(det_ctx->base64_decoded, det_ctx->base64_decoded_len_max, payload,
-            decode_len, &consumed, &num_decoded, BASE64_MODE_RELAX);
+            decode_len, &consumed, &num_decoded, BASE64_MODE_RFC4648);
     det_ctx->base64_decoded_len = num_decoded;
     SCLogDebug("Decoded %d bytes from base64 data.",
         det_ctx->base64_decoded_len);
diff --git a/src/util-base64.c b/src/util-base64.c
index dc3de2a1ea..a683317d63 100644
--- a/src/util-base64.c
+++ b/src/util-base64.c
@@ -155,6 +155,14 @@ Base64Ecode DecodeBase64(uint8_t *dest, uint32_t dest_size, const uint8_t *src,
             memset(&b64, 0, sizeof(b64));
         }
     }
+
+    if (!valid && mode == BASE64_MODE_RFC4648) {
+        padding = B64_BLOCK - bbidx;
+        *decoded_bytes += ASCII_BLOCK - (B64_BLOCK - bbidx);
+        DecodeBase64Block(dptr, b64);
+        *consumed_bytes += bbidx;
+    }
+
     /* Finish remaining b64 bytes by padding */
     if (valid && bbidx > 0 && (mode != BASE64_MODE_RFC2045)) {
         /* Decode remaining */
diff --git a/src/util-base64.h b/src/util-base64.h
index aee14ece53..ae07b268a0 100644
--- a/src/util-base64.h
+++ b/src/util-base64.h
@@ -33,8 +33,40 @@
 
 typedef enum {
     BASE64_MODE_RELAX,
+    /* If the following strings were to be passed to the decoder with RFC2045 mode,
+     * the results would be as follows. See the unittest B64TestVectorsRFC2045 in
+     * src/util-base64.c
+     *
+     * BASE64("") = ""
+     * BASE64("f") = "Zg=="
+     * BASE64("fo") = "Zm8="
+     * BASE64("foo") = "Zm9v"
+     * BASE64("foob") = "Zm9vYg=="
+     * BASE64("fooba") = "Zm9vYmE="
+     * BASE64("foobar") = "Zm9vYmFy"
+     * BASE64("foobar") = "Zm 9v Ym Fy"   <-- Notice how the spaces are ignored
+     * BASE64("f") = "Zm$9vYm.Fy"    # TODO: according to the RFC, all line breaks or *other
+     * characters* not found in the base64 alphabet must be ignored by decoding software
+     * */
     BASE64_MODE_RFC2045, /* SPs are allowed during transfer but must be skipped by Decoder */
     BASE64_MODE_STRICT,
+    /* If the following strings were to be passed to the decoder with RFC4648 mode,
+     * the results would be as follows. See the unittest B64TestVectorsRFC4648 in
+     * src/util-base64.c
+     *
+     * BASE64("") = ""
+     * BASE64("f") = "Zg=="
+     * BASE64("fo") = "Zm8="
+     * BASE64("foo") = "Zm9v"
+     * BASE64("foob") = "Zm9vYg=="
+     * BASE64("fooba") = "Zm9vYmE="
+     * BASE64("foobar") = "Zm9vYmFy"
+     * BASE64("f") = "Zm 9v Ym Fy"   <-- Notice how the processing stops once a space is encountered
+     * BASE64("f") = "Zm$9vYm.Fy"    <-- Notice how the processing stops once an invalid char is
+     * encountered
+     * */
+    BASE64_MODE_RFC4648, /* reject the encoded data if it contains characters outside the base
+                            alphabet */
 } Base64Mode;
 
 typedef enum {