http: use rust for mime parsing

author Philippe Antoine <pantoine@oisf.net>

Tue, 2 Aug 2022 14:25:10 +0000 (16:25 +0200)

committer Victor Julien <victor@inliniac.net>

Tue, 4 Jun 2024 04:28:28 +0000 (06:28 +0200)
author Philippe Antoine <pantoine@oisf.net>
Tue, 2 Aug 2022 14:25:10 +0000 (16:25 +0200)
committer Victor Julien <victor@inliniac.net>
Tue, 4 Jun 2024 04:28:28 +0000 (06:28 +0200)
diff --git a/rust/src/mime/mod.rs b/rust/src/mime/mod.rs

index 44c34dedaf881a41eb67601d968e39de48baca0b..5899da415fea702430bb34e328cf7b904adebc38 100644 (file)
--- a/rust/src/mime/mod.rs
+++ b/rust/src/mime/mod.rs
@@ -19,9 +19,9 @@
  
  use crate::common::nom7::take_until_and_consume;
  use nom7::branch::alt;
-use nom7::bytes::complete::{take_till, take_until, take_while};
+use nom7::bytes::complete::{tag, take, take_till, take_until, take_while};
  use nom7::character::complete::char;
-use nom7::combinator::{complete, opt, rest};
+use nom7::combinator::{complete, opt, rest, value};
  use nom7::error::{make_error, ErrorKind};
  use nom7::{Err, IResult};
  use std;
@@ -42,7 +42,7 @@ fn mime_parse_value_delimited(input: &[u8]) -> IResult<&[u8], &[u8]> {
              if input[i] == b'"' && !escaping {
                  return Ok((&input[i + 1..], &input[..i]));
              }
-            //TODOmime unescape later
+            // unescape can be processed later
              escaping = false;
          }
      }
@@ -55,8 +55,8 @@ fn mime_parse_value_delimited(input: &[u8]) -> IResult<&[u8], &[u8]> {
  fn mime_parse_value_until(input: &[u8]) -> IResult<&[u8], &[u8]> {
      let (input, value) = alt((take_till(|ch: u8| ch == b';'), rest))(input)?;
      for i in 0..value.len() {
-        if !is_mime_space(value[value.len()-i-1]) {
-            return Ok((input, &value[..value.len()-i]));
+        if !is_mime_space(value[value.len() - i - 1]) {
+            return Ok((input, &value[..value.len() - i]));
          }
      }
      return Ok((input, value));
@@ -176,6 +176,360 @@ pub unsafe extern "C" fn rs_mime_find_header_token(
      return false;
  }
  
+#[derive(Debug)]
+enum MimeParserState {
+    MimeStart = 0,
+    MimeHeader = 1,
+    MimeHeaderEnd = 2,
+    MimeChunk = 3,
+    MimeBoundaryWaitingForEol = 4,
+}
+
+impl Default for MimeParserState {
+    fn default() -> Self {
+        MimeParserState::MimeStart
+    }
+}
+
+#[derive(Debug, Default)]
+pub struct MimeStateHTTP {
+    boundary: Vec<u8>,
+    filename: Vec<u8>,
+    state: MimeParserState,
+}
+
+#[repr(u8)]
+#[derive(Copy, Clone, PartialOrd, PartialEq)]
+pub enum MimeParserResult {
+    MimeNeedsMore = 0,
+    MimeFileOpen = 1,
+    MimeFileChunk = 2,
+    MimeFileClose = 3,
+}
+
+fn mime_parse_skip_line(input: &[u8]) -> IResult<&[u8], MimeParserState> {
+    let (input, _) = take_till(|ch: u8| ch == b'\n')(input)?;
+    let (input, _) = char('\n')(input)?;
+    return Ok((input, MimeParserState::MimeStart));
+}
+
+fn mime_parse_boundary_regular<'a, 'b>(
+    boundary: &'b [u8], input: &'a [u8],
+) -> IResult<&'a [u8], MimeParserState> {
+    let (input, _) = tag(boundary)(input)?;
+    let (input, _) = take_till(|ch: u8| ch == b'\n')(input)?;
+    let (input, _) = char('\n')(input)?;
+    return Ok((input, MimeParserState::MimeHeader));
+}
+
+// Number of characters after boundary, without end of line, before changing state to streaming
+const MIME_BOUNDARY_MAX_BEFORE_EOL: usize = 128;
+const MIME_HEADER_MAX_LINE: usize = 4096;
+
+fn mime_parse_boundary_missing_eol<'a, 'b>(
+    boundary: &'b [u8], input: &'a [u8],
+) -> IResult<&'a [u8], MimeParserState> {
+    let (input, _) = tag(boundary)(input)?;
+    let (input, _) = take(MIME_BOUNDARY_MAX_BEFORE_EOL)(input)?;
+    return Ok((input, MimeParserState::MimeBoundaryWaitingForEol));
+}
+
+fn mime_parse_boundary<'a, 'b>(
+    boundary: &'b [u8], input: &'a [u8],
+) -> IResult<&'a [u8], MimeParserState> {
+    let r = mime_parse_boundary_regular(boundary, input);
+    if r.is_ok() {
+        return r;
+    }
+    let r2 = mime_parse_skip_line(input);
+    if r2.is_ok() {
+        return r2;
+    }
+    return mime_parse_boundary_missing_eol(boundary, input);
+}
+
+fn mime_consume_until_eol(input: &[u8]) -> IResult<&[u8], bool> {
+    return alt((value(true, mime_parse_skip_line), value(false, rest)))(input);
+}
+
+fn mime_parse_header_line(input: &[u8]) -> IResult<&[u8], &[u8]> {
+    let (input, name) = take_till(|ch: u8| ch == b':')(input)?;
+    let (input, _) = char(':')(input)?;
+    return Ok((input, name));
+}
+
+// s2 is already lower case
+fn rs_equals_lowercase(s1: &[u8], s2: &[u8]) -> bool {
+    if s1.len() == s2.len() {
+        for i in 0..s1.len() {
+            if s1[i].to_ascii_lowercase() != s2[i] {
+                return false;
+            }
+        }
+        return true;
+    }
+    return false;
+}
+
+fn mime_parse_headers<'a, 'b>(
+    ctx: &'b mut MimeStateHTTP, i: &'a [u8],
+) -> IResult<&'a [u8], (MimeParserState, bool, bool)> {
+    let mut fileopen = false;
+    let mut errored = false;
+    let mut input = i;
+    while input.len() > 0 {
+        match take_until::<_, &[u8], nom7::error::Error<&[u8]>>("\r\n")(input) {
+            Ok((input2, line)) => {
+                match mime_parse_header_line(line) {
+                    Ok((value, name)) => {
+                        if rs_equals_lowercase(name, "content-disposition".as_bytes()) {
+                            let mut sections_values = Vec::new();
+                            if let Ok(filename) = mime_find_header_token(
+                                value,
+                                "filename".as_bytes(),
+                                &mut sections_values,
+                            ) {
+                                if filename.len() > 0 {
+                                    ctx.filename = Vec::with_capacity(filename.len());
+                                    fileopen = true;
+                                    for c in filename {
+                                        // unescape
+                                        if *c != b'\\' {
+                                            ctx.filename.push(*c);
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                        if value.len() == 0 {
+                            errored = true;
+                        }
+                    }
+                    _ => {
+                        if line.len() > 0 {
+                            errored = true;
+                        }
+                    }
+                }
+                let (input3, _) = tag("\r\n")(input2)?;
+                input = input3;
+                if line.len() == 0 || (line.len() == 1 && line[0] == b'\r') {
+                    return Ok((input, (MimeParserState::MimeHeaderEnd, fileopen, errored)));
+                }
+            }
+            _ => {
+                // guard against too long header lines
+                if input.len() > MIME_HEADER_MAX_LINE {
+                    return Ok((
+                        input,
+                        (
+                            MimeParserState::MimeBoundaryWaitingForEol,
+                            fileopen,
+                            errored,
+                        ),
+                    ));
+                }
+                if input.len() < i.len() {
+                    return Ok((input, (MimeParserState::MimeHeader, fileopen, errored)));
+                } // else only an incomplete line, ask for more
+                return Err(Err::Error(make_error(input, ErrorKind::Eof)));
+            }
+        }
+    }
+    return Ok((input, (MimeParserState::MimeHeader, fileopen, errored)));
+}
+
+fn mime_consume_chunk<'a, 'b>(boundary: &'b [u8], input: &'a [u8]) -> IResult<&'a [u8], bool> {
+    let r: Result<(&[u8], &[u8]), Err<nom7::error::Error<&[u8]>>> = take_until("\r\n")(input);
+    match r {
+        Ok((input, line)) => {
+            let (input2, _) = tag("\r\n")(input)?;
+            if input2.len() < boundary.len() {
+                if input2 == &boundary[..input2.len()] {
+                    if line.len() > 0 {
+                        // consume as chunk up to eol (not consuming eol)
+                        return Ok((input, false));
+                    }
+                    // new line beginning like the boundary, with nothing to consume as chunk: request more
+                    return Err(Err::Error(make_error(input, ErrorKind::Eof)));
+                }
+                // not like boundary : consume everything as chunk
+                return Ok((&input[input.len()..], false));
+            } // else
+            if &input2[..boundary.len()] == boundary {
+                // end of file with boundary, consume eol but do not consume boundary
+                return Ok((input2, true));
+            }
+            // not like boundary : consume everything as chunk
+            return Ok((input2, false));
+        }
+        _ => {
+            return Ok((&input[input.len()..], false));
+        }
+    }
+}
+
+pub const MIME_EVENT_FLAG_INVALID_HEADER: u32 = 0x01;
+pub const MIME_EVENT_FLAG_NO_FILEDATA: u32 = 0x02;
+
+fn mime_process(ctx: &mut MimeStateHTTP, i: &[u8]) -> (MimeParserResult, u32, u32) {
+    let mut input = i;
+    let mut consumed = 0;
+    let mut warnings = 0;
+    while input.len() > 0 {
+        match ctx.state {
+            MimeParserState::MimeStart => {
+                if let Ok((rem, next)) = mime_parse_boundary(&ctx.boundary, input) {
+                    ctx.state = next;
+                    consumed += (input.len() - rem.len()) as u32;
+                    input = rem;
+                } else {
+                    return (MimeParserResult::MimeNeedsMore, consumed, warnings);
+                }
+            }
+            MimeParserState::MimeBoundaryWaitingForEol => {
+                if let Ok((rem, found)) = mime_consume_until_eol(input) {
+                    if found {
+                        ctx.state = MimeParserState::MimeHeader;
+                    }
+                    consumed += (input.len() - rem.len()) as u32;
+                    input = rem;
+                } else {
+                    // should never happen
+                    return (MimeParserResult::MimeNeedsMore, consumed, warnings);
+                }
+            }
+            MimeParserState::MimeHeader => {
+                if let Ok((rem, (next, fileopen, err))) = mime_parse_headers(ctx, input) {
+                    ctx.state = next;
+                    consumed += (input.len() - rem.len()) as u32;
+                    input = rem;
+                    if err {
+                        warnings |= MIME_EVENT_FLAG_INVALID_HEADER;
+                    }
+                    if fileopen {
+                        return (MimeParserResult::MimeFileOpen, consumed, warnings);
+                    }
+                } else {
+                    return (MimeParserResult::MimeNeedsMore, consumed, warnings);
+                }
+            }
+            MimeParserState::MimeHeaderEnd => {
+                // check if we start with the boundary
+                // and transition to chunk, or empty file and back to start
+                if input.len() < ctx.boundary.len() {
+                    if input == &ctx.boundary[..input.len()] {
+                        return (MimeParserResult::MimeNeedsMore, consumed, warnings);
+                    }
+                    ctx.state = MimeParserState::MimeChunk;
+                } else {
+                    if &input[..ctx.boundary.len()] == ctx.boundary {
+                        ctx.state = MimeParserState::MimeStart;
+                        if ctx.filename.len() > 0 {
+                            warnings |= MIME_EVENT_FLAG_NO_FILEDATA;
+                        }
+                        ctx.filename.clear();
+                        return (MimeParserResult::MimeFileClose, consumed, warnings);
+                    } else {
+                        ctx.state = MimeParserState::MimeChunk;
+                    }
+                }
+            }
+            MimeParserState::MimeChunk => {
+                if let Ok((rem, eof)) = mime_consume_chunk(&ctx.boundary, input) {
+                    consumed += (input.len() - rem.len()) as u32;
+                    if eof {
+                        ctx.state = MimeParserState::MimeStart;
+                        ctx.filename.clear();
+                        return (MimeParserResult::MimeFileClose, consumed, warnings);
+                    } else {
+                        // + 2 for \r\n
+                        if rem.len() < ctx.boundary.len() + 2 {
+                            return (MimeParserResult::MimeFileChunk, consumed, warnings);
+                        }
+                        input = rem;
+                    }
+                } else {
+                    return (MimeParserResult::MimeNeedsMore, consumed, warnings);
+                }
+            }
+        }
+    }
+    return (MimeParserResult::MimeNeedsMore, consumed, warnings);
+}
+
+pub fn mime_state_init(i: &[u8]) -> Option<MimeStateHTTP> {
+    let mut sections_values = Vec::new();
+    match mime_find_header_token(i, "boundary".as_bytes(), &mut sections_values) {
+        Ok(value) => {
+            if value.len() <= RS_MIME_MAX_TOKEN_LEN {
+                let mut r = MimeStateHTTP::default();
+                r.boundary = Vec::with_capacity(2 + value.len());
+                // start with 2 additional hyphens
+                r.boundary.push(b'-');
+                r.boundary.push(b'-');
+                for c in value {
+                    // unescape
+                    if *c != b'\\' {
+                        r.boundary.push(*c);
+                    }
+                }
+                return Some(r);
+            }
+        }
+        _ => {}
+    }
+    return None;
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn rs_mime_state_init(
+    input: *const u8, input_len: u32,
+) -> *mut MimeStateHTTP {
+    let slice = build_slice!(input, input_len as usize);
+
+    if let Some(ctx) = mime_state_init(slice) {
+        let boxed = Box::new(ctx);
+        return Box::into_raw(boxed) as *mut _;
+    }
+    return std::ptr::null_mut();
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn rs_mime_parse(
+    ctx: &mut MimeStateHTTP, input: *const u8, input_len: u32, consumed: *mut u32,
+    warnings: *mut u32,
+) -> MimeParserResult {
+    let slice = build_slice!(input, input_len as usize);
+    let (r, c, w) = mime_process(ctx, slice);
+    *consumed = c;
+    *warnings = w;
+    return r;
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn rs_mime_state_get_filename(
+    ctx: &mut MimeStateHTTP, buffer: *mut *const u8, filename_len: *mut u16,
+) {
+    if ctx.filename.len() > 0 {
+        *buffer = ctx.filename.as_ptr();
+        if ctx.filename.len() < u16::MAX.into() {
+            *filename_len = ctx.filename.len() as u16;
+        } else {
+            *filename_len = u16::MAX;
+        }
+    } else {
+        *buffer = std::ptr::null_mut();
+        *filename_len = 0;
+    }
+}
+
+#[no_mangle]
+pub unsafe extern "C" fn rs_mime_state_free(ctx: &mut MimeStateHTTP) {
+    // Just unbox...
+    std::mem::drop(Box::from_raw(ctx));
+}
+
  #[cfg(test)]
  mod test {
      use super::*;
diff --git a/src/app-layer-htp.c b/src/app-layer-htp.c

index 786ec2f4d82b4d10bca9aa05e34eb5e6500b3ea2..acdb8739d923a1201fd83724a19e94104fb8a7ef 100644 (file)
--- a/src/app-layer-htp.c
+++ b/src/app-layer-htp.c
@@ -99,6 +99,10 @@ StreamingBufferConfig htp_sbcfg = STREAMING_BUFFER_CONFIG_INITIALIZER;
  /** Limit to the number of libhtp messages that can be handled */
  #define HTP_MAX_MESSAGES 512
  
+/** a boundary should be smaller in size */
+// RFC 2046 states that max boundary size is 70
+#define HTP_BOUNDARY_MAX 200U
+
  SC_ATOMIC_DECLARE(uint32_t, htp_config_flags);
  
  #ifdef DEBUG
@@ -368,8 +372,8 @@ static void HtpTxUserDataFree(HtpState *state, HtpTxUserData *htud)
          if (htud->response_headers_raw)
              HTPFree(htud->response_headers_raw, htud->response_headers_raw_len);
          AppLayerDecoderEventsFreeEvents(&htud->tx_data.events);
-        if (htud->boundary)
-            HTPFree(htud->boundary, htud->boundary_len);
+        if (htud->mime_state)
+            rs_mime_state_free(htud->mime_state);
          if (htud->tx_data.de_state != NULL) {
              DetectEngineStateFree(htud->tx_data.de_state);
          }
@@ -1114,92 +1118,6 @@ static int HTTPParseContentDispositionHeader(uint8_t *name, size_t name_len,
      return 0;
  }
  
-/**
- *  \param name /Lowercase/ version of the variable name
- */
-static int HTTPParseContentTypeHeader(uint8_t *name, size_t name_len,
-        uint8_t *data, size_t len, uint8_t **retptr, size_t *retlen)
-{
-    SCEnter();
-#ifdef PRINT
-    printf("DATA START: \n");
-    PrintRawDataFp(stdout, data, len);
-    printf("DATA END: \n");
-#endif
-    size_t x;
-    int quote = 0;
-
-    for (x = 0; x < len; x++) {
-        if (!(isspace(data[x])))
-            break;
-    }
-
-    if (x >= len) {
-        SCReturnInt(0);
-    }
-
-    uint8_t *line = data+x;
-    size_t line_len = len-x;
-    size_t offset = 0;
-#ifdef PRINT
-    printf("LINE START: \n");
-    PrintRawDataFp(stdout, line, line_len);
-    printf("LINE END: \n");
-#endif
-    for (x = 0 ; x < line_len; x++) {
-        if (x > 0) {
-            if (line[x - 1] != '\\' && line[x] == '\"') {
-                quote++;
-            }
-
-            if (((line[x - 1] != '\\' && line[x] == ';') || ((x + 1) == line_len)) && (quote == 0 || quote % 2 == 0)) {
-                uint8_t *token = line + offset;
-                size_t token_len = x - offset;
-
-                if ((x + 1) == line_len) {
-                    token_len++;
-                }
-
-                offset = x + 1;
-
-                while (offset < line_len && isspace(line[offset])) {
-                    x++;
-                    offset++;
-                }
-#ifdef PRINT
-                printf("TOKEN START: \n");
-                PrintRawDataFp(stdout, token, token_len);
-                printf("TOKEN END: \n");
-#endif
-                if (token_len > name_len) {
-                    if (name == NULL || SCMemcmpLowercase(name, token, name_len) == 0) {
-                        uint8_t *value = token + name_len;
-                        size_t value_len = token_len - name_len;
-
-                        if (value[0] == '\"') {
-                            value++;
-                            value_len--;
-                        }
-                        if (value[value_len-1] == '\"') {
-                            value_len--;
-                        }
-#ifdef PRINT
-                        printf("VALUE START: \n");
-                        PrintRawDataFp(stdout, value, value_len);
-                        printf("VALUE END: \n");
-#endif
-                        *retptr = value;
-                        *retlen = value_len;
-                        SCReturnInt(1);
-                    }
-                }
-            }
-        }
-    }
-
-    SCReturnInt(0);
-}
-
  /**
   *  \brief setup multipart parsing: extract boundary and store it
   *
@@ -1218,123 +1136,15 @@ static int HtpRequestBodySetupMultipart(htp_tx_t *tx, HtpTxUserData *htud)
      htp_header_t *h = (htp_header_t *)htp_table_get_c(tx->request_headers,
              "Content-Type");
      if (h != NULL && bstr_len(h->value) > 0) {
-        uint8_t *boundary = NULL;
-        size_t boundary_len = 0;
-
-        int r = HTTPParseContentTypeHeader((uint8_t *)"boundary=", 9,
-                (uint8_t *) bstr_ptr(h->value), bstr_len(h->value),
-                &boundary, &boundary_len);
-        if (r == 1) {
-#ifdef PRINT
-            printf("BOUNDARY START: \n");
-            PrintRawDataFp(stdout, boundary, boundary_len);
-            printf("BOUNDARY END: \n");
-#endif
-            if (boundary_len < HTP_BOUNDARY_MAX) {
-                htud->boundary = HTPMalloc(boundary_len);
-                if (htud->boundary == NULL) {
-                    return -1;
-                }
-                htud->boundary_len = (uint8_t)boundary_len;
-                memcpy(htud->boundary, boundary, boundary_len);
-
-                htud->tsflags |= HTP_BOUNDARY_SET;
-            } else {
-                SCLogDebug("invalid boundary");
-                return -1;
-            }
+        htud->mime_state = rs_mime_state_init(bstr_ptr(h->value), bstr_len(h->value));
+        if (htud->mime_state) {
+            htud->tsflags |= HTP_BOUNDARY_SET;
              SCReturnInt(1);
          }
-        //SCReturnInt(1);
      }
      SCReturnInt(0);
  }
  
-#define C_D_HDR "content-disposition:"
-#define C_D_HDR_LEN 20
-#define C_T_HDR "content-type:"
-#define C_T_HDR_LEN 13
-
-static void HtpRequestBodyMultipartParseHeader(HtpState *hstate,
-        HtpTxUserData *htud,
-        uint8_t *header, uint32_t header_len,
-        uint8_t **filename, uint16_t *filename_len,
-        uint8_t **filetype, uint16_t *filetype_len)
-{
-    uint8_t *fn = NULL;
-    size_t fn_len = 0;
-    uint8_t *ft = NULL;
-    size_t ft_len = 0;
-
-#ifdef PRINT
-    printf("HEADER START: \n");
-    PrintRawDataFp(stdout, header, header_len);
-    printf("HEADER END: \n");
-#endif
-
-    while (header_len > 0) {
-        uint8_t *next_line = Bs2bmSearch(header, header_len, (uint8_t *)"\r\n", 2);
-        uint8_t *line = header;
-        uint32_t line_len;
-
-        if (next_line == NULL) {
-            line_len = header_len;
-        } else {
-            line_len = next_line - header;
-        }
-        uint8_t *sc = (uint8_t *)memchr(line, ':', line_len);
-        if (sc == NULL) {
-            HTPSetEvent(hstate, htud, STREAM_TOSERVER,
-                    HTTP_DECODER_EVENT_MULTIPART_INVALID_HEADER);
-            /* if the : we found is the final char, it means we have
-             * no value */
-        } else if (line_len > 0 && sc == &line[line_len - 1]) {
-            HTPSetEvent(hstate, htud, STREAM_TOSERVER,
-                    HTTP_DECODER_EVENT_MULTIPART_INVALID_HEADER);
-        } else {
-#ifdef PRINT
-            printf("LINE START: \n");
-            PrintRawDataFp(stdout, line, line_len);
-            printf("LINE END: \n");
-#endif
-            if (line_len >= C_D_HDR_LEN &&
-                    SCMemcmpLowercase(C_D_HDR, line, C_D_HDR_LEN) == 0) {
-                uint8_t *value = line + C_D_HDR_LEN;
-                uint32_t value_len = line_len - C_D_HDR_LEN;
-
-                /* parse content-disposition */
-                (void)HTTPParseContentDispositionHeader((uint8_t *)"filename=", 9,
-                        value, value_len, &fn, &fn_len);
-            } else if (line_len >= C_T_HDR_LEN &&
-                    SCMemcmpLowercase(C_T_HDR, line, C_T_HDR_LEN) == 0) {
-                SCLogDebug("content-type line");
-                uint8_t *value = line + C_T_HDR_LEN;
-                uint32_t value_len = line_len - C_T_HDR_LEN;
-
-                (void)HTTPParseContentTypeHeader(NULL, 0,
-                        value, value_len, &ft, &ft_len);
-            }
-        }
-
-        if (next_line == NULL) {
-            SCLogDebug("no next_line");
-            break;
-        }
-        header_len -= ((next_line + 2) - header);
-        header = next_line + 2;
-    } /* while (header_len > 0) */
-
-    if (fn_len > USHRT_MAX)
-        fn_len = USHRT_MAX;
-    if (ft_len > USHRT_MAX)
-        ft_len = USHRT_MAX;
-
-    *filename = fn;
-    *filename_len = (uint16_t)fn_len;
-    *filetype = ft;
-    *filetype_len = (uint16_t)ft_len;
-}
-
  /**
   *  \brief Create a single buffer from the HtpBodyChunks in our list
   *
@@ -1364,336 +1174,104 @@ static void FlagDetectStateNewFile(HtpTxUserData *tx, int dir)
      }
  }
  
-/**
- *  \brief Setup boundary buffers
- */
-static void HtpRequestBodySetupBoundary(HtpTxUserData *htud,
-        uint8_t *boundary, uint32_t boundary_len)
-{
-    memset(boundary, '-', boundary_len);
-    memcpy(boundary + 2, htud->boundary, htud->boundary_len);
-}
-
  static int HtpRequestBodyHandleMultipart(HtpState *hstate, HtpTxUserData *htud, void *tx,
-        const uint8_t *chunks_buffer, uint32_t chunks_buffer_len)
+        const uint8_t *chunks_buffer, uint32_t chunks_buffer_len, bool eof)
  {
-    int result = 0;
-    uint8_t boundary[htud->boundary_len + 4]; /**< size limited to HTP_BOUNDARY_MAX + 4 */
-    uint16_t expected_boundary_len = htud->boundary_len + 2;
-    uint16_t expected_boundary_end_len = htud->boundary_len + 4;
-    int tx_progress = 0;
-
  #ifdef PRINT
      printf("CHUNK START: \n");
      PrintRawDataFp(stdout, chunks_buffer, chunks_buffer_len);
      printf("CHUNK END: \n");
  #endif
  
-    HtpRequestBodySetupBoundary(htud, boundary, htud->boundary_len + 4);
-
-    /* search for the header start, header end and form end */
-    const uint8_t *header_start = Bs2bmSearch(chunks_buffer, chunks_buffer_len,
-            boundary, expected_boundary_len);
-    /* end of the multipart form */
-    const uint8_t *form_end = NULL;
-    /* end marker belonging to header_start */
-    const uint8_t *header_end = NULL;
-    if (header_start != NULL) {
-        header_end = Bs2bmSearch(header_start, chunks_buffer_len - (header_start - chunks_buffer),
-                (uint8_t *)"\r\n\r\n", 4);
-        form_end = Bs2bmSearch(header_start, chunks_buffer_len - (header_start - chunks_buffer),
-                boundary, expected_boundary_end_len);
-    }
-
-    SCLogDebug("header_start %p, header_end %p, form_end %p", header_start,
-            header_end, form_end);
-
-    /* we currently only handle multipart for ts.  When we support it for tc,
-     * we will need to supply right direction */
-    tx_progress = AppLayerParserGetStateProgress(IPPROTO_TCP, ALPROTO_HTTP1, tx, STREAM_TOSERVER);
-    /* if we're in the file storage process, deal with that now */
-    if (htud->tsflags & HTP_FILENAME_SET) {
-        if (header_start != NULL || (tx_progress > HTP_REQUEST_BODY)) {
-            SCLogDebug("reached the end of the file");
-
-            const uint8_t *filedata = chunks_buffer;
-            uint32_t filedata_len = 0;
-            uint8_t flags = 0;
-
-            if (header_start != NULL) {
-                if (header_start == filedata + 2) {
-                    /* last chunk had all data, but not the boundary */
-                    SCLogDebug("last chunk had all data, but not the boundary");
-                    filedata_len = 0;
-                } else if (header_start > filedata + 2) {
-                    SCLogDebug("some data from last file before the boundary");
-                    /* some data from last file before the boundary */
-                    filedata_len = header_start - filedata - 2;
-                }
-            }
-            /* body parsing done, we did not get our form end. Use all data
-             * we still have and signal to files API we have an issue. */
-            if (tx_progress > HTP_REQUEST_BODY) {
-                filedata_len = chunks_buffer_len;
-                flags = FILE_TRUNCATED;
-            }
-
-            if (filedata_len > chunks_buffer_len) {
-                HTPSetEvent(hstate, htud, STREAM_TOSERVER,
-                        HTTP_DECODER_EVENT_MULTIPART_GENERIC_ERROR);
-                goto end;
-            }
-#ifdef PRINT
-            printf("FILEDATA (final chunk) START: \n");
-            PrintRawDataFp(stdout, filedata, filedata_len);
-            printf("FILEDATA (final chunk) END: \n");
-#endif
-            if (!(htud->tsflags & HTP_DONTSTORE)) {
-                if (HTPFileClose(htud, filedata, filedata_len, flags, STREAM_TOSERVER) == -1) {
-                    goto end;
-                }
-            }
-
-            htud->tsflags &=~ HTP_FILENAME_SET;
-
-            /* fall through */
-        } else {
-            SCLogDebug("not yet at the end of the file");
-
-            if (chunks_buffer_len > expected_boundary_end_len) {
-                const uint8_t *filedata = chunks_buffer;
-                uint32_t filedata_len = chunks_buffer_len - expected_boundary_len;
-                for (; filedata_len < chunks_buffer_len; filedata_len++) {
-                    // take as much as we can until the beginning of a new line
-                    if (chunks_buffer[filedata_len] == '\r') {
-                        if (filedata_len + 1 == chunks_buffer_len ||
-                                chunks_buffer[filedata_len + 1] == '\n') {
-                            break;
-                        }
-                    }
-                }
-
-#ifdef PRINT
-                printf("FILEDATA (part) START: \n");
-                PrintRawDataFp(stdout, filedata, filedata_len);
-                printf("FILEDATA (part) END: \n");
-#endif
-
-                if (!(htud->tsflags & HTP_DONTSTORE)) {
-                    result = HTPFileStoreChunk(htud, filedata, filedata_len, STREAM_TOSERVER);
-                    if (result == -1) {
-                        goto end;
-                    } else if (result == -2) {
-                        /* we know for sure we're not storing the file */
-                        htud->tsflags |= HTP_DONTSTORE;
-                    }
-                }
+    // libhtp will not call us back too late
+    // should libhtp send a callback eof for 0 chunked ?
+    DEBUG_VALIDATE_BUG_ON(AppLayerParserGetStateProgress(IPPROTO_TCP, ALPROTO_HTTP1, tx,
+                                  STREAM_TOSERVER) >= HTP_REQUEST_COMPLETE);
  
-                htud->request_body.body_parsed += filedata_len;
-            } else {
-                SCLogDebug("chunk too small to already process in part");
-            }
+    const uint8_t *cur_buf = chunks_buffer;
+    uint32_t cur_buf_len = chunks_buffer_len;
  
-            goto end;
+    if (eof) {
+        // abrupt end of connection
+        if (htud->tsflags & HTP_FILENAME_SET && !(htud->tsflags & HTP_DONTSTORE)) {
+            /* we currently only handle multipart for ts.  When we support it for tc,
+             * we will need to supply right direction */
+            HTPFileClose(htud, cur_buf, cur_buf_len, FILE_TRUNCATED, STREAM_TOSERVER);
          }
+        htud->tsflags &= ~HTP_FILENAME_SET;
+        goto end;
      }
  
-    while (header_start != NULL && header_end != NULL &&
-            header_end != form_end &&
-            header_start < (chunks_buffer + chunks_buffer_len) &&
-            header_end < (chunks_buffer + chunks_buffer_len) &&
-            header_start < header_end)
-    {
-        uint8_t *filename = NULL;
-        uint16_t filename_len = 0;
-        uint8_t *filetype = NULL;
-        uint16_t filetype_len = 0;
-
-        uint32_t header_len = header_end - header_start;
-        SCLogDebug("header_len %u", header_len);
-        uint8_t *header = (uint8_t *)header_start;
-
-        /* skip empty records */
-        if (expected_boundary_len == header_len) {
-            goto next;
-        } else if ((uint32_t)(expected_boundary_len + 2) <= header_len) {
-            header_len -= (expected_boundary_len + 2);
-            header = (uint8_t *)header_start + (expected_boundary_len + 2); // + for 0d 0a
+    uint32_t consumed;
+    uint32_t warnings;
+    int result = 0;
+    const uint8_t *filename = NULL;
+    uint16_t filename_len = 0;
+
+    // keep parsing mime and use callbacks when needed
+    while (cur_buf_len > 0) {
+        MimeParserResult r =
+                rs_mime_parse(htud->mime_state, cur_buf, cur_buf_len, &consumed, &warnings);
+        DEBUG_VALIDATE_BUG_ON(consumed > cur_buf_len);
+        htud->request_body.body_parsed += consumed;
+        if (warnings) {
+            if (warnings & MIME_EVENT_FLAG_INVALID_HEADER) {
+                HTPSetEvent(
+                        hstate, htud, STREAM_TOSERVER, HTTP_DECODER_EVENT_MULTIPART_INVALID_HEADER);
+            }
+            if (warnings & MIME_EVENT_FLAG_NO_FILEDATA) {
+                HTPSetEvent(
+                        hstate, htud, STREAM_TOSERVER, HTTP_DECODER_EVENT_MULTIPART_NO_FILEDATA);
+            }
          }
-
-        HtpRequestBodyMultipartParseHeader(hstate, htud, header, header_len,
-                &filename, &filename_len, &filetype, &filetype_len);
-
-        if (filename != NULL) {
-            const uint8_t *filedata = NULL;
-            uint32_t filedata_len = 0;
-
-            SCLogDebug("we have a filename");
-
-            htud->tsflags |= HTP_FILENAME_SET;
-            htud->tsflags &= ~HTP_DONTSTORE;
-
-            SCLogDebug("header_end %p", header_end);
-            SCLogDebug("form_end %p", form_end);
-
-            /* everything until the final boundary is the file */
-            if (form_end != NULL) {
-                SCLogDebug("have form_end");
-
-                filedata = header_end + 4;
-                if (form_end == filedata) {
-                    HTPSetEvent(hstate, htud, STREAM_TOSERVER,
-                            HTTP_DECODER_EVENT_MULTIPART_NO_FILEDATA);
-                    goto end;
-                } else if (form_end < filedata) {
-                    HTPSetEvent(hstate, htud, STREAM_TOSERVER,
-                            HTTP_DECODER_EVENT_MULTIPART_GENERIC_ERROR);
-                    goto end;
-                }
-
-                filedata_len = form_end - (header_end + 4 + 2);
-                SCLogDebug("filedata_len %"PRIuMAX, (uintmax_t)filedata_len);
-
-                /* or is it? */
-                uint8_t *header_next = Bs2bmSearch(filedata, filedata_len,
-                        boundary, expected_boundary_len);
-                if (header_next != NULL) {
-                    filedata_len -= (form_end - header_next);
-                }
-
-                if (filedata_len > chunks_buffer_len) {
-                    HTPSetEvent(hstate, htud, STREAM_TOSERVER,
-                            HTTP_DECODER_EVENT_MULTIPART_GENERIC_ERROR);
-                    goto end;
-                }
-                SCLogDebug("filedata_len %"PRIuMAX, (uintmax_t)filedata_len);
-#ifdef PRINT
-                printf("FILEDATA START: \n");
-                PrintRawDataFp(stdout, filedata, filedata_len);
-                printf("FILEDATA END: \n");
-#endif
-
-                result = HTPFileOpen(hstate, htud, filename, filename_len, filedata, filedata_len,
-                        STREAM_TOSERVER);
-                if (result == -1) {
-                    goto end;
-                } else if (result == -2) {
-                    htud->tsflags |= HTP_DONTSTORE;
-                } else {
-                    if (HTPFileClose(htud, NULL, 0, 0, STREAM_TOSERVER) == -1) {
-                        goto end;
-                    }
-                }
-                FlagDetectStateNewFile(htud, STREAM_TOSERVER);
-
-                htud->request_body.body_parsed += (header_end - chunks_buffer);
-                htud->tsflags &= ~HTP_FILENAME_SET;
-            } else {
-                SCLogDebug("chunk doesn't contain form end");
-
-                filedata = header_end + 4;
-                filedata_len = chunks_buffer_len - (filedata - chunks_buffer);
-                SCLogDebug("filedata_len %u (chunks_buffer_len %u)", filedata_len, chunks_buffer_len);
-
-                if (filedata_len > chunks_buffer_len) {
-                    HTPSetEvent(hstate, htud, STREAM_TOSERVER,
-                            HTTP_DECODER_EVENT_MULTIPART_GENERIC_ERROR);
-                    goto end;
-                }
-
-#ifdef PRINT
-                printf("FILEDATA START: \n");
-                PrintRawDataFp(stdout, filedata, filedata_len);
-                printf("FILEDATA END: \n");
-#endif
-                /* form doesn't end in this chunk, but the part might. Lets
-                 * see if have another coming up */
-                uint8_t *header_next = Bs2bmSearch(filedata, filedata_len,
-                        boundary, expected_boundary_len);
-                SCLogDebug("header_next %p", header_next);
-                if (header_next == NULL) {
-                    SCLogDebug("more file data to come");
-
-                    uint32_t offset = (header_end + 4) - chunks_buffer;
-                    SCLogDebug("offset %u", offset);
-                    htud->request_body.body_parsed += offset;
-
-                    if (filedata_len >= (uint32_t)(expected_boundary_len + 2)) {
-                        filedata_len -= (expected_boundary_len + 2 - 1);
-                        // take as much as we can until start of boundary
-                        for (size_t nb = 0; nb < (size_t)expected_boundary_len + 1; nb++) {
-                            if (filedata[filedata_len] == '\r') {
-                                if (nb == expected_boundary_len ||
-                                        filedata[filedata_len + 1] == '\n') {
-                                    break;
-                                }
-                            }
-                            filedata_len++;
-                        }
-                        SCLogDebug("opening file with partial data");
-                    } else {
-                        filedata = NULL;
-                        filedata_len = 0;
-                    }
-                    result = HTPFileOpen(hstate, htud, filename, filename_len, filedata,
-                            filedata_len, STREAM_TOSERVER);
+        switch (r) {
+            case MimeNeedsMore:
+                // there is not enough data, wait for more next time
+                goto end;
+            case MimeFileOpen:
+                // get filename owned by mime state
+                rs_mime_state_get_filename(htud->mime_state, &filename, &filename_len);
+                if (filename_len > 0) {
+                    htud->tsflags |= HTP_FILENAME_SET;
+                    htud->tsflags &= ~HTP_DONTSTORE;
+                    result = HTPFileOpen(
+                            hstate, htud, filename, filename_len, NULL, 0, STREAM_TOSERVER);
                      if (result == -1) {
                          goto end;
                      } else if (result == -2) {
                          htud->tsflags |= HTP_DONTSTORE;
                      }
                      FlagDetectStateNewFile(htud, STREAM_TOSERVER);
-                    htud->request_body.body_parsed += filedata_len;
-                    SCLogDebug("htud->request_body.body_parsed %"PRIu64, htud->request_body.body_parsed);
-
-                } else if (header_next - filedata > 2) {
-                    filedata_len = header_next - filedata - 2;
-                    SCLogDebug("filedata_len %u", filedata_len);
-
-                    result = HTPFileOpen(hstate, htud, filename, filename_len, filedata,
-                            filedata_len, STREAM_TOSERVER);
+                }
+                break;
+            case MimeFileChunk:
+                if (htud->tsflags & HTP_FILENAME_SET && !(htud->tsflags & HTP_DONTSTORE)) {
+                    result = HTPFileStoreChunk(htud, cur_buf, consumed, STREAM_TOSERVER);
                      if (result == -1) {
                          goto end;
                      } else if (result == -2) {
+                        /* we know for sure we're not storing the file */
                          htud->tsflags |= HTP_DONTSTORE;
-                    } else {
-                        if (HTPFileClose(htud, NULL, 0, 0, STREAM_TOSERVER) == -1) {
-                            goto end;
+                    }
+                }
+                break;
+            case MimeFileClose:
+                if (htud->tsflags & HTP_FILENAME_SET && !(htud->tsflags & HTP_DONTSTORE)) {
+                    uint32_t lastsize = consumed;
+                    if (lastsize > 0 && cur_buf[lastsize - 1] == '\n') {
+                        lastsize--;
+                        if (lastsize > 0 && cur_buf[lastsize - 1] == '\r') {
+                            lastsize--;
                          }
                      }
-                    FlagDetectStateNewFile(htud, STREAM_TOSERVER);
-
-                    htud->tsflags &= ~HTP_FILENAME_SET;
-                    htud->request_body.body_parsed += (header_end - chunks_buffer);
+                    HTPFileClose(htud, cur_buf, lastsize, 0, STREAM_TOSERVER);
                  }
-            }
-        }
-next:
-        SCLogDebug("header_start %p, header_end %p, form_end %p",
-                header_start, header_end, form_end);
-
-        /* Search next boundary entry after the start of body */
-        uint32_t cursizeread = header_end - chunks_buffer;
-        header_start = Bs2bmSearch(header_end + 4,
-                chunks_buffer_len - (cursizeread + 4),
-                boundary, expected_boundary_len);
-        if (header_start != NULL) {
-            header_end = Bs2bmSearch(header_end + 4,
-                    chunks_buffer_len - (cursizeread + 4),
-                    (uint8_t *) "\r\n\r\n", 4);
-        }
-    }
-
-    /* if we're parsing the multipart and we're not currently processing a
-     * file, we move the body pointer forward. */
-    if (form_end == NULL && !(htud->tsflags & HTP_FILENAME_SET) && header_start == NULL) {
-        if (chunks_buffer_len > expected_boundary_end_len) {
-            uint32_t move = chunks_buffer_len - expected_boundary_end_len + 1;
-
-            htud->request_body.body_parsed += move;
-            SCLogDebug("form not ready, file not set, parsing non-file "
-                    "record: moved %u", move);
+                htud->tsflags &= ~HTP_FILENAME_SET;
+                break;
+                // TODO: raise an event on parsing error?
          }
+        cur_buf += consumed;
+        cur_buf_len -= consumed;
      }
  
  end:
@@ -1931,7 +1509,8 @@ static int HTPCallbackRequestBodyData(htp_tx_data_t *d)
              printf("REASSCHUNK END: \n");
  #endif
  
-            HtpRequestBodyHandleMultipart(hstate, tx_ud, d->tx, chunks_buffer, chunks_buffer_len);
+            HtpRequestBodyHandleMultipart(hstate, tx_ud, d->tx, chunks_buffer, chunks_buffer_len,
+                    (d->data == NULL && d->len == 0));
  
          } else if (tx_ud->request_body_type == HTP_BODY_REQUEST_POST ||
                     tx_ud->request_body_type == HTP_BODY_REQUEST_PUT) {
@@ -6030,7 +5609,11 @@ static int HTPBodyReassemblyTest01(void)
      printf("REASSCHUNK END: \n");
  #endif
  
-    HtpRequestBodyHandleMultipart(&hstate, &htud, &tx, chunks_buffer, chunks_buffer_len);
+    htud.mime_state = rs_mime_state_init((const uint8_t *)"multipart/form-data; boundary=toto",
+            strlen("multipart/form-data; boundary=toto"));
+    FAIL_IF_NULL(htud.mime_state);
+    htud.tsflags |= HTP_BOUNDARY_SET;
+    HtpRequestBodyHandleMultipart(&hstate, &htud, &tx, chunks_buffer, chunks_buffer_len, false);
  
      if (htud.request_body.content_len_so_far != 669) {
          printf("htud.request_body.content_len_so_far %"PRIu64": ", htud.request_body.content_len_so_far);
diff --git a/src/app-layer-htp.h b/src/app-layer-htp.h

index 6b376285434d25b086f5246df8facaa27729a462..9c39ba393ffd96af5bfc3db066677eef098dc0e5 100644 (file)
--- a/src/app-layer-htp.h
+++ b/src/app-layer-htp.h
@@ -58,9 +58,6 @@
  #define HTP_CONFIG_DEFAULT_RANDOMIZE                    1
  #define HTP_CONFIG_DEFAULT_RANDOMIZE_RANGE              10
  
-/** a boundary should be smaller in size */
-#define HTP_BOUNDARY_MAX                            200U
-
  // 0x0001 not used
  #define HTP_FLAG_STATE_CLOSED_TS    0x0002    /**< Flag to indicate that HTTP
                                               connection is closed */
@@ -212,8 +209,6 @@ typedef struct HtpTxUserData_ {
      uint8_t request_has_trailers;
      uint8_t response_has_trailers;
  
-    uint8_t boundary_len;
-
      uint8_t tsflags;
      uint8_t tcflags;
  
@@ -229,10 +224,7 @@ typedef struct HtpTxUserData_ {
      uint32_t request_headers_raw_len;
      uint32_t response_headers_raw_len;
  
-    /** Holds the boundary identification string if any (used on
-     *  multipart/form-data only)
-     */
-    uint8_t *boundary;
+    MimeStateHTTP *mime_state;
  
      HttpRangeContainerBlock *file_range; /**< used to assign track ids to range file */
author	Philippe Antoine <pantoine@oisf.net>
	Tue, 2 Aug 2022 14:25:10 +0000 (16:25 +0200)
committer	Victor Julien <victor@inliniac.net>
	Tue, 4 Jun 2024 04:28:28 +0000 (06:28 +0200)
rust/src/mime/mod.rs		patch \| blob \| blame \| history
src/app-layer-htp.c		patch \| blob \| blame \| history
src/app-layer-htp.h		patch \| blob \| blame \| history