events->create_event(EVENT_CHUNK_BAD_SEP);
break;
}
- curr_state = CHUNK_ZEROS;
+ curr_state = CHUNK_LEADING_WS;
k--; // Reprocess this octet in the next state
break;
+ case CHUNK_LEADING_WS:
+ // Looking for whitespace before the chunk size
+ if (is_sp_tab[buffer[k]])
+ {
+ *infractions += INF_CHUNK_LEADING_WS;
+ events->create_event(EVENT_CHUNK_WHITESPACE);
+ num_leading_ws++;
+ if (num_leading_ws == 5)
+ {
+ events->create_event(EVENT_BROKEN_CHUNK);
+ curr_state = CHUNK_BAD;
+ }
+ break;
+ }
+ curr_state = CHUNK_ZEROS;
+ k--;
+ break;
case CHUNK_ZEROS:
// Looking for leading zeros in the chunk size.
if (buffer[k] == '0')
{
*infractions += INF_CHUNK_WHITESPACE;
events->create_event(EVENT_CHUNK_WHITESPACE);
- curr_state = CHUNK_WHITESPACE;
+ curr_state = CHUNK_TRAILING_WS;
}
else if (buffer[k] == ';')
{
}
}
break;
- case CHUNK_WHITESPACE:
+ case CHUNK_TRAILING_WS:
// Skipping over improper whitespace following the chunk size
if (buffer[k] == '\r')
{
case CHUNK_DCRLF2:
// The LF from the end-of-chunk CRLF should be here
num_good_chunks++;
+ num_leading_ws = 0;
num_zeros = 0;
expected = 0;
digits_seen = 0;
uint32_t data_seen = 0;
HttpEnums::ChunkState curr_state = HttpEnums::CHUNK_NEWLINES;
uint32_t expected = 0;
+ uint32_t num_leading_ws = 0;
uint32_t num_zeros = 0;
uint32_t digits_seen = 0;
bool new_section = false;
SCAN_ABORT, SCAN_END };
// State machine for chunk parsing
// CHUNK_LEADING_WS is white space before the chunk size; CHUNK_TRAILING_WS is white space after it.
-enum ChunkState { CHUNK_NEWLINES, CHUNK_ZEROS, CHUNK_NUMBER, CHUNK_WHITESPACE, CHUNK_OPTIONS,
- CHUNK_HCRLF, CHUNK_DATA, CHUNK_DCRLF1, CHUNK_DCRLF2, CHUNK_BAD };
+enum ChunkState { CHUNK_NEWLINES, CHUNK_ZEROS, CHUNK_LEADING_WS, CHUNK_NUMBER, CHUNK_TRAILING_WS,
+ CHUNK_OPTIONS, CHUNK_HCRLF, CHUNK_DATA, CHUNK_DCRLF1, CHUNK_DCRLF2, CHUNK_BAD };
// List of possible HTTP versions.
enum VersionId { VERS__NO_SOURCE=-16, VERS__NOT_COMPUTE=-14, VERS__PROBLEMATIC=-12,
INF_CONTENT_ENCODING_CHUNKED,
INF_206_WITHOUT_RANGE,
INF_VERSION_NOT_UPPERCASE,
+ INF_CHUNK_LEADING_WS,
INF__MAX_VALUE
};
switch (curr_state)
{
case CHUNK_NEWLINES:
- if (!is_cr_lf[data[k]])
+ case CHUNK_LEADING_WS:
+ // Newlines and leading white space are handled together in reassemble(); CHUNK_LEADING_WS is listed here only to avoid a compiler warning.
+ if (!is_sp_tab_cr_lf[data[k]])
{
curr_state = CHUNK_NUMBER;
k--;
else if (data[k] == ';')
curr_state = CHUNK_OPTIONS;
else if (is_sp_tab[data[k]])
- curr_state = CHUNK_WHITESPACE;
+ curr_state = CHUNK_TRAILING_WS;
else
expected = expected * 16 + as_hex[data[k]];
break;
+ case CHUNK_TRAILING_WS:
case CHUNK_OPTIONS:
- case CHUNK_WHITESPACE:
- // No practical difference between white space and options in reassemble()
+ // No practical difference between trailing white space and options in reassemble()
if (data[k] == '\r')
curr_state = CHUNK_HCRLF;
else if (data[k] == '\n')
{ EVENT_URI_BAD_FORMAT, "URI badly formatted" },
{ EVENT_UNKNOWN_PERCENT, "unrecognized type of percent encoding in URI" },
{ EVENT_BROKEN_CHUNK, "HTTP chunk misformatted" },
- { EVENT_CHUNK_WHITESPACE, "white space following chunk length" },
+ { EVENT_CHUNK_WHITESPACE, "white space adjacent to chunk length" },
{ EVENT_HEAD_NAME_WHITESPACE, "white space within header name" },
{ EVENT_GZIP_OVERRUN, "excessive gzip compression" },
{ EVENT_GZIP_FAILURE, "gzip decompression failed" },