From: Willy Tarreau Date: Thu, 21 Sep 2017 06:40:02 +0000 (+0200) Subject: REORG: http: move the HTTP/1 chunk parser to h1.{c,h} X-Git-Tag: v1.8-dev3~21 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=db4893d6a4168601cc855114d58e9cdbe587df42;p=thirdparty%2Fhaproxy.git REORG: http: move the HTTP/1 chunk parser to h1.{c,h} Functions http_parse_chunk_size(), http_skip_chunk_crlf() and http_forward_trailers() were moved to h1.h and h1.c respectively so that they can be called from outside. The parts that were inline remained inline as it's critical for performance (+41% perf difference reported in an earlier test). For now the "http_" prefix remains in their name since they still depend on the http_msg type. --- diff --git a/include/proto/h1.h b/include/proto/h1.h index 7dff096125..3551152732 100644 --- a/include/proto/h1.h +++ b/include/proto/h1.h @@ -22,11 +22,15 @@ #ifndef _PROTO_H1_H #define _PROTO_H1_H +#include #include #include +#include #include +#include extern const uint8_t h1_char_classes[256]; +int http_forward_trailers(struct http_msg *msg); #define H1_FLG_CTL 0x01 #define H1_FLG_SEP 0x02 @@ -121,5 +125,145 @@ static inline const char *h1_msg_state_str(enum h1_state msg_state) } } +/* This function may be called only in HTTP_MSG_CHUNK_CRLF. It reads the CRLF or + * a possible LF alone at the end of a chunk. The caller should adjust msg->next + * in order to include this part into the next forwarding phase. Note that the + * caller must ensure that ->p points to the first byte to parse. It returns + * the number of bytes parsed on success, so the caller can set msg_state to + * HTTP_MSG_CHUNK_SIZE. If not enough data are available, the function does not + * change anything and returns zero. If a parse error is encountered, the + * function returns < 0. Note: this function is designed to parse wrapped CRLF + * at the end of the buffer. 
+ */ +static inline int http_skip_chunk_crlf(struct http_msg *msg) +{ + const struct buffer *buf = msg->chn->buf; + const char *ptr; + int bytes; + + /* NB: we'll check data availability at the end. It's not a + * problem because whatever we match first will be checked + * against the correct length. + */ + bytes = 1; + ptr = b_ptr(buf, msg->next); + if (*ptr == '\r') { + bytes++; + ptr++; + if (ptr >= buf->data + buf->size) + ptr = buf->data; + } + + if (msg->next + bytes > buf->i) + return 0; + + if (*ptr != '\n') { + msg->err_pos = buffer_count(buf, buf->p, ptr); + return -1; + } + return bytes; +} + +/* Parse the chunk size at msg->next. Once done, caller should adjust ->next to + * point to the first byte of data after the chunk size, so that we know we can + * forward exactly msg->next bytes. msg->sol contains the exact number of bytes + * forming the chunk size. That way it is always possible to differentiate + * between the start of the body and the start of the data. Return the number + * of bytes parsed on success, 0 when some data is missing, <0 on error. Note: + * this function is designed to parse wrapped CRLF at the end of the buffer. 
+ */ +static inline int http_parse_chunk_size(struct http_msg *msg) +{ + const struct buffer *buf = msg->chn->buf; + const char *ptr = b_ptr(buf, msg->next); + const char *ptr_old = ptr; + const char *end = buf->data + buf->size; + const char *stop = bi_end(buf); + unsigned int chunk = 0; + + /* The chunk size is in the following form, though we are only + * interested in the size and CRLF : + * 1*HEXDIGIT *WSP *[ ';' extensions ] CRLF + */ + while (1) { + int c; + if (ptr == stop) + return 0; + c = hex2i(*ptr); + if (c < 0) /* not a hex digit anymore */ + break; + if (unlikely(++ptr >= end)) + ptr = buf->data; + if (unlikely(chunk & 0xF8000000)) /* integer overflow will occur if result >= 2GB */ + goto error; + chunk = (chunk << 4) + c; + } + + /* empty size not allowed */ + if (unlikely(ptr == ptr_old)) + goto error; + + while (HTTP_IS_SPHT(*ptr)) { + if (++ptr >= end) + ptr = buf->data; + if (unlikely(ptr == stop)) + return 0; + } + + /* Up to there, we know that at least one byte is present at *ptr. Check + * for the end of chunk size. + */ + while (1) { + if (likely(HTTP_IS_CRLF(*ptr))) { + /* we now have a CR or an LF at ptr */ + if (likely(*ptr == '\r')) { + if (++ptr >= end) + ptr = buf->data; + if (ptr == stop) + return 0; + } + + if (unlikely(*ptr != '\n')) + goto error; + if (++ptr >= end) + ptr = buf->data; + /* done */ + break; + } + else if (likely(*ptr == ';')) { + /* chunk extension, ends at next CRLF */ + if (++ptr >= end) + ptr = buf->data; + if (ptr == stop) + return 0; + + while (!HTTP_IS_CRLF(*ptr)) { + if (++ptr >= end) + ptr = buf->data; + if (ptr == stop) + return 0; + } + /* we have a CRLF now, loop above */ + continue; + } + else + goto error; + } + + /* OK we found our CRLF and now points to the next byte, which may + * or may not be present. We save the number of bytes parsed into + * msg->sol. 
+ */ + msg->sol = ptr - ptr_old; + if (unlikely(ptr < ptr_old)) + msg->sol += buf->size; + msg->chunk_len = chunk; + msg->body_len += chunk; + return msg->sol; + error: + msg->err_pos = buffer_count(buf, buf->p, ptr); + return -1; +} + #endif /* _PROTO_H1_H */ diff --git a/include/types/proto_http.h b/include/types/proto_http.h index 3f99df74ad..027bfce42f 100644 --- a/include/types/proto_http.h +++ b/include/types/proto_http.h @@ -27,6 +27,7 @@ #include #include +#include #include #include #include diff --git a/src/h1.c b/src/h1.c index 044709aa9b..7a380c2d04 100644 --- a/src/h1.c +++ b/src/h1.c @@ -153,3 +153,77 @@ const unsigned char h1_char_classes[256] = { ['~'] = H1_FLG_TOK, [127] = H1_FLG_CTL, }; + + +/* This function skips trailers in the buffer associated with HTTP message + * <msg>. The first visited position is msg->next. If the end of the trailers is + * found, the function returns >0. So, the caller can automatically schedule it + * to be forwarded, and switch msg->msg_state to HTTP_MSG_DONE. If not enough + * data are available, the function does not change anything except maybe + * msg->sol if it could parse some lines, and returns zero. If a parse error + * is encountered, the function returns < 0 and does not change anything except + * maybe msg->sol. Note that the message must already be in HTTP_MSG_TRAILERS + * state before calling this function, which implies that all non-trailers data + * have already been scheduled for forwarding, and that msg->next exactly + * matches the length of trailers already parsed and not forwarded. It is also + * important to note that this function is designed to be able to parse wrapped + * headers at end of buffer. + */ +int http_forward_trailers(struct http_msg *msg) +{ + const struct buffer *buf = msg->chn->buf; + + /* we have msg->next which points to next line. Look for CRLF. 
But + * first, we reset msg->sol */ + msg->sol = 0; + while (1) { + const char *p1 = NULL, *p2 = NULL; + const char *start = b_ptr(buf, msg->next + msg->sol); + const char *stop = bi_end(buf); + const char *ptr = start; + int bytes = 0; + + /* scan current line and stop at LF or CRLF */ + while (1) { + if (ptr == stop) + return 0; + + if (*ptr == '\n') { + if (!p1) + p1 = ptr; + p2 = ptr; + break; + } + + if (*ptr == '\r') { + if (p1) { + msg->err_pos = buffer_count(buf, buf->p, ptr); + return -1; + } + p1 = ptr; + } + + ptr++; + if (ptr >= buf->data + buf->size) + ptr = buf->data; + } + + /* after LF; point to beginning of next line */ + p2++; + if (p2 >= buf->data + buf->size) + p2 = buf->data; + + bytes = p2 - start; + if (bytes < 0) + bytes += buf->size; + msg->sol += bytes; + + /* LF/CRLF at beginning of line => end of trailers at p2. + * Everything was scheduled for forwarding, there's nothing left + * from this message. */ + if (p1 == start) + return 1; + + /* OK, next line then */ + } +} diff --git a/src/proto_http.c b/src/proto_http.c index a599f659d7..92e6083908 100644 --- a/src/proto_http.c +++ b/src/proto_http.c @@ -2108,219 +2108,6 @@ void http_change_connection_header(struct http_txn *txn, struct http_msg *msg, i return; } -/* Parse the chunk size at msg->next. Once done, caller should adjust ->next to - * point to the first byte of data after the chunk size, so that we know we can - * forward exactly msg->next bytes. msg->sol contains the exact number of bytes - * forming the chunk size. That way it is always possible to differentiate - * between the start of the body and the start of the data. Return the number - * of byte parsed on success, 0 when some data is missing, <0 on error. Note: - * this function is designed to parse wrapped CRLF at the end of the buffer. 
- */ -static inline int http_parse_chunk_size(struct http_msg *msg) -{ - const struct buffer *buf = msg->chn->buf; - const char *ptr = b_ptr(buf, msg->next); - const char *ptr_old = ptr; - const char *end = buf->data + buf->size; - const char *stop = bi_end(buf); - unsigned int chunk = 0; - - /* The chunk size is in the following form, though we are only - * interested in the size and CRLF : - * 1*HEXDIGIT *WSP *[ ';' extensions ] CRLF - */ - while (1) { - int c; - if (ptr == stop) - return 0; - c = hex2i(*ptr); - if (c < 0) /* not a hex digit anymore */ - break; - if (unlikely(++ptr >= end)) - ptr = buf->data; - if (chunk & 0xF8000000) /* integer overflow will occur if result >= 2GB */ - goto error; - chunk = (chunk << 4) + c; - } - - /* empty size not allowed */ - if (unlikely(ptr == ptr_old)) - goto error; - - while (HTTP_IS_SPHT(*ptr)) { - if (++ptr >= end) - ptr = buf->data; - if (unlikely(ptr == stop)) - return 0; - } - - /* Up to there, we know that at least one byte is present at *ptr. Check - * for the end of chunk size. - */ - while (1) { - if (likely(HTTP_IS_CRLF(*ptr))) { - /* we now have a CR or an LF at ptr */ - if (likely(*ptr == '\r')) { - if (++ptr >= end) - ptr = buf->data; - if (ptr == stop) - return 0; - } - - if (*ptr != '\n') - goto error; - if (++ptr >= end) - ptr = buf->data; - /* done */ - break; - } - else if (*ptr == ';') { - /* chunk extension, ends at next CRLF */ - if (++ptr >= end) - ptr = buf->data; - if (ptr == stop) - return 0; - - while (!HTTP_IS_CRLF(*ptr)) { - if (++ptr >= end) - ptr = buf->data; - if (ptr == stop) - return 0; - } - /* we have a CRLF now, loop above */ - continue; - } - else - goto error; - } - - /* OK we found our CRLF and now points to the next byte, which may - * or may not be present. We save the number of bytes parsed into - * msg->sol. 
- */ - msg->sol = ptr - ptr_old; - if (unlikely(ptr < ptr_old)) - msg->sol += buf->size; - msg->chunk_len = chunk; - msg->body_len += chunk; - return msg->sol; - error: - msg->err_pos = buffer_count(buf, buf->p, ptr); - return -1; -} - -/* This function skips trailers in the buffer associated with HTTP message - * <msg>. The first visited position is msg->next. If the end of the trailers is - * found, the function returns >0. So, the caller can automatically schedul it - * to be forwarded, and switch msg->msg_state to HTTP_MSG_DONE. If not enough - * data are available, the function does not change anything except maybe - * msg->sol if it could parse some lines, and returns zero. If a parse error - * is encountered, the function returns < 0 and does not change anything except - * maybe msg->sol. Note that the message must already be in HTTP_MSG_TRAILERS - * state before calling this function, which implies that all non-trailers data - * have already been scheduled for forwarding, and that msg->next exactly - * matches the length of trailers already parsed and not forwarded. It is also - * important to note that this function is designed to be able to parse wrapped - * headers at end of buffer. - */ -static int http_forward_trailers(struct http_msg *msg) -{ - const struct buffer *buf = msg->chn->buf; - - /* we have msg->next which points to next line. Look for CRLF. 
But - * first, we reset msg->sol */ - msg->sol = 0; - while (1) { - const char *p1 = NULL, *p2 = NULL; - const char *start = b_ptr(buf, msg->next + msg->sol); - const char *stop = bi_end(buf); - const char *ptr = start; - int bytes = 0; - - /* scan current line and stop at LF or CRLF */ - while (1) { - if (ptr == stop) - return 0; - - if (*ptr == '\n') { - if (!p1) - p1 = ptr; - p2 = ptr; - break; - } - - if (*ptr == '\r') { - if (p1) { - msg->err_pos = buffer_count(buf, buf->p, ptr); - return -1; - } - p1 = ptr; - } - - ptr++; - if (ptr >= buf->data + buf->size) - ptr = buf->data; - } - - /* after LF; point to beginning of next line */ - p2++; - if (p2 >= buf->data + buf->size) - p2 = buf->data; - - bytes = p2 - start; - if (bytes < 0) - bytes += buf->size; - msg->sol += bytes; - - /* LF/CRLF at beginning of line => end of trailers at p2. - * Everything was scheduled for forwarding, there's nothing left - * from this message. */ - if (p1 == start) - return 1; - - /* OK, next line then */ - } -} - -/* This function may be called only in HTTP_MSG_CHUNK_CRLF. It reads the CRLF or - * a possible LF alone at the end of a chunk. The caller should adjust msg->next - * in order to include this part into the next forwarding phase. Note that the - * caller must ensure that ->p points to the first byte to parse. It returns - * the number of bytes parsed on success, so the caller can set msg_state to - * HTTP_MSG_CHUNK_SIZE. If not enough data are available, the function does not - * change anything and returns zero. If a parse error is encountered, the - * function returns < 0. Note: this function is designed to parse wrapped CRLF - * at the end of the buffer. - */ -static inline int http_skip_chunk_crlf(struct http_msg *msg) -{ - const struct buffer *buf = msg->chn->buf; - const char *ptr; - int bytes; - - /* NB: we'll check data availabilty at the end. It's not a - * problem because whatever we match first will be checked - * against the correct length. 
- */ - bytes = 1; - ptr = b_ptr(buf, msg->next); - if (*ptr == '\r') { - bytes++; - ptr++; - if (ptr >= buf->data + buf->size) - ptr = buf->data; - } - - if (msg->next + bytes > buf->i) - return 0; - - if (*ptr != '\n') { - msg->err_pos = buffer_count(buf, buf->p, ptr); - return -1; - } - return bytes; -} - /* Parses a qvalue and returns it multipled by 1000, from 0 to 1000. If the * value is larger than 1000, it is bound to 1000. The parser consumes up to * 1 digit, one dot and 3 digits and stops on the first invalid character.