From: Willy Tarreau Date: Thu, 21 Sep 2017 08:22:25 +0000 (+0200) Subject: REORG: http: move the HTTP/1 header block parser to h1.c X-Git-Tag: v1.8-dev3~20 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=8740c8b1b2fffd33ab3b54c4b304f05d803f0063;p=thirdparty%2Fhaproxy.git REORG: http: move the HTTP/1 header block parser to h1.c Since it still depends on http_msg, it was not renamed yet. --- diff --git a/include/proto/h1.h b/include/proto/h1.h index 3551152732..a0fa7a7c59 100644 --- a/include/proto/h1.h +++ b/include/proto/h1.h @@ -28,8 +28,16 @@ #include #include #include +#include extern const uint8_t h1_char_classes[256]; +const char *http_parse_reqline(struct http_msg *msg, + enum h1_state state, const char *ptr, const char *end, + unsigned int *ret_ptr, enum h1_state *ret_state); +const char *http_parse_stsline(struct http_msg *msg, + enum h1_state state, const char *ptr, const char *end, + unsigned int *ret_ptr, enum h1_state *ret_state); +void http_msg_analyzer(struct http_msg *msg, struct hdr_idx *idx); int http_forward_trailers(struct http_msg *msg); #define H1_FLG_CTL 0x01 diff --git a/src/h1.c b/src/h1.c index 7a380c2d04..bca820c20e 100644 --- a/src/h1.c +++ b/src/h1.c @@ -13,6 +13,7 @@ #include #include +#include /* It is about twice as fast on recent architectures to lookup a byte in a * table than to perform a boolean AND or OR between two tests. Refer to @@ -155,6 +156,645 @@ const unsigned char h1_char_classes[256] = { }; +/* + * This function parses a status line between <ptr> and <end>, starting with + * parser state <state>. Only states HTTP_MSG_RPVER, HTTP_MSG_RPVER_SP, + * HTTP_MSG_RPCODE, HTTP_MSG_RPCODE_SP and HTTP_MSG_RPREASON are handled. Others + * will give undefined results. + * Note that it is upon the caller's responsibility to ensure that ptr < end, + * and that msg->sol points to the beginning of the response. 
+ * If a complete line is found (which implies that at least one CR or LF is + * found before <end>, the updated <ptr> is returned, otherwise NULL is + * returned indicating an incomplete line (which does not mean that parts have + * not been updated). In the incomplete case, if <ret_ptr> or <ret_state> are + * non-NULL, they are fed with the new <ptr> and <state> values to be passed + * upon next call. + * + * This function was intentionally designed to be called from + * http_msg_analyzer() with the lowest overhead. It should integrate perfectly + * within its state machine and use the same macros, hence the need for same + * labels and variable names. Note that msg->sol is left unchanged. + */ +const char *http_parse_stsline(struct http_msg *msg, + enum h1_state state, const char *ptr, const char *end, + unsigned int *ret_ptr, enum h1_state *ret_state) +{ + const char *msg_start = msg->chn->buf->p; + + switch (state) { + case HTTP_MSG_RPVER: + http_msg_rpver: + if (likely(HTTP_IS_VER_TOKEN(*ptr))) + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, HTTP_MSG_RPVER); + + if (likely(HTTP_IS_SPHT(*ptr))) { + msg->sl.st.v_l = ptr - msg_start; + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP); + } + msg->err_state = HTTP_MSG_RPVER; + state = HTTP_MSG_ERROR; + break; + + case HTTP_MSG_RPVER_SP: + http_msg_rpver_sp: + if (likely(!HTTP_IS_LWS(*ptr))) { + msg->sl.st.c = ptr - msg_start; + goto http_msg_rpcode; + } + if (likely(HTTP_IS_SPHT(*ptr))) + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP); + /* so it's a CR/LF, this is invalid */ + msg->err_state = HTTP_MSG_RPVER_SP; + state = HTTP_MSG_ERROR; + break; + + case HTTP_MSG_RPCODE: + http_msg_rpcode: + if (likely(!HTTP_IS_LWS(*ptr))) + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, HTTP_MSG_RPCODE); + + if (likely(HTTP_IS_SPHT(*ptr))) { + msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c; + EAT_AND_JUMP_OR_RETURN(ptr, end, 
http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP); + } + + /* so it's a CR/LF, so there is no reason phrase */ + msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c; + http_msg_rsp_reason: + /* FIXME: should we support HTTP responses without any reason phrase ? */ + msg->sl.st.r = ptr - msg_start; + msg->sl.st.r_l = 0; + goto http_msg_rpline_eol; + + case HTTP_MSG_RPCODE_SP: + http_msg_rpcode_sp: + if (likely(!HTTP_IS_LWS(*ptr))) { + msg->sl.st.r = ptr - msg_start; + goto http_msg_rpreason; + } + if (likely(HTTP_IS_SPHT(*ptr))) + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP); + /* so it's a CR/LF, so there is no reason phrase */ + goto http_msg_rsp_reason; + + case HTTP_MSG_RPREASON: + http_msg_rpreason: + if (likely(!HTTP_IS_CRLF(*ptr))) + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, HTTP_MSG_RPREASON); + msg->sl.st.r_l = ptr - msg_start - msg->sl.st.r; + http_msg_rpline_eol: + /* We have seen the end of line. Note that we do not + * necessarily have the \n yet, but at least we know that we + * have EITHER \r OR \n, otherwise the response would not be + * complete. We can then record the response length and return + * to the caller which will be able to register it. + */ + msg->sl.st.l = ptr - msg_start - msg->sol; + return ptr; + + default: +#ifdef DEBUG_FULL + fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state); + exit(1); +#endif + ; + } + + http_msg_ood: + /* out of valid data */ + if (ret_state) + *ret_state = state; + if (ret_ptr) + *ret_ptr = ptr - msg_start; + return NULL; +} + +/* + * This function parses a request line between <ptr> and <end>, starting with + * parser state <state>. Only states HTTP_MSG_RQMETH, HTTP_MSG_RQMETH_SP, + * HTTP_MSG_RQURI, HTTP_MSG_RQURI_SP and HTTP_MSG_RQVER are handled. Others + * will give undefined results. 
+ * Note that it is upon the caller's responsibility to ensure that ptr < end, + * and that msg->sol points to the beginning of the request. + * If a complete line is found (which implies that at least one CR or LF is + * found before <end>, the updated <ptr> is returned, otherwise NULL is + * returned indicating an incomplete line (which does not mean that parts have + * not been updated). In the incomplete case, if <ret_ptr> or <ret_state> are + * non-NULL, they are fed with the new <ptr> and <state> values to be passed + * upon next call. + * + * This function was intentionally designed to be called from + * http_msg_analyzer() with the lowest overhead. It should integrate perfectly + * within its state machine and use the same macros, hence the need for same + * labels and variable names. Note that msg->sol is left unchanged. + */ +const char *http_parse_reqline(struct http_msg *msg, + enum h1_state state, const char *ptr, const char *end, + unsigned int *ret_ptr, enum h1_state *ret_state) +{ + const char *msg_start = msg->chn->buf->p; + + switch (state) { + case HTTP_MSG_RQMETH: + http_msg_rqmeth: + if (likely(HTTP_IS_TOKEN(*ptr))) + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth, http_msg_ood, state, HTTP_MSG_RQMETH); + + if (likely(HTTP_IS_SPHT(*ptr))) { + msg->sl.rq.m_l = ptr - msg_start; + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP); + } + + if (likely(HTTP_IS_CRLF(*ptr))) { + /* HTTP 0.9 request */ + msg->sl.rq.m_l = ptr - msg_start; + http_msg_req09_uri: + msg->sl.rq.u = ptr - msg_start; + http_msg_req09_uri_e: + msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u; + http_msg_req09_ver: + msg->sl.rq.v = ptr - msg_start; + msg->sl.rq.v_l = 0; + goto http_msg_rqline_eol; + } + msg->err_state = HTTP_MSG_RQMETH; + state = HTTP_MSG_ERROR; + break; + + case HTTP_MSG_RQMETH_SP: + http_msg_rqmeth_sp: + if (likely(!HTTP_IS_LWS(*ptr))) { + msg->sl.rq.u = ptr - msg_start; + goto http_msg_rquri; + } + if (likely(HTTP_IS_SPHT(*ptr))) + EAT_AND_JUMP_OR_RETURN(ptr, 
end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP); + /* so it's a CR/LF, meaning an HTTP 0.9 request */ + goto http_msg_req09_uri; + + case HTTP_MSG_RQURI: + http_msg_rquri: +#if defined(__x86_64__) || \ + defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \ + defined(__ARM_ARCH_7A__) + /* speedup: skip bytes not between 0x21 and 0x7e inclusive */ + while (ptr <= end - sizeof(int)) { + int x = *(int *)ptr - 0x21212121; + if (x & 0x80808080) + break; + + x -= 0x5e5e5e5e; + if (!(x & 0x80808080)) + break; + + ptr += sizeof(int); + } +#endif + if (ptr >= end) { + state = HTTP_MSG_RQURI; + goto http_msg_ood; + } + http_msg_rquri2: + if (likely((unsigned char)(*ptr - 33) <= 93)) /* 33 to 126 included */ + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri2, http_msg_ood, state, HTTP_MSG_RQURI); + + if (likely(HTTP_IS_SPHT(*ptr))) { + msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u; + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP); + } + + if (likely((unsigned char)*ptr >= 128)) { + /* non-ASCII chars are forbidden unless option + * accept-invalid-http-request is enabled in the frontend. + * In any case, we capture the faulty char. 
+ */ + if (msg->err_pos < -1) + goto invalid_char; + if (msg->err_pos == -1) + msg->err_pos = ptr - msg_start; + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri, http_msg_ood, state, HTTP_MSG_RQURI); + } + + if (likely(HTTP_IS_CRLF(*ptr))) { + /* so it's a CR/LF, meaning an HTTP 0.9 request */ + goto http_msg_req09_uri_e; + } + + /* OK forbidden chars, 0..31 or 127 */ + invalid_char: + msg->err_pos = ptr - msg_start; + msg->err_state = HTTP_MSG_RQURI; + state = HTTP_MSG_ERROR; + break; + + case HTTP_MSG_RQURI_SP: + http_msg_rquri_sp: + if (likely(!HTTP_IS_LWS(*ptr))) { + msg->sl.rq.v = ptr - msg_start; + goto http_msg_rqver; + } + if (likely(HTTP_IS_SPHT(*ptr))) + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP); + /* so it's a CR/LF, meaning an HTTP 0.9 request */ + goto http_msg_req09_ver; + + case HTTP_MSG_RQVER: + http_msg_rqver: + if (likely(HTTP_IS_VER_TOKEN(*ptr))) + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqver, http_msg_ood, state, HTTP_MSG_RQVER); + + if (likely(HTTP_IS_CRLF(*ptr))) { + msg->sl.rq.v_l = ptr - msg_start - msg->sl.rq.v; + http_msg_rqline_eol: + /* We have seen the end of line. Note that we do not + * necessarily have the \n yet, but at least we know that we + * have EITHER \r OR \n, otherwise the request would not be + * complete. We can then record the request length and return + * to the caller which will be able to register it. + */ + msg->sl.rq.l = ptr - msg_start - msg->sol; + return ptr; + } + + /* neither an HTTP_VER token nor a CRLF */ + msg->err_state = HTTP_MSG_RQVER; + state = HTTP_MSG_ERROR; + break; + + default: +#ifdef DEBUG_FULL + fprintf(stderr, "FIXME !!!! 
impossible state at %s:%d = %d\n", __FILE__, __LINE__, state); + exit(1); +#endif + ; + } + + http_msg_ood: + /* out of valid data */ + if (ret_state) + *ret_state = state; + if (ret_ptr) + *ret_ptr = ptr - msg_start; + return NULL; +} + +/* + * This function parses an HTTP message, either a request or a response, + * depending on the initial msg->msg_state. The caller is responsible for + * ensuring that the message does not wrap. The function can be preempted + * everywhere when data are missing and recalled at the exact same location + * with no information loss. The message may even be realigned between two + * calls. The header index is re-initialized when switching from + * MSG_R[PQ]BEFORE to MSG_RPVER|MSG_RQMETH. It modifies msg->sol among other + * fields. Note that msg->sol will be initialized after completing the first + * state, so that none of the msg pointers has to be initialized prior to the + * first call. + */ +void http_msg_analyzer(struct http_msg *msg, struct hdr_idx *idx) +{ + enum h1_state state; /* updated only when leaving the FSM */ + register char *ptr, *end; /* request pointers, to avoid dereferences */ + struct buffer *buf; + + state = msg->msg_state; + buf = msg->chn->buf; + ptr = buf->p + msg->next; + end = buf->p + buf->i; + + if (unlikely(ptr >= end)) + goto http_msg_ood; + + switch (state) { + /* + * First, states that are specific to the response only. + * We check them first so that request and headers are + * closer to each other (accessed more often). + */ + case HTTP_MSG_RPBEFORE: + http_msg_rpbefore: + if (likely(HTTP_IS_TOKEN(*ptr))) { + /* we have a start of message, but we have to check + * first if we need to remove some CRLF. We can only + * do this when o=0. + */ + if (unlikely(ptr != buf->p)) { + if (buf->o) + goto http_msg_ood; + /* Remove empty leading lines, as recommended by RFC2616. 
*/ + bi_fast_delete(buf, ptr - buf->p); + } + msg->sol = 0; + msg->sl.st.l = 0; /* used in debug mode */ + hdr_idx_init(idx); + state = HTTP_MSG_RPVER; + goto http_msg_rpver; + } + + if (unlikely(!HTTP_IS_CRLF(*ptr))) { + state = HTTP_MSG_RPBEFORE; + goto http_msg_invalid; + } + + if (unlikely(*ptr == '\n')) + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE); + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, HTTP_MSG_RPBEFORE_CR); + /* stop here */ + + case HTTP_MSG_RPBEFORE_CR: + http_msg_rpbefore_cr: + EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPBEFORE_CR); + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE); + /* stop here */ + + case HTTP_MSG_RPVER: + http_msg_rpver: + case HTTP_MSG_RPVER_SP: + case HTTP_MSG_RPCODE: + case HTTP_MSG_RPCODE_SP: + case HTTP_MSG_RPREASON: + ptr = (char *)http_parse_stsline(msg, + state, ptr, end, + &msg->next, &msg->msg_state); + if (unlikely(!ptr)) + return; + + /* we have a full response and we know that we have either a CR + * or an LF at <ptr>. + */ + hdr_idx_set_start(idx, msg->sl.st.l, *ptr == '\r'); + + msg->sol = ptr - buf->p; + if (likely(*ptr == '\r')) + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, HTTP_MSG_RPLINE_END); + goto http_msg_rpline_end; + + case HTTP_MSG_RPLINE_END: + http_msg_rpline_end: + /* msg->sol must point to the first of CR or LF. */ + EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPLINE_END); + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST); + /* stop here */ + + /* + * Second, states that are specific to the request only + */ + case HTTP_MSG_RQBEFORE: + http_msg_rqbefore: + if (likely(HTTP_IS_TOKEN(*ptr))) { + /* we have a start of message, but we have to check + * first if we need to remove some CRLF. We can only + * do this when o=0. 
+ */ + if (likely(ptr != buf->p)) { + if (buf->o) + goto http_msg_ood; + /* Remove empty leading lines, as recommended by RFC2616. */ + bi_fast_delete(buf, ptr - buf->p); + } + msg->sol = 0; + msg->sl.rq.l = 0; /* used in debug mode */ + state = HTTP_MSG_RQMETH; + goto http_msg_rqmeth; + } + + if (unlikely(!HTTP_IS_CRLF(*ptr))) { + state = HTTP_MSG_RQBEFORE; + goto http_msg_invalid; + } + + if (unlikely(*ptr == '\n')) + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE); + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore_cr, http_msg_ood, state, HTTP_MSG_RQBEFORE_CR); + /* stop here */ + + case HTTP_MSG_RQBEFORE_CR: + http_msg_rqbefore_cr: + EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQBEFORE_CR); + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE); + /* stop here */ + + case HTTP_MSG_RQMETH: + http_msg_rqmeth: + case HTTP_MSG_RQMETH_SP: + case HTTP_MSG_RQURI: + case HTTP_MSG_RQURI_SP: + case HTTP_MSG_RQVER: + ptr = (char *)http_parse_reqline(msg, + state, ptr, end, + &msg->next, &msg->msg_state); + if (unlikely(!ptr)) + return; + + /* we have a full request and we know that we have either a CR + * or an LF at <ptr>. + */ + hdr_idx_set_start(idx, msg->sl.rq.l, *ptr == '\r'); + + msg->sol = ptr - buf->p; + if (likely(*ptr == '\r')) + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqline_end, http_msg_ood, state, HTTP_MSG_RQLINE_END); + goto http_msg_rqline_end; + + case HTTP_MSG_RQLINE_END: + http_msg_rqline_end: + /* check for HTTP/0.9 request : no version information available. + * msg->sol must point to the first of CR or LF. 
+ */ + if (unlikely(msg->sl.rq.v_l == 0)) + goto http_msg_last_lf; + + EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQLINE_END); + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST); + /* stop here */ + + /* + * Common states below + */ + case HTTP_MSG_HDR_FIRST: + http_msg_hdr_first: + msg->sol = ptr - buf->p; + if (likely(!HTTP_IS_CRLF(*ptr))) { + goto http_msg_hdr_name; + } + + if (likely(*ptr == '\r')) + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF); + goto http_msg_last_lf; + + case HTTP_MSG_HDR_NAME: + http_msg_hdr_name: + /* assumes msg->sol points to the first char */ + if (likely(HTTP_IS_TOKEN(*ptr))) + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME); + + if (likely(*ptr == ':')) + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP); + + if (likely(msg->err_pos < -1) || *ptr == '\n') { + state = HTTP_MSG_HDR_NAME; + goto http_msg_invalid; + } + + if (msg->err_pos == -1) /* capture error pointer */ + msg->err_pos = ptr - buf->p; /* >= 0 now */ + + /* and we still accept this non-token character */ + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME); + + case HTTP_MSG_HDR_L1_SP: + http_msg_hdr_l1_sp: + /* assumes msg->sol points to the first char */ + if (likely(HTTP_IS_SPHT(*ptr))) + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP); + + /* header value can be basically anything except CR/LF */ + msg->sov = ptr - buf->p; + + if (likely(!HTTP_IS_CRLF(*ptr))) { + goto http_msg_hdr_val; + } + + if (likely(*ptr == '\r')) + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, HTTP_MSG_HDR_L1_LF); + goto http_msg_hdr_l1_lf; + + case HTTP_MSG_HDR_L1_LF: + http_msg_hdr_l1_lf: + EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L1_LF); + EAT_AND_JUMP_OR_RETURN(ptr, end, 
http_msg_hdr_l1_lws, http_msg_ood, state, HTTP_MSG_HDR_L1_LWS); + + case HTTP_MSG_HDR_L1_LWS: + http_msg_hdr_l1_lws: + if (likely(HTTP_IS_SPHT(*ptr))) { + /* replace HT,CR,LF with spaces */ + for (; buf->p + msg->sov < ptr; msg->sov++) + buf->p[msg->sov] = ' '; + goto http_msg_hdr_l1_sp; + } + /* we had a header consisting only in spaces ! */ + msg->eol = msg->sov; + goto http_msg_complete_header; + + case HTTP_MSG_HDR_VAL: + http_msg_hdr_val: + /* assumes msg->sol points to the first char, and msg->sov + * points to the first character of the value. + */ + + /* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D + * and lower. In fact since most of the time is spent in the loop, we + * also remove the sign bit test so that bytes 0x8e..0x0d break the + * loop, but we don't care since they're very rare in header values. + */ +#if defined(__x86_64__) + while (ptr <= end - sizeof(long)) { + if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL) + goto http_msg_hdr_val2; + ptr += sizeof(long); + } +#endif +#if defined(__x86_64__) || \ + defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \ + defined(__ARM_ARCH_7A__) + while (ptr <= end - sizeof(int)) { + if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080) + goto http_msg_hdr_val2; + ptr += sizeof(int); + } +#endif + if (ptr >= end) { + state = HTTP_MSG_HDR_VAL; + goto http_msg_ood; + } + http_msg_hdr_val2: + if (likely(!HTTP_IS_CRLF(*ptr))) + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, HTTP_MSG_HDR_VAL); + + msg->eol = ptr - buf->p; + /* Note: we could also copy eol into ->eoh so that we have the + * real header end in case it ends with lots of LWS, but is this + * really needed ? 
+ */ + if (likely(*ptr == '\r')) + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, HTTP_MSG_HDR_L2_LF); + goto http_msg_hdr_l2_lf; + + case HTTP_MSG_HDR_L2_LF: + http_msg_hdr_l2_lf: + EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L2_LF); + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, HTTP_MSG_HDR_L2_LWS); + + case HTTP_MSG_HDR_L2_LWS: + http_msg_hdr_l2_lws: + if (unlikely(HTTP_IS_SPHT(*ptr))) { + /* LWS: replace HT,CR,LF with spaces */ + for (; buf->p + msg->eol < ptr; msg->eol++) + buf->p[msg->eol] = ' '; + goto http_msg_hdr_val; + } + http_msg_complete_header: + /* + * It was a new header, so the last one is finished. + * Assumes msg->sol points to the first char, msg->sov points + * to the first character of the value and msg->eol to the + * first CR or LF so we know how the line ends. We insert last + * header into the index. + */ + if (unlikely(hdr_idx_add(msg->eol - msg->sol, buf->p[msg->eol] == '\r', + idx, idx->tail) < 0)) { + state = HTTP_MSG_HDR_L2_LWS; + goto http_msg_invalid; + } + + msg->sol = ptr - buf->p; + if (likely(!HTTP_IS_CRLF(*ptr))) { + goto http_msg_hdr_name; + } + + if (likely(*ptr == '\r')) + EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF); + goto http_msg_last_lf; + + case HTTP_MSG_LAST_LF: + http_msg_last_lf: + /* Assumes msg->sol points to the first of either CR or LF. + * Sets ->sov and ->next to the total header length, ->eoh to + * the last CRLF, and ->eol to the last CRLF length (1 or 2). + */ + EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_LAST_LF); + ptr++; + msg->sov = msg->next = ptr - buf->p; + msg->eoh = msg->sol; + msg->sol = 0; + msg->eol = msg->sov - msg->eoh; + msg->msg_state = HTTP_MSG_BODY; + return; + + case HTTP_MSG_ERROR: + /* this may only happen if we call http_msg_analyser() twice with an error */ + break; + + default: +#ifdef DEBUG_FULL + fprintf(stderr, "FIXME !!!! 
impossible state at %s:%d = %d\n", __FILE__, __LINE__, state); + exit(1); +#endif + ; + } + http_msg_ood: + /* out of data */ + msg->msg_state = state; + msg->next = ptr - buf->p; + return; + + http_msg_invalid: + /* invalid message */ + msg->err_state = state; + msg->msg_state = HTTP_MSG_ERROR; + msg->next = ptr - buf->p; + return; +} + /* This function skips trailers in the buffer associated with HTTP message * <msg>. The first visited position is msg->next. If the end of the trailers is * found, the function returns >0. So, the caller can automatically schedul it diff --git a/src/proto_http.c b/src/proto_http.c index 92e6083908..c0532d7722 100644 --- a/src/proto_http.c +++ b/src/proto_http.c @@ -1233,288 +1233,6 @@ void capture_headers(char *som, struct hdr_idx *idx, } } -/* - * This function parses a status line between <ptr> and <end>, starting with - * parser state <state>. Only states HTTP_MSG_RPVER, HTTP_MSG_RPVER_SP, - * HTTP_MSG_RPCODE, HTTP_MSG_RPCODE_SP and HTTP_MSG_RPREASON are handled. Others - * will give undefined results. - * Note that it is upon the caller's responsibility to ensure that ptr < end, - * and that msg->sol points to the beginning of the response. - * If a complete line is found (which implies that at least one CR or LF is - * found before <end>, the updated <ptr> is returned, otherwise NULL is - * returned indicating an incomplete line (which does not mean that parts have - * not been updated). In the incomplete case, if <ret_ptr> or <ret_state> are - * non-NULL, they are fed with the new <ptr> and <state> values to be passed - * upon next call. - * - * This function was intentionally designed to be called from - * http_msg_analyzer() with the lowest overhead. It should integrate perfectly - * within its state machine and use the same macros, hence the need for same - * labels and variable names. Note that msg->sol is left unchanged. 
- */ -const char *http_parse_stsline(struct http_msg *msg, - enum h1_state state, const char *ptr, const char *end, - unsigned int *ret_ptr, enum h1_state *ret_state) -{ - const char *msg_start = msg->chn->buf->p; - - switch (state) { - case HTTP_MSG_RPVER: - http_msg_rpver: - if (likely(HTTP_IS_VER_TOKEN(*ptr))) - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, HTTP_MSG_RPVER); - - if (likely(HTTP_IS_SPHT(*ptr))) { - msg->sl.st.v_l = ptr - msg_start; - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP); - } - msg->err_state = HTTP_MSG_RPVER; - state = HTTP_MSG_ERROR; - break; - - case HTTP_MSG_RPVER_SP: - http_msg_rpver_sp: - if (likely(!HTTP_IS_LWS(*ptr))) { - msg->sl.st.c = ptr - msg_start; - goto http_msg_rpcode; - } - if (likely(HTTP_IS_SPHT(*ptr))) - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP); - /* so it's a CR/LF, this is invalid */ - msg->err_state = HTTP_MSG_RPVER_SP; - state = HTTP_MSG_ERROR; - break; - - case HTTP_MSG_RPCODE: - http_msg_rpcode: - if (likely(!HTTP_IS_LWS(*ptr))) - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, HTTP_MSG_RPCODE); - - if (likely(HTTP_IS_SPHT(*ptr))) { - msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c; - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP); - } - - /* so it's a CR/LF, so there is no reason phrase */ - msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c; - http_msg_rsp_reason: - /* FIXME: should we support HTTP responses without any reason phrase ? 
*/ - msg->sl.st.r = ptr - msg_start; - msg->sl.st.r_l = 0; - goto http_msg_rpline_eol; - - case HTTP_MSG_RPCODE_SP: - http_msg_rpcode_sp: - if (likely(!HTTP_IS_LWS(*ptr))) { - msg->sl.st.r = ptr - msg_start; - goto http_msg_rpreason; - } - if (likely(HTTP_IS_SPHT(*ptr))) - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP); - /* so it's a CR/LF, so there is no reason phrase */ - goto http_msg_rsp_reason; - - case HTTP_MSG_RPREASON: - http_msg_rpreason: - if (likely(!HTTP_IS_CRLF(*ptr))) - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, HTTP_MSG_RPREASON); - msg->sl.st.r_l = ptr - msg_start - msg->sl.st.r; - http_msg_rpline_eol: - /* We have seen the end of line. Note that we do not - * necessarily have the \n yet, but at least we know that we - * have EITHER \r OR \n, otherwise the response would not be - * complete. We can then record the response length and return - * to the caller which will be able to register it. - */ - msg->sl.st.l = ptr - msg_start - msg->sol; - return ptr; - - default: -#ifdef DEBUG_FULL - fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state); - exit(1); -#endif - ; - } - - http_msg_ood: - /* out of valid data */ - if (ret_state) - *ret_state = state; - if (ret_ptr) - *ret_ptr = ptr - msg_start; - return NULL; -} - -/* - * This function parses a request line between <ptr> and <end>, starting with - * parser state <state>. Only states HTTP_MSG_RQMETH, HTTP_MSG_RQMETH_SP, - * HTTP_MSG_RQURI, HTTP_MSG_RQURI_SP and HTTP_MSG_RQVER are handled. Others - * will give undefined results. - * Note that it is upon the caller's responsibility to ensure that ptr < end, - * and that msg->sol points to the beginning of the request. - * If a complete line is found (which implies that at least one CR or LF is - * found before <end>, the updated <ptr> is returned, otherwise NULL is - * returned indicating an incomplete line (which does not mean that parts have - * not been updated). 
In the incomplete case, if <ret_ptr> or <ret_state> are - * non-NULL, they are fed with the new <ptr> and <state> values to be passed - * upon next call. - * - * This function was intentionally designed to be called from - * http_msg_analyzer() with the lowest overhead. It should integrate perfectly - * within its state machine and use the same macros, hence the need for same - * labels and variable names. Note that msg->sol is left unchanged. - */ -const char *http_parse_reqline(struct http_msg *msg, - enum h1_state state, const char *ptr, const char *end, - unsigned int *ret_ptr, enum h1_state *ret_state) -{ - const char *msg_start = msg->chn->buf->p; - - switch (state) { - case HTTP_MSG_RQMETH: - http_msg_rqmeth: - if (likely(HTTP_IS_TOKEN(*ptr))) - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth, http_msg_ood, state, HTTP_MSG_RQMETH); - - if (likely(HTTP_IS_SPHT(*ptr))) { - msg->sl.rq.m_l = ptr - msg_start; - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP); - } - - if (likely(HTTP_IS_CRLF(*ptr))) { - /* HTTP 0.9 request */ - msg->sl.rq.m_l = ptr - msg_start; - http_msg_req09_uri: - msg->sl.rq.u = ptr - msg_start; - http_msg_req09_uri_e: - msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u; - http_msg_req09_ver: - msg->sl.rq.v = ptr - msg_start; - msg->sl.rq.v_l = 0; - goto http_msg_rqline_eol; - } - msg->err_state = HTTP_MSG_RQMETH; - state = HTTP_MSG_ERROR; - break; - - case HTTP_MSG_RQMETH_SP: - http_msg_rqmeth_sp: - if (likely(!HTTP_IS_LWS(*ptr))) { - msg->sl.rq.u = ptr - msg_start; - goto http_msg_rquri; - } - if (likely(HTTP_IS_SPHT(*ptr))) - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP); - /* so it's a CR/LF, meaning an HTTP 0.9 request */ - goto http_msg_req09_uri; - - case HTTP_MSG_RQURI: - http_msg_rquri: -#if defined(__x86_64__) || \ - defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \ - defined(__ARM_ARCH_7A__) - /* speedup: skip bytes not between 0x21 and 0x7e 
inclusive */ - while (ptr <= end - sizeof(int)) { - int x = *(int *)ptr - 0x21212121; - if (x & 0x80808080) - break; - - x -= 0x5e5e5e5e; - if (!(x & 0x80808080)) - break; - - ptr += sizeof(int); - } -#endif - if (ptr >= end) { - state = HTTP_MSG_RQURI; - goto http_msg_ood; - } - http_msg_rquri2: - if (likely((unsigned char)(*ptr - 33) <= 93)) /* 33 to 126 included */ - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri2, http_msg_ood, state, HTTP_MSG_RQURI); - - if (likely(HTTP_IS_SPHT(*ptr))) { - msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u; - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP); - } - - if (likely((unsigned char)*ptr >= 128)) { - /* non-ASCII chars are forbidden unless option - * accept-invalid-http-request is enabled in the frontend. - * In any case, we capture the faulty char. - */ - if (msg->err_pos < -1) - goto invalid_char; - if (msg->err_pos == -1) - msg->err_pos = ptr - msg_start; - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri, http_msg_ood, state, HTTP_MSG_RQURI); - } - - if (likely(HTTP_IS_CRLF(*ptr))) { - /* so it's a CR/LF, meaning an HTTP 0.9 request */ - goto http_msg_req09_uri_e; - } - - /* OK forbidden chars, 0..31 or 127 */ - invalid_char: - msg->err_pos = ptr - msg_start; - msg->err_state = HTTP_MSG_RQURI; - state = HTTP_MSG_ERROR; - break; - - case HTTP_MSG_RQURI_SP: - http_msg_rquri_sp: - if (likely(!HTTP_IS_LWS(*ptr))) { - msg->sl.rq.v = ptr - msg_start; - goto http_msg_rqver; - } - if (likely(HTTP_IS_SPHT(*ptr))) - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP); - /* so it's a CR/LF, meaning an HTTP 0.9 request */ - goto http_msg_req09_ver; - - case HTTP_MSG_RQVER: - http_msg_rqver: - if (likely(HTTP_IS_VER_TOKEN(*ptr))) - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqver, http_msg_ood, state, HTTP_MSG_RQVER); - - if (likely(HTTP_IS_CRLF(*ptr))) { - msg->sl.rq.v_l = ptr - msg_start - msg->sl.rq.v; - http_msg_rqline_eol: - /* We have seen the 
end of line. Note that we do not - * necessarily have the \n yet, but at least we know that we - * have EITHER \r OR \n, otherwise the request would not be - * complete. We can then record the request length and return - * to the caller which will be able to register it. - */ - msg->sl.rq.l = ptr - msg_start - msg->sol; - return ptr; - } - - /* neither an HTTP_VER token nor a CRLF */ - msg->err_state = HTTP_MSG_RQVER; - state = HTTP_MSG_ERROR; - break; - - default: -#ifdef DEBUG_FULL - fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state); - exit(1); -#endif - ; - } - - http_msg_ood: - /* out of valid data */ - if (ret_state) - *ret_state = state; - if (ret_ptr) - *ret_ptr = ptr - msg_start; - return NULL; -} - /* * Returns the data from Authorization header. Function may be called more * than once so data is stored in txn->auth_data. When no header is found @@ -1597,363 +1315,6 @@ get_http_auth(struct stream *s) } -/* - * This function parses an HTTP message, either a request or a response, - * depending on the initial msg->msg_state. The caller is responsible for - * ensuring that the message does not wrap. The function can be preempted - * everywhere when data are missing and recalled at the exact same location - * with no information loss. The message may even be realigned between two - * calls. The header index is re-initialized when switching from - * MSG_R[PQ]BEFORE to MSG_RPVER|MSG_RQMETH. It modifies msg->sol among other - * fields. Note that msg->sol will be initialized after completing the first - * state, so that none of the msg pointers has to be initialized prior to the - * first call. 
- */ -void http_msg_analyzer(struct http_msg *msg, struct hdr_idx *idx) -{ - enum h1_state state; /* updated only when leaving the FSM */ - register char *ptr, *end; /* request pointers, to avoid dereferences */ - struct buffer *buf; - - state = msg->msg_state; - buf = msg->chn->buf; - ptr = buf->p + msg->next; - end = buf->p + buf->i; - - if (unlikely(ptr >= end)) - goto http_msg_ood; - - switch (state) { - /* - * First, states that are specific to the response only. - * We check them first so that request and headers are - * closer to each other (accessed more often). - */ - case HTTP_MSG_RPBEFORE: - http_msg_rpbefore: - if (likely(HTTP_IS_TOKEN(*ptr))) { - /* we have a start of message, but we have to check - * first if we need to remove some CRLF. We can only - * do this when o=0. - */ - if (unlikely(ptr != buf->p)) { - if (buf->o) - goto http_msg_ood; - /* Remove empty leading lines, as recommended by RFC2616. */ - bi_fast_delete(buf, ptr - buf->p); - } - msg->sol = 0; - msg->sl.st.l = 0; /* used in debug mode */ - hdr_idx_init(idx); - state = HTTP_MSG_RPVER; - goto http_msg_rpver; - } - - if (unlikely(!HTTP_IS_CRLF(*ptr))) { - state = HTTP_MSG_RPBEFORE; - goto http_msg_invalid; - } - - if (unlikely(*ptr == '\n')) - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE); - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, HTTP_MSG_RPBEFORE_CR); - /* stop here */ - - case HTTP_MSG_RPBEFORE_CR: - http_msg_rpbefore_cr: - EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPBEFORE_CR); - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE); - /* stop here */ - - case HTTP_MSG_RPVER: - http_msg_rpver: - case HTTP_MSG_RPVER_SP: - case HTTP_MSG_RPCODE: - case HTTP_MSG_RPCODE_SP: - case HTTP_MSG_RPREASON: - ptr = (char *)http_parse_stsline(msg, - state, ptr, end, - &msg->next, &msg->msg_state); - if (unlikely(!ptr)) - return; - - /* we have a full response and we 
know that we have either a CR - * or an LF at <ptr>. - */ - hdr_idx_set_start(idx, msg->sl.st.l, *ptr == '\r'); - - msg->sol = ptr - buf->p; - if (likely(*ptr == '\r')) - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, HTTP_MSG_RPLINE_END); - goto http_msg_rpline_end; - - case HTTP_MSG_RPLINE_END: - http_msg_rpline_end: - /* msg->sol must point to the first of CR or LF. */ - EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPLINE_END); - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST); - /* stop here */ - - /* - * Second, states that are specific to the request only - */ - case HTTP_MSG_RQBEFORE: - http_msg_rqbefore: - if (likely(HTTP_IS_TOKEN(*ptr))) { - /* we have a start of message, but we have to check - * first if we need to remove some CRLF. We can only - * do this when o=0. - */ - if (likely(ptr != buf->p)) { - if (buf->o) - goto http_msg_ood; - /* Remove empty leading lines, as recommended by RFC2616. */ - bi_fast_delete(buf, ptr - buf->p); - } - msg->sol = 0; - msg->sl.rq.l = 0; /* used in debug mode */ - state = HTTP_MSG_RQMETH; - goto http_msg_rqmeth; - } - - if (unlikely(!HTTP_IS_CRLF(*ptr))) { - state = HTTP_MSG_RQBEFORE; - goto http_msg_invalid; - } - - if (unlikely(*ptr == '\n')) - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE); - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore_cr, http_msg_ood, state, HTTP_MSG_RQBEFORE_CR); - /* stop here */ - - case HTTP_MSG_RQBEFORE_CR: - http_msg_rqbefore_cr: - EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQBEFORE_CR); - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE); - /* stop here */ - - case HTTP_MSG_RQMETH: - http_msg_rqmeth: - case HTTP_MSG_RQMETH_SP: - case HTTP_MSG_RQURI: - case HTTP_MSG_RQURI_SP: - case HTTP_MSG_RQVER: - ptr = (char *)http_parse_reqline(msg, - state, ptr, end, - &msg->next, &msg->msg_state); - if (unlikely(!ptr)) - 
return; - - /* we have a full request and we know that we have either a CR - * or an LF at <ptr>. - */ - hdr_idx_set_start(idx, msg->sl.rq.l, *ptr == '\r'); - - msg->sol = ptr - buf->p; - if (likely(*ptr == '\r')) - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqline_end, http_msg_ood, state, HTTP_MSG_RQLINE_END); - goto http_msg_rqline_end; - - case HTTP_MSG_RQLINE_END: - http_msg_rqline_end: - /* check for HTTP/0.9 request : no version information available. - * msg->sol must point to the first of CR or LF. - */ - if (unlikely(msg->sl.rq.v_l == 0)) - goto http_msg_last_lf; - - EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQLINE_END); - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST); - /* stop here */ - - /* - * Common states below - */ - case HTTP_MSG_HDR_FIRST: - http_msg_hdr_first: - msg->sol = ptr - buf->p; - if (likely(!HTTP_IS_CRLF(*ptr))) { - goto http_msg_hdr_name; - } - - if (likely(*ptr == '\r')) - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF); - goto http_msg_last_lf; - - case HTTP_MSG_HDR_NAME: - http_msg_hdr_name: - /* assumes msg->sol points to the first char */ - if (likely(HTTP_IS_TOKEN(*ptr))) - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME); - - if (likely(*ptr == ':')) - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP); - - if (likely(msg->err_pos < -1) || *ptr == '\n') { - state = HTTP_MSG_HDR_NAME; - goto http_msg_invalid; - } - - if (msg->err_pos == -1) /* capture error pointer */ - msg->err_pos = ptr - buf->p; /* >= 0 now */ - - /* and we still accept this non-token character */ - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME); - - case HTTP_MSG_HDR_L1_SP: - http_msg_hdr_l1_sp: - /* assumes msg->sol points to the first char */ - if (likely(HTTP_IS_SPHT(*ptr))) - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, 
http_msg_ood, state, HTTP_MSG_HDR_L1_SP); - - /* header value can be basically anything except CR/LF */ - msg->sov = ptr - buf->p; - - if (likely(!HTTP_IS_CRLF(*ptr))) { - goto http_msg_hdr_val; - } - - if (likely(*ptr == '\r')) - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, HTTP_MSG_HDR_L1_LF); - goto http_msg_hdr_l1_lf; - - case HTTP_MSG_HDR_L1_LF: - http_msg_hdr_l1_lf: - EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L1_LF); - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, HTTP_MSG_HDR_L1_LWS); - - case HTTP_MSG_HDR_L1_LWS: - http_msg_hdr_l1_lws: - if (likely(HTTP_IS_SPHT(*ptr))) { - /* replace HT,CR,LF with spaces */ - for (; buf->p + msg->sov < ptr; msg->sov++) - buf->p[msg->sov] = ' '; - goto http_msg_hdr_l1_sp; - } - /* we had a header consisting only in spaces ! */ - msg->eol = msg->sov; - goto http_msg_complete_header; - - case HTTP_MSG_HDR_VAL: - http_msg_hdr_val: - /* assumes msg->sol points to the first char, and msg->sov - * points to the first character of the value. - */ - - /* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D - * and lower. In fact since most of the time is spent in the loop, we - * also remove the sign bit test so that bytes 0x8e..0x0d break the - * loop, but we don't care since they're very rare in header values. 
- */ -#if defined(__x86_64__) - while (ptr <= end - sizeof(long)) { - if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL) - goto http_msg_hdr_val2; - ptr += sizeof(long); - } -#endif -#if defined(__x86_64__) || \ - defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \ - defined(__ARM_ARCH_7A__) - while (ptr <= end - sizeof(int)) { - if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080) - goto http_msg_hdr_val2; - ptr += sizeof(int); - } -#endif - if (ptr >= end) { - state = HTTP_MSG_HDR_VAL; - goto http_msg_ood; - } - http_msg_hdr_val2: - if (likely(!HTTP_IS_CRLF(*ptr))) - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, HTTP_MSG_HDR_VAL); - - msg->eol = ptr - buf->p; - /* Note: we could also copy eol into ->eoh so that we have the - * real header end in case it ends with lots of LWS, but is this - * really needed ? - */ - if (likely(*ptr == '\r')) - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, HTTP_MSG_HDR_L2_LF); - goto http_msg_hdr_l2_lf; - - case HTTP_MSG_HDR_L2_LF: - http_msg_hdr_l2_lf: - EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L2_LF); - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, HTTP_MSG_HDR_L2_LWS); - - case HTTP_MSG_HDR_L2_LWS: - http_msg_hdr_l2_lws: - if (unlikely(HTTP_IS_SPHT(*ptr))) { - /* LWS: replace HT,CR,LF with spaces */ - for (; buf->p + msg->eol < ptr; msg->eol++) - buf->p[msg->eol] = ' '; - goto http_msg_hdr_val; - } - http_msg_complete_header: - /* - * It was a new header, so the last one is finished. - * Assumes msg->sol points to the first char, msg->sov points - * to the first character of the value and msg->eol to the - * first CR or LF so we know how the line ends. We insert last - * header into the index. 
- */ - if (unlikely(hdr_idx_add(msg->eol - msg->sol, buf->p[msg->eol] == '\r', - idx, idx->tail) < 0)) { - state = HTTP_MSG_HDR_L2_LWS; - goto http_msg_invalid; - } - - msg->sol = ptr - buf->p; - if (likely(!HTTP_IS_CRLF(*ptr))) { - goto http_msg_hdr_name; - } - - if (likely(*ptr == '\r')) - EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF); - goto http_msg_last_lf; - - case HTTP_MSG_LAST_LF: - http_msg_last_lf: - /* Assumes msg->sol points to the first of either CR or LF. - * Sets ->sov and ->next to the total header length, ->eoh to - * the last CRLF, and ->eol to the last CRLF length (1 or 2). - */ - EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_LAST_LF); - ptr++; - msg->sov = msg->next = ptr - buf->p; - msg->eoh = msg->sol; - msg->sol = 0; - msg->eol = msg->sov - msg->eoh; - msg->msg_state = HTTP_MSG_BODY; - return; - - case HTTP_MSG_ERROR: - /* this may only happen if we call http_msg_analyser() twice with an error */ - break; - - default: -#ifdef DEBUG_FULL - fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state); - exit(1); -#endif - ; - } - http_msg_ood: - /* out of data */ - msg->msg_state = state; - msg->next = ptr - buf->p; - return; - - http_msg_invalid: - /* invalid message */ - msg->err_state = state; - msg->msg_state = HTTP_MSG_ERROR; - msg->next = ptr - buf->p; - return; -} - /* convert an HTTP/0.9 request into an HTTP/1.0 request. Returns 1 if the * conversion succeeded, 0 in case of error. If the request was already 1.X, * nothing is done and 1 is returned.