]> git.ipfire.org Git - thirdparty/haproxy.git/commitdiff
[MEDIUM] implemented the status-line parser in http_msg_analyzer().
authorWilly Tarreau <w@1wt.eu>
Sun, 21 Jan 2007 22:58:29 +0000 (23:58 +0100)
committerWilly Tarreau <w@1wt.eu>
Sun, 21 Jan 2007 22:58:29 +0000 (23:58 +0100)
The status line parser has been written. With it, it should not
be too hard to replace the response parser to benefit from the
new header facilities.

src/client.c
src/proto_http.c

index e4babba0813450174e189f46e49910b0da16c884..c72f7e4e5367c7b953958735e402ce6588a6605f 100644 (file)
@@ -252,7 +252,6 @@ int event_accept(int fd) {
                                pool_free(session, s);
                                return 0;
                        }
-                       hdr_idx_init(&hreq->hdr_idx);
                }
 
                if ((p->mode == PR_MODE_TCP || p->mode == PR_MODE_HTTP)
index 83af0c7e418d55cb1d4d6653bd30c2da5dc2fa46..fe5917043da386f2b8d21e4de0a4900c649f652d 100644 (file)
@@ -504,6 +504,129 @@ int process_session(struct task *t)
                }                               \
        } while (0)
 
+/*
+ * This function parses a response line between <ptr> and <end>, starting with
+ * parser state <state>. Only states HTTP_MSG_RPVER, HTTP_MSG_RPVER_SP,
+ * HTTP_MSG_RPCODE, HTTP_MSG_RPCODE_SP and HTTP_MSG_RPREASON are handled. Others
+ * will give undefined results.
+ * Note that it is upon the caller's responsibility to ensure that ptr < end,
+ * and that msg->sol points to the beginning of the response.
+ * If a complete line is found (which implies that at least one CR or LF is
+ * found before <end>, the updated <ptr> is returned, otherwise NULL is
+ * returned indicating an incomplete line (which does not mean that parts have
+ * not been updated). In the incomplete case, if <ret_ptr> or <ret_state> are
+ * non-NULL, they are fed with the new <ptr> and <state> values to be passed
+ * upon next call.
+ *
+ * This function was intentionnally designed to be called from
+ * http_msg_analyzer() with the lowest overhead. It should integrate perfectly
+ * within its state machine and use the same macros, hence the need for same
+ * labels and variable names.
+ */
+const char *http_parse_rspline(struct http_msg *msg, const char *msg_buf, int state,
+                              const char *ptr, const char *end,
+                              char **ret_ptr, int *ret_state)
+{
+       __label__
+               http_msg_rpver,
+               http_msg_rpver_sp,
+               http_msg_rpcode,
+               http_msg_rpcode_sp,
+               http_msg_rpreason,
+               http_msg_rpline_eol,
+               http_msg_ood,     /* out of data */
+               http_msg_invalid;
+
+       switch (state)  {
+       http_msg_rpver:
+       case HTTP_MSG_RPVER:
+               if (likely(HTTP_IS_TOKEN(*ptr)))
+                       EAT_AND_JUMP_OR_RETURN(http_msg_rpver, HTTP_MSG_RPVER);
+
+               if (likely(HTTP_IS_SPHT(*ptr))) {
+                       msg->sl.st.v_l = (ptr - msg_buf) - msg->sor;
+                       EAT_AND_JUMP_OR_RETURN(http_msg_rpver_sp, HTTP_MSG_RPVER_SP);
+               }
+               goto http_msg_invalid;
+               
+       http_msg_rpver_sp:
+       case HTTP_MSG_RPVER_SP:
+               if (likely(!HTTP_IS_LWS(*ptr))) {
+                       msg->sl.st.c = ptr - msg_buf;
+                       goto http_msg_rpcode;
+               }
+               if (likely(HTTP_IS_SPHT(*ptr)))
+                       EAT_AND_JUMP_OR_RETURN(http_msg_rpver_sp, HTTP_MSG_RPVER_SP);
+               /* so it's a CR/LF, this is invalid */
+               goto http_msg_invalid;
+
+       http_msg_rpcode:
+       case HTTP_MSG_RPCODE:
+               if (likely(!HTTP_IS_LWS(*ptr)))
+                       EAT_AND_JUMP_OR_RETURN(http_msg_rpcode, HTTP_MSG_RPCODE);
+
+               if (likely(HTTP_IS_SPHT(*ptr))) {
+                       msg->sl.st.c_l = (ptr - msg_buf) - msg->sl.st.c;
+                       EAT_AND_JUMP_OR_RETURN(http_msg_rpcode_sp, HTTP_MSG_RPCODE_SP);
+               }
+
+               /* so it's a CR/LF, so there is no reason phrase */
+               msg->sl.st.c_l = (ptr - msg_buf) - msg->sl.st.c;
+       http_msg_rsp_reason:
+               /* FIXME: should we support HTTP responses without any reason phrase ? */
+               msg->sl.st.r = ptr - msg_buf;
+               msg->sl.st.r_l = 0;
+               goto http_msg_rpline_eol;
+
+       http_msg_rpcode_sp:
+       case HTTP_MSG_RPCODE_SP:
+               if (likely(!HTTP_IS_LWS(*ptr))) {
+                       msg->sl.st.r = ptr - msg_buf;
+                       goto http_msg_rpreason;
+               }
+               if (likely(HTTP_IS_SPHT(*ptr)))
+                       EAT_AND_JUMP_OR_RETURN(http_msg_rpcode_sp, HTTP_MSG_RPCODE_SP);
+               /* so it's a CR/LF, so there is no reason phrase */
+               goto http_msg_rsp_reason;
+
+       http_msg_rpreason:
+       case HTTP_MSG_RPREASON:
+               if (likely(!HTTP_IS_CRLF(*ptr)))
+                       EAT_AND_JUMP_OR_RETURN(http_msg_rpreason, HTTP_MSG_RPREASON);
+               msg->sl.st.r_l = (ptr - msg_buf) - msg->sl.st.r;
+       http_msg_rpline_eol:
+               /* We have seen the end of line. Note that we do not
+                * necessarily have the \n yet, but at least we know that we
+                * have EITHER \r OR \n, otherwise the response would not be
+                * complete. We can then record the response length and return
+                * to the caller which will be able to register it.
+                */
+               msg->sl.st.l = ptr - msg->sol;
+               return ptr;
+
+#ifdef DEBUG_FULL
+       default:
+               fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
+               exit(1);
+#endif
+       }
+
+ http_msg_ood:
+       /* out of data */
+       if (ret_state)
+               *ret_state = state;
+       if (ret_ptr)
+               *ret_ptr = (char *)ptr;
+       return NULL;
+
+ http_msg_invalid:
+       /* invalid message */
+       if (ret_state)
+               *ret_state = HTTP_MSG_ERROR;
+       return NULL;
+}
+
+
 /*
  * This function parses a request line between <ptr> and <end>, starting with
  * parser state <state>. Only states HTTP_MSG_RQMETH, HTTP_MSG_RQMETH_SP,
@@ -524,8 +647,8 @@ int process_session(struct task *t)
  * labels and variable names.
  */
 const char *http_parse_reqline(struct http_msg *msg, const char *msg_buf, int state,
-                        const char *ptr, const char *end,
-                        char **ret_ptr, int *ret_state)
+                              const char *ptr, const char *end,
+                              char **ret_ptr, int *ret_state)
 {
        __label__
                http_msg_rqmeth,
@@ -635,6 +758,13 @@ const char *http_parse_reqline(struct http_msg *msg, const char *msg_buf, int st
 }
 
 
+/*
+ * This function parses an HTTP message, either a request or a response,
+ * depending on the initial msg->hdr_state. It can be preempted everywhere
+ * when data are missing and recalled at the exact same location with no
+ * information loss. The header index is re-initialized when switching from
+ * MSG_R[PQ]BEFORE to MSG_RPVER|MSG_RQMETH.
+ */
 void http_msg_analyzer(struct buffer *buf, struct http_msg *msg, struct hdr_idx *idx)
 {
        __label__
@@ -666,14 +796,95 @@ void http_msg_analyzer(struct buffer *buf, struct http_msg *msg, struct hdr_idx
                goto http_msg_ood;
 
        switch (state)  {
+       /*
+        * First, states that are specific to the response only.
+        * We check them first so that request and headers are
+        * closer to each other (accessed more often).
+        */
+       http_msg_rpbefore:
+       case HTTP_MSG_RPBEFORE:
+               if (likely(HTTP_IS_TOKEN(*ptr))) {
+                       if (likely(ptr == buf->data)) {
+                               msg->sol = ptr;
+                               msg->sor = 0;
+                       } else {
+#if PARSE_PRESERVE_EMPTY_LINES
+                               /* only skip empty leading lines, don't remove them */
+                               msg->sol = ptr;
+                               msg->sor = ptr - buf->data;
+#else
+                               /* Remove empty leading lines, as recommended by
+                                * RFC2616. This takes a lot of time because we
+                                * must move all the buffer backwards, but this
+                                * is rarely needed. The method above will be
+                                * cleaner when we'll be able to start sending
+                                * the request from any place in the buffer.
+                                */
+                               buf->lr = ptr;
+                               buffer_replace2(buf, buf->data, buf->lr, NULL, 0);
+                               msg->sor = 0;
+                               msg->sol = buf->data;
+                               ptr = buf->data;
+                               end = buf->r;
+#endif
+                       }
+                       hdr_idx_init(idx);
+                       state = HTTP_MSG_RPVER;
+                       goto http_msg_rpver;
+               }
+
+               if (unlikely(!HTTP_IS_CRLF(*ptr)))
+                       goto http_msg_invalid;
+
+               if (unlikely(*ptr == '\n'))
+                       EAT_AND_JUMP_OR_RETURN(http_msg_rpbefore, HTTP_MSG_RPBEFORE);
+               EAT_AND_JUMP_OR_RETURN(http_msg_rpbefore_cr, HTTP_MSG_RPBEFORE_CR);
+               /* stop here */
+
+       http_msg_rpbefore_cr:
+       case HTTP_MSG_RPBEFORE_CR:
+               EXPECT_LF_HERE(ptr, http_msg_invalid);
+               EAT_AND_JUMP_OR_RETURN(http_msg_rpbefore, HTTP_MSG_RPBEFORE);
+               /* stop here */
+
+       http_msg_rpver:
+       case HTTP_MSG_RPVER:
+       case HTTP_MSG_RPVER_SP:
+       case HTTP_MSG_RPCODE:
+       case HTTP_MSG_RPCODE_SP:
+       case HTTP_MSG_RPREASON:
+               ptr = (char *)http_parse_rspline(msg, buf->data, state, ptr, end,
+                                                &buf->lr, &msg->hdr_state);
+               if (unlikely(!ptr))
+                       return;
+
+               /* we have a full response and we know that we have either a CR
+                * or an LF at <ptr>.
+                */
+               //fprintf(stderr,"sor=%d rq.l=%d *ptr=0x%02x\n", msg->sor, msg->sl.st.l, *ptr);
+               hdr_idx_set_start(idx, msg->sl.st.l, *ptr == '\r');
+
+               msg->sol = ptr;
+               if (likely(*ptr == '\r'))
+                       EAT_AND_JUMP_OR_RETURN(http_msg_rpline_end, HTTP_MSG_RPLINE_END);
+               goto http_msg_rpline_end;
+
+       http_msg_rpline_end:
+       case HTTP_MSG_RPLINE_END:
+               /* msg->sol must point to the first of CR or LF. */
+               EXPECT_LF_HERE(ptr, http_msg_invalid);
+               EAT_AND_JUMP_OR_RETURN(http_msg_hdr_first, HTTP_MSG_HDR_FIRST);
+               /* stop here */
+
+       /*
+        * Second, states that are specific to the request only
+        */
        http_msg_rqbefore:
        case HTTP_MSG_RQBEFORE:
                if (likely(HTTP_IS_TOKEN(*ptr))) {
                        if (likely(ptr == buf->data)) {
                                msg->sol = ptr;
                                msg->sor = 0;
-                               state = HTTP_MSG_RQMETH;
-                               goto http_msg_rqmeth;
                        } else {
 #if PARSE_PRESERVE_EMPTY_LINES
                                /* only skip empty leading lines, don't remove them */
@@ -694,9 +905,10 @@ void http_msg_analyzer(struct buffer *buf, struct http_msg *msg, struct hdr_idx
                                ptr = buf->data;
                                end = buf->r;
 #endif
-                               state = HTTP_MSG_RQMETH;
-                               goto http_msg_rqmeth;
                        }
+                       hdr_idx_init(idx);
+                       state = HTTP_MSG_RQMETH;
+                       goto http_msg_rqmeth;
                }
 
                if (unlikely(!HTTP_IS_CRLF(*ptr)))
@@ -705,14 +917,13 @@ void http_msg_analyzer(struct buffer *buf, struct http_msg *msg, struct hdr_idx
                if (unlikely(*ptr == '\n'))
                        EAT_AND_JUMP_OR_RETURN(http_msg_rqbefore, HTTP_MSG_RQBEFORE);
                EAT_AND_JUMP_OR_RETURN(http_msg_rqbefore_cr, HTTP_MSG_RQBEFORE_CR);
-               /* fall through http_msg_rqbefore_cr */
+               /* stop here */
 
        http_msg_rqbefore_cr:
        case HTTP_MSG_RQBEFORE_CR:
                EXPECT_LF_HERE(ptr, http_msg_invalid);
                EAT_AND_JUMP_OR_RETURN(http_msg_rqbefore, HTTP_MSG_RQBEFORE);
-               state = HTTP_MSG_RQMETH;
-               /* fall through http_msg_rqmeth */
+               /* stop here */
 
        http_msg_rqmeth:
        case HTTP_MSG_RQMETH:
@@ -734,7 +945,6 @@ void http_msg_analyzer(struct buffer *buf, struct http_msg *msg, struct hdr_idx
                msg->sol = ptr;
                if (likely(*ptr == '\r'))
                        EAT_AND_JUMP_OR_RETURN(http_msg_rqline_end, HTTP_MSG_RQLINE_END);
-
                goto http_msg_rqline_end;
 
        http_msg_rqline_end:
@@ -747,7 +957,11 @@ void http_msg_analyzer(struct buffer *buf, struct http_msg *msg, struct hdr_idx
 
                EXPECT_LF_HERE(ptr, http_msg_invalid);
                EAT_AND_JUMP_OR_RETURN(http_msg_hdr_first, HTTP_MSG_HDR_FIRST);
+               /* stop here */
 
+       /*
+        * Common states below
+        */
        http_msg_hdr_first:
        case HTTP_MSG_HDR_FIRST:
                msg->sol = ptr;
@@ -914,7 +1128,7 @@ int process_cli(struct session *t)
                 * headers. An index of all the lines will be elaborated while
                 * parsing.
                 *
-                * For the parsing, we use a 10 states FSM.
+                * For the parsing, we use a 28 states FSM.
                 *
                 * RFC2616 requires that both LF and CRLF are recognized as
                 * line breaks, but that any other combination is an error.