From: Willy Tarreau <w@1wt.eu>
Date: Sun, 21 Jan 2007 22:58:29 +0000 (+0100)
Subject: [MEDIUM] implemented the status-line parser in http_msg_analyzer().
X-Git-Tag: v1.3.6~1
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=8973c70f7d1659ba22e466c753bc8a39413dd3d4;p=thirdparty%2Fhaproxy.git

[MEDIUM] implemented the status-line parser in http_msg_analyzer().

The status line parser has been written. With it, it should not
be too hard to replace the response parser to benefit from the
new header facilities.
---

diff --git a/src/client.c b/src/client.c
index e4babba081..c72f7e4e53 100644
--- a/src/client.c
+++ b/src/client.c
@@ -252,7 +252,6 @@ int event_accept(int fd) {
 				pool_free(session, s);
 				return 0;
 			}
-			hdr_idx_init(&hreq->hdr_idx);
 		}
 
 		if ((p->mode == PR_MODE_TCP || p->mode == PR_MODE_HTTP)
diff --git a/src/proto_http.c b/src/proto_http.c
index 83af0c7e41..fe5917043d 100644
--- a/src/proto_http.c
+++ b/src/proto_http.c
@@ -504,6 +504,129 @@ int process_session(struct task *t)
 		}                               \
 	} while (0)
 
+/*
+ * This function parses a response line between <ptr> and <end>, starting with
+ * parser state <state>. Only states HTTP_MSG_RPVER, HTTP_MSG_RPVER_SP,
+ * HTTP_MSG_RPCODE, HTTP_MSG_RPCODE_SP and HTTP_MSG_RPREASON are handled. Others
+ * will give undefined results.
+ * Note that it is upon the caller's responsibility to ensure that ptr < end,
+ * and that msg->sol points to the beginning of the response.
+ * If a complete line is found (which implies that at least one CR or LF is
+ * found before <end>, the updated <ptr> is returned, otherwise NULL is
+ * returned indicating an incomplete line (which does not mean that parts have
+ * not been updated). In the incomplete case, if <ret_ptr> or <ret_state> are
+ * non-NULL, they are fed with the new <ptr> and <state> values to be passed
+ * upon next call.
+ *
+ * This function was intentionnally designed to be called from
+ * http_msg_analyzer() with the lowest overhead. It should integrate perfectly
+ * within its state machine and use the same macros, hence the need for same
+ * labels and variable names.
+ */
+const char *http_parse_rspline(struct http_msg *msg, const char *msg_buf, int state,
+			       const char *ptr, const char *end,
+			       char **ret_ptr, int *ret_state)
+{
+	__label__
+		http_msg_rpver,
+		http_msg_rpver_sp,
+		http_msg_rpcode,
+		http_msg_rpcode_sp,
+		http_msg_rpreason,
+		http_msg_rpline_eol,
+		http_msg_ood,     /* out of data */
+		http_msg_invalid;
+
+	switch (state)	{
+	http_msg_rpver:
+	case HTTP_MSG_RPVER:
+		if (likely(HTTP_IS_TOKEN(*ptr)))
+			EAT_AND_JUMP_OR_RETURN(http_msg_rpver, HTTP_MSG_RPVER);
+
+		if (likely(HTTP_IS_SPHT(*ptr))) {
+			msg->sl.st.v_l = (ptr - msg_buf) - msg->sor;
+			EAT_AND_JUMP_OR_RETURN(http_msg_rpver_sp, HTTP_MSG_RPVER_SP);
+		}
+		goto http_msg_invalid;
+		
+	http_msg_rpver_sp:
+	case HTTP_MSG_RPVER_SP:
+		if (likely(!HTTP_IS_LWS(*ptr))) {
+			msg->sl.st.c = ptr - msg_buf;
+			goto http_msg_rpcode;
+		}
+		if (likely(HTTP_IS_SPHT(*ptr)))
+			EAT_AND_JUMP_OR_RETURN(http_msg_rpver_sp, HTTP_MSG_RPVER_SP);
+		/* so it's a CR/LF, this is invalid */
+		goto http_msg_invalid;
+
+	http_msg_rpcode:
+	case HTTP_MSG_RPCODE:
+		if (likely(!HTTP_IS_LWS(*ptr)))
+			EAT_AND_JUMP_OR_RETURN(http_msg_rpcode, HTTP_MSG_RPCODE);
+
+		if (likely(HTTP_IS_SPHT(*ptr))) {
+			msg->sl.st.c_l = (ptr - msg_buf) - msg->sl.st.c;
+			EAT_AND_JUMP_OR_RETURN(http_msg_rpcode_sp, HTTP_MSG_RPCODE_SP);
+		}
+
+		/* so it's a CR/LF, so there is no reason phrase */
+		msg->sl.st.c_l = (ptr - msg_buf) - msg->sl.st.c;
+	http_msg_rsp_reason:
+		/* FIXME: should we support HTTP responses without any reason phrase ? */
+		msg->sl.st.r = ptr - msg_buf;
+		msg->sl.st.r_l = 0;
+		goto http_msg_rpline_eol;
+
+	http_msg_rpcode_sp:
+	case HTTP_MSG_RPCODE_SP:
+		if (likely(!HTTP_IS_LWS(*ptr))) {
+			msg->sl.st.r = ptr - msg_buf;
+			goto http_msg_rpreason;
+		}
+		if (likely(HTTP_IS_SPHT(*ptr)))
+			EAT_AND_JUMP_OR_RETURN(http_msg_rpcode_sp, HTTP_MSG_RPCODE_SP);
+		/* so it's a CR/LF, so there is no reason phrase */
+		goto http_msg_rsp_reason;
+
+	http_msg_rpreason:
+	case HTTP_MSG_RPREASON:
+		if (likely(!HTTP_IS_CRLF(*ptr)))
+			EAT_AND_JUMP_OR_RETURN(http_msg_rpreason, HTTP_MSG_RPREASON);
+		msg->sl.st.r_l = (ptr - msg_buf) - msg->sl.st.r;
+	http_msg_rpline_eol:
+		/* We have seen the end of line. Note that we do not
+		 * necessarily have the \n yet, but at least we know that we
+		 * have EITHER \r OR \n, otherwise the response would not be
+		 * complete. We can then record the response length and return
+		 * to the caller which will be able to register it.
+		 */
+		msg->sl.st.l = ptr - msg->sol;
+		return ptr;
+
+#ifdef DEBUG_FULL
+	default:
+		fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
+		exit(1);
+#endif
+	}
+
+ http_msg_ood:
+	/* out of data */
+	if (ret_state)
+		*ret_state = state;
+	if (ret_ptr)
+		*ret_ptr = (char *)ptr;
+	return NULL;
+
+ http_msg_invalid:
+	/* invalid message */
+	if (ret_state)
+		*ret_state = HTTP_MSG_ERROR;
+	return NULL;
+}
+
+
 /*
  * This function parses a request line between <ptr> and <end>, starting with
  * parser state <state>. Only states HTTP_MSG_RQMETH, HTTP_MSG_RQMETH_SP,
@@ -524,8 +647,8 @@ int process_session(struct task *t)
  * labels and variable names.
  */
 const char *http_parse_reqline(struct http_msg *msg, const char *msg_buf, int state,
-			 const char *ptr, const char *end,
-			 char **ret_ptr, int *ret_state)
+			       const char *ptr, const char *end,
+			       char **ret_ptr, int *ret_state)
 {
 	__label__
 		http_msg_rqmeth,
@@ -635,6 +758,13 @@ const char *http_parse_reqline(struct http_msg *msg, const char *msg_buf, int st
 }
 
 
+/*
+ * This function parses an HTTP message, either a request or a response,
+ * depending on the initial msg->hdr_state. It can be preempted everywhere
+ * when data are missing and recalled at the exact same location with no
+ * information loss. The header index is re-initialized when switching from
+ * MSG_R[PQ]BEFORE to MSG_RPVER|MSG_RQMETH.
+ */
 void http_msg_analyzer(struct buffer *buf, struct http_msg *msg, struct hdr_idx *idx)
 {
 	__label__
@@ -666,14 +796,95 @@ void http_msg_analyzer(struct buffer *buf, struct http_msg *msg, struct hdr_idx
 		goto http_msg_ood;
 
 	switch (state)	{
+	/*
+	 * First, states that are specific to the response only.
+	 * We check them first so that request and headers are
+	 * closer to each other (accessed more often).
+	 */
+	http_msg_rpbefore:
+	case HTTP_MSG_RPBEFORE:
+		if (likely(HTTP_IS_TOKEN(*ptr))) {
+			if (likely(ptr == buf->data)) {
+				msg->sol = ptr;
+				msg->sor = 0;
+			} else {
+#if PARSE_PRESERVE_EMPTY_LINES
+				/* only skip empty leading lines, don't remove them */
+				msg->sol = ptr;
+				msg->sor = ptr - buf->data;
+#else
+				/* Remove empty leading lines, as recommended by
+				 * RFC2616. This takes a lot of time because we
+				 * must move all the buffer backwards, but this
+				 * is rarely needed. The method above will be
+				 * cleaner when we'll be able to start sending
+				 * the request from any place in the buffer.
+				 */
+				buf->lr = ptr;
+				buffer_replace2(buf, buf->data, buf->lr, NULL, 0);
+				msg->sor = 0;
+				msg->sol = buf->data;
+				ptr = buf->data;
+				end = buf->r;
+#endif
+			}
+			hdr_idx_init(idx);
+			state = HTTP_MSG_RPVER;
+			goto http_msg_rpver;
+		}
+
+		if (unlikely(!HTTP_IS_CRLF(*ptr)))
+			goto http_msg_invalid;
+
+		if (unlikely(*ptr == '\n'))
+			EAT_AND_JUMP_OR_RETURN(http_msg_rpbefore, HTTP_MSG_RPBEFORE);
+		EAT_AND_JUMP_OR_RETURN(http_msg_rpbefore_cr, HTTP_MSG_RPBEFORE_CR);
+		/* stop here */
+
+	http_msg_rpbefore_cr:
+	case HTTP_MSG_RPBEFORE_CR:
+		EXPECT_LF_HERE(ptr, http_msg_invalid);
+		EAT_AND_JUMP_OR_RETURN(http_msg_rpbefore, HTTP_MSG_RPBEFORE);
+		/* stop here */
+
+	http_msg_rpver:
+	case HTTP_MSG_RPVER:
+	case HTTP_MSG_RPVER_SP:
+	case HTTP_MSG_RPCODE:
+	case HTTP_MSG_RPCODE_SP:
+	case HTTP_MSG_RPREASON:
+		ptr = (char *)http_parse_rspline(msg, buf->data, state, ptr, end,
+						 &buf->lr, &msg->hdr_state);
+		if (unlikely(!ptr))
+			return;
+
+		/* we have a full response and we know that we have either a CR
+		 * or an LF at <ptr>.
+		 */
+		//fprintf(stderr,"sor=%d rq.l=%d *ptr=0x%02x\n", msg->sor, msg->sl.st.l, *ptr);
+		hdr_idx_set_start(idx, msg->sl.st.l, *ptr == '\r');
+
+		msg->sol = ptr;
+		if (likely(*ptr == '\r'))
+			EAT_AND_JUMP_OR_RETURN(http_msg_rpline_end, HTTP_MSG_RPLINE_END);
+		goto http_msg_rpline_end;
+
+	http_msg_rpline_end:
+	case HTTP_MSG_RPLINE_END:
+		/* msg->sol must point to the first of CR or LF. */
+		EXPECT_LF_HERE(ptr, http_msg_invalid);
+		EAT_AND_JUMP_OR_RETURN(http_msg_hdr_first, HTTP_MSG_HDR_FIRST);
+		/* stop here */
+
+	/*
+	 * Second, states that are specific to the request only
+	 */
 	http_msg_rqbefore:
 	case HTTP_MSG_RQBEFORE:
 		if (likely(HTTP_IS_TOKEN(*ptr))) {
 			if (likely(ptr == buf->data)) {
 				msg->sol = ptr;
 				msg->sor = 0;
-				state = HTTP_MSG_RQMETH;
-				goto http_msg_rqmeth;
 			} else {
 #if PARSE_PRESERVE_EMPTY_LINES
 				/* only skip empty leading lines, don't remove them */
@@ -694,9 +905,10 @@ void http_msg_analyzer(struct buffer *buf, struct http_msg *msg, struct hdr_idx
 				ptr = buf->data;
 				end = buf->r;
 #endif
-				state = HTTP_MSG_RQMETH;
-				goto http_msg_rqmeth;
 			}
+			hdr_idx_init(idx);
+			state = HTTP_MSG_RQMETH;
+			goto http_msg_rqmeth;
 		}
 
 		if (unlikely(!HTTP_IS_CRLF(*ptr)))
@@ -705,14 +917,13 @@ void http_msg_analyzer(struct buffer *buf, struct http_msg *msg, struct hdr_idx
 		if (unlikely(*ptr == '\n'))
 			EAT_AND_JUMP_OR_RETURN(http_msg_rqbefore, HTTP_MSG_RQBEFORE);
 		EAT_AND_JUMP_OR_RETURN(http_msg_rqbefore_cr, HTTP_MSG_RQBEFORE_CR);
-		/* fall through http_msg_rqbefore_cr */
+		/* stop here */
 
 	http_msg_rqbefore_cr:
 	case HTTP_MSG_RQBEFORE_CR:
 		EXPECT_LF_HERE(ptr, http_msg_invalid);
 		EAT_AND_JUMP_OR_RETURN(http_msg_rqbefore, HTTP_MSG_RQBEFORE);
-		state = HTTP_MSG_RQMETH;
-		/* fall through http_msg_rqmeth */
+		/* stop here */
 
 	http_msg_rqmeth:
 	case HTTP_MSG_RQMETH:
@@ -734,7 +945,6 @@ void http_msg_analyzer(struct buffer *buf, struct http_msg *msg, struct hdr_idx
 		msg->sol = ptr;
 		if (likely(*ptr == '\r'))
 			EAT_AND_JUMP_OR_RETURN(http_msg_rqline_end, HTTP_MSG_RQLINE_END);
-
 		goto http_msg_rqline_end;
 
 	http_msg_rqline_end:
@@ -747,7 +957,11 @@ void http_msg_analyzer(struct buffer *buf, struct http_msg *msg, struct hdr_idx
 
 		EXPECT_LF_HERE(ptr, http_msg_invalid);
 		EAT_AND_JUMP_OR_RETURN(http_msg_hdr_first, HTTP_MSG_HDR_FIRST);
+		/* stop here */
 
+	/*
+	 * Common states below
+	 */
 	http_msg_hdr_first:
 	case HTTP_MSG_HDR_FIRST:
 		msg->sol = ptr;
@@ -914,7 +1128,7 @@ int process_cli(struct session *t)
 		 * headers. An index of all the lines will be elaborated while
 		 * parsing.
 		 *
-		 * For the parsing, we use a 10 states FSM.
+		 * For the parsing, we use a 28 states FSM.
 		 *
 		 * RFC2616 requires that both LF and CRLF are recognized as
 		 * line breaks, but that any other combination is an error.