From: adrian <> Date: Wed, 27 Sep 2006 19:17:52 +0000 (+0000) Subject: Replace the client-side request line parser X-Git-Tag: SQUID_3_0_PRE5~46 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=84cc26357ca24a37566c505b16252f3e2a0882e4;p=thirdparty%2Fsquid.git Replace the client-side request line parser * remove the 'inbuf' copy during parsing; * .. but create a 'url' temporary copy for now since the *URL() routines in client_side.cc expect a mutable zero-terminated string; I looked into it and it won't take much work to rework those routines to take an immutable buffer. --- diff --git a/src/HttpMsg.cc b/src/HttpMsg.cc index 68fc1a7910..6411b1aa30 100644 --- a/src/HttpMsg.cc +++ b/src/HttpMsg.cc @@ -1,6 +1,6 @@ /* - * $Id: HttpMsg.cc,v 1.33 2006/09/26 15:19:22 adrian Exp $ + * $Id: HttpMsg.cc,v 1.34 2006/09/27 13:17:52 adrian Exp $ * * DEBUG: section 74 HTTP Message * AUTHOR: Alex Rousskov @@ -441,3 +441,172 @@ HttpParserRequestLen(HttpParser *hp) return hp->hdr_end - hp->req_start + 1; } +/* + * Attempt to parse the request line. + * + * This will set the values in hmsg that it determines. One may end up + * with a partially-parsed buffer; the return value tells you whether + * the values are valid or not. + * + * @return 1 if parsed correctly, 0 if more is needed, -1 if error + * + * TODO: + * * have it indicate "error" and "not enough" as two separate conditions! + * * audit this code as off-by-one errors are probably everywhere! + */ +int +HttpParserParseReqLine(HttpParser *hmsg) +{ + int i = 0; + int retcode = 0; + int maj = -1, min = -1; + int last_whitespace = -1, line_end = -1; + + /* Find \r\n - end of URL+Version (and the request) */ + for (i = 0; i < hmsg->bufsiz; i++) { + if (hmsg->buf[i] == '\n') { + break; + } + if (i < hmsg->bufsiz - 1 && hmsg->buf[i - 1] == '\r' && hmsg->buf[i] == '\n') { + i++; + break; + } + } + if (i == hmsg->bufsiz) { + retcode = 0; + goto finish; + } + /* XXX this should point to the -end- of the \r\n, \n, etc. */ + hmsg->req_end = i; + i = 0; + + /* Find first non-whitespace - beginning of method */ + for (; i < hmsg->req_end && (isspace(hmsg->buf[i])); i++); + if (i >= hmsg->req_end) { + retcode = 0; + goto finish; + } + hmsg->m_start = i; + hmsg->req_start = i; + + /* Find first whitespace - end of method */ + for (; i < hmsg->req_end && (! isspace(hmsg->buf[i])); i++); + if (i >= hmsg->req_end) { + retcode = 0; + goto finish; + } + hmsg->m_end = i - 1; + + /* Find first non-whitespace - beginning of URL+Version */ + for (; i < hmsg->req_end && (isspace(hmsg->buf[i])); i++); + if (i >= hmsg->req_end) { + retcode = 0; + goto finish; + } + hmsg->u_start = i; + + /* Find \r\n or \n - thats the end of the line. Keep track of the last whitespace! */ + for (; i <= hmsg->req_end; i++) { + /* If \n - its end of line */ + if (hmsg->buf[i] == '\n') { + line_end = i; + break; + } + /* XXX could be off-by-one wrong! */ + if (hmsg->buf[i] == '\r' && (i + 1) <= hmsg->req_end && hmsg->buf[i+1] == '\n') { + line_end = i; + break; + } + /* If its a whitespace, note it as it'll delimit our version */ + if (hmsg->buf[i] == ' ' || hmsg->buf[i] == '\t') { + last_whitespace = i; + } + } + if (i > hmsg->req_end) { + retcode = 0; + goto finish; + } + + /* At this point we don't need the 'i' value; so we'll recycle it for version parsing */ + + /* + * At this point: line_end points to the first eol char (\r or \n); + * last_whitespace points to the last whitespace char in the URL. + * We know we have a full buffer here! + */ + if (last_whitespace == -1) { + maj = 0; min = 9; + hmsg->u_end = line_end - 1; + assert(hmsg->u_end >= hmsg->u_start); + } else { + /* Find the first non-whitespace after last_whitespace */ + /* XXX why <= vs < ? I do need to really re-audit all of this ..*/ + for (i = last_whitespace; i <= hmsg->req_end && isspace(hmsg->buf[i]); i++); + if (i > hmsg->req_end) { + retcode = 0; + goto finish; + } + + /* is it http/ ? if so, we try parsing. If not, the URL is the whole line; version is 0.9 */ + if (i + 5 >= hmsg->req_end || (strncasecmp(&hmsg->buf[i], "HTTP/", 5) != 0)) { + maj = 0; min = 9; + hmsg->u_end = line_end - 1; + assert(hmsg->u_end >= hmsg->u_start); + } else { + /* Ok, lets try parsing! Yes, this needs refactoring! */ + hmsg->v_start = i; + i += 5; + + /* next should be 1 or more digits */ + maj = 0; + for (; i < hmsg->req_end && (isdigit(hmsg->buf[i])); i++) { + maj = maj * 10; + maj = maj + (hmsg->buf[i]) - '0'; + } + if (i >= hmsg->req_end) { + retcode = 0; + goto finish; + } + + /* next should be .; we -have- to have this as we have a whole line.. */ + if (hmsg->buf[i] != '.') { + retcode = 0; + goto finish; + } + if (i + 1 >= hmsg->req_end) { + retcode = 0; + goto finish; + } + + /* next should be one or more digits */ + i++; + min = 0; + for (; i < hmsg->req_end && (isdigit(hmsg->buf[i])); i++) { + min = min * 10; + min = min + (hmsg->buf[i]) - '0'; + } + + /* Find whitespace, end of version */ + hmsg->v_end = i; + hmsg->u_end = last_whitespace - 1; + } + } + + /* + * Rightio - we have all the schtuff. Return true; we've got enough. + */ + retcode = 1; + +finish: + hmsg->v_maj = maj; + hmsg->v_min = min; + assert(maj != -1); + assert(min != -1); + debug(1, 2) ("Parser: retval %d: from %d->%d: method %d->%d; url %d->%d; version %d->%d (%d/%d)\n", + retcode, hmsg->req_start, hmsg->req_end, + hmsg->m_start, hmsg->m_end, + hmsg->u_start, hmsg->u_end, + hmsg->v_start, hmsg->v_end, maj, min); + return retcode; +} + diff --git a/src/HttpMsg.h b/src/HttpMsg.h index 2b81133102..9818b8e112 100644 --- a/src/HttpMsg.h +++ b/src/HttpMsg.h @@ -1,6 +1,6 @@ /* - * $Id: HttpMsg.h,v 1.11 2006/09/26 15:19:22 adrian Exp $ + * $Id: HttpMsg.h,v 1.12 2006/09/27 13:17:52 adrian Exp $ * * * SQUID Web Proxy Cache http://www.squid-cache.org/ @@ -105,9 +105,12 @@ struct _HttpParser { char state; const char *buf; int bufsiz; - int req_start, req_end; int hdr_start, hdr_end; + int m_start, m_end; + int u_start, u_end; + int v_start, v_end; + int v_maj, v_min; }; typedef struct _HttpParser HttpParser; @@ -116,6 +119,7 @@ extern int HttpParserReqSz(HttpParser *); extern int HttpParserHdrSz(HttpParser *); extern const char * HttpParserHdrBuf(HttpParser *); extern int HttpParserRequestLen(HttpParser *hp); +extern int HttpParserParseReqLine(HttpParser *hp); SQUIDCEXTERN int httpMsgIsolateHeaders(const char **parse_start, int len, const char **blk_start, const char **blk_end); diff --git a/src/client_side.cc b/src/client_side.cc index 870ba5ffc3..0b03a7034a 100644 --- a/src/client_side.cc +++ b/src/client_side.cc @@ -1,6 +1,6 @@ /* - * $Id: client_side.cc,v 1.738 2006/09/26 15:19:22 adrian Exp $ + * $Id: client_side.cc,v 1.739 2006/09/27 13:17:52 adrian Exp $ * * DEBUG: section 33 Client-side Routines * AUTHOR: Duane Wessels @@ -156,12 +156,10 @@ static int responseFinishedOrFailed(HttpReply * rep, StoreIOBuffer const &reciev static void ClientSocketContextPushDeferredIfNeeded(ClientSocketContext::Pointer deferredRequest, ConnStateData::Pointer & conn); static void clientUpdateSocketStats(log_type logType, size_t size); -static ClientSocketContext *clientParseRequestMethod(char *inbuf, method_t * method_p, ConnStateData::Pointer & conn); -static char *skipLeadingSpace(char *aString); +char *skipLeadingSpace(char *aString); #if UNUSED_CODE static void trimTrailingSpaces(char *aString, size_t len); #endif -static ClientSocketContext *parseURIandHTTPVersion(char **url_p, HttpVersion * http_ver_p, ConnStateData::Pointer& conn, char *http_version_str); static int connReadWasError(ConnStateData::Pointer& conn, comm_err_t, int size, int xerrno); static int connFinishedWithConn(ConnStateData::Pointer& conn, int size); static void connNoteUseOfBuffer(ConnStateData* conn, size_t byteCount); @@ -1665,27 +1663,6 @@ parseHttpRequestAbort(ConnStateData::Pointer & conn, const char *uri) return context; } -ClientSocketContext * -clientParseRequestMethod(char *inbuf, method_t * method_p, ConnStateData::Pointer & conn) -{ - char *mstr = NULL; - - if ((mstr = strtok(inbuf, "\t ")) == NULL) { - debug(33, 1) ("clientParseRequestMethod: Can't get request method\n"); - return parseHttpRequestAbort(conn, "error:invalid-request"); - } - - *method_p = HttpRequestMethod(mstr); - - if (*method_p == METHOD_NONE) { - debug(33, 1) ("clientParseRequestMethod: Unsupported method '%s'\n", mstr); - return parseHttpRequestAbort(conn, "error:unsupported-request-method"); - } - - debug(33, 5) ("clientParseRequestMethod: Method is '%s'\n", mstr); - return NULL; -} - char * skipLeadingSpace(char *aString) { @@ -1737,69 +1714,6 @@ trimTrailingSpaces(char *aString, size_t len) #endif -static ClientSocketContext * -parseURIandHTTPVersion(char **url_p, HttpVersion * http_ver_p, - ConnStateData::Pointer & conn, char *http_version_str) -{ - char *url; - /* look for URL (strtok initiated by clientParseRequestMethod) */ - - if ((url = strtok(NULL, "\n")) == NULL) { - debug(33, 1) ("parseHttpRequest: Missing URL\n"); - return parseHttpRequestAbort(conn, "error:missing-url"); - } - - url = skipLeadingSpace(url); - - if (!*url || (http_version_str && http_version_str <= url+1)) { - debug(33, 1) ("parseHttpRequest: Missing URL\n"); - return parseHttpRequestAbort(conn, "error:missing-url"); - } - - /* Terminate URL just before HTTP version (or at end of line) */ - if (http_version_str) - http_version_str[-1] = '\0'; - else { - char *t = url + strlen(url) - 1; - - while (t > url && *t == '\r') - *t-- = '\0'; - } - - debug(33, 5) ("parseHttpRequest: URI is '%s'\n", url); - *url_p = url; - - if (http_version_str) { - if (sscanf(http_version_str + 5, "%d.%d", &http_ver_p->major, - &http_ver_p->minor) != 2) { - debug(33, 3) ("parseHttpRequest: Invalid HTTP identifier.\n"); - return parseHttpRequestAbort(conn, "error:invalid-http-ident"); - } - - debug(33, 6) ("parseHttpRequest: Client HTTP version %d.%d.\n", - http_ver_p->major, http_ver_p->minor); - } else { - *http_ver_p = HttpVersion(0,9); /* wild guess */ - } - - return NULL; -} - -/* Utility function to perform part of request parsing */ -static ClientSocketContext * -clientParseHttpRequestLine(char *reqline, ConnStateData::Pointer &conn, - method_t * method_p, char **url_p, HttpVersion * http_ver_p, char * http_version_str) -{ - ClientSocketContext *result = NULL; - /* XXX: This sequence relies on strtok() */ - - if ((result = clientParseRequestMethod(reqline, method_p, conn)) - || (result = parseURIandHTTPVersion(url_p, http_ver_p, conn, http_version_str))) - return result; - - return NULL; -} - void setLogUri(ClientHttpRequest * http, char const *uri) { @@ -1927,39 +1841,38 @@ prepareTransparentURL(ConnStateData::Pointer & conn, ClientHttpRequest *http, ch static ClientSocketContext * parseHttpRequest(ConnStateData::Pointer & conn, HttpParser *hp, method_t * method_p, HttpVersion *http_ver) { - char *inbuf = NULL; char *url = NULL; char *req_hdr = NULL; - char *t; char *end; size_t req_sz; ClientHttpRequest *http; ClientSocketContext *result; StoreIOBuffer tempBuffer; - const char *http_version; + int r; /* pre-set these values to make aborting simpler */ *method_p = METHOD_NONE; - /* Read the HTTP message. HTTP/0.9 is detected by the absence of a HTTP signature */ - - if ((t = (char *)memchr(hp->buf, '\n', hp->bufsiz)) == NULL) { + /* Attempt to parse the first line; this'll define the method, url, version and header begin */ + r = HttpParserParseReqLine(hp); + if (r == 0) { debug(33, 5) ("Incomplete request, waiting for end of request line\n"); - return NULL; + return NULL; } + if (r == -1) { + return parseHttpRequestAbort(conn, "error:invalid-request"); + } + /* Request line is valid here .. */ + *http_ver = HttpVersion(hp->v_maj, hp->v_min); - hp->req_start = 0; - hp->req_end = t - hp->buf; - http_version = findTrailingHTTPVersion(hp->buf); - - if (http_version) { + if (hp->v_maj > 0) { if ((req_sz = headersEnd(hp->buf, hp->bufsiz)) == 0) { debug(33, 5) ("Incomplete request, waiting for end of headers\n"); return NULL; } } else { debug(33, 3) ("parseHttpRequest: Missing HTTP identifier\n"); - req_sz = t - hp->buf + 1; /* HTTP/0.9 requests */ + req_sz = HttpParserReqSz(hp); } assert(req_sz <= (size_t) hp->bufsiz); @@ -1968,46 +1881,37 @@ parseHttpRequest(ConnStateData::Pointer & conn, HttpParser *hp, method_t * metho assert(req_sz > 0); hp->hdr_end = req_sz - 1; hp->hdr_start = hp->req_end + 1; - /* Use memcpy, not strdup! */ - inbuf = (char *)xmalloc(req_sz + 1); - xmemcpy(inbuf, hp->buf, req_sz); - *(inbuf + req_sz) = '\0'; - /* and adjust http_version to point into the new copy */ - - if (http_version) - http_version = inbuf + (http_version - hp->buf); /* Enforce max_request_size */ - if (req_sz >= Config.maxRequestHeaderSize) { debug(33, 5) ("parseHttpRequest: Too large request\n"); - xfree(inbuf); return parseHttpRequestAbort(conn, "error:request-too-large"); } - /* Barf on NULL characters in the headers */ - if (strlen(inbuf) != req_sz) { - debug(33, 1) ("parseHttpRequest: Requestheader contains NULL characters\n"); -#if TRY_TO_IGNORE_THIS - - return parseHttpRequestAbort(conn, "error:invalid-request"); -#endif - + /* Set method_p */ + *method_p = HttpRequestMethod(&hp->buf[hp->m_start], &hp->buf[hp->m_end]); + if (*method_p == METHOD_NONE) { + /* XXX need a way to say "this many character length string" */ + debug(33, 1) ("clientParseRequestMethod: Unsupported method in request '%s'\n", hp->buf); + /* XXX where's the method set for this error? */ + return parseHttpRequestAbort(conn, "error:unsupported-request-method"); } - /* Is there a legitimate first line to the headers ? */ - if ((result = clientParseHttpRequestLine(inbuf, conn, method_p, &url, - http_ver, (char *) http_version))) { - /* something wrong, abort */ - xfree(inbuf); - return result; - } + /* set url */ + /* + * XXX this should eventually not use a malloc'ed buffer; the transformation code + * below needs to be modified to not expect a mutable nul-terminated string. + */ + url = (char *)xmalloc(hp->u_end - hp->u_start + 16); + memcpy(url, hp->buf + hp->u_start, hp->u_end - hp->u_start + 1); + url[hp->u_end - hp->u_start + 1] = '\0'; /* * Process headers after request line * TODO: Use httpRequestParse here. */ - req_hdr = inbuf + hp->req_end; + /* XXX this code should be modified to take a const char * later! */ + req_hdr = (char *) hp->buf + hp->req_end + 1; debug(33, 3) ("parseHttpRequest: req_hdr = {%s}\n", req_hdr); @@ -2017,7 +1921,7 @@ parseHttpRequest(ConnStateData::Pointer & conn, HttpParser *hp, method_t * metho if (strstr(req_hdr, "\r\r\n")) { debug(33, 1) ("WARNING: suspicious HTTP request contains double CR\n"); - xfree(inbuf); + xfree(url); return parseHttpRequestAbort(conn, "error:double-CR"); } @@ -2076,10 +1980,9 @@ parseHttpRequest(ConnStateData::Pointer & conn, HttpParser *hp, method_t * metho setLogUri(http, http->uri); debug(33, 5) ("parseHttpRequest: Complete request received\n"); - xfree(inbuf); result->flags.parsed_ok = 1; + xfree(url); return result; - } int