From: Amos Jeffries Date: Mon, 2 Jun 2014 11:49:56 +0000 (-0700) Subject: Cleanup: rewrite Http::One::Parser::getHeaderField() using Tokenizer X-Git-Tag: merge-candidate-3-v1~506^2~25 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=687696c16ece476b1a6bf6cf89b2f40173052363;p=thirdparty%2Fsquid.git Cleanup: rewrite Http::One::Parser::getHeaderField() using Tokenizer Fixes performance regression in SBuf usage. Also, fixes one nasty bug where it would return the value of a line containign the named header despite obs-fold and quoting. This was particualarly bad as this method is used primarily to retrieve Host: header before full aprsing of the mime block takes place. The bug causes "foo.invalid" to be detected as hostname in: GET / HTTP/1.1 Foo: bar="\r Host: fake.invalid\n " Host: example.com --- diff --git a/src/Makefile.am b/src/Makefile.am index 62a6c8fcbf..285ad865e3 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -619,6 +619,8 @@ squid_LDADD = \ $(AUTH_LIBS) \ $(DISK_LIBS) \ acl/libapi.la \ + http/libsquid-http.la \ + parser/libsquid-parser.la \ base/libbase.la \ libsquid.la \ ip/libip.la \ @@ -629,7 +631,6 @@ squid_LDADD = \ anyp/libanyp.la \ comm/libcomm.la \ eui/libeui.la \ - http/libsquid-http.la \ icmp/libicmp.la icmp/libicmp-core.la \ log/liblog.la \ format/libformat.la \ @@ -643,7 +644,6 @@ squid_LDADD = \ $(ADAPTATION_LIBS) \ $(ESI_LIBS) \ $(SNMP_LIBS) \ - parser/libsquid-parser.la \ $(top_builddir)/lib/libmisccontainers.la \ $(top_builddir)/lib/libmiscencoding.la \ $(top_builddir)/lib/libmiscutil.la \ @@ -1198,6 +1198,7 @@ nodist_tests_testHttpReply_SOURCES=\ tests_testHttpReply_LDFLAGS = $(LIBADD_DL) tests_testHttpReply_LDADD=\ http/libsquid-http.la \ + parser/libsquid-parser.la \ acl/libacls.la \ acl/libapi.la \ acl/libstate.la \ @@ -1333,6 +1334,7 @@ nodist_tests_testACLMaxUserIP_SOURCES= \ $(TESTSOURCES) tests_testACLMaxUserIP_LDADD= \ http/libsquid-http.la \ + parser/libsquid-parser.la \ $(AUTH_ACL_LIBS) \ ident/libident.la \ acl/libacls.la \ @@ -1585,6 +1587,7 @@ nodist_tests_testCacheManager_SOURCES = \ # comm.cc only requires comm/libcomm.la until fdc_table is dead. tests_testCacheManager_LDADD = \ http/libsquid-http.la \ + parser/libsquid-parser.la \ ident/libident.la \ acl/libacls.la \ acl/libstate.la \ @@ -1770,6 +1773,7 @@ nodist_tests_testDiskIO_SOURCES= \ swap_log_op.cc tests_testDiskIO_LDADD = \ http/libsquid-http.la \ + parser/libsquid-parser.la \ SquidConfig.o \ CommCalls.o \ DnsLookupDetails.o \ @@ -2015,6 +2019,7 @@ nodist_tests_testEvent_SOURCES = \ $(DISKIO_GEN_SOURCE) tests_testEvent_LDADD = \ http/libsquid-http.la \ + parser/libsquid-parser.la \ ident/libident.la \ acl/libacls.la \ acl/libstate.la \ @@ -2262,6 +2267,7 @@ nodist_tests_testEventLoop_SOURCES = \ $(DISKIO_GEN_SOURCE) tests_testEventLoop_LDADD = \ http/libsquid-http.la \ + parser/libsquid-parser.la \ ident/libident.la \ acl/libacls.la \ acl/libstate.la \ @@ -2502,6 +2508,7 @@ nodist_tests_test_http_range_SOURCES = \ $(DISKIO_GEN_SOURCE) tests_test_http_range_LDADD = \ http/libsquid-http.la \ + parser/libsquid-parser.la \ ident/libident.la \ acl/libacls.la \ acl/libstate.la \ @@ -2575,6 +2582,7 @@ nodist_tests_testHttp1Parser_SOURCES = \ $(TESTSOURCES) tests_testHttp1Parser_LDADD= \ http/libsquid-http.la \ + parser/libsquid-parser.la \ anyp/libanyp.la \ SquidConfig.o \ base/libbase.la \ @@ -2800,6 +2808,8 @@ tests_testHttpRequest_LDADD = \ fs/libfs.la \ $(SSL_LIBS) \ ipc/libipc.la \ + http/libsquid-http.la \ + parser/libsquid-parser.la \ base/libbase.la \ mgr/libmgr.la \ anyp/libanyp.la \ @@ -2808,7 +2818,6 @@ tests_testHttpRequest_LDADD = \ comm/libcomm.la \ log/liblog.la \ format/libformat.la \ - http/libsquid-http.la \ $(REPL_OBJS) \ $(ADAPTATION_LIBS) \ $(ESI_LIBS) \ @@ -2975,6 +2984,7 @@ nodist_tests_testStore_SOURCES= \ tests_testStore_LDADD= \ http/libsquid-http.la \ + parser/libsquid-parser.la \ ident/libident.la \ acl/libacls.la \ acl/libstate.la \ @@ -3205,6 +3215,7 @@ nodist_tests_testUfs_SOURCES = \ swap_log_op.cc tests_testUfs_LDADD = \ http/libsquid-http.la \ + parser/libsquid-parser.la \ CommCalls.o \ DnsLookupDetails.o \ ident/libident.la \ @@ -3389,6 +3400,7 @@ nodist_tests_testRock_SOURCES = \ $(TESTSOURCES) tests_testRock_LDADD = \ http/libsquid-http.la \ + parser/libsquid-parser.la \ libsquid.la \ comm/libcomm.la \ anyp/libanyp.la \ @@ -3627,6 +3639,7 @@ nodist_tests_testURL_SOURCES = \ $(BUILT_SOURCES) tests_testURL_LDADD = \ http/libsquid-http.la \ + parser/libsquid-parser.la \ anyp/libanyp.la \ ident/libident.la \ acl/libacls.la \ diff --git a/src/http/one/Parser.cc b/src/http/one/Parser.cc index 867a9ba8c9..079a3d1fc4 100644 --- a/src/http/one/Parser.cc +++ b/src/http/one/Parser.cc @@ -1,6 +1,7 @@ #include "squid.h" #include "Debug.h" #include "http/one/Parser.h" +#include "parser/Tokenizer.h" /// RFC 7230 section 2.6 - 7 magic octets const SBuf Http::One::Parser::Http1magic("HTTP/1."); @@ -17,60 +18,53 @@ Http::One::Parser::clear() // arbitrary maximum-length for headers which can be found by Http1Parser::getHeaderField() #define GET_HDR_SZ 1024 +// BUG: returns only the first header line with given name, +// ignores multi-line headers and obs-fold headers char * Http::One::Parser::getHeaderField(const char *name) { - LOCAL_ARRAY(char, header, GET_HDR_SZ); - const char *p = NULL; - char *q = NULL; - char got = 0; - const int namelen = name ? strlen(name) : 0; - if (!headerBlockSize() || !name) return NULL; + LOCAL_ARRAY(char, header, GET_HDR_SZ); + const int namelen = name ? strlen(name) : 0; + debugs(25, 5, "looking for '" << name << "'"); - for (p = mimeHeader().c_str(); *p; p += strcspn(p, "\n\r")) { - if (strcmp(p, "\r\n\r\n") == 0 || strcmp(p, "\n\n") == 0) - return NULL; + ::Parser::Tokenizer tok(mimeHeaderBlock_); + SBuf p; + const SBuf crlf("\r\n"); - while (xisspace(*p)) - ++p; + // while we can find more LF in the SBuf + while (tok.prefix(p, CharacterSet::LF)) { + tok.skip(CharacterSet::LF); // move tokenizer past the LF - if (strncasecmp(p, name, namelen)) + // header lines must start with the name (case insensitive) + if (p.substr(0, namelen).caseCmp(name, namelen)) continue; - if (!xisspace(p[namelen]) && p[namelen] != ':') + // then a COLON + if (p[namelen] != ':') continue; - int l = strcspn(p, "\n\r") + 1; - - if (l > GET_HDR_SZ) - l = GET_HDR_SZ; - - xstrncpy(header, p, l); - - debugs(25, 5, "checking '" << header << "'"); - - q = header; + // drop any trailing *CR sequence + p.trim(crlf, false, true); - q += namelen; + debugs(25, 5, "checking " << p); + p.consume(namelen + 1); - if (*q == ':') { - ++q; - got = 1; - } + // TODO: optimize SBuf::trim to take CharacterSet directly + ::Parser::Tokenizer t(p); + t.skip(CharacterSet::WSP); + p = t.remaining(); - while (xisspace(*q)) { - ++q; - got = 1; - } + // prevent buffer overrun on char header[]; + p.chop(0, sizeof(header)-1); - if (got) { - debugs(25, 5, "returning '" << q << "'"); - return q; - } + // return the header field-value + xstrncpy(header, p.rawContent(), p.length()); + debugs(25, 5, "returning: " << header); + return header; } return NULL; diff --git a/src/http/one/Parser.h b/src/http/one/Parser.h index 0c1ccc60c8..e4ec3f8471 100644 --- a/src/http/one/Parser.h +++ b/src/http/one/Parser.h @@ -69,6 +69,12 @@ public: const AnyP::ProtocolVersion & messageProtocol() const {return msgProtocol_;} /** + * Scan the mime header block (badly) for a header with teh given name. + * + * BUG: omits lines when searching for headers with obs-fold or multiple entries. + * + * BUG: limits output to just 1KB when Squid accepts up to 64KB line length. + * * \return A pointer to a field-value of the first matching field-name, or NULL. */ char *getHeaderField(const char *name);