From b22ce53bb325123ce3d6c7410f8b6d9c0143a7bf Mon Sep 17 00:00:00 2001 From: Eduard Bagdasaryan Date: Fri, 16 Sep 2016 23:53:28 +1200 Subject: [PATCH] Fix logged request size (%http::>st) and other size-related %codes. The %[http::]>st logformat code should log the actual number of [dechunked] bytes received from the client. However, for requests with Content-Length, Squid was logging the sum of the request header size and the Content-Length header field value instead. The logged value was wrong when the client sent fewer than Content-Length body bytes. Also: * Fixed %http::>h and %http::h logformat code, but since ICAP services deal with (and log) both HTTP requests and responses, that focus on the HTTP message kind actually complicates ICAP logging configuration and will eventually require introduction of new %http logformat codes that would be meaningless in access.log context. - In ICAP context, %http::>h should log to-be-adapted HTTP message headers embedded in an ICAP request (HTTP request headers in REQMOD; HTTP response headers in RESPMOD). Before this change, %http::>h logged constant/"FYI" HTTP request headers in RESPMOD. - Similarly, %http::" and "<" characters should indicate ICAP message kind (where the being-logged HTTP message is embedded), not HTTP message kind, even for %http codes. TODO: %http::>h code logs "-" in RESPMOD because AccessLogEntry does not store virgin HTTP response headers. * Correctly initialize ICAP ALE HTTP fields related to HTTP message sizes for icap_log, including %http::>st, %http::sh, and %http::>sh format codes. * Initialize HttpMsg::hdr_sz in HTTP header parsing code. Among other uses, this field is needed to initialize HTTP fields inside ICAP ALE. * Synced default icap_log format documentation with the current code. --- src/adaptation/icap/ModXact.cc | 53 +++++++++++++----------- src/cf.data.pre | 59 +++++++++++++++++--------- src/client_side.cc | 6 ++- src/format/Format.cc | 75 +++++++++++++++++++++++++++------- 4 files changed, 132 insertions(+), 61 deletions(-) diff --git a/src/adaptation/icap/ModXact.cc b/src/adaptation/icap/ModXact.cc index 6f154f6b37..7e1158c42c 100644 --- a/src/adaptation/icap/ModXact.cc +++ b/src/adaptation/icap/ModXact.cc @@ -1259,31 +1259,32 @@ void prepareLogWithRequestDetails(HttpRequest *, AccessLogEntry::Pointer &); void Adaptation::Icap::ModXact::finalizeLogInfo() { - HttpRequest * request_ = NULL; - HttpRequest * adapted_request_ = NULL; - HttpReply * reply_ = NULL; - request_ = (virgin.cause? virgin.cause: dynamic_cast(virgin.header)); + HttpRequest *adapted_request_ = NULL; + HttpReply *adapted_reply_ = NULL; + HttpRequest *virgin_request_ = (virgin.cause ? virgin.cause : dynamic_cast(virgin.header)); if (!(adapted_request_ = dynamic_cast(adapted.header))) { - adapted_request_ = request_; - reply_ = dynamic_cast(adapted.header); + // if the request was not adapted, use virgin request to simplify + // the code further below + adapted_request_ = virgin_request_; + adapted_reply_ = dynamic_cast(adapted.header); } - Adaptation::Icap::History::Pointer h = (request_ ? request_->icapHistory() : NULL); + Adaptation::Icap::History::Pointer h = (virgin_request_ ? virgin_request_->icapHistory() : NULL); Must(h != NULL); // ICAPXaction::maybeLog calls only if there is a log al.icp.opcode = ICP_INVALID; al.url = h->log_uri.termedBuf(); const Adaptation::Icap::ServiceRep &s = service(); al.icap.reqMethod = s.cfg().method; - al.cache.caddr = request_->client_addr; + al.cache.caddr = virgin_request_->client_addr; - al.request = request_; + al.request = virgin_request_; HTTPMSGLOCK(al.request); al.adapted_request = adapted_request_; HTTPMSGLOCK(al.adapted_request); - if (reply_) { - al.reply = reply_; + if (adapted_reply_) { + al.reply = adapted_reply_; HTTPMSGLOCK(al.reply); } else al.reply = NULL; @@ -1296,25 +1297,29 @@ void Adaptation::Icap::ModXact::finalizeLogInfo() al.cache.ssluser = h->ssluser.termedBuf(); #endif al.cache.code = h->logType; - // XXX: should use icap-specific counters instead ? - al.http.clientRequestSz.payloadData = h->req_sz; + + const HttpMsg *virgin_msg = dynamic_cast(virgin.header); + if (!virgin_msg) + virgin_msg = virgin_request_; + assert(virgin_msg != virgin.cause); + al.http.clientRequestSz.header = virgin_msg->hdr_sz; + al.http.clientRequestSz.payloadData = virgin_msg->body_pipe->producedSize(); // leave al.icap.bodyBytesRead negative if no body if (replyHttpHeaderSize >= 0 || replyHttpBodySize >= 0) { const int64_t zero = 0; // to make max() argument types the same - al.icap.bodyBytesRead = - max(zero, replyHttpHeaderSize) + max(zero, replyHttpBodySize); + const uint64_t headerSize = max(zero, replyHttpHeaderSize); + const uint64_t bodySize = max(zero, replyHttpBodySize); + al.icap.bodyBytesRead = headerSize + bodySize; + al.http.clientReplySz.header = headerSize; + al.http.clientReplySz.payloadData = bodySize; } - if (reply_) { - al.http.code = reply_->sline.status(); - al.http.content_type = reply_->content_type.termedBuf(); - if (replyHttpBodySize >= 0) { - // XXX: should use icap-specific counters instead ? - al.http.clientReplySz.payloadData = replyHttpBodySize; - al.http.clientReplySz.header = reply_->hdr_sz; + if (adapted_reply_) { + al.http.code = adapted_reply_->sline.status(); + al.http.content_type = adapted_reply_->content_type.termedBuf(); + if (replyHttpBodySize >= 0) al.cache.highOffset = replyHttpBodySize; - } //don't set al.cache.objectSize because it hasn't exist yet Packer p; @@ -1323,7 +1328,7 @@ void Adaptation::Icap::ModXact::finalizeLogInfo() mb.init(); packerToMemInit(&p, &mb); - reply_->header.packInto(&p); + adapted_reply_->header.packInto(&p); al.headers.reply = xstrdup(mb.buf); packerClean(&p); diff --git a/src/cf.data.pre b/src/cf.data.pre index 4d6df80dbe..639a7e2dad 100644 --- a/src/cf.data.pre +++ b/src/cf.data.pre @@ -4427,13 +4427,35 @@ DOC_START ICAP transaction log lines will correspond to a single access log line. - ICAP log uses logformat codes that make sense for an ICAP - transaction. Header-related codes are applied to the HTTP header - embedded in an ICAP server response, with the following caveats: - For REQMOD, there is no HTTP response header unless the ICAP - server performed request satisfaction. For RESPMOD, the HTTP - request header is the header sent to the ICAP server. For - OPTIONS, there are no HTTP headers. + ICAP log supports many access.log logformat %codes. In ICAP context, + HTTP message-related %codes are applied to the HTTP message embedded + in an ICAP message. Logformat "%http::>..." codes are used for HTTP + messages embedded in ICAP requests while "%http::<..." codes are used + for HTTP messages embedded in ICAP responses. For example: + + http::>h To-be-adapted HTTP message headers sent by Squid to + the ICAP service. For REQMOD transactions, these are + HTTP request headers. For RESPMOD, these are HTTP + response headers, but Squid currently cannot log them + (i.e., %http::>h will expand to "-" for RESPMOD). + + http::st Bytes sent to the ICAP server (TCP payload - only; i.e., what Squid writes to the socket). + icap::>st The total size of the ICAP request sent to the ICAP + server (ICAP headers + ICAP body), including chunking + metadata (if any). - icap::a %icap::to/%03icap::Hs %icap::A %icap::to/%03icap::Hs %icap::http.method = request->method; aLogEntry->http.version = request->http_ver; aLogEntry->hier = request->hier; - if (request->content_length > 0) // negative when no body or unknown length - aLogEntry->http.clientRequestSz.payloadData += request->content_length; // XXX: actually adaptedRequest payload size ?? aLogEntry->cache.extuser = request->extacl_user.termedBuf(); // Adapted request, if any, inherits and then collects all the stats, but @@ -593,6 +591,9 @@ ClientHttpRequest::logRequest() al->cache.objectSize = loggingEntry()->contentLen(); // payload duplicate ?? with or without TE ? al->http.clientRequestSz.header = req_sz; + // the virgin request is saved to al->request + if (al->request && al->request->body_pipe != NULL) + al->http.clientRequestSz.payloadData = al->request->body_pipe->producedSize(); al->http.clientReplySz.header = out.headers_sz; // XXX: calculate without payload encoding or headers !! al->http.clientReplySz.payloadData = out.size - out.headers_sz; // pretend its all un-encoded data for now. @@ -2744,6 +2745,7 @@ clientProcessRequest(ConnStateData *conn, HttpParser *hp, ClientSocketContext *c request->my_addr = conn->clientConnection->local; request->myportname = conn->port->name; request->http_ver = http_ver; + request->hdr_sz = http->req_sz; // Link this HttpRequest to ConnStateData relatively early so the following complex handling can use it // TODO: this effectively obsoletes a lot of conn->FOO copying. That needs cleaning up later. diff --git a/src/format/Format.cc b/src/format/Format.cc index d48e6703e7..4540a47c4f 100644 --- a/src/format/Format.cc +++ b/src/format/Format.cc @@ -309,6 +309,38 @@ log_quoted_string(const char *str, char *out) *p = '\0'; } +/// XXX: Misnamed. TODO: Split reply; +#if USE_ADAPTATION + // al->icap.reqMethod is methodNone in access.log context + if (!msg && al->icap.reqMethod == Adaptation::methodReqmod) + msg = al->adapted_request; +#endif + return msg; +} + +/// XXX: Misnamed. See actualReplyHeader(). +/// \return HttpRequest or HttpReply for %http::>h. +static const HttpMsg * +actualRequestHeader(const AccessLogEntry::Pointer &al) +{ +#if USE_ADAPTATION + // al->icap.reqMethod is methodNone in access.log context + if (al->icap.reqMethod == Adaptation::methodRespmod) { + // XXX: for now AccessLogEntry lacks virgin response headers + return NULL; + } +#endif + return al->request; +} + void Format::Format::assemble(MemBuf &mb, const AccessLogEntry::Pointer &al, int logSequenceNumber) const { @@ -547,8 +579,8 @@ Format::Format::assemble(MemBuf &mb, const AccessLogEntry::Pointer &al, int logS case LFT_REQUEST_HEADER: - if (al->request) - sb = al->request->header.getByName(fmt->data.header.header); + if (const HttpMsg *msg = actualRequestHeader(al)) + sb = msg->header.getByName(fmt->data.header.header); out = sb.termedBuf(); @@ -567,15 +599,15 @@ Format::Format::assemble(MemBuf &mb, const AccessLogEntry::Pointer &al, int logS break; - case LFT_REPLY_HEADER: - if (al->reply) - sb = al->reply->header.getByName(fmt->data.header.header); + case LFT_REPLY_HEADER: { + if (const HttpMsg *msg = actualReplyHeader(al)) + sb = msg->header.getByName(fmt->data.header.header); out = sb.termedBuf(); quote = 1; - - break; + } + break; #if USE_ADAPTATION case LFT_ADAPTATION_SUM_XACT_TIMES: @@ -757,8 +789,8 @@ Format::Format::assemble(MemBuf &mb, const AccessLogEntry::Pointer &al, int logS break; #endif case LFT_REQUEST_HEADER_ELEM: - if (al->request) - sb = al->request->header.getByNameListMember(fmt->data.header.header, fmt->data.header.element, fmt->data.header.separator); + if (const HttpMsg *msg = actualRequestHeader(al)) + sb = msg->header.getByNameListMember(fmt->data.header.header, fmt->data.header.element, fmt->data.header.separator); out = sb.termedBuf(); @@ -776,18 +808,27 @@ Format::Format::assemble(MemBuf &mb, const AccessLogEntry::Pointer &al, int logS break; - case LFT_REPLY_HEADER_ELEM: - if (al->reply) - sb = al->reply->header.getByNameListMember(fmt->data.header.header, fmt->data.header.element, fmt->data.header.separator); + case LFT_REPLY_HEADER_ELEM: { + if (const HttpMsg *msg = actualReplyHeader(al)) + sb = msg->header.getByNameListMember(fmt->data.header.header, fmt->data.header.element, fmt->data.header.separator); out = sb.termedBuf(); quote = 1; - - break; + } + break; case LFT_REQUEST_ALL_HEADERS: - out = al->headers.request; +#if USE_ADAPTATION + if (al->icap.reqMethod == Adaptation::methodRespmod) { + // XXX: since AccessLogEntry::Headers lacks virgin response + // headers, do nothing for now + out = NULL; + } else +#endif + { + out = al->headers.request; + } quote = 1; @@ -802,6 +843,10 @@ Format::Format::assemble(MemBuf &mb, const AccessLogEntry::Pointer &al, int logS case LFT_REPLY_ALL_HEADERS: out = al->headers.reply; +#if USE_ADAPTATION + if (!out && al->icap.reqMethod == Adaptation::methodReqmod) + out = al->headers.adapted_request; +#endif quote = 1; -- 2.47.2