From: Eduard Bagdasaryan Date: Wed, 29 Nov 2023 12:50:05 +0000 (+0000) Subject: Add %byte{value} logformat code for logging or sending any byte (#1588) X-Git-Tag: SQUID_7_0_1~270 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=007cd68679204c84b3c5188fdb87ef1c742bb5fb;p=thirdparty%2Fsquid.git Add %byte{value} logformat code for logging or sending any byte (#1588) This feature is needed by at least the statsd tool receiving TCP log info: https://github.com/statsd/statsd/blob/7c07eec/docs/server.md No support for zero byte values yet because existing Format::assemble() code does not support that out of the box, and there is no known need for such support. It can be added later (without backward compatibility problems) if needed. --- diff --git a/src/cf.data.pre b/src/cf.data.pre index a89022cc4d..94e09480a0 100644 --- a/src/cf.data.pre +++ b/src/cf.data.pre @@ -4685,6 +4685,13 @@ DOC_START Format codes: % a literal % character + + byte{value} Adds a single byte with the given value (e.g., %byte{10} + adds an ASCII LF character a.k.a. "new line" or "\n"). The value + parameter is required and must be a positive decimal integer not + exceeding 255. Zero-valued bytes (i.e. ASCII NUL characters) are + not yet supported. + sn Unique sequence number per log line entry err_code The ID of an error response served by Squid or a similar internal error identifier. 
diff --git a/src/format/ByteCode.h b/src/format/ByteCode.h index 890abc0785..c8f7ace221 100644 --- a/src/format/ByteCode.h +++ b/src/format/ByteCode.h @@ -33,6 +33,8 @@ typedef enum { /* arbitrary string between tokens */ LFT_STRING, + LFT_BYTE, + /* client TCP connection remote end details */ LFT_CLIENT_IP_ADDRESS, LFT_CLIENT_FQDN, diff --git a/src/format/Format.cc b/src/format/Format.cc index d0c6719f45..892c5f01ec 100644 --- a/src/format/Format.cc +++ b/src/format/Format.cc @@ -399,6 +399,12 @@ Format::Format::assemble(MemBuf &mb, const AccessLogEntry::Pointer &al, int logS out = ""; break; + case LFT_BYTE: + tmp[0] = static_cast<char>(fmt->data.byteValue); + tmp[1] = '\0'; + out = tmp; + break; + case LFT_STRING: out = fmt->data.string; break; diff --git a/src/format/Token.cc b/src/format/Token.cc index 2a24c8a3e5..671cde55cc 100644 --- a/src/format/Token.cc +++ b/src/format/Token.cc @@ -11,6 +11,7 @@ #include "format/Token.h" #include "format/TokenTableEntry.h" #include "globals.h" +#include "parser/Tokenizer.h" #include "proxyp/Elements.h" #include "sbuf/Stream.h" #include "SquidConfig.h" @@ -139,6 +140,7 @@ static TokenTableEntry TokenTable2C[] = { /// Miscellaneous >2 byte tokens static TokenTableEntry TokenTableMisc[] = { + TokenTableEntry("byte", LFT_BYTE), TokenTableEntry(">eui", LFT_CLIENT_EUI), TokenTableEntry(">qos", LFT_CLIENT_LOCAL_TOS), TokenTableEntry("<qos", LFT_SERVER_LOCAL_TOS), [... the rest of this hunk and the header of the next hunk were lost in extraction ...] +template <typename Integer> +static Integer +ParseUnsignedDecimalInteger(const char *description, const SBuf &rawInput) +{ + constexpr auto minValue = std::numeric_limits<Integer>::min(); + constexpr auto maxValue = std::numeric_limits<Integer>::max(); + + Parser::Tokenizer tok(rawInput); + if (tok.skip('0')) { + if (!tok.atEnd()) { + // e.g., 077, 0xFF, 0b101, or 0.1 + throw TextException(ToSBuf("Malformed ", description, + ": Expected a decimal integer without leading zeros but got '", + rawInput, "'"), Here()); + } + // for simplicity, we currently assume that zero is always in range + static_assert(minValue <= 0); + static_assert(0 <= maxValue); 
+ return Integer(0); + } + // else the value might still be zero (e.g., -0) + + // check that our caller is compatible with Tokenizer::int64() use below + using ParsedInteger = int64_t; + static_assert(minValue >= std::numeric_limits<ParsedInteger>::min()); + static_assert(maxValue <= std::numeric_limits<ParsedInteger>::max()); + + ParsedInteger rawValue = 0; + if (!tok.int64(rawValue, 10, false)) { + // e.g., FF, -1, or 18446744073709551616 + // TODO: Provide better diagnostic for values exceeding int64_t maximum. + throw TextException(ToSBuf("Malformed ", description, + ": Expected an unsigned decimal integer but got '", + rawInput, "'"), Here()); + } + + if (!tok.atEnd()) { + // e.g., 1,000, 1.0, or 1e6 + throw TextException(ToSBuf("Malformed ", description, + ": Trailing garbage after ", rawValue, " in '", + rawInput, "'"), Here()); + } + + if (rawValue > maxValue) { + throw TextException(ToSBuf("Malformed ", description, + ": Expected an integer value not exceeding ", maxValue, + " but got ", rawValue), Here()); + } + + if (rawValue < minValue) { + throw TextException(ToSBuf("Malformed ", description, + ": Expected an integer value not below ", minValue, + " but got ", rawValue), Here()); + } + + return Integer(rawValue); +} + /* parses a single token. Returns the token length in characters, * and fills in the lt item with the token information. * def is for sure null-terminated @@ -476,6 +538,16 @@ Format::Token::parse(const char *def, Quoting *quoting) switch (type) { + case LFT_BYTE: + if (!data.string) + throw TextException("logformat %byte requires a parameter (e.g., %byte{10})", Here()); + // TODO: Convert Format::Token::data.string to SBuf. 
+ if (const auto v = ParseUnsignedDecimalInteger<uint8_t>("logformat %byte{value}", SBuf(data.string))) + data.byteValue = v; + else + throw TextException("logformat %byte{n} does not support zero n values yet", Here()); + break; + #if USE_ADAPTATION case LFT_ADAPTATION_LAST_HEADER: #endif @@ -682,6 +754,7 @@ Format::Token::Token() : type(LFT_NONE), data.header.element = nullptr; data.header.separator = ','; data.headerId = ProxyProtocol::Two::htUnknown; + data.byteValue = 0; } Format::Token::~Token() diff --git a/src/format/Token.h b/src/format/Token.h index d69cd46424..fc483baeff 100644 --- a/src/format/Token.h +++ b/src/format/Token.h @@ -60,6 +60,8 @@ public: char *element; char separator; } header; + + uint8_t byteValue; // %byte{} parameter or zero } data; int widthMin; ///< minimum field width int widthMax; ///< maximum field width