From: Alex Rousskov Date: Sun, 7 Jul 2024 03:03:00 +0000 (+0000) Subject: Fix Tokenizer::int64() parsing of "0" when guessing base (#1842) X-Git-Tag: SQUID_7_0_1~95 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8cc5e88c7fb0d73ce413d1871641b93403d1ea1a;p=thirdparty%2Fsquid.git Fix Tokenizer::int64() parsing of "0" when guessing base (#1842) Known bug victims in current code were tcp_outgoing_mark, mark_client_packet, clientside_mark, and mark_client_connection directives as well as client_connection_mark and (deprecated) clientside_mark ACLs if they were configured to match a zero mark using "0" or "0/..." syntax: ERROR: configuration failure: NfMarkConfig: invalid value '0/10'... exception location: NfMarkConfig.cc(23) getNfmark Probably broken since 2014 commit 01f2137d. --- diff --git a/src/parser/Tokenizer.cc b/src/parser/Tokenizer.cc index 51654bae90..6544fb4482 100644 --- a/src/parser/Tokenizer.cc +++ b/src/parser/Tokenizer.cc @@ -264,7 +264,6 @@ Parser::Tokenizer::int64(int64_t & result, int base, bool allowSign, const SBuf: if (base == 0) { if ( *s == '0') { base = 8; - ++s; } else { base = 10; } diff --git a/src/tests/testTokenizer.cc b/src/tests/testTokenizer.cc index e69fec4f95..28532bce0b 100644 --- a/src/tests/testTokenizer.cc +++ b/src/tests/testTokenizer.cc @@ -248,6 +248,285 @@ TestTokenizer::testTokenizerInt64() CPPUNIT_ASSERT(t.buf().isEmpty()); } + // When interpreting octal numbers, standard strtol() and Tokenizer::int64() + // treat leading zero as a part of sequence of digits rather than a + // character used _exclusively_ as base indicator. Thus, it is not possible + // to create an invalid octal number with an explicit octal base -- the + // first invalid character after the base will be successfully ignored. This + // treatment also makes it difficult to define "shortest valid octal input". + // Here, we are just enumerating interesting "short input" octal cases in + // four dimensions: + // 1. int64(base) argument: forced or auto-detected; + // 2. base character ("0") in input: absent or present; + // 3. post-base digits in input: absent, valid, or invalid; + // 4. input length limits via int64(length) argument: unlimited or limited. + + // forced base; input: no base, no post-base digits, unlimited + { + int64_t rv; + Parser::Tokenizer t(SBuf("")); + CPPUNIT_ASSERT(!t.int64(rv, 8)); + CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf()); + } + + // forced base; input: no base, no post-base digits, limited + { + int64_t rv; + Parser::Tokenizer t(SBuf("7")); + CPPUNIT_ASSERT(!t.int64(rv, 8, false, 0)); + CPPUNIT_ASSERT_EQUAL(SBuf("7"), t.buf()); + } + + // forced base; input: no base, one valid post-base digit, unlimited + { + int64_t rv; + Parser::Tokenizer t(SBuf("4")); + const int64_t benchmark = 04; + CPPUNIT_ASSERT(t.int64(rv, 8)); + CPPUNIT_ASSERT_EQUAL(benchmark, rv); + CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf()); + } + + // forced base; input: no base, one valid post-base digit, limited + { + int64_t rv; + Parser::Tokenizer t(SBuf("46")); + const int64_t benchmark = 04; + CPPUNIT_ASSERT(t.int64(rv, 8, false, 1)); + CPPUNIT_ASSERT_EQUAL(benchmark, rv); + CPPUNIT_ASSERT_EQUAL(SBuf("6"), t.buf()); + } + + // forced base; input: no base, one invalid post-base digit, unlimited + { + int64_t rv; + Parser::Tokenizer t(SBuf("8")); + CPPUNIT_ASSERT(!t.int64(rv, 8)); + CPPUNIT_ASSERT_EQUAL(SBuf("8"), t.buf()); + } + + // forced base; input: no base, one invalid post-base digit, limited + { + int64_t rv; + Parser::Tokenizer t(SBuf("80")); + CPPUNIT_ASSERT(!t.int64(rv, 8, false, 1)); + CPPUNIT_ASSERT_EQUAL(SBuf("80"), t.buf()); + } + + // repeat the above six octal cases, but now with base character in input + + // forced base; input: base, no post-base digits, unlimited + { + int64_t rv; + Parser::Tokenizer t(SBuf("0")); + const int64_t benchmark = 0; + CPPUNIT_ASSERT(t.int64(rv, 8)); + CPPUNIT_ASSERT_EQUAL(benchmark, rv); + CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf()); + } + + // forced base; input: base, no post-base digits, limited + { + int64_t rv; + Parser::Tokenizer t(SBuf("07")); + const int64_t benchmark = 0; + CPPUNIT_ASSERT(t.int64(rv, 8, false, 1)); + CPPUNIT_ASSERT_EQUAL(benchmark, rv); + CPPUNIT_ASSERT_EQUAL(SBuf("7"), t.buf()); + } + + // forced base; input: base, one valid post-base digit, unlimited + { + int64_t rv; + Parser::Tokenizer t(SBuf("04")); + const int64_t benchmark = 04; + CPPUNIT_ASSERT(t.int64(rv, 8)); + CPPUNIT_ASSERT_EQUAL(benchmark, rv); + CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf()); + } + + // forced base; input: base, one valid post-base digit, limited + { + int64_t rv; + Parser::Tokenizer t(SBuf("046")); + const int64_t benchmark = 04; + CPPUNIT_ASSERT(t.int64(rv, 8, false, 2)); + CPPUNIT_ASSERT_EQUAL(benchmark, rv); + CPPUNIT_ASSERT_EQUAL(SBuf("6"), t.buf()); + } + + // forced base; input: base, one invalid post-base digit, unlimited + { + int64_t rv; + Parser::Tokenizer t(SBuf("08")); + const int64_t benchmark = 00; + CPPUNIT_ASSERT(t.int64(rv, 8)); + CPPUNIT_ASSERT_EQUAL(benchmark, rv); + CPPUNIT_ASSERT_EQUAL(SBuf("8"), t.buf()); + } + + // forced base; input: base, one invalid post-base digit, limited + { + int64_t rv; + Parser::Tokenizer t(SBuf("08")); + const int64_t benchmark = 00; + CPPUNIT_ASSERT(t.int64(rv, 8, false, 2)); + CPPUNIT_ASSERT_EQUAL(benchmark, rv); + CPPUNIT_ASSERT_EQUAL(SBuf("8"), t.buf()); + } + + // And now repeat six "with base character in input" octal cases but with + // auto-detected base. When octal cases below say "auto-detected base", they + // describe int64() base=0 parameter value. Current int64() implementation + // does auto-detect base as octal in all of these cases, but that might + // change, and some of these cases (e.g., "0") can also be viewed as a + // non-octal input case as well. These cases do not attempt to test base + // detection. They focus on other potential problems. + + // auto-detected base; input: base, no post-base digits, unlimited + { + int64_t rv; + Parser::Tokenizer t(SBuf("0")); + const int64_t benchmark = 00; + CPPUNIT_ASSERT(t.int64(rv, 0)); + CPPUNIT_ASSERT_EQUAL(benchmark, rv); + CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf()); + } + + // auto-detected base; input: base, no post-base digits, limited + { + int64_t rv; + Parser::Tokenizer t(SBuf("07")); + const int64_t benchmark = 0; + CPPUNIT_ASSERT(t.int64(rv, 0, false, 1)); + CPPUNIT_ASSERT_EQUAL(benchmark, rv); + CPPUNIT_ASSERT_EQUAL(SBuf("7"), t.buf()); + } + + // auto-detected base; input: base, one valid post-base digit, unlimited + { + int64_t rv; + Parser::Tokenizer t(SBuf("04")); + const int64_t benchmark = 04; + CPPUNIT_ASSERT(t.int64(rv, 0)); + CPPUNIT_ASSERT_EQUAL(benchmark, rv); + CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf()); + } + + // auto-detected base; input: base, one valid post-base digit, limited + { + int64_t rv; + Parser::Tokenizer t(SBuf("046")); + const int64_t benchmark = 04; + CPPUNIT_ASSERT(t.int64(rv, 0, false, 2)); + CPPUNIT_ASSERT_EQUAL(benchmark, rv); + CPPUNIT_ASSERT_EQUAL(SBuf("6"), t.buf()); + } + + // auto-detected base; input: base, one invalid post-base digit, unlimited + { + int64_t rv; + Parser::Tokenizer t(SBuf("08")); + const int64_t benchmark = 00; + CPPUNIT_ASSERT(t.int64(rv, 0)); + CPPUNIT_ASSERT_EQUAL(benchmark, rv); + CPPUNIT_ASSERT_EQUAL(SBuf("8"), t.buf()); + } + + // auto-detected base; input: base, one invalid post-base digit, limited + { + int64_t rv; + Parser::Tokenizer t(SBuf("08")); + const int64_t benchmark = 00; + CPPUNIT_ASSERT(t.int64(rv, 0, false, 2)); + CPPUNIT_ASSERT_EQUAL(benchmark, rv); + CPPUNIT_ASSERT_EQUAL(SBuf("8"), t.buf()); + } + + // this ends four-dimensional enumeration of octal cases described earlier + + // check octal base auto-detection + { + int64_t rv; + Parser::Tokenizer t(SBuf("0128")); + const int64_t benchmark = 012; + CPPUNIT_ASSERT(t.int64(rv, 0)); + CPPUNIT_ASSERT_EQUAL(benchmark, rv); + CPPUNIT_ASSERT_EQUAL(SBuf("8"), t.buf()); + } + + // check that octal base auto-detection is not confused by repeated zeros + { + int64_t rv; + Parser::Tokenizer t(SBuf("00000000071")); + const int64_t benchmark = 00000000071; + CPPUNIT_ASSERT(t.int64(rv)); + CPPUNIT_ASSERT_EQUAL(benchmark,rv); + CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf()); + } + + // check that forced octal base is not confused by hex prefix + { + int64_t rv; + Parser::Tokenizer t(SBuf("0x5")); + const int64_t benchmark = 0; + CPPUNIT_ASSERT(t.int64(rv, 8)); + CPPUNIT_ASSERT_EQUAL(benchmark, rv); + CPPUNIT_ASSERT_EQUAL(SBuf("x5"), t.buf()); + } + + // autodetect decimal base in shortest valid input + { + int64_t rv; + Parser::Tokenizer t(SBuf("1")); + const int64_t benchmark = 1; + CPPUNIT_ASSERT(t.int64(rv)); + CPPUNIT_ASSERT_EQUAL(benchmark,rv); + CPPUNIT_ASSERT(t.buf().isEmpty()); + } + + // autodetect hex base in shortest valid input + { + int64_t rv; + Parser::Tokenizer t(SBuf("0X1")); + const int64_t benchmark = 0X1; + CPPUNIT_ASSERT(t.int64(rv)); + CPPUNIT_ASSERT_EQUAL(benchmark,rv); + CPPUNIT_ASSERT(t.buf().isEmpty()); + } + + // invalid (when autodetecting base) input matching hex base + { + int64_t rv; + Parser::Tokenizer t(SBuf("0x")); + CPPUNIT_ASSERT(!t.int64(rv)); + CPPUNIT_ASSERT_EQUAL(SBuf("0x"), t.buf()); + } + + // invalid (when forcing hex base) input matching hex base + { + int64_t rv; + Parser::Tokenizer t(SBuf("0x")); + CPPUNIT_ASSERT(!t.int64(rv, 16)); + CPPUNIT_ASSERT_EQUAL(SBuf("0x"), t.buf()); + } + + // invalid (when autodetecting base and limiting) input matching hex base + { + int64_t rv; + Parser::Tokenizer t(SBuf("0x2")); + CPPUNIT_ASSERT(!t.int64(rv, 0, true, 2)); + CPPUNIT_ASSERT_EQUAL(SBuf("0x2"), t.buf()); + } + + // invalid (when forcing hex base and limiting) input matching hex base + { + int64_t rv; + Parser::Tokenizer t(SBuf("0x3")); + CPPUNIT_ASSERT(!t.int64(rv, 16, false, 2)); + CPPUNIT_ASSERT_EQUAL(SBuf("0x3"), t.buf()); + } + // API mismatch: don't eat leading space { int64_t rv; @@ -264,6 +543,36 @@ TestTokenizer::testTokenizerInt64() CPPUNIT_ASSERT_EQUAL(SBuf(" 1234"), t.buf()); } + // zero corner case: repeated zeros + { + int64_t rv; + Parser::Tokenizer t(SBuf("00")); + const int64_t benchmark = 00; + CPPUNIT_ASSERT(t.int64(rv)); + CPPUNIT_ASSERT_EQUAL(benchmark,rv); + CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf()); + } + + // zero corner case: "positive" zero + { + int64_t rv; + Parser::Tokenizer t(SBuf("+0")); + const int64_t benchmark = +0; + CPPUNIT_ASSERT(t.int64(rv)); + CPPUNIT_ASSERT_EQUAL(benchmark,rv); + CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf()); + } + + // zero corner case: "negative" zero + { + int64_t rv; + Parser::Tokenizer t(SBuf("-0")); + const int64_t benchmark = -0; + CPPUNIT_ASSERT(t.int64(rv)); + CPPUNIT_ASSERT_EQUAL(benchmark,rv); + CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf()); + } + // trailing spaces { int64_t rv;