src/http/one/Parser.cc

   1 /*
   2  * Copyright (C) 1996-2025 The Squid Software Foundation and contributors
   3  *
   4  * Squid software is distributed under GPLv2+ license and includes
   5  * contributions from numerous individuals and organizations.
   6  * Please see the COPYING and CONTRIBUTORS files for details.
   7  */
   8
   9 #include "squid.h"
  10 #include "base/CharacterSet.h"
  11 #include "debug/Stream.h"
  12 #include "http/one/Parser.h"
  13 #include "mime_header.h"
  14 #include "parser/Tokenizer.h"
  15 #include "SquidConfig.h"
  16
  17 /// RFC 7230 section 2.6 - 7 magic octets
  18 const SBuf Http::One::Parser::Http1magic("HTTP/1.");
  19
  20 const SBuf &Http::One::CrLf()
  21 {
  22     static const SBuf crlf("\r\n");
  23     return crlf;
  24 }
  25
  26 void
  27 Http::One::Parser::clear()
  28 {
  29     parsingStage_ = HTTP_PARSE_NONE;
  30     buf_ = nullptr;
  31     msgProtocol_ = AnyP::ProtocolVersion();
  32     mimeHeaderBlock_.clear();
  33 }
  34
  35 /// characters HTTP permits tolerant parsers to accept as delimiters
  36 static const CharacterSet &
  37 RelaxedDelimiterCharacters()
  38 {
  39     // RFC 7230 section 3.5
  40     // tolerant parser MAY accept any of SP, HTAB, VT (%x0B), FF (%x0C),
  41     // or bare CR as whitespace between request-line fields
  42     static const CharacterSet RelaxedDels =
  43         (CharacterSet::SP +
  44          CharacterSet::HTAB +
  45          CharacterSet("VT,FF","\x0B\x0C") +
  46          CharacterSet::CR).rename("relaxed-WSP");
  47
  48     return RelaxedDels;
  49 }
  50
  51 const CharacterSet &
  52 Http::One::Parser::WhitespaceCharacters()
  53 {
  54     return Config.onoff.relaxed_header_parser ?
  55            RelaxedDelimiterCharacters() : CharacterSet::WSP;
  56 }
  57
  58 const CharacterSet &
  59 Http::One::Parser::DelimiterCharacters()
  60 {
  61     return Config.onoff.relaxed_header_parser ?
  62            RelaxedDelimiterCharacters() : CharacterSet::SP;
  63 }
  64
  65 void
  66 Http::One::Parser::skipLineTerminator(Tokenizer &tok) const
  67 {
  68     if (Config.onoff.relaxed_header_parser && tok.skipOne(CharacterSet::LF))
  69         return;
  70
  71     tok.skipRequired("line-terminating CRLF", Http1::CrLf());
  72 }
  73
  74 /// all characters except the LF line terminator
  75 static const CharacterSet &
  76 LineCharacters()
  77 {
  78     static const CharacterSet line = CharacterSet::LF.complement("non-LF");
  79     return line;
  80 }
  81
  82 /**
  83  * Remove invalid lines (if any) from the mime prefix
  84  *
  85  * RFC 7230 section 3:
  86  * "A recipient that receives whitespace between the start-line and
  87  * the first header field MUST ... consume each whitespace-preceded
  88  * line without further processing of it."
  89  *
  90  * We need to always use the relaxed delimiters here to prevent
  91  * line smuggling through strict parsers.
  92  *
  93  * Note that 'whitespace' in RFC 7230 includes CR. So that means
  94  * sequences of CRLF will be pruned, but not sequences of bare-LF.
  95  */
  96 void
  97 Http::One::Parser::cleanMimePrefix()
  98 {
  99     Tokenizer tok(mimeHeaderBlock_);
 100     while (tok.skipOne(RelaxedDelimiterCharacters())) {
 101         (void)tok.skipAll(LineCharacters()); // optional line content
 102         // LF terminator is required.
 103         // trust headersEnd() to ensure that we have at least one LF
 104         (void)tok.skipOne(CharacterSet::LF);
 105     }
 106
 107     // If mimeHeaderBlock_ had just whitespace line(s) followed by CRLF,
 108     // then we skipped everything, including that terminating LF.
 109     // Restore the terminating CRLF if needed.
 110     if (tok.atEnd())
 111         mimeHeaderBlock_ = Http1::CrLf();
 112     else
 113         mimeHeaderBlock_ = tok.remaining();
 114     // now mimeHeaderBlock_ has 0+ fields followed by the LF terminator
 115 }
 116
 117 /**
 118  * Replace obs-fold with a single SP,
 119  *
 120  * RFC 7230 section 3.2.4
 121  * "A server that receives an obs-fold in a request message that is not
 122  *  within a message/http container MUST ... replace
 123  *  each received obs-fold with one or more SP octets prior to
 124  *  interpreting the field value or forwarding the message downstream."
 125  *
 126  * "A proxy or gateway that receives an obs-fold in a response message
 127  *  that is not within a message/http container MUST ... replace each
 128  *  received obs-fold with one or more SP octets prior to interpreting
 129  *  the field value or forwarding the message downstream."
 130  */
 131 void
 132 Http::One::Parser::unfoldMime()
 133 {
 134     Tokenizer tok(mimeHeaderBlock_);
 135     const auto szLimit = mimeHeaderBlock_.length();
 136     mimeHeaderBlock_.clear();
 137     // prevent the mime sender being able to make append() realloc/grow multiple times.
 138     mimeHeaderBlock_.reserveSpace(szLimit);
 139
 140     static const CharacterSet nonCRLF = (CharacterSet::CR + CharacterSet::LF).complement().rename("non-CRLF");
 141
 142     while (!tok.atEnd()) {
 143         const SBuf all(tok.remaining());
 144         const auto blobLen = tok.skipAll(nonCRLF); // may not be there
 145         const auto crLen = tok.skipAll(CharacterSet::CR); // may not be there
 146         const auto lfLen = tok.skipOne(CharacterSet::LF); // may not be there
 147
 148         if (lfLen && tok.skipAll(CharacterSet::WSP)) { // obs-fold!
 149             mimeHeaderBlock_.append(all.substr(0, blobLen));
 150             mimeHeaderBlock_.append(' '); // replace one obs-fold with one SP
 151         } else
 152             mimeHeaderBlock_.append(all.substr(0, blobLen + crLen + lfLen));
 153     }
 154 }
 155
 156 bool
 157 Http::One::Parser::grabMimeBlock(const char *which, const size_t limit)
 158 {
 159     // MIME headers block exist in (only) HTTP/1.x and ICY
 160     const bool expectMime = (msgProtocol_.protocol == AnyP::PROTO_HTTP && msgProtocol_.major == 1) ||
 161                             msgProtocol_.protocol == AnyP::PROTO_ICY ||
 162                             hackExpectsMime_;
 163
 164     if (expectMime) {
 165         /* NOTE: HTTP/0.9 messages do not have a mime header block.
 166          *       So the rest of the code will need to deal with '0'-byte headers
 167          *       (ie, none, so don't try parsing em)
 168          */
 169         bool containsObsFold;
 170         if (SBuf::size_type mimeHeaderBytes = headersEnd(buf_, containsObsFold)) {
 171
 172             // Squid could handle these headers, but admin does not want to
 173             if (firstLineSize() + mimeHeaderBytes >= limit) {
 174                 debugs(33, 5, "Too large " << which);
 175                 parseStatusCode = Http::scHeaderTooLarge;
 176                 buf_.consume(mimeHeaderBytes);
 177                 parsingStage_ = HTTP_PARSE_DONE;
 178                 return false;
 179             }
 180
 181             mimeHeaderBlock_ = buf_.consume(mimeHeaderBytes);
 182             cleanMimePrefix();
 183             if (containsObsFold)
 184                 unfoldMime();
 185
 186             debugs(74, 5, "mime header (0-" << mimeHeaderBytes << ") {" << mimeHeaderBlock_ << "}");
 187
 188         } else { // headersEnd() == 0
 189             if (buf_.length()+firstLineSize() >= limit) {
 190                 debugs(33, 5, "Too large " << which);
 191                 parseStatusCode = Http::scHeaderTooLarge;
 192                 parsingStage_ = HTTP_PARSE_DONE;
 193             } else
 194                 debugs(33, 5, "Incomplete " << which << ", waiting for end of headers");
 195             return false;
 196         }
 197
 198     } else
 199         debugs(33, 3, "Missing HTTP/1.x identifier");
 200
 201     // NP: we do not do any further stages here yet so go straight to DONE
 202     parsingStage_ = HTTP_PARSE_DONE;
 203
 204     return true;
 205 }
 206
 207 // arbitrary maximum-length for headers which can be found by Http1Parser::getHostHeaderField()
 208 #define GET_HDR_SZ  1024
 209
 210 // BUG: returns only the first header line with given name,
 211 //      ignores multi-line headers and obs-fold headers
 212 char *
 213 Http::One::Parser::getHostHeaderField()
 214 {
 215     if (!headerBlockSize())
 216         return nullptr;
 217
 218     LOCAL_ARRAY(char, header, GET_HDR_SZ);
 219     const char *name = "Host";
 220     const int namelen = strlen(name);
 221
 222     debugs(25, 5, "looking for " << name);
 223
 224     // while we can find more LF in the SBuf
 225     Tokenizer tok(mimeHeaderBlock_);
 226     SBuf p;
 227
 228     while (tok.prefix(p, LineCharacters())) {
 229         if (!tok.skipOne(CharacterSet::LF)) // move tokenizer past the LF
 230             break; // error. reached invalid octet or end of buffer instead of an LF ??
 231
 232         // header lines must start with the name (case insensitive)
 233         if (p.substr(0, namelen).caseCmp(name, namelen))
 234             continue;
 235
 236         // then a COLON
 237         if (p[namelen] != ':')
 238             continue;
 239
 240         // drop any trailing *CR sequence
 241         p.trim(Http1::CrLf(), false, true);
 242
 243         debugs(25, 5, "checking " << p);
 244         p.consume(namelen + 1);
 245
 246         // TODO: optimize SBuf::trim to take CharacterSet directly
 247         Tokenizer t(p);
 248         t.skipAll(CharacterSet::WSP);
 249         p = t.remaining();
 250
 251         // prevent buffer overrun on char header[];
 252         p.chop(0, sizeof(header)-1);
 253
 254         // currently only used for pre-parse Host header, ensure valid domain[:port] or ip[:port]
 255         static const auto hostChars = CharacterSet("host",":[].-_") + CharacterSet::ALPHA + CharacterSet::DIGIT;
 256         if (p.findFirstNotOf(hostChars) != SBuf::npos)
 257             break; // error. line contains character not accepted in Host header
 258
 259         // return the header field-value
 260         SBufToCstring(header, p);
 261         debugs(25, 5, "returning " << header);
 262         return header;
 263     }
 264
 265     return nullptr;
 266 }
 267
 268 int
 269 Http::One::ErrorLevel()
 270 {
 271     return Config.onoff.relaxed_header_parser < 0 ? DBG_IMPORTANT : 5;
 272 }
 273
 274 /// common part of ParseBws() and ParseStrctBws()
 275 namespace Http::One {
 276 static void
 277 ParseBws_(Parser::Tokenizer &tok, const CharacterSet &bwsChars)
 278 {
 279     const auto count = tok.skipAll(bwsChars);
 280
 281     if (tok.atEnd())
 282         throw InsufficientInput(); // even if count is positive
 283
 284     if (count) {
 285         // Generating BWS is a MUST-level violation so warn about it as needed.
 286         debugs(33, ErrorLevel(), "found " << count << " BWS octets");
 287         // RFC 7230 says we MUST parse BWS, so we fall through even if
 288         // Config.onoff.relaxed_header_parser is off.
 289     }
 290     // else we successfully "parsed" an empty BWS sequence
 291
 292     // success: no more BWS characters expected
 293 }
 294 } // namespace Http::One
 295
 296 void
 297 Http::One::ParseBws(Parser::Tokenizer &tok)
 298 {
 299     ParseBws_(tok, Parser::WhitespaceCharacters());
 300 }
 301
 302 void
 303 Http::One::ParseStrictBws(Parser::Tokenizer &tok)
 304 {
 305     ParseBws_(tok, CharacterSet::WSP);
 306 }
 307