src/http/one/TeChunkedParser.cc

   1 /*
   2  * Copyright (C) 1996-2017 The Squid Software Foundation and contributors
   3  *
   4  * Squid software is distributed under GPLv2+ license and includes
   5  * contributions from numerous individuals and organizations.
   6  * Please see the COPYING and CONTRIBUTORS files for details.
   7  */
   8
   9 #include "squid.h"
  10 #include "base/TextException.h"
  11 #include "Debug.h"
  12 #include "http/one/TeChunkedParser.h"
  13 #include "http/one/Tokenizer.h"
  14 #include "http/ProtocolVersion.h"
  15 #include "MemBuf.h"
  16 #include "Parsing.h"
  17 #include "SquidConfig.h"
  18
  19 Http::One::TeChunkedParser::TeChunkedParser()
  20 {
  21     // chunked encoding only exists in HTTP/1.1
  22     Http1::Parser::msgProtocol_ = Http::ProtocolVersion(1,1);
  23
  24     clear();
  25 }
  26
  27 void
  28 Http::One::TeChunkedParser::clear()
  29 {
  30     parsingStage_ = Http1::HTTP_PARSE_NONE;
  31     buf_.clear();
  32     theChunkSize = theLeftBodySize = 0;
  33     theOut = NULL;
  34     useOriginBody = -1;
  35 }
  36
  37 bool
  38 Http::One::TeChunkedParser::parse(const SBuf &aBuf)
  39 {
  40     buf_ = aBuf; // sync buffers first so calls to remaining() work properly if nothing done.
  41
  42     if (buf_.isEmpty()) // nothing to do (yet)
  43         return false;
  44
  45     debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
  46
  47     Must(!buf_.isEmpty() && theOut);
  48
  49     if (parsingStage_ == Http1::HTTP_PARSE_NONE)
  50         parsingStage_ = Http1::HTTP_PARSE_CHUNK_SZ;
  51
  52     Http1::Tokenizer tok(buf_);
  53
  54     // loop for as many chunks as we can
  55     // use do-while instead of while so that we can incrementally
  56     // restart in the middle of a chunk/frame
  57     do {
  58
  59         if (parsingStage_ == Http1::HTTP_PARSE_CHUNK_EXT && !parseChunkExtension(tok, theChunkSize))
  60             return false;
  61
  62         if (parsingStage_ == Http1::HTTP_PARSE_CHUNK && !parseChunkBody(tok))
  63             return false;
  64
  65         if (parsingStage_ == Http1::HTTP_PARSE_MIME && !grabMimeBlock("Trailers", 64*1024 /* 64KB max */))
  66             return false;
  67
  68         // loop for as many chunks as we can
  69     } while (parsingStage_ == Http1::HTTP_PARSE_CHUNK_SZ && parseChunkSize(tok));
  70
  71     return !needsMoreData() && !needsMoreSpace();
  72 }
  73
  74 bool
  75 Http::One::TeChunkedParser::needsMoreSpace() const
  76 {
  77     assert(theOut);
  78     return parsingStage_ == Http1::HTTP_PARSE_CHUNK && !theOut->hasPotentialSpace();
  79 }
  80
  81 /// RFC 7230 section 4.1 chunk-size
  82 bool
  83 Http::One::TeChunkedParser::parseChunkSize(Http1::Tokenizer &tok)
  84 {
  85     Must(theChunkSize <= 0); // Should(), really
  86
  87     int64_t size = -1;
  88     if (tok.int64(size, 16, false) && !tok.atEnd()) {
  89         if (size < 0)
  90             throw TexcHere("negative chunk size");
  91
  92         theChunkSize = theLeftBodySize = size;
  93         debugs(94,7, "found chunk: " << theChunkSize);
  94         buf_ = tok.remaining(); // parse checkpoint
  95         parsingStage_ = Http1::HTTP_PARSE_CHUNK_EXT;
  96         return true;
  97
  98     } else if (tok.atEnd()) {
  99         return false; // need more data
 100     }
 101
 102     // else error
 103     throw TexcHere("corrupted chunk size");
 104     return false; // should not be reachable
 105 }
 106
 107 /**
 108  * Parses a set of RFC 7230 section 4.1.1 chunk-ext
 109  * http://tools.ietf.org/html/rfc7230#section-4.1.1
 110  *
 111  *   chunk-ext      = *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
 112  *   chunk-ext-name = token
 113  *   chunk-ext-val  = token / quoted-string
 114  *
 115  * ICAP 'use-original-body=N' extension is supported.
 116  */
 117 bool
 118 Http::One::TeChunkedParser::parseChunkExtension(Http1::Tokenizer &tok, bool skipKnown)
 119 {
 120     // Bug 4492: IBM_HTTP_Server sends SP padding
 121     if (auto n = tok.skipAll(CharacterSet::SP)) {
 122         debugs(94, 3, "skipping " << n << " spurious whitespace at start of chunk extension");
 123     }
 124
 125     SBuf ext;
 126     SBuf value;
 127     while (tok.skip(';') && tok.prefix(ext, CharacterSet::TCHAR)) {
 128
 129         // whole value part is optional. if no '=' expect next chunk-ext
 130         if (tok.skip('=')) {
 131
 132             if (!skipKnown) {
 133                 if (ext.cmp("use-original-body",17) == 0 && tok.int64(useOriginBody, 10)) {
 134                     debugs(94, 3, "Found chunk extension " << ext << "=" << useOriginBody);
 135                     buf_ = tok.remaining(); // parse checkpoint
 136                     continue;
 137                 }
 138             }
 139
 140             debugs(94, 5, "skipping unknown chunk extension " << ext);
 141
 142             // unknown might have a value token or quoted-string
 143             if (tok.quotedStringOrToken(value) && !tok.atEnd()) {
 144                 buf_ = tok.remaining(); // parse checkpoint
 145                 continue;
 146             }
 147
 148             // otherwise need more data OR corrupt syntax
 149             break;
 150         }
 151
 152         if (!tok.atEnd())
 153             buf_ = tok.remaining(); // parse checkpoint (unless there might be more token name)
 154     }
 155
 156     if (skipLineTerminator(tok)) {
 157         buf_ = tok.remaining(); // checkpoint
 158         // non-0 chunk means data, 0-size means optional Trailer follows
 159         parsingStage_ = theChunkSize ? Http1::HTTP_PARSE_CHUNK : Http1::HTTP_PARSE_MIME;
 160         return true;
 161     }
 162
 163     return false;
 164 }
 165
 166 bool
 167 Http::One::TeChunkedParser::parseChunkBody(Http1::Tokenizer &tok)
 168 {
 169     if (theLeftBodySize > 0) {
 170         buf_ = tok.remaining(); // sync buffers before buf_ use
 171
 172         // TODO fix type mismatches and casting for these
 173         const size_t availSize = min(theLeftBodySize, (uint64_t)buf_.length());
 174         const size_t safeSize = min(availSize, (size_t)theOut->potentialSpaceSize());
 175
 176         theOut->append(buf_.rawContent(), safeSize);
 177         buf_.consume(safeSize);
 178         theLeftBodySize -= safeSize;
 179
 180         tok.reset(buf_); // sync buffers after consume()
 181     }
 182
 183     if (theLeftBodySize == 0)
 184         return parseChunkEnd(tok);
 185     else
 186         Must(needsMoreData() || needsMoreSpace());
 187
 188     return true;
 189 }
 190
 191 bool
 192 Http::One::TeChunkedParser::parseChunkEnd(Http1::Tokenizer &tok)
 193 {
 194     Must(theLeftBodySize == 0); // Should(), really
 195
 196     if (skipLineTerminator(tok)) {
 197         buf_ = tok.remaining(); // parse checkpoint
 198         theChunkSize = 0; // done with the current chunk
 199         parsingStage_ = Http1::HTTP_PARSE_CHUNK_SZ;
 200         return true;
 201     }
 202
 203     return false;
 204 }
 205