src/http/one/TeChunkedParser.cc

   1 /*
   2  * Copyright (C) 1996-2016 The Squid Software Foundation and contributors
   3  *
   4  * Squid software is distributed under GPLv2+ license and includes
   5  * contributions from numerous individuals and organizations.
   6  * Please see the COPYING and CONTRIBUTORS files for details.
   7  */
   8
   9 #include "squid.h"
  10 #include "base/TextException.h"
  11 #include "Debug.h"
  12 #include "http/one/TeChunkedParser.h"
  13 #include "http/one/Tokenizer.h"
  14 #include "http/ProtocolVersion.h"
  15 #include "MemBuf.h"
  16 #include "Parsing.h"
  17
  18 Http::One::TeChunkedParser::TeChunkedParser()
  19 {
  20     // chunked encoding only exists in HTTP/1.1
  21     Http1::Parser::msgProtocol_ = Http::ProtocolVersion(1,1);
  22
  23     clear();
  24 }
  25
  26 void
  27 Http::One::TeChunkedParser::clear()
  28 {
  29     parsingStage_ = Http1::HTTP_PARSE_NONE;
  30     buf_.clear();
  31     theChunkSize = theLeftBodySize = 0;
  32     theOut = NULL;
  33     useOriginBody = -1;
  34 }
  35
  36 bool
  37 Http::One::TeChunkedParser::parse(const SBuf &aBuf)
  38 {
  39     buf_ = aBuf; // sync buffers first so calls to remaining() work properly if nothing done.
  40
  41     if (buf_.isEmpty()) // nothing to do (yet)
  42         return false;
  43
  44     debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
  45
  46     Must(!buf_.isEmpty() && theOut);
  47
  48     if (parsingStage_ == Http1::HTTP_PARSE_NONE)
  49         parsingStage_ = Http1::HTTP_PARSE_CHUNK_SZ;
  50
  51     Http1::Tokenizer tok(buf_);
  52
  53     // loop for as many chunks as we can
  54     // use do-while instead of while so that we can incrementally
  55     // restart in the middle of a chunk/frame
  56     do {
  57
  58         if (parsingStage_ == Http1::HTTP_PARSE_CHUNK_EXT && !parseChunkExtension(tok, theChunkSize))
  59             return false;
  60
  61         if (parsingStage_ == Http1::HTTP_PARSE_CHUNK && !parseChunkBody(tok))
  62             return false;
  63
  64         if (parsingStage_ == Http1::HTTP_PARSE_MIME && !grabMimeBlock("Trailers", 64*1024 /* 64KB max */))
  65             return false;
  66
  67         // loop for as many chunks as we can
  68     } while (parsingStage_ == Http1::HTTP_PARSE_CHUNK_SZ && parseChunkSize(tok));
  69
  70     return !needsMoreData() && !needsMoreSpace();
  71 }
  72
  73 bool
  74 Http::One::TeChunkedParser::needsMoreSpace() const
  75 {
  76     assert(theOut);
  77     return parsingStage_ == Http1::HTTP_PARSE_CHUNK && !theOut->hasPotentialSpace();
  78 }
  79
  80 /// RFC 7230 section 4.1 chunk-size
  81 bool
  82 Http::One::TeChunkedParser::parseChunkSize(Http1::Tokenizer &tok)
  83 {
  84     Must(theChunkSize <= 0); // Should(), really
  85
  86     int64_t size = -1;
  87     if (tok.int64(size, 16, false) && !tok.atEnd()) {
  88         if (size < 0)
  89             throw TexcHere("negative chunk size");
  90
  91         theChunkSize = theLeftBodySize = size;
  92         debugs(94,7, "found chunk: " << theChunkSize);
  93         buf_ = tok.remaining(); // parse checkpoint
  94         parsingStage_ = Http1::HTTP_PARSE_CHUNK_EXT;
  95         return true;
  96
  97     } else if (tok.atEnd()) {
  98         return false; // need more data
  99     }
 100
 101     // else error
 102     throw TexcHere("corrupted chunk size");
 103     return false; // should not be reachable
 104 }
 105
 106 /**
 107  * Parses a set of RFC 7230 section 4.1.1 chunk-ext
 108  * http://tools.ietf.org/html/rfc7230#section-4.1.1
 109  *
 110  *   chunk-ext      = *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
 111  *   chunk-ext-name = token
 112  *   chunk-ext-val  = token / quoted-string
 113  *
 114  * ICAP 'use-original-body=N' extension is supported.
 115  */
 116 bool
 117 Http::One::TeChunkedParser::parseChunkExtension(Http1::Tokenizer &tok, bool skipKnown)
 118 {
 119     SBuf ext;
 120     SBuf value;
 121     while (tok.skip(';') && tok.prefix(ext, CharacterSet::TCHAR)) {
 122
 123         // whole value part is optional. if no '=' expect next chunk-ext
 124         if (tok.skip('=')) {
 125
 126             if (!skipKnown) {
 127                 if (ext.cmp("use-original-body",17) == 0 && tok.int64(useOriginBody, 10)) {
 128                     debugs(94, 3, "Found chunk extension " << ext << "=" << useOriginBody);
 129                     buf_ = tok.remaining(); // parse checkpoint
 130                     continue;
 131                 }
 132             }
 133
 134             debugs(94, 5, "skipping unknown chunk extension " << ext);
 135
 136             // unknown might have a value token or quoted-string
 137             if (tok.quotedStringOrToken(value) && !tok.atEnd()) {
 138                 buf_ = tok.remaining(); // parse checkpoint
 139                 continue;
 140             }
 141
 142             // otherwise need more data OR corrupt syntax
 143             break;
 144         }
 145
 146         if (!tok.atEnd())
 147             buf_ = tok.remaining(); // parse checkpoint (unless there might be more token name)
 148     }
 149
 150     if (tok.atEnd())
 151         return false;
 152
 153     if (skipLineTerminator(tok)) {
 154         buf_ = tok.remaining(); // checkpoint
 155         // non-0 chunk means data, 0-size means optional Trailer follows
 156         parsingStage_ = theChunkSize ? Http1::HTTP_PARSE_CHUNK : Http1::HTTP_PARSE_MIME;
 157         return true;
 158     }
 159
 160     throw TexcHere("corrupted chunk extension value");
 161     return false;
 162 }
 163
 164 bool
 165 Http::One::TeChunkedParser::parseChunkBody(Http1::Tokenizer &tok)
 166 {
 167     if (theLeftBodySize > 0) {
 168         buf_ = tok.remaining(); // sync buffers before buf_ use
 169
 170         // TODO fix type mismatches and casting for these
 171         const size_t availSize = min(theLeftBodySize, (uint64_t)buf_.length());
 172         const size_t safeSize = min(availSize, (size_t)theOut->potentialSpaceSize());
 173
 174         theOut->append(buf_.rawContent(), safeSize);
 175         buf_.consume(safeSize);
 176         theLeftBodySize -= safeSize;
 177
 178         tok.reset(buf_); // sync buffers after consume()
 179     }
 180
 181     if (theLeftBodySize == 0)
 182         return parseChunkEnd(tok);
 183     else
 184         Must(needsMoreData() || needsMoreSpace());
 185
 186     return true;
 187 }
 188
 189 bool
 190 Http::One::TeChunkedParser::parseChunkEnd(Http1::Tokenizer &tok)
 191 {
 192     Must(theLeftBodySize == 0); // Should(), really
 193
 194     if (skipLineTerminator(tok)) {
 195         buf_ = tok.remaining(); // parse checkpoint
 196         theChunkSize = 0; // done with the current chunk
 197         parsingStage_ = Http1::HTTP_PARSE_CHUNK_SZ;
 198         return true;
 199
 200     } else if (!tok.atEnd()) {
 201         throw TexcHere("found data between chunk end and CRLF");
 202     }
 203
 204     return false;
 205 }
 206