src/HttpParser.cc

   1 /*
   2  * Copyright (C) 1996-2014 The Squid Software Foundation and contributors
   3  *
   4  * Squid software is distributed under GPLv2+ license and includes
   5  * contributions from numerous individuals and organizations.
   6  * Please see the COPYING and CONTRIBUTORS files for details.
   7  */
   8
   9 #include "squid.h"
  10 #include "Debug.h"
  11 #include "HttpParser.h"
  12 #include "profiler/Profiler.h"
  13 #include "SquidConfig.h"
  14
  15 void
  16 HttpParser::clear()
  17 {
  18     state = HTTP_PARSE_NONE;
  19     request_parse_status = Http::scNone;
  20     buf = NULL;
  21     bufsiz = 0;
  22     req.start = req.end = -1;
  23     hdr_start = hdr_end = -1;
  24     req.m_start = req.m_end = -1;
  25     req.u_start = req.u_end = -1;
  26     req.v_start = req.v_end = -1;
  27     req.v_maj = req.v_min = 0;
  28 }
  29
  30 void
  31 HttpParser::reset(const char *aBuf, int len)
  32 {
  33     clear(); // empty the state.
  34     state = HTTP_PARSE_NEW;
  35     buf = aBuf;
  36     bufsiz = len;
  37     debugs(74, 5, HERE << "Request buffer is " << buf);
  38 }
  39
  40 int
  41 HttpParser::parseRequestFirstLine()
  42 {
  43     int second_word = -1; // track the suspected URI start
  44     int first_whitespace = -1, last_whitespace = -1; // track the first and last SP byte
  45     int line_end = -1; // tracks the last byte BEFORE terminal \r\n or \n sequence
  46
  47     debugs(74, 5, HERE << "parsing possible request: " << buf);
  48
  49     // Single-pass parse: (provided we have the whole line anyways)
  50
  51     req.start = 0;
  52     if (Config.onoff.relaxed_header_parser) {
  53         if (Config.onoff.relaxed_header_parser < 0 && buf[req.start] == ' ')
  54             debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
  55                    "Whitespace bytes received ahead of method. " <<
  56                    "Ignored due to relaxed_header_parser.");
  57         // Be tolerant of prefix spaces (other bytes are valid method values)
  58         for (; req.start < bufsiz && buf[req.start] == ' '; ++req.start);
  59     }
  60     req.end = -1;
  61     for (int i = 0; i < bufsiz; ++i) {
  62         // track first and last whitespace (SP only)
  63         if (buf[i] == ' ') {
  64             last_whitespace = i;
  65             if (first_whitespace < req.start)
  66                 first_whitespace = i;
  67         }
  68
  69         // track next non-SP/non-HT byte after first_whitespace
  70         if (second_word < first_whitespace && buf[i] != ' ' && buf[i] != '\t') {
  71             second_word = i;
  72         }
  73
  74         // locate line terminator
  75         if (buf[i] == '\n') {
  76             req.end = i;
  77             line_end = i - 1;
  78             break;
  79         }
  80         if (i < bufsiz - 1 && buf[i] == '\r') {
  81             if (Config.onoff.relaxed_header_parser) {
  82                 if (Config.onoff.relaxed_header_parser < 0 && buf[i + 1] == '\r')
  83                     debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
  84                            "Series of carriage-return bytes received prior to line terminator. " <<
  85                            "Ignored due to relaxed_header_parser.");
  86
  87                 // Be tolerant of invalid multiple \r prior to terminal \n
  88                 if (buf[i + 1] == '\n' || buf[i + 1] == '\r')
  89                     line_end = i - 1;
  90                 while (i < bufsiz - 1 && buf[i + 1] == '\r')
  91                     ++i;
  92
  93                 if (buf[i + 1] == '\n') {
  94                     req.end = i + 1;
  95                     break;
  96                 }
  97             } else {
  98                 if (buf[i + 1] == '\n') {
  99                     req.end = i + 1;
 100                     line_end = i - 1;
 101                     break;
 102                 }
 103             }
 104
 105             // RFC 2616 section 5.1
 106             // "No CR or LF is allowed except in the final CRLF sequence"
 107             request_parse_status = Http::scBadRequest;
 108             return -1;
 109         }
 110     }
 111     if (req.end == -1) {
 112         debugs(74, 5, "Parser: retval 0: from " << req.start <<
 113                "->" << req.end << ": needs more data to complete first line.");
 114         return 0;
 115     }
 116
 117     // NP: we have now seen EOL, more-data (0) cannot occur.
 118     //     From here on any failure is -1, success is 1
 119
 120     // Input Validation:
 121
 122     // Process what we now know about the line structure into field offsets
 123     // generating HTTP status for any aborts as we go.
 124
 125     // First non-whitespace = beginning of method
 126     if (req.start > line_end) {
 127         request_parse_status = Http::scBadRequest;
 128         return -1;
 129     }
 130     req.m_start = req.start;
 131
 132     // First whitespace = end of method
 133     if (first_whitespace > line_end || first_whitespace < req.start) {
 134         request_parse_status = Http::scBadRequest; // no method
 135         return -1;
 136     }
 137     req.m_end = first_whitespace - 1;
 138     if (req.m_end < req.m_start) {
 139         request_parse_status = Http::scBadRequest; // missing URI?
 140         return -1;
 141     }
 142
 143     // First non-whitespace after first SP = beginning of URL+Version
 144     if (second_word > line_end || second_word < req.start) {
 145         request_parse_status = Http::scBadRequest; // missing URI
 146         return -1;
 147     }
 148     req.u_start = second_word;
 149
 150     // RFC 1945: SP and version following URI are optional, marking version 0.9
 151     // we identify this by the last whitespace being earlier than URI start
 152     if (last_whitespace < second_word && last_whitespace >= req.start) {
 153         req.v_maj = 0;
 154         req.v_min = 9;
 155         req.u_end = line_end;
 156         request_parse_status = Http::scOkay; // HTTP/0.9
 157         return 1;
 158     } else {
 159         // otherwise last whitespace is somewhere after end of URI.
 160         req.u_end = last_whitespace;
 161         // crop any trailing whitespace in the area we think of as URI
 162         for (; req.u_end >= req.u_start && xisspace(buf[req.u_end]); --req.u_end);
 163     }
 164     if (req.u_end < req.u_start) {
 165         request_parse_status = Http::scBadRequest; // missing URI
 166         return -1;
 167     }
 168
 169     // Last whitespace SP = before start of protocol/version
 170     if (last_whitespace >= line_end) {
 171         request_parse_status = Http::scBadRequest; // missing version
 172         return -1;
 173     }
 174     req.v_start = last_whitespace + 1;
 175     req.v_end = line_end;
 176
 177     // We only accept HTTP protocol requests right now.
 178     // TODO: accept other protocols; RFC 2326 (RTSP protocol) etc
 179     if ((req.v_end - req.v_start +1) < 5 || strncasecmp(&buf[req.v_start], "HTTP/", 5) != 0) {
 180 #if USE_HTTP_VIOLATIONS
 181         // being lax; old parser accepted strange versions
 182         // there is a LOT of cases which are ambiguous, therefore we cannot use relaxed_header_parser here.
 183         req.v_maj = 0;
 184         req.v_min = 9;
 185         req.u_end = line_end;
 186         request_parse_status = Http::scOkay; // treat as HTTP/0.9
 187         return 1;
 188 #else
 189         // protocol not supported / implemented.
 190         request_parse_status = Http::scHttpVersionNotSupported;
 191         return -1;
 192 #endif
 193     }
 194
 195     int i = req.v_start + sizeof("HTTP/") -1;
 196
 197     /* next should be 1 or more digits */
 198     if (!isdigit(buf[i])) {
 199         request_parse_status = Http::scHttpVersionNotSupported;
 200         return -1;
 201     }
 202     int maj = 0;
 203     for (; i <= line_end && (isdigit(buf[i])) && maj < 65536; ++i) {
 204         maj = maj * 10;
 205         maj = maj + (buf[i]) - '0';
 206     }
 207     // catch too-big values or missing remainders
 208     if (maj >= 65536 || i > line_end) {
 209         request_parse_status = Http::scHttpVersionNotSupported;
 210         return -1;
 211     }
 212     req.v_maj = maj;
 213
 214     /* next should be .; we -have- to have this as we have a whole line.. */
 215     if (buf[i] != '.') {
 216         request_parse_status = Http::scHttpVersionNotSupported;
 217         return -1;
 218     }
 219     // catch missing minor part
 220     if (++i > line_end) {
 221         request_parse_status = Http::scHttpVersionNotSupported;
 222         return -1;
 223     }
 224     /* next should be one or more digits */
 225     if (!isdigit(buf[i])) {
 226         request_parse_status = Http::scHttpVersionNotSupported;
 227         return -1;
 228     }
 229     int min = 0;
 230     for (; i <= line_end && (isdigit(buf[i])) && min < 65536; ++i) {
 231         min = min * 10;
 232         min = min + (buf[i]) - '0';
 233     }
 234     // catch too-big values or trailing garbage
 235     if (min >= 65536 || i < line_end) {
 236         request_parse_status = Http::scHttpVersionNotSupported;
 237         return -1;
 238     }
 239     req.v_min = min;
 240
 241     /*
 242      * Rightio - we have all the schtuff. Return true; we've got enough.
 243      */
 244     request_parse_status = Http::scOkay;
 245     return 1;
 246 }
 247
 248 int
 249 HttpParserParseReqLine(HttpParser *hmsg)
 250 {
 251     PROF_start(HttpParserParseReqLine);
 252     int retcode = hmsg->parseRequestFirstLine();
 253     debugs(74, 5, "Parser: retval " << retcode << ": from " << hmsg->req.start <<
 254            "->" << hmsg->req.end << ": method " << hmsg->req.m_start << "->" <<
 255            hmsg->req.m_end << "; url " << hmsg->req.u_start << "->" << hmsg->req.u_end <<
 256            "; version " << hmsg->req.v_start << "->" << hmsg->req.v_end << " (" << hmsg->req.v_maj <<
 257            "/" << hmsg->req.v_min << ")");
 258     PROF_stop(HttpParserParseReqLine);
 259     return retcode;
 260 }
 261
 262 #if MSGDODEBUG
 263 /* XXX This should eventually turn into something inlined or #define'd */
 264 int
 265 HttpParserReqSz(HttpParser *hp)
 266 {
 267     assert(hp->state == HTTP_PARSE_NEW);
 268     assert(hp->req.start != -1);
 269     assert(hp->req.end != -1);
 270     return hp->req.end - hp->req.start + 1;
 271 }
 272
 273 /*
 274  * This +1 makes it 'right' but won't make any sense if
 275  * there's a 0 byte header? This won't happen normally - a valid header
 276  * is at -least- a blank line (\n, or \r\n.)
 277  */
 278 int
 279 HttpParserHdrSz(HttpParser *hp)
 280 {
 281     assert(hp->state == HTTP_PARSE_NEW);
 282     assert(hp->hdr_start != -1);
 283     assert(hp->hdr_end != -1);
 284     return hp->hdr_end - hp->hdr_start + 1;
 285 }
 286
 287 const char *
 288 HttpParserHdrBuf(HttpParser *hp)
 289 {
 290     assert(hp->state == HTTP_PARSE_NEW);
 291     assert(hp->hdr_start != -1);
 292     assert(hp->hdr_end != -1);
 293     return hp->buf + hp->hdr_start;
 294 }
 295
 296 int
 297 HttpParserRequestLen(HttpParser *hp)
 298 {
 299     return hp->hdr_end - hp->req.start + 1;
 300 }
 301 #endif
 302