src/HttpParser.cc

   1 #include "squid.h"
   2 #include "Debug.h"
   3 #include "HttpParser.h"
   4 #include "structs.h"
   5 #include "profiler/Profiler.h"
   6
   7 void
   8 HttpParser::clear()
   9 {
  10     state = HTTP_PARSE_NONE;
  11     request_parse_status = HTTP_STATUS_NONE;
  12     buf = NULL;
  13     bufsiz = 0;
  14     req.start = req.end = -1;
  15     hdr_start = hdr_end = -1;
  16     req.m_start = req.m_end = -1;
  17     req.u_start = req.u_end = -1;
  18     req.v_start = req.v_end = -1;
  19     req.v_maj = req.v_min = 0;
  20 }
  21
  22 void
  23 HttpParser::reset(const char *aBuf, int len)
  24 {
  25     clear(); // empty the state.
  26     state = HTTP_PARSE_NEW;
  27     buf = aBuf;
  28     bufsiz = len;
  29     debugs(74, 5, HERE << "Request buffer is " << buf);
  30 }
  31
  32 int
  33 HttpParser::parseRequestFirstLine()
  34 {
  35     int second_word = -1; // track the suspected URI start
  36     int first_whitespace = -1, last_whitespace = -1; // track the first and last SP byte
  37     int line_end = -1; // tracks the last byte BEFORE terminal \r\n or \n sequence
  38
  39     debugs(74, 5, HERE << "parsing possible request: " << buf);
  40
  41     // Single-pass parse: (provided we have the whole line anyways)
  42
  43     req.start = 0;
  44     if (Config.onoff.relaxed_header_parser) {
  45         if (Config.onoff.relaxed_header_parser < 0 && buf[req.start] == ' ')
  46             debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
  47                    "Whitespace bytes received ahead of method. " <<
  48                    "Ignored due to relaxed_header_parser.");
  49         // Be tolerant of prefix spaces (other bytes are valid method values)
  50         for (; req.start < bufsiz && buf[req.start] == ' '; ++req.start);
  51     }
  52     req.end = -1;
  53     for (int i = 0; i < bufsiz; ++i) {
  54         // track first and last whitespace (SP only)
  55         if (buf[i] == ' ') {
  56             last_whitespace = i;
  57             if (first_whitespace < req.start)
  58                 first_whitespace = i;
  59         }
  60
  61         // track next non-SP/non-HT byte after first_whitespace
  62         if (second_word < first_whitespace && buf[i] != ' ' && buf[i] != '\t') {
  63             second_word = i;
  64         }
  65
  66         // locate line terminator
  67         if (buf[i] == '\n') {
  68             req.end = i;
  69             line_end = i - 1;
  70             break;
  71         }
  72         if (i < bufsiz - 1 && buf[i] == '\r') {
  73             if (Config.onoff.relaxed_header_parser) {
  74                 if (Config.onoff.relaxed_header_parser < 0 && buf[i + 1] == '\r')
  75                     debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
  76                            "Series of carriage-return bytes received prior to line terminator. " <<
  77                            "Ignored due to relaxed_header_parser.");
  78
  79                 // Be tolerant of invalid multiple \r prior to terminal \n
  80                 if (buf[i + 1] == '\n' || buf[i + 1] == '\r')
  81                     line_end = i - 1;
  82                 while (i < bufsiz - 1 && buf[i + 1] == '\r')
  83                     ++i;
  84
  85                 if (buf[i + 1] == '\n') {
  86                     req.end = i + 1;
  87                     break;
  88                 }
  89             } else {
  90                 if (buf[i + 1] == '\n') {
  91                     req.end = i + 1;
  92                     line_end = i - 1;
  93                     break;
  94                 }
  95             }
  96
  97             // RFC 2616 section 5.1
  98             // "No CR or LF is allowed except in the final CRLF sequence"
  99             request_parse_status = HTTP_BAD_REQUEST;
 100             return -1;
 101         }
 102     }
 103     if (req.end == -1) {
 104         debugs(74, 5, "Parser: retval 0: from " << req.start <<
 105                "->" << req.end << ": needs more data to complete first line.");
 106         return 0;
 107     }
 108
 109     // NP: we have now seen EOL, more-data (0) cannot occur.
 110     //     From here on any failure is -1, success is 1
 111
 112     // Input Validation:
 113
 114     // Process what we now know about the line structure into field offsets
 115     // generating HTTP status for any aborts as we go.
 116
 117     // First non-whitespace = beginning of method
 118     if (req.start > line_end) {
 119         request_parse_status = HTTP_BAD_REQUEST;
 120         return -1;
 121     }
 122     req.m_start = req.start;
 123
 124     // First whitespace = end of method
 125     if (first_whitespace > line_end || first_whitespace < req.start) {
 126         request_parse_status = HTTP_BAD_REQUEST; // no method
 127         return -1;
 128     }
 129     req.m_end = first_whitespace - 1;
 130     if (req.m_end < req.m_start) {
 131         request_parse_status = HTTP_BAD_REQUEST; // missing URI?
 132         return -1;
 133     }
 134
 135     // First non-whitespace after first SP = beginning of URL+Version
 136     if (second_word > line_end || second_word < req.start) {
 137         request_parse_status = HTTP_BAD_REQUEST; // missing URI
 138         return -1;
 139     }
 140     req.u_start = second_word;
 141
 142     // RFC 1945: SP and version following URI are optional, marking version 0.9
 143     // we identify this by the last whitespace being earlier than URI start
 144     if (last_whitespace < second_word && last_whitespace >= req.start) {
 145         req.v_maj = 0;
 146         req.v_min = 9;
 147         req.u_end = line_end;
 148         request_parse_status = HTTP_OK; // HTTP/0.9
 149         return 1;
 150     } else {
 151         // otherwise last whitespace is somewhere after end of URI.
 152         req.u_end = last_whitespace;
 153         // crop any trailing whitespace in the area we think of as URI
 154         for (; req.u_end >= req.u_start && xisspace(buf[req.u_end]); --req.u_end);
 155     }
 156     if (req.u_end < req.u_start) {
 157         request_parse_status = HTTP_BAD_REQUEST; // missing URI
 158         return -1;
 159     }
 160
 161     // Last whitespace SP = before start of protocol/version
 162     if (last_whitespace >= line_end) {
 163         request_parse_status = HTTP_BAD_REQUEST; // missing version
 164         return -1;
 165     }
 166     req.v_start = last_whitespace + 1;
 167     req.v_end = line_end;
 168
 169     // We only accept HTTP protocol requests right now.
 170     // TODO: accept other protocols; RFC 2326 (RTSP protocol) etc
 171     if ((req.v_end - req.v_start +1) < 5 || strncasecmp(&buf[req.v_start], "HTTP/", 5) != 0) {
 172 #if USE_HTTP_VIOLATIONS
 173         // being lax; old parser accepted strange versions
 174         // there is a LOT of cases which are ambiguous, therefore we cannot use relaxed_header_parser here.
 175         req.v_maj = 0;
 176         req.v_min = 9;
 177         req.u_end = line_end;
 178         request_parse_status = HTTP_OK; // treat as HTTP/0.9
 179         return 1;
 180 #else
 181         request_parse_status = HTTP_HTTP_VERSION_NOT_SUPPORTED; // protocol not supported / implemented.
 182         return -1;
 183 #endif
 184     }
 185
 186     int i = req.v_start + sizeof("HTTP/") -1;
 187
 188     /* next should be 1 or more digits */
 189     if (!isdigit(buf[i])) {
 190         request_parse_status = HTTP_HTTP_VERSION_NOT_SUPPORTED;
 191         return -1;
 192     }
 193     int maj = 0;
 194     for (; i <= line_end && (isdigit(buf[i])) && maj < 65536; ++i) {
 195         maj = maj * 10;
 196         maj = maj + (buf[i]) - '0';
 197     }
 198     // catch too-big values or missing remainders
 199     if (maj >= 65536 || i > line_end) {
 200         request_parse_status = HTTP_HTTP_VERSION_NOT_SUPPORTED;
 201         return -1;
 202     }
 203     req.v_maj = maj;
 204
 205     /* next should be .; we -have- to have this as we have a whole line.. */
 206     if (buf[i] != '.') {
 207         request_parse_status = HTTP_HTTP_VERSION_NOT_SUPPORTED;
 208         return -1;
 209     }
 210     // catch missing minor part
 211     if (++i > line_end) {
 212         request_parse_status = HTTP_HTTP_VERSION_NOT_SUPPORTED;
 213         return -1;
 214     }
 215     /* next should be one or more digits */
 216     if (!isdigit(buf[i])) {
 217         request_parse_status = HTTP_HTTP_VERSION_NOT_SUPPORTED;
 218         return -1;
 219     }
 220     int min = 0;
 221     for (; i <= line_end && (isdigit(buf[i])) && min < 65536; ++i) {
 222         min = min * 10;
 223         min = min + (buf[i]) - '0';
 224     }
 225     // catch too-big values or trailing garbage
 226     if (min >= 65536 || i < line_end) {
 227         request_parse_status = HTTP_HTTP_VERSION_NOT_SUPPORTED;
 228         return -1;
 229     }
 230     req.v_min = min;
 231
 232     /*
 233      * Rightio - we have all the schtuff. Return true; we've got enough.
 234      */
 235     request_parse_status = HTTP_OK;
 236     return 1;
 237 }
 238
 239 int
 240 HttpParserParseReqLine(HttpParser *hmsg)
 241 {
 242     PROF_start(HttpParserParseReqLine);
 243     int retcode = hmsg->parseRequestFirstLine();
 244     debugs(74, 5, "Parser: retval " << retcode << ": from " << hmsg->req.start <<
 245            "->" << hmsg->req.end << ": method " << hmsg->req.m_start << "->" <<
 246            hmsg->req.m_end << "; url " << hmsg->req.u_start << "->" << hmsg->req.u_end <<
 247            "; version " << hmsg->req.v_start << "->" << hmsg->req.v_end << " (" << hmsg->req.v_maj <<
 248            "/" << hmsg->req.v_min << ")");
 249     PROF_stop(HttpParserParseReqLine);
 250     return retcode;
 251 }
 252
 253 #if MSGDODEBUG
 254 /* XXX This should eventually turn into something inlined or #define'd */
 255 int
 256 HttpParserReqSz(HttpParser *hp)
 257 {
 258     assert(hp->state == HTTP_PARSE_NEW);
 259     assert(hp->req.start != -1);
 260     assert(hp->req.end != -1);
 261     return hp->req.end - hp->req.start + 1;
 262 }
 263
 264 /*
 265  * This +1 makes it 'right' but won't make any sense if
 266  * there's a 0 byte header? This won't happen normally - a valid header
 267  * is at -least- a blank line (\n, or \r\n.)
 268  */
 269 int
 270 HttpParserHdrSz(HttpParser *hp)
 271 {
 272     assert(hp->state == HTTP_PARSE_NEW);
 273     assert(hp->hdr_start != -1);
 274     assert(hp->hdr_end != -1);
 275     return hp->hdr_end - hp->hdr_start + 1;
 276 }
 277
 278 const char *
 279 HttpParserHdrBuf(HttpParser *hp)
 280 {
 281     assert(hp->state == HTTP_PARSE_NEW);
 282     assert(hp->hdr_start != -1);
 283     assert(hp->hdr_end != -1);
 284     return hp->buf + hp->hdr_start;
 285 }
 286
 287 int
 288 HttpParserRequestLen(HttpParser *hp)
 289 {
 290     return hp->hdr_end - hp->req.start + 1;
 291 }
 292 #endif
 293