]> git.ipfire.org Git - thirdparty/squid.git/blob - src/HttpHeader.cc
Boilerplate: update copyright blurbs on src/
[thirdparty/squid.git] / src / HttpHeader.cc
1 /*
2 * Copyright (C) 1996-2014 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9 /* DEBUG: section 55 HTTP Header */
10
11 #include "squid.h"
12 #include "base64.h"
13 #include "globals.h"
14 #include "HttpHdrCc.h"
15 #include "HttpHdrContRange.h"
16 #include "HttpHdrSc.h"
17 #include "HttpHeader.h"
18 #include "HttpHeaderFieldInfo.h"
19 #include "HttpHeaderStat.h"
20 #include "HttpHeaderTools.h"
21 #include "MemBuf.h"
22 #include "mgr/Registration.h"
23 #include "profiler/Profiler.h"
24 #include "rfc1123.h"
25 #include "SquidConfig.h"
26 #include "SquidString.h"
27 #include "StatHist.h"
28 #include "Store.h"
29 #include "StrList.h"
30 #include "TimeOrTag.h"
31
32 #include <algorithm>
33
34 /* XXX: the whole set of API managing the entries vector should be rethought
35 * after the parser-ng effort is complete.
36 */
37
38 /*
39 * On naming conventions:
40 *
41 * HTTP/1.1 defines message-header as
42 *
43 * message-header = field-name ":" [ field-value ] CRLF
44 * field-name = token
45 * field-value = *( field-content | LWS )
46 *
47 * HTTP/1.1 does not give a name to a group of all message-headers in a message.
48 * Squid 1.1 seems to refer to that group _plus_ start-line as "headers".
49 *
50 * HttpHeader is an object that represents all message-headers in a message.
51 * HttpHeader does not manage start-line.
52 *
53 * HttpHeader is implemented as a collection of header "entries".
54 * An entry is a (field_id, field_name, field_value) triplet.
55 */
56
57 /*
58 * local constants and vars
59 */
60
61 /*
62 * A table with major attributes for every known field.
63 * We calculate name lengths and reorganize this array on start up.
64 * After reorganization, field id can be used as an index to the table.
 *
 * Each row is { field name string, HDR_* id, ft* field type tag }.
 * httpHeaderInitModule() asserts that the number of rows equals HDR_ENUM_END,
 * so a new HDR_* id needs a row here. Rows guarded by #if only exist when the
 * corresponding feature macro is enabled.
65 */
66 static const HttpHeaderFieldAttrs HeadersAttrs[] = {
67 {"Accept", HDR_ACCEPT, ftStr},
68
69 {"Accept-Charset", HDR_ACCEPT_CHARSET, ftStr},
70 {"Accept-Encoding", HDR_ACCEPT_ENCODING, ftStr},
71 {"Accept-Language", HDR_ACCEPT_LANGUAGE, ftStr},
72 {"Accept-Ranges", HDR_ACCEPT_RANGES, ftStr},
73 {"Age", HDR_AGE, ftInt},
74 {"Allow", HDR_ALLOW, ftStr},
75 {"Authorization", HDR_AUTHORIZATION, ftStr}, /* for now */
76 {"Cache-Control", HDR_CACHE_CONTROL, ftPCc},
77 {"Connection", HDR_CONNECTION, ftStr},
78 {"Content-Base", HDR_CONTENT_BASE, ftStr},
79 {"Content-Disposition", HDR_CONTENT_DISPOSITION, ftStr}, /* for now */
80 {"Content-Encoding", HDR_CONTENT_ENCODING, ftStr},
81 {"Content-Language", HDR_CONTENT_LANGUAGE, ftStr},
82 {"Content-Length", HDR_CONTENT_LENGTH, ftInt64},
83 {"Content-Location", HDR_CONTENT_LOCATION, ftStr},
84 {"Content-MD5", HDR_CONTENT_MD5, ftStr}, /* for now */
85 {"Content-Range", HDR_CONTENT_RANGE, ftPContRange},
86 {"Content-Type", HDR_CONTENT_TYPE, ftStr},
87 {"Cookie", HDR_COOKIE, ftStr},
88 {"Cookie2", HDR_COOKIE2, ftStr},
89 {"Date", HDR_DATE, ftDate_1123},
90 {"ETag", HDR_ETAG, ftETag},
91 {"Expect", HDR_EXPECT, ftStr},
92 {"Expires", HDR_EXPIRES, ftDate_1123},
93 {"Forwarded", HDR_FORWARDED, ftStr},
94 {"From", HDR_FROM, ftStr},
95 {"Host", HDR_HOST, ftStr},
96 {"HTTP2-Settings", HDR_HTTP2_SETTINGS, ftStr}, /* for now */
97 {"If-Match", HDR_IF_MATCH, ftStr}, /* for now */
98 {"If-Modified-Since", HDR_IF_MODIFIED_SINCE, ftDate_1123},
99 {"If-None-Match", HDR_IF_NONE_MATCH, ftStr}, /* for now */
100 {"If-Range", HDR_IF_RANGE, ftDate_1123_or_ETag},
101 {"If-Unmodified-Since", HDR_IF_UNMODIFIED_SINCE, ftDate_1123},
102 {"Keep-Alive", HDR_KEEP_ALIVE, ftStr},
103 {"Key", HDR_KEY, ftStr},
104 {"Last-Modified", HDR_LAST_MODIFIED, ftDate_1123},
105 {"Link", HDR_LINK, ftStr},
106 {"Location", HDR_LOCATION, ftStr},
107 {"Max-Forwards", HDR_MAX_FORWARDS, ftInt64},
108 {"Mime-Version", HDR_MIME_VERSION, ftStr}, /* for now */
109 {"Negotiate", HDR_NEGOTIATE, ftStr},
110 {"Origin", HDR_ORIGIN, ftStr},
111 {"Pragma", HDR_PRAGMA, ftStr},
112 {"Proxy-Authenticate", HDR_PROXY_AUTHENTICATE, ftStr},
113 {"Proxy-Authentication-Info", HDR_PROXY_AUTHENTICATION_INFO, ftStr},
114 {"Proxy-Authorization", HDR_PROXY_AUTHORIZATION, ftStr},
115 {"Proxy-Connection", HDR_PROXY_CONNECTION, ftStr},
116 {"Proxy-support", HDR_PROXY_SUPPORT, ftStr},
117 {"Public", HDR_PUBLIC, ftStr},
118 {"Range", HDR_RANGE, ftPRange},
119 {"Referer", HDR_REFERER, ftStr},
120 {"Request-Range", HDR_REQUEST_RANGE, ftPRange}, /* usually matches HDR_RANGE */
121 {"Retry-After", HDR_RETRY_AFTER, ftStr}, /* for now (ftDate_1123 or ftInt!) */
122 {"Server", HDR_SERVER, ftStr},
123 {"Set-Cookie", HDR_SET_COOKIE, ftStr},
124 {"Set-Cookie2", HDR_SET_COOKIE2, ftStr},
125 {"TE", HDR_TE, ftStr},
126 {"Title", HDR_TITLE, ftStr},
127 {"Trailer", HDR_TRAILER, ftStr},
128 {"Transfer-Encoding", HDR_TRANSFER_ENCODING, ftStr},
129 {"Translate", HDR_TRANSLATE, ftStr}, /* for now. may need to crop */
130 {"Unless-Modified-Since", HDR_UNLESS_MODIFIED_SINCE, ftStr}, /* for now ignore. may need to crop */
131 {"Upgrade", HDR_UPGRADE, ftStr}, /* for now */
132 {"User-Agent", HDR_USER_AGENT, ftStr},
133 {"Vary", HDR_VARY, ftStr}, /* for now */
134 {"Via", HDR_VIA, ftStr}, /* for now */
135 {"Warning", HDR_WARNING, ftStr}, /* for now */
136 {"WWW-Authenticate", HDR_WWW_AUTHENTICATE, ftStr},
137 {"Authentication-Info", HDR_AUTHENTICATION_INFO, ftStr},
138 {"X-Cache", HDR_X_CACHE, ftStr},
139 {"X-Cache-Lookup", HDR_X_CACHE_LOOKUP, ftStr},
140 {"X-Forwarded-For", HDR_X_FORWARDED_FOR, ftStr},
141 {"X-Request-URI", HDR_X_REQUEST_URI, ftStr},
142 {"X-Squid-Error", HDR_X_SQUID_ERROR, ftStr},
143 #if X_ACCELERATOR_VARY
144 {"X-Accelerator-Vary", HDR_X_ACCELERATOR_VARY, ftStr},
145 #endif
146 #if USE_ADAPTATION
147 {"X-Next-Services", HDR_X_NEXT_SERVICES, ftStr},
148 #endif
149 {"Surrogate-Capability", HDR_SURROGATE_CAPABILITY, ftStr},
150 {"Surrogate-Control", HDR_SURROGATE_CONTROL, ftPSc},
151 {"Front-End-Https", HDR_FRONT_END_HTTPS, ftStr},
152 {"FTP-Command", HDR_FTP_COMMAND, ftStr},
153 {"FTP-Arguments", HDR_FTP_ARGUMENTS, ftStr},
154 {"FTP-Pre", HDR_FTP_PRE, ftStr},
155 {"FTP-Status", HDR_FTP_STATUS, ftInt},
156 {"FTP-Reason", HDR_FTP_REASON, ftStr},
157 {"Other:", HDR_OTHER, ftStr} /* ':' will not allow matches */
158 };
159
/*
 * Per-field info built from HeadersAttrs by httpHeaderInitModule() and
 * released (and reset to NULL) by httpHeaderCleanModule(). Code in this file
 * indexes it by header id (e.g. Headers[id].name, Headers[id].stat).
 */
160 static HttpHeaderFieldInfo *Headers = NULL;
161
162 http_hdr_type &operator++ (http_hdr_type &aHeader)
163 {
164 int tmp = (int)aHeader;
165 aHeader = (http_hdr_type)(++tmp);
166 return aHeader;
167 }
168
169 /*
170 * headers with field values defined as #(values) in HTTP/1.1
171 * Headers that are currently not recognized, are commented out.
 *
 * ListHeadersMask is computed from ListHeadersArr in httpHeaderInitModule().
 * getList() asserts that the requested id is in this mask, while findEntry()
 * and findLastEntry() assert that it is not.
172 */
173 static HttpHeaderMask ListHeadersMask; /* set run-time using ListHeadersArr */
174 static http_hdr_type ListHeadersArr[] = {
175 HDR_ACCEPT,
176 HDR_ACCEPT_CHARSET,
177 HDR_ACCEPT_ENCODING,
178 HDR_ACCEPT_LANGUAGE,
179 HDR_ACCEPT_RANGES,
180 HDR_ALLOW,
181 HDR_CACHE_CONTROL,
182 HDR_CONTENT_ENCODING,
183 HDR_CONTENT_LANGUAGE,
184 HDR_CONNECTION,
185 HDR_EXPECT,
186 HDR_IF_MATCH,
187 HDR_IF_NONE_MATCH,
188 HDR_KEY,
189 HDR_LINK,
190 HDR_PRAGMA,
191 HDR_PROXY_CONNECTION,
192 HDR_PROXY_SUPPORT,
193 HDR_TRANSFER_ENCODING,
194 HDR_UPGRADE,
195 HDR_VARY,
196 HDR_VIA,
197 HDR_WARNING,
198 HDR_WWW_AUTHENTICATE,
199 HDR_AUTHENTICATION_INFO,
200 HDR_PROXY_AUTHENTICATION_INFO,
201 /* HDR_TE, HDR_TRAILER */
202 #if X_ACCELERATOR_VARY
203 HDR_X_ACCELERATOR_VARY,
204 #endif
205 #if USE_ADAPTATION
206 HDR_X_NEXT_SERVICES,
207 #endif
208 HDR_SURROGATE_CAPABILITY,
209 HDR_SURROGATE_CONTROL,
210 HDR_FORWARDED,
211 HDR_X_FORWARDED_FOR
212 };
213
214 /* general-headers */
/* httpHeaderInitModule() folds these ids into both ReplyHeadersMask and RequestHeadersMask. */
215 static http_hdr_type GeneralHeadersArr[] = {
216 HDR_CACHE_CONTROL,
217 HDR_CONNECTION,
218 HDR_DATE,
219 HDR_FORWARDED,
220 HDR_X_FORWARDED_FOR,
221 HDR_MIME_VERSION,
222 HDR_PRAGMA,
223 HDR_PROXY_CONNECTION,
224 HDR_TRANSFER_ENCODING,
225 HDR_UPGRADE,
226 /* HDR_TRAILER, */
227 HDR_VIA,
228 };
229
230 /* entity-headers */
/* httpHeaderInitModule() folds these ids into both ReplyHeadersMask and RequestHeadersMask. */
231 static http_hdr_type EntityHeadersArr[] = {
232 HDR_ALLOW,
233 HDR_CONTENT_BASE,
234 HDR_CONTENT_ENCODING,
235 HDR_CONTENT_LANGUAGE,
236 HDR_CONTENT_LENGTH,
237 HDR_CONTENT_LOCATION,
238 HDR_CONTENT_MD5,
239 HDR_CONTENT_RANGE,
240 HDR_CONTENT_TYPE,
241 HDR_ETAG,
242 HDR_EXPIRES,
243 HDR_LAST_MODIFIED,
244 HDR_LINK,
245 HDR_OTHER
246 };
247
248 /* request-only headers */
/*
 * RequestHeadersMask is built in httpHeaderInitModule() from this array plus
 * the general and entity header arrays, and is installed as the owner_mask of
 * the hoRequest statistics slot.
 */
249 static HttpHeaderMask RequestHeadersMask; /* set run-time using RequestHeaders */
250 static http_hdr_type RequestHeadersArr[] = {
251 HDR_ACCEPT,
252 HDR_ACCEPT_CHARSET,
253 HDR_ACCEPT_ENCODING,
254 HDR_ACCEPT_LANGUAGE,
255 HDR_AUTHORIZATION,
256 HDR_EXPECT,
257 HDR_FROM,
258 HDR_HOST,
259 HDR_HTTP2_SETTINGS,
260 HDR_IF_MATCH,
261 HDR_IF_MODIFIED_SINCE,
262 HDR_IF_NONE_MATCH,
263 HDR_IF_RANGE,
264 HDR_IF_UNMODIFIED_SINCE,
265 HDR_MAX_FORWARDS,
266 HDR_ORIGIN,
267 HDR_PROXY_AUTHORIZATION,
268 HDR_RANGE,
269 HDR_REFERER,
270 HDR_REQUEST_RANGE,
271 HDR_TE,
272 HDR_USER_AGENT,
273 HDR_SURROGATE_CAPABILITY
274 };
275
276 /* reply-only headers */
/*
 * ReplyHeadersMask is built in httpHeaderInitModule() from this array plus
 * the general and entity header arrays, and is installed as the owner_mask of
 * the hoReply (and, with USE_HTCP, hoHtcpReply) statistics slots.
 */
277 static HttpHeaderMask ReplyHeadersMask; /* set run-time using ReplyHeaders */
278 static http_hdr_type ReplyHeadersArr[] = {
279 HDR_ACCEPT_ENCODING,
280 HDR_ACCEPT_RANGES,
281 HDR_AGE,
282 HDR_KEY,
283 HDR_LOCATION,
284 HDR_PROXY_AUTHENTICATE,
285 HDR_PUBLIC,
286 HDR_RETRY_AFTER,
287 HDR_SERVER,
288 HDR_SET_COOKIE,
289 HDR_SET_COOKIE2,
290 HDR_VARY,
291 HDR_WARNING,
292 HDR_WWW_AUTHENTICATE,
293 HDR_X_CACHE,
294 HDR_X_CACHE_LOOKUP,
295 HDR_X_REQUEST_URI,
296 #if X_ACCELERATOR_VARY
297 HDR_X_ACCELERATOR_VARY,
298 #endif
299 #if USE_ADAPTATION
300 HDR_X_NEXT_SERVICES,
301 #endif
302 HDR_X_SQUID_ERROR,
303 HDR_SURROGATE_CONTROL
304 };
305
306 /* hop-by-hop headers */
/* HopByHopHeadersMask is computed from HopByHopHeadersArr in httpHeaderInitModule(). */
307 static HttpHeaderMask HopByHopHeadersMask;
308 static http_hdr_type HopByHopHeadersArr[] = {
309 HDR_CONNECTION,
310 HDR_HTTP2_SETTINGS,
311 HDR_KEEP_ALIVE,
312 /*HDR_PROXY_AUTHENTICATE, // removal handled specially for peer login */
313 HDR_PROXY_AUTHORIZATION,
314 HDR_TE,
315 HDR_TRAILER,
316 HDR_TRANSFER_ENCODING,
317 HDR_UPGRADE,
318 HDR_PROXY_CONNECTION
319 };
320
321 /* header accounting */
/*
 * One statistics slot per header owner; code in this file indexes the array
 * with http_hdr_owner_type values (hoRequest, hoReply, hoHtcpReply, owner).
 * httpHeaderInitModule() asserts that the slot count equals hoReply + 1, so
 * owners greater than hoReply have no slot here.
 */
322 static HttpHeaderStat HttpHeaderStats[] = {
323 {"all"},
324 #if USE_HTCP
325 {"HTCP reply"},
326 #endif
327 {"request"},
328 {"reply"}
329 };
/* number of slots in HttpHeaderStats */
330 static int HttpHeaderStatCount = countof(HttpHeaderStats);
331
/* NOTE(review): not updated anywhere in the visible part of this file; presumably counts parsed header entries. */
332 static int HeaderEntryParsedCount = 0;
333
334 /*
335 * forward declarations and local routines
336 */
337
338 class StoreEntry;
/* asserts that id is a valid header id, i.e. within [0, HDR_ENUM_END) */
339 #define assert_eid(id) assert((id) >= 0 && (id) < HDR_ENUM_END)
340
341 static void httpHeaderNoteParsedEntry(http_hdr_type id, String const &value, int error);
342
/* resets one statistics slot and sizes its histograms; defined below */
343 static void httpHeaderStatInit(HttpHeaderStat * hs, const char *label);
344 static void httpHeaderStatDump(const HttpHeaderStat * hs, StoreEntry * e);
345
346 /** store report about current header usage and other stats */
347 static void httpHeaderStoreReport(StoreEntry * e);
348
349 /*
350 * Module initialization routines
351 */
352
353 static void
354 httpHeaderRegisterWithCacheManager(void)
355 {
356 Mgr::RegisterAction("http_headers",
357 "HTTP Header Statistics",
358 httpHeaderStoreReport, 0, 1);
359 }
360
361 void
362 httpHeaderInitModule(void)
363 {
364 int i;
365 /* check that we have enough space for masks */
366 assert(8 * sizeof(HttpHeaderMask) >= HDR_ENUM_END);
367 /* all headers must be described */
368 assert(countof(HeadersAttrs) == HDR_ENUM_END);
369
370 if (!Headers)
371 Headers = httpHeaderBuildFieldsInfo(HeadersAttrs, HDR_ENUM_END);
372
373 /* create masks */
374 httpHeaderMaskInit(&ListHeadersMask, 0);
375 httpHeaderCalcMask(&ListHeadersMask, ListHeadersArr, countof(ListHeadersArr));
376
377 httpHeaderMaskInit(&ReplyHeadersMask, 0);
378 httpHeaderCalcMask(&ReplyHeadersMask, ReplyHeadersArr, countof(ReplyHeadersArr));
379 httpHeaderCalcMask(&ReplyHeadersMask, GeneralHeadersArr, countof(GeneralHeadersArr));
380 httpHeaderCalcMask(&ReplyHeadersMask, EntityHeadersArr, countof(EntityHeadersArr));
381
382 httpHeaderMaskInit(&RequestHeadersMask, 0);
383 httpHeaderCalcMask(&RequestHeadersMask, RequestHeadersArr, countof(RequestHeadersArr));
384 httpHeaderCalcMask(&RequestHeadersMask, GeneralHeadersArr, countof(GeneralHeadersArr));
385 httpHeaderCalcMask(&RequestHeadersMask, EntityHeadersArr, countof(EntityHeadersArr));
386
387 httpHeaderMaskInit(&HopByHopHeadersMask, 0);
388 httpHeaderCalcMask(&HopByHopHeadersMask, HopByHopHeadersArr, countof(HopByHopHeadersArr));
389
390 /* init header stats */
391 assert(HttpHeaderStatCount == hoReply + 1);
392 for (i = 0; i < HttpHeaderStatCount; ++i)
393 httpHeaderStatInit(HttpHeaderStats + i, HttpHeaderStats[i].label);
394
395 HttpHeaderStats[hoRequest].owner_mask = &RequestHeadersMask;
396
397 HttpHeaderStats[hoReply].owner_mask = &ReplyHeadersMask;
398
399 #if USE_HTCP
400 HttpHeaderStats[hoHtcpReply].owner_mask = &ReplyHeadersMask;
401 #endif
402
403 /* init dependent modules */
404 httpHdrCcInitModule();
405 httpHdrScInitModule();
406
407 httpHeaderRegisterWithCacheManager();
408 }
409
410 void
411 httpHeaderCleanModule(void)
412 {
413 httpHeaderDestroyFieldsInfo(Headers, HDR_ENUM_END);
414 Headers = NULL;
415 httpHdrCcCleanModule();
416 httpHdrScCleanModule();
417 }
418
419 static void
420 httpHeaderStatInit(HttpHeaderStat * hs, const char *label)
421 {
422 assert(hs);
423 assert(label);
424 memset(hs, 0, sizeof(HttpHeaderStat));
425 hs->label = label;
426 hs->hdrUCountDistr.enumInit(32); /* not a real enum */
427 hs->fieldTypeDistr.enumInit(HDR_ENUM_END);
428 hs->ccTypeDistr.enumInit(CC_ENUM_END);
429 hs->scTypeDistr.enumInit(SC_ENUM_END);
430 }
431
432 /*
433 * HttpHeader Implementation
434 */
435
436 HttpHeader::HttpHeader() : owner (hoNone), len (0)
437 {
438 httpHeaderMaskInit(&mask, 0);
439 }
440
441 HttpHeader::HttpHeader(const http_hdr_owner_type anOwner): owner(anOwner), len(0)
442 {
443 assert(anOwner > hoNone && anOwner < hoEnd);
444 debugs(55, 7, "init-ing hdr: " << this << " owner: " << owner);
445 httpHeaderMaskInit(&mask, 0);
446 }
447
448 HttpHeader::HttpHeader(const HttpHeader &other): owner(other.owner), len(other.len)
449 {
450 httpHeaderMaskInit(&mask, 0);
451 update(&other, NULL); // will update the mask as well
452 }
453
454 HttpHeader::~HttpHeader()
455 {
456 clean();
457 }
458
459 HttpHeader &
460 HttpHeader::operator =(const HttpHeader &other)
461 {
462 if (this != &other) {
463 // we do not really care, but the caller probably does
464 assert(owner == other.owner);
465 clean();
466 update(&other, NULL); // will update the mask as well
467 len = other.len;
468 }
469 return *this;
470 }
471
472 void
473 HttpHeader::clean()
474 {
475
476 assert(owner > hoNone && owner < hoEnd);
477 debugs(55, 7, "cleaning hdr: " << this << " owner: " << owner);
478
479 PROF_start(HttpHeaderClean);
480
481 if (owner <= hoReply) {
482 /*
483 * An unfortunate bug. The entries array is initialized
484 * such that count is set to zero. httpHeaderClean() seems to
485 * be called both when 'hdr' is created, and destroyed. Thus,
486 * we accumulate a large number of zero counts for 'hdr' before
487 * it is ever used. Can't think of a good way to fix it, except
488 * adding a state variable that indicates whether or not 'hdr'
489 * has been used. As a hack, just never count zero-sized header
490 * arrays.
491 */
492 if (!entries.empty())
493 HttpHeaderStats[owner].hdrUCountDistr.count(entries.size());
494
495 ++ HttpHeaderStats[owner].destroyedCount;
496
497 HttpHeaderStats[owner].busyDestroyedCount += entries.size() > 0;
498 } // if (owner <= hoReply)
499
500 for (std::vector<HttpHeaderEntry *>::iterator i = entries.begin(); i != entries.end(); ++i) {
501 HttpHeaderEntry *e = *i;
502 if (e == NULL)
503 continue;
504 if (e->id < 0 || e->id >= HDR_ENUM_END) {
505 debugs(55, DBG_CRITICAL, "BUG: invalid entry (" << e->id << "). Ignored.");
506 } else {
507 if (owner <= hoReply)
508 HttpHeaderStats[owner].fieldTypeDistr.count(e->id);
509 delete e;
510 }
511 }
512
513 entries.clear();
514 httpHeaderMaskInit(&mask, 0);
515 len = 0;
516 PROF_stop(HttpHeaderClean);
517 }
518
519 /* append entries (also see httpHeaderUpdate) */
520 void
521 HttpHeader::append(const HttpHeader * src)
522 {
523 const HttpHeaderEntry *e;
524 HttpHeaderPos pos = HttpHeaderInitPos;
525 assert(src);
526 assert(src != this);
527 debugs(55, 7, "appending hdr: " << this << " += " << src);
528
529 while ((e = src->getEntry(&pos))) {
530 addEntry(e->clone());
531 }
532 }
533
534 /* use fresh entries to replace old ones */
535 void
536 httpHeaderUpdate(HttpHeader * old, const HttpHeader * fresh, const HttpHeaderMask * denied_mask)
537 {
538 assert (old);
539 old->update (fresh, denied_mask);
540 }
541
542 void
543 HttpHeader::update (HttpHeader const *fresh, HttpHeaderMask const *denied_mask)
544 {
545 const HttpHeaderEntry *e;
546 HttpHeaderPos pos = HttpHeaderInitPos;
547 assert(fresh);
548 assert(this != fresh);
549
550 while ((e = fresh->getEntry(&pos))) {
551 /* deny bad guys (ok to check for HDR_OTHER) here */
552
553 if (denied_mask && CBIT_TEST(*denied_mask, e->id))
554 continue;
555
556 if (e->id != HDR_OTHER)
557 delById(e->id);
558 else
559 delByName(e->name.termedBuf());
560 }
561
562 pos = HttpHeaderInitPos;
563 while ((e = fresh->getEntry(&pos))) {
564 /* deny bad guys (ok to check for HDR_OTHER) here */
565
566 if (denied_mask && CBIT_TEST(*denied_mask, e->id))
567 continue;
568
569 debugs(55, 7, "Updating header '" << HeadersAttrs[e->id].name << "' in cached entry");
570
571 addEntry(e->clone());
572 }
573 }
574
575 /* just handy in parsing: resets and returns false */
576 int
577 HttpHeader::reset()
578 {
579 clean();
580 return 0;
581 }
582
583 int
584 HttpHeader::parse(const char *header_start, const char *header_end)
585 {
586 const char *field_ptr = header_start;
587 HttpHeaderEntry *e, *e2;
588 int warnOnError = (Config.onoff.relaxed_header_parser <= 0 ? DBG_IMPORTANT : 2);
589
590 PROF_start(HttpHeaderParse);
591
592 assert(header_start && header_end);
593 debugs(55, 7, "parsing hdr: (" << this << ")" << std::endl << getStringPrefix(header_start, header_end));
594 ++ HttpHeaderStats[owner].parsedCount;
595
596 char *nulpos;
597 if ((nulpos = (char*)memchr(header_start, '\0', header_end - header_start))) {
598 debugs(55, DBG_IMPORTANT, "WARNING: HTTP header contains NULL characters {" <<
599 getStringPrefix(header_start, nulpos) << "}\nNULL\n{" << getStringPrefix(nulpos+1, header_end));
600 PROF_stop(HttpHeaderParse);
601 return reset();
602 }
603
604 /* common format headers are "<name>:[ws]<value>" lines delimited by <CRLF>.
605 * continuation lines start with a (single) space or tab */
606 while (field_ptr < header_end) {
607 const char *field_start = field_ptr;
608 const char *field_end;
609
610 do {
611 const char *this_line = field_ptr;
612 field_ptr = (const char *)memchr(field_ptr, '\n', header_end - field_ptr);
613
614 if (!field_ptr) {
615 // missing <LF>
616 PROF_stop(HttpHeaderParse);
617 return reset();
618 }
619
620 field_end = field_ptr;
621
622 ++field_ptr; /* Move to next line */
623
624 if (field_end > this_line && field_end[-1] == '\r') {
625 --field_end; /* Ignore CR LF */
626
627 if (owner == hoRequest && field_end > this_line) {
628 bool cr_only = true;
629 for (const char *p = this_line; p < field_end && cr_only; ++p) {
630 if (*p != '\r')
631 cr_only = false;
632 }
633 if (cr_only) {
634 debugs(55, DBG_IMPORTANT, "SECURITY WARNING: Rejecting HTTP request with a CR+ "
635 "header field to prevent request smuggling attacks: {" <<
636 getStringPrefix(header_start, header_end) << "}");
637 PROF_stop(HttpHeaderParse);
638 return reset();
639 }
640 }
641 }
642
643 /* Barf on stray CR characters */
644 if (memchr(this_line, '\r', field_end - this_line)) {
645 debugs(55, warnOnError, "WARNING: suspicious CR characters in HTTP header {" <<
646 getStringPrefix(field_start, field_end) << "}");
647
648 if (Config.onoff.relaxed_header_parser) {
649 char *p = (char *) this_line; /* XXX Warning! This destroys original header content and violates specifications somewhat */
650
651 while ((p = (char *)memchr(p, '\r', field_end - p)) != NULL) {
652 *p = ' ';
653 ++p;
654 }
655 } else {
656 PROF_stop(HttpHeaderParse);
657 return reset();
658 }
659 }
660
661 if (this_line + 1 == field_end && this_line > field_start) {
662 debugs(55, warnOnError, "WARNING: Blank continuation line in HTTP header {" <<
663 getStringPrefix(header_start, header_end) << "}");
664 PROF_stop(HttpHeaderParse);
665 return reset();
666 }
667 } while (field_ptr < header_end && (*field_ptr == ' ' || *field_ptr == '\t'));
668
669 if (field_start == field_end) {
670 if (field_ptr < header_end) {
671 debugs(55, warnOnError, "WARNING: unparseable HTTP header field near {" <<
672 getStringPrefix(field_start, header_end) << "}");
673 PROF_stop(HttpHeaderParse);
674 return reset();
675 }
676
677 break; /* terminating blank line */
678 }
679
680 if ((e = HttpHeaderEntry::parse(field_start, field_end)) == NULL) {
681 debugs(55, warnOnError, "WARNING: unparseable HTTP header field {" <<
682 getStringPrefix(field_start, field_end) << "}");
683 debugs(55, warnOnError, " in {" << getStringPrefix(header_start, header_end) << "}");
684
685 if (Config.onoff.relaxed_header_parser)
686 continue;
687
688 PROF_stop(HttpHeaderParse);
689 return reset();
690 }
691
692 if (e->id == HDR_CONTENT_LENGTH && (e2 = findEntry(e->id)) != NULL) {
693 if (e->value != e2->value) {
694 int64_t l1, l2;
695 debugs(55, warnOnError, "WARNING: found two conflicting content-length headers in {" <<
696 getStringPrefix(header_start, header_end) << "}");
697
698 if (!Config.onoff.relaxed_header_parser) {
699 delete e;
700 PROF_stop(HttpHeaderParse);
701 return reset();
702 }
703
704 if (!httpHeaderParseOffset(e->value.termedBuf(), &l1)) {
705 debugs(55, DBG_IMPORTANT, "WARNING: Unparseable content-length '" << e->value << "'");
706 delete e;
707 continue;
708 } else if (!httpHeaderParseOffset(e2->value.termedBuf(), &l2)) {
709 debugs(55, DBG_IMPORTANT, "WARNING: Unparseable content-length '" << e2->value << "'");
710 delById(e2->id);
711 } else if (l1 > l2) {
712 delById(e2->id);
713 } else {
714 delete e;
715 continue;
716 }
717 } else {
718 debugs(55, warnOnError, "NOTICE: found double content-length header");
719 delete e;
720
721 if (Config.onoff.relaxed_header_parser)
722 continue;
723
724 PROF_stop(HttpHeaderParse);
725 return reset();
726 }
727 }
728
729 if (e->id == HDR_OTHER && stringHasWhitespace(e->name.termedBuf())) {
730 debugs(55, warnOnError, "WARNING: found whitespace in HTTP header name {" <<
731 getStringPrefix(field_start, field_end) << "}");
732
733 if (!Config.onoff.relaxed_header_parser) {
734 delete e;
735 PROF_stop(HttpHeaderParse);
736 return reset();
737 }
738 }
739
740 addEntry(e);
741 }
742
743 if (chunked()) {
744 // RFC 2616 section 4.4: ignore Content-Length with Transfer-Encoding
745 delById(HDR_CONTENT_LENGTH);
746 }
747
748 PROF_stop(HttpHeaderParse);
749 return 1; /* even if no fields where found, it is a valid header */
750 }
751
752 /* packs all the entries using supplied packer */
753 void
754 HttpHeader::packInto(Packer * p, bool mask_sensitive_info) const
755 {
756 HttpHeaderPos pos = HttpHeaderInitPos;
757 const HttpHeaderEntry *e;
758 assert(p);
759 debugs(55, 7, this << " into " << p <<
760 (mask_sensitive_info ? " while masking" : ""));
761 /* pack all entries one by one */
762 while ((e = getEntry(&pos))) {
763 if (!mask_sensitive_info) {
764 e->packInto(p);
765 continue;
766 }
767
768 bool maskThisEntry = false;
769 switch (e->id) {
770 case HDR_AUTHORIZATION:
771 case HDR_PROXY_AUTHORIZATION:
772 maskThisEntry = true;
773 break;
774
775 case HDR_FTP_ARGUMENTS:
776 if (const HttpHeaderEntry *cmd = findEntry(HDR_FTP_COMMAND))
777 maskThisEntry = (cmd->value == "PASS");
778 break;
779
780 default:
781 break;
782 }
783 if (maskThisEntry) {
784 packerAppend(p, e->name.rawBuf(), e->name.size());
785 packerAppend(p, ": ** NOT DISPLAYED **\r\n", 23);
786 } else {
787 e->packInto(p);
788 }
789
790 }
791 /* Pack in the "special" entries */
792
793 /* Cache-Control */
794 }
795
796 /* returns next valid entry */
797 HttpHeaderEntry *
798 HttpHeader::getEntry(HttpHeaderPos * pos) const
799 {
800 assert(pos);
801 assert(*pos >= HttpHeaderInitPos && *pos < static_cast<ssize_t>(entries.size()));
802
803 for (++(*pos); *pos < static_cast<ssize_t>(entries.size()); ++(*pos)) {
804 if (entries[*pos])
805 return static_cast<HttpHeaderEntry*>(entries[*pos]);
806 }
807
808 return NULL;
809 }
810
811 /*
812 * returns a pointer to a specified entry if any
813 * note that we return one entry so it does not make much sense to ask for
814 * "list" headers
815 */
816 HttpHeaderEntry *
817 HttpHeader::findEntry(http_hdr_type id) const
818 {
819 HttpHeaderPos pos = HttpHeaderInitPos;
820 HttpHeaderEntry *e;
821 assert_eid(id);
822 assert(!CBIT_TEST(ListHeadersMask, id));
823
824 /* check mask first */
825
826 if (!CBIT_TEST(mask, id))
827 return NULL;
828
829 /* looks like we must have it, do linear search */
830 while ((e = getEntry(&pos))) {
831 if (e->id == id)
832 return e;
833 }
834
835 /* hm.. we thought it was there, but it was not found */
836 assert(0);
837
838 return NULL; /* not reached */
839 }
840
841 /*
842 * same as httpHeaderFindEntry
843 */
844 HttpHeaderEntry *
845 HttpHeader::findLastEntry(http_hdr_type id) const
846 {
847 HttpHeaderPos pos = HttpHeaderInitPos;
848 HttpHeaderEntry *e;
849 HttpHeaderEntry *result = NULL;
850 assert_eid(id);
851 assert(!CBIT_TEST(ListHeadersMask, id));
852
853 /* check mask first */
854
855 if (!CBIT_TEST(mask, id))
856 return NULL;
857
858 /* looks like we must have it, do linear search */
859 while ((e = getEntry(&pos))) {
860 if (e->id == id)
861 result = e;
862 }
863
864 assert(result); /* must be there! */
865 return result;
866 }
867
868 /*
869 * deletes all fields with a given name if any, returns #fields deleted;
870 */
871 int
872 HttpHeader::delByName(const char *name)
873 {
874 int count = 0;
875 HttpHeaderPos pos = HttpHeaderInitPos;
876 HttpHeaderEntry *e;
877 httpHeaderMaskInit(&mask, 0); /* temporal inconsistency */
878 debugs(55, 9, "deleting '" << name << "' fields in hdr " << this);
879
880 while ((e = getEntry(&pos))) {
881 if (!e->name.caseCmp(name))
882 delAt(pos, count);
883 else
884 CBIT_SET(mask, e->id);
885 }
886
887 return count;
888 }
889
890 /* deletes all entries with a given id, returns the #entries deleted */
891 int
892 HttpHeader::delById(http_hdr_type id)
893 {
894 int count = 0;
895 HttpHeaderPos pos = HttpHeaderInitPos;
896 HttpHeaderEntry *e;
897 debugs(55, 8, this << " del-by-id " << id);
898 assert_eid(id);
899 assert(id != HDR_OTHER); /* does not make sense */
900
901 if (!CBIT_TEST(mask, id))
902 return 0;
903
904 while ((e = getEntry(&pos))) {
905 if (e->id == id)
906 delAt(pos, count);
907 }
908
909 CBIT_CLR(mask, id);
910 assert(count);
911 return count;
912 }
913
914 /*
915 * deletes an entry at pos and leaves a gap; leaving a gap makes it
916 * possible to iterate(search) and delete fields at the same time
917 * NOTE: Does not update the header mask. Caller must follow up with
918 * a call to refreshMask() if headers_deleted was incremented.
919 */
920 void
921 HttpHeader::delAt(HttpHeaderPos pos, int &headers_deleted)
922 {
923 HttpHeaderEntry *e;
924 assert(pos >= HttpHeaderInitPos && pos < static_cast<ssize_t>(entries.size()));
925 e = static_cast<HttpHeaderEntry*>(entries[pos]);
926 entries[pos] = NULL;
927 /* decrement header length, allow for ": " and crlf */
928 len -= e->name.size() + 2 + e->value.size() + 2;
929 assert(len >= 0);
930 delete e;
931 ++headers_deleted;
932 }
933
934 /*
935 * Compacts the header storage
936 */
937 void
938 HttpHeader::compact()
939 {
940 // TODO: optimize removal, or possibly make it so that's not needed.
941 std::vector<HttpHeaderEntry *>::iterator newend;
942 newend = std::remove(entries.begin(), entries.end(), static_cast<HttpHeaderEntry *>(NULL));
943 entries.resize(newend-entries.begin());
944 }
945
946 /*
947 * Refreshes the header mask. Required after delAt() calls.
948 */
949 void
950 HttpHeader::refreshMask()
951 {
952 httpHeaderMaskInit(&mask, 0);
953 debugs(55, 7, "refreshing the mask in hdr " << this);
954 HttpHeaderPos pos = HttpHeaderInitPos;
955 while (HttpHeaderEntry *e = getEntry(&pos)) {
956 CBIT_SET(mask, e->id);
957 }
958 }
959
960 /* appends an entry;
961 * does not call e->clone() so one should not reuse "*e"
962 */
963 void
964 HttpHeader::addEntry(HttpHeaderEntry * e)
965 {
966 assert(e);
967 assert_eid(e->id);
968 assert(e->name.size());
969
970 debugs(55, 7, this << " adding entry: " << e->id << " at " << entries.size());
971
972 if (CBIT_TEST(mask, e->id))
973 ++ Headers[e->id].stat.repCount;
974 else
975 CBIT_SET(mask, e->id);
976
977 entries.push_back(e);
978
979 /* increment header length, allow for ": " and crlf */
980 len += e->name.size() + 2 + e->value.size() + 2;
981 }
982
983 /* inserts an entry;
984 * does not call e->clone() so one should not reuse "*e"
985 */
986 void
987 HttpHeader::insertEntry(HttpHeaderEntry * e)
988 {
989 assert(e);
990 assert_eid(e->id);
991
992 debugs(55, 7, this << " adding entry: " << e->id << " at " << entries.size());
993
994 if (CBIT_TEST(mask, e->id))
995 ++ Headers[e->id].stat.repCount;
996 else
997 CBIT_SET(mask, e->id);
998
999 entries.insert(entries.begin(),e);
1000
1001 /* increment header length, allow for ": " and crlf */
1002 len += e->name.size() + 2 + e->value.size() + 2;
1003 }
1004
1005 bool
1006 HttpHeader::getList(http_hdr_type id, String *s) const
1007 {
1008 HttpHeaderEntry *e;
1009 HttpHeaderPos pos = HttpHeaderInitPos;
1010 debugs(55, 9, this << " joining for id " << id);
1011 /* only fields from ListHeaders array can be "listed" */
1012 assert(CBIT_TEST(ListHeadersMask, id));
1013
1014 if (!CBIT_TEST(mask, id))
1015 return false;
1016
1017 while ((e = getEntry(&pos))) {
1018 if (e->id == id)
1019 strListAdd(s, e->value.termedBuf(), ',');
1020 }
1021
1022 /*
1023 * note: we might get an empty (size==0) string if there was an "empty"
1024 * header. This results in an empty length String, which may have a NULL
1025 * buffer.
1026 */
1027 /* temporary warning: remove it? (Is it useful for diagnostics ?) */
1028 if (!s->size())
1029 debugs(55, 3, "empty list header: " << Headers[id].name << "(" << id << ")");
1030 else
1031 debugs(55, 6, this << ": joined for id " << id << ": " << s);
1032
1033 return true;
1034 }
1035
1036 /* return a list of entries with the same id separated by ',' and ws */
1037 String
1038 HttpHeader::getList(http_hdr_type id) const
1039 {
1040 HttpHeaderEntry *e;
1041 HttpHeaderPos pos = HttpHeaderInitPos;
1042 debugs(55, 9, this << "joining for id " << id);
1043 /* only fields from ListHeaders array can be "listed" */
1044 assert(CBIT_TEST(ListHeadersMask, id));
1045
1046 if (!CBIT_TEST(mask, id))
1047 return String();
1048
1049 String s;
1050
1051 while ((e = getEntry(&pos))) {
1052 if (e->id == id)
1053 strListAdd(&s, e->value.termedBuf(), ',');
1054 }
1055
1056 /*
1057 * note: we might get an empty (size==0) string if there was an "empty"
1058 * header. This results in an empty length String, which may have a NULL
1059 * buffer.
1060 */
1061 /* temporary warning: remove it? (Is it useful for diagnostics ?) */
1062 if (!s.size())
1063 debugs(55, 3, "empty list header: " << Headers[id].name << "(" << id << ")");
1064 else
1065 debugs(55, 6, this << ": joined for id " << id << ": " << s);
1066
1067 return s;
1068 }
1069
1070 /* return a string or list of entries with the same id separated by ',' and ws */
1071 String
1072 HttpHeader::getStrOrList(http_hdr_type id) const
1073 {
1074 HttpHeaderEntry *e;
1075
1076 if (CBIT_TEST(ListHeadersMask, id))
1077 return getList(id);
1078
1079 if ((e = findEntry(id)))
1080 return e->value;
1081
1082 return String();
1083 }
1084
1085 /*
1086 * Returns the value of the specified header and/or an undefined String.
1087 */
1088 String
1089 HttpHeader::getByName(const char *name) const
1090 {
1091 String result;
1092 // ignore presence: return undefined string if an empty header is present
1093 (void)getByNameIfPresent(name, result);
1094 return result;
1095 }
1096
1097 bool
1098 HttpHeader::getByNameIfPresent(const char *name, String &result) const
1099 {
1100 http_hdr_type id;
1101 HttpHeaderPos pos = HttpHeaderInitPos;
1102 HttpHeaderEntry *e;
1103
1104 assert(name);
1105
1106 /* First try the quick path */
1107 id = httpHeaderIdByNameDef(name, strlen(name));
1108
1109 if (id != -1) {
1110 if (!has(id))
1111 return false;
1112 result = getStrOrList(id);
1113 return true;
1114 }
1115
1116 /* Sorry, an unknown header name. Do linear search */
1117 bool found = false;
1118 while ((e = getEntry(&pos))) {
1119 if (e->id == HDR_OTHER && e->name.caseCmp(name) == 0) {
1120 found = true;
1121 strListAdd(&result, e->value.termedBuf(), ',');
1122 }
1123 }
1124
1125 return found;
1126 }
1127
1128 /*
1129 * Returns a the value of the specified list member, if any.
1130 */
1131 String
1132 HttpHeader::getByNameListMember(const char *name, const char *member, const char separator) const
1133 {
1134 String header;
1135 const char *pos = NULL;
1136 const char *item;
1137 int ilen;
1138 int mlen = strlen(member);
1139
1140 assert(name);
1141
1142 header = getByName(name);
1143
1144 String result;
1145
1146 while (strListGetItem(&header, separator, &item, &ilen, &pos)) {
1147 if (strncmp(item, member, mlen) == 0 && item[mlen] == '=') {
1148 result.append(item + mlen + 1, ilen - mlen - 1);
1149 break;
1150 }
1151 }
1152
1153 return result;
1154 }
1155
1156 /*
1157 * returns a the value of the specified list member, if any.
1158 */
1159 String
1160 HttpHeader::getListMember(http_hdr_type id, const char *member, const char separator) const
1161 {
1162 String header;
1163 const char *pos = NULL;
1164 const char *item;
1165 int ilen;
1166 int mlen = strlen(member);
1167
1168 assert(id >= 0);
1169
1170 header = getStrOrList(id);
1171 String result;
1172
1173 while (strListGetItem(&header, separator, &item, &ilen, &pos)) {
1174 if (strncmp(item, member, mlen) == 0 && item[mlen] == '=') {
1175 result.append(item + mlen + 1, ilen - mlen - 1);
1176 break;
1177 }
1178 }
1179
1180 header.clean();
1181 return result;
1182 }
1183
1184 /* test if a field is present */
1185 int
1186 HttpHeader::has(http_hdr_type id) const
1187 {
1188 assert_eid(id);
1189 assert(id != HDR_OTHER);
1190 debugs(55, 9, this << " lookup for " << id);
1191 return CBIT_TEST(mask, id);
1192 }
1193
1194 void
1195 HttpHeader::putInt(http_hdr_type id, int number)
1196 {
1197 assert_eid(id);
1198 assert(Headers[id].type == ftInt); /* must be of an appropriate type */
1199 assert(number >= 0);
1200 addEntry(new HttpHeaderEntry(id, NULL, xitoa(number)));
1201 }
1202
1203 void
1204 HttpHeader::putInt64(http_hdr_type id, int64_t number)
1205 {
1206 assert_eid(id);
1207 assert(Headers[id].type == ftInt64); /* must be of an appropriate type */
1208 assert(number >= 0);
1209 addEntry(new HttpHeaderEntry(id, NULL, xint64toa(number)));
1210 }
1211
1212 void
1213 HttpHeader::putTime(http_hdr_type id, time_t htime)
1214 {
1215 assert_eid(id);
1216 assert(Headers[id].type == ftDate_1123); /* must be of an appropriate type */
1217 assert(htime >= 0);
1218 addEntry(new HttpHeaderEntry(id, NULL, mkrfc1123(htime)));
1219 }
1220
1221 void
1222 HttpHeader::insertTime(http_hdr_type id, time_t htime)
1223 {
1224 assert_eid(id);
1225 assert(Headers[id].type == ftDate_1123); /* must be of an appropriate type */
1226 assert(htime >= 0);
1227 insertEntry(new HttpHeaderEntry(id, NULL, mkrfc1123(htime)));
1228 }
1229
1230 void
1231 HttpHeader::putStr(http_hdr_type id, const char *str)
1232 {
1233 assert_eid(id);
1234 assert(Headers[id].type == ftStr); /* must be of an appropriate type */
1235 assert(str);
1236 addEntry(new HttpHeaderEntry(id, NULL, str));
1237 }
1238
1239 void
1240 HttpHeader::putAuth(const char *auth_scheme, const char *realm)
1241 {
1242 assert(auth_scheme && realm);
1243 httpHeaderPutStrf(this, HDR_WWW_AUTHENTICATE, "%s realm=\"%s\"", auth_scheme, realm);
1244 }
1245
1246 void
1247 HttpHeader::putCc(const HttpHdrCc * cc)
1248 {
1249 MemBuf mb;
1250 Packer p;
1251 assert(cc);
1252 /* remove old directives if any */
1253 delById(HDR_CACHE_CONTROL);
1254 /* pack into mb */
1255 mb.init();
1256 packerToMemInit(&p, &mb);
1257 cc->packInto(&p);
1258 /* put */
1259 addEntry(new HttpHeaderEntry(HDR_CACHE_CONTROL, NULL, mb.buf));
1260 /* cleanup */
1261 packerClean(&p);
1262 mb.clean();
1263 }
1264
1265 void
1266 HttpHeader::putContRange(const HttpHdrContRange * cr)
1267 {
1268 MemBuf mb;
1269 Packer p;
1270 assert(cr);
1271 /* remove old directives if any */
1272 delById(HDR_CONTENT_RANGE);
1273 /* pack into mb */
1274 mb.init();
1275 packerToMemInit(&p, &mb);
1276 httpHdrContRangePackInto(cr, &p);
1277 /* put */
1278 addEntry(new HttpHeaderEntry(HDR_CONTENT_RANGE, NULL, mb.buf));
1279 /* cleanup */
1280 packerClean(&p);
1281 mb.clean();
1282 }
1283
1284 void
1285 HttpHeader::putRange(const HttpHdrRange * range)
1286 {
1287 MemBuf mb;
1288 Packer p;
1289 assert(range);
1290 /* remove old directives if any */
1291 delById(HDR_RANGE);
1292 /* pack into mb */
1293 mb.init();
1294 packerToMemInit(&p, &mb);
1295 range->packInto(&p);
1296 /* put */
1297 addEntry(new HttpHeaderEntry(HDR_RANGE, NULL, mb.buf));
1298 /* cleanup */
1299 packerClean(&p);
1300 mb.clean();
1301 }
1302
1303 void
1304 HttpHeader::putSc(HttpHdrSc *sc)
1305 {
1306 MemBuf mb;
1307 Packer p;
1308 assert(sc);
1309 /* remove old directives if any */
1310 delById(HDR_SURROGATE_CONTROL);
1311 /* pack into mb */
1312 mb.init();
1313 packerToMemInit(&p, &mb);
1314 sc->packInto(&p);
1315 /* put */
1316 addEntry(new HttpHeaderEntry(HDR_SURROGATE_CONTROL, NULL, mb.buf));
1317 /* cleanup */
1318 packerClean(&p);
1319 mb.clean();
1320 }
1321
1322 void
1323 HttpHeader::putWarning(const int code, const char *const text)
1324 {
1325 char buf[512];
1326 snprintf(buf, sizeof(buf), "%i %s \"%s\"", code, visible_appname_string, text);
1327 putStr(HDR_WARNING, buf);
1328 }
1329
1330 /* add extension header (these fields are not parsed/analyzed/joined, etc.) */
1331 void
1332 HttpHeader::putExt(const char *name, const char *value)
1333 {
1334 assert(name && value);
1335 debugs(55, 8, this << " adds ext entry " << name << " : " << value);
1336 addEntry(new HttpHeaderEntry(HDR_OTHER, name, value));
1337 }
1338
1339 int
1340 HttpHeader::getInt(http_hdr_type id) const
1341 {
1342 assert_eid(id);
1343 assert(Headers[id].type == ftInt); /* must be of an appropriate type */
1344 HttpHeaderEntry *e;
1345
1346 if ((e = findEntry(id)))
1347 return e->getInt();
1348
1349 return -1;
1350 }
1351
1352 int64_t
1353 HttpHeader::getInt64(http_hdr_type id) const
1354 {
1355 assert_eid(id);
1356 assert(Headers[id].type == ftInt64); /* must be of an appropriate type */
1357 HttpHeaderEntry *e;
1358
1359 if ((e = findEntry(id)))
1360 return e->getInt64();
1361
1362 return -1;
1363 }
1364
1365 time_t
1366 HttpHeader::getTime(http_hdr_type id) const
1367 {
1368 HttpHeaderEntry *e;
1369 time_t value = -1;
1370 assert_eid(id);
1371 assert(Headers[id].type == ftDate_1123); /* must be of an appropriate type */
1372
1373 if ((e = findEntry(id))) {
1374 value = parse_rfc1123(e->value.termedBuf());
1375 httpHeaderNoteParsedEntry(e->id, e->value, value < 0);
1376 }
1377
1378 return value;
1379 }
1380
1381 /* sync with httpHeaderGetLastStr */
1382 const char *
1383 HttpHeader::getStr(http_hdr_type id) const
1384 {
1385 HttpHeaderEntry *e;
1386 assert_eid(id);
1387 assert(Headers[id].type == ftStr); /* must be of an appropriate type */
1388
1389 if ((e = findEntry(id))) {
1390 httpHeaderNoteParsedEntry(e->id, e->value, 0); /* no errors are possible */
1391 return e->value.termedBuf();
1392 }
1393
1394 return NULL;
1395 }
1396
1397 /* unusual */
1398 const char *
1399 HttpHeader::getLastStr(http_hdr_type id) const
1400 {
1401 HttpHeaderEntry *e;
1402 assert_eid(id);
1403 assert(Headers[id].type == ftStr); /* must be of an appropriate type */
1404
1405 if ((e = findLastEntry(id))) {
1406 httpHeaderNoteParsedEntry(e->id, e->value, 0); /* no errors are possible */
1407 return e->value.termedBuf();
1408 }
1409
1410 return NULL;
1411 }
1412
1413 HttpHdrCc *
1414 HttpHeader::getCc() const
1415 {
1416 if (!CBIT_TEST(mask, HDR_CACHE_CONTROL))
1417 return NULL;
1418 PROF_start(HttpHeader_getCc);
1419
1420 String s;
1421 getList(HDR_CACHE_CONTROL, &s);
1422
1423 HttpHdrCc *cc=new HttpHdrCc();
1424
1425 if (!cc->parse(s)) {
1426 delete cc;
1427 cc = NULL;
1428 }
1429
1430 ++ HttpHeaderStats[owner].ccParsedCount;
1431
1432 if (cc)
1433 httpHdrCcUpdateStats(cc, &HttpHeaderStats[owner].ccTypeDistr);
1434
1435 httpHeaderNoteParsedEntry(HDR_CACHE_CONTROL, s, !cc);
1436
1437 PROF_stop(HttpHeader_getCc);
1438
1439 return cc;
1440 }
1441
1442 HttpHdrRange *
1443 HttpHeader::getRange() const
1444 {
1445 HttpHdrRange *r = NULL;
1446 HttpHeaderEntry *e;
1447 /* some clients will send "Request-Range" _and_ *matching* "Range"
1448 * who knows, some clients might send Request-Range only;
1449 * this "if" should work correctly in both cases;
1450 * hopefully no clients send mismatched headers! */
1451
1452 if ((e = findEntry(HDR_RANGE)) ||
1453 (e = findEntry(HDR_REQUEST_RANGE))) {
1454 r = HttpHdrRange::ParseCreate(&e->value);
1455 httpHeaderNoteParsedEntry(e->id, e->value, !r);
1456 }
1457
1458 return r;
1459 }
1460
1461 HttpHdrSc *
1462 HttpHeader::getSc() const
1463 {
1464 if (!CBIT_TEST(mask, HDR_SURROGATE_CONTROL))
1465 return NULL;
1466
1467 String s;
1468
1469 (void) getList(HDR_SURROGATE_CONTROL, &s);
1470
1471 HttpHdrSc *sc = httpHdrScParseCreate(s);
1472
1473 ++ HttpHeaderStats[owner].ccParsedCount;
1474
1475 if (sc)
1476 sc->updateStats(&HttpHeaderStats[owner].scTypeDistr);
1477
1478 httpHeaderNoteParsedEntry(HDR_SURROGATE_CONTROL, s, !sc);
1479
1480 return sc;
1481 }
1482
1483 HttpHdrContRange *
1484 HttpHeader::getContRange() const
1485 {
1486 HttpHdrContRange *cr = NULL;
1487 HttpHeaderEntry *e;
1488
1489 if ((e = findEntry(HDR_CONTENT_RANGE))) {
1490 cr = httpHdrContRangeParseCreate(e->value.termedBuf());
1491 httpHeaderNoteParsedEntry(e->id, e->value, !cr);
1492 }
1493
1494 return cr;
1495 }
1496
1497 const char *
1498 HttpHeader::getAuth(http_hdr_type id, const char *auth_scheme) const
1499 {
1500 const char *field;
1501 int l;
1502 assert(auth_scheme);
1503 field = getStr(id);
1504
1505 if (!field) /* no authorization field */
1506 return NULL;
1507
1508 l = strlen(auth_scheme);
1509
1510 if (!l || strncasecmp(field, auth_scheme, l)) /* wrong scheme */
1511 return NULL;
1512
1513 field += l;
1514
1515 if (!xisspace(*field)) /* wrong scheme */
1516 return NULL;
1517
1518 /* skip white space */
1519 for (; field && xisspace(*field); ++field);
1520
1521 if (!*field) /* no authorization cookie */
1522 return NULL;
1523
1524 static char decodedAuthToken[8192];
1525 const int decodedLen = base64_decode(decodedAuthToken, sizeof(decodedAuthToken)-1, field);
1526 decodedAuthToken[decodedLen] = '\0';
1527 return decodedAuthToken;
1528 }
1529
1530 ETag
1531 HttpHeader::getETag(http_hdr_type id) const
1532 {
1533 ETag etag = {NULL, -1};
1534 HttpHeaderEntry *e;
1535 assert(Headers[id].type == ftETag); /* must be of an appropriate type */
1536
1537 if ((e = findEntry(id)))
1538 etagParseInit(&etag, e->value.termedBuf());
1539
1540 return etag;
1541 }
1542
1543 TimeOrTag
1544 HttpHeader::getTimeOrTag(http_hdr_type id) const
1545 {
1546 TimeOrTag tot;
1547 HttpHeaderEntry *e;
1548 assert(Headers[id].type == ftDate_1123_or_ETag); /* must be of an appropriate type */
1549 memset(&tot, 0, sizeof(tot));
1550
1551 if ((e = findEntry(id))) {
1552 const char *str = e->value.termedBuf();
1553 /* try as an ETag */
1554
1555 if (etagParseInit(&tot.tag, str)) {
1556 tot.valid = tot.tag.str != NULL;
1557 tot.time = -1;
1558 } else {
1559 /* or maybe it is time? */
1560 tot.time = parse_rfc1123(str);
1561 tot.valid = tot.time >= 0;
1562 tot.tag.str = NULL;
1563 }
1564 }
1565
1566 assert(tot.time < 0 || !tot.tag.str); /* paranoid */
1567 return tot;
1568 }
1569
1570 /*
1571 * HttpHeaderEntry
1572 */
1573
1574 HttpHeaderEntry::HttpHeaderEntry(http_hdr_type anId, const char *aName, const char *aValue)
1575 {
1576 assert_eid(anId);
1577 id = anId;
1578
1579 if (id != HDR_OTHER)
1580 name = Headers[id].name;
1581 else
1582 name = aName;
1583
1584 value = aValue;
1585
1586 ++ Headers[id].stat.aliveCount;
1587
1588 debugs(55, 9, "created HttpHeaderEntry " << this << ": '" << name << " : " << value );
1589 }
1590
1591 HttpHeaderEntry::~HttpHeaderEntry()
1592 {
1593 assert_eid(id);
1594 debugs(55, 9, "destroying entry " << this << ": '" << name << ": " << value << "'");
1595 /* clean name if needed */
1596
1597 if (id == HDR_OTHER)
1598 name.clean();
1599
1600 value.clean();
1601
1602 assert(Headers[id].stat.aliveCount);
1603
1604 -- Headers[id].stat.aliveCount;
1605
1606 id = HDR_BAD_HDR;
1607 }
1608
1609 /* parses and inits header entry, returns true/false */
1610 HttpHeaderEntry *
1611 HttpHeaderEntry::parse(const char *field_start, const char *field_end)
1612 {
1613 /* note: name_start == field_start */
1614 const char *name_end = (const char *)memchr(field_start, ':', field_end - field_start);
1615 int name_len = name_end ? name_end - field_start :0;
1616 const char *value_start = field_start + name_len + 1; /* skip ':' */
1617 /* note: value_end == field_end */
1618
1619 ++ HeaderEntryParsedCount;
1620
1621 /* do we have a valid field name within this field? */
1622
1623 if (!name_len || name_end > field_end)
1624 return NULL;
1625
1626 if (name_len > 65534) {
1627 /* String must be LESS THAN 64K and it adds a terminating NULL */
1628 debugs(55, DBG_IMPORTANT, "WARNING: ignoring header name of " << name_len << " bytes");
1629 return NULL;
1630 }
1631
1632 if (Config.onoff.relaxed_header_parser && xisspace(field_start[name_len - 1])) {
1633 debugs(55, Config.onoff.relaxed_header_parser <= 0 ? 1 : 2,
1634 "NOTICE: Whitespace after header name in '" << getStringPrefix(field_start, field_end) << "'");
1635
1636 while (name_len > 0 && xisspace(field_start[name_len - 1]))
1637 --name_len;
1638
1639 if (!name_len)
1640 return NULL;
1641 }
1642
1643 /* now we know we can parse it */
1644
1645 debugs(55, 9, "parsing HttpHeaderEntry: near '" << getStringPrefix(field_start, field_end) << "'");
1646
1647 /* is it a "known" field? */
1648 http_hdr_type id = httpHeaderIdByName(field_start, name_len, Headers, HDR_ENUM_END);
1649
1650 String name;
1651
1652 String value;
1653
1654 if (id < 0)
1655 id = HDR_OTHER;
1656
1657 assert_eid(id);
1658
1659 /* set field name */
1660 if (id == HDR_OTHER)
1661 name.limitInit(field_start, name_len);
1662 else
1663 name = Headers[id].name;
1664
1665 /* trim field value */
1666 while (value_start < field_end && xisspace(*value_start))
1667 ++value_start;
1668
1669 while (value_start < field_end && xisspace(field_end[-1]))
1670 --field_end;
1671
1672 if (field_end - value_start > 65534) {
1673 /* String must be LESS THAN 64K and it adds a terminating NULL */
1674 debugs(55, DBG_IMPORTANT, "WARNING: ignoring '" << name << "' header of " << (field_end - value_start) << " bytes");
1675
1676 if (id == HDR_OTHER)
1677 name.clean();
1678
1679 return NULL;
1680 }
1681
1682 /* set field value */
1683 value.limitInit(value_start, field_end - value_start);
1684
1685 ++ Headers[id].stat.seenCount;
1686
1687 debugs(55, 9, "parsed HttpHeaderEntry: '" << name << ": " << value << "'");
1688
1689 return new HttpHeaderEntry(id, name.termedBuf(), value.termedBuf());
1690 }
1691
1692 HttpHeaderEntry *
1693 HttpHeaderEntry::clone() const
1694 {
1695 return new HttpHeaderEntry(id, name.termedBuf(), value.termedBuf());
1696 }
1697
1698 void
1699 HttpHeaderEntry::packInto(Packer * p) const
1700 {
1701 assert(p);
1702 packerAppend(p, name.rawBuf(), name.size());
1703 packerAppend(p, ": ", 2);
1704 packerAppend(p, value.rawBuf(), value.size());
1705 packerAppend(p, "\r\n", 2);
1706 }
1707
1708 int
1709 HttpHeaderEntry::getInt() const
1710 {
1711 assert_eid (id);
1712 assert (Headers[id].type == ftInt);
1713 int val = -1;
1714 int ok = httpHeaderParseInt(value.termedBuf(), &val);
1715 httpHeaderNoteParsedEntry(id, value, !ok);
1716 /* XXX: Should we check ok - ie
1717 * return ok ? -1 : value;
1718 */
1719 return val;
1720 }
1721
1722 int64_t
1723 HttpHeaderEntry::getInt64() const
1724 {
1725 assert_eid (id);
1726 assert (Headers[id].type == ftInt64);
1727 int64_t val = -1;
1728 int ok = httpHeaderParseOffset(value.termedBuf(), &val);
1729 httpHeaderNoteParsedEntry(id, value, !ok);
1730 /* XXX: Should we check ok - ie
1731 * return ok ? -1 : value;
1732 */
1733 return val;
1734 }
1735
1736 static void
1737 httpHeaderNoteParsedEntry(http_hdr_type id, String const &context, int error)
1738 {
1739 ++ Headers[id].stat.parsCount;
1740
1741 if (error) {
1742 ++ Headers[id].stat.errCount;
1743 debugs(55, 2, "cannot parse hdr field: '" << Headers[id].name << ": " << context << "'");
1744 }
1745 }
1746
1747 /*
1748 * Reports
1749 */
1750
1751 /* tmp variable used to pass stat info to dumpers */
1752 extern const HttpHeaderStat *dump_stat; /* argh! */
1753 const HttpHeaderStat *dump_stat = NULL;
1754
1755 void
1756 httpHeaderFieldStatDumper(StoreEntry * sentry, int idx, double val, double size, int count)
1757 {
1758 const int id = (int) val;
1759 const int valid_id = id >= 0 && id < HDR_ENUM_END;
1760 const char *name = valid_id ? Headers[id].name.termedBuf() : "INVALID";
1761 int visible = count > 0;
1762 /* for entries with zero count, list only those that belong to current type of message */
1763
1764 if (!visible && valid_id && dump_stat->owner_mask)
1765 visible = CBIT_TEST(*dump_stat->owner_mask, id);
1766
1767 if (visible)
1768 storeAppendPrintf(sentry, "%2d\t %-20s\t %5d\t %6.2f\n",
1769 id, name, count, xdiv(count, dump_stat->busyDestroyedCount));
1770 }
1771
1772 static void
1773 httpHeaderFldsPerHdrDumper(StoreEntry * sentry, int idx, double val, double size, int count)
1774 {
1775 if (count)
1776 storeAppendPrintf(sentry, "%2d\t %5d\t %5d\t %6.2f\n",
1777 idx, (int) val, count,
1778 xpercent(count, dump_stat->destroyedCount));
1779 }
1780
1781 static void
1782 httpHeaderStatDump(const HttpHeaderStat * hs, StoreEntry * e)
1783 {
1784 assert(hs && e);
1785
1786 dump_stat = hs;
1787 storeAppendPrintf(e, "\nHeader Stats: %s\n", hs->label);
1788 storeAppendPrintf(e, "\nField type distribution\n");
1789 storeAppendPrintf(e, "%2s\t %-20s\t %5s\t %6s\n",
1790 "id", "name", "count", "#/header");
1791 hs->fieldTypeDistr.dump(e, httpHeaderFieldStatDumper);
1792 storeAppendPrintf(e, "\nCache-control directives distribution\n");
1793 storeAppendPrintf(e, "%2s\t %-20s\t %5s\t %6s\n",
1794 "id", "name", "count", "#/cc_field");
1795 hs->ccTypeDistr.dump(e, httpHdrCcStatDumper);
1796 storeAppendPrintf(e, "\nSurrogate-control directives distribution\n");
1797 storeAppendPrintf(e, "%2s\t %-20s\t %5s\t %6s\n",
1798 "id", "name", "count", "#/sc_field");
1799 hs->scTypeDistr.dump(e, httpHdrScStatDumper);
1800 storeAppendPrintf(e, "\nNumber of fields per header distribution\n");
1801 storeAppendPrintf(e, "%2s\t %-5s\t %5s\t %6s\n",
1802 "id", "#flds", "count", "%total");
1803 hs->hdrUCountDistr.dump(e, httpHeaderFldsPerHdrDumper);
1804 storeAppendPrintf(e, "\n");
1805 dump_stat = NULL;
1806 }
1807
1808 void
1809 httpHeaderStoreReport(StoreEntry * e)
1810 {
1811 int i;
1812 http_hdr_type ht;
1813 assert(e);
1814
1815 HttpHeaderStats[0].parsedCount =
1816 HttpHeaderStats[hoRequest].parsedCount + HttpHeaderStats[hoReply].parsedCount;
1817 HttpHeaderStats[0].ccParsedCount =
1818 HttpHeaderStats[hoRequest].ccParsedCount + HttpHeaderStats[hoReply].ccParsedCount;
1819 HttpHeaderStats[0].destroyedCount =
1820 HttpHeaderStats[hoRequest].destroyedCount + HttpHeaderStats[hoReply].destroyedCount;
1821 HttpHeaderStats[0].busyDestroyedCount =
1822 HttpHeaderStats[hoRequest].busyDestroyedCount + HttpHeaderStats[hoReply].busyDestroyedCount;
1823
1824 for (i = 1; i < HttpHeaderStatCount; ++i) {
1825 httpHeaderStatDump(HttpHeaderStats + i, e);
1826 }
1827
1828 /* field stats for all messages */
1829 storeAppendPrintf(e, "\nHttp Fields Stats (replies and requests)\n");
1830
1831 storeAppendPrintf(e, "%2s\t %-25s\t %5s\t %6s\t %6s\n",
1832 "id", "name", "#alive", "%err", "%repeat");
1833
1834 for (ht = (http_hdr_type)0; ht < HDR_ENUM_END; ++ht) {
1835 HttpHeaderFieldInfo *f = Headers + ht;
1836 storeAppendPrintf(e, "%2d\t %-25s\t %5d\t %6.3f\t %6.3f\n",
1837 f->id, f->name.termedBuf(), f->stat.aliveCount,
1838 xpercent(f->stat.errCount, f->stat.parsCount),
1839 xpercent(f->stat.repCount, f->stat.seenCount));
1840 }
1841
1842 storeAppendPrintf(e, "Headers Parsed: %d + %d = %d\n",
1843 HttpHeaderStats[hoRequest].parsedCount,
1844 HttpHeaderStats[hoReply].parsedCount,
1845 HttpHeaderStats[0].parsedCount);
1846 storeAppendPrintf(e, "Hdr Fields Parsed: %d\n", HeaderEntryParsedCount);
1847 }
1848
1849 http_hdr_type
1850 httpHeaderIdByName(const char *name, size_t name_len, const HttpHeaderFieldInfo * info, int end)
1851 {
1852 if (name_len > 0) {
1853 for (int i = 0; i < end; ++i) {
1854 if (name_len != info[i].name.size())
1855 continue;
1856
1857 if (!strncasecmp(name, info[i].name.rawBuf(), name_len))
1858 return info[i].id;
1859 }
1860 }
1861
1862 return HDR_BAD_HDR;
1863 }
1864
1865 http_hdr_type
1866 httpHeaderIdByNameDef(const char *name, int name_len)
1867 {
1868 if (!Headers)
1869 Headers = httpHeaderBuildFieldsInfo(HeadersAttrs, HDR_ENUM_END);
1870
1871 return httpHeaderIdByName(name, name_len, Headers, HDR_ENUM_END);
1872 }
1873
1874 const char *
1875 httpHeaderNameById(int id)
1876 {
1877 if (!Headers)
1878 Headers = httpHeaderBuildFieldsInfo(HeadersAttrs, HDR_ENUM_END);
1879
1880 assert(id >= 0 && id < HDR_ENUM_END);
1881
1882 return Headers[id].name.termedBuf();
1883 }
1884
1885 int
1886 HttpHeader::hasListMember(http_hdr_type id, const char *member, const char separator) const
1887 {
1888 int result = 0;
1889 const char *pos = NULL;
1890 const char *item;
1891 int ilen;
1892 int mlen = strlen(member);
1893
1894 assert(id >= 0);
1895
1896 String header (getStrOrList(id));
1897
1898 while (strListGetItem(&header, separator, &item, &ilen, &pos)) {
1899 if (strncasecmp(item, member, mlen) == 0
1900 && (item[mlen] == '=' || item[mlen] == separator || item[mlen] == ';' || item[mlen] == '\0')) {
1901 result = 1;
1902 break;
1903 }
1904 }
1905
1906 return result;
1907 }
1908
1909 int
1910 HttpHeader::hasByNameListMember(const char *name, const char *member, const char separator) const
1911 {
1912 int result = 0;
1913 const char *pos = NULL;
1914 const char *item;
1915 int ilen;
1916 int mlen = strlen(member);
1917
1918 assert(name);
1919
1920 String header (getByName(name));
1921
1922 while (strListGetItem(&header, separator, &item, &ilen, &pos)) {
1923 if (strncasecmp(item, member, mlen) == 0
1924 && (item[mlen] == '=' || item[mlen] == separator || item[mlen] == ';' || item[mlen] == '\0')) {
1925 result = 1;
1926 break;
1927 }
1928 }
1929
1930 return result;
1931 }
1932
1933 void
1934 HttpHeader::removeHopByHopEntries()
1935 {
1936 removeConnectionHeaderEntries();
1937
1938 const HttpHeaderEntry *e;
1939 HttpHeaderPos pos = HttpHeaderInitPos;
1940 int headers_deleted = 0;
1941 while ((e = getEntry(&pos))) {
1942 int id = e->id;
1943 if (CBIT_TEST(HopByHopHeadersMask, id)) {
1944 delAt(pos, headers_deleted);
1945 CBIT_CLR(mask, id);
1946 }
1947 }
1948 }
1949
1950 void
1951 HttpHeader::removeConnectionHeaderEntries()
1952 {
1953 if (has(HDR_CONNECTION)) {
1954 /* anything that matches Connection list member will be deleted */
1955 String strConnection;
1956
1957 (void) getList(HDR_CONNECTION, &strConnection);
1958 const HttpHeaderEntry *e;
1959 HttpHeaderPos pos = HttpHeaderInitPos;
1960 /*
1961 * think: on-average-best nesting of the two loops (hdrEntry
1962 * and strListItem) @?@
1963 */
1964 /*
1965 * maybe we should delete standard stuff ("keep-alive","close")
1966 * from strConnection first?
1967 */
1968
1969 int headers_deleted = 0;
1970 while ((e = getEntry(&pos))) {
1971 if (strListIsMember(&strConnection, e->name.termedBuf(), ','))
1972 delAt(pos, headers_deleted);
1973 }
1974 if (headers_deleted)
1975 refreshMask();
1976 }
1977 }