]> git.ipfire.org Git - thirdparty/squid.git/blame - src/http.cc
Initial revision
[thirdparty/squid.git] / src / http.cc
CommitLineData
090089c4 1static char rcsid[] = "$Id: http.cc,v 1.1 1996/02/22 06:23:55 wessels Exp $";
2/*
3 * File: http.c
4 * Description: state machine for http retrieval protocol.
5 * Based on John's gopher retrieval module.
6 * Author: Anawat Chankhunthod, USC
7 * Created: Tue May 28 10:57:11 1994
8 * Language: C
9 **********************************************************************
10 * Copyright (c) 1994, 1995. All rights reserved.
11 *
12 * The Harvest software was developed by the Internet Research Task
13 * Force Research Group on Resource Discovery (IRTF-RD):
14 *
15 * Mic Bowman of Transarc Corporation.
16 * Peter Danzig of the University of Southern California.
17 * Darren R. Hardy of the University of Colorado at Boulder.
18 * Udi Manber of the University of Arizona.
19 * Michael F. Schwartz of the University of Colorado at Boulder.
20 * Duane Wessels of the University of Colorado at Boulder.
21 *
22 * This copyright notice applies to software in the Harvest
23 * ``src/'' directory only. Users should consult the individual
24 * copyright notices in the ``components/'' subdirectories for
25 * copyright information about other software bundled with the
26 * Harvest source code distribution.
27 *
28 * TERMS OF USE
29 *
30 * The Harvest software may be used and re-distributed without
31 * charge, provided that the software origin and research team are
32 * cited in any use of the system. Most commonly this is
33 * accomplished by including a link to the Harvest Home Page
34 * (http://harvest.cs.colorado.edu/) from the query page of any
35 * Broker you deploy, as well as in the query result pages. These
36 * links are generated automatically by the standard Broker
37 * software distribution.
38 *
39 * The Harvest software is provided ``as is'', without express or
40 * implied warranty, and with no support nor obligation to assist
41 * in its use, correction, modification or enhancement. We assume
42 * no liability with respect to the infringement of copyrights,
43 * trade secrets, or any patents, and are not responsible for
44 * consequential damages. Proper use of the Harvest software is
45 * entirely the responsibility of the user.
46 *
47 * DERIVATIVE WORKS
48 *
49 * Users may make derivative works from the Harvest software, subject
50 * to the following constraints:
51 *
52 * - You must include the above copyright notice and these
53 * accompanying paragraphs in all forms of derivative works,
54 * and any documentation and other materials related to such
55 * distribution and use acknowledge that the software was
56 * developed at the above institutions.
57 *
58 * - You must notify IRTF-RD regarding your distribution of
59 * the derivative work.
60 *
61 * - You must clearly notify users that your are distributing
62 * a modified version and not the original Harvest software.
63 *
64 * - Any derivative product is also subject to these copyright
65 * and use restrictions.
66 *
67 * Note that the Harvest software is NOT in the public domain. We
68 * retain copyright, as specified above.
69 *
70 * HISTORY OF FREE SOFTWARE STATUS
71 *
72 * Originally we required sites to license the software in cases
73 * where they were going to build commercial products/services
74 * around Harvest. In June 1995 we changed this policy. We now
75 * allow people to use the core Harvest software (the code found in
76 * the Harvest ``src/'' directory) for free. We made this change
77 * in the interest of encouraging the widest possible deployment of
78 * the technology. The Harvest software is really a reference
79 * implementation of a set of protocols and formats, some of which
80 * we intend to standardize. We encourage commercial
81 * re-implementations of code complying to this set of standards.
82 *
83 *
84 */
85#include "config.h"
86#include <sys/errno.h>
87#include <stdlib.h>
88#include <string.h>
89#include <unistd.h>
90
91#include "ansihelp.h"
92#include "comm.h"
93#include "store.h"
94#include "stat.h"
95#include "url.h"
96#include "ipcache.h"
97#include "cache_cf.h"
98#include "ttl.h"
99#include "icp.h"
100#include "util.h"
101
102#define HTTP_PORT 80
103#define HTTP_DELETE_GAP (64*1024)
104
105extern int errno;
106extern char *dns_error_message;
107extern time_t cached_curtime;
108
109typedef struct _httpdata {
110 StoreEntry *entry;
111 char host[HARVESTHOSTNAMELEN + 1];
112 int port;
113 char *type;
114 char *mime_hdr;
115 char type_id;
116 char request[MAX_URL + 1];
117 char *icp_page_ptr; /* Used to send proxy-http request:
118 * put_free_8k_page(me) if the lifetime
119 * expires */
120 char *icp_rwd_ptr; /* When a lifetime expires during the
121 * middle of an icpwrite, don't lose the
122 * icpReadWriteData */
123} HttpData;
124
125extern char *tmp_error_buf;
126
/* Request methods understood by this module, indexed by entry->type_id
 * (see proxyhttpStart).  The trailing "" presumably terminates the
 * list -- TODO confirm against the code that scans it. */
char *HTTP_OPS[] =
{"GET", "POST", "HEAD", ""};
129
130int http_url_parser(url, host, port, request)
131 char *url;
132 char *host;
133 int *port;
134 char *request;
135{
136 static char hostbuf[MAX_URL];
137 static char atypebuf[MAX_URL];
138 int t;
139
140 /* initialize everything */
141 (*port) = 0;
142 atypebuf[0] = hostbuf[0] = request[0] = host[0] = '\0';
143
144 t = sscanf(url, "%[a-zA-Z]://%[^/]%s", atypebuf, hostbuf, request);
145 if ((t < 2) || (strcasecmp(atypebuf, "http") != 0)) {
146 return -1;
147 } else if (t == 2) {
148 strcpy(request, "/");
149 }
150 if (sscanf(hostbuf, "%[^:]:%d", host, port) < 2)
151 (*port) = HTTP_PORT;
152 return 0;
153}
154
155int httpCachable(url, type, mime_hdr)
156 char *url;
157 char *type;
158 char *mime_hdr;
159{
160 stoplist *p;
161
162 /* GET and HEAD are cachable. Others are not. */
163 if (((strncasecmp(type, "GET", 3) != 0)) &&
164 (strncasecmp(type, "HEAD", 4) != 0))
165 return 0;
166
167 /* url's requiring authentication are uncachable */
168 if (mime_hdr && (strstr(mime_hdr, "Authorization")))
169 return 0;
170
171 /* scan stop list */
172 p = http_stoplist;
173 while (p) {
174 if (strstr(url, p->key))
175 return 0;
176 p = p->next;
177 }
178
179 /* else cachable */
180 return 1;
181}
182
/*
 * httpReadReplyTimeout - called when the read timeout fires on the
 * server socket.  Builds an ERR 103 page, aborts the store entry,
 * releases any pending ICP write state, and closes the connection.
 * Frees `data`; neither it nor `fd` may be used afterwards.
 */
void httpReadReplyTimeout(fd, data)
     int fd;
     HttpData *data;
{
    StoreEntry *entry = NULL;

    entry = data->entry;
    debug(4, "httpReadReplyTimeout: FD %d: <URL:%s>\n", fd, entry->url);
    /* Format the user-visible error page into the shared buffer. */
    sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG,
	entry->url,
	entry->url,
	"HTTP",
	103,
	"Read timeout",
	"The Network/Remote site may be down. Try again later.",
	HARVEST_VERSION,
	comm_hostname());

    /* Release the icpWrite state and the request page, if a write was
     * still pending when the timeout fired. */
    if (data->icp_rwd_ptr)
	safe_free(data->icp_rwd_ptr);
    if (data->icp_page_ptr) {
	put_free_8k_page(data->icp_page_ptr);
	data->icp_page_ptr = NULL;
    }
    storeAbort(entry, tmp_error_buf);
    /* Clear the read handler before closing so no stale callback fires. */
    comm_set_select_handler(fd, COMM_SELECT_READ, 0, 0);
    comm_close(fd);
#ifdef LOG_ERRORS
    CacheInfo->log_append(CacheInfo,
	entry->url,
	"0.0.0.0",
	store_mem_obj(entry, e_current_len),
	"ERR_103",		/* HTTP READ TIMEOUT */
	data->type ? data->type : "NULL");
#endif
    safe_free(data);
}
221
/*
 * httpLifetimeExpire - called when the socket's overall lifetime
 * expires.  Builds an ERR 110 page, aborts the store entry, releases
 * any pending ICP write state, and closes the connection.  Frees
 * `data`; neither it nor `fd` may be used afterwards.
 */
void httpLifetimeExpire(fd, data)
     int fd;
     HttpData *data;
{
    StoreEntry *entry = NULL;

    entry = data->entry;
    debug(4, "httpLifeTimeExpire: FD %d: <URL:%s>\n", fd, entry->url);

    /* Format the user-visible error page into the shared buffer. */
    sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG,
	entry->url,
	entry->url,
	"HTTP",
	110,
	"Transaction Timeout",
	"The Network/Remote site may be down or too slow. Try again later.",
	HARVEST_VERSION,
	comm_hostname());

    /* Reclaim the request page and icpWrite state in case the lifetime
     * expired in the middle of writing the request. */
    if (data->icp_page_ptr) {
	put_free_8k_page(data->icp_page_ptr);
	data->icp_page_ptr = NULL;
    }
    if (data->icp_rwd_ptr)
	safe_free(data->icp_rwd_ptr);
    storeAbort(entry, tmp_error_buf);
    /* Clear both handlers before closing so no stale callback fires. */
    comm_set_select_handler(fd, COMM_SELECT_READ | COMM_SELECT_WRITE, 0, 0);
    comm_close(fd);
#ifdef LOG_ERRORS
    CacheInfo->log_append(CacheInfo,
	entry->url,
	"0.0.0.0",
	store_mem_obj(entry, e_current_len),
	"ERR_110",		/* HTTP LIFETIME EXPIRE */
	data->type ? data->type : "NULL");
#endif
    safe_free(data);
}
261
262
263
264/* This will be called when data is ready to be read from fd. Read until
265 * error or connection closed. */
266void httpReadReply(fd, data)
267 int fd;
268 HttpData *data;
269{
270 static char buf[4096];
271 int len;
272 int clen;
273 int off;
274 StoreEntry *entry = NULL;
275
276 entry = data->entry;
277 if (entry->flag & DELETE_BEHIND) {
278 if (storeClientWaiting(entry)) {
279 /* check if we want to defer reading */
280 clen = store_mem_obj(entry, e_current_len);
281 off = store_mem_obj(entry, e_lowest_offset);
282 if ((clen - off) > HTTP_DELETE_GAP) {
283 debug(3, "httpReadReply: Read deferred for Object: %s\n",
284 entry->key);
285 debug(3, " Current Gap: %d bytes\n",
286 clen - off);
287
288 /* reschedule, so it will be automatically reactivated
289 * when Gap is big enough. */
290 comm_set_select_handler(fd,
291 COMM_SELECT_READ,
292 (PF) httpReadReply,
293 (caddr_t) data);
294
295/* don't install read timeout until we are below the GAP */
296#ifdef INSTALL_READ_TIMEOUT_ABOVE_GAP
297 comm_set_select_handler_plus_timeout(fd,
298 COMM_SELECT_TIMEOUT,
299 (PF) httpReadReplyTimeout,
300 (caddr_t) data,
301 getReadTimeout());
302#else
303 comm_set_select_handler_plus_timeout(fd,
304 COMM_SELECT_TIMEOUT,
305 (PF) NULL,
306 (caddr_t) NULL,
307 (time_t) 0);
308#endif
309 return;
310 }
311 } else {
312 /* we can terminate connection right now */
313 sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG,
314 entry->url,
315 entry->url,
316 "HTTP",
317 119,
318 "No Client",
319 "All Clients went away before tranmission is complete and object is too big to cache.",
320 HARVEST_VERSION,
321 comm_hostname());
322 storeAbort(entry, tmp_error_buf);
323 comm_close(fd);
324#ifdef LOG_ERRORS
325 CacheInfo->log_append(CacheInfo,
326 entry->url,
327 "0.0.0.0",
328 store_mem_obj(entry, e_current_len),
329 "ERR_119", /* HTTP NO CLIENTS, BIG OBJ */
330 data->type ? data->type : "NULL");
331#endif
332 safe_free(data);
333 return;
334 }
335 }
336 len = read(fd, buf, 4096);
337 debug(5, "httpReadReply: FD %d: len %d.\n", fd, len);
338
339 if (len < 0 || ((len == 0) && (store_mem_obj(entry, e_current_len) == 0))) {
340 /* XXX we we should log when len==0 and current_len==0 */
341 debug(2, "httpReadReply: FD %d: read failure: %s.\n",
342 fd, xstrerror());
343 if (errno == ECONNRESET) {
344 /* Connection reset by peer */
345 /* consider it as a EOF */
346 if (!(entry->flag & DELETE_BEHIND))
347 entry->expires = cached_curtime + ttlSet(entry);
348 sprintf(tmp_error_buf, "\n<p>Warning: The Remote Server sent RESET at the end of transmission.\n");
349 storeAppend(entry, tmp_error_buf, strlen(tmp_error_buf));
350 storeComplete(entry);
351 } else {
352 sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG,
353 entry->url,
354 entry->url,
355 "HTTP",
356 105,
357 "Read error",
358 "Network/Remote site is down. Try again later.",
359 HARVEST_VERSION,
360 comm_hostname());
361 storeAbort(entry, tmp_error_buf);
362 }
363 comm_close(fd);
364#ifdef LOG_ERRORS
365 CacheInfo->log_append(CacheInfo,
366 entry->url,
367 "0.0.0.0",
368 store_mem_obj(entry, e_current_len),
369 "ERR_105", /* HTTP READ ERROR */
370 data->type ? data->type : "NULL");
371#endif
372 safe_free(data);
373 } else if (len == 0) {
374 /* Connection closed; retrieval done. */
375 if (!(entry->flag & DELETE_BEHIND))
376 entry->expires = cached_curtime + ttlSet(entry);
377 storeComplete(entry);
378 comm_close(fd);
379 safe_free(data);
380 } else if (((store_mem_obj(entry, e_current_len) + len) > getHttpMax()) &&
381 !(entry->flag & DELETE_BEHIND)) {
382 /* accept data, but start to delete behind it */
383 storeStartDeleteBehind(entry);
384
385 storeAppend(entry, buf, len);
386 comm_set_select_handler(fd, COMM_SELECT_READ,
387 (PF) httpReadReply, (caddr_t) data);
388 comm_set_select_handler_plus_timeout(fd, COMM_SELECT_TIMEOUT,
389 (PF) httpReadReplyTimeout, (caddr_t) data, getReadTimeout());
390
391 } else if (entry->flag & CLIENT_ABORT_REQUEST) {
392 /* append the last bit of info we get */
393 storeAppend(entry, buf, len);
394 sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG,
395 entry->url,
396 entry->url,
397 "HTTP",
398 107,
399 "Client Aborted",
400 "Client(s) dropped connection before transmission is complete.\nObject fetching is aborted.\n",
401 HARVEST_VERSION,
402 comm_hostname());
403 storeAbort(entry, tmp_error_buf);
404 comm_close(fd);
405#ifdef LOG_ERRORS
406 CacheInfo->log_append(CacheInfo,
407 entry->url,
408 "0.0.0.0",
409 store_mem_obj(entry, e_current_len),
410 "ERR_107", /* HTTP CLIENT ABORT */
411 data->type ? data->type : "NULL");
412#endif
413 safe_free(data);
414 } else {
415 storeAppend(entry, buf, len);
416 comm_set_select_handler(fd, COMM_SELECT_READ,
417 (PF) httpReadReply, (caddr_t) data);
418 comm_set_select_handler_plus_timeout(fd, COMM_SELECT_TIMEOUT,
419 (PF) httpReadReplyTimeout, (caddr_t) data, getReadTimeout());
420 }
421}
422
423/* This will be called when request write is complete. Schedule read of
424 * reply. */
425void httpSendComplete(fd, buf, size, errflag, data)
426 int fd;
427 char *buf;
428 int size;
429 int errflag;
430 HttpData *data;
431{
432 StoreEntry *entry = NULL;
433
434 entry = data->entry;
435 debug(5, "httpSendComplete: FD %d: size %d: errflag %d.\n",
436 fd, size, errflag);
437
438 if (buf) {
439 put_free_8k_page(buf); /* Allocated by httpSendRequest. */
440 buf = NULL;
441 }
442 data->icp_page_ptr = NULL; /* So lifetime expire doesn't re-free */
443 data->icp_rwd_ptr = NULL; /* Don't double free in lifetimeexpire */
444
445 if (errflag) {
446 sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG,
447 entry->url,
448 entry->url,
449 "HTTP",
450 101,
451 "Cannot connect to the original site",
452 "The remote site may be down.",
453 HARVEST_VERSION,
454 comm_hostname());
455 storeAbort(entry, tmp_error_buf);
456 comm_close(fd);
457#ifdef LOG_ERRORS
458 CacheInfo->log_append(CacheInfo,
459 entry->url,
460 "0.0.0.0",
461 store_mem_obj(entry, e_current_len),
462 "ERR_101", /* HTTP CONNECT FAIL */
463 data->type ? data->type : "NULL");
464#endif
465 safe_free(data);
466 return;
467 } else {
468 /* Schedule read reply. */
469 comm_set_select_handler(fd, COMM_SELECT_READ,
470 (PF) httpReadReply, (caddr_t) data);
471 comm_set_select_handler_plus_timeout(fd, COMM_SELECT_TIMEOUT,
472 (PF) httpReadReplyTimeout, (caddr_t) data, getReadTimeout());
473 comm_set_fd_lifetime(fd, -1); /* disable lifetime DPW */
474
475 }
476}
477
478/* This will be called when connect completes. Write request. */
479void httpSendRequest(fd, data)
480 int fd;
481 HttpData *data;
482{
483 char *xbuf = NULL;
484 char *ybuf = NULL;
485 char *buf = NULL;
486 char *t = NULL;
487 char *post_buf = NULL;
488 static char *crlf = "\r\n";
489 static char *HARVEST_PROXY_TEXT = "via Harvest Cache version";
490 int len = 0;
491 int buflen;
492
493 debug(5, "httpSendRequest: FD %d: data %p.\n", fd, data);
494 buflen = strlen(data->type) + strlen(data->request);
495 if (data->mime_hdr)
496 buflen += strlen(data->mime_hdr);
497 buflen += 512; /* lots of extra */
498
499 if (!strcasecmp(data->type, "POST") && data->mime_hdr) {
500 if ((t = strstr(data->mime_hdr, "\r\n\r\n"))) {
501 post_buf = xstrdup(t + 4);
502 *(t + 4) = '\0';
503 }
504 }
505 /* Since we limit the URL read to a 4K page, I doubt that the
506 * mime header could be longer than an 8K page */
507 buf = (char *) get_free_8k_page();
508 data->icp_page_ptr = buf;
509 if (buflen > DISK_PAGE_SIZE) {
510 debug(0, "Mime header length %d is breaking ICP code\n", buflen);
511 }
512 memset(buf, '\0', buflen);
513
514 sprintf(buf, "%s %s ", data->type, data->request);
515 len = strlen(buf);
516 if (data->mime_hdr) { /* we have to parse the MIME header */
517 xbuf = xstrdup(data->mime_hdr);
518 for (t = strtok(xbuf, crlf); t; t = strtok(NULL, crlf)) {
519 if (strncasecmp(t, "User-Agent:", 11) == 0) {
520 ybuf = (char *) get_free_4k_page();
521 memset(ybuf, '\0', SM_PAGE_SIZE);
522 sprintf(ybuf, "%s %s %s", t, HARVEST_PROXY_TEXT, HARVEST_VERSION);
523 t = ybuf;
524 }
525 if (strncasecmp(t, "If-Modified-Since:", 18) == 0)
526 continue;
527 if (len + (int) strlen(t) > buflen - 10)
528 continue;
529 strcat(buf, t);
530 strcat(buf, crlf);
531 len += strlen(t) + 2;
532 }
533 xfree(xbuf);
534 if (ybuf) {
535 put_free_4k_page(ybuf);
536 ybuf = NULL;
537 }
538 }
539 strcat(buf, crlf);
540 len += 2;
541 if (post_buf) {
542 strcat(buf, post_buf);
543 len += strlen(post_buf);
544 xfree(post_buf);
545 }
546 debug(6, "httpSendRequest: FD %d: buf '%s'\n", fd, buf);
547 data->icp_rwd_ptr = icpWrite(fd, buf, len, 30, httpSendComplete, data);
548}
549
550void httpConnInProgress(fd, data)
551 int fd;
552 HttpData *data;
553{
554 StoreEntry *entry = data->entry;
555
556 if (comm_connect(fd, data->host, data->port) != COMM_OK)
557 switch (errno) {
558 case EINPROGRESS:
559 case EALREADY:
560 /* schedule this handler again */
561 comm_set_select_handler(fd,
562 COMM_SELECT_WRITE,
563 (PF) httpConnInProgress,
564 (caddr_t) data);
565 return;
566 case EISCONN:
567 break; /* cool, we're connected */
568 default:
569 comm_close(fd);
570 sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG,
571 entry->url,
572 entry->url,
573 "HTTP",
574 104,
575 "Cannot connect to the original site",
576 "The remote site may be down.",
577 HARVEST_VERSION,
578 comm_hostname());
579 storeAbort(entry, tmp_error_buf);
580#ifdef LOG_ERRORS
581 CacheInfo->log_append(CacheInfo,
582 entry->url,
583 "0.0.0.0",
584 store_mem_obj(entry, e_current_len),
585 "ERR_104", /* HTTP CONNECT FAIL */
586 data->type ? data->type : "NULL");
587#endif
588 safe_free(data);
589 return;
590 }
591 /* Call the real write handler, now that we're fully connected */
592 comm_set_select_handler(fd, COMM_SELECT_WRITE,
593 (PF) httpSendRequest, (caddr_t) data);
594}
595
/*
 * proxyhttpStart - begin fetching `url` via a neighbor/parent cache
 * `e`, speaking proxy-HTTP (full URL in the request line) to
 * e->ascii_port.  The peer's IP must already be in the ipcache.
 * Returns COMM_OK when the connect succeeded or is in progress
 * (handlers installed), COMM_ERROR on any immediate failure (entry
 * aborted, `data` freed).
 */
int proxyhttpStart(e, url, entry)
     edge *e;
     char *url;
     StoreEntry *entry;
{

    /* Create state structure. */
    int sock, status;
    HttpData *data = (HttpData *) xmalloc(sizeof(HttpData));

    debug(3, "proxyhttpStart: <URL:%s>\n", url);
    debug(10, "proxyhttpStart: HTTP request header:\n%s\n",
	store_mem_obj(entry, mime_hdr));

    memset(data, '\0', sizeof(HttpData));
    data->entry = entry;

    /* For a proxy request the whole URL is the request-line target.
     * memset above guarantees NUL-termination after strncpy. */
    strncpy(data->request, url, sizeof(data->request) - 1);
    /* NOTE(review): entry->type_id is assumed to be a valid index into
     * HTTP_OPS -- confirm callers guarantee this. */
    data->type = HTTP_OPS[entry->type_id];
    data->port = e->ascii_port;
    data->mime_hdr = store_mem_obj(entry, mime_hdr);
    strncpy(data->host, e->host, sizeof(data->host) - 1);

    /* Objects from a proxy-only neighbor are never kept locally. */
    if (e->proxy_only)
	storeStartDeleteBehind(entry);

    /* Create socket. */
    sock = comm_open(COMM_NONBLOCKING, 0, 0, url);
    if (sock == COMM_ERROR) {
	debug(4, "proxyhttpStart: Failed because we're out of sockets.\n");
	sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG,
	    entry->url,
	    entry->url,
	    "HTTP",
	    111,
	    "Cached short of file-descriptors, sorry",
	    "",
	    HARVEST_VERSION,
	    comm_hostname());
	storeAbort(entry, tmp_error_buf);
#ifdef LOG_ERRORS
	CacheInfo->log_append(CacheInfo,
	    entry->url,
	    "0.0.0.0",
	    store_mem_obj(entry, e_current_len),
	    "ERR_111",		/* HTTP NO FD'S */
	    data->type ? data->type : "NULL");
#endif
	safe_free(data);
	return COMM_ERROR;
    }
    /* check if IP is already in cache. It must be.
     * It should be done before this route is called.
     * Otherwise, we cannot check return code for connect. */
    if (!ipcache_gethostbyname(data->host)) {
	debug(4, "proxyhttpstart: Called without IP entry in ipcache. OR lookup failed.\n");
	comm_close(sock);
	sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG,
	    entry->url,
	    entry->url,
	    "HTTP",
	    102,
	    "DNS name lookup failure",
	    dns_error_message,
	    HARVEST_VERSION,
	    comm_hostname());
	storeAbort(entry, tmp_error_buf);
#ifdef LOG_ERRORS
	CacheInfo->log_append(CacheInfo,
	    entry->url,
	    "0.0.0.0",
	    store_mem_obj(entry, e_current_len),
	    "ERR_102",		/* HTTP DNS FAIL */
	    data->type ? data->type : "NULL");
#endif
	safe_free(data);
	return COMM_ERROR;
    }
    /* Open connection. */
    if ((status = comm_connect(sock, data->host, data->port))) {
	if (status != EINPROGRESS) {
	    /* Hard failure: abort and mark the neighbor down. */
	    comm_close(sock);
	    sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG,
		entry->url,
		entry->url,
		"HTTP",
		104,
		"Cannot connect to the original site",
		"The remote site may be down.",
		HARVEST_VERSION,
		comm_hostname());
	    storeAbort(entry, tmp_error_buf);
#ifdef LOG_ERRORS
	    CacheInfo->log_append(CacheInfo,
		entry->url,
		"0.0.0.0",
		store_mem_obj(entry, e_current_len),
		"ERR_104",	/* HTTP CONNECT FAIL */
		data->type ? data->type : "NULL");
#endif
	    safe_free(data);
	    e->last_fail_time = cached_curtime;
	    e->neighbor_up = 0;
	    return COMM_ERROR;
	} else {
	    /* Connect pending: finish it in httpConnInProgress. */
	    debug(5, "proxyhttpStart: FD %d: EINPROGRESS.\n", sock);
	    comm_set_select_handler(sock, COMM_SELECT_LIFETIME,
		(PF) httpLifetimeExpire, (caddr_t) data);
	    comm_set_select_handler(sock, COMM_SELECT_WRITE,
		(PF) httpConnInProgress, (caddr_t) data);
	    return COMM_OK;
	}
    }
    /* Install connection complete handler. */
    fd_note(sock, entry->url);
    comm_set_select_handler(sock, COMM_SELECT_LIFETIME,
	(PF) httpLifetimeExpire, (caddr_t) data);
    comm_set_select_handler(sock, COMM_SELECT_WRITE,
	(PF) httpSendRequest, (caddr_t) data);
    return COMM_OK;

}
718
/*
 * httpStart - begin fetching `url` directly from the origin server.
 * `type` is the request method, `mime_hdr` the client's MIME header
 * (may be NULL for requests without one -- TODO confirm; it is passed
 * to a "%s" debug format below).  The host's IP must already be in
 * the ipcache.  Returns COMM_OK when the connect succeeded or is in
 * progress (handlers installed), COMM_ERROR on any immediate failure
 * (entry aborted, `data` freed).
 */
int httpStart(unusedfd, url, type, mime_hdr, entry)
     int unusedfd;
     char *url;
     char *type;
     char *mime_hdr;
     StoreEntry *entry;
{
    /* Create state structure. */
    int sock, status;
    HttpData *data = (HttpData *) xmalloc(sizeof(HttpData));

    debug(3, "httpStart: %s <URL:%s>\n", type, url);
    debug(10, "httpStart: mime_hdr '%s'\n", mime_hdr);

    memset(data, '\0', sizeof(HttpData));
    data->entry = entry;
    data->type = type;
    data->mime_hdr = mime_hdr;

    /* Parse url.  Fills data->host, data->port and data->request;
     * non-zero return means the URL is not a parsable http URL. */
    if (http_url_parser(url, data->host, &data->port, data->request)) {
	sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG,
	    entry->url,
	    entry->url,
	    "HTTP",
	    110,
	    "Invalid URL syntax: Cannot parse.",
	    "Contact your system administrator for further help.",
	    HARVEST_VERSION,
	    comm_hostname());
	storeAbort(entry, tmp_error_buf);
#ifdef LOG_ERRORS
	CacheInfo->log_append(CacheInfo,
	    entry->url,
	    "0.0.0.0",
	    store_mem_obj(entry, e_current_len),
	    "ERR_110",		/* HTTP INVALID URL */
	    data->type ? data->type : "NULL");
#endif
	safe_free(data);
	return COMM_ERROR;
    }
    /* Create socket. */
    sock = comm_open(COMM_NONBLOCKING, 0, 0, url);
    if (sock == COMM_ERROR) {
	debug(4, "httpStart: Failed because we're out of sockets.\n");
	sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG,
	    entry->url,
	    entry->url,
	    "HTTP",
	    111,
	    "Cached short of file-descriptors, sorry",
	    "",
	    HARVEST_VERSION,
	    comm_hostname());
	storeAbort(entry, tmp_error_buf);
#ifdef LOG_ERRORS
	CacheInfo->log_append(CacheInfo,
	    entry->url,
	    "0.0.0.0",
	    store_mem_obj(entry, e_current_len),
	    "ERR_111",		/* HTTP NO FD'S */
	    data->type ? data->type : "NULL");
#endif
	safe_free(data);
	return COMM_ERROR;
    }
    /* check if IP is already in cache. It must be.
     * It should be done before this route is called.
     * Otherwise, we cannot check return code for connect. */
    if (!ipcache_gethostbyname(data->host)) {
	debug(4, "httpstart: Called without IP entry in ipcache. OR lookup failed.\n");
	comm_close(sock);
	sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG,
	    entry->url,
	    entry->url,
	    "HTTP",
	    108,
	    "DNS name lookup failure",
	    dns_error_message,
	    HARVEST_VERSION,
	    comm_hostname());
	storeAbort(entry, tmp_error_buf);
#ifdef LOG_ERRORS
	CacheInfo->log_append(CacheInfo,
	    entry->url,
	    "0.0.0.0",
	    store_mem_obj(entry, e_current_len),
	    "ERR_108",		/* HTTP DNS FAIL */
	    data->type ? data->type : "NULL");
#endif
	safe_free(data);
	return COMM_ERROR;
    }
    /* Open connection. */
    if ((status = comm_connect(sock, data->host, data->port))) {
	if (status != EINPROGRESS) {
	    /* Hard failure: abort the entry and clean up. */
	    comm_close(sock);
	    sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG,
		entry->url,
		entry->url,
		"HTTP",
		109,
		"Cannot connect to the original site",
		"The remote site may be down.",
		HARVEST_VERSION,
		comm_hostname());
	    storeAbort(entry, tmp_error_buf);
#ifdef LOG_ERRORS
	    CacheInfo->log_append(CacheInfo,
		entry->url,
		"0.0.0.0",
		store_mem_obj(entry, e_current_len),
		"ERR_109",	/* HTTP CONNECT FAIL */
		data->type ? data->type : "NULL");
#endif
	    safe_free(data);
	    return COMM_ERROR;
	} else {
	    /* Connect pending: finish it in httpConnInProgress. */
	    debug(5, "httpStart: FD %d: EINPROGRESS.\n", sock);
	    comm_set_select_handler(sock, COMM_SELECT_LIFETIME,
		(PF) httpLifetimeExpire, (caddr_t) data);
	    comm_set_select_handler(sock, COMM_SELECT_WRITE,
		(PF) httpConnInProgress, (caddr_t) data);
	    return COMM_OK;
	}
    }
    /* Install connection complete handler. */
    fd_note(sock, entry->url);
    comm_set_select_handler(sock, COMM_SELECT_LIFETIME,
	(PF) httpLifetimeExpire, (caddr_t) data);
    comm_set_select_handler(sock, COMM_SELECT_WRITE,
	(PF) httpSendRequest, (caddr_t) data);
    return COMM_OK;
}