]>
Commit | Line | Data |
---|---|---|
090089c4 | 1 | static char rcsid[] = "$Id: http.cc,v 1.1 1996/02/22 06:23:55 wessels Exp $"; |
2 | /* | |
3 | * File: http.c | |
4 | * Description: state machine for http retrieval protocol. | |
5 | * Based on John's gopher retrieval module. | |
6 | * Author: Anawat Chankhunthod, USC | |
7 | * Created: Tue May 28 10:57:11 1994 | |
8 | * Language: C | |
9 | ********************************************************************** | |
10 | * Copyright (c) 1994, 1995. All rights reserved. | |
11 | * | |
12 | * The Harvest software was developed by the Internet Research Task | |
13 | * Force Research Group on Resource Discovery (IRTF-RD): | |
14 | * | |
15 | * Mic Bowman of Transarc Corporation. | |
16 | * Peter Danzig of the University of Southern California. | |
17 | * Darren R. Hardy of the University of Colorado at Boulder. | |
18 | * Udi Manber of the University of Arizona. | |
19 | * Michael F. Schwartz of the University of Colorado at Boulder. | |
20 | * Duane Wessels of the University of Colorado at Boulder. | |
21 | * | |
22 | * This copyright notice applies to software in the Harvest | |
23 | * ``src/'' directory only. Users should consult the individual | |
24 | * copyright notices in the ``components/'' subdirectories for | |
25 | * copyright information about other software bundled with the | |
26 | * Harvest source code distribution. | |
27 | * | |
28 | * TERMS OF USE | |
29 | * | |
30 | * The Harvest software may be used and re-distributed without | |
31 | * charge, provided that the software origin and research team are | |
32 | * cited in any use of the system. Most commonly this is | |
33 | * accomplished by including a link to the Harvest Home Page | |
34 | * (http://harvest.cs.colorado.edu/) from the query page of any | |
35 | * Broker you deploy, as well as in the query result pages. These | |
36 | * links are generated automatically by the standard Broker | |
37 | * software distribution. | |
38 | * | |
39 | * The Harvest software is provided ``as is'', without express or | |
40 | * implied warranty, and with no support nor obligation to assist | |
41 | * in its use, correction, modification or enhancement. We assume | |
42 | * no liability with respect to the infringement of copyrights, | |
43 | * trade secrets, or any patents, and are not responsible for | |
44 | * consequential damages. Proper use of the Harvest software is | |
45 | * entirely the responsibility of the user. | |
46 | * | |
47 | * DERIVATIVE WORKS | |
48 | * | |
49 | * Users may make derivative works from the Harvest software, subject | |
50 | * to the following constraints: | |
51 | * | |
52 | * - You must include the above copyright notice and these | |
53 | * accompanying paragraphs in all forms of derivative works, | |
54 | * and any documentation and other materials related to such | |
55 | * distribution and use acknowledge that the software was | |
56 | * developed at the above institutions. | |
57 | * | |
58 | * - You must notify IRTF-RD regarding your distribution of | |
59 | * the derivative work. | |
60 | * | |
61 | * - You must clearly notify users that your are distributing | |
62 | * a modified version and not the original Harvest software. | |
63 | * | |
64 | * - Any derivative product is also subject to these copyright | |
65 | * and use restrictions. | |
66 | * | |
67 | * Note that the Harvest software is NOT in the public domain. We | |
68 | * retain copyright, as specified above. | |
69 | * | |
70 | * HISTORY OF FREE SOFTWARE STATUS | |
71 | * | |
72 | * Originally we required sites to license the software in cases | |
73 | * where they were going to build commercial products/services | |
74 | * around Harvest. In June 1995 we changed this policy. We now | |
75 | * allow people to use the core Harvest software (the code found in | |
76 | * the Harvest ``src/'' directory) for free. We made this change | |
77 | * in the interest of encouraging the widest possible deployment of | |
78 | * the technology. The Harvest software is really a reference | |
79 | * implementation of a set of protocols and formats, some of which | |
80 | * we intend to standardize. We encourage commercial | |
81 | * re-implementations of code complying to this set of standards. | |
82 | * | |
83 | * | |
84 | */ | |
#include "config.h"
#include <sys/errno.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "ansihelp.h"
#include "comm.h"
#include "store.h"
#include "stat.h"
#include "url.h"
#include "ipcache.h"
#include "cache_cf.h"
#include "ttl.h"
#include "icp.h"
#include "util.h"

#define HTTP_PORT 80		/* default origin port when the URL gives none */
#define HTTP_DELETE_GAP (64*1024)	/* defer reads in delete-behind mode once the
					 * unread gap exceeds this many bytes */

extern int errno;
extern char *dns_error_message;	/* set by the ipcache/DNS layer on lookup failure */
extern time_t cached_curtime;	/* cache-wide notion of "now" */

/* Per-request state for a single HTTP retrieval.  One of these is
 * xmalloc'd by httpStart()/proxyhttpStart() and freed by whichever
 * handler terminates the transfer. */
typedef struct _httpdata {
    StoreEntry *entry;		/* cache object being filled */
    char host[HARVESTHOSTNAMELEN + 1];	/* origin (or neighbor) hostname */
    int port;			/* origin (or neighbor) TCP port */
    char *type;			/* request method string: GET/POST/HEAD */
    char *mime_hdr;		/* client's request MIME header, may be NULL */
    char type_id;
    char request[MAX_URL + 1];	/* URL-path, or full URL when proxying */
    char *icp_page_ptr;		/* Used to send proxy-http request:
				 * put_free_8k_page(me) if the lifetime
				 * expires */
    char *icp_rwd_ptr;		/* When a lifetime expires during the
				 * middle of an icpwrite, don't lose the
				 * icpReadWriteData */
} HttpData;

extern char *tmp_error_buf;	/* shared scratch buffer for error pages */

/* Request-method strings; indexed by a StoreEntry's type_id. */
char *HTTP_OPS[] =
{"GET", "POST", "HEAD", ""};
129 | ||
130 | int http_url_parser(url, host, port, request) | |
131 | char *url; | |
132 | char *host; | |
133 | int *port; | |
134 | char *request; | |
135 | { | |
136 | static char hostbuf[MAX_URL]; | |
137 | static char atypebuf[MAX_URL]; | |
138 | int t; | |
139 | ||
140 | /* initialize everything */ | |
141 | (*port) = 0; | |
142 | atypebuf[0] = hostbuf[0] = request[0] = host[0] = '\0'; | |
143 | ||
144 | t = sscanf(url, "%[a-zA-Z]://%[^/]%s", atypebuf, hostbuf, request); | |
145 | if ((t < 2) || (strcasecmp(atypebuf, "http") != 0)) { | |
146 | return -1; | |
147 | } else if (t == 2) { | |
148 | strcpy(request, "/"); | |
149 | } | |
150 | if (sscanf(hostbuf, "%[^:]:%d", host, port) < 2) | |
151 | (*port) = HTTP_PORT; | |
152 | return 0; | |
153 | } | |
154 | ||
155 | int httpCachable(url, type, mime_hdr) | |
156 | char *url; | |
157 | char *type; | |
158 | char *mime_hdr; | |
159 | { | |
160 | stoplist *p; | |
161 | ||
162 | /* GET and HEAD are cachable. Others are not. */ | |
163 | if (((strncasecmp(type, "GET", 3) != 0)) && | |
164 | (strncasecmp(type, "HEAD", 4) != 0)) | |
165 | return 0; | |
166 | ||
167 | /* url's requiring authentication are uncachable */ | |
168 | if (mime_hdr && (strstr(mime_hdr, "Authorization"))) | |
169 | return 0; | |
170 | ||
171 | /* scan stop list */ | |
172 | p = http_stoplist; | |
173 | while (p) { | |
174 | if (strstr(url, p->key)) | |
175 | return 0; | |
176 | p = p->next; | |
177 | } | |
178 | ||
179 | /* else cachable */ | |
180 | return 1; | |
181 | } | |
182 | ||
/* This will be called when timeout on read.  The remote server stopped
 * sending before the object was complete: abort the entry with a "103
 * Read timeout" error page, release any pending write state, and close
 * the connection. */
void httpReadReplyTimeout(fd, data)
     int fd;
     HttpData *data;
{
    StoreEntry *entry = NULL;

    entry = data->entry;
    debug(4, "httpReadReplyTimeout: FD %d: <URL:%s>\n", fd, entry->url);
    /* build the error page in the shared scratch buffer */
    sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG,
	entry->url,
	entry->url,
	"HTTP",
	103,
	"Read timeout",
	"The Network/Remote site may be down. Try again later.",
	HARVEST_VERSION,
	comm_hostname());

    /* An in-flight icpWrite may still own read/write state; free it
     * here so it is neither leaked nor freed twice later. */
    if (data->icp_rwd_ptr)
	safe_free(data->icp_rwd_ptr);
    if (data->icp_page_ptr) {
	put_free_8k_page(data->icp_page_ptr);
	data->icp_page_ptr = NULL;
    }
    storeAbort(entry, tmp_error_buf);
    /* clear the read handler before closing the descriptor */
    comm_set_select_handler(fd, COMM_SELECT_READ, 0, 0);
    comm_close(fd);
#ifdef LOG_ERRORS
    CacheInfo->log_append(CacheInfo,
	entry->url,
	"0.0.0.0",
	store_mem_obj(entry, e_current_len),
	"ERR_103",		/* HTTP READ TIMEOUT */
	data->type ? data->type : "NULL");
#endif
    safe_free(data);
}
221 | ||
/* This will be called when socket lifetime is expired.  The whole
 * transaction took too long: abort the entry with a "110 Transaction
 * Timeout" error page, release pending write state, and close. */
void httpLifetimeExpire(fd, data)
     int fd;
     HttpData *data;
{
    StoreEntry *entry = NULL;

    entry = data->entry;
    debug(4, "httpLifeTimeExpire: FD %d: <URL:%s>\n", fd, entry->url);

    sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG,
	entry->url,
	entry->url,
	"HTTP",
	110,
	"Transaction Timeout",
	"The Network/Remote site may be down or too slow. Try again later.",
	HARVEST_VERSION,
	comm_hostname());

    /* The lifetime can expire mid-icpWrite: give back the request page
     * and the read/write state so neither is leaked. */
    if (data->icp_page_ptr) {
	put_free_8k_page(data->icp_page_ptr);
	data->icp_page_ptr = NULL;
    }
    if (data->icp_rwd_ptr)
	safe_free(data->icp_rwd_ptr);
    storeAbort(entry, tmp_error_buf);
    /* clear both read and write handlers before closing */
    comm_set_select_handler(fd, COMM_SELECT_READ | COMM_SELECT_WRITE, 0, 0);
    comm_close(fd);
#ifdef LOG_ERRORS
    CacheInfo->log_append(CacheInfo,
	entry->url,
	"0.0.0.0",
	store_mem_obj(entry, e_current_len),
	"ERR_110",		/* HTTP LIFETIME EXPIRE */
	data->type ? data->type : "NULL");
#endif
    safe_free(data);
}
261 | ||
262 | ||
263 | ||
/* This will be called when data is ready to be read from fd.  Read until
 * error or connection closed.  Handles four terminal outcomes (read
 * error, EOF, client abort, normal append) plus delete-behind flow
 * control for objects too big to cache. */
void httpReadReply(fd, data)
     int fd;
     HttpData *data;
{
    static char buf[4096];
    int len;
    int clen;
    int off;
    StoreEntry *entry = NULL;

    entry = data->entry;
    if (entry->flag & DELETE_BEHIND) {
	if (storeClientWaiting(entry)) {
	    /* check if we want to defer reading */
	    clen = store_mem_obj(entry, e_current_len);
	    off = store_mem_obj(entry, e_lowest_offset);
	    if ((clen - off) > HTTP_DELETE_GAP) {
		/* Slowest client is too far behind; stop reading from
		 * the server until the gap shrinks. */
		debug(3, "httpReadReply: Read deferred for Object: %s\n",
		    entry->key);
		debug(3, " Current Gap: %d bytes\n",
		    clen - off);

		/* reschedule, so it will be automatically reactivated
		 * when Gap is big enough. */
		comm_set_select_handler(fd,
		    COMM_SELECT_READ,
		    (PF) httpReadReply,
		    (caddr_t) data);

		/* don't install read timeout until we are below the GAP */
#ifdef INSTALL_READ_TIMEOUT_ABOVE_GAP
		comm_set_select_handler_plus_timeout(fd,
		    COMM_SELECT_TIMEOUT,
		    (PF) httpReadReplyTimeout,
		    (caddr_t) data,
		    getReadTimeout());
#else
		comm_set_select_handler_plus_timeout(fd,
		    COMM_SELECT_TIMEOUT,
		    (PF) NULL,
		    (caddr_t) NULL,
		    (time_t) 0);
#endif
		return;
	    }
	} else {
	    /* we can terminate connection right now */
	    sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG,
		entry->url,
		entry->url,
		"HTTP",
		119,
		"No Client",
		"All Clients went away before tranmission is complete and object is too big to cache.",
		HARVEST_VERSION,
		comm_hostname());
	    storeAbort(entry, tmp_error_buf);
	    comm_close(fd);
#ifdef LOG_ERRORS
	    CacheInfo->log_append(CacheInfo,
		entry->url,
		"0.0.0.0",
		store_mem_obj(entry, e_current_len),
		"ERR_119",	/* HTTP NO CLIENTS, BIG OBJ */
		data->type ? data->type : "NULL");
#endif
	    safe_free(data);
	    return;
	}
    }
    len = read(fd, buf, 4096);
    debug(5, "httpReadReply: FD %d: len %d.\n", fd, len);

    if (len < 0 || ((len == 0) && (store_mem_obj(entry, e_current_len) == 0))) {
	/* XXX we should log when len==0 and current_len==0 */
	/* NOTE(review): when len == 0 this branch still inspects errno,
	 * which read() did not set -- a stale ECONNRESET could be seen.
	 * Verify whether the len==0/current_len==0 case should check
	 * errno at all. */
	debug(2, "httpReadReply: FD %d: read failure: %s.\n",
	    fd, xstrerror());
	if (errno == ECONNRESET) {
	    /* Connection reset by peer */
	    /* consider it as a EOF */
	    if (!(entry->flag & DELETE_BEHIND))
		entry->expires = cached_curtime + ttlSet(entry);
	    sprintf(tmp_error_buf, "\n<p>Warning: The Remote Server sent RESET at the end of transmission.\n");
	    storeAppend(entry, tmp_error_buf, strlen(tmp_error_buf));
	    storeComplete(entry);
	} else {
	    /* hard read error: abort the object */
	    sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG,
		entry->url,
		entry->url,
		"HTTP",
		105,
		"Read error",
		"Network/Remote site is down. Try again later.",
		HARVEST_VERSION,
		comm_hostname());
	    storeAbort(entry, tmp_error_buf);
	}
	comm_close(fd);
#ifdef LOG_ERRORS
	CacheInfo->log_append(CacheInfo,
	    entry->url,
	    "0.0.0.0",
	    store_mem_obj(entry, e_current_len),
	    "ERR_105",		/* HTTP READ ERROR */
	    data->type ? data->type : "NULL");
#endif
	safe_free(data);
    } else if (len == 0) {
	/* Connection closed; retrieval done. */
	if (!(entry->flag & DELETE_BEHIND))
	    entry->expires = cached_curtime + ttlSet(entry);
	storeComplete(entry);
	comm_close(fd);
	safe_free(data);
    } else if (((store_mem_obj(entry, e_current_len) + len) > getHttpMax()) &&
	!(entry->flag & DELETE_BEHIND)) {
	/* Object grew past the configured maximum: accept data, but
	 * start to delete behind it. */
	storeStartDeleteBehind(entry);

	storeAppend(entry, buf, len);
	comm_set_select_handler(fd, COMM_SELECT_READ,
	    (PF) httpReadReply, (caddr_t) data);
	comm_set_select_handler_plus_timeout(fd, COMM_SELECT_TIMEOUT,
	    (PF) httpReadReplyTimeout, (caddr_t) data, getReadTimeout());

    } else if (entry->flag & CLIENT_ABORT_REQUEST) {
	/* append the last bit of info we get, then abort the fetch */
	storeAppend(entry, buf, len);
	sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG,
	    entry->url,
	    entry->url,
	    "HTTP",
	    107,
	    "Client Aborted",
	    "Client(s) dropped connection before transmission is complete.\nObject fetching is aborted.\n",
	    HARVEST_VERSION,
	    comm_hostname());
	storeAbort(entry, tmp_error_buf);
	comm_close(fd);
#ifdef LOG_ERRORS
	CacheInfo->log_append(CacheInfo,
	    entry->url,
	    "0.0.0.0",
	    store_mem_obj(entry, e_current_len),
	    "ERR_107",		/* HTTP CLIENT ABORT */
	    data->type ? data->type : "NULL");
#endif
	safe_free(data);
    } else {
	/* normal case: store the data and reschedule ourselves */
	storeAppend(entry, buf, len);
	comm_set_select_handler(fd, COMM_SELECT_READ,
	    (PF) httpReadReply, (caddr_t) data);
	comm_set_select_handler_plus_timeout(fd, COMM_SELECT_TIMEOUT,
	    (PF) httpReadReplyTimeout, (caddr_t) data, getReadTimeout());
    }
}
422 | ||
/* This will be called when request write is complete.  Schedule read of
 * reply on success; abort the entry with a "101 Cannot connect" page
 * on write failure. */
void httpSendComplete(fd, buf, size, errflag, data)
     int fd;
     char *buf;
     int size;
     int errflag;
     HttpData *data;
{
    StoreEntry *entry = NULL;

    entry = data->entry;
    debug(5, "httpSendComplete: FD %d: size %d: errflag %d.\n",
	fd, size, errflag);

    if (buf) {
	put_free_8k_page(buf);	/* Allocated by httpSendRequest. */
	buf = NULL;
    }
    /* The write finished, so the timeout handlers must not free these
     * again. */
    data->icp_page_ptr = NULL;	/* So lifetime expire doesn't re-free */
    data->icp_rwd_ptr = NULL;	/* Don't double free in lifetimeexpire */

    if (errflag) {
	/* the request could not be written: give up on this object */
	sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG,
	    entry->url,
	    entry->url,
	    "HTTP",
	    101,
	    "Cannot connect to the original site",
	    "The remote site may be down.",
	    HARVEST_VERSION,
	    comm_hostname());
	storeAbort(entry, tmp_error_buf);
	comm_close(fd);
#ifdef LOG_ERRORS
	CacheInfo->log_append(CacheInfo,
	    entry->url,
	    "0.0.0.0",
	    store_mem_obj(entry, e_current_len),
	    "ERR_101",		/* HTTP CONNECT FAIL */
	    data->type ? data->type : "NULL");
#endif
	safe_free(data);
	return;
    } else {
	/* Schedule read reply. */
	comm_set_select_handler(fd, COMM_SELECT_READ,
	    (PF) httpReadReply, (caddr_t) data);
	comm_set_select_handler_plus_timeout(fd, COMM_SELECT_TIMEOUT,
	    (PF) httpReadReplyTimeout, (caddr_t) data, getReadTimeout());
	/* reply progress is now policed by the read timeout instead */
	comm_set_fd_lifetime(fd, -1);	/* disable lifetime DPW */

    }
}
477 | ||
478 | /* This will be called when connect completes. Write request. */ | |
479 | void httpSendRequest(fd, data) | |
480 | int fd; | |
481 | HttpData *data; | |
482 | { | |
483 | char *xbuf = NULL; | |
484 | char *ybuf = NULL; | |
485 | char *buf = NULL; | |
486 | char *t = NULL; | |
487 | char *post_buf = NULL; | |
488 | static char *crlf = "\r\n"; | |
489 | static char *HARVEST_PROXY_TEXT = "via Harvest Cache version"; | |
490 | int len = 0; | |
491 | int buflen; | |
492 | ||
493 | debug(5, "httpSendRequest: FD %d: data %p.\n", fd, data); | |
494 | buflen = strlen(data->type) + strlen(data->request); | |
495 | if (data->mime_hdr) | |
496 | buflen += strlen(data->mime_hdr); | |
497 | buflen += 512; /* lots of extra */ | |
498 | ||
499 | if (!strcasecmp(data->type, "POST") && data->mime_hdr) { | |
500 | if ((t = strstr(data->mime_hdr, "\r\n\r\n"))) { | |
501 | post_buf = xstrdup(t + 4); | |
502 | *(t + 4) = '\0'; | |
503 | } | |
504 | } | |
505 | /* Since we limit the URL read to a 4K page, I doubt that the | |
506 | * mime header could be longer than an 8K page */ | |
507 | buf = (char *) get_free_8k_page(); | |
508 | data->icp_page_ptr = buf; | |
509 | if (buflen > DISK_PAGE_SIZE) { | |
510 | debug(0, "Mime header length %d is breaking ICP code\n", buflen); | |
511 | } | |
512 | memset(buf, '\0', buflen); | |
513 | ||
514 | sprintf(buf, "%s %s ", data->type, data->request); | |
515 | len = strlen(buf); | |
516 | if (data->mime_hdr) { /* we have to parse the MIME header */ | |
517 | xbuf = xstrdup(data->mime_hdr); | |
518 | for (t = strtok(xbuf, crlf); t; t = strtok(NULL, crlf)) { | |
519 | if (strncasecmp(t, "User-Agent:", 11) == 0) { | |
520 | ybuf = (char *) get_free_4k_page(); | |
521 | memset(ybuf, '\0', SM_PAGE_SIZE); | |
522 | sprintf(ybuf, "%s %s %s", t, HARVEST_PROXY_TEXT, HARVEST_VERSION); | |
523 | t = ybuf; | |
524 | } | |
525 | if (strncasecmp(t, "If-Modified-Since:", 18) == 0) | |
526 | continue; | |
527 | if (len + (int) strlen(t) > buflen - 10) | |
528 | continue; | |
529 | strcat(buf, t); | |
530 | strcat(buf, crlf); | |
531 | len += strlen(t) + 2; | |
532 | } | |
533 | xfree(xbuf); | |
534 | if (ybuf) { | |
535 | put_free_4k_page(ybuf); | |
536 | ybuf = NULL; | |
537 | } | |
538 | } | |
539 | strcat(buf, crlf); | |
540 | len += 2; | |
541 | if (post_buf) { | |
542 | strcat(buf, post_buf); | |
543 | len += strlen(post_buf); | |
544 | xfree(post_buf); | |
545 | } | |
546 | debug(6, "httpSendRequest: FD %d: buf '%s'\n", fd, buf); | |
547 | data->icp_rwd_ptr = icpWrite(fd, buf, len, 30, httpSendComplete, data); | |
548 | } | |
549 | ||
/* Write handler while a non-blocking connect is still in progress.
 * Re-calls comm_connect() to learn the outcome: still connecting ->
 * reschedule ourselves; connected -> install the real request writer;
 * anything else -> abort with a "104 Cannot connect" error page. */
void httpConnInProgress(fd, data)
     int fd;
     HttpData *data;
{
    StoreEntry *entry = data->entry;

    if (comm_connect(fd, data->host, data->port) != COMM_OK)
	switch (errno) {
	case EINPROGRESS:
	case EALREADY:
	    /* schedule this handler again */
	    comm_set_select_handler(fd,
		COMM_SELECT_WRITE,
		(PF) httpConnInProgress,
		(caddr_t) data);
	    return;
	case EISCONN:
	    break;		/* cool, we're connected */
	default:
	    /* hard connect failure */
	    comm_close(fd);
	    sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG,
		entry->url,
		entry->url,
		"HTTP",
		104,
		"Cannot connect to the original site",
		"The remote site may be down.",
		HARVEST_VERSION,
		comm_hostname());
	    storeAbort(entry, tmp_error_buf);
#ifdef LOG_ERRORS
	    CacheInfo->log_append(CacheInfo,
		entry->url,
		"0.0.0.0",
		store_mem_obj(entry, e_current_len),
		"ERR_104",	/* HTTP CONNECT FAIL */
		data->type ? data->type : "NULL");
#endif
	    safe_free(data);
	    return;
	}
    /* Call the real write handler, now that we're fully connected */
    comm_set_select_handler(fd, COMM_SELECT_WRITE,
	(PF) httpSendRequest, (caddr_t) data);
}
595 | ||
/* Begin fetching <url> through a neighbor/parent cache <e>.  The full
 * URL is sent as the request-path (proxy-style request).  Returns
 * COMM_OK if the fetch was scheduled, COMM_ERROR otherwise (the entry
 * has already been aborted in that case). */
int proxyhttpStart(e, url, entry)
     edge *e;
     char *url;
     StoreEntry *entry;
{

    /* Create state structure. */
    int sock, status;
    HttpData *data = (HttpData *) xmalloc(sizeof(HttpData));

    debug(3, "proxyhttpStart: <URL:%s>\n", url);
    debug(10, "proxyhttpStart: HTTP request header:\n%s\n",
	store_mem_obj(entry, mime_hdr));

    memset(data, '\0', sizeof(HttpData));
    data->entry = entry;

    /* proxy-style: the request line carries the whole URL */
    strncpy(data->request, url, sizeof(data->request) - 1);
    /* NOTE(review): entry->type_id indexes HTTP_OPS without a range
     * check -- verify callers guarantee it is within bounds. */
    data->type = HTTP_OPS[entry->type_id];
    data->port = e->ascii_port;
    data->mime_hdr = store_mem_obj(entry, mime_hdr);
    strncpy(data->host, e->host, sizeof(data->host) - 1);

    /* proxy-only neighbors: pass data through without keeping it */
    if (e->proxy_only)
	storeStartDeleteBehind(entry);

    /* Create socket. */
    sock = comm_open(COMM_NONBLOCKING, 0, 0, url);
    if (sock == COMM_ERROR) {
	debug(4, "proxyhttpStart: Failed because we're out of sockets.\n");
	sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG,
	    entry->url,
	    entry->url,
	    "HTTP",
	    111,
	    "Cached short of file-descriptors, sorry",
	    "",
	    HARVEST_VERSION,
	    comm_hostname());
	storeAbort(entry, tmp_error_buf);
#ifdef LOG_ERRORS
	CacheInfo->log_append(CacheInfo,
	    entry->url,
	    "0.0.0.0",
	    store_mem_obj(entry, e_current_len),
	    "ERR_111",		/* HTTP NO FD'S */
	    data->type ? data->type : "NULL");
#endif
	safe_free(data);
	return COMM_ERROR;
    }
    /* check if IP is already in cache.  It must be.
     * It should be done before this routine is called.
     * Otherwise, we cannot check return code for connect. */
    if (!ipcache_gethostbyname(data->host)) {
	debug(4, "proxyhttpstart: Called without IP entry in ipcache. OR lookup failed.\n");
	comm_close(sock);
	sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG,
	    entry->url,
	    entry->url,
	    "HTTP",
	    102,
	    "DNS name lookup failure",
	    dns_error_message,
	    HARVEST_VERSION,
	    comm_hostname());
	storeAbort(entry, tmp_error_buf);
#ifdef LOG_ERRORS
	CacheInfo->log_append(CacheInfo,
	    entry->url,
	    "0.0.0.0",
	    store_mem_obj(entry, e_current_len),
	    "ERR_102",		/* HTTP DNS FAIL */
	    data->type ? data->type : "NULL");
#endif
	safe_free(data);
	return COMM_ERROR;
    }
    /* Open connection. */
    if ((status = comm_connect(sock, data->host, data->port))) {
	if (status != EINPROGRESS) {
	    /* immediate hard failure */
	    comm_close(sock);
	    sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG,
		entry->url,
		entry->url,
		"HTTP",
		104,
		"Cannot connect to the original site",
		"The remote site may be down.",
		HARVEST_VERSION,
		comm_hostname());
	    storeAbort(entry, tmp_error_buf);
#ifdef LOG_ERRORS
	    CacheInfo->log_append(CacheInfo,
		entry->url,
		"0.0.0.0",
		store_mem_obj(entry, e_current_len),
		"ERR_104",	/* HTTP CONNECT FAIL */
		data->type ? data->type : "NULL");
#endif
	    safe_free(data);
	    /* record the failure so neighbor selection can avoid this
	     * edge for a while */
	    e->last_fail_time = cached_curtime;
	    e->neighbor_up = 0;
	    return COMM_ERROR;
	} else {
	    /* connect pending: poll it via httpConnInProgress */
	    debug(5, "proxyhttpStart: FD %d: EINPROGRESS.\n", sock);
	    comm_set_select_handler(sock, COMM_SELECT_LIFETIME,
		(PF) httpLifetimeExpire, (caddr_t) data);
	    comm_set_select_handler(sock, COMM_SELECT_WRITE,
		(PF) httpConnInProgress, (caddr_t) data);
	    return COMM_OK;
	}
    }
    /* Install connection complete handler. */
    fd_note(sock, entry->url);
    comm_set_select_handler(sock, COMM_SELECT_LIFETIME,
	(PF) httpLifetimeExpire, (caddr_t) data);
    comm_set_select_handler(sock, COMM_SELECT_WRITE,
	(PF) httpSendRequest, (caddr_t) data);
    return COMM_OK;

}
718 | ||
/* Begin fetching <url> directly from its origin server.  Parses the
 * URL, opens a non-blocking connection, and installs the write handler
 * that will send the request.  Returns COMM_OK if the fetch was
 * scheduled, COMM_ERROR otherwise (the entry has already been aborted
 * in that case). */
int httpStart(unusedfd, url, type, mime_hdr, entry)
     int unusedfd;
     char *url;
     char *type;
     char *mime_hdr;
     StoreEntry *entry;
{
    /* Create state structure. */
    int sock, status;
    HttpData *data = (HttpData *) xmalloc(sizeof(HttpData));

    debug(3, "httpStart: %s <URL:%s>\n", type, url);
    debug(10, "httpStart: mime_hdr '%s'\n", mime_hdr);

    memset(data, '\0', sizeof(HttpData));
    data->entry = entry;
    data->type = type;
    data->mime_hdr = mime_hdr;

    /* Parse url.  Fills data->host, data->port and data->request. */
    if (http_url_parser(url, data->host, &data->port, data->request)) {
	sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG,
	    entry->url,
	    entry->url,
	    "HTTP",
	    110,
	    "Invalid URL syntax: Cannot parse.",
	    "Contact your system administrator for further help.",
	    HARVEST_VERSION,
	    comm_hostname());
	storeAbort(entry, tmp_error_buf);
#ifdef LOG_ERRORS
	CacheInfo->log_append(CacheInfo,
	    entry->url,
	    "0.0.0.0",
	    store_mem_obj(entry, e_current_len),
	    "ERR_110",		/* HTTP INVALID URL */
	    data->type ? data->type : "NULL");
#endif
	safe_free(data);
	return COMM_ERROR;
    }
    /* Create socket. */
    sock = comm_open(COMM_NONBLOCKING, 0, 0, url);
    if (sock == COMM_ERROR) {
	debug(4, "httpStart: Failed because we're out of sockets.\n");
	sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG,
	    entry->url,
	    entry->url,
	    "HTTP",
	    111,
	    "Cached short of file-descriptors, sorry",
	    "",
	    HARVEST_VERSION,
	    comm_hostname());
	storeAbort(entry, tmp_error_buf);
#ifdef LOG_ERRORS
	CacheInfo->log_append(CacheInfo,
	    entry->url,
	    "0.0.0.0",
	    store_mem_obj(entry, e_current_len),
	    "ERR_111",		/* HTTP NO FD'S */
	    data->type ? data->type : "NULL");
#endif
	safe_free(data);
	return COMM_ERROR;
    }
    /* check if IP is already in cache.  It must be.
     * It should be done before this routine is called.
     * Otherwise, we cannot check return code for connect. */
    if (!ipcache_gethostbyname(data->host)) {
	debug(4, "httpstart: Called without IP entry in ipcache. OR lookup failed.\n");
	comm_close(sock);
	sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG,
	    entry->url,
	    entry->url,
	    "HTTP",
	    108,
	    "DNS name lookup failure",
	    dns_error_message,
	    HARVEST_VERSION,
	    comm_hostname());
	storeAbort(entry, tmp_error_buf);
#ifdef LOG_ERRORS
	CacheInfo->log_append(CacheInfo,
	    entry->url,
	    "0.0.0.0",
	    store_mem_obj(entry, e_current_len),
	    "ERR_108",		/* HTTP DNS FAIL */
	    data->type ? data->type : "NULL");
#endif
	safe_free(data);
	return COMM_ERROR;
    }
    /* Open connection. */
    if ((status = comm_connect(sock, data->host, data->port))) {
	if (status != EINPROGRESS) {
	    /* immediate hard failure */
	    comm_close(sock);
	    sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG,
		entry->url,
		entry->url,
		"HTTP",
		109,
		"Cannot connect to the original site",
		"The remote site may be down.",
		HARVEST_VERSION,
		comm_hostname());
	    storeAbort(entry, tmp_error_buf);
#ifdef LOG_ERRORS
	    CacheInfo->log_append(CacheInfo,
		entry->url,
		"0.0.0.0",
		store_mem_obj(entry, e_current_len),
		"ERR_109",	/* HTTP CONNECT FAIL */
		data->type ? data->type : "NULL");
#endif
	    safe_free(data);
	    return COMM_ERROR;
	} else {
	    /* connect pending: poll it via httpConnInProgress */
	    debug(5, "httpStart: FD %d: EINPROGRESS.\n", sock);
	    comm_set_select_handler(sock, COMM_SELECT_LIFETIME,
		(PF) httpLifetimeExpire, (caddr_t) data);
	    comm_set_select_handler(sock, COMM_SELECT_WRITE,
		(PF) httpConnInProgress, (caddr_t) data);
	    return COMM_OK;
	}
    }
    /* Install connection complete handler. */
    fd_note(sock, entry->url);
    comm_set_select_handler(sock, COMM_SELECT_LIFETIME,
	(PF) httpLifetimeExpire, (caddr_t) data);
    comm_set_select_handler(sock, COMM_SELECT_WRITE,
	(PF) httpSendRequest, (caddr_t) data);
    return COMM_OK;
}