]>
Commit | Line | Data |
---|---|---|
eb1f6bfa | 1 | // |
7962bc6a | 2 | // $Id$ |
eb1f6bfa | 3 | // |
0b96a9b3 | 4 | // Author: Jens-S. Vöckler <voeckler@rvs.uni-hannover.de> |
eb1f6bfa AJ |
5 | // |
6 | // File: purge.cc | |
7 | // Wed Jan 13 1999 | |
8 | // | |
9 | // (c) 1999 Lehrgebiet Rechnernetze und Verteilte Systeme | |
0b96a9b3 | 10 | // Universität Hannover, Germany |
eb1f6bfa AJ |
11 | // |
12 | // Permission to use, copy, modify, distribute, and sell this software | |
13 | // and its documentation for any purpose is hereby granted without fee, | |
14 | // provided that (i) the above copyright notices and this permission | |
15 | // notice appear in all copies of the software and related documentation, | |
16 | // and (ii) the names of the Lehrgebiet Rechnernetze und Verteilte | |
17 | // Systeme and the University of Hannover may not be used in any | |
18 | // advertising or publicity relating to the software without the | |
19 | // specific, prior written permission of Lehrgebiet Rechnernetze und | |
20 | // Verteilte Systeme and the University of Hannover. | |
21 | // | |
22 | // THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, | |
23 | // EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY | |
24 | // WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. | |
25 | // | |
26 | // IN NO EVENT SHALL THE LEHRGEBIET RECHNERNETZE UND VERTEILTE SYSTEME OR | |
27 | // THE UNIVERSITY OF HANNOVER BE LIABLE FOR ANY SPECIAL, INCIDENTAL, | |
28 | // INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, OR ANY DAMAGES | |
29 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER OR NOT | |
30 | // ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF LIABILITY, | |
31 | // ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS | |
32 | // SOFTWARE. | |
33 | // | |
eb1f6bfa AJ |
34 | // Revision 1.17 2000/09/21 10:59:53 cached |
35 | // *** empty log message *** | |
36 | // | |
37 | // Revision 1.16 2000/09/21 09:45:18 cached | |
38 | // Fixed some small bugs. | |
39 | // | |
40 | // Revision 1.15 2000/09/21 09:05:56 cached | |
41 | // added multi cache_dir support, thus changing -c cmdline option. | |
42 | // modified file reading to support /dev/fd/0 reading for non-disclosed items. | |
43 | // | |
44 | // Revision 1.14 2000/06/20 09:43:01 voeckler | |
45 | // added FreeBSD related fixes and support. | |
46 | // | |
47 | // Revision 1.13 2000/03/29 08:12:21 voeckler | |
48 | // fixed wrong header file. | |
49 | // | |
50 | // Revision 1.12 2000/03/29 07:54:41 voeckler | |
51 | // added mechanism to give a port specification precedence over a host | |
52 | // specification with the -p option and not colon. | |
53 | // | |
54 | // Revision 1.11 1999/06/18 13:18:28 voeckler | |
55 | // added refcount, fixed missing LF in -s output. | |
56 | // | |
57 | // Revision 1.10 1999/06/16 13:06:05 voeckler | |
58 | // reversed meaning of -M flag. | |
59 | // | |
60 | // Revision 1.9 1999/06/15 21:11:53 voeckler | |
61 | // added extended logging feature which extract the squid meta data available | |
62 | // within the disk files. moved the content extraction and squid meta data | |
63 | // handling parts into separate files. added options for copy-out and verbose. | |
64 | // | |
65 | // Revision 1.8 1999/06/14 20:14:46 voeckler | |
66 | // intermediate version when adding understanding about the way | |
67 | // Squid does log the metadata into the file. | |
68 | // | |
69 | // Revision 1.7 1999/01/23 21:01:10 root | |
70 | // stumbled over libc5 header/lib inconsistency bug.... | |
71 | // | |
72 | // Revision 1.6 1999/01/23 20:47:54 root | |
73 | // added Linux specifics for psignal... | |
74 | // Hope this helps. | |
75 | // | |
76 | // Revision 1.5 1999/01/20 09:48:12 voeckler | |
77 | // added warning as first line of output. | |
78 | // | |
79 | // Revision 1.4 1999/01/19 11:53:49 voeckler | |
80 | // added psignal() from <siginfo.h> handling. | |
81 | // | |
82 | // Revision 1.3 1999/01/19 11:00:50 voeckler | |
83 | // added keyboard interrupt handling, exit handling, removed C++ strings and | |
84 | // regular expression syntax in favour of less source code, added comments, | |
85 | // added a reminder to remove swap.state in case of unlinks, added IAA flag, | |
86 | // added a few assertions, changed policy to enforce the definition of at | |
87 | // least one regular expression, and catch a few signals. | |
88 | // | |
89 | // Revision 1.2 1999/01/15 23:06:28 voeckler | |
90 | // downgraded to simple C strings... | |
91 | // | |
92 | // Revision 1.1 1999/01/14 12:05:32 voeckler | |
93 | // Initial revision | |
94 | // | |
95 | // | |
d8b258a9 | 96 | #if (defined(__GNUC__) || defined(__GNUG__)) && !defined(__clang__) |
eb1f6bfa | 97 | #pragma implementation |
eb1f6bfa AJ |
98 | #endif |
99 | ||
f7f3304a | 100 | #include "squid.h" |
2ccf2eb2 AJ |
101 | // for xstrdup |
102 | #include "util.h" | |
103 | ||
104 | //#include <assert.h> | |
eb1f6bfa AJ |
105 | #include <stdarg.h> |
106 | #include <stdio.h> | |
107 | #include <dirent.h> | |
2ccf2eb2 | 108 | //#include <ctype.h> |
eb1f6bfa | 109 | #include <string.h> |
2ccf2eb2 | 110 | //#include <sys/types.h> |
eb1f6bfa AJ |
111 | #include <sys/stat.h> |
112 | #include <sys/wait.h> | |
113 | #include <fcntl.h> | |
114 | #include <unistd.h> | |
115 | #include <stdlib.h> | |
116 | #include <limits.h> | |
117 | #include <signal.h> | |
118 | #include <errno.h> | |
119 | ||
2ccf2eb2 | 120 | #if HAVE_SIGINFO_H |
eb1f6bfa | 121 | #include <siginfo.h> |
2ccf2eb2 | 122 | #endif |
eb1f6bfa AJ |
123 | |
124 | #include <netinet/in.h> | |
125 | #include <netinet/tcp.h> // TCP_NODELAY | |
126 | #include <arpa/inet.h> | |
127 | #include <netdb.h> // gethostbyname() | |
357b8e33 | 128 | //#include <regex.h> //comes via compat.h |
eb1f6bfa AJ |
129 | |
130 | #include "convert.hh" | |
131 | #include "socket.hh" | |
132 | #include "signal.hh" | |
133 | #include "squid-tlv.hh" | |
134 | #include "copyout.hh" | |
135 | #include "conffile.hh" | |
136 | ||
137 | #ifndef DEFAULTHOST | |
138 | #define DEFAULTHOST "localhost" | |
139 | #endif // DEFAULTHOST | |
140 | ||
141 | #ifndef DEFAULTPORT | |
142 | #define DEFAULTPORT 3128 | |
143 | #endif // DEFAULTPORT | |
144 | ||
// Signal number remembered for the exit handler, 0 if none was recorded.
// ('terminate' is a gcc 2.8.x internal, hence the different name.)
volatile sig_atomic_t term_flag = 0;

// Shared buffer into which the head of each cache file is read.
char* linebuffer = 0;
// Size of linebuffer in bytes: 16 KiB. The former value 16834 looks like
// transposed digits of 16384.
size_t buffersize = 16384;

static char* copydir = 0;            // -C: base directory for copy-out mode
static unsigned debugFlag = 0;       // -d: OR of debug options, bit 0x01 traces names
static unsigned purgeMode = 0;       // -P: OR of purge modes 0x01, 0x02, 0x04
static bool iamalive = false;        // -a: show the rotating "i am alive" character
static bool reminder = false;        // set once a cache file has been unlinked
static bool verbose = false;         // -v: log the extended meta data line
static bool envelope = false;        // -H: prepend HTTP reply header on copy-out
static bool no_fork = false;         // -n: do not fork() for multiple cache_dirs
static const char* programname = 0;  // basename of argv[0]
static const char* RCS_ID = "$Id$";
eb1f6bfa AJ |
158 | |
159 | // ---------------------------------------------------------------------- | |
160 | ||
161 | struct REList { | |
feec68a0 A |
162 | REList( const char* what, bool doCase ); |
163 | ~REList(); | |
164 | bool match( const char* check ) const; | |
eb1f6bfa | 165 | |
feec68a0 A |
166 | REList* next; |
167 | const char* data; | |
168 | regex_t rexp; | |
eb1f6bfa AJ |
169 | }; |
170 | ||
171 | REList::REList( const char* what, bool doCase ) | |
2ccf2eb2 | 172 | :next(0),data(xstrdup(what)) |
eb1f6bfa | 173 | { |
feec68a0 A |
174 | int result = regcomp( &rexp, what, |
175 | REG_EXTENDED | REG_NOSUB | (doCase ? 0 : REG_ICASE) ); | |
176 | if ( result != 0 ) { | |
177 | char buffer[256]; | |
178 | regerror( result, &rexp, buffer, 256 ); | |
179 | fprintf( stderr, "unable to compile re \"%s\": %s\n", what, buffer ); | |
180 | exit(1); | |
181 | } | |
eb1f6bfa AJ |
182 | } |
183 | ||
184 | REList::~REList() | |
feec68a0 A |
185 | { |
186 | if ( next ) delete next; | |
6e2aefad | 187 | if ( data ) xfree((void*) data); |
feec68a0 | 188 | regfree(&rexp); |
eb1f6bfa AJ |
189 | } |
190 | ||
191 | bool | |
192 | REList::match( const char* check ) const | |
193 | { | |
feec68a0 A |
194 | int result = regexec( &rexp, check, 0, 0, 0 ); |
195 | if ( result != 0 && result != REG_NOMATCH ) { | |
196 | char buffer[256]; | |
197 | regerror( result, &rexp, buffer, 256 ); | |
198 | fprintf( stderr, "unable to execute re \"%s\"\n+ on line \"%s\": %s\n", | |
199 | data, check, buffer ); | |
200 | exit(1); | |
201 | } | |
202 | return ( result == 0 ); | |
eb1f6bfa AJ |
203 | } |
204 | ||
205 | // ---------------------------------------------------------------------- | |
206 | ||
char*
concat( const char* start, ... )
// purpose: concatenate an arbitrary number of C strings.
// paramtr: start (IN): first C string
//          ... (IN): further C strings, terminated with a NULL pointer
// returns: memory allocated via new[], containing the concatenated string.
//          The caller releases it with delete[].
// warning: every variadic argument is fetched as a const char*, so the
//          terminator has to be passed with pointer type.
{
    va_list ap;
    const char* s;

    // first run: determine size. Lengths are kept in size_t, the type
    // strlen() returns, so nothing is truncated on LP64 systems.
    size_t size = strlen(start) + 1;
    va_start( ap, start );
    while ( (s=va_arg(ap,const char*)) != NULL ) size += strlen(s);
    va_end(ap);

    // allocate
    char* result = new char[size];
    if ( result == 0 ) {
        // only reachable where new[] reports failure by returning 0
        perror( "string memory allocation" );
        exit(1);
    }

    // second run: copy content. Appending at a moving tail pointer avoids
    // strcat() rescanning the growing result for every argument.
    char* tail = result;
    size_t len = strlen(start);
    memcpy( tail, start, len );
    tail += len;

    va_start( ap, start );
    while ( (s=va_arg(ap,const char*)) != NULL ) {
        len = strlen(s);
        memcpy( tail, s, len );
        tail += len;
    }
    va_end(ap);
    *tail = '\0';

    return result;
}
238 | ||
bool
isxstring( const char* s, size_t testlen )
// purpose: test a string for conforming to xdigit
// paramtr: s (IN): string to test
//          testlen (IN): length the string must have
// returns: true, iff strlen(s)==testlen && all_x_chars(s), false otherwise
{
    if ( strlen(s) != testlen ) return false;

    for ( size_t i = 0; i < testlen; ++i ) {
        // <ctype.h> functions are only defined for unsigned char values
        // and EOF; a plain char may be negative for bytes >= 0x80.
        if ( ! isxdigit( (unsigned char) s[i] ) ) return false;
    }
    return true;
}
253 | ||
inline
int
log_output( const char* fn, int code, long size, const char* url )
// purpose: print the short, one-line log entry for a cache file to stdout.
// paramtr: fn (IN): name of the cache file
//          code (IN): status code to report
//          size (IN): size to report
//          url (IN): URL or remark to report
// returns: the result of printf(), negative on output errors.
{
    const int written = printf( "%s %3d %8ld %s\n", fn, code, size, url );
    return written;
}
260 | ||
261 | static | |
262 | int | |
263 | log_extended( const char* fn, int code, long size, const SquidMetaList* meta ) | |
264 | { | |
feec68a0 A |
265 | static const char hexdigit[] = "0123456789ABCDEF"; |
266 | char md5[34]; | |
267 | const SquidTLV* findings = 0; | |
268 | ||
269 | if ( meta && (findings = meta->search( STORE_META_KEY_MD5 )) ) { | |
270 | unsigned char* s = (unsigned char*) findings->data; | |
cbebe602 | 271 | for ( int j=0; j<16; ++j, ++s ) { |
feec68a0 A |
272 | md5[j*2+0] = hexdigit[ *s >> 4 ]; |
273 | md5[j*2+1] = hexdigit[ *s & 15 ]; | |
274 | } | |
275 | md5[32] = '\0'; // terminate string | |
276 | } else { | |
6e2aefad | 277 | snprintf( md5, sizeof(md5), "%-32s", "(no_md5_data_available)" ); |
feec68a0 A |
278 | } |
279 | ||
280 | char timeb[64]; | |
281 | if ( meta && (findings = meta->search( STORE_META_STD )) ) { | |
282 | StoreMetaStd temp; | |
283 | // make data aligned, avoid SIGBUS on RISC machines (ARGH!) | |
284 | memcpy( &temp, findings->data, sizeof(StoreMetaStd) ); | |
6e2aefad | 285 | snprintf( timeb, sizeof(timeb), "%08lx %08lx %08lx %08lx %04x %5hu ", |
8978bd9d A |
286 | (unsigned long)temp.timestamp, (unsigned long)temp.lastref, |
287 | (unsigned long)temp.expires, (unsigned long)temp.lastmod, temp.flags, temp.refcount ); | |
feec68a0 A |
288 | } else if ( meta && (findings = meta->search( STORE_META_STD_LFS )) ) { |
289 | StoreMetaStdLFS temp; | |
290 | // make data aligned, avoid SIGBUS on RISC machines (ARGH!) | |
291 | memcpy( &temp, findings->data, sizeof(StoreMetaStd) ); | |
6e2aefad | 292 | snprintf( timeb, sizeof(timeb), "%08lx %08lx %08lx %08lx %04x %5hu ", |
8978bd9d A |
293 | (unsigned long)temp.timestamp, (unsigned long)temp.lastref, |
294 | (unsigned long)temp.expires, (unsigned long)temp.lastmod, temp.flags, temp.refcount ); | |
feec68a0 | 295 | } else { |
8baf6ea3 | 296 | unsigned long ul = ULONG_MAX; // Match type of StoreMetaTLV fields |
d8b258a9 PW |
297 | unsigned short hu = 0; // Match type of StoreMetaTLV refcount fields |
298 | snprintf( timeb, sizeof(timeb), "%08lx %08lx %08lx %08lx %04x %5d ", ul, ul, ul, ul, 0, hu); | |
feec68a0 A |
299 | } |
300 | ||
301 | // make sure that there is just one printf() | |
302 | if ( meta && (findings = meta->search( STORE_META_URL )) ) { | |
303 | return printf( "%s %3d %8ld %s %s %s\n", | |
304 | fn, code, size, md5, timeb, findings->data ); | |
305 | } else { | |
306 | return printf( "%s %3d %8ld %s %s strange_file\n", | |
307 | fn, code, size, md5, timeb ); | |
eb1f6bfa | 308 | } |
eb1f6bfa AJ |
309 | } |
310 | ||
311 | // o.k., this is pure lazyness... | |
312 | static struct in_addr serverHost; | |
313 | static unsigned short serverPort; | |
314 | ||
315 | bool | |
316 | action( int fd, size_t metasize, | |
feec68a0 A |
317 | const char* fn, const char* url, const SquidMetaList& meta ) |
318 | // purpose: if cmdline-requested, send the purge request to the cache | |
319 | // paramtr: fd (IN): open FD for the object file | |
320 | // metasize (IN): offset into data portion of file (meta data size) | |
321 | // fn (IN): name of the object file | |
322 | // url (IN): URL string stored in the object file | |
323 | // meta (IN): list containing further meta data | |
324 | // returns: true for a successful action, false otherwise. The action | |
325 | // may just print the file, send the purge request or even | |
326 | // remove unwanted files. | |
327 | // globals: ::purgeMode (IN): bit#0 set -> send purge request. | |
328 | // bit#1 set -> remove 404 object files. | |
329 | // ::serverHost (IN): cache host address | |
330 | // ::serverPort (IN): cache port number | |
eb1f6bfa | 331 | { |
feec68a0 A |
332 | static const char* schablone = "PURGE %s HTTP/1.0\r\nAccept: */*\r\n\r\n"; |
333 | struct stat st; | |
334 | long size = ( fstat(fd,&st) == -1 ? -1 : long(st.st_size - metasize) ); | |
335 | int status = 0; | |
336 | ||
337 | // if we want to copy out the file, do that first of all. | |
338 | if ( ::copydir && *copydir && size > 0 ) | |
59a09b98 | 339 | copy_out( st.st_size, metasize, ::debugFlag, |
feec68a0 A |
340 | fn, url, ::copydir, ::envelope ); |
341 | ||
342 | // do we need to PURGE the file, yes, if purgemode bit#0 was set. | |
343 | if ( ::purgeMode & 0x01 ) { | |
344 | unsigned long bufsize = strlen(url) + strlen(schablone) + 4; | |
345 | char* buffer = new char[bufsize]; | |
346 | ||
6e2aefad | 347 | snprintf( buffer, bufsize, schablone, url ); |
feec68a0 A |
348 | int sockfd = connectTo( serverHost, serverPort, true ); |
349 | if ( sockfd == -1 ) { | |
350 | fprintf( stderr, "unable to connect to server: %s\n", strerror(errno) ); | |
351 | delete[] buffer; | |
352 | return false; | |
353 | } | |
354 | ||
355 | int size = strlen(buffer); | |
356 | if ( write( sockfd, buffer, size ) != size ) { | |
357 | // error while talking to squid | |
358 | fprintf( stderr, "unable to talk to server: %s\n", strerror(errno) ); | |
359 | close(sockfd); | |
360 | delete[] buffer; | |
361 | return false; | |
362 | } | |
363 | memset( buffer+8, 0, 4 ); | |
364 | if ( read( sockfd, buffer, bufsize ) < 1 ) { | |
365 | // error while reading squid's answer | |
366 | fprintf( stderr, "unable to read answer: %s\n", strerror(errno) ); | |
367 | close(sockfd); | |
368 | delete[] buffer; | |
369 | return false; | |
370 | } | |
371 | close(sockfd); | |
372 | status = strtol(buffer+8,0,10); | |
373 | delete[] buffer; | |
eb1f6bfa | 374 | } |
feec68a0 A |
375 | |
376 | // log the output of our operation | |
377 | bool flag = true; | |
378 | if ( ::verbose ) flag = ( log_extended( fn, status, size, &meta ) >= 0 ); | |
379 | else flag = ( log_output( fn, status, size, url ) >= 0 ); | |
380 | ||
381 | // remove the file, if purgemode bit#1, and HTTP result status 404). | |
382 | if ( (::purgeMode & 0x02) && status == 404 ) { | |
383 | reminder = true; | |
384 | if ( unlink(fn) == -1 ) | |
385 | // error while unlinking file, this may happen due to the cache | |
386 | // unlinking a file while it is still in the readdir() cache of purge. | |
387 | fprintf( stderr, "WARNING: unable to unlink %s: %s\n", | |
388 | fn, strerror(errno) ); | |
eb1f6bfa | 389 | } |
feec68a0 A |
390 | |
391 | return flag; | |
eb1f6bfa AJ |
392 | } |
393 | ||
394 | bool | |
395 | match( const char* fn, const REList* list ) | |
feec68a0 A |
396 | // purpose: do something with the given cache content filename |
397 | // paramtr: fn (IN): filename of cache file | |
398 | // returns: true for successful action, false otherwise. | |
399 | // warning: only return false, if you want the loop to terminate! | |
eb1f6bfa | 400 | { |
feec68a0 A |
401 | static const size_t addon = sizeof(unsigned char) + sizeof(unsigned int); |
402 | bool flag = true; | |
403 | ||
59a09b98 | 404 | if ( debugFlag & 0x01 ) fprintf( stderr, "# [3] %s\n", fn ); |
feec68a0 A |
405 | int fd = open( fn, O_RDONLY ); |
406 | if ( fd != -1 ) { | |
407 | if ( read(fd,::linebuffer,::buffersize-1) > 60 ) { | |
408 | ::linebuffer[ ::buffersize-1 ] = '\0'; // force-terminate string | |
409 | ||
410 | // check the offset into the start of object data. The offset is | |
411 | // stored in a host endianess after the first byte. | |
412 | unsigned int datastart; | |
413 | memcpy( &datastart, ::linebuffer + 1, sizeof(unsigned int) ); | |
414 | if ( datastart > ::buffersize - addon - 1 ) { | |
415 | // check offset into server reply header (start of cache data). | |
416 | fputs( "WARNING: Using a truncated URL string.\n", stderr ); | |
417 | datastart = ::buffersize - addon - 1; | |
418 | } | |
419 | ||
420 | // NEW: Parse squid meta data, which is a kind of linked list | |
421 | // flattened out into a file byte stream. Somewhere within is | |
422 | // the URL as part of the list. First, gobble all meta data. | |
423 | unsigned int offset = addon; | |
424 | SquidMetaList meta; | |
425 | while ( offset + addon <= datastart ) { | |
426 | unsigned int size = 0; | |
427 | memcpy( &size, linebuffer+offset+sizeof(char), sizeof(unsigned int) ); | |
428 | meta.append( SquidMetaType(*(linebuffer+offset)), | |
429 | size, linebuffer+offset+addon ); | |
430 | offset += ( addon + size ); | |
431 | } | |
432 | ||
433 | // Now extract the key URL from the meta data. | |
434 | const SquidTLV* urlmeta = meta.search( STORE_META_URL ); | |
435 | if ( urlmeta ) { | |
436 | // found URL in meta data. Try to process the URL | |
437 | if ( list == 0 ) | |
438 | flag = action( fd, datastart, fn, (char*) urlmeta->data, meta ); | |
439 | else { | |
440 | REList* head = (REList*) list; // YUCK! | |
441 | while ( head != 0 ) { | |
442 | if ( head->match( (char*) urlmeta->data ) ) break; | |
443 | head = head->next; | |
444 | } | |
445 | if ( head != 0 ) | |
446 | flag = action( fd, datastart, fn, (char*) urlmeta->data, meta ); | |
447 | else flag = true; | |
448 | } | |
449 | } | |
450 | ||
451 | // "meta" will be deleted when exiting from this block | |
452 | } else { | |
453 | // weird file, FIXME: stat() it! | |
454 | struct stat st; | |
455 | long size = ( fstat(fd,&st) == -1 ? -1 : st.st_size ); | |
456 | if ( ::verbose ) flag = ( log_extended( fn, -1, size, 0 ) >= 0 ); | |
457 | else flag = ( log_output( fn, -1, size, "strange file" ) >= 0 ); | |
458 | ||
459 | if ( (::purgeMode & 0x04) ) { | |
460 | reminder = true; | |
461 | if ( unlink(fn) == -1 ) | |
462 | // error while unlinking file, this may happen due to the cache | |
463 | // unlinking a file while it is in the readdir() cache of purge. | |
464 | fprintf( stderr, "WARNING: unable to unlink %s: %s\n", | |
465 | fn, strerror(errno) ); | |
466 | } | |
467 | } | |
468 | close(fd); | |
eb1f6bfa | 469 | } else { |
feec68a0 A |
470 | // error while opening file, this may happen due to the cache |
471 | // unlinking a file while it is still in the readdir() cache of purge. | |
472 | fprintf( stderr, "WARNING: open \"%s\": %s\n", fn, strerror(errno) ); | |
eb1f6bfa | 473 | } |
feec68a0 A |
474 | |
475 | return flag; | |
eb1f6bfa AJ |
476 | } |
477 | ||
478 | bool | |
479 | filelevel( const char* directory, const REList* list ) | |
feec68a0 A |
480 | // purpose: from given starting point, look for squid xxxxxxxx files. |
481 | // example: "/var/spool/cache/08/7F" as input, do action over files | |
482 | // paramtr: directory (IN): starting point | |
483 | // list (IN): list of rexps to match URLs against | |
484 | // returns: true, if every subdir && action was successful. | |
eb1f6bfa | 485 | { |
fb151769 | 486 | dirent_t * entry; |
59a09b98 | 487 | if ( debugFlag & 0x01 ) |
feec68a0 A |
488 | fprintf( stderr, "# [2] %s\n", directory ); |
489 | ||
490 | DIR* dir = opendir( directory ); | |
491 | if ( dir == NULL ) { | |
492 | fprintf( stderr, "unable to open directory \"%s\": %s\n", | |
493 | directory, strerror(errno) ); | |
494 | return false; | |
495 | } | |
496 | ||
497 | // display a rotating character as "i am alive" signal (slows purge). | |
498 | if ( ::iamalive ) { | |
499 | static char alivelist[4][3] = { "\\\b", "|\b", "/\b", "-\b" }; | |
500 | static unsigned short alivecount = 0; | |
501 | assert( write( STDOUT_FILENO, alivelist[alivecount++ & 3], 2 ) == 2 ); | |
502 | } | |
503 | ||
504 | bool flag = true; | |
505 | while ( (entry=readdir(dir)) && flag ) { | |
506 | if ( isxstring(entry->d_name,8) ) { | |
507 | char* name = concat( directory, "/", entry->d_name, 0 ); | |
508 | flag = match( name, list ); | |
509 | delete[] name; | |
510 | } | |
eb1f6bfa | 511 | } |
eb1f6bfa | 512 | |
feec68a0 A |
513 | closedir(dir); |
514 | return flag; | |
eb1f6bfa AJ |
515 | } |
516 | ||
517 | bool | |
518 | dirlevel( const char* dirname, const REList* list, bool level=false ) | |
feec68a0 A |
519 | // purpose: from given starting point, look for squid 00..FF directories. |
520 | // paramtr: dirname (IN): starting point | |
521 | // list (IN): list of rexps to match URLs against | |
522 | // level (IN): false==toplevel, true==1st level | |
523 | // example: "/var/spool/cache", false as input, traverse subdirs w/ action. | |
524 | // example: "/var/spool/cache/08", true as input, traverse subdirs w/ action. | |
525 | // returns: true, if every subdir && action was successful. | |
526 | // warning: this function is once-recursive, no deeper. | |
eb1f6bfa | 527 | { |
fb151769 | 528 | dirent_t* entry; |
59a09b98 | 529 | if ( debugFlag & 0x01 ) |
feec68a0 A |
530 | fprintf( stderr, "# [%d] %s\n", (level ? 1 : 0), dirname ); |
531 | ||
532 | DIR* dir = opendir( dirname ); | |
533 | if ( dir == NULL ) { | |
534 | fprintf( stderr, "unable to open directory \"%s\": %s\n", | |
535 | dirname, strerror(errno) ); | |
536 | return false; | |
eb1f6bfa | 537 | } |
eb1f6bfa | 538 | |
feec68a0 A |
539 | bool flag = true; |
540 | while ( (entry=readdir(dir)) && flag ) { | |
541 | if ( strlen(entry->d_name) == 2 && | |
542 | isxdigit(entry->d_name[0]) && | |
543 | isxdigit(entry->d_name[1]) ) { | |
544 | char* name = concat( dirname, "/", entry->d_name, 0 ); | |
545 | flag = level ? filelevel( name, list ) : dirlevel( name, list, true ); | |
546 | delete[] name; | |
547 | } | |
548 | } | |
549 | ||
550 | closedir(dir); | |
551 | return flag; | |
eb1f6bfa AJ |
552 | } |
553 | ||
int
checkForPortOnly( const char* optarg )
// purpose: see if somebody just put in a port instead of a hostname
// paramtr: optarg (IN): argument from commandline
// returns: 0..65535 is the valid port number in network byte order,
//          -1 if not a port
{
    // if there is a period in there, it must be a valid hostname
    if ( strchr( optarg, '.' ) != 0 ) return -1;

    // if it is just a number between 0 and 65535, it must be a port.
    // The whole argument has to be consumed by the conversion: something
    // like "80abc" is a host name, not port 80 followed by garbage.
    char* errstr = 0;
    unsigned long result = strtoul( optarg, &errstr, 0 );
    if ( errstr != optarg && *errstr == '\0' && result < 65536 )
        return htons(result);

#if 0
    // one last try, test for a symbolical service name
    struct servent* service = getservbyname( optarg, "tcp" );
    return service ? service->s_port : -1;
#else
    return -1;
#endif
}
577 | ||
578 | void | |
579 | helpMe( void ) | |
feec68a0 | 580 | // purpuse: write help message and exit |
eb1f6bfa | 581 | { |
feec68a0 A |
582 | printf( "\n%s\nUsage:\t%s\t[-a] [-c cf] [-d l] [-(f|F) fn | -(e|E) re] " |
583 | "[-p h[:p]]\n\t\t[-P #] [-s] [-v] [-C dir [-H]] [-n]\n\n", | |
584 | ::RCS_ID, ::programname ); | |
585 | printf( | |
586 | " -a\tdisplay a little rotating thingy to indicate that I am alive (tty only).\n" | |
587 | " -c c\tsquid.conf location, default \"%s\".\n" | |
588 | " -C dir\tbase directory for content extraction (copy-out mode).\n" | |
589 | " -d l\tdebug level, an OR of different debug options.\n" | |
590 | " -e re\tsingle regular expression per -e instance (use quotes!).\n" | |
591 | " -E re\tsingle case sensitive regular expression like -e.\n" | |
592 | " -f fn\tname of textfile containing one regular expression per line.\n" | |
593 | " -F fn\tname of textfile like -f containing case sensitive REs.\n" | |
594 | " -H\tprepend HTTP reply header to destination files in copy-out mode.\n" | |
595 | " -n\tdo not fork() when using more than one cache_dir.\n" | |
596 | " -p h:p\tcache runs on host h and optional port p, default is %s:%u.\n" | |
597 | " -P #\tif 0, just print matches; otherwise OR the following purge modes:\n" | |
598 | "\t 0x01 really send PURGE to the cache.\n" | |
599 | "\t 0x02 remove all caches files reported as 404 (not found).\n" | |
600 | "\t 0x04 remove all weird (inaccessible or too small) cache files.\n" | |
601 | "\t0 and 1 are recommended - slow rebuild your cache with other modes.\n" | |
602 | " -s\tshow all options after option parsing, but before really starting.\n" | |
603 | " -v\tshow more information about the file, e.g. MD5, timestamps and flags.\n" | |
604 | "\n", DEFAULT_SQUID_CONF, DEFAULTHOST, DEFAULTPORT ); | |
eb1f6bfa AJ |
605 | |
606 | } | |
607 | ||
608 | void | |
609 | parseCommandline( int argc, char* argv[], REList*& head, | |
feec68a0 A |
610 | char*& conffile, char*& copydir, |
611 | struct in_addr& serverHost, unsigned short& serverPort ) | |
612 | // paramtr: argc: see ::main(). | |
613 | // argv: see ::main(). | |
614 | // returns: Does terminate the program on errors! | |
615 | // purpose: suck in any commandline options, and set the global vars. | |
616 | { | |
617 | int option, port, showme = 0; | |
618 | char* ptr, *colon; | |
619 | FILE* rfile; | |
620 | ||
621 | // program basename | |
14942edd FC |
622 | if ( (ptr = strrchr(argv[0],'/')) == NULL ) |
623 | ptr=argv[0]; | |
624 | else | |
625 | ++ptr; | |
feec68a0 A |
626 | ::programname = ptr; |
627 | ||
628 | // extract commandline parameters | |
629 | REList* tail = head = 0; | |
630 | opterr = 0; | |
631 | while ( (option = getopt( argc, argv, "ac:C:d:E:e:F:f:Hnp:P:sv" )) != -1 ) { | |
632 | switch ( option ) { | |
633 | case 'a': | |
634 | ::iamalive = ! ::iamalive; | |
635 | break; | |
636 | case 'C': | |
637 | if ( optarg && *optarg ) { | |
6e2aefad | 638 | if ( copydir ) xfree( (void*) copydir ); |
2ccf2eb2 | 639 | assert( (copydir = xstrdup(optarg)) ); |
feec68a0 A |
640 | } |
641 | break; | |
642 | case 'c': | |
643 | if ( optarg && *optarg ) { | |
6e2aefad | 644 | if ( *conffile ) xfree((void*) conffile ); |
2ccf2eb2 | 645 | assert( (conffile = xstrdup(optarg)) ); |
feec68a0 A |
646 | } |
647 | break; | |
648 | ||
649 | case 'd': | |
59a09b98 | 650 | ::debugFlag = strtoul( optarg, 0, 0 ); |
feec68a0 A |
651 | break; |
652 | ||
653 | case 'E': | |
654 | case 'e': | |
655 | if ( head == 0 ) tail = head = new REList( optarg, option=='E' ); | |
656 | else { | |
657 | tail->next = new REList( optarg, option=='E' ); | |
658 | tail = tail->next; | |
659 | } | |
660 | break; | |
661 | ||
662 | case 'f': | |
663 | if ( (rfile = fopen( optarg, "r" )) != NULL ) { | |
664 | unsigned long lineno = 0; | |
eb1f6bfa | 665 | #define LINESIZE 512 |
feec68a0 A |
666 | char line[LINESIZE]; |
667 | while ( fgets( line, LINESIZE, rfile ) != NULL ) { | |
14942edd | 668 | ++lineno; |
feec68a0 A |
669 | int len = strlen(line)-1; |
670 | if ( len+2 >= LINESIZE ) { | |
671 | fprintf( stderr, "%s:%lu: line too long, sorry.\n", | |
672 | optarg, lineno ); | |
673 | exit(1); | |
674 | } | |
675 | ||
676 | // remove trailing line breaks | |
677 | while ( len > 0 && ( line[len] == '\n' || line[len] == '\r' ) ) | |
678 | line[len--] = '\0'; | |
679 | ||
680 | // insert into list of expressions | |
681 | if ( head == 0 ) tail = head = new REList(line,option=='F'); | |
682 | else { | |
683 | tail->next = new REList(line,option=='F'); | |
684 | tail = tail->next; | |
685 | } | |
686 | } | |
687 | fclose(rfile); | |
688 | } else | |
689 | fprintf( stderr, "unable to open %s: %s\n", optarg, strerror(errno)); | |
690 | break; | |
691 | ||
692 | case 'H': | |
693 | ::envelope = ! ::envelope; | |
694 | break; | |
695 | case 'n': | |
696 | ::no_fork = ! ::no_fork; | |
697 | break; | |
698 | case 'p': | |
699 | colon = strchr( optarg, ':' ); | |
700 | if ( colon == 0 ) { | |
701 | // no colon, only look at host | |
702 | ||
703 | // fix: see if somebody just put in there a port (no periods) | |
704 | // give port number precedence over host names | |
705 | port = checkForPortOnly( optarg ); | |
706 | if ( port == -1 ) { | |
707 | // assume that main() did set the default port | |
708 | if ( convertHostname(optarg,serverHost) == -1 ) { | |
709 | fprintf( stderr, "unable to resolve host %s!\n", optarg ); | |
710 | exit(1); | |
711 | } | |
712 | } else { | |
713 | // assume that main() did set the default host | |
714 | serverPort = port; | |
715 | } | |
716 | } else { | |
717 | // colon used, port is extra | |
14942edd FC |
718 | *colon = 0; |
719 | ++colon; | |
feec68a0 A |
720 | if ( convertHostname(optarg,serverHost) == -1 ) { |
721 | fprintf( stderr, "unable to resolve host %s!\n", optarg ); | |
722 | exit(1); | |
723 | } | |
724 | if ( convertPortname(colon,serverPort) == -1 ) { | |
725 | fprintf( stderr, "unable to resolve port %s!\n", colon ); | |
726 | exit(1); | |
727 | } | |
728 | } | |
729 | break; | |
730 | case 'P': | |
731 | ::purgeMode = ( strtol( optarg, 0, 0 ) & 0x07 ); | |
732 | break; | |
733 | case 's': | |
734 | showme=1; | |
735 | break; | |
736 | case 'v': | |
737 | ::verbose = ! ::verbose; | |
738 | break; | |
739 | case '?': | |
740 | default: | |
741 | helpMe(); | |
742 | exit(1); | |
743 | } | |
744 | } | |
745 | ||
746 | // adjust | |
59a09b98 | 747 | if ( ! isatty(fileno(stdout)) || (::debugFlag & 0x01) ) ::iamalive = false; |
feec68a0 A |
748 | if ( head == 0 ) { |
749 | fputs( "There was no regular expression defined. If you intend\n", stderr ); | |
750 | fputs( "to match all possible URLs, use \"-e .\" instead.\n", stderr ); | |
751 | exit(1); | |
eb1f6bfa | 752 | } |
feec68a0 A |
753 | |
754 | // postcondition: head != 0 | |
755 | assert( head != 0 ); | |
756 | ||
757 | // make sure that the copy out directory is there and accessible | |
758 | if ( copydir && *copydir ) | |
759 | if ( assert_copydir( copydir ) != 0 ) exit(1); | |
760 | ||
761 | // show results | |
762 | if ( showme ) { | |
763 | printf( "#\n# Currently active values for %s:\n# %s\n", | |
764 | ::programname, ::RCS_ID ); | |
765 | printf( "# Debug level : " ); | |
d83197e3 | 766 | if ( ::debugFlag ) printf( "%#6.4x", ::debugFlag ); |
feec68a0 A |
767 | else printf( "production level" ); // printf omits 0x prefix for 0! |
768 | printf( " + %s mode", ::no_fork ? "linear" : "parallel" ); | |
769 | puts( ::verbose ? " + extra verbosity" : "" ); | |
770 | ||
771 | printf( "# Copy-out directory: %s ", | |
772 | copydir ? copydir : "copy-out mode disabled" ); | |
773 | if ( copydir ) | |
774 | printf( "(%s HTTP header)\n", ::envelope ? "prepend" : "no" ); | |
775 | else | |
776 | puts(""); | |
777 | ||
778 | printf( "# Squid config file : %s\n", conffile ); | |
779 | printf( "# Cacheserveraddress: %s:%u\n", | |
780 | inet_ntoa( serverHost ), ntohs( serverPort ) ); | |
781 | printf( "# purge mode : 0x%02x\n", ::purgeMode ); | |
782 | printf( "# Regular expression: " ); | |
783 | ||
784 | unsigned count(0); | |
785 | for ( tail = head; tail != NULL; tail = tail->next ) { | |
14942edd FC |
786 | if ( count++ ) |
787 | printf( "#%22u", count ); | |
eb1f6bfa | 788 | #if defined(LINUX) && putc==_IO_putc |
feec68a0 A |
789 | // I HATE BROKEN LINUX HEADERS! |
790 | // purge.o(.text+0x1040): undefined reference to `_IO_putc' | |
791 | // If your compilation breaks here, remove the undefinition | |
792 | #undef putc | |
eb1f6bfa | 793 | #endif |
feec68a0 A |
794 | else putchar('1'); |
795 | printf( " \"%s\"\n", tail->data ); | |
796 | } | |
797 | puts( "#" ); | |
eb1f6bfa | 798 | } |
feec68a0 | 799 | fflush( stdout ); |
eb1f6bfa AJ |
800 | } |
801 | ||
802 | extern "C" { | |
803 | ||
feec68a0 A |
804 | static |
805 | void | |
806 | exiter( void ) { | |
807 | if ( ::term_flag ) psignal( ::term_flag, "received signal" ); | |
808 | delete[] ::linebuffer; | |
809 | if ( ::reminder ) { | |
810 | fputs( | |
811 | "WARNING! Caches files were removed. Please shut down your cache, remove\n" | |
812 | "your swap.state files and restart your cache again, i.e. effictively do\n" | |
813 | "a slow rebuild your cache! Otherwise your squid *will* choke!\n", stderr ); | |
814 | } | |
815 | } | |
eb1f6bfa | 816 | |
feec68a0 A |
817 | static |
818 | void | |
819 | handler( int signo ) { | |
820 | ::term_flag = signo; | |
821 | if ( getpid() == getpgrp() ) kill( -getpgrp(), signo ); | |
822 | exit(1); | |
823 | } | |
eb1f6bfa AJ |
824 | |
825 | } // extern "C" | |
826 | ||
static
int
makelinebuffered( FILE* fp, const char* fn = 0 )
// purpose: switch the given stream into line buffered mode
// paramtr: fp (IO): stream whose buffering mode is changed
//          fn (IN): name to show in the error message, may be 0
// returns: 0 on success, -1 on failure
// warning: on failure, the error message was already written to stderr
{
    if ( setvbuf( fp, 0, _IOLBF, 0 ) != 0 ) {
        // failure: complain, using an empty name if none was supplied
        const char* label = ( fn != 0 ) ? fn : "";
        fprintf( stderr, "unable to make \"%s\" line buffered: %s\n",
                 label, strerror(errno) );
        return -1;
    }

    // success
    return 0;
}
846 | ||
847 | int | |
848 | main( int argc, char* argv[] ) | |
849 | { | |
feec68a0 A |
850 | // setup variables |
851 | REList* list = 0; | |
2ccf2eb2 | 852 | char* conffile = xstrdup( DEFAULT_SQUID_CONF ); |
feec68a0 A |
853 | serverPort = htons(DEFAULTPORT); |
854 | if ( convertHostname(DEFAULTHOST,serverHost) == -1 ) { | |
855 | fprintf( stderr, "unable to resolve host %s!\n", DEFAULTHOST ); | |
856 | return 1; | |
857 | } | |
858 | ||
859 | // setup line buffer | |
860 | ::linebuffer = new char[ ::buffersize ]; | |
861 | assert( ::linebuffer != 0 ); | |
862 | ||
863 | // parse commandline | |
864 | puts( "### Use at your own risk! No guarantees whatsoever. You were warned. ###"); | |
865 | parseCommandline( argc, argv, list, conffile, ::copydir, | |
866 | serverHost, serverPort ); | |
867 | ||
868 | // prepare execution | |
869 | if ( atexit( exiter ) != 0 || | |
870 | Signal( SIGTERM, handler, true ) == SIG_ERR || | |
871 | Signal( SIGINT, handler, true ) == SIG_ERR || | |
872 | Signal( SIGHUP, handler, true ) == SIG_ERR ) { | |
873 | perror( "unable to install signal/exit function" ); | |
874 | return 1; | |
875 | } | |
876 | ||
877 | // try to read squid.conf file to determine all cache_dir locations | |
878 | CacheDirVector cdv(0); | |
59a09b98 | 879 | if ( readConfigFile( cdv, conffile, debugFlag ? stderr : 0 ) > 0 ) { |
feec68a0 A |
880 | // there are some valid cache_dir entries. |
881 | // unless forking was forbidden by cmdline option, | |
882 | // for a process for each cache_dir entry to remove files. | |
883 | ||
884 | if ( ::no_fork || cdv.size() == 1 ) { | |
885 | // linear mode, one cache_dir after the next | |
886 | for ( CacheDirVector::iterator i = cdv.begin(); i != cdv.end(); ++i ) { | |
887 | // execute OR complain | |
888 | if ( ! dirlevel(i->base,list) ) | |
889 | fprintf( stderr, "program terminated due to error: %s", | |
890 | strerror(errno) ); | |
6e2aefad | 891 | xfree((void*) i->base); |
feec68a0 A |
892 | } |
893 | } else { | |
894 | // parallel mode, all cache_dir in parallel | |
895 | pid_t* child = new pid_t[ cdv.size() ]; | |
896 | ||
897 | // make stdout/stderr line bufferd | |
898 | makelinebuffered( stdout, "stdout" ); | |
899 | makelinebuffered( stderr, "stderr" ); | |
900 | ||
901 | // make parent process group leader for easier killings | |
902 | if ( setpgid(getpid(), getpid()) != 0 ) { | |
903 | perror( "unable to set process group leader" ); | |
904 | return 1; | |
905 | } | |
906 | ||
907 | // -a is mutually exclusive with fork mode | |
908 | if ( ::iamalive ) { | |
909 | puts( "# i-am-alive flag incompatible with fork mode, resetting" ); | |
910 | ::iamalive = false; | |
911 | } | |
912 | ||
2ccf2eb2 | 913 | for ( size_t i=0; i < cdv.size(); ++i ) { |
feec68a0 A |
914 | if ( getpid() == getpgrp() ) { |
915 | // only parent == group leader may fork off new processes | |
916 | if ( (child[i]=fork()) < 0 ) { | |
917 | // fork error, this is bad! | |
918 | perror( "unable to fork" ); | |
919 | kill( -getpgrp(), SIGTERM ); | |
920 | return 1; | |
921 | } else if ( child[i] == 0 ) { | |
922 | // child mode | |
923 | // execute OR complain | |
924 | if ( ! dirlevel(cdv[i].base,list) ) | |
925 | fprintf( stderr, "program terminated due to error: %s\n", | |
926 | strerror(errno) ); | |
6e2aefad | 927 | xfree((void*) cdv[i].base); |
feec68a0 A |
928 | return 0; |
929 | } else { | |
930 | // parent mode | |
59a09b98 | 931 | if ( ::debugFlag ) printf( "forked child %d\n", (int) child[i] ); |
feec68a0 A |
932 | } |
933 | } | |
934 | } | |
935 | ||
936 | // collect the garbase | |
937 | pid_t temp; | |
938 | int status; | |
2ccf2eb2 | 939 | for ( size_t i=0; i < cdv.size(); ++i ) { |
feec68a0 A |
940 | while ( (temp=waitpid( (pid_t)-1, &status, 0 )) == -1 ) |
941 | if ( errno == EINTR ) continue; | |
59a09b98 | 942 | if ( ::debugFlag ) printf( "collected child %d\n", (int) temp ); |
feec68a0 A |
943 | } |
944 | delete[] child; | |
945 | } | |
eb1f6bfa | 946 | } else { |
feec68a0 | 947 | fprintf( stderr, "no cache_dir or error accessing \"%s\"\n", conffile ); |
eb1f6bfa | 948 | } |
feec68a0 A |
949 | |
950 | // clean up | |
6e2aefad HN |
951 | if ( copydir ) xfree( (void*) copydir ); |
952 | xfree((void*) conffile); | |
feec68a0 A |
953 | delete list; |
954 | return 0; | |
eb1f6bfa | 955 | } |