]>
Commit | Line | Data |
---|---|---|
eb1f6bfa | 1 | // |
7962bc6a | 2 | // $Id$ |
eb1f6bfa | 3 | // |
0b96a9b3 | 4 | // Author: Jens-S. Vöckler <voeckler@rvs.uni-hannover.de> |
eb1f6bfa AJ |
5 | // |
6 | // File: purge.cc | |
7 | // Wed Jan 13 1999 | |
8 | // | |
9 | // (c) 1999 Lehrgebiet Rechnernetze und Verteilte Systeme | |
0b96a9b3 | 10 | // Universität Hannover, Germany |
eb1f6bfa AJ |
11 | // |
12 | // Permission to use, copy, modify, distribute, and sell this software | |
13 | // and its documentation for any purpose is hereby granted without fee, | |
14 | // provided that (i) the above copyright notices and this permission | |
15 | // notice appear in all copies of the software and related documentation, | |
16 | // and (ii) the names of the Lehrgebiet Rechnernetze und Verteilte | |
17 | // Systeme and the University of Hannover may not be used in any | |
18 | // advertising or publicity relating to the software without the | |
19 | // specific, prior written permission of Lehrgebiet Rechnernetze und | |
20 | // Verteilte Systeme and the University of Hannover. | |
21 | // | |
22 | // THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, | |
23 | // EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY | |
24 | // WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. | |
25 | // | |
26 | // IN NO EVENT SHALL THE LEHRGEBIET RECHNERNETZE UND VERTEILTE SYSTEME OR | |
27 | // THE UNIVERSITY OF HANNOVER BE LIABLE FOR ANY SPECIAL, INCIDENTAL, | |
28 | // INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, OR ANY DAMAGES | |
29 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER OR NOT | |
30 | // ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF LIABILITY, | |
31 | // ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS | |
32 | // SOFTWARE. | |
33 | // | |
eb1f6bfa AJ |
34 | // Revision 1.17 2000/09/21 10:59:53 cached |
35 | // *** empty log message *** | |
36 | // | |
37 | // Revision 1.16 2000/09/21 09:45:18 cached | |
38 | // Fixed some small bugs. | |
39 | // | |
40 | // Revision 1.15 2000/09/21 09:05:56 cached | |
41 | // added multi cache_dir support, thus changing -c cmdline option. | |
42 | // modified file reading to support /dev/fd/0 reading for non-disclosed items. | |
43 | // | |
44 | // Revision 1.14 2000/06/20 09:43:01 voeckler | |
45 | // added FreeBSD related fixes and support. | |
46 | // | |
47 | // Revision 1.13 2000/03/29 08:12:21 voeckler | |
48 | // fixed wrong header file. | |
49 | // | |
50 | // Revision 1.12 2000/03/29 07:54:41 voeckler | |
51 | // added mechanism to give a port specification precedence over a host | |
52 | // specification with the -p option and not colon. | |
53 | // | |
54 | // Revision 1.11 1999/06/18 13:18:28 voeckler | |
55 | // added refcount, fixed missing LF in -s output. | |
56 | // | |
57 | // Revision 1.10 1999/06/16 13:06:05 voeckler | |
58 | // reversed meaning of -M flag. | |
59 | // | |
60 | // Revision 1.9 1999/06/15 21:11:53 voeckler | |
61 | // added extended logging feature which extract the squid meta data available | |
62 | // within the disk files. moved the content extraction and squid meta data | |
63 | // handling parts into separate files. added options for copy-out and verbose. | |
64 | // | |
65 | // Revision 1.8 1999/06/14 20:14:46 voeckler | |
66 | // intermediate version when adding understanding about the way | |
67 | // Squid does log the metadata into the file. | |
68 | // | |
69 | // Revision 1.7 1999/01/23 21:01:10 root | |
70 | // stumbled over libc5 header/lib inconsistency bug.... | |
71 | // | |
72 | // Revision 1.6 1999/01/23 20:47:54 root | |
73 | // added Linux specifics for psignal... | |
74 | // Hope this helps. | |
75 | // | |
76 | // Revision 1.5 1999/01/20 09:48:12 voeckler | |
77 | // added warning as first line of output. | |
78 | // | |
79 | // Revision 1.4 1999/01/19 11:53:49 voeckler | |
80 | // added psignal() from <siginfo.h> handling. | |
81 | // | |
82 | // Revision 1.3 1999/01/19 11:00:50 voeckler | |
83 | // added keyboard interrupt handling, exit handling, removed C++ strings and | |
84 | // regular expression syntax in favour of less source code, added comments, | |
85 | // added a reminder to remove swap.state in case of unlinks, added IAA flag, | |
86 | // added a few assertions, changed policy to enforce the definition of at | |
87 | // least one regular expression, and catch a few signals. | |
88 | // | |
89 | // Revision 1.2 1999/01/15 23:06:28 voeckler | |
90 | // downgraded to simple C strings... | |
91 | // | |
92 | // Revision 1.1 1999/01/14 12:05:32 voeckler | |
93 | // Initial revision | |
94 | // | |
95 | // | |
96 | #if defined(__GNUC__) || defined(__GNUG__) | |
97 | #pragma implementation | |
eb1f6bfa AJ |
98 | #endif |
99 | ||
2ccf2eb2 AJ |
100 | #include "config.h" |
101 | // for xstrdup | |
102 | #include "util.h" | |
103 | ||
104 | //#include <assert.h> | |
eb1f6bfa AJ |
105 | #include <stdarg.h> |
106 | #include <stdio.h> | |
107 | #include <dirent.h> | |
2ccf2eb2 | 108 | //#include <ctype.h> |
eb1f6bfa | 109 | #include <string.h> |
2ccf2eb2 | 110 | //#include <sys/types.h> |
eb1f6bfa AJ |
111 | #include <sys/stat.h> |
112 | #include <sys/wait.h> | |
113 | #include <fcntl.h> | |
114 | #include <unistd.h> | |
115 | #include <stdlib.h> | |
116 | #include <limits.h> | |
117 | #include <signal.h> | |
118 | #include <errno.h> | |
119 | ||
2ccf2eb2 | 120 | #if HAVE_SIGINFO_H |
eb1f6bfa | 121 | #include <siginfo.h> |
2ccf2eb2 | 122 | #endif |
eb1f6bfa AJ |
123 | |
124 | #include <netinet/in.h> | |
125 | #include <netinet/tcp.h> // TCP_NODELAY | |
126 | #include <arpa/inet.h> | |
127 | #include <netdb.h> // gethostbyname() | |
357b8e33 | 128 | //#include <regex.h> //comes via compat.h |
eb1f6bfa AJ |
129 | |
130 | #include "convert.hh" | |
131 | #include "socket.hh" | |
132 | #include "signal.hh" | |
133 | #include "squid-tlv.hh" | |
134 | #include "copyout.hh" | |
135 | #include "conffile.hh" | |
136 | ||
137 | #ifndef DEFAULTHOST | |
138 | #define DEFAULTHOST "localhost" | |
139 | #endif // DEFAULTHOST | |
140 | ||
141 | #ifndef DEFAULTPORT | |
142 | #define DEFAULTPORT 3128 | |
143 | #endif // DEFAULTPORT | |
144 | ||
// number of the signal stored by handler(), 0 while no signal was caught
volatile sig_atomic_t term_flag = 0; // 'terminate' is a gcc 2.8.x internal...
// buffer that match() reads the head of each cache file into; released in exiter()
char* linebuffer = 0;
// size of linebuffer in bytes
// NOTE(review): 16834 looks like a transposition of 16384 (16 KiB) - confirm
size_t buffersize = 16834;
// base directory for copy-out mode, 0 if copy-out is disabled
static char* copydir = 0;
// debug option bits (-d); bit#0 enables the "# [n] name" traces on stderr
static unsigned debugFlag = 0;
// purge mode bits (-P): 0x01 send PURGE, 0x02 unlink 404 files, 0x04 unlink weird files
static unsigned purgeMode = 0;
// -a: show the rotating "i am alive" character
static bool iamalive = false;
// set once a cache file was unlinked; makes exiter() print the swap.state warning
static bool reminder = false;
// -v: use the extended log format (log_extended) instead of log_output
static bool verbose = false;
// -H: passed to copy_out() together with the copy-out directory
static bool envelope = false;
// -n: toggled by the command line parser; not read within this part of the file
static bool no_fork = false;
// basename of argv[0], set by parseCommandline()
static const char* programname = 0;
// revision string shown by helpMe() and by the -s option
static const char* RCS_ID = "$Id$";
eb1f6bfa AJ |
158 | |
159 | // ---------------------------------------------------------------------- | |
160 | ||
161 | struct REList { | |
feec68a0 A |
162 | REList( const char* what, bool doCase ); |
163 | ~REList(); | |
164 | bool match( const char* check ) const; | |
eb1f6bfa | 165 | |
feec68a0 A |
166 | REList* next; |
167 | const char* data; | |
168 | regex_t rexp; | |
eb1f6bfa AJ |
169 | }; |
170 | ||
171 | REList::REList( const char* what, bool doCase ) | |
2ccf2eb2 | 172 | :next(0),data(xstrdup(what)) |
eb1f6bfa | 173 | { |
feec68a0 A |
174 | int result = regcomp( &rexp, what, |
175 | REG_EXTENDED | REG_NOSUB | (doCase ? 0 : REG_ICASE) ); | |
176 | if ( result != 0 ) { | |
177 | char buffer[256]; | |
178 | regerror( result, &rexp, buffer, 256 ); | |
179 | fprintf( stderr, "unable to compile re \"%s\": %s\n", what, buffer ); | |
180 | exit(1); | |
181 | } | |
eb1f6bfa AJ |
182 | } |
183 | ||
184 | REList::~REList() | |
feec68a0 A |
185 | { |
186 | if ( next ) delete next; | |
6e2aefad | 187 | if ( data ) xfree((void*) data); |
feec68a0 | 188 | regfree(&rexp); |
eb1f6bfa AJ |
189 | } |
190 | ||
191 | bool | |
192 | REList::match( const char* check ) const | |
193 | { | |
feec68a0 A |
194 | int result = regexec( &rexp, check, 0, 0, 0 ); |
195 | if ( result != 0 && result != REG_NOMATCH ) { | |
196 | char buffer[256]; | |
197 | regerror( result, &rexp, buffer, 256 ); | |
198 | fprintf( stderr, "unable to execute re \"%s\"\n+ on line \"%s\": %s\n", | |
199 | data, check, buffer ); | |
200 | exit(1); | |
201 | } | |
202 | return ( result == 0 ); | |
eb1f6bfa AJ |
203 | } |
204 | ||
205 | // ---------------------------------------------------------------------- | |
206 | ||
char*
concat( const char* start, ... )
// purpose: concatenate an arbitrary number of C strings.
// paramtr: start (IN): first C string, must not be NULL
//          ... (IN): further C strings, terminated with a NULL pointer
// returns: memory allocated via new[], containing the concatenated string.
//          The caller has to release it with delete[].
// warning: the terminator is fetched as a `const char*`, so callers must pass
//          a null *pointer* (e.g. (const char*) 0), not a plain int 0.
{
    va_list ap;
    const char* s;

    // first run: determine size, including the terminating NUL
    const size_t startlen = strlen(start);
    size_t size = startlen + 1;
    va_start( ap, start );
    while ( (s=va_arg(ap,const char*)) != NULL ) size += strlen(s);
    va_end(ap);

    // allocate
    char* result = new char[size];
    if ( result == 0 ) {
        // only reachable with a non-throwing operator new
        perror( "string memory allocation" );
        exit(1);
    }

    // second run: copy content, remembering the write position so that the
    // result is not rescanned for every appended piece
    char* cursor = result;
    memcpy( cursor, start, startlen );
    cursor += startlen;

    va_start( ap, start );
    while ( (s=va_arg(ap,const char*)) != NULL ) {
        const size_t piece = strlen(s);
        memcpy( cursor, s, piece );
        cursor += piece;
    }
    va_end(ap);

    *cursor = '\0';
    return result;
}
238 | ||
bool
isxstring( const char* s, size_t testlen )
// purpose: test a string for consisting of hexadecimal digits only
// paramtr: s (IN): string to test
//          testlen (IN): length the string must have
// returns: true, iff strlen(s)==testlen && all characters are xdigits,
//          false otherwise
{
    if ( strlen(s) != testlen ) return false;

    for ( size_t i=0; i<testlen; ++i ) {
        // the <ctype.h> functions are only defined for unsigned char values
        // (and EOF); a plain char may be negative for bytes >= 0x80.
        if ( ! isxdigit( static_cast<unsigned char>(s[i]) ) ) return false;
    }
    return true;
}
252 | ||
inline
int
log_output( const char* fn, int code, long size, const char* url )
// purpose: write the short log record for one cache file to stdout
// paramtr: fn (IN): name of the object file
//          code (IN): status code to show, right-aligned in 3 columns
//          size (IN): size to show, right-aligned in 8 columns
//          url (IN): URL (or a remark) belonging to the file
// returns: the result of printf(), which is negative on output errors.
{
    const int written = printf( "%s %3d %8ld %s\n", fn, code, size, url );
    return written;
}
259 | ||
260 | static | |
261 | int | |
262 | log_extended( const char* fn, int code, long size, const SquidMetaList* meta ) | |
263 | { | |
feec68a0 A |
264 | static const char hexdigit[] = "0123456789ABCDEF"; |
265 | char md5[34]; | |
266 | const SquidTLV* findings = 0; | |
267 | ||
268 | if ( meta && (findings = meta->search( STORE_META_KEY_MD5 )) ) { | |
269 | unsigned char* s = (unsigned char*) findings->data; | |
270 | for ( int j=0; j<16; j++, s++ ) { | |
271 | md5[j*2+0] = hexdigit[ *s >> 4 ]; | |
272 | md5[j*2+1] = hexdigit[ *s & 15 ]; | |
273 | } | |
274 | md5[32] = '\0'; // terminate string | |
275 | } else { | |
6e2aefad | 276 | snprintf( md5, sizeof(md5), "%-32s", "(no_md5_data_available)" ); |
feec68a0 A |
277 | } |
278 | ||
279 | char timeb[64]; | |
280 | if ( meta && (findings = meta->search( STORE_META_STD )) ) { | |
281 | StoreMetaStd temp; | |
282 | // make data aligned, avoid SIGBUS on RISC machines (ARGH!) | |
283 | memcpy( &temp, findings->data, sizeof(StoreMetaStd) ); | |
6e2aefad | 284 | snprintf( timeb, sizeof(timeb), "%08lx %08lx %08lx %08lx %04x %5hu ", |
8978bd9d A |
285 | (unsigned long)temp.timestamp, (unsigned long)temp.lastref, |
286 | (unsigned long)temp.expires, (unsigned long)temp.lastmod, temp.flags, temp.refcount ); | |
feec68a0 A |
287 | } else if ( meta && (findings = meta->search( STORE_META_STD_LFS )) ) { |
288 | StoreMetaStdLFS temp; | |
289 | // make data aligned, avoid SIGBUS on RISC machines (ARGH!) | |
290 | memcpy( &temp, findings->data, sizeof(StoreMetaStd) ); | |
6e2aefad | 291 | snprintf( timeb, sizeof(timeb), "%08lx %08lx %08lx %08lx %04x %5hu ", |
8978bd9d A |
292 | (unsigned long)temp.timestamp, (unsigned long)temp.lastref, |
293 | (unsigned long)temp.expires, (unsigned long)temp.lastmod, temp.flags, temp.refcount ); | |
feec68a0 | 294 | } else { |
6e2aefad | 295 | snprintf( timeb, sizeof(timeb), "%08lx %08lx %08lx %08lx %04x %5hu ", (unsigned long)-1, (unsigned long)-1, (unsigned long)-1, (unsigned long)-1, 0, 0 ); |
feec68a0 A |
296 | } |
297 | ||
298 | // make sure that there is just one printf() | |
299 | if ( meta && (findings = meta->search( STORE_META_URL )) ) { | |
300 | return printf( "%s %3d %8ld %s %s %s\n", | |
301 | fn, code, size, md5, timeb, findings->data ); | |
302 | } else { | |
303 | return printf( "%s %3d %8ld %s %s strange_file\n", | |
304 | fn, code, size, md5, timeb ); | |
eb1f6bfa | 305 | } |
eb1f6bfa AJ |
306 | } |
307 | ||
// o.k., this is pure laziness...
// Address and port of the cache server; action() connects to them in order
// to send its PURGE requests.
static struct in_addr serverHost;
static unsigned short serverPort;
311 | ||
312 | bool | |
313 | action( int fd, size_t metasize, | |
feec68a0 A |
314 | const char* fn, const char* url, const SquidMetaList& meta ) |
315 | // purpose: if cmdline-requested, send the purge request to the cache | |
316 | // paramtr: fd (IN): open FD for the object file | |
317 | // metasize (IN): offset into data portion of file (meta data size) | |
318 | // fn (IN): name of the object file | |
319 | // url (IN): URL string stored in the object file | |
320 | // meta (IN): list containing further meta data | |
321 | // returns: true for a successful action, false otherwise. The action | |
322 | // may just print the file, send the purge request or even | |
323 | // remove unwanted files. | |
324 | // globals: ::purgeMode (IN): bit#0 set -> send purge request. | |
325 | // bit#1 set -> remove 404 object files. | |
326 | // ::serverHost (IN): cache host address | |
327 | // ::serverPort (IN): cache port number | |
eb1f6bfa | 328 | { |
feec68a0 A |
329 | static const char* schablone = "PURGE %s HTTP/1.0\r\nAccept: */*\r\n\r\n"; |
330 | struct stat st; | |
331 | long size = ( fstat(fd,&st) == -1 ? -1 : long(st.st_size - metasize) ); | |
332 | int status = 0; | |
333 | ||
334 | // if we want to copy out the file, do that first of all. | |
335 | if ( ::copydir && *copydir && size > 0 ) | |
59a09b98 | 336 | copy_out( st.st_size, metasize, ::debugFlag, |
feec68a0 A |
337 | fn, url, ::copydir, ::envelope ); |
338 | ||
339 | // do we need to PURGE the file, yes, if purgemode bit#0 was set. | |
340 | if ( ::purgeMode & 0x01 ) { | |
341 | unsigned long bufsize = strlen(url) + strlen(schablone) + 4; | |
342 | char* buffer = new char[bufsize]; | |
343 | ||
6e2aefad | 344 | snprintf( buffer, bufsize, schablone, url ); |
feec68a0 A |
345 | int sockfd = connectTo( serverHost, serverPort, true ); |
346 | if ( sockfd == -1 ) { | |
347 | fprintf( stderr, "unable to connect to server: %s\n", strerror(errno) ); | |
348 | delete[] buffer; | |
349 | return false; | |
350 | } | |
351 | ||
352 | int size = strlen(buffer); | |
353 | if ( write( sockfd, buffer, size ) != size ) { | |
354 | // error while talking to squid | |
355 | fprintf( stderr, "unable to talk to server: %s\n", strerror(errno) ); | |
356 | close(sockfd); | |
357 | delete[] buffer; | |
358 | return false; | |
359 | } | |
360 | memset( buffer+8, 0, 4 ); | |
361 | if ( read( sockfd, buffer, bufsize ) < 1 ) { | |
362 | // error while reading squid's answer | |
363 | fprintf( stderr, "unable to read answer: %s\n", strerror(errno) ); | |
364 | close(sockfd); | |
365 | delete[] buffer; | |
366 | return false; | |
367 | } | |
368 | close(sockfd); | |
369 | status = strtol(buffer+8,0,10); | |
370 | delete[] buffer; | |
eb1f6bfa | 371 | } |
feec68a0 A |
372 | |
373 | // log the output of our operation | |
374 | bool flag = true; | |
375 | if ( ::verbose ) flag = ( log_extended( fn, status, size, &meta ) >= 0 ); | |
376 | else flag = ( log_output( fn, status, size, url ) >= 0 ); | |
377 | ||
378 | // remove the file, if purgemode bit#1, and HTTP result status 404). | |
379 | if ( (::purgeMode & 0x02) && status == 404 ) { | |
380 | reminder = true; | |
381 | if ( unlink(fn) == -1 ) | |
382 | // error while unlinking file, this may happen due to the cache | |
383 | // unlinking a file while it is still in the readdir() cache of purge. | |
384 | fprintf( stderr, "WARNING: unable to unlink %s: %s\n", | |
385 | fn, strerror(errno) ); | |
eb1f6bfa | 386 | } |
feec68a0 A |
387 | |
388 | return flag; | |
eb1f6bfa AJ |
389 | } |
390 | ||
391 | bool | |
392 | match( const char* fn, const REList* list ) | |
feec68a0 A |
393 | // purpose: do something with the given cache content filename |
394 | // paramtr: fn (IN): filename of cache file | |
395 | // returns: true for successful action, false otherwise. | |
396 | // warning: only return false, if you want the loop to terminate! | |
eb1f6bfa | 397 | { |
feec68a0 A |
398 | static const size_t addon = sizeof(unsigned char) + sizeof(unsigned int); |
399 | bool flag = true; | |
400 | ||
59a09b98 | 401 | if ( debugFlag & 0x01 ) fprintf( stderr, "# [3] %s\n", fn ); |
feec68a0 A |
402 | int fd = open( fn, O_RDONLY ); |
403 | if ( fd != -1 ) { | |
404 | if ( read(fd,::linebuffer,::buffersize-1) > 60 ) { | |
405 | ::linebuffer[ ::buffersize-1 ] = '\0'; // force-terminate string | |
406 | ||
407 | // check the offset into the start of object data. The offset is | |
408 | // stored in a host endianess after the first byte. | |
409 | unsigned int datastart; | |
410 | memcpy( &datastart, ::linebuffer + 1, sizeof(unsigned int) ); | |
411 | if ( datastart > ::buffersize - addon - 1 ) { | |
412 | // check offset into server reply header (start of cache data). | |
413 | fputs( "WARNING: Using a truncated URL string.\n", stderr ); | |
414 | datastart = ::buffersize - addon - 1; | |
415 | } | |
416 | ||
417 | // NEW: Parse squid meta data, which is a kind of linked list | |
418 | // flattened out into a file byte stream. Somewhere within is | |
419 | // the URL as part of the list. First, gobble all meta data. | |
420 | unsigned int offset = addon; | |
421 | SquidMetaList meta; | |
422 | while ( offset + addon <= datastart ) { | |
423 | unsigned int size = 0; | |
424 | memcpy( &size, linebuffer+offset+sizeof(char), sizeof(unsigned int) ); | |
425 | meta.append( SquidMetaType(*(linebuffer+offset)), | |
426 | size, linebuffer+offset+addon ); | |
427 | offset += ( addon + size ); | |
428 | } | |
429 | ||
430 | // Now extract the key URL from the meta data. | |
431 | const SquidTLV* urlmeta = meta.search( STORE_META_URL ); | |
432 | if ( urlmeta ) { | |
433 | // found URL in meta data. Try to process the URL | |
434 | if ( list == 0 ) | |
435 | flag = action( fd, datastart, fn, (char*) urlmeta->data, meta ); | |
436 | else { | |
437 | REList* head = (REList*) list; // YUCK! | |
438 | while ( head != 0 ) { | |
439 | if ( head->match( (char*) urlmeta->data ) ) break; | |
440 | head = head->next; | |
441 | } | |
442 | if ( head != 0 ) | |
443 | flag = action( fd, datastart, fn, (char*) urlmeta->data, meta ); | |
444 | else flag = true; | |
445 | } | |
446 | } | |
447 | ||
448 | // "meta" will be deleted when exiting from this block | |
449 | } else { | |
450 | // weird file, FIXME: stat() it! | |
451 | struct stat st; | |
452 | long size = ( fstat(fd,&st) == -1 ? -1 : st.st_size ); | |
453 | if ( ::verbose ) flag = ( log_extended( fn, -1, size, 0 ) >= 0 ); | |
454 | else flag = ( log_output( fn, -1, size, "strange file" ) >= 0 ); | |
455 | ||
456 | if ( (::purgeMode & 0x04) ) { | |
457 | reminder = true; | |
458 | if ( unlink(fn) == -1 ) | |
459 | // error while unlinking file, this may happen due to the cache | |
460 | // unlinking a file while it is in the readdir() cache of purge. | |
461 | fprintf( stderr, "WARNING: unable to unlink %s: %s\n", | |
462 | fn, strerror(errno) ); | |
463 | } | |
464 | } | |
465 | close(fd); | |
eb1f6bfa | 466 | } else { |
feec68a0 A |
467 | // error while opening file, this may happen due to the cache |
468 | // unlinking a file while it is still in the readdir() cache of purge. | |
469 | fprintf( stderr, "WARNING: open \"%s\": %s\n", fn, strerror(errno) ); | |
eb1f6bfa | 470 | } |
feec68a0 A |
471 | |
472 | return flag; | |
eb1f6bfa AJ |
473 | } |
474 | ||
475 | bool | |
476 | filelevel( const char* directory, const REList* list ) | |
feec68a0 A |
477 | // purpose: from given starting point, look for squid xxxxxxxx files. |
478 | // example: "/var/spool/cache/08/7F" as input, do action over files | |
479 | // paramtr: directory (IN): starting point | |
480 | // list (IN): list of rexps to match URLs against | |
481 | // returns: true, if every subdir && action was successful. | |
eb1f6bfa | 482 | { |
fb151769 | 483 | dirent_t * entry; |
59a09b98 | 484 | if ( debugFlag & 0x01 ) |
feec68a0 A |
485 | fprintf( stderr, "# [2] %s\n", directory ); |
486 | ||
487 | DIR* dir = opendir( directory ); | |
488 | if ( dir == NULL ) { | |
489 | fprintf( stderr, "unable to open directory \"%s\": %s\n", | |
490 | directory, strerror(errno) ); | |
491 | return false; | |
492 | } | |
493 | ||
494 | // display a rotating character as "i am alive" signal (slows purge). | |
495 | if ( ::iamalive ) { | |
496 | static char alivelist[4][3] = { "\\\b", "|\b", "/\b", "-\b" }; | |
497 | static unsigned short alivecount = 0; | |
498 | assert( write( STDOUT_FILENO, alivelist[alivecount++ & 3], 2 ) == 2 ); | |
499 | } | |
500 | ||
501 | bool flag = true; | |
502 | while ( (entry=readdir(dir)) && flag ) { | |
503 | if ( isxstring(entry->d_name,8) ) { | |
504 | char* name = concat( directory, "/", entry->d_name, 0 ); | |
505 | flag = match( name, list ); | |
506 | delete[] name; | |
507 | } | |
eb1f6bfa | 508 | } |
eb1f6bfa | 509 | |
feec68a0 A |
510 | closedir(dir); |
511 | return flag; | |
eb1f6bfa AJ |
512 | } |
513 | ||
514 | bool | |
515 | dirlevel( const char* dirname, const REList* list, bool level=false ) | |
feec68a0 A |
516 | // purpose: from given starting point, look for squid 00..FF directories. |
517 | // paramtr: dirname (IN): starting point | |
518 | // list (IN): list of rexps to match URLs against | |
519 | // level (IN): false==toplevel, true==1st level | |
520 | // example: "/var/spool/cache", false as input, traverse subdirs w/ action. | |
521 | // example: "/var/spool/cache/08", true as input, traverse subdirs w/ action. | |
522 | // returns: true, if every subdir && action was successful. | |
523 | // warning: this function is once-recursive, no deeper. | |
eb1f6bfa | 524 | { |
fb151769 | 525 | dirent_t* entry; |
59a09b98 | 526 | if ( debugFlag & 0x01 ) |
feec68a0 A |
527 | fprintf( stderr, "# [%d] %s\n", (level ? 1 : 0), dirname ); |
528 | ||
529 | DIR* dir = opendir( dirname ); | |
530 | if ( dir == NULL ) { | |
531 | fprintf( stderr, "unable to open directory \"%s\": %s\n", | |
532 | dirname, strerror(errno) ); | |
533 | return false; | |
eb1f6bfa | 534 | } |
eb1f6bfa | 535 | |
feec68a0 A |
536 | bool flag = true; |
537 | while ( (entry=readdir(dir)) && flag ) { | |
538 | if ( strlen(entry->d_name) == 2 && | |
539 | isxdigit(entry->d_name[0]) && | |
540 | isxdigit(entry->d_name[1]) ) { | |
541 | char* name = concat( dirname, "/", entry->d_name, 0 ); | |
542 | flag = level ? filelevel( name, list ) : dirlevel( name, list, true ); | |
543 | delete[] name; | |
544 | } | |
545 | } | |
546 | ||
547 | closedir(dir); | |
548 | return flag; | |
eb1f6bfa AJ |
549 | } |
550 | ||
int
checkForPortOnly( const char* optarg )
// purpose: see if somebody just put in a port instead of a hostname
// paramtr: optarg (IN): argument from commandline
// returns: 0..65535 is the valid port number in network byte order,
//          -1 if not a port
{
    // if there is a period in there, it must be a valid hostname
    if ( strchr( optarg, '.' ) != 0 ) return -1;

    // if it is just a number between 0 and 65535, it must be a port.
    // "Just a number" means that the conversion consumed at least one
    // character and nothing is left behind it; otherwise something like
    // "8080abc" would silently be taken for port 8080.
    char* errstr = 0;
    unsigned long result = strtoul( optarg, &errstr, 0 );
    if ( errstr != optarg && *errstr == '\0' && result < 65536 )
        return htons(result);

#if 0
    // one last try, test for a symbolical service name
    struct servent* service = getservbyname( optarg, "tcp" );
    return service ? service->s_port : -1;
#else
    return -1;
#endif
}
574 | ||
575 | void | |
576 | helpMe( void ) | |
feec68a0 | 577 | // purpuse: write help message and exit |
eb1f6bfa | 578 | { |
feec68a0 A |
579 | printf( "\n%s\nUsage:\t%s\t[-a] [-c cf] [-d l] [-(f|F) fn | -(e|E) re] " |
580 | "[-p h[:p]]\n\t\t[-P #] [-s] [-v] [-C dir [-H]] [-n]\n\n", | |
581 | ::RCS_ID, ::programname ); | |
582 | printf( | |
583 | " -a\tdisplay a little rotating thingy to indicate that I am alive (tty only).\n" | |
584 | " -c c\tsquid.conf location, default \"%s\".\n" | |
585 | " -C dir\tbase directory for content extraction (copy-out mode).\n" | |
586 | " -d l\tdebug level, an OR of different debug options.\n" | |
587 | " -e re\tsingle regular expression per -e instance (use quotes!).\n" | |
588 | " -E re\tsingle case sensitive regular expression like -e.\n" | |
589 | " -f fn\tname of textfile containing one regular expression per line.\n" | |
590 | " -F fn\tname of textfile like -f containing case sensitive REs.\n" | |
591 | " -H\tprepend HTTP reply header to destination files in copy-out mode.\n" | |
592 | " -n\tdo not fork() when using more than one cache_dir.\n" | |
593 | " -p h:p\tcache runs on host h and optional port p, default is %s:%u.\n" | |
594 | " -P #\tif 0, just print matches; otherwise OR the following purge modes:\n" | |
595 | "\t 0x01 really send PURGE to the cache.\n" | |
596 | "\t 0x02 remove all caches files reported as 404 (not found).\n" | |
597 | "\t 0x04 remove all weird (inaccessible or too small) cache files.\n" | |
598 | "\t0 and 1 are recommended - slow rebuild your cache with other modes.\n" | |
599 | " -s\tshow all options after option parsing, but before really starting.\n" | |
600 | " -v\tshow more information about the file, e.g. MD5, timestamps and flags.\n" | |
601 | "\n", DEFAULT_SQUID_CONF, DEFAULTHOST, DEFAULTPORT ); | |
eb1f6bfa AJ |
602 | |
603 | } | |
604 | ||
605 | void | |
606 | parseCommandline( int argc, char* argv[], REList*& head, | |
feec68a0 A |
607 | char*& conffile, char*& copydir, |
608 | struct in_addr& serverHost, unsigned short& serverPort ) | |
609 | // paramtr: argc: see ::main(). | |
610 | // argv: see ::main(). | |
611 | // returns: Does terminate the program on errors! | |
612 | // purpose: suck in any commandline options, and set the global vars. | |
613 | { | |
614 | int option, port, showme = 0; | |
615 | char* ptr, *colon; | |
616 | FILE* rfile; | |
617 | ||
618 | // program basename | |
619 | if ( (ptr = strrchr(argv[0],'/')) == NULL ) ptr=argv[0]; | |
620 | else ptr++; | |
621 | ::programname = ptr; | |
622 | ||
623 | // extract commandline parameters | |
624 | REList* tail = head = 0; | |
625 | opterr = 0; | |
626 | while ( (option = getopt( argc, argv, "ac:C:d:E:e:F:f:Hnp:P:sv" )) != -1 ) { | |
627 | switch ( option ) { | |
628 | case 'a': | |
629 | ::iamalive = ! ::iamalive; | |
630 | break; | |
631 | case 'C': | |
632 | if ( optarg && *optarg ) { | |
6e2aefad | 633 | if ( copydir ) xfree( (void*) copydir ); |
2ccf2eb2 | 634 | assert( (copydir = xstrdup(optarg)) ); |
feec68a0 A |
635 | } |
636 | break; | |
637 | case 'c': | |
638 | if ( optarg && *optarg ) { | |
6e2aefad | 639 | if ( *conffile ) xfree((void*) conffile ); |
2ccf2eb2 | 640 | assert( (conffile = xstrdup(optarg)) ); |
feec68a0 A |
641 | } |
642 | break; | |
643 | ||
644 | case 'd': | |
59a09b98 | 645 | ::debugFlag = strtoul( optarg, 0, 0 ); |
feec68a0 A |
646 | break; |
647 | ||
648 | case 'E': | |
649 | case 'e': | |
650 | if ( head == 0 ) tail = head = new REList( optarg, option=='E' ); | |
651 | else { | |
652 | tail->next = new REList( optarg, option=='E' ); | |
653 | tail = tail->next; | |
654 | } | |
655 | break; | |
656 | ||
657 | case 'f': | |
658 | if ( (rfile = fopen( optarg, "r" )) != NULL ) { | |
659 | unsigned long lineno = 0; | |
eb1f6bfa | 660 | #define LINESIZE 512 |
feec68a0 A |
661 | char line[LINESIZE]; |
662 | while ( fgets( line, LINESIZE, rfile ) != NULL ) { | |
663 | lineno++; | |
664 | int len = strlen(line)-1; | |
665 | if ( len+2 >= LINESIZE ) { | |
666 | fprintf( stderr, "%s:%lu: line too long, sorry.\n", | |
667 | optarg, lineno ); | |
668 | exit(1); | |
669 | } | |
670 | ||
671 | // remove trailing line breaks | |
672 | while ( len > 0 && ( line[len] == '\n' || line[len] == '\r' ) ) | |
673 | line[len--] = '\0'; | |
674 | ||
675 | // insert into list of expressions | |
676 | if ( head == 0 ) tail = head = new REList(line,option=='F'); | |
677 | else { | |
678 | tail->next = new REList(line,option=='F'); | |
679 | tail = tail->next; | |
680 | } | |
681 | } | |
682 | fclose(rfile); | |
683 | } else | |
684 | fprintf( stderr, "unable to open %s: %s\n", optarg, strerror(errno)); | |
685 | break; | |
686 | ||
687 | case 'H': | |
688 | ::envelope = ! ::envelope; | |
689 | break; | |
690 | case 'n': | |
691 | ::no_fork = ! ::no_fork; | |
692 | break; | |
693 | case 'p': | |
694 | colon = strchr( optarg, ':' ); | |
695 | if ( colon == 0 ) { | |
696 | // no colon, only look at host | |
697 | ||
698 | // fix: see if somebody just put in there a port (no periods) | |
699 | // give port number precedence over host names | |
700 | port = checkForPortOnly( optarg ); | |
701 | if ( port == -1 ) { | |
702 | // assume that main() did set the default port | |
703 | if ( convertHostname(optarg,serverHost) == -1 ) { | |
704 | fprintf( stderr, "unable to resolve host %s!\n", optarg ); | |
705 | exit(1); | |
706 | } | |
707 | } else { | |
708 | // assume that main() did set the default host | |
709 | serverPort = port; | |
710 | } | |
711 | } else { | |
712 | // colon used, port is extra | |
713 | *colon++ = 0; | |
714 | if ( convertHostname(optarg,serverHost) == -1 ) { | |
715 | fprintf( stderr, "unable to resolve host %s!\n", optarg ); | |
716 | exit(1); | |
717 | } | |
718 | if ( convertPortname(colon,serverPort) == -1 ) { | |
719 | fprintf( stderr, "unable to resolve port %s!\n", colon ); | |
720 | exit(1); | |
721 | } | |
722 | } | |
723 | break; | |
724 | case 'P': | |
725 | ::purgeMode = ( strtol( optarg, 0, 0 ) & 0x07 ); | |
726 | break; | |
727 | case 's': | |
728 | showme=1; | |
729 | break; | |
730 | case 'v': | |
731 | ::verbose = ! ::verbose; | |
732 | break; | |
733 | case '?': | |
734 | default: | |
735 | helpMe(); | |
736 | exit(1); | |
737 | } | |
738 | } | |
739 | ||
740 | // adjust | |
59a09b98 | 741 | if ( ! isatty(fileno(stdout)) || (::debugFlag & 0x01) ) ::iamalive = false; |
feec68a0 A |
742 | if ( head == 0 ) { |
743 | fputs( "There was no regular expression defined. If you intend\n", stderr ); | |
744 | fputs( "to match all possible URLs, use \"-e .\" instead.\n", stderr ); | |
745 | exit(1); | |
eb1f6bfa | 746 | } |
feec68a0 A |
747 | |
748 | // postcondition: head != 0 | |
749 | assert( head != 0 ); | |
750 | ||
751 | // make sure that the copy out directory is there and accessible | |
752 | if ( copydir && *copydir ) | |
753 | if ( assert_copydir( copydir ) != 0 ) exit(1); | |
754 | ||
755 | // show results | |
756 | if ( showme ) { | |
757 | printf( "#\n# Currently active values for %s:\n# %s\n", | |
758 | ::programname, ::RCS_ID ); | |
759 | printf( "# Debug level : " ); | |
59a09b98 | 760 | if ( ::debugFlag ) printf( "%#6.4hx", ::debugFlag ); |
feec68a0 A |
761 | else printf( "production level" ); // printf omits 0x prefix for 0! |
762 | printf( " + %s mode", ::no_fork ? "linear" : "parallel" ); | |
763 | puts( ::verbose ? " + extra verbosity" : "" ); | |
764 | ||
765 | printf( "# Copy-out directory: %s ", | |
766 | copydir ? copydir : "copy-out mode disabled" ); | |
767 | if ( copydir ) | |
768 | printf( "(%s HTTP header)\n", ::envelope ? "prepend" : "no" ); | |
769 | else | |
770 | puts(""); | |
771 | ||
772 | printf( "# Squid config file : %s\n", conffile ); | |
773 | printf( "# Cacheserveraddress: %s:%u\n", | |
774 | inet_ntoa( serverHost ), ntohs( serverPort ) ); | |
775 | printf( "# purge mode : 0x%02x\n", ::purgeMode ); | |
776 | printf( "# Regular expression: " ); | |
777 | ||
778 | unsigned count(0); | |
779 | for ( tail = head; tail != NULL; tail = tail->next ) { | |
780 | if ( count++ ) printf( "#%22u", count ); | |
eb1f6bfa | 781 | #if defined(LINUX) && putc==_IO_putc |
feec68a0 A |
782 | // I HATE BROKEN LINUX HEADERS! |
783 | // purge.o(.text+0x1040): undefined reference to `_IO_putc' | |
784 | // If your compilation breaks here, remove the undefinition | |
785 | #undef putc | |
eb1f6bfa | 786 | #endif |
feec68a0 A |
787 | else putchar('1'); |
788 | printf( " \"%s\"\n", tail->data ); | |
789 | } | |
790 | puts( "#" ); | |
eb1f6bfa | 791 | } |
feec68a0 | 792 | fflush( stdout ); |
eb1f6bfa AJ |
793 | } |
794 | ||
795 | extern "C" { | |
796 | ||
feec68a0 A |
797 | static |
798 | void | |
799 | exiter( void ) { | |
800 | if ( ::term_flag ) psignal( ::term_flag, "received signal" ); | |
801 | delete[] ::linebuffer; | |
802 | if ( ::reminder ) { | |
803 | fputs( | |
804 | "WARNING! Caches files were removed. Please shut down your cache, remove\n" | |
805 | "your swap.state files and restart your cache again, i.e. effictively do\n" | |
806 | "a slow rebuild your cache! Otherwise your squid *will* choke!\n", stderr ); | |
807 | } | |
808 | } | |
eb1f6bfa | 809 | |
feec68a0 A |
810 | static |
811 | void | |
812 | handler( int signo ) { | |
813 | ::term_flag = signo; | |
814 | if ( getpid() == getpgrp() ) kill( -getpgrp(), signo ); | |
815 | exit(1); | |
816 | } | |
eb1f6bfa AJ |
817 | |
818 | } // extern "C" | |
819 | ||
static
int
makelinebuffered( FILE* fp, const char* fn = 0 )
// purpose: switch a stdio stream into line buffered mode
// paramtr: fp (IO): stream whose buffering mode is changed via setvbuf()
//          fn (IN): name shown in the error message, empty string if 0
// returns: 0 on success, -1 if setvbuf() reported a failure
// warning: the error message has already been written to stderr on failure
{
  if ( setvbuf( fp, 0, _IOLBF, 0 ) != 0 ) {
    // failure: complain, using an empty name when none was supplied
    const char* label = fn ? fn : "";
    fprintf( stderr, "unable to make \"%s\" line buffered: %s\n",
             label, strerror(errno) );
    return -1;
  }

  // success
  return 0;
}
839 | ||
840 | int | |
841 | main( int argc, char* argv[] ) | |
842 | { | |
feec68a0 A |
843 | // setup variables |
844 | REList* list = 0; | |
2ccf2eb2 | 845 | char* conffile = xstrdup( DEFAULT_SQUID_CONF ); |
feec68a0 A |
846 | serverPort = htons(DEFAULTPORT); |
847 | if ( convertHostname(DEFAULTHOST,serverHost) == -1 ) { | |
848 | fprintf( stderr, "unable to resolve host %s!\n", DEFAULTHOST ); | |
849 | return 1; | |
850 | } | |
851 | ||
852 | // setup line buffer | |
853 | ::linebuffer = new char[ ::buffersize ]; | |
854 | assert( ::linebuffer != 0 ); | |
855 | ||
856 | // parse commandline | |
857 | puts( "### Use at your own risk! No guarantees whatsoever. You were warned. ###"); | |
858 | parseCommandline( argc, argv, list, conffile, ::copydir, | |
859 | serverHost, serverPort ); | |
860 | ||
861 | // prepare execution | |
862 | if ( atexit( exiter ) != 0 || | |
863 | Signal( SIGTERM, handler, true ) == SIG_ERR || | |
864 | Signal( SIGINT, handler, true ) == SIG_ERR || | |
865 | Signal( SIGHUP, handler, true ) == SIG_ERR ) { | |
866 | perror( "unable to install signal/exit function" ); | |
867 | return 1; | |
868 | } | |
869 | ||
870 | // try to read squid.conf file to determine all cache_dir locations | |
871 | CacheDirVector cdv(0); | |
59a09b98 | 872 | if ( readConfigFile( cdv, conffile, debugFlag ? stderr : 0 ) > 0 ) { |
feec68a0 A |
873 | // there are some valid cache_dir entries. |
874 | // unless forking was forbidden by cmdline option, | |
875 | // for a process for each cache_dir entry to remove files. | |
876 | ||
877 | if ( ::no_fork || cdv.size() == 1 ) { | |
878 | // linear mode, one cache_dir after the next | |
879 | for ( CacheDirVector::iterator i = cdv.begin(); i != cdv.end(); ++i ) { | |
880 | // execute OR complain | |
881 | if ( ! dirlevel(i->base,list) ) | |
882 | fprintf( stderr, "program terminated due to error: %s", | |
883 | strerror(errno) ); | |
6e2aefad | 884 | xfree((void*) i->base); |
feec68a0 A |
885 | } |
886 | } else { | |
887 | // parallel mode, all cache_dir in parallel | |
888 | pid_t* child = new pid_t[ cdv.size() ]; | |
889 | ||
890 | // make stdout/stderr line bufferd | |
891 | makelinebuffered( stdout, "stdout" ); | |
892 | makelinebuffered( stderr, "stderr" ); | |
893 | ||
894 | // make parent process group leader for easier killings | |
895 | if ( setpgid(getpid(), getpid()) != 0 ) { | |
896 | perror( "unable to set process group leader" ); | |
897 | return 1; | |
898 | } | |
899 | ||
900 | // -a is mutually exclusive with fork mode | |
901 | if ( ::iamalive ) { | |
902 | puts( "# i-am-alive flag incompatible with fork mode, resetting" ); | |
903 | ::iamalive = false; | |
904 | } | |
905 | ||
2ccf2eb2 | 906 | for ( size_t i=0; i < cdv.size(); ++i ) { |
feec68a0 A |
907 | if ( getpid() == getpgrp() ) { |
908 | // only parent == group leader may fork off new processes | |
909 | if ( (child[i]=fork()) < 0 ) { | |
910 | // fork error, this is bad! | |
911 | perror( "unable to fork" ); | |
912 | kill( -getpgrp(), SIGTERM ); | |
913 | return 1; | |
914 | } else if ( child[i] == 0 ) { | |
915 | // child mode | |
916 | // execute OR complain | |
917 | if ( ! dirlevel(cdv[i].base,list) ) | |
918 | fprintf( stderr, "program terminated due to error: %s\n", | |
919 | strerror(errno) ); | |
6e2aefad | 920 | xfree((void*) cdv[i].base); |
feec68a0 A |
921 | return 0; |
922 | } else { | |
923 | // parent mode | |
59a09b98 | 924 | if ( ::debugFlag ) printf( "forked child %d\n", (int) child[i] ); |
feec68a0 A |
925 | } |
926 | } | |
927 | } | |
928 | ||
929 | // collect the garbase | |
930 | pid_t temp; | |
931 | int status; | |
2ccf2eb2 | 932 | for ( size_t i=0; i < cdv.size(); ++i ) { |
feec68a0 A |
933 | while ( (temp=waitpid( (pid_t)-1, &status, 0 )) == -1 ) |
934 | if ( errno == EINTR ) continue; | |
59a09b98 | 935 | if ( ::debugFlag ) printf( "collected child %d\n", (int) temp ); |
feec68a0 A |
936 | } |
937 | delete[] child; | |
938 | } | |
eb1f6bfa | 939 | } else { |
feec68a0 | 940 | fprintf( stderr, "no cache_dir or error accessing \"%s\"\n", conffile ); |
eb1f6bfa | 941 | } |
feec68a0 A |
942 | |
943 | // clean up | |
6e2aefad HN |
944 | if ( copydir ) xfree( (void*) copydir ); |
945 | xfree((void*) conffile); | |
feec68a0 A |
946 | delete list; |
947 | return 0; | |
eb1f6bfa | 948 | } |