]>
Commit | Line | Data |
---|---|---|
eb1f6bfa | 1 | // |
7962bc6a | 2 | // $Id$ |
eb1f6bfa | 3 | // |
0b96a9b3 | 4 | // Author: Jens-S. Vöckler <voeckler@rvs.uni-hannover.de> |
eb1f6bfa AJ |
5 | // |
6 | // File: purge.cc | |
7 | // Wed Jan 13 1999 | |
8 | // | |
9 | // (c) 1999 Lehrgebiet Rechnernetze und Verteilte Systeme | |
0b96a9b3 | 10 | // Universität Hannover, Germany |
eb1f6bfa AJ |
11 | // |
12 | // Permission to use, copy, modify, distribute, and sell this software | |
13 | // and its documentation for any purpose is hereby granted without fee, | |
14 | // provided that (i) the above copyright notices and this permission | |
15 | // notice appear in all copies of the software and related documentation, | |
16 | // and (ii) the names of the Lehrgebiet Rechnernetze und Verteilte | |
17 | // Systeme and the University of Hannover may not be used in any | |
18 | // advertising or publicity relating to the software without the | |
19 | // specific, prior written permission of Lehrgebiet Rechnernetze und | |
20 | // Verteilte Systeme and the University of Hannover. | |
21 | // | |
22 | // THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, | |
23 | // EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY | |
24 | // WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. | |
25 | // | |
26 | // IN NO EVENT SHALL THE LEHRGEBIET RECHNERNETZE UND VERTEILTE SYSTEME OR | |
27 | // THE UNIVERSITY OF HANNOVER BE LIABLE FOR ANY SPECIAL, INCIDENTAL, | |
28 | // INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, OR ANY DAMAGES | |
29 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER OR NOT | |
30 | // ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF LIABILITY, | |
31 | // ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS | |
32 | // SOFTWARE. | |
33 | // | |
eb1f6bfa AJ |
34 | // Revision 1.17 2000/09/21 10:59:53 cached |
35 | // *** empty log message *** | |
36 | // | |
37 | // Revision 1.16 2000/09/21 09:45:18 cached | |
38 | // Fixed some small bugs. | |
39 | // | |
40 | // Revision 1.15 2000/09/21 09:05:56 cached | |
41 | // added multi cache_dir support, thus changing -c cmdline option. | |
42 | // modified file reading to support /dev/fd/0 reading for non-disclosed items. | |
43 | // | |
44 | // Revision 1.14 2000/06/20 09:43:01 voeckler | |
45 | // added FreeBSD related fixes and support. | |
46 | // | |
47 | // Revision 1.13 2000/03/29 08:12:21 voeckler | |
48 | // fixed wrong header file. | |
49 | // | |
50 | // Revision 1.12 2000/03/29 07:54:41 voeckler | |
51 | // added mechanism to give a port specification precedence over a host | |
52 | // specification with the -p option and not colon. | |
53 | // | |
54 | // Revision 1.11 1999/06/18 13:18:28 voeckler | |
55 | // added refcount, fixed missing LF in -s output. | |
56 | // | |
57 | // Revision 1.10 1999/06/16 13:06:05 voeckler | |
58 | // reversed meaning of -M flag. | |
59 | // | |
60 | // Revision 1.9 1999/06/15 21:11:53 voeckler | |
61 | // added extended logging feature which extract the squid meta data available | |
62 | // within the disk files. moved the content extraction and squid meta data | |
63 | // handling parts into separate files. added options for copy-out and verbose. | |
64 | // | |
65 | // Revision 1.8 1999/06/14 20:14:46 voeckler | |
66 | // intermediate version when adding understanding about the way | |
67 | // Squid does log the metadata into the file. | |
68 | // | |
69 | // Revision 1.7 1999/01/23 21:01:10 root | |
70 | // stumbled over libc5 header/lib inconsistency bug.... | |
71 | // | |
72 | // Revision 1.6 1999/01/23 20:47:54 root | |
73 | // added Linux specifics for psignal... | |
74 | // Hope this helps. | |
75 | // | |
76 | // Revision 1.5 1999/01/20 09:48:12 voeckler | |
77 | // added warning as first line of output. | |
78 | // | |
79 | // Revision 1.4 1999/01/19 11:53:49 voeckler | |
80 | // added psignal() from <siginfo.h> handling. | |
81 | // | |
82 | // Revision 1.3 1999/01/19 11:00:50 voeckler | |
83 | // added keyboard interrupt handling, exit handling, removed C++ strings and | |
84 | // regular expression syntax in favour of less source code, added comments, | |
85 | // added a reminder to remove swap.state in case of unlinks, added IAA flag, | |
86 | // added a few assertions, changed policy to enforce the definition of at | |
87 | // least one regular expression, and catch a few signals. | |
88 | // | |
89 | // Revision 1.2 1999/01/15 23:06:28 voeckler | |
90 | // downgraded to simple C strings... | |
91 | // | |
92 | // Revision 1.1 1999/01/14 12:05:32 voeckler | |
93 | // Initial revision | |
94 | // | |
95 | // | |
d8b258a9 | 96 | #if (defined(__GNUC__) || defined(__GNUG__)) && !defined(__clang__) |
eb1f6bfa | 97 | #pragma implementation |
eb1f6bfa AJ |
98 | #endif |
99 | ||
f7f3304a | 100 | #include "squid.h" |
2ccf2eb2 AJ |
101 | // for xstrdup |
102 | #include "util.h" | |
103 | ||
104 | //#include <assert.h> | |
eb1f6bfa AJ |
105 | #include <stdarg.h> |
106 | #include <stdio.h> | |
107 | #include <dirent.h> | |
2ccf2eb2 | 108 | //#include <ctype.h> |
eb1f6bfa | 109 | #include <string.h> |
2ccf2eb2 | 110 | //#include <sys/types.h> |
eb1f6bfa AJ |
111 | #include <sys/stat.h> |
112 | #include <sys/wait.h> | |
113 | #include <fcntl.h> | |
114 | #include <unistd.h> | |
115 | #include <stdlib.h> | |
116 | #include <limits.h> | |
117 | #include <signal.h> | |
118 | #include <errno.h> | |
119 | ||
2ccf2eb2 | 120 | #if HAVE_SIGINFO_H |
eb1f6bfa | 121 | #include <siginfo.h> |
2ccf2eb2 | 122 | #endif |
eb1f6bfa AJ |
123 | |
124 | #include <netinet/in.h> | |
125 | #include <netinet/tcp.h> // TCP_NODELAY | |
126 | #include <arpa/inet.h> | |
127 | #include <netdb.h> // gethostbyname() | |
357b8e33 | 128 | //#include <regex.h> //comes via compat.h |
eb1f6bfa AJ |
129 | |
130 | #include "convert.hh" | |
131 | #include "socket.hh" | |
132 | #include "signal.hh" | |
133 | #include "squid-tlv.hh" | |
134 | #include "copyout.hh" | |
135 | #include "conffile.hh" | |
136 | ||
// Compile-time defaults for the cache address shown in the help text;
// either one may be predefined on the compiler command line.
#if !defined(DEFAULTHOST)
#define DEFAULTHOST "localhost"
#endif // DEFAULTHOST

#if !defined(DEFAULTPORT)
#define DEFAULTPORT 3128
#endif // DEFAULTPORT

// Number of the signal caught by handler(), 0 as long as none arrived.
// ('terminate' is a gcc 2.8.x internal, hence the different name.)
volatile sig_atomic_t term_flag = 0;

// Read buffer shared by all files that are examined, and its size in bytes.
// NOTE(review): 16834 looks like a transposed 16384 (16 KiB) -- confirm
// before changing, the value is kept as found.
char* linebuffer = NULL;
size_t buffersize = 16834;

// Settings filled in from the command line.
static char* copydir = NULL;            // -C: base directory for copy-out
static unsigned debugFlag = 0;          // -d: OR of debug options
static unsigned purgeMode = 0;          // -P: OR of purge modes
static bool iamalive = false;           // -a: show the rotating character
static bool reminder = false;           // set once a cache file was unlinked
static bool verbose = false;            // -v: extended log lines
static bool envelope = false;           // -H: prepend HTTP header on copy-out
static bool no_fork = false;            // -n: do not fork per cache_dir
static const char* programname = NULL;  // basename of argv[0]
static const char* RCS_ID = "$Id$";
eb1f6bfa AJ |
158 | |
159 | // ---------------------------------------------------------------------- | |
160 | ||
161 | struct REList { | |
feec68a0 A |
162 | REList( const char* what, bool doCase ); |
163 | ~REList(); | |
164 | bool match( const char* check ) const; | |
eb1f6bfa | 165 | |
feec68a0 A |
166 | REList* next; |
167 | const char* data; | |
168 | regex_t rexp; | |
eb1f6bfa AJ |
169 | }; |
170 | ||
171 | REList::REList( const char* what, bool doCase ) | |
2ccf2eb2 | 172 | :next(0),data(xstrdup(what)) |
eb1f6bfa | 173 | { |
feec68a0 A |
174 | int result = regcomp( &rexp, what, |
175 | REG_EXTENDED | REG_NOSUB | (doCase ? 0 : REG_ICASE) ); | |
176 | if ( result != 0 ) { | |
177 | char buffer[256]; | |
178 | regerror( result, &rexp, buffer, 256 ); | |
179 | fprintf( stderr, "unable to compile re \"%s\": %s\n", what, buffer ); | |
180 | exit(1); | |
181 | } | |
eb1f6bfa AJ |
182 | } |
183 | ||
184 | REList::~REList() | |
feec68a0 A |
185 | { |
186 | if ( next ) delete next; | |
6e2aefad | 187 | if ( data ) xfree((void*) data); |
feec68a0 | 188 | regfree(&rexp); |
eb1f6bfa AJ |
189 | } |
190 | ||
191 | bool | |
192 | REList::match( const char* check ) const | |
193 | { | |
feec68a0 A |
194 | int result = regexec( &rexp, check, 0, 0, 0 ); |
195 | if ( result != 0 && result != REG_NOMATCH ) { | |
196 | char buffer[256]; | |
197 | regerror( result, &rexp, buffer, 256 ); | |
198 | fprintf( stderr, "unable to execute re \"%s\"\n+ on line \"%s\": %s\n", | |
199 | data, check, buffer ); | |
200 | exit(1); | |
201 | } | |
202 | return ( result == 0 ); | |
eb1f6bfa AJ |
203 | } |
204 | ||
205 | // ---------------------------------------------------------------------- | |
206 | ||
char*
concat( const char* start, ... )
// purpose: concatenate an arbitrary number of C strings.
// paramtr: start (IN): first C string
//          ... (IN): further C strings, terminated with a NULL pointer.
//                    The terminator is read as a `const char*`, so pass it
//                    as a pointer, e.g. `(const char*) 0`, not as a plain 0.
// returns: memory allocated via new[], containing the concatenated string.
//          The caller releases it with delete[].
{
    va_list ap;
    const char* s;

    // first run: determine size, including the terminating NUL. The sum is
    // kept in size_t, the type strlen() returns.
    size_t size = strlen(start) + 1;
    va_start( ap, start );
    while ( (s = va_arg(ap,const char*)) != NULL ) size += strlen(s);
    va_end(ap);

    // allocate; operator new[] reports failure by throwing, it does not
    // return NULL, so there is nothing to test here.
    char* result = new char[size];

    // second run: copy content. A running cursor appends each piece where
    // the previous one ended instead of re-scanning the result every time.
    char* cursor = result;
    size_t len = strlen(start);
    memcpy( cursor, start, len );
    cursor += len;

    va_start( ap, start );
    while ( (s = va_arg(ap,const char*)) != NULL ) {
        len = strlen(s);
        memcpy( cursor, s, len );
        cursor += len;
    }
    va_end(ap);
    *cursor = '\0';

    return result;
}
238 | ||
bool
isxstring( const char* s, size_t testlen )
// purpose: test a string for conforming to xdigit
// paramtr: s (IN): string to test
//          testlen (IN): length the string must have
// returns: true, iff strlen(s)==testlen && all_x_chars(s), false otherwise
{
    if ( strlen(s) != testlen ) return false;

    for ( size_t i = 0; i < testlen; ++i ) {
        // The <ctype.h> classifiers are only defined for values of unsigned
        // char (and EOF); a plain char may be negative for bytes >= 0x80.
        if ( ! isxdigit( (unsigned char) s[i] ) ) return false;
    }
    return true;
}
252 | ||
inline
int
log_output( const char* fn, int code, long size, const char* url )
// purpose: print the short one-line report about a cache file on stdout.
// paramtr: fn (IN): name of the cache file
//          code (IN): status code to report
//          size (IN): object size to report
//          url (IN): URL (or remark) to report
// returns: whatever printf() returned, i.e. negative on output errors.
{
    const int written = printf( "%s %3d %8ld %s\n", fn, code, size, url );
    return written;
}
259 | ||
260 | static | |
261 | int | |
262 | log_extended( const char* fn, int code, long size, const SquidMetaList* meta ) | |
263 | { | |
feec68a0 A |
264 | static const char hexdigit[] = "0123456789ABCDEF"; |
265 | char md5[34]; | |
266 | const SquidTLV* findings = 0; | |
267 | ||
268 | if ( meta && (findings = meta->search( STORE_META_KEY_MD5 )) ) { | |
269 | unsigned char* s = (unsigned char*) findings->data; | |
270 | for ( int j=0; j<16; j++, s++ ) { | |
271 | md5[j*2+0] = hexdigit[ *s >> 4 ]; | |
272 | md5[j*2+1] = hexdigit[ *s & 15 ]; | |
273 | } | |
274 | md5[32] = '\0'; // terminate string | |
275 | } else { | |
6e2aefad | 276 | snprintf( md5, sizeof(md5), "%-32s", "(no_md5_data_available)" ); |
feec68a0 A |
277 | } |
278 | ||
279 | char timeb[64]; | |
280 | if ( meta && (findings = meta->search( STORE_META_STD )) ) { | |
281 | StoreMetaStd temp; | |
282 | // make data aligned, avoid SIGBUS on RISC machines (ARGH!) | |
283 | memcpy( &temp, findings->data, sizeof(StoreMetaStd) ); | |
6e2aefad | 284 | snprintf( timeb, sizeof(timeb), "%08lx %08lx %08lx %08lx %04x %5hu ", |
8978bd9d A |
285 | (unsigned long)temp.timestamp, (unsigned long)temp.lastref, |
286 | (unsigned long)temp.expires, (unsigned long)temp.lastmod, temp.flags, temp.refcount ); | |
feec68a0 A |
287 | } else if ( meta && (findings = meta->search( STORE_META_STD_LFS )) ) { |
288 | StoreMetaStdLFS temp; | |
289 | // make data aligned, avoid SIGBUS on RISC machines (ARGH!) | |
290 | memcpy( &temp, findings->data, sizeof(StoreMetaStd) ); | |
6e2aefad | 291 | snprintf( timeb, sizeof(timeb), "%08lx %08lx %08lx %08lx %04x %5hu ", |
8978bd9d A |
292 | (unsigned long)temp.timestamp, (unsigned long)temp.lastref, |
293 | (unsigned long)temp.expires, (unsigned long)temp.lastmod, temp.flags, temp.refcount ); | |
feec68a0 | 294 | } else { |
8baf6ea3 | 295 | unsigned long ul = ULONG_MAX; // Match type of StoreMetaTLV fields |
d8b258a9 PW |
296 | unsigned short hu = 0; // Match type of StoreMetaTLV refcount fields |
297 | snprintf( timeb, sizeof(timeb), "%08lx %08lx %08lx %08lx %04x %5d ", ul, ul, ul, ul, 0, hu); | |
feec68a0 A |
298 | } |
299 | ||
300 | // make sure that there is just one printf() | |
301 | if ( meta && (findings = meta->search( STORE_META_URL )) ) { | |
302 | return printf( "%s %3d %8ld %s %s %s\n", | |
303 | fn, code, size, md5, timeb, findings->data ); | |
304 | } else { | |
305 | return printf( "%s %3d %8ld %s %s strange_file\n", | |
306 | fn, code, size, md5, timeb ); | |
eb1f6bfa | 307 | } |
eb1f6bfa AJ |
308 | } |
309 | ||
// Address of the cache that receives the PURGE requests. Both values live
// at file scope so that action() can use them without extra parameters;
// the port is kept in network byte order.
static struct in_addr serverHost;
static unsigned short serverPort;
313 | ||
314 | bool | |
315 | action( int fd, size_t metasize, | |
feec68a0 A |
316 | const char* fn, const char* url, const SquidMetaList& meta ) |
317 | // purpose: if cmdline-requested, send the purge request to the cache | |
318 | // paramtr: fd (IN): open FD for the object file | |
319 | // metasize (IN): offset into data portion of file (meta data size) | |
320 | // fn (IN): name of the object file | |
321 | // url (IN): URL string stored in the object file | |
322 | // meta (IN): list containing further meta data | |
323 | // returns: true for a successful action, false otherwise. The action | |
324 | // may just print the file, send the purge request or even | |
325 | // remove unwanted files. | |
326 | // globals: ::purgeMode (IN): bit#0 set -> send purge request. | |
327 | // bit#1 set -> remove 404 object files. | |
328 | // ::serverHost (IN): cache host address | |
329 | // ::serverPort (IN): cache port number | |
eb1f6bfa | 330 | { |
feec68a0 A |
331 | static const char* schablone = "PURGE %s HTTP/1.0\r\nAccept: */*\r\n\r\n"; |
332 | struct stat st; | |
333 | long size = ( fstat(fd,&st) == -1 ? -1 : long(st.st_size - metasize) ); | |
334 | int status = 0; | |
335 | ||
336 | // if we want to copy out the file, do that first of all. | |
337 | if ( ::copydir && *copydir && size > 0 ) | |
59a09b98 | 338 | copy_out( st.st_size, metasize, ::debugFlag, |
feec68a0 A |
339 | fn, url, ::copydir, ::envelope ); |
340 | ||
341 | // do we need to PURGE the file, yes, if purgemode bit#0 was set. | |
342 | if ( ::purgeMode & 0x01 ) { | |
343 | unsigned long bufsize = strlen(url) + strlen(schablone) + 4; | |
344 | char* buffer = new char[bufsize]; | |
345 | ||
6e2aefad | 346 | snprintf( buffer, bufsize, schablone, url ); |
feec68a0 A |
347 | int sockfd = connectTo( serverHost, serverPort, true ); |
348 | if ( sockfd == -1 ) { | |
349 | fprintf( stderr, "unable to connect to server: %s\n", strerror(errno) ); | |
350 | delete[] buffer; | |
351 | return false; | |
352 | } | |
353 | ||
354 | int size = strlen(buffer); | |
355 | if ( write( sockfd, buffer, size ) != size ) { | |
356 | // error while talking to squid | |
357 | fprintf( stderr, "unable to talk to server: %s\n", strerror(errno) ); | |
358 | close(sockfd); | |
359 | delete[] buffer; | |
360 | return false; | |
361 | } | |
362 | memset( buffer+8, 0, 4 ); | |
363 | if ( read( sockfd, buffer, bufsize ) < 1 ) { | |
364 | // error while reading squid's answer | |
365 | fprintf( stderr, "unable to read answer: %s\n", strerror(errno) ); | |
366 | close(sockfd); | |
367 | delete[] buffer; | |
368 | return false; | |
369 | } | |
370 | close(sockfd); | |
371 | status = strtol(buffer+8,0,10); | |
372 | delete[] buffer; | |
eb1f6bfa | 373 | } |
feec68a0 A |
374 | |
375 | // log the output of our operation | |
376 | bool flag = true; | |
377 | if ( ::verbose ) flag = ( log_extended( fn, status, size, &meta ) >= 0 ); | |
378 | else flag = ( log_output( fn, status, size, url ) >= 0 ); | |
379 | ||
380 | // remove the file, if purgemode bit#1, and HTTP result status 404). | |
381 | if ( (::purgeMode & 0x02) && status == 404 ) { | |
382 | reminder = true; | |
383 | if ( unlink(fn) == -1 ) | |
384 | // error while unlinking file, this may happen due to the cache | |
385 | // unlinking a file while it is still in the readdir() cache of purge. | |
386 | fprintf( stderr, "WARNING: unable to unlink %s: %s\n", | |
387 | fn, strerror(errno) ); | |
eb1f6bfa | 388 | } |
feec68a0 A |
389 | |
390 | return flag; | |
eb1f6bfa AJ |
391 | } |
392 | ||
393 | bool | |
394 | match( const char* fn, const REList* list ) | |
feec68a0 A |
395 | // purpose: do something with the given cache content filename |
396 | // paramtr: fn (IN): filename of cache file | |
397 | // returns: true for successful action, false otherwise. | |
398 | // warning: only return false, if you want the loop to terminate! | |
eb1f6bfa | 399 | { |
feec68a0 A |
400 | static const size_t addon = sizeof(unsigned char) + sizeof(unsigned int); |
401 | bool flag = true; | |
402 | ||
59a09b98 | 403 | if ( debugFlag & 0x01 ) fprintf( stderr, "# [3] %s\n", fn ); |
feec68a0 A |
404 | int fd = open( fn, O_RDONLY ); |
405 | if ( fd != -1 ) { | |
406 | if ( read(fd,::linebuffer,::buffersize-1) > 60 ) { | |
407 | ::linebuffer[ ::buffersize-1 ] = '\0'; // force-terminate string | |
408 | ||
409 | // check the offset into the start of object data. The offset is | |
410 | // stored in a host endianess after the first byte. | |
411 | unsigned int datastart; | |
412 | memcpy( &datastart, ::linebuffer + 1, sizeof(unsigned int) ); | |
413 | if ( datastart > ::buffersize - addon - 1 ) { | |
414 | // check offset into server reply header (start of cache data). | |
415 | fputs( "WARNING: Using a truncated URL string.\n", stderr ); | |
416 | datastart = ::buffersize - addon - 1; | |
417 | } | |
418 | ||
419 | // NEW: Parse squid meta data, which is a kind of linked list | |
420 | // flattened out into a file byte stream. Somewhere within is | |
421 | // the URL as part of the list. First, gobble all meta data. | |
422 | unsigned int offset = addon; | |
423 | SquidMetaList meta; | |
424 | while ( offset + addon <= datastart ) { | |
425 | unsigned int size = 0; | |
426 | memcpy( &size, linebuffer+offset+sizeof(char), sizeof(unsigned int) ); | |
427 | meta.append( SquidMetaType(*(linebuffer+offset)), | |
428 | size, linebuffer+offset+addon ); | |
429 | offset += ( addon + size ); | |
430 | } | |
431 | ||
432 | // Now extract the key URL from the meta data. | |
433 | const SquidTLV* urlmeta = meta.search( STORE_META_URL ); | |
434 | if ( urlmeta ) { | |
435 | // found URL in meta data. Try to process the URL | |
436 | if ( list == 0 ) | |
437 | flag = action( fd, datastart, fn, (char*) urlmeta->data, meta ); | |
438 | else { | |
439 | REList* head = (REList*) list; // YUCK! | |
440 | while ( head != 0 ) { | |
441 | if ( head->match( (char*) urlmeta->data ) ) break; | |
442 | head = head->next; | |
443 | } | |
444 | if ( head != 0 ) | |
445 | flag = action( fd, datastart, fn, (char*) urlmeta->data, meta ); | |
446 | else flag = true; | |
447 | } | |
448 | } | |
449 | ||
450 | // "meta" will be deleted when exiting from this block | |
451 | } else { | |
452 | // weird file, FIXME: stat() it! | |
453 | struct stat st; | |
454 | long size = ( fstat(fd,&st) == -1 ? -1 : st.st_size ); | |
455 | if ( ::verbose ) flag = ( log_extended( fn, -1, size, 0 ) >= 0 ); | |
456 | else flag = ( log_output( fn, -1, size, "strange file" ) >= 0 ); | |
457 | ||
458 | if ( (::purgeMode & 0x04) ) { | |
459 | reminder = true; | |
460 | if ( unlink(fn) == -1 ) | |
461 | // error while unlinking file, this may happen due to the cache | |
462 | // unlinking a file while it is in the readdir() cache of purge. | |
463 | fprintf( stderr, "WARNING: unable to unlink %s: %s\n", | |
464 | fn, strerror(errno) ); | |
465 | } | |
466 | } | |
467 | close(fd); | |
eb1f6bfa | 468 | } else { |
feec68a0 A |
469 | // error while opening file, this may happen due to the cache |
470 | // unlinking a file while it is still in the readdir() cache of purge. | |
471 | fprintf( stderr, "WARNING: open \"%s\": %s\n", fn, strerror(errno) ); | |
eb1f6bfa | 472 | } |
feec68a0 A |
473 | |
474 | return flag; | |
eb1f6bfa AJ |
475 | } |
476 | ||
477 | bool | |
478 | filelevel( const char* directory, const REList* list ) | |
feec68a0 A |
479 | // purpose: from given starting point, look for squid xxxxxxxx files. |
480 | // example: "/var/spool/cache/08/7F" as input, do action over files | |
481 | // paramtr: directory (IN): starting point | |
482 | // list (IN): list of rexps to match URLs against | |
483 | // returns: true, if every subdir && action was successful. | |
eb1f6bfa | 484 | { |
fb151769 | 485 | dirent_t * entry; |
59a09b98 | 486 | if ( debugFlag & 0x01 ) |
feec68a0 A |
487 | fprintf( stderr, "# [2] %s\n", directory ); |
488 | ||
489 | DIR* dir = opendir( directory ); | |
490 | if ( dir == NULL ) { | |
491 | fprintf( stderr, "unable to open directory \"%s\": %s\n", | |
492 | directory, strerror(errno) ); | |
493 | return false; | |
494 | } | |
495 | ||
496 | // display a rotating character as "i am alive" signal (slows purge). | |
497 | if ( ::iamalive ) { | |
498 | static char alivelist[4][3] = { "\\\b", "|\b", "/\b", "-\b" }; | |
499 | static unsigned short alivecount = 0; | |
500 | assert( write( STDOUT_FILENO, alivelist[alivecount++ & 3], 2 ) == 2 ); | |
501 | } | |
502 | ||
503 | bool flag = true; | |
504 | while ( (entry=readdir(dir)) && flag ) { | |
505 | if ( isxstring(entry->d_name,8) ) { | |
506 | char* name = concat( directory, "/", entry->d_name, 0 ); | |
507 | flag = match( name, list ); | |
508 | delete[] name; | |
509 | } | |
eb1f6bfa | 510 | } |
eb1f6bfa | 511 | |
feec68a0 A |
512 | closedir(dir); |
513 | return flag; | |
eb1f6bfa AJ |
514 | } |
515 | ||
516 | bool | |
517 | dirlevel( const char* dirname, const REList* list, bool level=false ) | |
feec68a0 A |
518 | // purpose: from given starting point, look for squid 00..FF directories. |
519 | // paramtr: dirname (IN): starting point | |
520 | // list (IN): list of rexps to match URLs against | |
521 | // level (IN): false==toplevel, true==1st level | |
522 | // example: "/var/spool/cache", false as input, traverse subdirs w/ action. | |
523 | // example: "/var/spool/cache/08", true as input, traverse subdirs w/ action. | |
524 | // returns: true, if every subdir && action was successful. | |
525 | // warning: this function is once-recursive, no deeper. | |
eb1f6bfa | 526 | { |
fb151769 | 527 | dirent_t* entry; |
59a09b98 | 528 | if ( debugFlag & 0x01 ) |
feec68a0 A |
529 | fprintf( stderr, "# [%d] %s\n", (level ? 1 : 0), dirname ); |
530 | ||
531 | DIR* dir = opendir( dirname ); | |
532 | if ( dir == NULL ) { | |
533 | fprintf( stderr, "unable to open directory \"%s\": %s\n", | |
534 | dirname, strerror(errno) ); | |
535 | return false; | |
eb1f6bfa | 536 | } |
eb1f6bfa | 537 | |
feec68a0 A |
538 | bool flag = true; |
539 | while ( (entry=readdir(dir)) && flag ) { | |
540 | if ( strlen(entry->d_name) == 2 && | |
541 | isxdigit(entry->d_name[0]) && | |
542 | isxdigit(entry->d_name[1]) ) { | |
543 | char* name = concat( dirname, "/", entry->d_name, 0 ); | |
544 | flag = level ? filelevel( name, list ) : dirlevel( name, list, true ); | |
545 | delete[] name; | |
546 | } | |
547 | } | |
548 | ||
549 | closedir(dir); | |
550 | return flag; | |
eb1f6bfa AJ |
551 | } |
552 | ||
int
checkForPortOnly( const char* optarg )
// purpose: see if somebody just put in a port instead of a hostname
// paramtr: optarg (IN): argument from commandline
// returns: 0..65535 is the valid port number in network byte order,
//          -1 if not a port
{
    // if there is a period in there, it must be a valid hostname
    if ( strchr( optarg, '.' ) != 0 ) return -1;

    // if it is just a number between 0 and 65535, it must be a port.
    // "Just a number" means that the conversion used up the whole argument:
    // a host name that merely starts with digits ("3com") is not a port.
    char* errstr = 0;
    unsigned long result = strtoul( optarg, &errstr, 0 );
    if ( errstr != optarg && *errstr == '\0' && result < 65536 )
        return htons(result);

    // symbolical service names are not tried
    return -1;
}
576 | ||
577 | void | |
578 | helpMe( void ) | |
feec68a0 | 579 | // purpuse: write help message and exit |
eb1f6bfa | 580 | { |
feec68a0 A |
581 | printf( "\n%s\nUsage:\t%s\t[-a] [-c cf] [-d l] [-(f|F) fn | -(e|E) re] " |
582 | "[-p h[:p]]\n\t\t[-P #] [-s] [-v] [-C dir [-H]] [-n]\n\n", | |
583 | ::RCS_ID, ::programname ); | |
584 | printf( | |
585 | " -a\tdisplay a little rotating thingy to indicate that I am alive (tty only).\n" | |
586 | " -c c\tsquid.conf location, default \"%s\".\n" | |
587 | " -C dir\tbase directory for content extraction (copy-out mode).\n" | |
588 | " -d l\tdebug level, an OR of different debug options.\n" | |
589 | " -e re\tsingle regular expression per -e instance (use quotes!).\n" | |
590 | " -E re\tsingle case sensitive regular expression like -e.\n" | |
591 | " -f fn\tname of textfile containing one regular expression per line.\n" | |
592 | " -F fn\tname of textfile like -f containing case sensitive REs.\n" | |
593 | " -H\tprepend HTTP reply header to destination files in copy-out mode.\n" | |
594 | " -n\tdo not fork() when using more than one cache_dir.\n" | |
595 | " -p h:p\tcache runs on host h and optional port p, default is %s:%u.\n" | |
596 | " -P #\tif 0, just print matches; otherwise OR the following purge modes:\n" | |
597 | "\t 0x01 really send PURGE to the cache.\n" | |
598 | "\t 0x02 remove all caches files reported as 404 (not found).\n" | |
599 | "\t 0x04 remove all weird (inaccessible or too small) cache files.\n" | |
600 | "\t0 and 1 are recommended - slow rebuild your cache with other modes.\n" | |
601 | " -s\tshow all options after option parsing, but before really starting.\n" | |
602 | " -v\tshow more information about the file, e.g. MD5, timestamps and flags.\n" | |
603 | "\n", DEFAULT_SQUID_CONF, DEFAULTHOST, DEFAULTPORT ); | |
eb1f6bfa AJ |
604 | |
605 | } | |
606 | ||
607 | void | |
608 | parseCommandline( int argc, char* argv[], REList*& head, | |
feec68a0 A |
609 | char*& conffile, char*& copydir, |
610 | struct in_addr& serverHost, unsigned short& serverPort ) | |
611 | // paramtr: argc: see ::main(). | |
612 | // argv: see ::main(). | |
613 | // returns: Does terminate the program on errors! | |
614 | // purpose: suck in any commandline options, and set the global vars. | |
615 | { | |
616 | int option, port, showme = 0; | |
617 | char* ptr, *colon; | |
618 | FILE* rfile; | |
619 | ||
620 | // program basename | |
621 | if ( (ptr = strrchr(argv[0],'/')) == NULL ) ptr=argv[0]; | |
622 | else ptr++; | |
623 | ::programname = ptr; | |
624 | ||
625 | // extract commandline parameters | |
626 | REList* tail = head = 0; | |
627 | opterr = 0; | |
628 | while ( (option = getopt( argc, argv, "ac:C:d:E:e:F:f:Hnp:P:sv" )) != -1 ) { | |
629 | switch ( option ) { | |
630 | case 'a': | |
631 | ::iamalive = ! ::iamalive; | |
632 | break; | |
633 | case 'C': | |
634 | if ( optarg && *optarg ) { | |
6e2aefad | 635 | if ( copydir ) xfree( (void*) copydir ); |
2ccf2eb2 | 636 | assert( (copydir = xstrdup(optarg)) ); |
feec68a0 A |
637 | } |
638 | break; | |
639 | case 'c': | |
640 | if ( optarg && *optarg ) { | |
6e2aefad | 641 | if ( *conffile ) xfree((void*) conffile ); |
2ccf2eb2 | 642 | assert( (conffile = xstrdup(optarg)) ); |
feec68a0 A |
643 | } |
644 | break; | |
645 | ||
646 | case 'd': | |
59a09b98 | 647 | ::debugFlag = strtoul( optarg, 0, 0 ); |
feec68a0 A |
648 | break; |
649 | ||
650 | case 'E': | |
651 | case 'e': | |
652 | if ( head == 0 ) tail = head = new REList( optarg, option=='E' ); | |
653 | else { | |
654 | tail->next = new REList( optarg, option=='E' ); | |
655 | tail = tail->next; | |
656 | } | |
657 | break; | |
658 | ||
659 | case 'f': | |
660 | if ( (rfile = fopen( optarg, "r" )) != NULL ) { | |
661 | unsigned long lineno = 0; | |
eb1f6bfa | 662 | #define LINESIZE 512 |
feec68a0 A |
663 | char line[LINESIZE]; |
664 | while ( fgets( line, LINESIZE, rfile ) != NULL ) { | |
665 | lineno++; | |
666 | int len = strlen(line)-1; | |
667 | if ( len+2 >= LINESIZE ) { | |
668 | fprintf( stderr, "%s:%lu: line too long, sorry.\n", | |
669 | optarg, lineno ); | |
670 | exit(1); | |
671 | } | |
672 | ||
673 | // remove trailing line breaks | |
674 | while ( len > 0 && ( line[len] == '\n' || line[len] == '\r' ) ) | |
675 | line[len--] = '\0'; | |
676 | ||
677 | // insert into list of expressions | |
678 | if ( head == 0 ) tail = head = new REList(line,option=='F'); | |
679 | else { | |
680 | tail->next = new REList(line,option=='F'); | |
681 | tail = tail->next; | |
682 | } | |
683 | } | |
684 | fclose(rfile); | |
685 | } else | |
686 | fprintf( stderr, "unable to open %s: %s\n", optarg, strerror(errno)); | |
687 | break; | |
688 | ||
689 | case 'H': | |
690 | ::envelope = ! ::envelope; | |
691 | break; | |
692 | case 'n': | |
693 | ::no_fork = ! ::no_fork; | |
694 | break; | |
695 | case 'p': | |
696 | colon = strchr( optarg, ':' ); | |
697 | if ( colon == 0 ) { | |
698 | // no colon, only look at host | |
699 | ||
700 | // fix: see if somebody just put in there a port (no periods) | |
701 | // give port number precedence over host names | |
702 | port = checkForPortOnly( optarg ); | |
703 | if ( port == -1 ) { | |
704 | // assume that main() did set the default port | |
705 | if ( convertHostname(optarg,serverHost) == -1 ) { | |
706 | fprintf( stderr, "unable to resolve host %s!\n", optarg ); | |
707 | exit(1); | |
708 | } | |
709 | } else { | |
710 | // assume that main() did set the default host | |
711 | serverPort = port; | |
712 | } | |
713 | } else { | |
714 | // colon used, port is extra | |
715 | *colon++ = 0; | |
716 | if ( convertHostname(optarg,serverHost) == -1 ) { | |
717 | fprintf( stderr, "unable to resolve host %s!\n", optarg ); | |
718 | exit(1); | |
719 | } | |
720 | if ( convertPortname(colon,serverPort) == -1 ) { | |
721 | fprintf( stderr, "unable to resolve port %s!\n", colon ); | |
722 | exit(1); | |
723 | } | |
724 | } | |
725 | break; | |
726 | case 'P': | |
727 | ::purgeMode = ( strtol( optarg, 0, 0 ) & 0x07 ); | |
728 | break; | |
729 | case 's': | |
730 | showme=1; | |
731 | break; | |
732 | case 'v': | |
733 | ::verbose = ! ::verbose; | |
734 | break; | |
735 | case '?': | |
736 | default: | |
737 | helpMe(); | |
738 | exit(1); | |
739 | } | |
740 | } | |
741 | ||
742 | // adjust | |
59a09b98 | 743 | if ( ! isatty(fileno(stdout)) || (::debugFlag & 0x01) ) ::iamalive = false; |
feec68a0 A |
744 | if ( head == 0 ) { |
745 | fputs( "There was no regular expression defined. If you intend\n", stderr ); | |
746 | fputs( "to match all possible URLs, use \"-e .\" instead.\n", stderr ); | |
747 | exit(1); | |
eb1f6bfa | 748 | } |
feec68a0 A |
749 | |
750 | // postcondition: head != 0 | |
751 | assert( head != 0 ); | |
752 | ||
753 | // make sure that the copy out directory is there and accessible | |
754 | if ( copydir && *copydir ) | |
755 | if ( assert_copydir( copydir ) != 0 ) exit(1); | |
756 | ||
757 | // show results | |
758 | if ( showme ) { | |
759 | printf( "#\n# Currently active values for %s:\n# %s\n", | |
760 | ::programname, ::RCS_ID ); | |
761 | printf( "# Debug level : " ); | |
d83197e3 | 762 | if ( ::debugFlag ) printf( "%#6.4x", ::debugFlag ); |
feec68a0 A |
763 | else printf( "production level" ); // printf omits 0x prefix for 0! |
764 | printf( " + %s mode", ::no_fork ? "linear" : "parallel" ); | |
765 | puts( ::verbose ? " + extra verbosity" : "" ); | |
766 | ||
767 | printf( "# Copy-out directory: %s ", | |
768 | copydir ? copydir : "copy-out mode disabled" ); | |
769 | if ( copydir ) | |
770 | printf( "(%s HTTP header)\n", ::envelope ? "prepend" : "no" ); | |
771 | else | |
772 | puts(""); | |
773 | ||
774 | printf( "# Squid config file : %s\n", conffile ); | |
775 | printf( "# Cacheserveraddress: %s:%u\n", | |
776 | inet_ntoa( serverHost ), ntohs( serverPort ) ); | |
777 | printf( "# purge mode : 0x%02x\n", ::purgeMode ); | |
778 | printf( "# Regular expression: " ); | |
779 | ||
780 | unsigned count(0); | |
781 | for ( tail = head; tail != NULL; tail = tail->next ) { | |
782 | if ( count++ ) printf( "#%22u", count ); | |
eb1f6bfa | 783 | #if defined(LINUX) && putc==_IO_putc |
feec68a0 A |
784 | // I HATE BROKEN LINUX HEADERS! |
785 | // purge.o(.text+0x1040): undefined reference to `_IO_putc' | |
786 | // If your compilation breaks here, remove the undefinition | |
787 | #undef putc | |
eb1f6bfa | 788 | #endif |
feec68a0 A |
789 | else putchar('1'); |
790 | printf( " \"%s\"\n", tail->data ); | |
791 | } | |
792 | puts( "#" ); | |
eb1f6bfa | 793 | } |
feec68a0 | 794 | fflush( stdout ); |
eb1f6bfa AJ |
795 | } |
796 | ||
797 | extern "C" { | |
798 | ||
feec68a0 A |
799 | static |
800 | void | |
801 | exiter( void ) { | |
802 | if ( ::term_flag ) psignal( ::term_flag, "received signal" ); | |
803 | delete[] ::linebuffer; | |
804 | if ( ::reminder ) { | |
805 | fputs( | |
806 | "WARNING! Caches files were removed. Please shut down your cache, remove\n" | |
807 | "your swap.state files and restart your cache again, i.e. effictively do\n" | |
808 | "a slow rebuild your cache! Otherwise your squid *will* choke!\n", stderr ); | |
809 | } | |
810 | } | |
eb1f6bfa | 811 | |
feec68a0 A |
812 | static |
813 | void | |
814 | handler( int signo ) { | |
815 | ::term_flag = signo; | |
816 | if ( getpid() == getpgrp() ) kill( -getpgrp(), signo ); | |
817 | exit(1); | |
818 | } | |
eb1f6bfa AJ |
819 | |
820 | } // extern "C" | |
821 | ||
static
int
makelinebuffered( FILE* fp, const char* fn = 0 )
// purpose: switch the given stdio stream into line buffered mode
// paramtr: fp (IO): stream whose buffering mode is changed
//          fn (IN): name shown in the error message, 0 prints an empty name
// returns: 0 on success, -1 if setvbuf() reported a failure
// warning: the failure is already reported on stderr when -1 is returned
{
  if ( setvbuf( fp, 0, _IOLBF, 0 ) != 0 ) {
    // error: complain using the supplied name, or "" if there is none
    const char* name = ( fn == 0 ) ? "" : fn;
    fprintf( stderr, "unable to make \"%s\" line buffered: %s\n",
	     name, strerror(errno) );
    return -1;
  }

  // ok
  return 0;
}
841 | ||
842 | int | |
843 | main( int argc, char* argv[] ) | |
844 | { | |
feec68a0 A |
845 | // setup variables |
846 | REList* list = 0; | |
2ccf2eb2 | 847 | char* conffile = xstrdup( DEFAULT_SQUID_CONF ); |
feec68a0 A |
848 | serverPort = htons(DEFAULTPORT); |
849 | if ( convertHostname(DEFAULTHOST,serverHost) == -1 ) { | |
850 | fprintf( stderr, "unable to resolve host %s!\n", DEFAULTHOST ); | |
851 | return 1; | |
852 | } | |
853 | ||
854 | // setup line buffer | |
855 | ::linebuffer = new char[ ::buffersize ]; | |
856 | assert( ::linebuffer != 0 ); | |
857 | ||
858 | // parse commandline | |
859 | puts( "### Use at your own risk! No guarantees whatsoever. You were warned. ###"); | |
860 | parseCommandline( argc, argv, list, conffile, ::copydir, | |
861 | serverHost, serverPort ); | |
862 | ||
863 | // prepare execution | |
864 | if ( atexit( exiter ) != 0 || | |
865 | Signal( SIGTERM, handler, true ) == SIG_ERR || | |
866 | Signal( SIGINT, handler, true ) == SIG_ERR || | |
867 | Signal( SIGHUP, handler, true ) == SIG_ERR ) { | |
868 | perror( "unable to install signal/exit function" ); | |
869 | return 1; | |
870 | } | |
871 | ||
872 | // try to read squid.conf file to determine all cache_dir locations | |
873 | CacheDirVector cdv(0); | |
59a09b98 | 874 | if ( readConfigFile( cdv, conffile, debugFlag ? stderr : 0 ) > 0 ) { |
feec68a0 A |
875 | // there are some valid cache_dir entries. |
876 | // unless forking was forbidden by cmdline option, | |
877 | // for a process for each cache_dir entry to remove files. | |
878 | ||
879 | if ( ::no_fork || cdv.size() == 1 ) { | |
880 | // linear mode, one cache_dir after the next | |
881 | for ( CacheDirVector::iterator i = cdv.begin(); i != cdv.end(); ++i ) { | |
882 | // execute OR complain | |
883 | if ( ! dirlevel(i->base,list) ) | |
884 | fprintf( stderr, "program terminated due to error: %s", | |
885 | strerror(errno) ); | |
6e2aefad | 886 | xfree((void*) i->base); |
feec68a0 A |
887 | } |
888 | } else { | |
889 | // parallel mode, all cache_dir in parallel | |
890 | pid_t* child = new pid_t[ cdv.size() ]; | |
891 | ||
892 | // make stdout/stderr line bufferd | |
893 | makelinebuffered( stdout, "stdout" ); | |
894 | makelinebuffered( stderr, "stderr" ); | |
895 | ||
896 | // make parent process group leader for easier killings | |
897 | if ( setpgid(getpid(), getpid()) != 0 ) { | |
898 | perror( "unable to set process group leader" ); | |
899 | return 1; | |
900 | } | |
901 | ||
902 | // -a is mutually exclusive with fork mode | |
903 | if ( ::iamalive ) { | |
904 | puts( "# i-am-alive flag incompatible with fork mode, resetting" ); | |
905 | ::iamalive = false; | |
906 | } | |
907 | ||
2ccf2eb2 | 908 | for ( size_t i=0; i < cdv.size(); ++i ) { |
feec68a0 A |
909 | if ( getpid() == getpgrp() ) { |
910 | // only parent == group leader may fork off new processes | |
911 | if ( (child[i]=fork()) < 0 ) { | |
912 | // fork error, this is bad! | |
913 | perror( "unable to fork" ); | |
914 | kill( -getpgrp(), SIGTERM ); | |
915 | return 1; | |
916 | } else if ( child[i] == 0 ) { | |
917 | // child mode | |
918 | // execute OR complain | |
919 | if ( ! dirlevel(cdv[i].base,list) ) | |
920 | fprintf( stderr, "program terminated due to error: %s\n", | |
921 | strerror(errno) ); | |
6e2aefad | 922 | xfree((void*) cdv[i].base); |
feec68a0 A |
923 | return 0; |
924 | } else { | |
925 | // parent mode | |
59a09b98 | 926 | if ( ::debugFlag ) printf( "forked child %d\n", (int) child[i] ); |
feec68a0 A |
927 | } |
928 | } | |
929 | } | |
930 | ||
931 | // collect the garbase | |
932 | pid_t temp; | |
933 | int status; | |
2ccf2eb2 | 934 | for ( size_t i=0; i < cdv.size(); ++i ) { |
feec68a0 A |
935 | while ( (temp=waitpid( (pid_t)-1, &status, 0 )) == -1 ) |
936 | if ( errno == EINTR ) continue; | |
59a09b98 | 937 | if ( ::debugFlag ) printf( "collected child %d\n", (int) temp ); |
feec68a0 A |
938 | } |
939 | delete[] child; | |
940 | } | |
eb1f6bfa | 941 | } else { |
feec68a0 | 942 | fprintf( stderr, "no cache_dir or error accessing \"%s\"\n", conffile ); |
eb1f6bfa | 943 | } |
feec68a0 A |
944 | |
945 | // clean up | |
6e2aefad HN |
946 | if ( copydir ) xfree( (void*) copydir ); |
947 | xfree((void*) conffile); | |
feec68a0 A |
948 | delete list; |
949 | return 0; | |
eb1f6bfa | 950 | } |