]> git.ipfire.org Git - thirdparty/squid.git/blob - tools/purge/purge.cc
Source Format Enforcement (#763)
[thirdparty/squid.git] / tools / purge / purge.cc
1 /*
2 * Copyright (C) 1996-2021 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9 // Author: Jens-S. Vöckler <voeckler@rvs.uni-hannover.de>
10 //
11 // File: purge.cc
12 // Wed Jan 13 1999
13 //
14 // (c) 1999 Lehrgebiet Rechnernetze und Verteilte Systeme
15 // Universität Hannover, Germany
16 //
17 // Permission to use, copy, modify, distribute, and sell this software
18 // and its documentation for any purpose is hereby granted without fee,
19 // provided that (i) the above copyright notices and this permission
20 // notice appear in all copies of the software and related documentation,
21 // and (ii) the names of the Lehrgebiet Rechnernetze und Verteilte
22 // Systeme and the University of Hannover may not be used in any
23 // advertising or publicity relating to the software without the
24 // specific, prior written permission of Lehrgebiet Rechnernetze und
25 // Verteilte Systeme and the University of Hannover.
26 //
27 // THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND,
28 // EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
29 // WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
30 //
31 // IN NO EVENT SHALL THE LEHRGEBIET RECHNERNETZE UND VERTEILTE SYSTEME OR
32 // THE UNIVERSITY OF HANNOVER BE LIABLE FOR ANY SPECIAL, INCIDENTAL,
33 // INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, OR ANY DAMAGES
34 // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER OR NOT
35 // ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF LIABILITY,
36 // ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
37 // SOFTWARE.
38 //
39 // Revision 1.17 2000/09/21 10:59:53 cached
40 // *** empty log message ***
41 //
42 // Revision 1.16 2000/09/21 09:45:18 cached
43 // Fixed some small bugs.
44 //
45 // Revision 1.15 2000/09/21 09:05:56 cached
46 // added multi cache_dir support, thus changing -c cmdline option.
47 // modified file reading to support /dev/fd/0 reading for non-disclosed items.
48 //
49 // Revision 1.14 2000/06/20 09:43:01 voeckler
50 // added FreeBSD related fixes and support.
51 //
52 // Revision 1.13 2000/03/29 08:12:21 voeckler
53 // fixed wrong header file.
54 //
55 // Revision 1.12 2000/03/29 07:54:41 voeckler
56 // added mechanism to give a port specification precedence over a host
57 // specification with the -p option and not colon.
58 //
59 // Revision 1.11 1999/06/18 13:18:28 voeckler
60 // added refcount, fixed missing LF in -s output.
61 //
62 // Revision 1.10 1999/06/16 13:06:05 voeckler
63 // reversed meaning of -M flag.
64 //
65 // Revision 1.9 1999/06/15 21:11:53 voeckler
66 // added extended logging feature which extract the squid meta data available
67 // within the disk files. moved the content extraction and squid meta data
68 // handling parts into separate files. added options for copy-out and verbose.
69 //
70 // Revision 1.8 1999/06/14 20:14:46 voeckler
71 // intermediate version when adding understanding about the way
72 // Squid does log the metadata into the file.
73 //
74 // Revision 1.7 1999/01/23 21:01:10 root
75 // stumbled over libc5 header/lib inconsistency bug....
76 //
77 // Revision 1.6 1999/01/23 20:47:54 root
78 // added Linux specifics for psignal...
79 // Hope this helps.
80 //
81 // Revision 1.5 1999/01/20 09:48:12 voeckler
82 // added warning as first line of output.
83 //
84 // Revision 1.4 1999/01/19 11:53:49 voeckler
85 // added psignal() from <siginfo.h> handling.
86 //
87 // Revision 1.3 1999/01/19 11:00:50 voeckler
88 // added keyboard interrupt handling, exit handling, removed C++ strings and
89 // regular expression syntax in favour of less source code, added comments,
90 // added a reminder to remove swap.state in case of unlinks, added IAA flag,
91 // added a few assertions, changed policy to enforce the definition of at
92 // least one regular expression, and catch a few signals.
93 //
94 // Revision 1.2 1999/01/15 23:06:28 voeckler
95 // downgraded to simple C strings...
96 //
97 // Revision 1.1 1999/01/14 12:05:32 voeckler
98 // Initial revision
99 //
100 //
101 #include "squid.h"
102 #include "util.h"
103
104 #include <cerrno>
105 #include <climits>
106 #include <csignal>
107 #include <cstdarg>
108 #include <cstdlib>
109 #include <cstring>
110 #include <dirent.h>
111 #include <sys/stat.h>
112 #include <sys/wait.h>
113 #include <fcntl.h>
114 #include <unistd.h>
115
116 #if HAVE_SIGINFO_H
117 #include <siginfo.h>
118 #endif
119
120 #include <netinet/in.h>
121 #include <netinet/tcp.h>
122 #include <arpa/inet.h>
123 #include <netdb.h>
124
125 #include "conffile.hh"
126 #include "convert.hh"
127 #include "copyout.hh"
128 #include "signal.hh"
129 #include "socket.hh"
130 #include "squid-tlv.hh"
131
// Cache host used when the command line does not name one.
// A definition supplied by the build (e.g. -DDEFAULTHOST=...) wins.
#if !defined(DEFAULTHOST)
#define DEFAULTHOST "localhost"
#endif

// Cache port (host byte order) used when the command line does not name one.
// A definition supplied by the build wins here as well.
#if !defined(DEFAULTPORT)
#define DEFAULTPORT 3128
#endif
139
// Number of the signal recorded by the signal handler, 0 if none so far.
// ('terminate' would be the obvious name, but it is a gcc 2.8.x internal.)
volatile sig_atomic_t term_flag = 0;

// Size in bytes of the scratch buffer, and the buffer itself. The buffer
// is allocated in main() and released by the exit handler.
size_t buffersize = 128 * 1024;
char* linebuffer = NULL;

// Settings that are private to this file.
static char* copydir = NULL;            // -C: base directory for copy-out mode
static uint32_t debugFlag = 0;          // -d: OR mask of debug options
static unsigned purgeMode = 0;          // -P: OR mask of purge modes
static bool iamalive = false;           // -a: show the rotating "alive" character
static bool verbose = false;            // -v: use the extended log format
static bool envelope = false;           // -H: prepend HTTP header in copy-out mode
static bool no_fork = false;            // -n: do not fork per cache_dir
static bool reminder = false;           // set once an unlink() of a cache file was attempted
static const char* programname = NULL;  // basename of argv[0]
152
153 // ----------------------------------------------------------------------
154
155 struct REList {
156 REList( const char* what, bool doCase );
157 ~REList();
158 bool match( const char* check ) const;
159
160 REList* next;
161 const char* data;
162 regex_t rexp;
163 };
164
165 REList::REList( const char* what, bool doCase )
166 :next(0),data(xstrdup(what))
167 {
168 int result = regcomp( &rexp, what,
169 REG_EXTENDED | REG_NOSUB | (doCase ? 0 : REG_ICASE) );
170 if ( result != 0 ) {
171 char buffer[256];
172 regerror( result, &rexp, buffer, 256 );
173 fprintf( stderr, "unable to compile re \"%s\": %s\n", what, buffer );
174 exit(EXIT_FAILURE);
175 }
176 }
177
178 REList::~REList()
179 {
180 if ( next ) delete next;
181 if ( data ) xfree((void*) data);
182 regfree(&rexp);
183 }
184
185 bool
186 REList::match( const char* check ) const
187 {
188 int result = regexec( &rexp, check, 0, 0, 0 );
189 if ( result != 0 && result != REG_NOMATCH ) {
190 char buffer[256];
191 regerror( result, &rexp, buffer, 256 );
192 fprintf( stderr, "unable to execute re \"%s\"\n+ on line \"%s\": %s\n",
193 data, check, buffer );
194 exit(EXIT_FAILURE);
195 }
196 return ( result == 0 );
197 }
198
199 // ----------------------------------------------------------------------
200
char*
concat( const char* start, ... )
// purpose: concatenate an arbitrary number of C strings.
// paramtr: start (IN): first C string, must not be NULL
//          ... (IN): further C strings, terminated with a NULL pointer.
//                    The terminator is read back as a const char*, so it
//                    has to be passed with pointer type, e.g.
//                    (const char*) 0.
// returns: memory allocated via new[], containing the concatenated string.
//          The caller releases it with delete[].
{
    va_list ap;
    const char* s;

    // first run: determine size, including the terminating NUL.
    // size_t matches what strlen() returns; nothing is truncated.
    size_t size = strlen(start) + 1;
    va_start( ap, start );
    while ( (s=va_arg(ap,const char*)) != NULL )
        size += strlen(s);
    va_end(ap);

    // allocate. A plain new[] expression reports failure by throwing,
    // so there is no NULL result to test for.
    char* result = new char[size];

    // second run: copy content. Keeping the write position avoids
    // rescanning the partial result for every piece.
    char* cursor = result;
    size_t len = strlen(start);
    memcpy( cursor, start, len );
    cursor += len;

    va_start( ap, start );
    while ( (s=va_arg(ap,const char*)) != NULL ) {
        len = strlen(s);
        memcpy( cursor, s, len );
        cursor += len;
    }
    va_end(ap);
    *cursor = '\0';

    return result;
}
233
bool
isxstring( const char* s, size_t testlen )
// purpose: test a string for consisting of hexadecimal digits only
// paramtr: s (IN): string to test
//          testlen (IN): length the string must have
// returns: true, iff strlen(s)==testlen && all_x_chars(s), false otherwise
{
    if ( strlen(s) != testlen ) return false;

    for ( size_t i=0; i<testlen; ++i ) {
        // isxdigit() is only defined for values representable as unsigned
        // char (and EOF); a plain char may be negative.
        if ( ! isxdigit( static_cast<unsigned char>(s[i]) ) ) return false;
    }
    return true;
}
248
249 inline
250 int
251 log_output( const char* fn, int code, long size, const char* url )
252 {
253 return printf( "%s %3d %8ld %s\n", fn, code, size, url );
254 }
255
256 static
257 int
258 log_extended( const char* fn, int code, long size, const SquidMetaList* meta )
259 {
260 static const char hexdigit[] = "0123456789ABCDEF";
261 char md5[34];
262 const SquidTLV* findings = 0;
263
264 if ( meta && (findings = meta->search( STORE_META_KEY_MD5 )) ) {
265 unsigned char* s = (unsigned char*) findings->data;
266 for ( int j=0; j<16; ++j, ++s ) {
267 md5[j*2+0] = hexdigit[ *s >> 4 ];
268 md5[j*2+1] = hexdigit[ *s & 15 ];
269 }
270 md5[32] = '\0'; // terminate string
271 } else {
272 snprintf( md5, sizeof(md5), "%-32s", "(no_md5_data_available)" );
273 }
274
275 char timeb[256];
276 if ( meta && (findings = meta->search( STORE_META_STD )) ) {
277 StoreMetaStd temp;
278 // make data aligned, avoid SIGBUS on RISC machines (ARGH!)
279 memcpy( &temp, findings->data, sizeof(StoreMetaStd) );
280 snprintf( timeb, sizeof(timeb), "%08lx %08lx %08lx %08lx %04x %5hu ",
281 (unsigned long)temp.timestamp, (unsigned long)temp.lastref,
282 (unsigned long)temp.expires, (unsigned long)temp.lastmod, temp.flags, temp.refcount );
283 } else if ( meta && (findings = meta->search( STORE_META_STD_LFS )) ) {
284 StoreMetaStdLFS temp;
285 // make data aligned, avoid SIGBUS on RISC machines (ARGH!)
286 memcpy( &temp, findings->data, sizeof(StoreMetaStdLFS) );
287 snprintf( timeb, sizeof(timeb), "%08lx %08lx %08lx %08lx %04x %5hu ",
288 (unsigned long)temp.timestamp, (unsigned long)temp.lastref,
289 (unsigned long)temp.expires, (unsigned long)temp.lastmod, temp.flags, temp.refcount );
290 } else {
291 unsigned long ul = ULONG_MAX; // Match type of StoreMetaTLV fields
292 unsigned short hu = 0; // Match type of StoreMetaTLV refcount fields
293 snprintf( timeb, sizeof(timeb), "%08lx %08lx %08lx %08lx %04x %5d ", ul, ul, ul, ul, 0, hu);
294 }
295
296 // make sure that there is just one printf()
297 if ( meta && (findings = meta->search( STORE_META_URL )) ) {
298 return printf( "%s %3d %8ld %s %s %s\n",
299 fn, code, size, md5, timeb, findings->data );
300 } else {
301 return printf( "%s %3d %8ld %s %s strange_file\n",
302 fn, code, size, md5, timeb );
303 }
304 }
305
// Address and port of the cache that receives the PURGE requests.
// Both are file-level state (pure laziness, in the original author's
// words); action() reads them. The port is kept in network byte order.
static in_addr serverHost;
static unsigned short serverPort;
309
310 bool
311 action( int fd, size_t metasize,
312 const char* fn, const char* url, const SquidMetaList& meta )
313 // purpose: if cmdline-requested, send the purge request to the cache
314 // paramtr: fd (IN): open FD for the object file
315 // metasize (IN): offset into data portion of file (meta data size)
316 // fn (IN): name of the object file
317 // url (IN): URL string stored in the object file
318 // meta (IN): list containing further meta data
319 // returns: true for a successful action, false otherwise. The action
320 // may just print the file, send the purge request or even
321 // remove unwanted files.
322 // globals: ::purgeMode (IN): bit#0 set -> send purge request.
323 // bit#1 set -> remove 404 object files.
324 // ::serverHost (IN): cache host address
325 // ::serverPort (IN): cache port number
326 {
327 static const char* schablone = "PURGE %s HTTP/1.0\r\nAccept: */*\r\n\r\n";
328 struct stat st;
329 long size = ( fstat(fd,&st) == -1 ? -1 : long(st.st_size - metasize) );
330
331 // if we want to copy out the file, do that first of all.
332 if ( ::copydir && *copydir && size > 0 )
333 copy_out( st.st_size, metasize, ::debugFlag,
334 fn, url, ::copydir, ::envelope );
335
336 // do we need to PURGE the file, yes, if purgemode bit#0 was set.
337 int status = 0;
338 if ( ::purgeMode & 0x01 ) {
339 unsigned long bufsize = strlen(url) + strlen(schablone) + 4;
340 char* buffer = new char[bufsize];
341
342 snprintf( buffer, bufsize, schablone, url );
343 int sockfd = connectTo( serverHost, serverPort, true );
344 if ( sockfd == -1 ) {
345 fprintf( stderr, "unable to connect to server: %s\n", strerror(errno) );
346 delete[] buffer;
347 return false;
348 }
349
350 int content_size = strlen(buffer);
351 if ( write( sockfd, buffer, content_size ) != content_size ) {
352 // error while talking to squid
353 fprintf( stderr, "unable to talk to server: %s\n", strerror(errno) );
354 close(sockfd);
355 delete[] buffer;
356 return false;
357 }
358 memset( buffer+8, 0, 4 );
359 int readLen = read(sockfd, buffer, bufsize);
360 if (readLen < 1) {
361 // error while reading squid's answer
362 fprintf( stderr, "unable to read answer: %s\n", strerror(errno) );
363 close(sockfd);
364 delete[] buffer;
365 return false;
366 }
367 buffer[bufsize-1] = '\0';
368 close(sockfd);
369 int64_t s = strtol(buffer+8,0,10);
370 if (s > 0 && s < 1000)
371 status = s;
372 else {
373 // error while reading squid's answer
374 fprintf( stderr, "invalid HTTP status in reply: %s\n", buffer+8);
375 }
376 delete[] buffer;
377 }
378
379 // log the output of our operation
380 bool flag = true;
381 if ( ::verbose ) flag = ( log_extended( fn, status, size, &meta ) >= 0 );
382 else flag = ( log_output( fn, status, size, url ) >= 0 );
383
384 // remove the file, if purgemode bit#1, and HTTP result status 404).
385 if ( (::purgeMode & 0x02) && status == 404 ) {
386 reminder = true;
387 if ( unlink(fn) == -1 )
388 // error while unlinking file, this may happen due to the cache
389 // unlinking a file while it is still in the readdir() cache of purge.
390 fprintf( stderr, "WARNING: unable to unlink %s: %s\n",
391 fn, strerror(errno) );
392 }
393
394 return flag;
395 }
396
397 bool
398 match( const char* fn, const REList* list )
399 // purpose: do something with the given cache content filename
400 // paramtr: fn (IN): filename of cache file
401 // returns: true for successful action, false otherwise.
402 // warning: only return false, if you want the loop to terminate!
403 {
404 static const size_t addon = sizeof(unsigned char) + sizeof(unsigned int);
405 bool flag = true;
406
407 if ( debugFlag & 0x01 ) fprintf( stderr, "# [3] %s\n", fn );
408 int fd = open( fn, O_RDONLY );
409 if ( fd != -1 ) {
410 memset(::linebuffer, 0, ::buffersize);
411 size_t readLen = read(fd,::linebuffer,::buffersize-1);
412 if ( readLen > 60 ) {
413 ::linebuffer[ ::buffersize-1 ] = '\0'; // force-terminate string
414
415 // check the offset into the start of object data. The offset is
416 // stored in a host endianness after the first byte.
417 unsigned int datastart;
418 memcpy( &datastart, ::linebuffer + 1, sizeof(unsigned int) );
419 if ( datastart > ::buffersize - addon - 1 ) {
420 // check offset into server reply header (start of cache data).
421 fputs( "WARNING: Using a truncated URL string.\n", stderr );
422 datastart = ::buffersize - addon - 1;
423 }
424
425 // NEW: Parse squid meta data, which is a kind of linked list
426 // flattened out into a file byte stream. Somewhere within is
427 // the URL as part of the list. First, gobble all meta data.
428 unsigned int offset = addon;
429 SquidMetaList meta;
430 while ( offset + addon <= datastart ) {
431 unsigned int size = 0;
432 memcpy( &size, linebuffer+offset+sizeof(char), sizeof(unsigned int) );
433 if (size+offset < size) {
434 fputs("WARNING: file corruption detected. 32-bit overflow in size field.\n", stderr);
435 break;
436 }
437 if (size+offset > readLen) {
438 fputs( "WARNING: Partial meta data loaded.\n", stderr );
439 break;
440 }
441 meta.append( SquidMetaType(*(linebuffer+offset)),
442 size, linebuffer+offset+addon );
443 offset += ( addon + size );
444 }
445
446 // Now extract the key URL from the meta data.
447 const SquidTLV* urlmeta = meta.search( STORE_META_URL );
448 if ( urlmeta ) {
449 // found URL in meta data. Try to process the URL
450 if ( list == 0 )
451 flag = action( fd, datastart, fn, (char*) urlmeta->data, meta );
452 else {
453 REList* head = (REList*) list; // YUCK!
454 while ( head != 0 ) {
455 if ( head->match( (char*) urlmeta->data ) ) break;
456 head = head->next;
457 }
458 if ( head != 0 )
459 flag = action( fd, datastart, fn, (char*) urlmeta->data, meta );
460 else flag = true;
461 }
462 }
463
464 // "meta" will be deleted when exiting from this block
465 } else {
466 // weird file, TODO: stat() it!
467 struct stat st;
468 long size = ( fstat(fd,&st) == -1 ? -1 : st.st_size );
469 if ( ::verbose ) flag = ( log_extended( fn, -1, size, 0 ) >= 0 );
470 else flag = ( log_output( fn, -1, size, "strange file" ) >= 0 );
471
472 if ( (::purgeMode & 0x04) ) {
473 reminder = true;
474 if ( unlink(fn) == -1 )
475 // error while unlinking file, this may happen due to the cache
476 // unlinking a file while it is in the readdir() cache of purge.
477 fprintf( stderr, "WARNING: unable to unlink %s: %s\n",
478 fn, strerror(errno) );
479 }
480 }
481 close(fd);
482 } else {
483 // error while opening file, this may happen due to the cache
484 // unlinking a file while it is still in the readdir() cache of purge.
485 fprintf( stderr, "WARNING: open \"%s\": %s\n", fn, strerror(errno) );
486 }
487
488 return flag;
489 }
490
491 bool
492 filelevel( const char* directory, const REList* list )
493 // purpose: from given starting point, look for squid xxxxxxxx files.
494 // example: "/var/spool/cache/08/7F" as input, do action over files
495 // paramtr: directory (IN): starting point
496 // list (IN): list of rexps to match URLs against
497 // returns: true, if every subdir && action was successful.
498 {
499 dirent_t * entry;
500 if ( debugFlag & 0x01 )
501 fprintf( stderr, "# [2] %s\n", directory );
502
503 DIR* dir = opendir( directory );
504 if ( dir == NULL ) {
505 fprintf( stderr, "unable to open directory \"%s\": %s\n",
506 directory, strerror(errno) );
507 return false;
508 }
509
510 // display a rotating character as "i am alive" signal (slows purge).
511 if ( ::iamalive ) {
512 static char alivelist[4][3] = { "\\\b", "|\b", "/\b", "-\b" };
513 static unsigned short alivecount = 0;
514 const int write_success = write(STDOUT_FILENO, alivelist[alivecount++ & 3], 2);
515 assert(write_success == 2);
516 }
517
518 bool flag = true;
519 while ( (entry=readdir(dir)) && flag ) {
520 if ( isxstring(entry->d_name,8) ) {
521 char* name = concat( directory, "/", entry->d_name, 0 );
522 flag = match( name, list );
523 delete[] name;
524 }
525 }
526
527 closedir(dir);
528 return flag;
529 }
530
531 bool
532 dirlevel( const char* dirname, const REList* list, bool level=false )
533 // purpose: from given starting point, look for squid 00..FF directories.
534 // paramtr: dirname (IN): starting point
535 // list (IN): list of rexps to match URLs against
536 // level (IN): false==toplevel, true==1st level
537 // example: "/var/spool/cache", false as input, traverse subdirs w/ action.
538 // example: "/var/spool/cache/08", true as input, traverse subdirs w/ action.
539 // returns: true, if every subdir && action was successful.
540 // warning: this function is once-recursive, no deeper.
541 {
542 dirent_t* entry;
543 if ( debugFlag & 0x01 )
544 fprintf( stderr, "# [%d] %s\n", (level ? 1 : 0), dirname );
545
546 DIR* dir = opendir( dirname );
547 if ( dir == NULL ) {
548 fprintf( stderr, "unable to open directory \"%s\": %s\n",
549 dirname, strerror(errno) );
550 return false;
551 }
552
553 bool flag = true;
554 while ( (entry=readdir(dir)) && flag ) {
555 if ( strlen(entry->d_name) == 2 &&
556 isxdigit(entry->d_name[0]) &&
557 isxdigit(entry->d_name[1]) ) {
558 char* name = concat( dirname, "/", entry->d_name, 0 );
559 flag = level ? filelevel( name, list ) : dirlevel( name, list, true );
560 delete[] name;
561 }
562 }
563
564 closedir(dir);
565 return flag;
566 }
567
int
checkForPortOnly( const char* arg )
// purpose: see if somebody just put in a port instead of a hostname
// paramtr: arg (IN): argument from commandline
// returns: 0..65535 is the valid port number in network byte order,
//          -1 if not a port
// warning: symbolic service names (getservbyname) are not tried.
{
    // if there is a period in there, it must be a valid hostname
    if ( strchr( arg, '.' ) != 0 ) return -1;

    // Only a string that is a number from its first to its last character
    // is a port. Something like "3com" merely starts with digits and is
    // left to the hostname lookup of the caller.
    char* end = 0;
    const unsigned long result = strtoul( arg, &end, 0 );
    if ( end == arg || *end != '\0' ) return -1;

    // if it is just a number between 0 and 65535, it must be a port
    if ( result >= 65536 ) return -1;
    return htons( static_cast<unsigned short>(result) );
}
591
592 void
593 helpMe( void )
594 // purpuse: write help message and exit
595 {
596 printf( "\nUsage:\t%s\t[-a] [-c cf] [-d l] [-(f|F) fn | -(e|E) re] "
597 "[-p h[:p]]\n\t\t[-P #] [-s] [-v] [-C dir [-H]] [-n]\n\n",
598 ::programname );
599 printf(
600 " -a\tdisplay a little rotating thingy to indicate that I am alive (tty only).\n"
601 " -c c\tsquid.conf location, default \"%s\".\n"
602 " -C dir\tbase directory for content extraction (copy-out mode).\n"
603 " -d l\tdebug level, an OR mask of different debug options.\n"
604 " -e re\tsingle regular expression per -e instance (use quotes!).\n"
605 " -E re\tsingle case sensitive regular expression like -e.\n"
606 " -f fn\tname of textfile containing one regular expression per line.\n"
607 " -F fn\tname of textfile like -f containing case sensitive REs.\n"
608 " -H\tprepend HTTP reply header to destination files in copy-out mode.\n"
609 " -n\tdo not fork() when using more than one cache_dir.\n"
610 " -p h:p\tcache runs on host h and optional port p, default is %s:%u.\n"
611 " -P #\tif 0, just print matches; otherwise OR the following purge modes:\n"
612 "\t 0x01 really send PURGE to the cache.\n"
613 "\t 0x02 remove all caches files reported as 404 (not found).\n"
614 "\t 0x04 remove all weird (inaccessible or too small) cache files.\n"
615 "\t0 and 1 are recommended - slow rebuild your cache with other modes.\n"
616 " -s\tshow all options after option parsing, but before really starting.\n"
617 " -v\tshow more information about the file, e.g. MD5, timestamps and flags.\n"
618 "\n", DEFAULT_CONFIG_FILE, DEFAULTHOST, DEFAULTPORT );
619
620 }
621
622 void
623 parseCommandline( int argc, char* argv[], REList*& head,
624 char*& conffile, char*& copyDirPath,
625 struct in_addr& serverHostIp, unsigned short& serverHostPort )
626 // paramtr: argc: see ::main().
627 // argv: see ::main().
628 // returns: Does terminate the program on errors!
629 // purpose: suck in any commandline options, and set the global vars.
630 {
631 int option, port, showme = 0;
632 char* ptr, *colon;
633 FILE* rfile;
634
635 // program basename
636 if ( (ptr = strrchr(argv[0],'/')) == NULL )
637 ptr=argv[0];
638 else
639 ++ptr;
640 ::programname = ptr;
641
642 // extract commandline parameters
643 REList* tail = head = 0;
644 opterr = 0;
645 while ( (option = getopt( argc, argv, "ac:C:d:E:e:F:f:Hnp:P:sv" )) != -1 ) {
646 switch ( option ) {
647 case 'a':
648 ::iamalive = ! ::iamalive;
649 break;
650 case 'C':
651 if ( optarg && *optarg ) {
652 if ( copyDirPath ) xfree( (void*) copyDirPath );
653 copyDirPath = xstrdup(optarg);
654 assert(copyDirPath);
655 }
656 break;
657 case 'c':
658 if ( !optarg || !*optarg ) {
659 fprintf( stderr, "%c requires a regex pattern argument!\n", option );
660 exit(EXIT_FAILURE);
661 }
662 if ( *conffile ) xfree((void*) conffile);
663 conffile = xstrdup(optarg);
664 assert(conffile);
665 break;
666
667 case 'd':
668 if ( !optarg || !*optarg ) {
669 fprintf( stderr, "%c expects a mask parameter. Debug disabled.\n", option );
670 ::debugFlag = 0;
671 } else
672 ::debugFlag = (strtoul(optarg, NULL, 0) & 0xFFFFFFFF);
673 break;
674
675 case 'E':
676 case 'e':
677 if ( !optarg || !*optarg ) {
678 fprintf( stderr, "%c requires a regex pattern argument!\n", option );
679 exit(EXIT_FAILURE);
680 }
681 if ( head == 0 )
682 tail = head = new REList( optarg, option=='E' );
683 else {
684 tail->next = new REList( optarg, option=='E' );
685 tail = tail->next;
686 }
687 break;
688
689 case 'f':
690 if ( !optarg || !*optarg ) {
691 fprintf( stderr, "%c requires a filename argument!\n", option );
692 exit(EXIT_FAILURE);
693 }
694 if ( (rfile = fopen( optarg, "r" )) != NULL ) {
695 unsigned long lineno = 0;
696 #define LINESIZE 512
697 char line[LINESIZE];
698 while ( fgets( line, LINESIZE, rfile ) != NULL ) {
699 ++lineno;
700 int len = strlen(line)-1;
701 if ( len+2 >= LINESIZE ) {
702 fprintf( stderr, "%s:%lu: line too long, sorry.\n",
703 optarg, lineno );
704 exit(EXIT_FAILURE);
705 }
706
707 // remove trailing line breaks
708 while ( len > 0 && ( line[len] == '\n' || line[len] == '\r' ) ) {
709 line[len] = '\0';
710 --len;
711 }
712
713 // insert into list of expressions
714 if ( head == 0 ) tail = head = new REList(line,option=='F');
715 else {
716 tail->next = new REList(line,option=='F');
717 tail = tail->next;
718 }
719 }
720 fclose(rfile);
721 } else
722 fprintf( stderr, "unable to open %s: %s\n", optarg, strerror(errno));
723 break;
724
725 case 'H':
726 ::envelope = ! ::envelope;
727 break;
728 case 'n':
729 ::no_fork = ! ::no_fork;
730 break;
731 case 'p':
732 if ( !optarg || !*optarg ) {
733 fprintf( stderr, "%c requires a port argument!\n", option );
734 exit(EXIT_FAILURE);
735 }
736 colon = strchr( optarg, ':' );
737 if ( colon == 0 ) {
738 // no colon, only look at host
739
740 // fix: see if somebody just put in there a port (no periods)
741 // give port number precedence over host names
742 port = checkForPortOnly( optarg );
743 if ( port == -1 ) {
744 // assume that main() did set the default port
745 if ( convertHostname(optarg,serverHostIp) == -1 ) {
746 fprintf( stderr, "unable to resolve host %s!\n", optarg );
747 exit(EXIT_FAILURE);
748 }
749 } else {
750 // assume that main() did set the default host
751 serverHostPort = port;
752 }
753 } else {
754 // colon used, port is extra
755 *colon = 0;
756 ++colon;
757 if ( convertHostname(optarg,serverHostIp) == -1 ) {
758 fprintf( stderr, "unable to resolve host %s!\n", optarg );
759 exit(EXIT_FAILURE);
760 }
761 if ( convertPortname(colon,serverHostPort) == -1 ) {
762 fprintf( stderr, "unable to resolve port %s!\n", colon );
763 exit(EXIT_FAILURE);
764 }
765 }
766 break;
767 case 'P':
768 if ( !optarg || !*optarg ) {
769 fprintf( stderr, "%c requires a mode argument!\n", option );
770 exit(EXIT_FAILURE);
771 }
772 ::purgeMode = ( strtol( optarg, 0, 0 ) & 0x07 );
773 break;
774 case 's':
775 showme=1;
776 break;
777 case 'v':
778 ::verbose = ! ::verbose;
779 break;
780 case '?':
781 default:
782 helpMe();
783 exit(EXIT_FAILURE);
784 }
785 }
786
787 // adjust
788 if ( ! isatty(fileno(stdout)) || (::debugFlag & 0x01) ) ::iamalive = false;
789 if ( head == 0 ) {
790 fputs( "There was no regular expression defined. If you intend\n", stderr );
791 fputs( "to match all possible URLs, use \"-e .\" instead.\n", stderr );
792 exit(EXIT_FAILURE);
793 }
794
795 // postcondition: head != 0
796 assert( head != 0 );
797
798 // make sure that the copy out directory is there and accessible
799 if ( copyDirPath && *copyDirPath )
800 if ( assert_copydir( copyDirPath ) != 0 ) exit(1);
801
802 // show results
803 if ( showme ) {
804 printf( "#\n# Currently active values for %s:\n",
805 ::programname);
806 printf( "# Debug level : " );
807 if ( ::debugFlag ) printf( "%#6.4x", ::debugFlag );
808 else printf( "production level" ); // printf omits 0x prefix for 0!
809 printf( " + %s mode", ::no_fork ? "linear" : "parallel" );
810 puts( ::verbose ? " + extra verbosity" : "" );
811
812 printf( "# Copy-out directory: %s ",
813 copyDirPath ? copyDirPath : "copy-out mode disabled" );
814 if ( copyDirPath )
815 printf( "(%s HTTP header)\n", ::envelope ? "prepend" : "no" );
816 else
817 puts("");
818
819 printf( "# Squid config file : %s\n", conffile );
820 printf( "# Cacheserveraddress: %s:%u\n",
821 inet_ntoa( serverHostIp ), ntohs( serverHostPort ) );
822 printf( "# purge mode : 0x%02x\n", ::purgeMode );
823 printf( "# Regular expression: " );
824
825 unsigned count(0);
826 for ( tail = head; tail != NULL; tail = tail->next ) {
827 if ( count++ )
828 printf( "#%22u", count );
829 #if defined(LINUX) && putc==_IO_putc
830 // I HATE BROKEN LINUX HEADERS!
831 // purge.o(.text+0x1040): undefined reference to `_IO_putc'
832 // If your compilation breaks here, remove the undefinition
833 #undef putc
834 #endif
835 else putchar('1');
836 printf( " \"%s\"\n", tail->data );
837 }
838 puts( "#" );
839 }
840 fflush( stdout );
841 }
842
843 extern "C" {
844
845 static
846 void
847 exiter( void ) {
848 if ( ::term_flag ) psignal( ::term_flag, "received signal" );
849 delete[] ::linebuffer;
850 if ( ::reminder ) {
851 fputs(
852 "WARNING! Caches files were removed. Please shut down your cache, remove\n"
853 "your swap.state files and restart your cache again, i.e. effictively do\n"
854 "a slow rebuild your cache! Otherwise your squid *will* choke!\n", stderr );
855 }
856 }
857
858 static
859 void
860 handler( int signo ) {
861 ::term_flag = signo;
862 if ( getpid() == getpgrp() ) kill( -getpgrp(), signo );
863 exit(EXIT_FAILURE);
864 }
865
866 } // extern "C"
867
static
int
makelinebuffered( FILE* fp, const char* fn = 0 )
// purpose: switch the given FILE to line buffered mode
// paramtr: fp (IO): file pointer which to put into line buffer mode
//          fn (IN): name to show in the error message, may be NULL
// returns: 0 is ok, -1 to indicate an error
// warning: the error message has already been printed on return
{
    if ( setvbuf( fp, 0, _IOLBF, 0 ) != 0 ) {
        const char* shown = fn ? fn : "";
        fprintf( stderr, "unable to make \"%s\" line buffered: %s\n",
                 shown, strerror(errno) );
        return -1;
    }
    return 0;
}
887
888 int
889 main( int argc, char* argv[] )
890 {
891 // setup variables
892 REList* list = 0;
893 char* conffile = xstrdup(DEFAULT_CONFIG_FILE);
894 serverPort = htons(DEFAULTPORT);
895 if ( convertHostname(DEFAULTHOST,serverHost) == -1 ) {
896 fprintf( stderr, "unable to resolve host %s!\n", DEFAULTHOST );
897 exit(EXIT_FAILURE);
898 }
899
900 // setup line buffer
901 ::linebuffer = new char[ ::buffersize ];
902 assert( ::linebuffer != 0 );
903
904 // parse commandline
905 puts( "### Use at your own risk! No guarantees whatsoever. You were warned. ###");
906 parseCommandline( argc, argv, list, conffile, ::copydir,
907 serverHost, serverPort );
908
909 // prepare execution
910 if ( atexit( exiter ) != 0 ||
911 Signal( SIGTERM, handler, true ) == SIG_ERR ||
912 Signal( SIGINT, handler, true ) == SIG_ERR ||
913 Signal( SIGHUP, handler, true ) == SIG_ERR ) {
914 perror( "unable to install signal/exit function" );
915 exit(EXIT_FAILURE);
916 }
917
918 // try to read squid.conf file to determine all cache_dir locations
919 CacheDirVector cdv(0);
920 if ( readConfigFile( cdv, conffile, debugFlag ? stderr : 0 ) > 0 ) {
921 // there are some valid cache_dir entries.
922 // unless forking was forbidden by cmdline option,
923 // for a process for each cache_dir entry to remove files.
924
925 if ( ::no_fork || cdv.size() == 1 ) {
926 // linear mode, one cache_dir after the next
927 for ( CacheDirVector::iterator i = cdv.begin(); i != cdv.end(); ++i ) {
928 // execute OR complain
929 if ( ! dirlevel(i->base,list) )
930 fprintf( stderr, "program terminated due to error: %s",
931 strerror(errno) );
932 xfree((void*) i->base);
933 }
934 } else {
935 // parallel mode, all cache_dir in parallel
936 pid_t* child = new pid_t[ cdv.size() ];
937
938 // make stdout/stderr line bufferd
939 makelinebuffered( stdout, "stdout" );
940 makelinebuffered( stderr, "stderr" );
941
942 // make parent process group leader for easier killings
943 if ( setpgid(getpid(), getpid()) != 0 ) {
944 perror( "unable to set process group leader" );
945 exit(EXIT_FAILURE);
946 }
947
948 // -a is mutually exclusive with fork mode
949 if ( ::iamalive ) {
950 puts( "# i-am-alive flag incompatible with fork mode, resetting" );
951 ::iamalive = false;
952 }
953
954 for ( size_t i=0; i < cdv.size(); ++i ) {
955 if ( getpid() == getpgrp() ) {
956 // only parent == group leader may fork off new processes
957 if ( (child[i]=fork()) < 0 ) {
958 // fork error, this is bad!
959 perror( "unable to fork" );
960 kill( -getpgrp(), SIGTERM );
961 exit(EXIT_FAILURE);
962 } else if ( child[i] == 0 ) {
963 // child mode
964 // execute OR complain
965 if ( ! dirlevel(cdv[i].base,list) )
966 fprintf( stderr, "program terminated due to error: %s\n",
967 strerror(errno) );
968 xfree((void*) cdv[i].base);
969 exit(EXIT_SUCCESS);
970 } else {
971 // parent mode
972 if ( ::debugFlag ) printf( "forked child %d\n", (int) child[i] );
973 }
974 }
975 }
976
977 // collect the garbase
978 pid_t temp;
979 int status;
980 for ( size_t i=0; i < cdv.size(); ++i ) {
981 while ( (temp=waitpid( (pid_t)-1, &status, 0 )) == -1 )
982 if ( errno == EINTR ) continue;
983 if ( ::debugFlag ) printf( "collected child %d\n", (int) temp );
984 }
985 delete[] child;
986 }
987 } else {
988 fprintf( stderr, "no cache_dir or error accessing \"%s\"\n", conffile );
989 }
990
991 // clean up
992 if ( copydir ) xfree( (void*) copydir );
993 xfree((void*) conffile);
994 delete list;
995 return EXIT_SUCCESS;
996 }
997