]> git.ipfire.org Git - thirdparty/squid.git/blame - tools/purge/purge.cc
helper_stateful_server is a class these days, not a struct
[thirdparty/squid.git] / tools / purge / purge.cc
CommitLineData
eb1f6bfa 1//
7962bc6a 2// $Id$
eb1f6bfa 3//
0b96a9b3 4// Author: Jens-S. Vöckler <voeckler@rvs.uni-hannover.de>
eb1f6bfa
AJ
5//
6// File: purge.cc
7// Wed Jan 13 1999
8//
9// (c) 1999 Lehrgebiet Rechnernetze und Verteilte Systeme
0b96a9b3 10// Universität Hannover, Germany
eb1f6bfa
AJ
11//
12// Permission to use, copy, modify, distribute, and sell this software
13// and its documentation for any purpose is hereby granted without fee,
14// provided that (i) the above copyright notices and this permission
15// notice appear in all copies of the software and related documentation,
16// and (ii) the names of the Lehrgebiet Rechnernetze und Verteilte
17// Systeme and the University of Hannover may not be used in any
18// advertising or publicity relating to the software without the
19// specific, prior written permission of Lehrgebiet Rechnernetze und
20// Verteilte Systeme and the University of Hannover.
21//
22// THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND,
23// EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
24// WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
25//
26// IN NO EVENT SHALL THE LEHRGEBIET RECHNERNETZE UND VERTEILTE SYSTEME OR
27// THE UNIVERSITY OF HANNOVER BE LIABLE FOR ANY SPECIAL, INCIDENTAL,
28// INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, OR ANY DAMAGES
29// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER OR NOT
30// ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF LIABILITY,
31// ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
32// SOFTWARE.
33//
eb1f6bfa
AJ
34// Revision 1.17 2000/09/21 10:59:53 cached
35// *** empty log message ***
36//
37// Revision 1.16 2000/09/21 09:45:18 cached
38// Fixed some small bugs.
39//
40// Revision 1.15 2000/09/21 09:05:56 cached
41// added multi cache_dir support, thus changing -c cmdline option.
42// modified file reading to support /dev/fd/0 reading for non-disclosed items.
43//
44// Revision 1.14 2000/06/20 09:43:01 voeckler
45// added FreeBSD related fixes and support.
46//
47// Revision 1.13 2000/03/29 08:12:21 voeckler
48// fixed wrong header file.
49//
50// Revision 1.12 2000/03/29 07:54:41 voeckler
51// added mechanism to give a port specification precedence over a host
52// specificiation with the -p option and not colon.
53//
54// Revision 1.11 1999/06/18 13:18:28 voeckler
55// added refcount, fixed missing LF in -s output.
56//
57// Revision 1.10 1999/06/16 13:06:05 voeckler
58// reversed meaning of -M flag.
59//
60// Revision 1.9 1999/06/15 21:11:53 voeckler
61// added extended logging feature which extract the squid meta data available
62// within the disk files. moved the content extraction and squid meta data
63// handling parts into separate files. added options for copy-out and verbose.
64//
65// Revision 1.8 1999/06/14 20:14:46 voeckler
66// intermediate version when adding understanding about the way
67// Squid does log the metadata into the file.
68//
69// Revision 1.7 1999/01/23 21:01:10 root
70// stumbled over libc5 header/lib inconsistency bug....
71//
72// Revision 1.6 1999/01/23 20:47:54 root
73// added Linux specifics for psignal...
74// Hope this helps.
75//
76// Revision 1.5 1999/01/20 09:48:12 voeckler
77// added warning as first line of output.
78//
79// Revision 1.4 1999/01/19 11:53:49 voeckler
80// added psignal() from <siginfo.h> handling.
81//
82// Revision 1.3 1999/01/19 11:00:50 voeckler
83// added keyboard interrupt handling, exit handling, removed C++ strings and
84// regular expression syntax in favour of less source code, added comments,
85// added a reminder to remove swap.state in case of unlinks, added IAA flag,
86// added a few assertions, changed policy to enforce the definition of at
87// least one regular expression, and catch a few signals.
88//
89// Revision 1.2 1999/01/15 23:06:28 voeckler
90// downgraded to simple C strings...
91//
92// Revision 1.1 1999/01/14 12:05:32 voeckler
93// Initial revision
94//
95//
96#if defined(__GNUC__) || defined(__GNUG__)
97#pragma implementation
eb1f6bfa
AJ
98#endif
99
2ccf2eb2
AJ
100#include "config.h"
101// for xstrdup
102#include "util.h"
103
104//#include <assert.h>
eb1f6bfa
AJ
105#include <stdarg.h>
106#include <stdio.h>
107#include <dirent.h>
2ccf2eb2 108//#include <ctype.h>
eb1f6bfa 109#include <string.h>
2ccf2eb2 110//#include <sys/types.h>
eb1f6bfa
AJ
111#include <sys/stat.h>
112#include <sys/wait.h>
113#include <fcntl.h>
114#include <unistd.h>
115#include <stdlib.h>
116#include <limits.h>
117#include <signal.h>
118#include <errno.h>
119
2ccf2eb2 120#if HAVE_SIGINFO_H
eb1f6bfa 121#include <siginfo.h>
2ccf2eb2 122#endif
eb1f6bfa
AJ
123
124#include <netinet/in.h>
125#include <netinet/tcp.h> // TCP_NODELAY
126#include <arpa/inet.h>
127#include <netdb.h> // gethostbyname()
357b8e33 128//#include <regex.h> //comes via compat.h
eb1f6bfa
AJ
129
130#include "convert.hh"
131#include "socket.hh"
132#include "signal.hh"
133#include "squid-tlv.hh"
134#include "copyout.hh"
135#include "conffile.hh"
136
137#ifndef DEFAULTHOST
138#define DEFAULTHOST "localhost"
139#endif // DEFAULTHOST
140
141#ifndef DEFAULTPORT
142#define DEFAULTPORT 3128
143#endif // DEFAULTPORT
144
// --- objects with external linkage ------------------------------------

// number of the signal that handler() caught, 0 while none was received.
volatile sig_atomic_t term_flag = 0; // 'terminate' is a gcc 2.8.x internal...

// scratch buffer that match() reads the head of each cache file into;
// allocated in main() with ::buffersize bytes, released in exiter().
// NOTE(review): 16834 looks like a transposition of 16384 -- confirm
// before changing, the code works with either value.
size_t buffersize = 16834;
char* linebuffer = 0;

// --- settings filled in by parseCommandline() -------------------------

static const char* programname = 0;   // basename of argv[0]
static const char* RCS_ID = "$Id$";
static char* copydir = 0;             // -C: copy-out base directory
static unsigned debugFlag = 0;        // -d: OR of debug options
static unsigned purgeMode = 0;        // -P: purge mode bits, masked to 0x07
static bool iamalive = false;         // -a: show the rotating "alive" sign
static bool verbose = false;          // -v: log_extended() vs. log_output()
static bool envelope = false;         // -H: prepend HTTP header on copy-out
static bool no_fork = false;          // -n: treat cache_dirs one by one

// --- run-time state ---------------------------------------------------

// becomes true as soon as a cache file was unlinked; exiter() then
// prints the "remove your swap.state" warning.
static bool reminder = false;
eb1f6bfa
AJ
158
159// ----------------------------------------------------------------------
160
161struct REList {
feec68a0
A
162 REList( const char* what, bool doCase );
163 ~REList();
164 bool match( const char* check ) const;
eb1f6bfa 165
feec68a0
A
166 REList* next;
167 const char* data;
168 regex_t rexp;
eb1f6bfa
AJ
169};
170
171REList::REList( const char* what, bool doCase )
2ccf2eb2 172 :next(0),data(xstrdup(what))
eb1f6bfa 173{
feec68a0
A
174 int result = regcomp( &rexp, what,
175 REG_EXTENDED | REG_NOSUB | (doCase ? 0 : REG_ICASE) );
176 if ( result != 0 ) {
177 char buffer[256];
178 regerror( result, &rexp, buffer, 256 );
179 fprintf( stderr, "unable to compile re \"%s\": %s\n", what, buffer );
180 exit(1);
181 }
eb1f6bfa
AJ
182}
183
184REList::~REList()
feec68a0
A
185{
186 if ( next ) delete next;
6e2aefad 187 if ( data ) xfree((void*) data);
feec68a0 188 regfree(&rexp);
eb1f6bfa
AJ
189}
190
191bool
192REList::match( const char* check ) const
193{
feec68a0
A
194 int result = regexec( &rexp, check, 0, 0, 0 );
195 if ( result != 0 && result != REG_NOMATCH ) {
196 char buffer[256];
197 regerror( result, &rexp, buffer, 256 );
198 fprintf( stderr, "unable to execute re \"%s\"\n+ on line \"%s\": %s\n",
199 data, check, buffer );
200 exit(1);
201 }
202 return ( result == 0 );
eb1f6bfa
AJ
203}
204
205// ----------------------------------------------------------------------
206
char*
concat( const char* start, ... )
// purpose: concatenate an arbitrary number of C strings.
// paramtr: start (IN): first C string
//          ... (IN): further C strings, terminated with a NULL pointer
// returns: memory allocated via new[], containing the concatenated string.
//          The caller releases it with delete[].
// warning: the terminator must have pointer type; a plain 0 is an int and
//          may not be read back correctly as a pointer.
{
    va_list ap;
    const char* s;

    // first run: determine size, including the terminating NUL
    size_t size = strlen(start) + 1;
    va_start( ap, start );
    while ( (s = va_arg(ap, const char*)) != NULL ) size += strlen(s);
    va_end(ap);

    // allocate; plain new[] reports failure by throwing, never by 0
    char* result = new char[size];

    // second run: copy content, appending at a moving cursor so that the
    // already copied part is not rescanned for every argument
    char* cursor = result;
    size_t len = strlen(start);
    memcpy( cursor, start, len );
    cursor += len;

    va_start( ap, start );
    while ( (s = va_arg(ap, const char*)) != NULL ) {
        len = strlen(s);
        memcpy( cursor, s, len );
        cursor += len;
    }
    va_end(ap);
    *cursor = '\0';

    return result;
}
238
bool
isxstring( const char* s, size_t testlen )
// purpose: test a string for consisting of hex digits only
// paramtr: s (IN): string to test
//          testlen (IN): length the string must have
// returns: true, iff strlen(s)==testlen && all_x_chars(s), false otherwise
{
    for ( size_t i = 0; i < testlen; ++i ) {
        // The NUL terminator is not a hex digit, so a string that is too
        // short is rejected here before anything past its end is read.
        // The cast keeps characters with the high bit set inside the
        // value range isxdigit() is defined for.
        if ( ! isxdigit( (unsigned char) s[i] ) ) return false;
    }

    // exactly testlen characters: the next one must be the terminator
    return ( s[testlen] == '\0' );
}
252
inline
int
log_output( const char* fn, int code, long size, const char* url )
// purpose: print the short, one-line log entry for a cache file
// paramtr: fn (IN): name of the cache file
//          code (IN): status code to log
//          size (IN): size to log
//          url (IN): URL, or a placeholder text, to log
// returns: the result of printf(), i.e. negative on output errors.
{
    const int written = printf( "%s %3d %8ld %s\n", fn, code, size, url );
    return written;
}
259
260static
261int
262log_extended( const char* fn, int code, long size, const SquidMetaList* meta )
263{
feec68a0
A
264 static const char hexdigit[] = "0123456789ABCDEF";
265 char md5[34];
266 const SquidTLV* findings = 0;
267
268 if ( meta && (findings = meta->search( STORE_META_KEY_MD5 )) ) {
269 unsigned char* s = (unsigned char*) findings->data;
270 for ( int j=0; j<16; j++, s++ ) {
271 md5[j*2+0] = hexdigit[ *s >> 4 ];
272 md5[j*2+1] = hexdigit[ *s & 15 ];
273 }
274 md5[32] = '\0'; // terminate string
275 } else {
6e2aefad 276 snprintf( md5, sizeof(md5), "%-32s", "(no_md5_data_available)" );
feec68a0
A
277 }
278
279 char timeb[64];
280 if ( meta && (findings = meta->search( STORE_META_STD )) ) {
281 StoreMetaStd temp;
282 // make data aligned, avoid SIGBUS on RISC machines (ARGH!)
283 memcpy( &temp, findings->data, sizeof(StoreMetaStd) );
6e2aefad 284 snprintf( timeb, sizeof(timeb), "%08lx %08lx %08lx %08lx %04x %5hu ",
8978bd9d
A
285 (unsigned long)temp.timestamp, (unsigned long)temp.lastref,
286 (unsigned long)temp.expires, (unsigned long)temp.lastmod, temp.flags, temp.refcount );
feec68a0
A
287 } else if ( meta && (findings = meta->search( STORE_META_STD_LFS )) ) {
288 StoreMetaStdLFS temp;
289 // make data aligned, avoid SIGBUS on RISC machines (ARGH!)
290 memcpy( &temp, findings->data, sizeof(StoreMetaStd) );
6e2aefad 291 snprintf( timeb, sizeof(timeb), "%08lx %08lx %08lx %08lx %04x %5hu ",
8978bd9d
A
292 (unsigned long)temp.timestamp, (unsigned long)temp.lastref,
293 (unsigned long)temp.expires, (unsigned long)temp.lastmod, temp.flags, temp.refcount );
feec68a0 294 } else {
6e2aefad 295 snprintf( timeb, sizeof(timeb), "%08lx %08lx %08lx %08lx %04x %5hu ", (unsigned long)-1, (unsigned long)-1, (unsigned long)-1, (unsigned long)-1, 0, 0 );
feec68a0
A
296 }
297
298 // make sure that there is just one printf()
299 if ( meta && (findings = meta->search( STORE_META_URL )) ) {
300 return printf( "%s %3d %8ld %s %s %s\n",
301 fn, code, size, md5, timeb, findings->data );
302 } else {
303 return printf( "%s %3d %8ld %s %s strange_file\n",
304 fn, code, size, md5, timeb );
eb1f6bfa 305 }
eb1f6bfa
AJ
306}
307
308// o.k., this is pure lazyness...
309static struct in_addr serverHost;
310static unsigned short serverPort;
311
312bool
313action( int fd, size_t metasize,
feec68a0
A
314 const char* fn, const char* url, const SquidMetaList& meta )
315// purpose: if cmdline-requested, send the purge request to the cache
316// paramtr: fd (IN): open FD for the object file
317// metasize (IN): offset into data portion of file (meta data size)
318// fn (IN): name of the object file
319// url (IN): URL string stored in the object file
320// meta (IN): list containing further meta data
321// returns: true for a successful action, false otherwise. The action
322// may just print the file, send the purge request or even
323// remove unwanted files.
324// globals: ::purgeMode (IN): bit#0 set -> send purge request.
325// bit#1 set -> remove 404 object files.
326// ::serverHost (IN): cache host address
327// ::serverPort (IN): cache port number
eb1f6bfa 328{
feec68a0
A
329 static const char* schablone = "PURGE %s HTTP/1.0\r\nAccept: */*\r\n\r\n";
330 struct stat st;
331 long size = ( fstat(fd,&st) == -1 ? -1 : long(st.st_size - metasize) );
332 int status = 0;
333
334 // if we want to copy out the file, do that first of all.
335 if ( ::copydir && *copydir && size > 0 )
59a09b98 336 copy_out( st.st_size, metasize, ::debugFlag,
feec68a0
A
337 fn, url, ::copydir, ::envelope );
338
339 // do we need to PURGE the file, yes, if purgemode bit#0 was set.
340 if ( ::purgeMode & 0x01 ) {
341 unsigned long bufsize = strlen(url) + strlen(schablone) + 4;
342 char* buffer = new char[bufsize];
343
6e2aefad 344 snprintf( buffer, bufsize, schablone, url );
feec68a0
A
345 int sockfd = connectTo( serverHost, serverPort, true );
346 if ( sockfd == -1 ) {
347 fprintf( stderr, "unable to connect to server: %s\n", strerror(errno) );
348 delete[] buffer;
349 return false;
350 }
351
352 int size = strlen(buffer);
353 if ( write( sockfd, buffer, size ) != size ) {
354 // error while talking to squid
355 fprintf( stderr, "unable to talk to server: %s\n", strerror(errno) );
356 close(sockfd);
357 delete[] buffer;
358 return false;
359 }
360 memset( buffer+8, 0, 4 );
361 if ( read( sockfd, buffer, bufsize ) < 1 ) {
362 // error while reading squid's answer
363 fprintf( stderr, "unable to read answer: %s\n", strerror(errno) );
364 close(sockfd);
365 delete[] buffer;
366 return false;
367 }
368 close(sockfd);
369 status = strtol(buffer+8,0,10);
370 delete[] buffer;
eb1f6bfa 371 }
feec68a0
A
372
373 // log the output of our operation
374 bool flag = true;
375 if ( ::verbose ) flag = ( log_extended( fn, status, size, &meta ) >= 0 );
376 else flag = ( log_output( fn, status, size, url ) >= 0 );
377
378 // remove the file, if purgemode bit#1, and HTTP result status 404).
379 if ( (::purgeMode & 0x02) && status == 404 ) {
380 reminder = true;
381 if ( unlink(fn) == -1 )
382 // error while unlinking file, this may happen due to the cache
383 // unlinking a file while it is still in the readdir() cache of purge.
384 fprintf( stderr, "WARNING: unable to unlink %s: %s\n",
385 fn, strerror(errno) );
eb1f6bfa 386 }
feec68a0
A
387
388 return flag;
eb1f6bfa
AJ
389}
390
391bool
392match( const char* fn, const REList* list )
feec68a0
A
393// purpose: do something with the given cache content filename
394// paramtr: fn (IN): filename of cache file
395// returns: true for successful action, false otherwise.
396// warning: only return false, if you want the loop to terminate!
eb1f6bfa 397{
feec68a0
A
398 static const size_t addon = sizeof(unsigned char) + sizeof(unsigned int);
399 bool flag = true;
400
59a09b98 401 if ( debugFlag & 0x01 ) fprintf( stderr, "# [3] %s\n", fn );
feec68a0
A
402 int fd = open( fn, O_RDONLY );
403 if ( fd != -1 ) {
404 if ( read(fd,::linebuffer,::buffersize-1) > 60 ) {
405 ::linebuffer[ ::buffersize-1 ] = '\0'; // force-terminate string
406
407 // check the offset into the start of object data. The offset is
408 // stored in a host endianess after the first byte.
409 unsigned int datastart;
410 memcpy( &datastart, ::linebuffer + 1, sizeof(unsigned int) );
411 if ( datastart > ::buffersize - addon - 1 ) {
412 // check offset into server reply header (start of cache data).
413 fputs( "WARNING: Using a truncated URL string.\n", stderr );
414 datastart = ::buffersize - addon - 1;
415 }
416
417 // NEW: Parse squid meta data, which is a kind of linked list
418 // flattened out into a file byte stream. Somewhere within is
419 // the URL as part of the list. First, gobble all meta data.
420 unsigned int offset = addon;
421 SquidMetaList meta;
422 while ( offset + addon <= datastart ) {
423 unsigned int size = 0;
424 memcpy( &size, linebuffer+offset+sizeof(char), sizeof(unsigned int) );
425 meta.append( SquidMetaType(*(linebuffer+offset)),
426 size, linebuffer+offset+addon );
427 offset += ( addon + size );
428 }
429
430 // Now extract the key URL from the meta data.
431 const SquidTLV* urlmeta = meta.search( STORE_META_URL );
432 if ( urlmeta ) {
433 // found URL in meta data. Try to process the URL
434 if ( list == 0 )
435 flag = action( fd, datastart, fn, (char*) urlmeta->data, meta );
436 else {
437 REList* head = (REList*) list; // YUCK!
438 while ( head != 0 ) {
439 if ( head->match( (char*) urlmeta->data ) ) break;
440 head = head->next;
441 }
442 if ( head != 0 )
443 flag = action( fd, datastart, fn, (char*) urlmeta->data, meta );
444 else flag = true;
445 }
446 }
447
448 // "meta" will be deleted when exiting from this block
449 } else {
450 // weird file, FIXME: stat() it!
451 struct stat st;
452 long size = ( fstat(fd,&st) == -1 ? -1 : st.st_size );
453 if ( ::verbose ) flag = ( log_extended( fn, -1, size, 0 ) >= 0 );
454 else flag = ( log_output( fn, -1, size, "strange file" ) >= 0 );
455
456 if ( (::purgeMode & 0x04) ) {
457 reminder = true;
458 if ( unlink(fn) == -1 )
459 // error while unlinking file, this may happen due to the cache
460 // unlinking a file while it is in the readdir() cache of purge.
461 fprintf( stderr, "WARNING: unable to unlink %s: %s\n",
462 fn, strerror(errno) );
463 }
464 }
465 close(fd);
eb1f6bfa 466 } else {
feec68a0
A
467 // error while opening file, this may happen due to the cache
468 // unlinking a file while it is still in the readdir() cache of purge.
469 fprintf( stderr, "WARNING: open \"%s\": %s\n", fn, strerror(errno) );
eb1f6bfa 470 }
feec68a0
A
471
472 return flag;
eb1f6bfa
AJ
473}
474
475bool
476filelevel( const char* directory, const REList* list )
feec68a0
A
477// purpose: from given starting point, look for squid xxxxxxxx files.
478// example: "/var/spool/cache/08/7F" as input, do action over files
479// paramtr: directory (IN): starting point
480// list (IN): list of rexps to match URLs against
481// returns: true, if every subdir && action was successful.
eb1f6bfa 482{
fb151769 483 dirent_t * entry;
59a09b98 484 if ( debugFlag & 0x01 )
feec68a0
A
485 fprintf( stderr, "# [2] %s\n", directory );
486
487 DIR* dir = opendir( directory );
488 if ( dir == NULL ) {
489 fprintf( stderr, "unable to open directory \"%s\": %s\n",
490 directory, strerror(errno) );
491 return false;
492 }
493
494 // display a rotating character as "i am alive" signal (slows purge).
495 if ( ::iamalive ) {
496 static char alivelist[4][3] = { "\\\b", "|\b", "/\b", "-\b" };
497 static unsigned short alivecount = 0;
498 assert( write( STDOUT_FILENO, alivelist[alivecount++ & 3], 2 ) == 2 );
499 }
500
501 bool flag = true;
502 while ( (entry=readdir(dir)) && flag ) {
503 if ( isxstring(entry->d_name,8) ) {
504 char* name = concat( directory, "/", entry->d_name, 0 );
505 flag = match( name, list );
506 delete[] name;
507 }
eb1f6bfa 508 }
eb1f6bfa 509
feec68a0
A
510 closedir(dir);
511 return flag;
eb1f6bfa
AJ
512}
513
514bool
515dirlevel( const char* dirname, const REList* list, bool level=false )
feec68a0
A
516// purpose: from given starting point, look for squid 00..FF directories.
517// paramtr: dirname (IN): starting point
518// list (IN): list of rexps to match URLs against
519// level (IN): false==toplevel, true==1st level
520// example: "/var/spool/cache", false as input, traverse subdirs w/ action.
521// example: "/var/spool/cache/08", true as input, traverse subdirs w/ action.
522// returns: true, if every subdir && action was successful.
523// warning: this function is once-recursive, no deeper.
eb1f6bfa 524{
fb151769 525 dirent_t* entry;
59a09b98 526 if ( debugFlag & 0x01 )
feec68a0
A
527 fprintf( stderr, "# [%d] %s\n", (level ? 1 : 0), dirname );
528
529 DIR* dir = opendir( dirname );
530 if ( dir == NULL ) {
531 fprintf( stderr, "unable to open directory \"%s\": %s\n",
532 dirname, strerror(errno) );
533 return false;
eb1f6bfa 534 }
eb1f6bfa 535
feec68a0
A
536 bool flag = true;
537 while ( (entry=readdir(dir)) && flag ) {
538 if ( strlen(entry->d_name) == 2 &&
539 isxdigit(entry->d_name[0]) &&
540 isxdigit(entry->d_name[1]) ) {
541 char* name = concat( dirname, "/", entry->d_name, 0 );
542 flag = level ? filelevel( name, list ) : dirlevel( name, list, true );
543 delete[] name;
544 }
545 }
546
547 closedir(dir);
548 return flag;
eb1f6bfa
AJ
549}
550
int
checkForPortOnly( const char* optarg )
// purpose: see if somebody just put in a port instead of a hostname
// paramtr: optarg (IN): argument from commandline
// returns: 0..65535 is the valid port number in network byte order,
//          -1 if not a port
// warning: symbolic service names are not looked up.
{
    // if there is a period in there, it must be a valid hostname
    if ( strchr( optarg, '.' ) != 0 ) return -1;

    // if it is just a number between 0 and 65535, it must be a port.
    // The whole argument has to be consumed: a host name that merely
    // starts with digits, like "3com", is not a port.
    char* errstr = 0;
    unsigned long result = strtoul( optarg, &errstr, 0 );
    if ( errstr == optarg || *errstr != '\0' ) return -1;
    if ( result >= 65536 ) return -1;

    return htons(result);
}
574
575void
576helpMe( void )
feec68a0 577// purpuse: write help message and exit
eb1f6bfa 578{
feec68a0
A
579 printf( "\n%s\nUsage:\t%s\t[-a] [-c cf] [-d l] [-(f|F) fn | -(e|E) re] "
580 "[-p h[:p]]\n\t\t[-P #] [-s] [-v] [-C dir [-H]] [-n]\n\n",
581 ::RCS_ID, ::programname );
582 printf(
583 " -a\tdisplay a little rotating thingy to indicate that I am alive (tty only).\n"
584 " -c c\tsquid.conf location, default \"%s\".\n"
585 " -C dir\tbase directory for content extraction (copy-out mode).\n"
586 " -d l\tdebug level, an OR of different debug options.\n"
587 " -e re\tsingle regular expression per -e instance (use quotes!).\n"
588 " -E re\tsingle case sensitive regular expression like -e.\n"
589 " -f fn\tname of textfile containing one regular expression per line.\n"
590 " -F fn\tname of textfile like -f containing case sensitive REs.\n"
591 " -H\tprepend HTTP reply header to destination files in copy-out mode.\n"
592 " -n\tdo not fork() when using more than one cache_dir.\n"
593 " -p h:p\tcache runs on host h and optional port p, default is %s:%u.\n"
594 " -P #\tif 0, just print matches; otherwise OR the following purge modes:\n"
595 "\t 0x01 really send PURGE to the cache.\n"
596 "\t 0x02 remove all caches files reported as 404 (not found).\n"
597 "\t 0x04 remove all weird (inaccessible or too small) cache files.\n"
598 "\t0 and 1 are recommended - slow rebuild your cache with other modes.\n"
599 " -s\tshow all options after option parsing, but before really starting.\n"
600 " -v\tshow more information about the file, e.g. MD5, timestamps and flags.\n"
601 "\n", DEFAULT_SQUID_CONF, DEFAULTHOST, DEFAULTPORT );
eb1f6bfa
AJ
602
603}
604
605void
606parseCommandline( int argc, char* argv[], REList*& head,
feec68a0
A
607 char*& conffile, char*& copydir,
608 struct in_addr& serverHost, unsigned short& serverPort )
609// paramtr: argc: see ::main().
610// argv: see ::main().
611// returns: Does terminate the program on errors!
612// purpose: suck in any commandline options, and set the global vars.
613{
614 int option, port, showme = 0;
615 char* ptr, *colon;
616 FILE* rfile;
617
618 // program basename
619 if ( (ptr = strrchr(argv[0],'/')) == NULL ) ptr=argv[0];
620 else ptr++;
621 ::programname = ptr;
622
623 // extract commandline parameters
624 REList* tail = head = 0;
625 opterr = 0;
626 while ( (option = getopt( argc, argv, "ac:C:d:E:e:F:f:Hnp:P:sv" )) != -1 ) {
627 switch ( option ) {
628 case 'a':
629 ::iamalive = ! ::iamalive;
630 break;
631 case 'C':
632 if ( optarg && *optarg ) {
6e2aefad 633 if ( copydir ) xfree( (void*) copydir );
2ccf2eb2 634 assert( (copydir = xstrdup(optarg)) );
feec68a0
A
635 }
636 break;
637 case 'c':
638 if ( optarg && *optarg ) {
6e2aefad 639 if ( *conffile ) xfree((void*) conffile );
2ccf2eb2 640 assert( (conffile = xstrdup(optarg)) );
feec68a0
A
641 }
642 break;
643
644 case 'd':
59a09b98 645 ::debugFlag = strtoul( optarg, 0, 0 );
feec68a0
A
646 break;
647
648 case 'E':
649 case 'e':
650 if ( head == 0 ) tail = head = new REList( optarg, option=='E' );
651 else {
652 tail->next = new REList( optarg, option=='E' );
653 tail = tail->next;
654 }
655 break;
656
657 case 'f':
658 if ( (rfile = fopen( optarg, "r" )) != NULL ) {
659 unsigned long lineno = 0;
eb1f6bfa 660#define LINESIZE 512
feec68a0
A
661 char line[LINESIZE];
662 while ( fgets( line, LINESIZE, rfile ) != NULL ) {
663 lineno++;
664 int len = strlen(line)-1;
665 if ( len+2 >= LINESIZE ) {
666 fprintf( stderr, "%s:%lu: line too long, sorry.\n",
667 optarg, lineno );
668 exit(1);
669 }
670
671 // remove trailing line breaks
672 while ( len > 0 && ( line[len] == '\n' || line[len] == '\r' ) )
673 line[len--] = '\0';
674
675 // insert into list of expressions
676 if ( head == 0 ) tail = head = new REList(line,option=='F');
677 else {
678 tail->next = new REList(line,option=='F');
679 tail = tail->next;
680 }
681 }
682 fclose(rfile);
683 } else
684 fprintf( stderr, "unable to open %s: %s\n", optarg, strerror(errno));
685 break;
686
687 case 'H':
688 ::envelope = ! ::envelope;
689 break;
690 case 'n':
691 ::no_fork = ! ::no_fork;
692 break;
693 case 'p':
694 colon = strchr( optarg, ':' );
695 if ( colon == 0 ) {
696 // no colon, only look at host
697
698 // fix: see if somebody just put in there a port (no periods)
699 // give port number precedence over host names
700 port = checkForPortOnly( optarg );
701 if ( port == -1 ) {
702 // assume that main() did set the default port
703 if ( convertHostname(optarg,serverHost) == -1 ) {
704 fprintf( stderr, "unable to resolve host %s!\n", optarg );
705 exit(1);
706 }
707 } else {
708 // assume that main() did set the default host
709 serverPort = port;
710 }
711 } else {
712 // colon used, port is extra
713 *colon++ = 0;
714 if ( convertHostname(optarg,serverHost) == -1 ) {
715 fprintf( stderr, "unable to resolve host %s!\n", optarg );
716 exit(1);
717 }
718 if ( convertPortname(colon,serverPort) == -1 ) {
719 fprintf( stderr, "unable to resolve port %s!\n", colon );
720 exit(1);
721 }
722 }
723 break;
724 case 'P':
725 ::purgeMode = ( strtol( optarg, 0, 0 ) & 0x07 );
726 break;
727 case 's':
728 showme=1;
729 break;
730 case 'v':
731 ::verbose = ! ::verbose;
732 break;
733 case '?':
734 default:
735 helpMe();
736 exit(1);
737 }
738 }
739
740 // adjust
59a09b98 741 if ( ! isatty(fileno(stdout)) || (::debugFlag & 0x01) ) ::iamalive = false;
feec68a0
A
742 if ( head == 0 ) {
743 fputs( "There was no regular expression defined. If you intend\n", stderr );
744 fputs( "to match all possible URLs, use \"-e .\" instead.\n", stderr );
745 exit(1);
eb1f6bfa 746 }
feec68a0
A
747
748 // postcondition: head != 0
749 assert( head != 0 );
750
751 // make sure that the copy out directory is there and accessible
752 if ( copydir && *copydir )
753 if ( assert_copydir( copydir ) != 0 ) exit(1);
754
755 // show results
756 if ( showme ) {
757 printf( "#\n# Currently active values for %s:\n# %s\n",
758 ::programname, ::RCS_ID );
759 printf( "# Debug level : " );
59a09b98 760 if ( ::debugFlag ) printf( "%#6.4hx", ::debugFlag );
feec68a0
A
761 else printf( "production level" ); // printf omits 0x prefix for 0!
762 printf( " + %s mode", ::no_fork ? "linear" : "parallel" );
763 puts( ::verbose ? " + extra verbosity" : "" );
764
765 printf( "# Copy-out directory: %s ",
766 copydir ? copydir : "copy-out mode disabled" );
767 if ( copydir )
768 printf( "(%s HTTP header)\n", ::envelope ? "prepend" : "no" );
769 else
770 puts("");
771
772 printf( "# Squid config file : %s\n", conffile );
773 printf( "# Cacheserveraddress: %s:%u\n",
774 inet_ntoa( serverHost ), ntohs( serverPort ) );
775 printf( "# purge mode : 0x%02x\n", ::purgeMode );
776 printf( "# Regular expression: " );
777
778 unsigned count(0);
779 for ( tail = head; tail != NULL; tail = tail->next ) {
780 if ( count++ ) printf( "#%22u", count );
eb1f6bfa 781#if defined(LINUX) && putc==_IO_putc
feec68a0
A
782 // I HATE BROKEN LINUX HEADERS!
783 // purge.o(.text+0x1040): undefined reference to `_IO_putc'
784 // If your compilation breaks here, remove the undefinition
785#undef putc
eb1f6bfa 786#endif
feec68a0
A
787 else putchar('1');
788 printf( " \"%s\"\n", tail->data );
789 }
790 puts( "#" );
eb1f6bfa 791 }
feec68a0 792 fflush( stdout );
eb1f6bfa
AJ
793}
794
795extern "C" {
796
feec68a0
A
797 static
798 void
799 exiter( void ) {
800 if ( ::term_flag ) psignal( ::term_flag, "received signal" );
801 delete[] ::linebuffer;
802 if ( ::reminder ) {
803 fputs(
804 "WARNING! Caches files were removed. Please shut down your cache, remove\n"
805 "your swap.state files and restart your cache again, i.e. effictively do\n"
806 "a slow rebuild your cache! Otherwise your squid *will* choke!\n", stderr );
807 }
808 }
eb1f6bfa 809
feec68a0
A
810 static
811 void
812 handler( int signo ) {
813 ::term_flag = signo;
814 if ( getpid() == getpgrp() ) kill( -getpgrp(), signo );
815 exit(1);
816 }
eb1f6bfa
AJ
817
818} // extern "C"
819
static
int
makelinebuffered( FILE* fp, const char* fn = 0 )
// purpose: switch the given FILE into line buffered mode
// paramtr: fp (IO): file pointer to put into line buffer mode
//          fn (IN): name of the file to mention in the error message,
//                   may be NULL
// returns: 0 is ok, -1 to indicate an error
// warning: the error message has already been printed on return
{
    if ( setvbuf( fp, 0, _IOLBF, 0 ) != 0 ) {
        // error
        const char* label = fn ? fn : "";
        fprintf( stderr, "unable to make \"%s\" line buffered: %s\n",
                 label, strerror(errno) );
        return -1;
    }

    // ok
    return 0;
}
839
840int
841main( int argc, char* argv[] )
842{
feec68a0
A
843 // setup variables
844 REList* list = 0;
2ccf2eb2 845 char* conffile = xstrdup( DEFAULT_SQUID_CONF );
feec68a0
A
846 serverPort = htons(DEFAULTPORT);
847 if ( convertHostname(DEFAULTHOST,serverHost) == -1 ) {
848 fprintf( stderr, "unable to resolve host %s!\n", DEFAULTHOST );
849 return 1;
850 }
851
852 // setup line buffer
853 ::linebuffer = new char[ ::buffersize ];
854 assert( ::linebuffer != 0 );
855
856 // parse commandline
857 puts( "### Use at your own risk! No guarantees whatsoever. You were warned. ###");
858 parseCommandline( argc, argv, list, conffile, ::copydir,
859 serverHost, serverPort );
860
861 // prepare execution
862 if ( atexit( exiter ) != 0 ||
863 Signal( SIGTERM, handler, true ) == SIG_ERR ||
864 Signal( SIGINT, handler, true ) == SIG_ERR ||
865 Signal( SIGHUP, handler, true ) == SIG_ERR ) {
866 perror( "unable to install signal/exit function" );
867 return 1;
868 }
869
870 // try to read squid.conf file to determine all cache_dir locations
871 CacheDirVector cdv(0);
59a09b98 872 if ( readConfigFile( cdv, conffile, debugFlag ? stderr : 0 ) > 0 ) {
feec68a0
A
873 // there are some valid cache_dir entries.
874 // unless forking was forbidden by cmdline option,
875 // for a process for each cache_dir entry to remove files.
876
877 if ( ::no_fork || cdv.size() == 1 ) {
878 // linear mode, one cache_dir after the next
879 for ( CacheDirVector::iterator i = cdv.begin(); i != cdv.end(); ++i ) {
880 // execute OR complain
881 if ( ! dirlevel(i->base,list) )
882 fprintf( stderr, "program terminated due to error: %s",
883 strerror(errno) );
6e2aefad 884 xfree((void*) i->base);
feec68a0
A
885 }
886 } else {
887 // parallel mode, all cache_dir in parallel
888 pid_t* child = new pid_t[ cdv.size() ];
889
890 // make stdout/stderr line bufferd
891 makelinebuffered( stdout, "stdout" );
892 makelinebuffered( stderr, "stderr" );
893
894 // make parent process group leader for easier killings
895 if ( setpgid(getpid(), getpid()) != 0 ) {
896 perror( "unable to set process group leader" );
897 return 1;
898 }
899
900 // -a is mutually exclusive with fork mode
901 if ( ::iamalive ) {
902 puts( "# i-am-alive flag incompatible with fork mode, resetting" );
903 ::iamalive = false;
904 }
905
2ccf2eb2 906 for ( size_t i=0; i < cdv.size(); ++i ) {
feec68a0
A
907 if ( getpid() == getpgrp() ) {
908 // only parent == group leader may fork off new processes
909 if ( (child[i]=fork()) < 0 ) {
910 // fork error, this is bad!
911 perror( "unable to fork" );
912 kill( -getpgrp(), SIGTERM );
913 return 1;
914 } else if ( child[i] == 0 ) {
915 // child mode
916 // execute OR complain
917 if ( ! dirlevel(cdv[i].base,list) )
918 fprintf( stderr, "program terminated due to error: %s\n",
919 strerror(errno) );
6e2aefad 920 xfree((void*) cdv[i].base);
feec68a0
A
921 return 0;
922 } else {
923 // parent mode
59a09b98 924 if ( ::debugFlag ) printf( "forked child %d\n", (int) child[i] );
feec68a0
A
925 }
926 }
927 }
928
929 // collect the garbase
930 pid_t temp;
931 int status;
2ccf2eb2 932 for ( size_t i=0; i < cdv.size(); ++i ) {
feec68a0
A
933 while ( (temp=waitpid( (pid_t)-1, &status, 0 )) == -1 )
934 if ( errno == EINTR ) continue;
59a09b98 935 if ( ::debugFlag ) printf( "collected child %d\n", (int) temp );
feec68a0
A
936 }
937 delete[] child;
938 }
eb1f6bfa 939 } else {
feec68a0 940 fprintf( stderr, "no cache_dir or error accessing \"%s\"\n", conffile );
eb1f6bfa 941 }
feec68a0
A
942
943 // clean up
6e2aefad
HN
944 if ( copydir ) xfree( (void*) copydir );
945 xfree((void*) conffile);
feec68a0
A
946 delete list;
947 return 0;
eb1f6bfa 948}