]> git.ipfire.org Git - thirdparty/squid.git/blame - tools/purge/purge.cc
SourceFormat Enforcement
[thirdparty/squid.git] / tools / purge / purge.cc
CommitLineData
0b96a9b3 1// Author: Jens-S. Vöckler <voeckler@rvs.uni-hannover.de>
eb1f6bfa
AJ
2//
3// File: purge.cc
4// Wed Jan 13 1999
5//
6// (c) 1999 Lehrgebiet Rechnernetze und Verteilte Systeme
0b96a9b3 7// Universität Hannover, Germany
eb1f6bfa
AJ
8//
9// Permission to use, copy, modify, distribute, and sell this software
10// and its documentation for any purpose is hereby granted without fee,
11// provided that (i) the above copyright notices and this permission
12// notice appear in all copies of the software and related documentation,
13// and (ii) the names of the Lehrgebiet Rechnernetze und Verteilte
14// Systeme and the University of Hannover may not be used in any
15// advertising or publicity relating to the software without the
16// specific, prior written permission of Lehrgebiet Rechnernetze und
17// Verteilte Systeme and the University of Hannover.
18//
19// THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND,
20// EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
21// WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
22//
23// IN NO EVENT SHALL THE LEHRGEBIET RECHNERNETZE UND VERTEILTE SYSTEME OR
24// THE UNIVERSITY OF HANNOVER BE LIABLE FOR ANY SPECIAL, INCIDENTAL,
25// INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, OR ANY DAMAGES
26// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER OR NOT
27// ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF LIABILITY,
28// ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
29// SOFTWARE.
30//
eb1f6bfa
AJ
31// Revision 1.17 2000/09/21 10:59:53 cached
32// *** empty log message ***
33//
34// Revision 1.16 2000/09/21 09:45:18 cached
35// Fixed some small bugs.
36//
37// Revision 1.15 2000/09/21 09:05:56 cached
38// added multi cache_dir support, thus changing -c cmdline option.
39// modified file reading to support /dev/fd/0 reading for non-disclosed items.
40//
41// Revision 1.14 2000/06/20 09:43:01 voeckler
42// added FreeBSD related fixes and support.
43//
44// Revision 1.13 2000/03/29 08:12:21 voeckler
45// fixed wrong header file.
46//
47// Revision 1.12 2000/03/29 07:54:41 voeckler
48// added mechanism to give a port specification precedence over a host
49// specification with the -p option and not colon.
50//
51// Revision 1.11 1999/06/18 13:18:28 voeckler
52// added refcount, fixed missing LF in -s output.
53//
54// Revision 1.10 1999/06/16 13:06:05 voeckler
55// reversed meaning of -M flag.
56//
57// Revision 1.9 1999/06/15 21:11:53 voeckler
58// added extended logging feature which extract the squid meta data available
59// within the disk files. moved the content extraction and squid meta data
60// handling parts into separate files. added options for copy-out and verbose.
61//
62// Revision 1.8 1999/06/14 20:14:46 voeckler
63// intermediate version when adding understanding about the way
64// Squid does log the metadata into the file.
65//
66// Revision 1.7 1999/01/23 21:01:10 root
67// stumbled over libc5 header/lib inconsistency bug....
68//
69// Revision 1.6 1999/01/23 20:47:54 root
70// added Linux specifics for psignal...
71// Hope this helps.
72//
73// Revision 1.5 1999/01/20 09:48:12 voeckler
74// added warning as first line of output.
75//
76// Revision 1.4 1999/01/19 11:53:49 voeckler
77// added psignal() from <siginfo.h> handling.
78//
79// Revision 1.3 1999/01/19 11:00:50 voeckler
80// added keyboard interrupt handling, exit handling, removed C++ strings and
81// regular expression syntax in favour of less source code, added comments,
82// added a reminder to remove swap.state in case of unlinks, added IAA flag,
83// added a few assertions, changed policy to enforce the definition of at
84// least one regular expression, and catch a few signals.
85//
86// Revision 1.2 1999/01/15 23:06:28 voeckler
87// downgraded to simple C strings...
88//
89// Revision 1.1 1999/01/14 12:05:32 voeckler
90// Initial revision
91//
92//
f7f3304a 93#include "squid.h"
2ccf2eb2
AJ
94#include "util.h"
95
eb1f6bfa
AJ
96#include <stdarg.h>
97#include <stdio.h>
98#include <dirent.h>
eb1f6bfa 99#include <string.h>
eb1f6bfa
AJ
100#include <sys/stat.h>
101#include <sys/wait.h>
102#include <fcntl.h>
103#include <unistd.h>
104#include <stdlib.h>
105#include <limits.h>
106#include <signal.h>
107#include <errno.h>
108
2ccf2eb2 109#if HAVE_SIGINFO_H
eb1f6bfa 110#include <siginfo.h>
2ccf2eb2 111#endif
eb1f6bfa
AJ
112
113#include <netinet/in.h>
b0fb853f 114#include <netinet/tcp.h>
eb1f6bfa 115#include <arpa/inet.h>
582c2af2 116#include <netdb.h>
eb1f6bfa 117
602d9612 118#include "conffile.hh"
eb1f6bfa 119#include "convert.hh"
602d9612 120#include "copyout.hh"
eb1f6bfa 121#include "signal.hh"
602d9612 122#include "socket.hh"
eb1f6bfa 123#include "squid-tlv.hh"
eb1f6bfa
AJ
124
// Compile-time defaults for the cache to talk to; both may be overridden
// from the compiler command line.
#ifndef DEFAULTHOST
#define DEFAULTHOST "localhost"
#endif // DEFAULTHOST

#ifndef DEFAULTPORT
#define DEFAULTPORT 3128
#endif // DEFAULTPORT

// ---- objects with external linkage ----

// number of the signal seen by handler(), 0 as long as none arrived.
// ('terminate' is a gcc 2.8.x internal, hence the name.)
volatile sig_atomic_t term_flag = 0;
// size in bytes of the area ::linebuffer points to
size_t buffersize = 128*1024;
// scratch area the start of each cache file is read into; allocated by main()
char* linebuffer = NULL;

// ---- settings filled in by parseCommandline() ----

static const char* programname = NULL; // basename of argv[0]
static char* copydir = NULL;           // -C: base directory for copy-out mode
static uint32_t debugFlag = 0;         // -d: OR mask of debug options
static unsigned purgeMode = 0;         // -P: OR mask of purge modes
static bool iamalive = false;          // -a: show the rotating "alive" character
static bool verbose = false;           // -v: use the extended log format
static bool envelope = false;          // -H: prepend HTTP header when copying out
static bool no_fork = false;           // -n: handle the cache_dirs one after another

// ---- run-time state ----

// set once a cache file was unlinked; makes exiter() print its warning
static bool reminder = false;
eb1f6bfa
AJ
145
146// ----------------------------------------------------------------------
147
148struct REList {
feec68a0
A
149 REList( const char* what, bool doCase );
150 ~REList();
151 bool match( const char* check ) const;
eb1f6bfa 152
feec68a0
A
153 REList* next;
154 const char* data;
155 regex_t rexp;
eb1f6bfa
AJ
156};
157
158REList::REList( const char* what, bool doCase )
2ccf2eb2 159 :next(0),data(xstrdup(what))
eb1f6bfa 160{
feec68a0
A
161 int result = regcomp( &rexp, what,
162 REG_EXTENDED | REG_NOSUB | (doCase ? 0 : REG_ICASE) );
163 if ( result != 0 ) {
164 char buffer[256];
165 regerror( result, &rexp, buffer, 256 );
166 fprintf( stderr, "unable to compile re \"%s\": %s\n", what, buffer );
167 exit(1);
168 }
eb1f6bfa
AJ
169}
170
171REList::~REList()
feec68a0
A
172{
173 if ( next ) delete next;
6e2aefad 174 if ( data ) xfree((void*) data);
feec68a0 175 regfree(&rexp);
eb1f6bfa
AJ
176}
177
178bool
179REList::match( const char* check ) const
180{
feec68a0
A
181 int result = regexec( &rexp, check, 0, 0, 0 );
182 if ( result != 0 && result != REG_NOMATCH ) {
183 char buffer[256];
184 regerror( result, &rexp, buffer, 256 );
185 fprintf( stderr, "unable to execute re \"%s\"\n+ on line \"%s\": %s\n",
186 data, check, buffer );
187 exit(1);
188 }
189 return ( result == 0 );
eb1f6bfa
AJ
190}
191
192// ----------------------------------------------------------------------
193
char*
concat( const char* start, ... )
// purpose: concatenate an arbitrary number of C strings.
// paramtr: start (IN): first C string
//          ... (IN): further C strings, terminated with a NULL pointer.
//                    The terminator must have pointer type, a plain 0 is
//                    an int and not portable in a variable argument list.
// returns: memory allocated via new[], containing the concatenated string.
//          The caller releases it with delete[].
{
    va_list ap;
    const char* s;

    // first run: determine size. size_t matches what strlen() returns and
    // does not wrap where an unsigned is narrower than a pointer.
    const size_t startlen = strlen(start);
    size_t size = startlen + 1;
    va_start( ap, start );
    while ( (s=va_arg(ap,const char*)) != NULL )
        size += strlen(s);
    va_end(ap);

    // allocate. A plain new[] reports failure by throwing, it never
    // returns 0, so there is nothing to test for here.
    char* result = new char[size];

    // second run: copy content. Appending at a remembered position avoids
    // rescanning the growing result for every argument like strcat() does.
    memcpy( result, start, startlen );
    size_t used = startlen;
    va_start( ap, start );
    while ( (s=va_arg(ap,const char*)) != NULL ) {
        const size_t len = strlen(s);
        memcpy( result + used, s, len );
        used += len;
    }
    va_end(ap);
    result[used] = '\0';

    return result;
}
226
bool
isxstring( const char* s, size_t testlen )
// purpose: test a string for conforming to xdigit
// paramtr: s (IN): string to test
//          testlen (IN): length the string must have
// returns: true, iff strlen(s)==testlen && all_x_chars(s), false otherwise
{
    // The terminating NUL is no hex digit, thus the scan never runs past
    // the end of a string which is shorter than testlen.
    // isxdigit() is only defined for EOF and values representable as
    // unsigned char; a plain char may be negative for bytes above 0x7F.
    size_t i = 0;
    while ( i < testlen && isxdigit( (unsigned char) s[i] ) )
        ++i;

    // all testlen characters were hex digits, and the string ends right there
    return ( i == testlen && s[i] == '\0' );
}
241
inline
int
log_output( const char* fn, int code, long size, const char* url )
// purpose: print the short, one line log record of a cache file to stdout
// paramtr: fn (IN): name of the cache file
//          code (IN): status to report
//          size (IN): size to report
//          url (IN): URL, or a replacement text, to report
// returns: whatever printf() returned, negative in case of output errors.
{
    const int written = printf( "%s %3d %8ld %s\n", fn, code, size, url );
    return written;
}
248
249static
250int
251log_extended( const char* fn, int code, long size, const SquidMetaList* meta )
252{
feec68a0
A
253 static const char hexdigit[] = "0123456789ABCDEF";
254 char md5[34];
255 const SquidTLV* findings = 0;
256
257 if ( meta && (findings = meta->search( STORE_META_KEY_MD5 )) ) {
258 unsigned char* s = (unsigned char*) findings->data;
cbebe602 259 for ( int j=0; j<16; ++j, ++s ) {
feec68a0
A
260 md5[j*2+0] = hexdigit[ *s >> 4 ];
261 md5[j*2+1] = hexdigit[ *s & 15 ];
262 }
263 md5[32] = '\0'; // terminate string
264 } else {
6e2aefad 265 snprintf( md5, sizeof(md5), "%-32s", "(no_md5_data_available)" );
feec68a0
A
266 }
267
268 char timeb[64];
269 if ( meta && (findings = meta->search( STORE_META_STD )) ) {
270 StoreMetaStd temp;
271 // make data aligned, avoid SIGBUS on RISC machines (ARGH!)
272 memcpy( &temp, findings->data, sizeof(StoreMetaStd) );
6e2aefad 273 snprintf( timeb, sizeof(timeb), "%08lx %08lx %08lx %08lx %04x %5hu ",
8978bd9d
A
274 (unsigned long)temp.timestamp, (unsigned long)temp.lastref,
275 (unsigned long)temp.expires, (unsigned long)temp.lastmod, temp.flags, temp.refcount );
feec68a0
A
276 } else if ( meta && (findings = meta->search( STORE_META_STD_LFS )) ) {
277 StoreMetaStdLFS temp;
278 // make data aligned, avoid SIGBUS on RISC machines (ARGH!)
279 memcpy( &temp, findings->data, sizeof(StoreMetaStd) );
6e2aefad 280 snprintf( timeb, sizeof(timeb), "%08lx %08lx %08lx %08lx %04x %5hu ",
8978bd9d
A
281 (unsigned long)temp.timestamp, (unsigned long)temp.lastref,
282 (unsigned long)temp.expires, (unsigned long)temp.lastmod, temp.flags, temp.refcount );
feec68a0 283 } else {
8baf6ea3 284 unsigned long ul = ULONG_MAX; // Match type of StoreMetaTLV fields
d8b258a9
PW
285 unsigned short hu = 0; // Match type of StoreMetaTLV refcount fields
286 snprintf( timeb, sizeof(timeb), "%08lx %08lx %08lx %08lx %04x %5d ", ul, ul, ul, ul, 0, hu);
feec68a0
A
287 }
288
289 // make sure that there is just one printf()
290 if ( meta && (findings = meta->search( STORE_META_URL )) ) {
291 return printf( "%s %3d %8ld %s %s %s\n",
292 fn, code, size, md5, timeb, findings->data );
293 } else {
294 return printf( "%s %3d %8ld %s %s strange_file\n",
295 fn, code, size, md5, timeb );
eb1f6bfa 296 }
eb1f6bfa
AJ
297}
298
// Address and port of the cache the PURGE requests are sent to. Both live
// at file scope (pure laziness) so that action() does not need them as
// parameters. main() stores the port via htons(), i.e. in network byte order.
static unsigned short serverPort;
static struct in_addr serverHost;
302
303bool
304action( int fd, size_t metasize,
feec68a0
A
305 const char* fn, const char* url, const SquidMetaList& meta )
306// purpose: if cmdline-requested, send the purge request to the cache
307// paramtr: fd (IN): open FD for the object file
308// metasize (IN): offset into data portion of file (meta data size)
309// fn (IN): name of the object file
310// url (IN): URL string stored in the object file
311// meta (IN): list containing further meta data
312// returns: true for a successful action, false otherwise. The action
313// may just print the file, send the purge request or even
314// remove unwanted files.
315// globals: ::purgeMode (IN): bit#0 set -> send purge request.
316// bit#1 set -> remove 404 object files.
317// ::serverHost (IN): cache host address
318// ::serverPort (IN): cache port number
eb1f6bfa 319{
feec68a0
A
320 static const char* schablone = "PURGE %s HTTP/1.0\r\nAccept: */*\r\n\r\n";
321 struct stat st;
322 long size = ( fstat(fd,&st) == -1 ? -1 : long(st.st_size - metasize) );
feec68a0
A
323
324 // if we want to copy out the file, do that first of all.
325 if ( ::copydir && *copydir && size > 0 )
59a09b98 326 copy_out( st.st_size, metasize, ::debugFlag,
feec68a0
A
327 fn, url, ::copydir, ::envelope );
328
329 // do we need to PURGE the file, yes, if purgemode bit#0 was set.
e65cf8d5 330 int status = 0;
feec68a0
A
331 if ( ::purgeMode & 0x01 ) {
332 unsigned long bufsize = strlen(url) + strlen(schablone) + 4;
333 char* buffer = new char[bufsize];
334
6e2aefad 335 snprintf( buffer, bufsize, schablone, url );
feec68a0
A
336 int sockfd = connectTo( serverHost, serverPort, true );
337 if ( sockfd == -1 ) {
338 fprintf( stderr, "unable to connect to server: %s\n", strerror(errno) );
339 delete[] buffer;
340 return false;
341 }
342
9dca980d
AJ
343 int content_size = strlen(buffer);
344 if ( write( sockfd, buffer, content_size ) != content_size ) {
feec68a0
A
345 // error while talking to squid
346 fprintf( stderr, "unable to talk to server: %s\n", strerror(errno) );
347 close(sockfd);
348 delete[] buffer;
349 return false;
350 }
351 memset( buffer+8, 0, 4 );
3efeeda2
AJ
352 int readLen = read(sockfd, buffer, bufsize);
353 if (readLen < 1) {
feec68a0
A
354 // error while reading squid's answer
355 fprintf( stderr, "unable to read answer: %s\n", strerror(errno) );
356 close(sockfd);
357 delete[] buffer;
358 return false;
359 }
3efeeda2 360 buffer[bufsize-1] = '\0';
feec68a0 361 close(sockfd);
e65cf8d5
AJ
362 int64_t s = strtol(buffer+8,0,10);
363 if (s > 0 && s < 1000)
364 status = s;
365 else {
366 // error while reading squid's answer
367 fprintf( stderr, "invalid HTTP status in reply: %s\n", buffer+8);
368 }
feec68a0 369 delete[] buffer;
eb1f6bfa 370 }
feec68a0
A
371
372 // log the output of our operation
373 bool flag = true;
374 if ( ::verbose ) flag = ( log_extended( fn, status, size, &meta ) >= 0 );
375 else flag = ( log_output( fn, status, size, url ) >= 0 );
376
377 // remove the file, if purgemode bit#1, and HTTP result status 404).
378 if ( (::purgeMode & 0x02) && status == 404 ) {
379 reminder = true;
380 if ( unlink(fn) == -1 )
381 // error while unlinking file, this may happen due to the cache
382 // unlinking a file while it is still in the readdir() cache of purge.
383 fprintf( stderr, "WARNING: unable to unlink %s: %s\n",
384 fn, strerror(errno) );
eb1f6bfa 385 }
feec68a0
A
386
387 return flag;
eb1f6bfa
AJ
388}
389
390bool
391match( const char* fn, const REList* list )
feec68a0
A
392// purpose: do something with the given cache content filename
393// paramtr: fn (IN): filename of cache file
394// returns: true for successful action, false otherwise.
395// warning: only return false, if you want the loop to terminate!
eb1f6bfa 396{
feec68a0
A
397 static const size_t addon = sizeof(unsigned char) + sizeof(unsigned int);
398 bool flag = true;
399
59a09b98 400 if ( debugFlag & 0x01 ) fprintf( stderr, "# [3] %s\n", fn );
feec68a0
A
401 int fd = open( fn, O_RDONLY );
402 if ( fd != -1 ) {
e65cf8d5
AJ
403 memset(::linebuffer, 0, ::buffersize);
404 size_t readLen = read(fd,::linebuffer,::buffersize-1);
405 if ( readLen > 60 ) {
feec68a0
A
406 ::linebuffer[ ::buffersize-1 ] = '\0'; // force-terminate string
407
408 // check the offset into the start of object data. The offset is
409 // stored in a host endianess after the first byte.
410 unsigned int datastart;
411 memcpy( &datastart, ::linebuffer + 1, sizeof(unsigned int) );
412 if ( datastart > ::buffersize - addon - 1 ) {
413 // check offset into server reply header (start of cache data).
414 fputs( "WARNING: Using a truncated URL string.\n", stderr );
415 datastart = ::buffersize - addon - 1;
416 }
417
418 // NEW: Parse squid meta data, which is a kind of linked list
419 // flattened out into a file byte stream. Somewhere within is
420 // the URL as part of the list. First, gobble all meta data.
421 unsigned int offset = addon;
422 SquidMetaList meta;
423 while ( offset + addon <= datastart ) {
424 unsigned int size = 0;
425 memcpy( &size, linebuffer+offset+sizeof(char), sizeof(unsigned int) );
3efeeda2
AJ
426 if (size+offset < size) {
427 fputs("WARNING: file corruption detected. 32-bit overflow in size field.\n", stderr);
428 break;
429 }
e65cf8d5
AJ
430 if (size+offset > readLen) {
431 fputs( "WARNING: Partial meta data loaded.\n", stderr );
432 break;
433 }
feec68a0
A
434 meta.append( SquidMetaType(*(linebuffer+offset)),
435 size, linebuffer+offset+addon );
436 offset += ( addon + size );
437 }
438
439 // Now extract the key URL from the meta data.
440 const SquidTLV* urlmeta = meta.search( STORE_META_URL );
441 if ( urlmeta ) {
442 // found URL in meta data. Try to process the URL
443 if ( list == 0 )
444 flag = action( fd, datastart, fn, (char*) urlmeta->data, meta );
445 else {
446 REList* head = (REList*) list; // YUCK!
447 while ( head != 0 ) {
448 if ( head->match( (char*) urlmeta->data ) ) break;
449 head = head->next;
450 }
451 if ( head != 0 )
452 flag = action( fd, datastart, fn, (char*) urlmeta->data, meta );
453 else flag = true;
454 }
455 }
456
457 // "meta" will be deleted when exiting from this block
458 } else {
459 // weird file, FIXME: stat() it!
460 struct stat st;
461 long size = ( fstat(fd,&st) == -1 ? -1 : st.st_size );
462 if ( ::verbose ) flag = ( log_extended( fn, -1, size, 0 ) >= 0 );
463 else flag = ( log_output( fn, -1, size, "strange file" ) >= 0 );
464
465 if ( (::purgeMode & 0x04) ) {
466 reminder = true;
467 if ( unlink(fn) == -1 )
468 // error while unlinking file, this may happen due to the cache
469 // unlinking a file while it is in the readdir() cache of purge.
470 fprintf( stderr, "WARNING: unable to unlink %s: %s\n",
471 fn, strerror(errno) );
472 }
473 }
474 close(fd);
eb1f6bfa 475 } else {
feec68a0
A
476 // error while opening file, this may happen due to the cache
477 // unlinking a file while it is still in the readdir() cache of purge.
478 fprintf( stderr, "WARNING: open \"%s\": %s\n", fn, strerror(errno) );
eb1f6bfa 479 }
feec68a0
A
480
481 return flag;
eb1f6bfa
AJ
482}
483
484bool
485filelevel( const char* directory, const REList* list )
feec68a0
A
486// purpose: from given starting point, look for squid xxxxxxxx files.
487// example: "/var/spool/cache/08/7F" as input, do action over files
488// paramtr: directory (IN): starting point
489// list (IN): list of rexps to match URLs against
490// returns: true, if every subdir && action was successful.
eb1f6bfa 491{
fb151769 492 dirent_t * entry;
59a09b98 493 if ( debugFlag & 0x01 )
feec68a0
A
494 fprintf( stderr, "# [2] %s\n", directory );
495
496 DIR* dir = opendir( directory );
497 if ( dir == NULL ) {
498 fprintf( stderr, "unable to open directory \"%s\": %s\n",
499 directory, strerror(errno) );
500 return false;
501 }
502
503 // display a rotating character as "i am alive" signal (slows purge).
504 if ( ::iamalive ) {
505 static char alivelist[4][3] = { "\\\b", "|\b", "/\b", "-\b" };
506 static unsigned short alivecount = 0;
7f56277d
AJ
507 const int write_success = write(STDOUT_FILENO, alivelist[alivecount++ & 3], 2);
508 assert(write_success == 2);
feec68a0
A
509 }
510
511 bool flag = true;
512 while ( (entry=readdir(dir)) && flag ) {
513 if ( isxstring(entry->d_name,8) ) {
514 char* name = concat( directory, "/", entry->d_name, 0 );
515 flag = match( name, list );
516 delete[] name;
517 }
eb1f6bfa 518 }
eb1f6bfa 519
feec68a0
A
520 closedir(dir);
521 return flag;
eb1f6bfa
AJ
522}
523
524bool
525dirlevel( const char* dirname, const REList* list, bool level=false )
feec68a0
A
526// purpose: from given starting point, look for squid 00..FF directories.
527// paramtr: dirname (IN): starting point
528// list (IN): list of rexps to match URLs against
529// level (IN): false==toplevel, true==1st level
530// example: "/var/spool/cache", false as input, traverse subdirs w/ action.
531// example: "/var/spool/cache/08", true as input, traverse subdirs w/ action.
532// returns: true, if every subdir && action was successful.
533// warning: this function is once-recursive, no deeper.
eb1f6bfa 534{
fb151769 535 dirent_t* entry;
59a09b98 536 if ( debugFlag & 0x01 )
feec68a0
A
537 fprintf( stderr, "# [%d] %s\n", (level ? 1 : 0), dirname );
538
539 DIR* dir = opendir( dirname );
540 if ( dir == NULL ) {
541 fprintf( stderr, "unable to open directory \"%s\": %s\n",
542 dirname, strerror(errno) );
543 return false;
eb1f6bfa 544 }
eb1f6bfa 545
feec68a0
A
546 bool flag = true;
547 while ( (entry=readdir(dir)) && flag ) {
548 if ( strlen(entry->d_name) == 2 &&
549 isxdigit(entry->d_name[0]) &&
550 isxdigit(entry->d_name[1]) ) {
551 char* name = concat( dirname, "/", entry->d_name, 0 );
552 flag = level ? filelevel( name, list ) : dirlevel( name, list, true );
553 delete[] name;
554 }
555 }
556
557 closedir(dir);
558 return flag;
eb1f6bfa
AJ
559}
560
int
checkForPortOnly( const char* arg )
// purpose: see if somebody just put in a port instead of a hostname
// paramtr: arg (IN): argument from commandline
// returns: 0..65535 is the valid port number in network byte order,
//          -1 if not a port
{
    // if there is a period in there, it must be a valid hostname
    if ( strchr( arg, '.' ) != 0 ) return -1;

    // if it is just a number between 0 and 65535, it must be a port.
    // "Just a number" means that the conversion has to use up the complete
    // argument. Looking only for a numeric start turned host names like
    // "3com" into port 3.
    char* errstr = 0;
    unsigned long result = strtoul( arg, &errstr, 0 );
    if ( result < 65536 && errstr != arg && *errstr == '\0' )
        return htons(result);

#if 0
    // one last try, test for a symbolical service name
    struct servent* service = getservbyname( arg, "tcp" );
    return service ? service->s_port : -1;
#else
    return -1;
#endif
}
584
585void
586helpMe( void )
feec68a0 587// purpuse: write help message and exit
eb1f6bfa 588{
6d7a7410 589 printf( "\nUsage:\t%s\t[-a] [-c cf] [-d l] [-(f|F) fn | -(e|E) re] "
feec68a0 590 "[-p h[:p]]\n\t\t[-P #] [-s] [-v] [-C dir [-H]] [-n]\n\n",
6d7a7410 591 ::programname );
feec68a0
A
592 printf(
593 " -a\tdisplay a little rotating thingy to indicate that I am alive (tty only).\n"
594 " -c c\tsquid.conf location, default \"%s\".\n"
595 " -C dir\tbase directory for content extraction (copy-out mode).\n"
052aecef 596 " -d l\tdebug level, an OR mask of different debug options.\n"
feec68a0
A
597 " -e re\tsingle regular expression per -e instance (use quotes!).\n"
598 " -E re\tsingle case sensitive regular expression like -e.\n"
599 " -f fn\tname of textfile containing one regular expression per line.\n"
600 " -F fn\tname of textfile like -f containing case sensitive REs.\n"
601 " -H\tprepend HTTP reply header to destination files in copy-out mode.\n"
602 " -n\tdo not fork() when using more than one cache_dir.\n"
603 " -p h:p\tcache runs on host h and optional port p, default is %s:%u.\n"
604 " -P #\tif 0, just print matches; otherwise OR the following purge modes:\n"
605 "\t 0x01 really send PURGE to the cache.\n"
606 "\t 0x02 remove all caches files reported as 404 (not found).\n"
607 "\t 0x04 remove all weird (inaccessible or too small) cache files.\n"
608 "\t0 and 1 are recommended - slow rebuild your cache with other modes.\n"
609 " -s\tshow all options after option parsing, but before really starting.\n"
610 " -v\tshow more information about the file, e.g. MD5, timestamps and flags.\n"
611 "\n", DEFAULT_SQUID_CONF, DEFAULTHOST, DEFAULTPORT );
eb1f6bfa
AJ
612
613}
614
615void
616parseCommandline( int argc, char* argv[], REList*& head,
9dca980d
AJ
617 char*& conffile, char*& copyDirPath,
618 struct in_addr& serverHostIp, unsigned short& serverHostPort )
feec68a0
A
619// paramtr: argc: see ::main().
620// argv: see ::main().
621// returns: Does terminate the program on errors!
622// purpose: suck in any commandline options, and set the global vars.
623{
624 int option, port, showme = 0;
625 char* ptr, *colon;
626 FILE* rfile;
627
628 // program basename
14942edd
FC
629 if ( (ptr = strrchr(argv[0],'/')) == NULL )
630 ptr=argv[0];
631 else
632 ++ptr;
feec68a0
A
633 ::programname = ptr;
634
635 // extract commandline parameters
636 REList* tail = head = 0;
637 opterr = 0;
638 while ( (option = getopt( argc, argv, "ac:C:d:E:e:F:f:Hnp:P:sv" )) != -1 ) {
639 switch ( option ) {
640 case 'a':
641 ::iamalive = ! ::iamalive;
642 break;
643 case 'C':
644 if ( optarg && *optarg ) {
9dca980d
AJ
645 if ( copyDirPath ) xfree( (void*) copyDirPath );
646 copyDirPath = xstrdup(optarg);
647 assert(copyDirPath);
feec68a0
A
648 }
649 break;
650 case 'c':
b2d7d4be
AJ
651 if ( !optarg || !*optarg ) {
652 fprintf( stderr, "%c requires a regex pattern argument!\n", option );
653 exit(1);
feec68a0 654 }
b2d7d4be
AJ
655 if ( *conffile ) xfree((void*) conffile);
656 conffile = xstrdup(optarg);
657 assert(conffile);
feec68a0
A
658 break;
659
660 case 'd':
052aecef
AJ
661 if ( !optarg || !*optarg ) {
662 fprintf( stderr, "%c expects a mask parameter. Debug disabled.\n", option );
663 ::debugFlag = 0;
664 } else
665 ::debugFlag = (strtoul(optarg, NULL, 0) & 0xFFFFFFFF);
feec68a0
A
666 break;
667
668 case 'E':
669 case 'e':
b2d7d4be
AJ
670 if ( !optarg || !*optarg ) {
671 fprintf( stderr, "%c requires a regex pattern argument!\n", option );
672 exit(1);
673 }
674 if ( head == 0 )
675 tail = head = new REList( optarg, option=='E' );
feec68a0
A
676 else {
677 tail->next = new REList( optarg, option=='E' );
678 tail = tail->next;
679 }
680 break;
681
682 case 'f':
b2d7d4be
AJ
683 if ( !optarg || !*optarg ) {
684 fprintf( stderr, "%c requires a filename argument!\n", option );
685 exit(1);
686 }
feec68a0
A
687 if ( (rfile = fopen( optarg, "r" )) != NULL ) {
688 unsigned long lineno = 0;
eb1f6bfa 689#define LINESIZE 512
feec68a0
A
690 char line[LINESIZE];
691 while ( fgets( line, LINESIZE, rfile ) != NULL ) {
14942edd 692 ++lineno;
feec68a0
A
693 int len = strlen(line)-1;
694 if ( len+2 >= LINESIZE ) {
695 fprintf( stderr, "%s:%lu: line too long, sorry.\n",
696 optarg, lineno );
697 exit(1);
698 }
699
700 // remove trailing line breaks
f412b2d6
FC
701 while ( len > 0 && ( line[len] == '\n' || line[len] == '\r' ) ) {
702 line[len] = '\0';
703 --len;
704 }
feec68a0
A
705
706 // insert into list of expressions
707 if ( head == 0 ) tail = head = new REList(line,option=='F');
708 else {
709 tail->next = new REList(line,option=='F');
710 tail = tail->next;
711 }
712 }
713 fclose(rfile);
714 } else
715 fprintf( stderr, "unable to open %s: %s\n", optarg, strerror(errno));
716 break;
717
718 case 'H':
719 ::envelope = ! ::envelope;
720 break;
721 case 'n':
722 ::no_fork = ! ::no_fork;
723 break;
724 case 'p':
b2d7d4be
AJ
725 if ( !optarg || !*optarg ) {
726 fprintf( stderr, "%c requires a port argument!\n", option );
727 exit(1);
728 }
feec68a0
A
729 colon = strchr( optarg, ':' );
730 if ( colon == 0 ) {
731 // no colon, only look at host
732
733 // fix: see if somebody just put in there a port (no periods)
734 // give port number precedence over host names
735 port = checkForPortOnly( optarg );
736 if ( port == -1 ) {
737 // assume that main() did set the default port
9dca980d 738 if ( convertHostname(optarg,serverHostIp) == -1 ) {
feec68a0
A
739 fprintf( stderr, "unable to resolve host %s!\n", optarg );
740 exit(1);
741 }
742 } else {
743 // assume that main() did set the default host
9dca980d 744 serverHostPort = port;
feec68a0
A
745 }
746 } else {
747 // colon used, port is extra
14942edd
FC
748 *colon = 0;
749 ++colon;
9dca980d 750 if ( convertHostname(optarg,serverHostIp) == -1 ) {
feec68a0
A
751 fprintf( stderr, "unable to resolve host %s!\n", optarg );
752 exit(1);
753 }
9dca980d 754 if ( convertPortname(colon,serverHostPort) == -1 ) {
feec68a0
A
755 fprintf( stderr, "unable to resolve port %s!\n", colon );
756 exit(1);
757 }
758 }
759 break;
760 case 'P':
b2d7d4be
AJ
761 if ( !optarg || !*optarg ) {
762 fprintf( stderr, "%c requires a mode argument!\n", option );
763 exit(1);
764 }
feec68a0
A
765 ::purgeMode = ( strtol( optarg, 0, 0 ) & 0x07 );
766 break;
767 case 's':
768 showme=1;
769 break;
770 case 'v':
771 ::verbose = ! ::verbose;
772 break;
773 case '?':
774 default:
775 helpMe();
776 exit(1);
777 }
778 }
779
780 // adjust
59a09b98 781 if ( ! isatty(fileno(stdout)) || (::debugFlag & 0x01) ) ::iamalive = false;
feec68a0
A
782 if ( head == 0 ) {
783 fputs( "There was no regular expression defined. If you intend\n", stderr );
784 fputs( "to match all possible URLs, use \"-e .\" instead.\n", stderr );
785 exit(1);
eb1f6bfa 786 }
feec68a0
A
787
788 // postcondition: head != 0
789 assert( head != 0 );
790
791 // make sure that the copy out directory is there and accessible
9dca980d
AJ
792 if ( copyDirPath && *copyDirPath )
793 if ( assert_copydir( copyDirPath ) != 0 ) exit(1);
feec68a0
A
794
795 // show results
796 if ( showme ) {
6d7a7410
FC
797 printf( "#\n# Currently active values for %s:\n",
798 ::programname);
feec68a0 799 printf( "# Debug level : " );
d83197e3 800 if ( ::debugFlag ) printf( "%#6.4x", ::debugFlag );
feec68a0
A
801 else printf( "production level" ); // printf omits 0x prefix for 0!
802 printf( " + %s mode", ::no_fork ? "linear" : "parallel" );
803 puts( ::verbose ? " + extra verbosity" : "" );
804
805 printf( "# Copy-out directory: %s ",
9dca980d
AJ
806 copyDirPath ? copyDirPath : "copy-out mode disabled" );
807 if ( copyDirPath )
feec68a0
A
808 printf( "(%s HTTP header)\n", ::envelope ? "prepend" : "no" );
809 else
810 puts("");
811
812 printf( "# Squid config file : %s\n", conffile );
813 printf( "# Cacheserveraddress: %s:%u\n",
9dca980d 814 inet_ntoa( serverHostIp ), ntohs( serverHostPort ) );
feec68a0
A
815 printf( "# purge mode : 0x%02x\n", ::purgeMode );
816 printf( "# Regular expression: " );
817
818 unsigned count(0);
819 for ( tail = head; tail != NULL; tail = tail->next ) {
14942edd
FC
820 if ( count++ )
821 printf( "#%22u", count );
eb1f6bfa 822#if defined(LINUX) && putc==_IO_putc
feec68a0
A
823 // I HATE BROKEN LINUX HEADERS!
824 // purge.o(.text+0x1040): undefined reference to `_IO_putc'
825 // If your compilation breaks here, remove the undefinition
826#undef putc
eb1f6bfa 827#endif
feec68a0
A
828 else putchar('1');
829 printf( " \"%s\"\n", tail->data );
830 }
831 puts( "#" );
eb1f6bfa 832 }
feec68a0 833 fflush( stdout );
eb1f6bfa
AJ
834}
835
836extern "C" {
837
feec68a0
A
838 static
839 void
840 exiter( void ) {
841 if ( ::term_flag ) psignal( ::term_flag, "received signal" );
842 delete[] ::linebuffer;
843 if ( ::reminder ) {
844 fputs(
845 "WARNING! Caches files were removed. Please shut down your cache, remove\n"
846 "your swap.state files and restart your cache again, i.e. effictively do\n"
847 "a slow rebuild your cache! Otherwise your squid *will* choke!\n", stderr );
848 }
849 }
eb1f6bfa 850
feec68a0
A
851 static
852 void
853 handler( int signo ) {
854 ::term_flag = signo;
855 if ( getpid() == getpgrp() ) kill( -getpgrp(), signo );
856 exit(1);
857 }
eb1f6bfa
AJ
858
859} // extern "C"
860
static
int
makelinebuffered( FILE* fp, const char* fn = 0 )
// purpose: switch the given FILE to line buffered mode
// paramtr: fp (IO): file pointer to put into line buffer mode
//          fn (IN): name to mention in the error message, may be 0
// returns: 0 is ok, -1 to indicate an error
// warning: the error message was already printed when -1 is returned
{
    if ( setvbuf( fp, 0, _IOLBF, 0 ) != 0 ) {
        // complain here, the callers just go on
        const char* label = fn ? fn : "";
        fprintf( stderr, "unable to make \"%s\" line buffered: %s\n",
                 label, strerror(errno) );
        return -1;
    }

    return 0;
}
880
881int
882main( int argc, char* argv[] )
883{
feec68a0
A
884 // setup variables
885 REList* list = 0;
2ccf2eb2 886 char* conffile = xstrdup( DEFAULT_SQUID_CONF );
feec68a0
A
887 serverPort = htons(DEFAULTPORT);
888 if ( convertHostname(DEFAULTHOST,serverHost) == -1 ) {
889 fprintf( stderr, "unable to resolve host %s!\n", DEFAULTHOST );
890 return 1;
891 }
892
893 // setup line buffer
894 ::linebuffer = new char[ ::buffersize ];
895 assert( ::linebuffer != 0 );
896
897 // parse commandline
898 puts( "### Use at your own risk! No guarantees whatsoever. You were warned. ###");
899 parseCommandline( argc, argv, list, conffile, ::copydir,
900 serverHost, serverPort );
901
902 // prepare execution
903 if ( atexit( exiter ) != 0 ||
904 Signal( SIGTERM, handler, true ) == SIG_ERR ||
905 Signal( SIGINT, handler, true ) == SIG_ERR ||
906 Signal( SIGHUP, handler, true ) == SIG_ERR ) {
907 perror( "unable to install signal/exit function" );
908 return 1;
909 }
910
911 // try to read squid.conf file to determine all cache_dir locations
912 CacheDirVector cdv(0);
59a09b98 913 if ( readConfigFile( cdv, conffile, debugFlag ? stderr : 0 ) > 0 ) {
feec68a0
A
914 // there are some valid cache_dir entries.
915 // unless forking was forbidden by cmdline option,
916 // for a process for each cache_dir entry to remove files.
917
918 if ( ::no_fork || cdv.size() == 1 ) {
919 // linear mode, one cache_dir after the next
920 for ( CacheDirVector::iterator i = cdv.begin(); i != cdv.end(); ++i ) {
921 // execute OR complain
922 if ( ! dirlevel(i->base,list) )
923 fprintf( stderr, "program terminated due to error: %s",
924 strerror(errno) );
6e2aefad 925 xfree((void*) i->base);
feec68a0
A
926 }
927 } else {
928 // parallel mode, all cache_dir in parallel
929 pid_t* child = new pid_t[ cdv.size() ];
930
931 // make stdout/stderr line bufferd
932 makelinebuffered( stdout, "stdout" );
933 makelinebuffered( stderr, "stderr" );
934
935 // make parent process group leader for easier killings
936 if ( setpgid(getpid(), getpid()) != 0 ) {
937 perror( "unable to set process group leader" );
938 return 1;
939 }
940
941 // -a is mutually exclusive with fork mode
942 if ( ::iamalive ) {
943 puts( "# i-am-alive flag incompatible with fork mode, resetting" );
944 ::iamalive = false;
945 }
946
2ccf2eb2 947 for ( size_t i=0; i < cdv.size(); ++i ) {
feec68a0
A
948 if ( getpid() == getpgrp() ) {
949 // only parent == group leader may fork off new processes
950 if ( (child[i]=fork()) < 0 ) {
951 // fork error, this is bad!
952 perror( "unable to fork" );
953 kill( -getpgrp(), SIGTERM );
954 return 1;
955 } else if ( child[i] == 0 ) {
956 // child mode
957 // execute OR complain
958 if ( ! dirlevel(cdv[i].base,list) )
959 fprintf( stderr, "program terminated due to error: %s\n",
960 strerror(errno) );
6e2aefad 961 xfree((void*) cdv[i].base);
feec68a0
A
962 return 0;
963 } else {
964 // parent mode
59a09b98 965 if ( ::debugFlag ) printf( "forked child %d\n", (int) child[i] );
feec68a0
A
966 }
967 }
968 }
969
970 // collect the garbase
971 pid_t temp;
972 int status;
2ccf2eb2 973 for ( size_t i=0; i < cdv.size(); ++i ) {
feec68a0
A
974 while ( (temp=waitpid( (pid_t)-1, &status, 0 )) == -1 )
975 if ( errno == EINTR ) continue;
59a09b98 976 if ( ::debugFlag ) printf( "collected child %d\n", (int) temp );
feec68a0
A
977 }
978 delete[] child;
979 }
eb1f6bfa 980 } else {
feec68a0 981 fprintf( stderr, "no cache_dir or error accessing \"%s\"\n", conffile );
eb1f6bfa 982 }
feec68a0
A
983
984 // clean up
6e2aefad
HN
985 if ( copydir ) xfree( (void*) copydir );
986 xfree((void*) conffile);
feec68a0
A
987 delete list;
988 return 0;
eb1f6bfa 989}