#!/usr/local/bin/perl
-# check_cache.pl - martin hamilton <m.t.hamilton@lut.ac.uk>
+# check_cache.pl
#
-# Check the Harvest cache directory for stale objects - i.e. those
+# Squid-1.0 version by martin hamilton <m.t.hamilton@lut.ac.uk>
+# Squid-1.1 version by Bertold Kolics <bertold@tohotom.vein.hu>
+#
+# Check the Squid-1.1.x cache directory for stale objects - i.e. those
# which exist on disk but aren't listed in cached's log file.
-# Version 1 did all this in memory, but the log file can be a
-# little on the large side... 8-(
-# $Id: check_cache.pl,v 1.3 1996/07/09 03:41:16 wessels Exp $
+# $Id: check_cache.pl,v 1.4 1997/02/03 23:42:21 wessels Exp $
require "getopts.pl";
-&Getopts("c:dl:rt:v");
+&Getopts("c:drt:vh");
+# Command-line options (-c and -t take an argument, the rest are flags):
+# -c squid.conf : the full path to squid.conf
+# -d : turn on debugging
+# -r : actually remove stale files
+# -t tmpdir : temporary directory
+# -v : list stale files
+# -h : print the help
-$cachedir = $opt_c || "/usr/local/harvest/cache";
-# -d -> turn on debugging output
-$logfile = $opt_l || "$cachedir/log";
-# -r -> actually remove stale files
-$tmpdir = $opt_t || $ENV{TMPDIR} || "/var/tmp";
-# -v -> list stale files
+# -h: print a usage summary covering every accepted option, then stop
+if ($opt_h) {
+    print "Usage: check_cache.pl [-drvh] [-c squid.conf] [-t tmpdir]\n";
+    print "\t-c the full path to squid.conf\n";
+    print "\t-d turn on debugging\n";
+    print "\t-r actually remove stale files\n";
+    print "\t-t temporary directory\n";
+    print "\t-v list stale files\n";
+    print "\t-h print the help\n";
+    exit;
+}
+
+# Read squid.conf to find the cache directories, the swap log and the
+# number of first/second level swap subdirectories.
+$squidconf = $opt_c || "/usr/local/squid/etc/squid.conf";
+open (SQUIDCONF, "< $squidconf") || die "$squidconf: $!\n";
+$no_cachedir = 0;
+$swaplog = '';
+# values used when squid.conf does not set swap_level1_dirs/swap_level2_dirs
+$level1dirno = 16;
+$level2dirno = 256;
+while (<SQUIDCONF>) {
+    # Each directive is anchored at the start of the line so commented-out
+    # entries are ignored, and each pattern captures its own value: \S+ and
+    # \d+ stop before trailing whitespace and the newline, so no chop is
+    # needed.
+    if (/^cache_dir\s+(\S+)/) {
+        push (@cachedir, $1);
+    } elsif (/^cache_swap_log\s+(\S+)/) {
+        $swaplog = $1;
+    } elsif (/^swap_level1_dirs\s+(\d+)/) {
+        $level1dirno = $1;
+    } elsif (/^swap_level2_dirs\s+(\d+)/) {
+        $level2dirno = $1;
+    }
+}
+close (SQUIDCONF);
+# fall back to the default cache directory when squid.conf names none
+push (@cachedir, '/usr/local/squid/cache') unless (@cachedir);
+# without an explicit cache_swap_log, use "log" in the first cache directory
+$swaplog = $cachedir[0] . '/log' unless ($swaplog);
+$no_cachedir = scalar(@cachedir);
+print "$no_cachedir CACHE DIRS: ", join(' ', @cachedir), "\n" if ($opt_d);
+print "SWAP LOG: $swaplog\n" if ($opt_d);
+$tmpdir = $opt_t || $ENV{TMPDIR} || "/var/tmp";
chdir($tmpdir);
-# snarf filenames from Harvest log & sort em
-system("cut -f1 -d' ' $logfile >pl$$");
+# snarf file numbers from Squid log & sort em
+# (upper-cased before they are compared with the names found on disk; the
+# tr sets are quoted so the shell cannot glob-expand them)
+system("cut -f1 -d' ' $swaplog | tr '[a-z]' '[A-Z]' >pl$$");
system("sort -T $tmpdir pl$$ >spl$$; rm pl$$");
# get list of files in cache & sort em
-system("find $cachedir -print -type f >cd$$");
+for ($i = 0; $i < $no_cachedir; $i++) {
+    # list the regular files below this cache directory, relative to it;
+    # stop if the directory is unreachable rather than scan the wrong place
+    chdir($cachedir[$i]) || die "$cachedir[$i]: $!\n";
+    # -type f must come before -print, otherwise every entry is printed
+    system("find ./ -type f -print > $tmpdir/fp$$");
+    chdir($tmpdir) || die "$tmpdir: $!\n";
+# keep only the 4th '/'-separated field, i.e. NAME in ./L1/L2/NAME;
+# -s suppresses lines that contain no '/' at all
+    system("cut -d'/' -f4 -s fp$$ >> cd$$ ; rm fp$$");
+}
system("sort -T $tmpdir cd$$ >scd$$; rm cd$$");
-# get list of objects in one file but not the other
+# get list of objects on disk (scd$$) but not in the log (spl$$)
system("comm -13 spl$$ scd$$ >comm$$; rm spl$$ scd$$");
+chdir($tmpdir);
# iterate through it
open(IN, "comm$$") || die "Can't open temporary file $tmpdir/comm$$: $!";
+# on Unix the already-open handle stays readable after the unlink, so the
+# temporary file can be removed right away
+unlink("comm$$");
while(<IN>) {
chop;
- print STDERR ">> inspecting $_\n" if $opt_d;
- next if -d "$_"; # don't want directories
- next if /(log|cached.out)/; # don't want to zap these!
+    $filename = $_;
+
+    # only hexadecimal file numbers can be mapped back to a cache path
+    next unless $filename =~ /^[0-9A-Fa-f]+$/;
+
+# calculate the full path of the current filename:
+# the file number selects the cache directory first, then the first and
+# second level subdirectories from the remaining quotient
+    $fileno = hex($filename);
+    $dirno = $fileno % $no_cachedir;
+    $rest = int($fileno / $no_cachedir);
+    $level1 = sprintf("%02X", $rest % $level1dirno);
+    $level2 = sprintf("%02X", int($rest / $level1dirno) % $level2dirno);
+    $filename = "$cachedir[$dirno]/$level1/$level2/$filename";
- print "$_\n" if $opt_v; # print filename if asked
+    next if -d "$filename"; # don't want directories
+
+    print "$filename\n" if $opt_v; # print filename if asked
# skip if cached file appeared since script started running
- if (-M $_ < 0) {
- print STDERR "skipping $_\n" if $opt_d;
+    if (-M $filename < 0) {
+        print STDERR "skipping $filename\n" if $opt_d;
next;
}
- unlink($_) if $opt_r; # only remove if asked!
+    print "Orphan: $filename\n";
+    unlink($filename) if $opt_r; # only remove if asked!
}
close(IN);
-
-unlink("comm$$");
-