]> git.ipfire.org Git - thirdparty/squid.git/blob - contrib/url-normalizer.pl
Source Format Enforcement (#532)
[thirdparty/squid.git] / contrib / url-normalizer.pl
1 #!/usr/local/bin/perl -Tw
2 #
3 # * Copyright (C) 1996-2020 The Squid Software Foundation and contributors
4 # *
5 # * Squid software is distributed under GPLv2+ license and includes
6 # * contributions from numerous individuals and organizations.
7 # * Please see the COPYING and CONTRIBUTORS files for details.
8 #
9
10 # From: Markus Gyger <mgyger@itr.ch>
11 #
12 # I'd like to see a redirector which "normalizes" URLs to have
13 # a higher chance to get a hit. I didn't see such a redirector,
14 # so I thought I would send my little attempt. However, I have
15 # no real idea how much CPU time it needs using the LWP modules,
16 # but it seems to work.
17
18 require 5.003;
19 use strict;
20 use URI::URL;
21
22 $| = 1; # force a flush after every print on STDOUT
23
24 my ($url, $addr, $fqdn, $ident, $method);
25
26 while (<>) {
27 ($url, $addr, $fqdn, $ident, $method) = m:(\S*) (\S*)/(\S*) (\S*) (\S*):;
28
29 # "normalize" URL
30 $url = url $url; # also removes default port number
31 $url->host(lc $url->host); # map host name to lower case
32 my $epath = $url->epath;
33 $epath =~ s/%7e/~/ig; # unescape ~
34 $epath =~ s/(%[\da-f]{2})/\U$1/ig; # capitalize escape digits
35 if ($url->scheme =~ /^(http|ftp)$/) {
36 $epath =~ s:/\./:/:g; # safe?
37 $epath =~ s://:/:g; # safe?
38 }
39 $url->epath($epath);
40
41
42 # ...
43
44
45 } continue {
46 print "$url $addr/$fqdn $ident $method\n"
47 }
48
49
50 BEGIN {
51 unless (URI::URL::implementor('cache_object')) {
52 package cache_object;
53 @cache_object::ISA = (URI::URL::implementor());
54 URI::URL::implementor('cache_object', 'cache_object');
55
56 sub default_port { 3128 }
57 }
58 }