]> git.ipfire.org Git - thirdparty/squid.git/blame - contrib/url-normalizer.pl
SourceFormat Enforcement
[thirdparty/squid.git] / contrib / url-normalizer.pl
CommitLineData
f3acd90c 1#!/usr/local/bin/perl -Tw
df3cec97 2#
4ac4a490 3# * Copyright (C) 1996-2017 The Squid Software Foundation and contributors
df3cec97
AJ
4# *
5# * Squid software is distributed under GPLv2+ license and includes
6# * contributions from numerous individuals and organizations.
7# * Please see the COPYING and CONTRIBUTORS files for details.
8#
bbff12c7 9
10# From: Markus Gyger <mgyger@itr.ch>
11#
12# I'd like to see a redirector which "normalizes" URLs to have
13# a higher chance to get a hit. I didn't see such a redirector,
14# so I thought I would send my little attempt. However, I have
15# no real idea how much CPU time it needs using the LWP modules,
16# but it seems to work.
17
18require 5.003;
19use strict;
20use URI::URL;
21
22$| = 1; # force a flush after every print on STDOUT
23
24my ($url, $addr, $fqdn, $ident, $method);
25
26while (<>) {
27 ($url, $addr, $fqdn, $ident, $method) = m:(\S*) (\S*)/(\S*) (\S*) (\S*):;
28
29 # "normalize" URL
30 $url = url $url; # also removes default port number
31 $url->host(lc $url->host); # map host name to lower case
32 my $epath = $url->epath;
33 $epath =~ s/%7e/~/ig; # unescape ~
34 $epath =~ s/(%[\da-f]{2})/\U$1/ig; # capitalize escape digits
35 if ($url->scheme =~ /^(http|ftp)$/) {
36 $epath =~ s:/\./:/:g; # safe?
37 $epath =~ s://:/:g; # safe?
38 }
39 $url->epath($epath);
40
41
42 # ...
43
44
45} continue {
46 print "$url $addr/$fqdn $ident $method\n"
47}
48
49
50BEGIN {
51 unless (URI::URL::implementor('cache_object')) {
52 package cache_object;
53 @cache_object::ISA = (URI::URL::implementor());
54 URI::URL::implementor('cache_object', 'cache_object');
55
56 sub default_port { 3128 }
57 }
58}