]> git.ipfire.org Git - thirdparty/squid.git/blame - src/log/DB/log_db_daemon.pl.in
Source Format Enforcement (#1234)
[thirdparty/squid.git] / src / log / DB / log_db_daemon.pl.in
CommitLineData
1c363fe7
AJ
1#!@PERL@
2use strict;
3use warnings;
4use DBI;
5use English qw( -no_match_vars );
6use Getopt::Long;
7use Pod::Usage;
8
9$|=1;
10
11=pod
12
13=head1 NAME
14
15log_db_daemon - Database logging daemon for Squid
16
8311b837
AJ
17Version 0.5.
18
1c363fe7
AJ
19=head1 SYNOPSIS
20
21log_db_daemon DSN [options]
22
2b61af8e 23=head1 DESCRIPTION
1c363fe7
AJ
24
25This program writes Squid access.log entries to a database.
8311b837 26Presently only accepts the B<squid> native log format.
1c363fe7 27
8311b837 28The script has been developed and tested in the following environment:
1c363fe7 29
8311b837 30=over 4
1c363fe7 31
8311b837 32=item squid-2.7 Squid-3.2
1c363fe7 33
8311b837 34=item mysql 5.0.26 and 5.1
1c363fe7 35
8311b837 36=item perl 5.8.8
1c363fe7 37
8311b837 38=item OpenSUSE 10.2
1c363fe7 39
5275ac27
AJ
40=back
41
8311b837 42=head1 OPTIONS
1c363fe7 43
8311b837 44=over 12
1c363fe7 45
8311b837 46=item B<DSN>
1c363fe7 47
8311b837 48Database DSN encoded as a path. This is sent as the access_log file path.
1c363fe7 49
8311b837
AJ
50Sample configuration:
51 access_log daemon:/host/database/table/username/password squid
1c363fe7 52
8311b837
AJ
53 to leave a parameter unspecified use a double slash:
54 access_log daemon://database/table/username/password squid
1c363fe7 55
8311b837 56Default "DBI:mysql:database=squid"
1c363fe7 57
8311b837 58=item B<--debug>
5275ac27 59
8311b837 60Write debug info to stderr.
1c363fe7 61
8311b837 62=back
1c363fe7
AJ
63
64=head1 CONFIGURATION
65
66=head2 Squid configuration
67
68=head3 access_log directive
69
70The path to the access log file is used to provide the database connection parameters.
71
72 access_log daemon:/mysql_host:port/database/table/username/password squid
73
74The 'daemon' prefix is mandatory and tells squid that the B<logfile_daemon> helper is to be used instead of the normal file logging.
75
76The last parameter tells squid which log format to use when writing lines to the log daemon.
77Presently B<squid> format is supported.
78
79=over 4
80
81=item mysql_host:port
82
83Host where the mysql server is running. If left empty, 'localhost' is assumed.
84
85=item database
86
87Name of the database to connect to. If left empty, 'squid_log' is assumed.
88
89=item table
90
91Name of the database table where log lines are stored. If left empty, 'access_log' is assumed.
92
93=item username
94
95Username to use when connecting to the database. If left empty, 'squid' is assumed.
96
97=item password
98
99Password to use when connecting to the database. If left empty, no password is used.
100
101=back
102
103To leave all fields to their default values, you can use a single slash:
104
105 access_log daemon:/ squid
106
107To specify only the database password, which by default is empty, you must leave unspecified all the other parameters by using null strings:
108
109 access_log daemon://///password squid
110
111=head3 logfile_daemon directive
112
113This is the current way of telling squid where the logfile daemon resides.
114
115 logfile_daemon /path/to/squid/libexec/logfile-daemon_mysql.pl
116
117The script must be copied to the location specified in the directive.
118
119=head2 Database configuration
120
121Let's call the database 'squid_log' and the log table 'access_log'. The username and password for the db connection will be both 'squid'.
122
123=head3 Database
124
125Create the database:
126
127 CREATE DATABASE squid_log;
128
129=head3 User
130
131Create the user:
132
133 GRANT INSERT,SELECT,CREATE ON squid_log.* TO 'squid'@'localhost' IDENTIFIED BY 'squid';
134 FLUSH PRIVILEGES;
135
136Note that only CREATE, INSERT and SELECT privileges are granted to the 'squid' user. This ensures that the logfile daemon script cannot change or modify the log entries.
137
138=head3 Table
139
140The Daemon will attempt to initialize this table if none exists when it starts.
141
142The table created should look like:
143
144 CREATE TABLE access_log (
145 id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY,
146 time_since_epoch DECIMAL(15,3),
147 time_response INTEGER,
148 ip_client CHAR(15),
149 ip_server CHAR(15),
150 http_status_code VARCHAR(10),
151 http_reply_size INTEGER,
152 http_method VARCHAR(20),
153 http_url TEXT,
154 http_username VARCHAR(20),
155 http_mime_type VARCHAR(50),
156 squid_hier_status VARCHAR(20),
157 squid_request_status VARCHAR(20)
158 );
159
1c363fe7
AJ
160=head1 DATA EXTRACTION
161
162=head2 Sample queries.
163
164=over 4
165
166=item Clients accessing the cache
167
168 SELECT DISTINCT ip_client FROM access_log;
169
170=item Number of request per day
171
172 SELECT
173 DATE(FROM_UNIXTIME(time_since_epoch)) AS date_day,
174 COUNT(*) AS num_of_requests
175 FROM access_log
176 GROUP BY 1
177 ORDER BY 1;
178
179=item Request status count
180
181To obtain the raw count of each request status:
182
183 SELECT squid_request_status, COUNT(*) AS n
184 FROM access_log
185 GROUP BY squid_request_status
186 ORDER BY 2 DESC;
187
188To calculate the percentage of each request status:
189
190 SELECT
191 squid_request_status,
192 (COUNT(*)/(SELECT COUNT(*) FROM access_log)*100) AS percentage
193 FROM access_log
194 GROUP BY squid_request_status
195 ORDER BY 2 DESC;
196
197To distinguish only between HITs and MISSes:
198
199 SELECT
200 'hits',
201 (SELECT COUNT(*)
202 FROM access_log
203 WHERE squid_request_status LIKE '%HIT%')
204 /
205 (SELECT COUNT(*) FROM access_log)*100
206 AS percentage
207 UNION
208 SELECT
209 'misses',
210 (SELECT COUNT(*)
211 FROM access_log
212 WHERE squid_request_status LIKE '%MISS%')
213 /
214 (SELECT COUNT(*) FROM access_log)*100
2b61af8e 215 AS percentage;
1c363fe7
AJ
216
217=item Response time ranges
218
219 SELECT
220 '0..500',
221 COUNT(*)/(SELECT COUNT(*) FROM access_log)*100 AS percentage
222 FROM access_log
223 WHERE time_response >= 0 AND time_response < 500
224 UNION
225 SELECT
226 '500..1000',
227 COUNT(*)/(SELECT COUNT(*) FROM access_log)*100 AS percentage
228 FROM access_log
229 WHERE time_response >= 500 AND time_response < 1000
230 UNION
231 SELECT
232 '1000..2000',
233 COUNT(*)/(SELECT COUNT(*) FROM access_log)*100 AS percentage
234 FROM access_log
235 WHERE time_response >= 1000 AND time_response < 2000
236 UNION
237 SELECT
238 '>= 2000',
239 COUNT(*)/(SELECT COUNT(*) FROM access_log)*100 AS percentage
240 FROM access_log
241 WHERE time_response >= 2000;
242
243=item Traffic by mime type
244
245 SELECT
246 http_mime_type,
247 SUM(http_reply_size) as total_bytes
248 FROM access_log
249 GROUP BY http_mime_type
250 ORDER BY 2 DESC;
251
252=item Traffic by client
253
254 SELECT
255 ip_client,
256 SUM(http_reply_size) AS total_bytes
257 FROM access_log
258 GROUP BY 1
259 ORDER BY 2 DESC;
260
261=back
262
8311b837
AJ
263=head1 KNOWN ISSUES
264
1c363fe7
AJ
265=head2 Speed issues
266
267The MyISAM storage engine is known to be faster than the InnoDB one, so although it doesn't support transactions and referential integrity, it might be more appropriate in this scenario. You might want to append "ENGINE=MYISAM" at the end of the table creation code in the above SQL script.
268
269Indexes should be created according to the queries that are more frequently run. The DDL script only creates an implicit index for the primary key column.
270
1c363fe7
AJ
271=head2 Table cleanup
272
273This script currently implements only the C<L> (i.e. "append a line to the log") command, therefore the log lines are never purged from the table. This approach has an obvious scalability problem.
274
2b61af8e 275One solution would be to implement e.g. the "rotate log" command in a way that would calculate some summary values, put them in a "summary table" and then delete the lines used to calculate those values.
1c363fe7
AJ
276
277Similar cleanup code could be implemented in an external script and run periodically independently from squid log commands.
278
279=head2 Testing
280
281This script has only been tested in low-volume scenarios (single client, less than 10 req/s). Tests in high volume environments could reveal performance bottlenecks and bugs.
282
283=head1 AUTHOR
284
8311b837
AJ
285This program was written by
286I<Marcello Romani <marcello.romani@libero.it>> ,
287I<Amos Jeffries <amosjeffries@squid-cache.org>>
1c363fe7 288
8311b837 289=head1 COPYRIGHT
1c363fe7 290
b8ae064d 291 * Copyright (C) 1996-2023 The Squid Software Foundation and contributors
ca02e0ec
AJ
292 *
293 * Squid software is distributed under GPLv2+ license and includes
294 * contributions from numerous individuals and organizations.
295 * Please see the COPYING and CONTRIBUTORS files for details.
296
1c363fe7
AJ
297Copyright (C) 2008 by Marcello Romani
298
299This library is free software; you can redistribute it and/or modify
300it under the same terms as Perl itself, either Perl version 5.8.8 or,
301at your option, any later version of Perl 5 you may have available.
302
8311b837
AJ
303=head1 QUESTIONS
304
305Questions on the usage of this program can be sent to the I<Squid Users mailing list <squid-users@lists.squid-cache.org>>
306
307=head1 REPORTING BUGS
308
309Bug reports need to be made in English.
310See http://wiki.squid-cache.org/SquidFaq/BugReporting for details of what you need to include with your bug report.
311
312Report bugs or bug fixes using http://bugs.squid-cache.org/
313
314Report serious security bugs to I<Squid Bugs <squid-bugs@lists.squid-cache.org>>
315
316Report ideas for new improvements to the I<Squid Developers mailing list <squid-dev@lists.squid-cache.org>>
317
318=head1 SEE ALSO
319
320squid (8), GPL (7),
321
322The Squid FAQ wiki http://wiki.squid-cache.org/SquidFaq
323
324The Squid Configuration Manual http://www.squid-cache.org/Doc/config/
325
1c363fe7 326=cut
8311b837
AJ
327
328# the first argument to this script is the log file path describing the DSN
329my $log_file = shift;
330
331# others may be options
332my $debug = 0;
333GetOptions(
47f28373
FC
334 'debug' => \$debug,
335 );
8311b837
AJ
336
337
338# utility routine to print messages on stderr (so they appear in cache log)
339# without using warn, which would clutter the log with source line numbers
340sub log_info {
341 my $msg = shift;
342 print STDERR "$msg\n";
343}
344
345# we use logfile to pass database access information to this script
346# sample configuration:
347# access_log daemon:/host/database/table/username/password squid
2f8abb64 348# to let a parameter unspecified, e.g. the database host, use a double slash:
8311b837
AJ
349# access_log daemon://database/table/username/password squid
350my ( $host, $database, $table, $user, $pass ) = $log_file =~ / \/(.*?)\/(.*?)\/(.*?)\/(.*?)\/(.*?) \z /xms;
351
352if ( !$host ) {
353 $host = 'localhost';
354 log_info("Database host not specified. Using $host.");
355}
356
357if ( !$database ) {
358 $database = 'squid_log';
359 log_info("Database name not specified. Using $database.");
360}
361
362if ( !$table ) {
363 $table = 'access_log';
364 log_info("Table parameter not specified. Using $table.");
365}
366
367if ( !$user ) {
368 $user = 'squid';
369 log_info("User parameter not specified. Using $user.");
370}
371
372if ( !$pass ) {
373 log_info('No password specified. Connecting with NO password.');
374}
375
376# fields that we should have in the table
377# Order here must match the order of fields in the Log format and parse() output array.
378my @db_fields = qw(
379 id
380 time_since_epoch
381 time_response
382 ip_client
383 squid_request_status
384 http_status_code
385 http_reply_size
386 http_method
387 http_url
388 http_username
389 squid_hier_status
390 ip_server
391 http_mime_type
47f28373 392 );
8311b837
AJ
393
394# perform db connection
395my $dsn = "DBI:mysql:database=$database" . ($host ne "localhost" ? ":$host" : "");
396my $dbh;
397my $sth;
398eval {
399 warn "Connecting... dsn='$dsn', username='$user', password='...'";
400 $dbh = DBI->connect($dsn, $user, $pass, { AutoCommit => 1, RaiseError => 1, PrintError => 1 });
47f28373 401 };
8311b837
AJ
402if ($EVAL_ERROR) {
403 die "Cannot connect to database: $DBI::errstr";
404}
405
406
407# a simple test to assure the specified table exists
408eval {
409 my $q = 'SELECT ' . join(',',@db_fields) . " FROM $table LIMIT 1";
410 my $sth = $dbh->prepare($q);
411 $sth->execute;
47f28373 412 };
8311b837
AJ
413if ($EVAL_ERROR) {
414 # run a query to create the table of required syntax
415 my $create_query = 'CREATE TABLE ' . $table . ' (' .
47f28373
FC
416 " id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY," .
417 " time_since_epoch DECIMAL(15,3)," .
418 " time_response INTEGER," .
419 " ip_client CHAR(15)," .
420 " ip_server CHAR(15)," .
421 " http_status_code VARCHAR(10)," .
422 " http_reply_size INTEGER," .
423 " http_method VARCHAR(20)," .
424 " http_url TEXT," .
425 " http_username VARCHAR(20)," .
426 " http_mime_type VARCHAR(50)," .
427 " squid_request_status VARCHAR(50)," .
428 " squid_hier_status VARCHAR(20)" .
429 ");" ;
8311b837
AJ
430
431 $sth = $dbh->prepare($create_query);
432 $sth->execute;
433 # test again and fail hard if it is still broken.
434 eval {
435 my $q = 'SELECT ' . join(',',@db_fields) . " FROM $table LIMIT 1";
436 my $sth = $dbh->prepare($q);
437 $sth->execute;
47f28373 438 };
8311b837
AJ
439 if ($EVAL_ERROR) {
440 die "Error initializing database table: $EVAL_ERROR";
441 };
442}
443# test
444
445# for better performance, prepare the statement at startup
446eval {
447 my $q = "INSERT INTO $table (" . join(',',@db_fields) . ") VALUES(NULL" . ',?' x (scalar(@db_fields)-1) . ')';
47f28373 448#$sth = $dbh->prepare("INSERT INTO $table VALUES(NULL,?,?,?,?,?,?,?,?,?,?,?,?)");
8311b837 449 $sth = $dbh->prepare($q);
47f28373 450 };
8311b837
AJ
451if ($EVAL_ERROR) {
452 die "Error while preparing sql statement: $EVAL_ERROR";
453}
454
455sub parse($) {
456 my ($line) = @_;
457 my (@t) = $line =~ /^L(\d+\.\d+) *(\d+?) (.*?) (.*?)\/(\d+?) (\d+?) (.*?) (.*?) (.*?) (.*?)\/(.*?) (.*)$/;
458}
459
460# main loop
461while (my $line = <>) {
462 chomp $line;
463
464 my $cmd = substr($line, 0, 1); # extract command byte
465
466 if ( $cmd eq 'L' ) {
467 my @log_entry = parse($line);
468 eval { # we catch db errors to avoid crashing squid in case something goes wrong...
469 $sth->execute(@log_entry) or die $sth->errstr
47f28373 470 };
8311b837
AJ
471 if ( $EVAL_ERROR ) { # leave a trace of the error in the logs
472 warn $EVAL_ERROR . " values=(" . join(', ', @log_entry) . ')';
473 }
474 }
475}
476
477$dbh->disconnect();
478
479__END__