]> git.ipfire.org Git - thirdparty/squid.git/blame - helpers/log_daemon/DB/log_db_daemon.pl.in
Boilerplate: update copyright blurbs on Squid helpers
[thirdparty/squid.git] / helpers / log_daemon / DB / log_db_daemon.pl.in
CommitLineData
1c363fe7
AJ
1#!@PERL@
2use strict;
3use warnings;
4use DBI;
5use English qw( -no_match_vars );
6use Getopt::Long;
7use Pod::Usage;
8
9$|=1;
10
11=pod
12
13=head1 NAME
14
15log_db_daemon - Database logging daemon for Squid
16
17=head1 SYNOPSIS
18
19log_db_daemon DSN [options]
20
21=head1 DESCRIPTOIN
22
23This program writes Squid access.log entries to a database.
24Presently only accepts the B<squid> native format
25
26=over 8
27
28=item B<DSN>
29
30Database DSN encoded as a path. This is sent as the access_log file path.
31
32Sample configuration:
33 access_log daemon:/host/database/table/username/password squid
34
35 to leave a parameter unspecified use a double slash:
36 access_log daemon://database/table/username/password squid
37
38Default "DBI:mysql:database=squid"
39
40=item B<--debug>
41
42Write debug messages to Squid stderr or cache.log
43
5275ac27
AJ
44=back
45
1c363fe7
AJ
46=cut
47
48# the first argument to this script is the log file path describing the DSN
49my $log_file = shift;
50
51# others may be options
52my $debug = 0;
53GetOptions(
54 'debug' => \$debug,
55 );
56
57
58# utility routine to print messages on stderr (so they appear in cache log)
59# without using warn, which would clutter the log with source line numbers
60sub log_info {
61 my $msg = shift;
62 print STDERR "$msg\n";
63}
64
65# we use logfile to pass database access information to this script
66# sample configuration:
67# access_log daemon:/host/database/table/username/password squid
68# to let a parmeter unspecified, e.g. the database host, use a double slash:
69# access_log daemon://database/table/username/password squid
70my ( $host, $database, $table, $user, $pass ) = $log_file =~ / \/(.*?)\/(.*?)\/(.*?)\/(.*?)\/(.*?) \z /xms;
71
72if ( !$host ) {
73 $host = 'localhost';
74 log_info("Database host not specified. Using $host.");
75}
76
77if ( !$database ) {
78 $database = 'squid_log';
79 log_info("Database name not specified. Using $database.");
80}
81
82if ( !$table ) {
83 $table = 'access_log';
84 log_info("Table parameter not specified. Using $table.");
85}
86
87if ( !$user ) {
88 $user = 'squid';
89 log_info("User parameter not specified. Using $user.");
90}
91
92if ( !$pass ) {
93 log_info('No password specified. Connecting with NO password.');
94}
95
96# fields that we should have in the table
97# Order here must match the order of fields in the Log format and parse() output array.
98my @db_fields = qw(
99 id
100 time_since_epoch
101 time_response
102 ip_client
103 squid_request_status
104 http_status_code
105 http_reply_size
106 http_method
107 http_url
108 http_username
109 squid_hier_status
110 ip_server
111 http_mime_type
112);
113
114# perform db connection
115my $dsn = "DBI:mysql:database=$database" . ($host ne "localhost" ? ":$host" : "");
116my $dbh;
117my $sth;
118eval {
119 warn "Connecting... dsn='$dsn', username='$user', password='...'";
120 $dbh = DBI->connect($dsn, $user, $pass, { AutoCommit => 1, RaiseError => 1, PrintError => 1 });
121};
122if ($EVAL_ERROR) {
123 die "Cannot connect to database: $DBI::errstr";
124}
125
126
127# a simple test to assure the specified table exists
128eval {
129 my $q = 'SELECT ' . join(',',@db_fields) . " FROM $table LIMIT 1";
130 my $sth = $dbh->prepare($q);
131 $sth->execute;
132};
133if ($EVAL_ERROR) {
134 # run a query to create the table of required syntax
135 my $create_query = 'CREATE TABLE ' . $table . ' (' .
136 " id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY," .
137 " time_since_epoch DECIMAL(15,3)," .
138 " time_response INTEGER," .
139 " ip_client CHAR(15)," .
140 " ip_server CHAR(15)," .
141 " http_status_code VARCHAR(10)," .
142 " http_reply_size INTEGER," .
143 " http_method VARCHAR(20)," .
144 " http_url TEXT," .
145 " http_username VARCHAR(20)," .
146 " http_mime_type VARCHAR(50)," .
147 " squid_request_status VARCHAR(50)," .
148 " squid_hier_status VARCHAR(20)" .
149 ");" ;
150
151 $sth = $dbh->prepare($create_query);
152 $sth->execute;
153 # test again and fail hard if it is still broken.
154 eval {
155 my $q = 'SELECT ' . join(',',@db_fields) . " FROM $table LIMIT 1";
156 my $sth = $dbh->prepare($q);
157 $sth->execute;
158 };
159 if ($EVAL_ERROR) {
160 die "Error initializing database table: $EVAL_ERROR";
161 };
162}
163# test
164
165# for better performance, prepare the statement at startup
166eval {
167 my $q = "INSERT INTO $table (" . join(',',@db_fields) . ") VALUES(NULL" . ',?' x (scalar(@db_fields)-1) . ')';
168 #$sth = $dbh->prepare("INSERT INTO $table VALUES(NULL,?,?,?,?,?,?,?,?,?,?,?,?)");
169 $sth = $dbh->prepare($q);
170};
171if ($EVAL_ERROR) {
172 die "Error while preparing sql statement: $EVAL_ERROR";
173}
174
175sub parse($) {
176 my ($line) = @_;
177 my (@t) = $line =~ /^L(\d+\.\d+) *(\d+?) (.*?) (.*?)\/(\d+?) (\d+?) (.*?) (.*?) (.*?) (.*?)\/(.*?) (.*)$/;
178}
179
180# main loop
181while (my $line = <>) {
182 chomp $line;
183
184 my $cmd = substr($line, 0, 1); # extract command byte
185
186 if ( $cmd eq 'L' ) {
187 my @log_entry = parse($line);
188 eval { # we catch db errors to avoid crashing squid in case something goes wrong...
189 $sth->execute(@log_entry) or die $sth->errstr
190 };
191 if ( $EVAL_ERROR ) { # leave a trace of the error in the logs
192 warn $EVAL_ERROR . " values=(" . join(', ', @log_entry) . ')';
193 }
194 }
195}
196
197$dbh->disconnect();
198
199__END__
200
5275ac27
AJ
201=pod
202
1c363fe7
AJ
203=head1 DESCRIPTION
204
205This module exploits the new logfile daemon support available in squid 2.7 and 3.2 to store access log entries in a MySQL database.
206
207=head1 CONFIGURATION
208
209=head2 Squid configuration
210
211=head3 access_log directive
212
213The path to the access log file is used to provide the database connection parameters.
214
215 access_log daemon:/mysql_host:port/database/table/username/password squid
216
217The 'daemon' prefix is mandatory and tells squid that the B<logfile_daemon> helper is to be used instead of the normal file logging.
218
219The last parameter tells squid which log format to use when writing lines to the log daemon.
220Presently B<squid> format is supported.
221
222=over 4
223
224=item mysql_host:port
225
226Host where the mysql server is running. If left empty, 'localhost' is assumed.
227
228=item database
229
230Name of the database to connect to. If left empty, 'squid_log' is assumed.
231
232=item table
233
234Name of the database table where log lines are stored. If left empty, 'access_log' is assumed.
235
236=item username
237
238Username to use when connecting to the database. If left empty, 'squid' is assumed.
239
240=item password
241
242Password to use when connecting to the database. If left empty, no password is used.
243
244=back
245
246To leave all fields to their default values, you can use a single slash:
247
248 access_log daemon:/ squid
249
250To specify only the database password, which by default is empty, you must leave unspecified all the other parameters by using null strings:
251
252 access_log daemon://///password squid
253
254=head3 logfile_daemon directive
255
256This is the current way of telling squid where the logfile daemon resides.
257
258 logfile_daemon /path/to/squid/libexec/logfile-daemon_mysql.pl
259
260The script must be copied to the location specified in the directive.
261
262=head2 Database configuration
263
264Let's call the database 'squid_log' and the log table 'access_log'. The username and password for the db connection will be both 'squid'.
265
266=head3 Database
267
268Create the database:
269
270 CREATE DATABASE squid_log;
271
272=head3 User
273
274Create the user:
275
276 GRANT INSERT,SELECT,CREATE ON squid_log.* TO 'squid'@'localhost' IDENTIFIED BY 'squid';
277 FLUSH PRIVILEGES;
278
279Note that only CREATE, INSERT and SELECT privileges are granted to the 'squid' user. This ensures that the logfile daemon script cannot change or modify the log entries.
280
281=head3 Table
282
283The Daemon will attempt to initialize this table if none exists when it starts.
284
285The table created should look like:
286
287 CREATE TABLE access_log (
288 id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY,
289 time_since_epoch DECIMAL(15,3),
290 time_response INTEGER,
291 ip_client CHAR(15),
292 ip_server CHAR(15),
293 http_status_code VARCHAR(10),
294 http_reply_size INTEGER,
295 http_method VARCHAR(20),
296 http_url TEXT,
297 http_username VARCHAR(20),
298 http_mime_type VARCHAR(50),
299 squid_hier_status VARCHAR(20),
300 squid_request_status VARCHAR(20)
301 );
302
303=head1 VERSION INFORMATION
304
305This document refers to C<log_db_daemon> script version 0.5.
306
307The script has been developed and tested in the following environment:
308
309=over 4
310
311=item squid-2.7 Squid-3.2
312
313=item mysql 5.0.26 and 5.1
314
315=item perl 5.8.8
316
317=item OpenSUSE 10.2
318
319=back
320
321=head1 DATA EXTRACTION
322
323=head2 Sample queries.
324
325=over 4
326
327=item Clients accessing the cache
328
329 SELECT DISTINCT ip_client FROM access_log;
330
331=item Number of request per day
332
333 SELECT
334 DATE(FROM_UNIXTIME(time_since_epoch)) AS date_day,
335 COUNT(*) AS num_of_requests
336 FROM access_log
337 GROUP BY 1
338 ORDER BY 1;
339
340=item Request status count
341
342To obtain the raw count of each request status:
343
344 SELECT squid_request_status, COUNT(*) AS n
345 FROM access_log
346 GROUP BY squid_request_status
347 ORDER BY 2 DESC;
348
349To calculate the percentage of each request status:
350
351 SELECT
352 squid_request_status,
353 (COUNT(*)/(SELECT COUNT(*) FROM access_log)*100) AS percentage
354 FROM access_log
355 GROUP BY squid_request_status
356 ORDER BY 2 DESC;
357
358To distinguish only between HITs and MISSes:
359
360 SELECT
361 'hits',
362 (SELECT COUNT(*)
363 FROM access_log
364 WHERE squid_request_status LIKE '%HIT%')
365 /
366 (SELECT COUNT(*) FROM access_log)*100
367 AS percentage
368 UNION
369 SELECT
370 'misses',
371 (SELECT COUNT(*)
372 FROM access_log
373 WHERE squid_request_status LIKE '%MISS%')
374 /
375 (SELECT COUNT(*) FROM access_log)*100
376 AS pecentage;
377
378=item Response time ranges
379
380 SELECT
381 '0..500',
382 COUNT(*)/(SELECT COUNT(*) FROM access_log)*100 AS percentage
383 FROM access_log
384 WHERE time_response >= 0 AND time_response < 500
385 UNION
386 SELECT
387 '500..1000',
388 COUNT(*)/(SELECT COUNT(*) FROM access_log)*100 AS percentage
389 FROM access_log
390 WHERE time_response >= 500 AND time_response < 1000
391 UNION
392 SELECT
393 '1000..2000',
394 COUNT(*)/(SELECT COUNT(*) FROM access_log)*100 AS percentage
395 FROM access_log
396 WHERE time_response >= 1000 AND time_response < 2000
397 UNION
398 SELECT
399 '>= 2000',
400 COUNT(*)/(SELECT COUNT(*) FROM access_log)*100 AS percentage
401 FROM access_log
402 WHERE time_response >= 2000;
403
404=item Traffic by mime type
405
406 SELECT
407 http_mime_type,
408 SUM(http_reply_size) as total_bytes
409 FROM access_log
410 GROUP BY http_mime_type
411 ORDER BY 2 DESC;
412
413=item Traffic by client
414
415 SELECT
416 ip_client,
417 SUM(http_reply_size) AS total_bytes
418 FROM access_log
419 GROUP BY 1
420 ORDER BY 2 DESC;
421
422=back
423
424=head2 Speed issues
425
426The MyISAM storage engine is known to be faster than the InnoDB one, so although it doesn't support transactions and referential integrity, it might be more appropriate in this scenario. You might want to append "ENGINE=MYISAM" at the end of the table creation code in the above SQL script.
427
428Indexes should be created according to the queries that are more frequently run. The DDL script only creates an implicit index for the primary key column.
429
430=head1 TODO
431
432=head2 Table cleanup
433
434This script currently implements only the C<L> (i.e. "append a line to the log") command, therefore the log lines are never purged from the table. This approach has an obvious scalability problem.
435
436One solution would be to implement e.g. the "rotate log" command in a way that would calculate some summary values, put them in a "summary table" and then delete the lines used to caluclate those values.
437
438Similar cleanup code could be implemented in an external script and run periodically independently from squid log commands.
439
440=head2 Testing
441
442This script has only been tested in low-volume scenarios (single client, less than 10 req/s). Tests in high volume environments could reveal performance bottlenecks and bugs.
443
444=head1 AUTHOR
445
446Marcello Romani, marcello.romani@libero.it
447Amos Jeffries, amosjeffries@squid-cache.org
448
449=head1 COPYRIGHT AND LICENSE
450
ca02e0ec
AJ
451 * Copyright (C) 1996-2014 The Squid Software Foundation and contributors
452 *
453 * Squid software is distributed under GPLv2+ license and includes
454 * contributions from numerous individuals and organizations.
455 * Please see the COPYING and CONTRIBUTORS files for details.
456
1c363fe7
AJ
457Copyright (C) 2008 by Marcello Romani
458
459This library is free software; you can redistribute it and/or modify
460it under the same terms as Perl itself, either Perl version 5.8.8 or,
461at your option, any later version of Perl 5 you may have available.
462
463=cut