]> git.ipfire.org Git - thirdparty/squid.git/blob - helpers/log_daemon/DB/log_db_daemon.pl.in
SourceFormat Enforcement
[thirdparty/squid.git] / helpers / log_daemon / DB / log_db_daemon.pl.in
1 #!@PERL@
2 use strict;
3 use warnings;
4 use DBI;
5 use English qw( -no_match_vars );
6 use Getopt::Long;
7 use Pod::Usage;
8
9 $|=1;
10
11 =pod
12
13 =head1 NAME
14
15 log_db_daemon - Database logging daemon for Squid
16
17 =head1 SYNOPSIS
18
19 log_db_daemon DSN [options]
20
21 =head1 DESCRIPTOIN
22
23 This program writes Squid access.log entries to a database.
24 Presently only accepts the B<squid> native format
25
26 =over 8
27
28 =item B<DSN>
29
30 Database DSN encoded as a path. This is sent as the access_log file path.
31
32 Sample configuration:
33 access_log daemon:/host/database/table/username/password squid
34
35 to leave a parameter unspecified use a double slash:
36 access_log daemon://database/table/username/password squid
37
38 Default "DBI:mysql:database=squid"
39
40 =item B<--debug>
41
42 Write debug messages to Squid stderr or cache.log
43
44 =back
45
46 =cut
47
48 # the first argument to this script is the log file path describing the DSN
49 my $log_file = shift;
50
51 # others may be options
52 my $debug = 0;
53 GetOptions(
54 'debug' => \$debug,
55 );
56
57
58 # utility routine to print messages on stderr (so they appear in cache log)
59 # without using warn, which would clutter the log with source line numbers
60 sub log_info {
61 my $msg = shift;
62 print STDERR "$msg\n";
63 }
64
65 # we use logfile to pass database access information to this script
66 # sample configuration:
67 # access_log daemon:/host/database/table/username/password squid
68 # to let a parmeter unspecified, e.g. the database host, use a double slash:
69 # access_log daemon://database/table/username/password squid
70 my ( $host, $database, $table, $user, $pass ) = $log_file =~ / \/(.*?)\/(.*?)\/(.*?)\/(.*?)\/(.*?) \z /xms;
71
72 if ( !$host ) {
73 $host = 'localhost';
74 log_info("Database host not specified. Using $host.");
75 }
76
77 if ( !$database ) {
78 $database = 'squid_log';
79 log_info("Database name not specified. Using $database.");
80 }
81
82 if ( !$table ) {
83 $table = 'access_log';
84 log_info("Table parameter not specified. Using $table.");
85 }
86
87 if ( !$user ) {
88 $user = 'squid';
89 log_info("User parameter not specified. Using $user.");
90 }
91
92 if ( !$pass ) {
93 log_info('No password specified. Connecting with NO password.');
94 }
95
96 # fields that we should have in the table
97 # Order here must match the order of fields in the Log format and parse() output array.
98 my @db_fields = qw(
99 id
100 time_since_epoch
101 time_response
102 ip_client
103 squid_request_status
104 http_status_code
105 http_reply_size
106 http_method
107 http_url
108 http_username
109 squid_hier_status
110 ip_server
111 http_mime_type
112 );
113
114 # perform db connection
115 my $dsn = "DBI:mysql:database=$database" . ($host ne "localhost" ? ":$host" : "");
116 my $dbh;
117 my $sth;
118 eval {
119 warn "Connecting... dsn='$dsn', username='$user', password='...'";
120 $dbh = DBI->connect($dsn, $user, $pass, { AutoCommit => 1, RaiseError => 1, PrintError => 1 });
121 };
122 if ($EVAL_ERROR) {
123 die "Cannot connect to database: $DBI::errstr";
124 }
125
126
127 # a simple test to assure the specified table exists
128 eval {
129 my $q = 'SELECT ' . join(',',@db_fields) . " FROM $table LIMIT 1";
130 my $sth = $dbh->prepare($q);
131 $sth->execute;
132 };
133 if ($EVAL_ERROR) {
134 # run a query to create the table of required syntax
135 my $create_query = 'CREATE TABLE ' . $table . ' (' .
136 " id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY," .
137 " time_since_epoch DECIMAL(15,3)," .
138 " time_response INTEGER," .
139 " ip_client CHAR(15)," .
140 " ip_server CHAR(15)," .
141 " http_status_code VARCHAR(10)," .
142 " http_reply_size INTEGER," .
143 " http_method VARCHAR(20)," .
144 " http_url TEXT," .
145 " http_username VARCHAR(20)," .
146 " http_mime_type VARCHAR(50)," .
147 " squid_request_status VARCHAR(50)," .
148 " squid_hier_status VARCHAR(20)" .
149 ");" ;
150
151 $sth = $dbh->prepare($create_query);
152 $sth->execute;
153 # test again and fail hard if it is still broken.
154 eval {
155 my $q = 'SELECT ' . join(',',@db_fields) . " FROM $table LIMIT 1";
156 my $sth = $dbh->prepare($q);
157 $sth->execute;
158 };
159 if ($EVAL_ERROR) {
160 die "Error initializing database table: $EVAL_ERROR";
161 };
162 }
163 # test
164
165 # for better performance, prepare the statement at startup
166 eval {
167 my $q = "INSERT INTO $table (" . join(',',@db_fields) . ") VALUES(NULL" . ',?' x (scalar(@db_fields)-1) . ')';
168 #$sth = $dbh->prepare("INSERT INTO $table VALUES(NULL,?,?,?,?,?,?,?,?,?,?,?,?)");
169 $sth = $dbh->prepare($q);
170 };
171 if ($EVAL_ERROR) {
172 die "Error while preparing sql statement: $EVAL_ERROR";
173 }
174
175 sub parse($) {
176 my ($line) = @_;
177 my (@t) = $line =~ /^L(\d+\.\d+) *(\d+?) (.*?) (.*?)\/(\d+?) (\d+?) (.*?) (.*?) (.*?) (.*?)\/(.*?) (.*)$/;
178 }
179
180 # main loop
181 while (my $line = <>) {
182 chomp $line;
183
184 my $cmd = substr($line, 0, 1); # extract command byte
185
186 if ( $cmd eq 'L' ) {
187 my @log_entry = parse($line);
188 eval { # we catch db errors to avoid crashing squid in case something goes wrong...
189 $sth->execute(@log_entry) or die $sth->errstr
190 };
191 if ( $EVAL_ERROR ) { # leave a trace of the error in the logs
192 warn $EVAL_ERROR . " values=(" . join(', ', @log_entry) . ')';
193 }
194 }
195 }
196
197 $dbh->disconnect();
198
199 __END__
200
201 =pod
202
203 =head1 DESCRIPTION
204
205 This module exploits the new logfile daemon support available in squid 2.7 and 3.2 to store access log entries in a MySQL database.
206
207 =head1 CONFIGURATION
208
209 =head2 Squid configuration
210
211 =head3 access_log directive
212
213 The path to the access log file is used to provide the database connection parameters.
214
215 access_log daemon:/mysql_host:port/database/table/username/password squid
216
217 The 'daemon' prefix is mandatory and tells squid that the B<logfile_daemon> helper is to be used instead of the normal file logging.
218
219 The last parameter tells squid which log format to use when writing lines to the log daemon.
220 Presently B<squid> format is supported.
221
222 =over 4
223
224 =item mysql_host:port
225
226 Host where the mysql server is running. If left empty, 'localhost' is assumed.
227
228 =item database
229
230 Name of the database to connect to. If left empty, 'squid_log' is assumed.
231
232 =item table
233
234 Name of the database table where log lines are stored. If left empty, 'access_log' is assumed.
235
236 =item username
237
238 Username to use when connecting to the database. If left empty, 'squid' is assumed.
239
240 =item password
241
242 Password to use when connecting to the database. If left empty, no password is used.
243
244 =back
245
246 To leave all fields to their default values, you can use a single slash:
247
248 access_log daemon:/ squid
249
250 To specify only the database password, which by default is empty, you must leave unspecified all the other parameters by using null strings:
251
252 access_log daemon://///password squid
253
254 =head3 logfile_daemon directive
255
256 This is the current way of telling squid where the logfile daemon resides.
257
258 logfile_daemon /path/to/squid/libexec/logfile-daemon_mysql.pl
259
260 The script must be copied to the location specified in the directive.
261
262 =head2 Database configuration
263
264 Let's call the database 'squid_log' and the log table 'access_log'. The username and password for the db connection will be both 'squid'.
265
266 =head3 Database
267
268 Create the database:
269
270 CREATE DATABASE squid_log;
271
272 =head3 User
273
274 Create the user:
275
276 GRANT INSERT,SELECT,CREATE ON squid_log.* TO 'squid'@'localhost' IDENTIFIED BY 'squid';
277 FLUSH PRIVILEGES;
278
279 Note that only CREATE, INSERT and SELECT privileges are granted to the 'squid' user. This ensures that the logfile daemon script cannot change or modify the log entries.
280
281 =head3 Table
282
283 The Daemon will attempt to initialize this table if none exists when it starts.
284
285 The table created should look like:
286
287 CREATE TABLE access_log (
288 id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY,
289 time_since_epoch DECIMAL(15,3),
290 time_response INTEGER,
291 ip_client CHAR(15),
292 ip_server CHAR(15),
293 http_status_code VARCHAR(10),
294 http_reply_size INTEGER,
295 http_method VARCHAR(20),
296 http_url TEXT,
297 http_username VARCHAR(20),
298 http_mime_type VARCHAR(50),
299 squid_hier_status VARCHAR(20),
300 squid_request_status VARCHAR(20)
301 );
302
303 =head1 VERSION INFORMATION
304
305 This document refers to C<log_db_daemon> script version 0.5.
306
307 The script has been developed and tested in the following environment:
308
309 =over 4
310
311 =item squid-2.7 Squid-3.2
312
313 =item mysql 5.0.26 and 5.1
314
315 =item perl 5.8.8
316
317 =item OpenSUSE 10.2
318
319 =back
320
321 =head1 DATA EXTRACTION
322
323 =head2 Sample queries.
324
325 =over 4
326
327 =item Clients accessing the cache
328
329 SELECT DISTINCT ip_client FROM access_log;
330
331 =item Number of request per day
332
333 SELECT
334 DATE(FROM_UNIXTIME(time_since_epoch)) AS date_day,
335 COUNT(*) AS num_of_requests
336 FROM access_log
337 GROUP BY 1
338 ORDER BY 1;
339
340 =item Request status count
341
342 To obtain the raw count of each request status:
343
344 SELECT squid_request_status, COUNT(*) AS n
345 FROM access_log
346 GROUP BY squid_request_status
347 ORDER BY 2 DESC;
348
349 To calculate the percentage of each request status:
350
351 SELECT
352 squid_request_status,
353 (COUNT(*)/(SELECT COUNT(*) FROM access_log)*100) AS percentage
354 FROM access_log
355 GROUP BY squid_request_status
356 ORDER BY 2 DESC;
357
358 To distinguish only between HITs and MISSes:
359
360 SELECT
361 'hits',
362 (SELECT COUNT(*)
363 FROM access_log
364 WHERE squid_request_status LIKE '%HIT%')
365 /
366 (SELECT COUNT(*) FROM access_log)*100
367 AS percentage
368 UNION
369 SELECT
370 'misses',
371 (SELECT COUNT(*)
372 FROM access_log
373 WHERE squid_request_status LIKE '%MISS%')
374 /
375 (SELECT COUNT(*) FROM access_log)*100
376 AS pecentage;
377
378 =item Response time ranges
379
380 SELECT
381 '0..500',
382 COUNT(*)/(SELECT COUNT(*) FROM access_log)*100 AS percentage
383 FROM access_log
384 WHERE time_response >= 0 AND time_response < 500
385 UNION
386 SELECT
387 '500..1000',
388 COUNT(*)/(SELECT COUNT(*) FROM access_log)*100 AS percentage
389 FROM access_log
390 WHERE time_response >= 500 AND time_response < 1000
391 UNION
392 SELECT
393 '1000..2000',
394 COUNT(*)/(SELECT COUNT(*) FROM access_log)*100 AS percentage
395 FROM access_log
396 WHERE time_response >= 1000 AND time_response < 2000
397 UNION
398 SELECT
399 '>= 2000',
400 COUNT(*)/(SELECT COUNT(*) FROM access_log)*100 AS percentage
401 FROM access_log
402 WHERE time_response >= 2000;
403
404 =item Traffic by mime type
405
406 SELECT
407 http_mime_type,
408 SUM(http_reply_size) as total_bytes
409 FROM access_log
410 GROUP BY http_mime_type
411 ORDER BY 2 DESC;
412
413 =item Traffic by client
414
415 SELECT
416 ip_client,
417 SUM(http_reply_size) AS total_bytes
418 FROM access_log
419 GROUP BY 1
420 ORDER BY 2 DESC;
421
422 =back
423
424 =head2 Speed issues
425
426 The MyISAM storage engine is known to be faster than the InnoDB one, so although it doesn't support transactions and referential integrity, it might be more appropriate in this scenario. You might want to append "ENGINE=MYISAM" at the end of the table creation code in the above SQL script.
427
428 Indexes should be created according to the queries that are more frequently run. The DDL script only creates an implicit index for the primary key column.
429
430 =head1 TODO
431
432 =head2 Table cleanup
433
434 This script currently implements only the C<L> (i.e. "append a line to the log") command, therefore the log lines are never purged from the table. This approach has an obvious scalability problem.
435
436 One solution would be to implement e.g. the "rotate log" command in a way that would calculate some summary values, put them in a "summary table" and then delete the lines used to caluclate those values.
437
438 Similar cleanup code could be implemented in an external script and run periodically independently from squid log commands.
439
440 =head2 Testing
441
442 This script has only been tested in low-volume scenarios (single client, less than 10 req/s). Tests in high volume environments could reveal performance bottlenecks and bugs.
443
444 =head1 AUTHOR
445
446 Marcello Romani, marcello.romani@libero.it
447 Amos Jeffries, amosjeffries@squid-cache.org
448
449 =head1 COPYRIGHT AND LICENSE
450
451 * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
452 *
453 * Squid software is distributed under GPLv2+ license and includes
454 * contributions from numerous individuals and organizations.
455 * Please see the COPYING and CONTRIBUTORS files for details.
456
457 Copyright (C) 2008 by Marcello Romani
458
459 This library is free software; you can redistribute it and/or modify
460 it under the same terms as Perl itself, either Perl version 5.8.8 or,
461 at your option, any later version of Perl 5 you may have available.
462
463 =cut