]>
git.ipfire.org Git - thirdparty/squid.git/blob - src/log/DB/log_db_daemon.pl.in
5 use English
qw( -no_match_vars ) ;
15 log_db_daemon - Database logging daemon for Squid
21 log_db_daemon DSN [options]
25 This program writes Squid access.log entries to a database.
26 Presently only accepts the B<squid> native log format.
28 The script has been developed and tested in the following environment:
32 =item squid-2.7 Squid-3.2
34 =item mysql 5.0.26 and 5.1
48 Database DSN encoded as a path. This is sent as the access_log file path.
51 access_log daemon:/host/database/table/username/password squid
53 to leave a parameter unspecified use a double slash:
54 access_log daemon://database/table/username/password squid
56 Default "DBI:mysql:database=squid"
60 Write debug info to stderr.
66 =head2 Squid configuration
68 =head3 access_log directive
70 The path to the access log file is used to provide the database connection parameters.
72 access_log daemon:/mysql_host:port/database/table/username/password squid
74 The 'daemon' prefix is mandatory and tells squid that the B<logfile_daemon> helper is to be used instead of the normal file logging.
76 The last parameter tells squid which log format to use when writing lines to the log daemon.
77 Presently B<squid> format is supported.
83 Host where the mysql server is running. If left empty, 'localhost' is assumed.
87 Name of the database to connect to. If left empty, 'squid_log' is assumed.
91 Name of the database table where log lines are stored. If left empty, 'access_log' is assumed.
95 Username to use when connecting to the database. If left empty, 'squid' is assumed.
99 Password to use when connecting to the database. If left empty, no password is used.
103 To leave all fields to their default values, you can use a single slash:
105 access_log daemon:/ squid
107 To specify only the database password, which by default is empty, you must leave unspecified all the other parameters by using null strings:
109 access_log daemon://///password squid
111 =head3 logfile_daemon directive
113 This is the current way of telling squid where the logfile daemon resides.
115 logfile_daemon /path/to/squid/libexec/logfile-daemon_mysql.pl
117 The script must be copied to the location specified in the directive.
119 =head2 Database configuration
121 Let's call the database 'squid_log' and the log table 'access_log'. The username and password for the db connection will be both 'squid'.
127 CREATE DATABASE squid_log;
133 GRANT INSERT,SELECT,CREATE ON squid_log.* TO 'squid'@'localhost' IDENTIFIED BY 'squid';
136 Note that only CREATE, INSERT and SELECT privileges are granted to the 'squid' user. This ensures that the logfile daemon script cannot change or modify the log entries.
140 The Daemon will attempt to initialize this table if none exists when it starts.
142 The table created should look like:
144 CREATE TABLE access_log (
145 id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY,
146 time_since_epoch DECIMAL(15,3),
147 time_response INTEGER,
150 http_status_code VARCHAR(10),
151 http_reply_size INTEGER,
152 http_method VARCHAR(20),
154 http_username VARCHAR(20),
155 http_mime_type VARCHAR(50),
156 squid_hier_status VARCHAR(20),
157 squid_request_status VARCHAR(20)
160 =head1 DATA EXTRACTION
162 =head2 Sample queries.
166 =item Clients accessing the cache
168 SELECT DISTINCT ip_client FROM access_log;
170 =item Number of request per day
173 DATE(FROM_UNIXTIME(time_since_epoch)) AS date_day,
174 COUNT(*) AS num_of_requests
179 =item Request status count
181 To obtain the raw count of each request status:
183 SELECT squid_request_status, COUNT(*) AS n
185 GROUP BY squid_request_status
188 To calculate the percentage of each request status:
191 squid_request_status,
192 (COUNT(*)/(SELECT COUNT(*) FROM access_log)*100) AS percentage
194 GROUP BY squid_request_status
197 To distinguish only between HITs and MISSes:
203 WHERE squid_request_status LIKE '%HIT%')
205 (SELECT COUNT(*) FROM access_log)*100
212 WHERE squid_request_status LIKE '%MISS%')
214 (SELECT COUNT(*) FROM access_log)*100
217 =item Response time ranges
221 COUNT(*)/(SELECT COUNT(*) FROM access_log)*100 AS percentage
223 WHERE time_response >= 0 AND time_response < 500
227 COUNT(*)/(SELECT COUNT(*) FROM access_log)*100 AS percentage
229 WHERE time_response >= 500 AND time_response < 1000
233 COUNT(*)/(SELECT COUNT(*) FROM access_log)*100 AS percentage
235 WHERE time_response >= 1000 AND time_response < 2000
239 COUNT(*)/(SELECT COUNT(*) FROM access_log)*100 AS percentage
241 WHERE time_response >= 2000;
243 =item Traffic by mime type
247 SUM(http_reply_size) as total_bytes
249 GROUP BY http_mime_type
252 =item Traffic by client
256 SUM(http_reply_size) AS total_bytes
267 The MyISAM storage engine is known to be faster than the InnoDB one, so although it doesn't support transactions and referential integrity, it might be more appropriate in this scenario. You might want to append "ENGINE=MYISAM" at the end of the table creation code in the above SQL script.
269 Indexes should be created according to the queries that are more frequently run. The DDL script only creates an implicit index for the primary key column.
273 This script currently implements only the C<L> (i.e. "append a line to the log") command, therefore the log lines are never purged from the table. This approach has an obvious scalability problem.
275 One solution would be to implement e.g. the "rotate log" command in a way that would calculate some summary values, put them in a "summary table" and then delete the lines used to calculate those values.
277 Similar cleanup code could be implemented in an external script and run periodically independently from squid log commands.
281 This script has only been tested in low-volume scenarios (single client, less than 10 req/s). Tests in high volume environments could reveal performance bottlenecks and bugs.
285 This program was written by
286 I<Marcello Romani <marcello.romani@libero.it>> ,
287 I<Amos Jeffries <amosjeffries@squid-cache.org>>
291 * Copyright (C) 1996-2020 The Squid Software Foundation and contributors
293 * Squid software is distributed under GPLv2+ license and includes
294 * contributions from numerous individuals and organizations.
295 * Please see the COPYING and CONTRIBUTORS files for details.
297 Copyright (C) 2008 by Marcello Romani
299 This library is free software; you can redistribute it and/or modify
300 it under the same terms as Perl itself, either Perl version 5.8.8 or,
301 at your option, any later version of Perl 5 you may have available.
305 Questions on the usage of this program can be sent to the I<Squid Users mailing list <squid-users@lists.squid-cache.org>>
307 =head1 REPORTING BUGS
309 Bug reports need to be made in English.
310 See http://wiki.squid-cache.org/SquidFaq/BugReporting for details of what you need to include with your bug report.
312 Report bugs or bug fixes using http://bugs.squid-cache.org/
314 Report serious security bugs to I<Squid Bugs <squid-bugs@lists.squid-cache.org>>
316 Report ideas for new improvements to the I<Squid Developers mailing list <squid-dev@lists.squid-cache.org>>
322 The Squid FAQ wiki http://wiki.squid-cache.org/SquidFaq
324 The Squid Configuration Manual http://www.squid-cache.org/Doc/config/
328 # the first argument to this script is the log file path describing the DSN
329 my $log_file = shift ;
331 # others may be options
338 # utility routine to print messages on stderr (so they appear in cache log)
339 # without using warn, which would clutter the log with source line numbers
342 print STDERR
" $msg \n " ;
345 # we use logfile to pass database access information to this script
346 # sample configuration:
347 # access_log daemon:/host/database/table/username/password squid
348 # to let a parmeter unspecified, e.g. the database host, use a double slash:
349 # access_log daemon://database/table/username/password squid
350 my ( $host , $database , $table , $user , $pass ) = $log_file =~ / \/ (.* ?
) \
/(.*?)\/ (.* ?
) \
/(.*?)\/ (.* ?
) \z
/ xms
;
354 log_info
( "Database host not specified. Using $host ." );
358 $database = 'squid_log' ;
359 log_info
( "Database name not specified. Using $database ." );
363 $table = 'access_log' ;
364 log_info
( "Table parameter not specified. Using $table ." );
369 log_info
( "User parameter not specified. Using $user ." );
373 log_info
( 'No password specified. Connecting with NO password.' );
376 # fields that we should have in the table
377 # Order here must match the order of fields in the Log format and parse() output array.
394 # perform db connection
395 my $dsn = "DBI:mysql:database= $database " . ( $host ne "localhost" ?
": $host " : "" );
399 warn "Connecting... dsn=' $dsn ', username=' $user ', password='...'" ;
400 $dbh = DBI
-> connect ( $dsn , $user , $pass , { AutoCommit
=> 1 , RaiseError
=> 1 , PrintError
=> 1 });
403 die "Cannot connect to database: $DBI ::errstr" ;
407 # a simple test to assure the specified table exists
409 my $q = 'SELECT ' . join ( ',' , @db_fields ) . " FROM $table LIMIT 1" ;
410 my $sth = $dbh -> prepare ( $q );
414 # run a query to create the table of required syntax
415 my $create_query = 'CREATE TABLE ' . $table . ' (' .
416 " id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY," .
417 " time_since_epoch DECIMAL(15,3)," .
418 " time_response INTEGER," .
419 " ip_client CHAR(15)," .
420 " ip_server CHAR(15)," .
421 " http_status_code VARCHAR(10)," .
422 " http_reply_size INTEGER," .
423 " http_method VARCHAR(20)," .
425 " http_username VARCHAR(20)," .
426 " http_mime_type VARCHAR(50)," .
427 " squid_request_status VARCHAR(50)," .
428 " squid_hier_status VARCHAR(20)" .
431 $sth = $dbh -> prepare ( $create_query );
433 # test again and fail hard if it is still broken.
435 my $q = 'SELECT ' . join ( ',' , @db_fields ) . " FROM $table LIMIT 1" ;
436 my $sth = $dbh -> prepare ( $q );
440 die "Error initializing database table: $EVAL_ERROR " ;
445 # for better performance, prepare the statement at startup
447 my $q = "INSERT INTO $table (" . join ( ',' , @db_fields ) . ") VALUES(NULL" . ',?' x
( scalar ( @db_fields )- 1 ) . ')' ;
448 #$sth = $dbh->prepare("INSERT INTO $table VALUES(NULL,?,?,?,?,?,?,?,?,?,?,?,?)");
449 $sth = $dbh -> prepare ( $q );
452 die "Error while preparing sql statement: $EVAL_ERROR " ;
457 my ( @t ) = $line =~ /^L(\d+\.\d+) *(\d+?) (.*?) (.*?)\/ ( \d
+ ?
) ( \d
+ ?
) (.* ?
) (.* ?
) (.* ?
) (.* ?
) \
/(.*?) (.*)$/ ;
461 while ( my $line = <>) {
464 my $cmd = substr ( $line , 0 , 1 ); # extract command byte
467 my @log_entry = parse
( $line );
468 eval { # we catch db errors to avoid crashing squid in case something goes wrong...
469 $sth -> execute ( @log_entry ) or die $sth -> errstr
471 if ( $EVAL_ERROR ) { # leave a trace of the error in the logs
472 warn $EVAL_ERROR . " values=(" . join ( ', ' , @log_entry ) . ')' ;