From 2c7e8c23e72fd52e5db145171e87914beea35b52 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Fr=C3=A9d=C3=A9ric=20Marchal?= Date: Tue, 25 Jan 2011 21:08:34 +0000 Subject: [PATCH] Split the input log file into several files Each file contains one day worth of data. The name of the output file is made of a user supplied prefix and the date corresponding to the data in the file. The file may be written in a directory selected with command line option -o. Thanks to Mauricio Silveira. --- CMakeLists.txt | 2 +- ChangeLog | 3 +- include/defs.h | 2 +- include/info.h | 2 +- log.c | 16 +++++++---- sarg.1 | 22 +++++++++++++-- sarg_manpage.xml | 30 ++++++++++++++++---- splitlog.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++-- 8 files changed, 127 insertions(+), 21 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9c38d04..237a229 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,7 @@ PROJECT(sarg C) SET(sarg_VERSION 2) SET(sarg_REVISION "3.2-pre1") SET(sarg_BUILD "") -SET(sarg_BUILDDATE "Jan-21-2011") +SET(sarg_BUILDDATE "Jan-25-2011") INCLUDE(AddFileDependencies) INCLUDE(CheckIncludeFile) diff --git a/ChangeLog b/ChangeLog index 18cf308..1282086 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,12 +1,13 @@ SARG ChangeLog -Dec-21-2010 Version 2.3.2-pre1 +Jan-25-2010 Version 2.3.2-pre1 - Add support for sorttable.js (http://www.kryogenix.org/code/browser/sorttable/) to dynamically sort some tables (thanks to Éric). - Add the two command line options --lastlog and --keeplogs to set the number of reports to keep or to keep all the reports respectively (thanks to Emmanuel Lacour for the suggestion). - Report the user ID in the e-mail report. - Add an option to sort the top sites by time. - Delete unused files from the directory containing the user report (thanks to alf-man). - Add the index_fields option to hide the directory size column in the index sorted by date. 
+ - Split the input log file in several files each containing one day worth of data (thanks to Mauricio Silveira). Sep-18-2010 Version 2.3.1 - Remove the distinct printf for the alpha architecture as it doesn't work anymore and is not necessary anyway. diff --git a/include/defs.h b/include/defs.h index 97a9950..72ee35e 100755 --- a/include/defs.h +++ b/include/defs.h @@ -173,7 +173,7 @@ void tmpsort(void); void sort_labels(const char **label,const char **order); // splitlog.c -void splitlog(const char *arq, char *df, int dfrom, int duntil, int convert); +void splitlog(const char *arq, const char *df, int dfrom, int duntil, int convert, const char *splitprefix); // squidguard_log.c void squidguard_log(void); diff --git a/include/info.h b/include/info.h index 32ba2d2..2195f8e 100755 --- a/include/info.h +++ b/include/info.h @@ -1,3 +1,3 @@ -#define VERSION PACKAGE_VERSION" Jan-21-2011" +#define VERSION PACKAGE_VERSION" Jan-25-2011" #define PGM PACKAGE_NAME #define URL "http://sarg.sourceforge.net" diff --git a/log.c b/log.c index 35734dd..71a7aeb 100644 --- a/log.c +++ b/log.c @@ -114,6 +114,7 @@ int main(int argc,char *argv[]) char *url; char *urly; char user[MAX_USER_LEN]; + char splitprefix[MAXLEN]; enum InputLogFormat ilf; int ilf_count[ILF_Last]; int ch; @@ -167,6 +168,7 @@ int main(int argc,char *argv[]) {"lastlog",required_argument,NULL,2}, {"keeplogs",no_argument,NULL,3}, {"split",no_argument,&split,1}, + {"splitprefix",required_argument,NULL,'P'}, {0,0,0,0} }; @@ -312,6 +314,7 @@ int main(int argc,char *argv[]) hmf=-1; site[0]='\0'; outdir[0]='\0'; + splitprefix[0]='\0'; elap[0]='\0'; email[0]='\0'; zip[0]='\0'; @@ -363,7 +366,7 @@ int main(int argc,char *argv[]) strcpy(Title,_("Squid User Access Report")); - while((ch = getopt_long_only(argc, argv, "a:b:c:d:e:f:g:u:l:L:o:s:t:w:hijmnprvxyz",long_options,&option_index)) != -1){ + while((ch = getopt_long_only(argc, argv, "a:b:c:d:e:f:g:u:l:L:o:s:t:w:P:hijmnprvxyz",long_options,&option_index)) != -1){ 
switch(ch) { case 0: @@ -441,6 +444,9 @@ int main(int argc,char *argv[]) case 'p': userip=true; break; + case 'P': + strcpy(splitprefix,optarg); + break; case 'r': realt=true; break; @@ -537,6 +543,9 @@ int main(int argc,char *argv[]) if (lastlog>=0) LastLog=lastlog; + if(outdir[0] == '\0') strcpy(outdir,OutputDir); + if(outdir[0] != '\0') strcat(outdir,"/"); + if(realt) { realtime(); exit(EXIT_SUCCESS); @@ -562,7 +571,7 @@ int main(int argc,char *argv[]) } if(split) { for (iarq=0 ; iarq .\" Generator: DocBook XSL Stylesheets v1.75.2 -.\" Date: 27 Nov 2010 +.\" Date: 25 Jan 2011 .\" Manual: SARG .\" Source: sarg .\" Language: English .\" -.TH "SARG" "1" "27 Nov 2010" "sarg" "SARG" +.TH "SARG" "1" "25 Jan 2011" "sarg" "SARG" .\" ----------------------------------------------------------------- .\" * set default formatting .\" ----------------------------------------------------------------- @@ -231,6 +231,19 @@ Writes report in Generates reports using ip address instead of userid\&. .RE .PP +\fB\-P \fR\fB\fIprefix\fR\fR \fB\-\-splitprefix \fR\fB\fIprefix\fR\fR +.RS 4 +This option must be used with +\fB\-\-split\fR\&. If it is provided, the input log is split among several files each containing one day\&. The name of the output files is made of the +\fIprefix\fR +and the date formated as +\-YYYY\-MM\-DD\&. +.sp +The output files are written in the output directory specified with +\fB\-o\fR +or in the current directory\&. +.RE +.PP \fB\-s \fR\fB\fIstring\fR\fR .RS 4 Limits report to the site specified by @@ -245,6 +258,9 @@ Split the squid log file and output it as text on the standard output omitting t parameter\&. If it is combined with \fB\-\-convert\fR the dates are also converted to a human\-readable format\&. +.sp +Combined with +\fB\-P\fR, the log is written in several files each containing one day of the original log\&. 
.RE .PP \fB\-t \fR\fB\fIstring\fR\fR @@ -377,7 +393,7 @@ Author of the first manual page .RE .SH "COPYRIGHT" .br -Copyright \(co 2010 Frédéric Marchal +Copyright \(co 2011 Frédéric Marchal .br .SH "NOTES" .IP " 1." 4 diff --git a/sarg_manpage.xml b/sarg_manpage.xml index 942847b..6d4efba 100644 --- a/sarg_manpage.xml +++ b/sarg_manpage.xml @@ -9,15 +9,15 @@ sarg - 27 Nov 2010 - + 25 Jan 2011 + Frédéric Marchal Docbook version of the manual page fmarchal@users.sourceforge.net - + Billy Newsom @@ -30,9 +30,9 @@ Author of the first manual page gangitano@lugroma3.org - + - 2010 + 2011 Frédéric Marchal @@ -143,7 +143,7 @@ exporting the css as explained above. Use date to restrict the report to some date range during log file processing. Format for date is dd/mm/yyyy-dd/mm/yyyy -or a single date dd/mm/yyyy. Date ranges can also be specified as +or a single date dd/mm/yyyy. Date ranges can also be specified as day-n, week-n, or month-n where n is the number of days, weeks or months to jump backward. Note that there is no spaces around the hyphen. @@ -263,6 +263,20 @@ Generates reports using ip address instead of userid. + + + +This option must be used with . If it is provided, the input log is split among +several files each containing one day. The name of the output files is made of the prefix +and the date formated as -YYYY-MM-DD. + + +The output files are written in the output directory +specified with or in the current directory. + + + + @@ -280,6 +294,10 @@ range specified by the parameter. If it is combined with the dates are also converted to a human-readable format. + +Combined with , the log is written in several files each containing one day of the +original log. 
+ diff --git a/splitlog.c b/splitlog.c index 69c0153..462b3f2 100644 --- a/splitlog.c +++ b/splitlog.c @@ -27,18 +27,54 @@ #include "include/conf.h" #include "include/defs.h" -void splitlog(const char *arq, char *df, int dfrom, int duntil, int convert) +/* +Extract a date range from a squid log file and write it into a separate file. + +It can optionally convert the date in human readable format. + +The output can be split by day into separate files. + +\param arq The squid log file to split. +\param df The date format if the date is to be converted in human readable form. Only the first +character is taken into account. It can be 'e' for European date format or anything else for +US date format. +\param dfrom The first date to output in the form (Year*10000+Month*100+Day). +\param duntil The last date to output in the form (Year*10000+Month*100+Day). +\param convert \c True if the date must be converted into human readable form. +\param splitprefix If not empty, the output file is written in separate files (one for each day) and +the files are named after the day they contain prefixed with the string contained in this variable. 
+*/ +void splitlog(const char *arq, const char *df, int dfrom, int duntil, int convert, const char *splitprefix) { FILE *fp_in; + FILE *fp_ou=NULL; char *buf; char data[30]; char dia[11]; + char output_file[MAXLEN]; time_t tt; + time_t min_tt; + time_t max_tt=0; int idata=0; + int autosplit=0; + int output_prefix_len=0; + int prev_year=0, prev_month=0, prev_day=0; struct tm *t; struct getwordstruct gwarea; longline line; + if (splitprefix[0]!='\0') { + // '/' + '-YYYY-mm-dd' + '\0' == 13 + output_prefix_len=snprintf(output_file,sizeof(output_file)-12,"%s%s",outdir,splitprefix); + if (output_prefix_len>=sizeof(output_file)-12) { + debuga(_("(splitlog) Output path is too long: %s%s-YYYY-mm-dd\n"),outdir,splitprefix); + exit(EXIT_FAILURE); + } + autosplit=1; + } else { + fp_ou=stdout; + } + if(arq[0] == '\0') arq="/var/log/squid/access.log"; @@ -51,6 +87,7 @@ void splitlog(const char *arq, char *df, int dfrom, int duntil, int convert) debuga(_("Not enough memory to read the log file %s\n"),arq); exit(EXIT_FAILURE); } + time(&min_tt); while((buf=longline_read(fp_in,line))!=NULL) { getword_start(&gwarea,buf); @@ -67,15 +104,37 @@ void splitlog(const char *arq, char *df, int dfrom, int duntil, int convert) continue; } + if (autosplit && (prev_year!=t->tm_year || prev_month!=t->tm_mon || prev_day!=t->tm_mday)) { + prev_year=t->tm_year; + prev_month=t->tm_mon; + prev_day=t->tm_mday; + if (fp_ou && fclose(fp_ou)==EOF) { + debuga(_("Failed to close file %s - %s\n"),output_file,strerror(errno)); + exit(EXIT_FAILURE); + } + strftime(output_file+output_prefix_len, sizeof(output_file)-output_prefix_len, "-%Y-%m-%d", t); + /* + The line must be added to a file we have already created. The file must be created if the date + is seen for the first time. The idea is to create the files from scratch if the split is started + a second time. + */ + if ((fp_ou=MY_FOPEN(output_file,(tt>=min_tt && tt<=max_tt) ?
"a" : "w"))==NULL) { + debuga(_("(splitlog) Cannot open output log file %s - %s\n"),output_file,strerror(errno)); + exit(EXIT_FAILURE); + } + if (tt<min_tt) min_tt=tt; + if (tt>max_tt) max_tt=tt; + } + if(!convert) { - printf("%s %s\n",data,gwarea.current); + fprintf(fp_ou,"%s %s\n",data,gwarea.current); } else { if(df[0]=='e') strftime(dia, sizeof(dia), "%d/%m/%Y", t); else strftime(dia, sizeof(dia), "%m/%d/%Y", t); - printf("%s %02d:%02d:%02d %s\n",dia,t->tm_hour,t->tm_min,t->tm_sec,gwarea.current); + fprintf(fp_ou,"%s %02d:%02d:%02d %s\n",dia,t->tm_hour,t->tm_min,t->tm_sec,gwarea.current); } } @@ -83,4 +142,10 @@ void splitlog(const char *arq, char *df, int dfrom, int duntil, int convert) if (fclose(fp_in)==EOF) { debuga(_("Failed to close file %s - %s\n"),arq,strerror(errno)); } + if (autosplit && fp_ou) { + if (fclose(fp_ou)==EOF) { + debuga(_("Failed to close file %s - %s\n"),output_file,strerror(errno)); + exit(EXIT_FAILURE); + } + } } -- 2.39.5