2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
6 * please look at http://sarg.sourceforge.net/donations.php
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
27 #include "include/conf.h"
28 #include "include/defs.h"
29 #include "include/readlog.h"
30 #include "include/filelist.h"
32 #define REPORT_EVERY_X_LINES 5000
33 #define MAX_OPEN_USER_FILES 10
37 struct userfilestruct
*next
;
38 struct userinfostruct
*user
;
42 enum ExcludeReasonEnum
44 //! User name too long.
46 //! Squid logged an incomplete query received from the client.
48 //! Log file turned over.
50 //! Excluded by exclude_string from sarg.conf.
52 //! Unknown input log file format.
54 //! Line to be ignored from the input log file.
56 //! Entry not within the requested date range.
62 //! User is not in the include_users list.
64 //! HTTP code excluded by exclude_code file.
66 //! Invalid character found in user name.
70 //! Not the IP address requested with -a.
72 //! URL excluded by -c or exclude_hosts.
74 //! Entry time outside of requested hour range.
76 //! Not the URL requested by -s.
80 //! Not the user requested by -u.
84 //! User ignored by exclude_users
87 ER_Last
//!< last entry of the list
90 numlist weekdays
= { { 0, 1, 2, 3, 4, 5, 6 }, 7 };
91 numlist hours
= { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 }, 24 };
92 //! Domain suffix to strip from the user name.
93 char StripUserSuffix
[MAX_USER_LEN
]="";
94 //! Length of the suffix to strip from the user name.
97 extern FileListObject AccessLog
;
99 extern const struct ReadLogProcessStruct ReadSquidLog
;
100 extern const struct ReadLogProcessStruct ReadCommonLog
;
101 extern const struct ReadLogProcessStruct ReadSargLog
;
102 extern const struct ReadLogProcessStruct ReadExtLog
;
104 //! The list of the supported log formats.
105 static const struct ReadLogProcessStruct
const *LogFormats
[]=
113 //! The path to the sarg log file.
114 static char SargLogFile
[4096]="";
115 //! Handle to the sarg log file. NULL if not created.
116 static FILE *fp_log
=NULL
;
117 //! The number of records read from the input logs.
118 static long int totregsl
=0;
119 //! The number of records kept.
120 static long int totregsg
=0;
121 //! The number of records excluded.
122 static long int totregsx
=0;
123 //! The beginning of a linked list of user's file.
124 static struct userfilestruct
*first_user_file
=NULL
;
125 //! Count the number of occurrences of each input log format.
126 static unsigned long int format_count
[sizeof(LogFormats
)/sizeof(*LogFormats
)];
127 //! The minimum date found in the input logs.
128 static int mindate
=0;
129 static int maxdate
=0;
130 //! Count the number of excluded records.
131 static unsigned long int excluded_count
[ER_Last
];
132 //! Earliest date found in the log.
133 static int EarliestDate
=-1;
134 //! The earliest date in time format.
135 static struct tm EarliestDateTime
;
136 //! Latest date found in the log.
137 static int LatestDate
=-1;
138 //! The latest date in time format.
139 static struct tm LatestDateTime
;
142 * Read from standard input.
144 * \param Data The file object.
145 * \param Buffer The buffer to store the data read.
146 * \param Size How many bytes to read.
148 * \return The number of bytes read.
150 static int Stdin_Read(void *Data
,void *Buffer
,int Size
)
152 return(fread(Buffer
,1,Size
,(FILE *)Data
));
156 * Check if end of file is reached.
158 * \param Data The file object.
160 * \return \c True if end of file is reached.
162 static int Stdin_Eof(void *Data
)
164 return(feof((FILE *)Data
));
168 * Mimic a close of standard input but do nothing
170 * \param Data File to close.
172 * \return EOF on error.
174 static int Stdin_Close(void *Data
)
180 * Open a file object to read from standard input.
182 * \return The object to pass to other functions in this module.
184 static FileObject
*Stdin_Open(void)
188 FileObject_SetLastOpenError(NULL
);
189 File
=calloc(1,sizeof(*File
));
192 FileObject_SetLastOpenError(_("Not enough memory"));
196 File
->Read
=Stdin_Read
;
199 File
->Close
=Stdin_Close
;
204 * Initialize the memory structure needed by LogLine_Parse() to parse
207 * \param log_line The structure to initialize.
209 void LogLine_Init(struct LogLineStruct
*log_line
)
211 log_line
->current_format
=NULL
;
212 log_line
->current_format_idx
=-1;
213 log_line
->file_name
="";
214 log_line
->successive_errors
=0;
215 log_line
->total_errors
=0;
219 * Set the name of the log file being parsed.
221 * \param log_line Data structure to parse the log line.
222 * \param file_name The name of the log file being read.
224 void LogLine_File(struct LogLineStruct
*log_line
,const char *file_name
)
226 log_line
->file_name
=file_name
;
230 * Parse the next line from a log file.
232 * \param log_line A buffer to store the data about the current parsing.
233 * \param log_entry The variable to store the parsed data.
234 * \param linebuf The text line read from the log file.
238 enum ReadLogReturnCodeEnum
LogLine_Parse(struct LogLineStruct
*log_line
,struct ReadLogStruct
*log_entry
,char *linebuf
)
240 enum ReadLogReturnCodeEnum log_entry_status
=RLRC_Unknown
;
243 if (log_line
->current_format
)
245 memset(log_entry
,0,sizeof(*log_entry
));
246 log_entry_status
=log_line
->current_format
->ReadEntry(linebuf
,log_entry
);
249 // find out what line format to use
250 if (log_entry_status
==RLRC_Unknown
)
252 for (x
=0 ; x
<(int)(sizeof(LogFormats
)/sizeof(*LogFormats
)) ; x
++)
254 if (LogFormats
[x
]==log_line
->current_format
) continue;
255 memset(log_entry
,0,sizeof(*log_entry
));
256 log_entry_status
=LogFormats
[x
]->ReadEntry(linebuf
,log_entry
);
257 if (log_entry_status
!=RLRC_Unknown
)
259 log_line
->current_format
=LogFormats
[x
];
260 log_line
->current_format_idx
=x
;
261 if (debugz
>=LogLevel_Process
)
263 /* TRANSLATORS: The argument is the log format name as translated by you. */
264 debuga(__FILE__
,__LINE__
,_("Log format identified as \"%s\" for %s\n"),_(log_line
->current_format
->Name
),log_line
->file_name
);
269 if (x
>=(int)(sizeof(LogFormats
)/sizeof(*LogFormats
)))
271 if (++log_line
->successive_errors
>NumLogSuccessiveErrors
) {
272 debuga(__FILE__
,__LINE__
,ngettext("%d consecutive error found in the input log file %s\n",
273 "%d consecutive errors found in the input log file %s\n",log_line
->successive_errors
),log_line
->successive_errors
,log_line
->file_name
);
276 if (NumLogTotalErrors
>=0 && ++log_line
->total_errors
>NumLogTotalErrors
) {
277 debuga(__FILE__
,__LINE__
,ngettext("%d error found in the input log file (last in %s)\n",
278 "%d errors found in the input log file (last in %s)\n",log_line
->total_errors
),log_line
->total_errors
,log_line
->file_name
);
281 debuga(__FILE__
,__LINE__
,_("The following line read from %s could not be parsed and is ignored\n%s\n"),log_line
->file_name
,linebuf
);
284 log_line
->successive_errors
=0;
287 if (log_line
->current_format_idx
<0 || log_line
->current_format
==NULL
) {
288 debuga(__FILE__
,__LINE__
,_("Sarg failed to determine the format of the input log file %s\n"),log_line
->file_name
);
291 if (log_entry_status
==RLRC_InternalError
) {
292 debuga(__FILE__
,__LINE__
,_("Internal error encountered while processing %s\nSee previous message to know the reason for that error.\n"),log_line
->file_name
);
295 return(log_entry_status
);
299 Read a single log file.
301 \param arq The log file name to read.
303 static void ReadOneLogFile(struct ReadLogDataStruct
*Filter
,const char *arq
)
310 char tmp3
[MAXLEN
]="";
311 char download_url
[MAXLEN
];
312 char smartfilter
[MAXLEN
];
314 int OutputNonZero
= REPORT_EVERY_X_LINES
;
319 int maxopenfiles
=MAX_OPEN_USER_FILES
;
320 unsigned long int recs1
=0UL;
321 unsigned long int recs2
=0UL;
322 FileObject
*fp_in
=NULL
;
323 bool download_flag
=false;
325 enum ReadLogReturnCodeEnum log_entry_status
;
326 enum UserProcessError PUser
;
328 struct getwordstruct gwarea
;
329 struct userfilestruct
*prev_ufile
;
330 struct userinfostruct
*uinfo
;
331 struct userfilestruct
*ufile
;
332 struct userfilestruct
*ufile1
;
333 struct ReadLogStruct log_entry
;
334 struct LogLineStruct log_line
;
336 LogLine_Init(&log_line
);
337 LogLine_File(&log_line
,arq
);
338 for (x
=0 ; x
<sizeof(LogFormats
)/sizeof(*LogFormats
) ; x
++)
339 if (LogFormats
[x
]->NewFile
)
340 LogFormats
[x
]->NewFile(arq
);
342 if (arq
[0]=='-' && arq
[1]=='\0') {
345 debuga(__FILE__
,__LINE__
,_("Reading access log file: from stdin\n"));
347 if (Filter
->DateRange
[0]!='\0') {
348 if (stat(arq
,&logstat
)!=0) {
349 debuga(__FILE__
,__LINE__
,_("Cannot get the modification time of input log file %s (%s). Processing it anyway\n"),arq
,strerror(errno
));
351 struct tm
*logtime
=localtime(&logstat
.st_mtime
);
352 if ((logtime
->tm_year
+1900)*10000+(logtime
->tm_mon
+1)*100+logtime
->tm_mday
<dfrom
) {
353 debuga(__FILE__
,__LINE__
,_("Ignoring old log file %s\n"),arq
);
360 debuga(__FILE__
,__LINE__
,_("Cannot open input log file \"%s\": %s\n"),arq
,FileObject_GetLastOpenError());
363 if (debug
) debuga(__FILE__
,__LINE__
,_("Reading access log file: %s\n"),arq
);
371 // pre-read the file only if we have to show stats
372 if (ShowReadStatistics
&& ShowReadPercent
&& fp_in
->Rewind
) {
377 while ((nread
=FileObject_Read(fp_in
,tmp4
,sizeof(tmp4
)))>0) {
378 for (i
=0 ; i
<nread
; i
++)
380 if (tmp4
[i
]!='\n' && tmp4
[i
]!='\r') {
384 if (tmp4
[i
]=='\n' || tmp4
[i
]=='\r') {
390 FileObject_Rewind(fp_in
);
391 printf(_("SARG: Records in file: %lu, reading: %3.2f%%"),recs1
,(float) 0);
396 if ((line
=longline_create())==NULL
) {
397 debuga(__FILE__
,__LINE__
,_("Not enough memory to read file \"%s\"\n"),arq
);
401 while ((linebuf
=longline_read(fp_in
,line
))!=NULL
) {
405 if (ShowReadStatistics
&& --OutputNonZero
<=0) {
407 double perc
= recs2
* 100. / recs1
;
408 printf(_("SARG: Records in file: %lu, reading: %3.2lf%%"),recs2
,perc
);
410 printf(_("SARG: Records in file: %lu"),recs2
);
414 OutputNonZero
= REPORT_EVERY_X_LINES
;
418 The following checks are retained here as I don't know to
419 what format they apply. They date back to pre 2.4 versions.
421 //if(blen < 58) continue; //this test conflict with the reading of the sarg log header line
422 if(strstr(linebuf
,"HTTP/0.0") != 0) {//recorded by squid when encountering an incomplete query
423 excluded_count
[ER_IncompleteQuery
]++;
426 if(strstr(linebuf
,"logfile turned over") != 0) {//reported by newsyslog
427 excluded_count
[ER_LogfileTurnedOver
]++;
432 if(ExcludeString
[0] != '\0') {
434 getword_start(&gwarea
,ExcludeString
);
435 while(strchr(gwarea
.current
,':') != 0) {
436 if (getword_multisep(val1
,sizeof(val1
),&gwarea
,':')<0) {
437 debuga(__FILE__
,__LINE__
,_("Invalid record in exclusion string\n"));
440 if((str
=(char *) strstr(linebuf
,val1
)) != (char *) NULL
) {
445 if(!exstring
&& (str
=(char *) strstr(linebuf
,gwarea
.current
)) != (char *) NULL
)
448 excluded_count
[ER_ExcludeString
]++;
454 if (debugz
>=LogLevel_Data
)
455 printf("BUF=%s\n",linebuf
);
458 log_entry_status
=LogLine_Parse(&log_line
,&log_entry
,linebuf
);
459 if (log_entry_status
==RLRC_Unknown
)
461 excluded_count
[ER_UnknownFormat
]++;
464 if (log_entry_status
==RLRC_Ignore
) {
465 excluded_count
[ER_FormatData
]++;
468 format_count
[log_line
.current_format_idx
]++;
470 if (!fp_log
&& ParsedOutputLog
[0] && log_line
.current_format
!=&ReadSargLog
) {
471 if(access(ParsedOutputLog
,R_OK
) != 0) {
472 my_mkdir(ParsedOutputLog
);
474 if (snprintf(SargLogFile
,sizeof(SargLogFile
),"%s/sarg_temp.log",ParsedOutputLog
)>=sizeof(SargLogFile
)) {
475 debuga(__FILE__
,__LINE__
,_("Path too long: "));
476 debuga_more("%s/sarg_temp.log\n",ParsedOutputLog
);
479 if((fp_log
=MY_FOPEN(SargLogFile
,"w"))==NULL
) {
480 debuga(__FILE__
,__LINE__
,_("Cannot open file \"%s\": %s\n"),SargLogFile
,strerror(errno
));
483 fputs("*** SARG Log ***\n",fp_log
);
486 if (log_entry
.Ip
==NULL
) {
487 debuga(__FILE__
,__LINE__
,_("Unknown input log file format: no IP addresses\n"));
490 if (log_entry
.User
==NULL
) {
491 debuga(__FILE__
,__LINE__
,_("Unknown input log file format: no user\n"));
494 if (log_entry
.Url
==NULL
) {
495 debuga(__FILE__
,__LINE__
,_("Unknown input log file format: no URL\n"));
499 idata
=builddia(log_entry
.EntryTime
.tm_mday
,log_entry
.EntryTime
.tm_mon
+1,log_entry
.EntryTime
.tm_year
+1900);
500 if (debugz
>=LogLevel_Data
)
501 printf("DATE=%s IDATA=%d DFROM=%d DUNTIL=%d\n",Filter
->DateRange
,idata
,dfrom
,duntil
);
503 if (EarliestDate
<0 || idata
<EarliestDate
) {
505 memcpy(&EarliestDateTime
,&log_entry
.EntryTime
,sizeof(struct tm
));
507 if (LatestDate
<0 || idata
>LatestDate
) {
509 memcpy(&LatestDateTime
,&log_entry
.EntryTime
,sizeof(struct tm
));
511 if(Filter
->DateRange
[0] != '\0'){
512 if(idata
< dfrom
|| idata
> duntil
) {
513 excluded_count
[ER_OutOfDateRange
]++;
518 // Record only hours usage which is required
519 if( bsearch( &( log_entry
.EntryTime
.tm_wday
), weekdays
.list
, weekdays
.len
, sizeof( int ), compar
) == NULL
) {
520 excluded_count
[ER_OutOfWDayRange
]++;
524 if( bsearch( &( log_entry
.EntryTime
.tm_hour
), hours
.list
, hours
.len
, sizeof( int ), compar
) == NULL
) {
525 excluded_count
[ER_OutOfHourRange
]++;
529 PUser
=process_user(&log_entry
.User
,log_entry
.Ip
,&id_is_ip
);
532 case USERERR_NoError
:
534 case USERERR_NameTooLong
:
535 if (debugz
>=LogLevel_Process
) debuga(__FILE__
,__LINE__
,_("User ID too long: %s\n"),log_entry
.User
);
536 excluded_count
[ER_UserNameTooLong
]++;
539 case USERERR_Excluded
:
540 excluded_count
[ER_User
]++;
542 case USERERR_InvalidChar
:
543 excluded_count
[ER_InvalidUserChar
]++;
545 case USERERR_EmptyUser
:
546 excluded_count
[ER_NoUser
]++;
548 case USERERR_SysUser
:
549 excluded_count
[ER_SysUser
]++;
551 case USERERR_Ignored
:
552 excluded_count
[ER_IgnoredUser
]++;
555 case USERERR_Untracked
:
556 excluded_count
[ER_UntrackedUser
]++;
560 if(vercode(log_entry
.HttpCode
)) {
561 if (debugz
>=LogLevel_Process
) debuga(__FILE__
,__LINE__
,_("Excluded code: %s\n"),log_entry
.HttpCode
);
562 excluded_count
[ER_HttpCode
]++;
567 // replace any tab by a single space
568 for (str
=log_entry
.Url
; *str
; str
++)
569 if (*str
=='\t') *str
=' ';
570 for (str
=log_entry
.HttpCode
; *str
; str
++)
571 if (*str
=='\t') *str
=' ';
573 if (log_line
.current_format
!=&ReadSargLog
) {
575 The full URL is not saved in sarg log. There is no point in testing the URL to detect
578 download_flag
=is_download_suffix(log_entry
.Url
);
580 safe_strcpy(download_url
,log_entry
.Url
,sizeof(download_url
));
585 url
=process_url(log_entry
.Url
,LongUrl
);
586 if (!url
|| url
[0] == '\0') {
587 excluded_count
[ER_NoUrl
]++;
592 if(strcmp(addr
,log_entry
.Ip
)!=0) {
593 excluded_count
[ER_UntrackedIpAddr
]++;
597 if(Filter
->HostFilter
) {
598 if(!vhexclude(url
)) {
599 if (debugz
>=LogLevel_Data
) debuga(__FILE__
,__LINE__
,_("Excluded site: %s\n"),url
);
600 excluded_count
[ER_Url
]++;
606 if(Filter
->StartTime
>= 0 && Filter
->EndTime
>= 0) {
607 hmr
=log_entry
.EntryTime
.tm_hour
*100+log_entry
.EntryTime
.tm_min
;
608 if(hmr
< Filter
->StartTime
|| hmr
> Filter
->EndTime
) {
609 excluded_count
[ER_OutOfTimeRange
]++;
615 if(strstr(url
,site
)==0) {
616 excluded_count
[ER_UntrackedUrl
]++;
621 if (log_entry
.DataSize
<0) log_entry
.DataSize
=0;
623 if (log_entry
.ElapsedTime
<0) log_entry
.ElapsedTime
=0;
624 if (Filter
->max_elapsed
>0 && log_entry
.ElapsedTime
>Filter
->max_elapsed
) {
625 log_entry
.ElapsedTime
=0;
628 if((str
=(char *) strstr(linebuf
, "[SmartFilter:")) != (char *) NULL
) {
630 snprintf(smartfilter
,sizeof(smartfilter
),"\"%s\"",str
+1);
631 } else strcpy(smartfilter
,"\"\"");
635 for (ufile
=first_user_file
; ufile
&& strcmp(log_entry
.User
,ufile
->user
->id
)!=0 ; ufile
=ufile
->next
) {
637 if (ufile
->file
) nopen
++;
640 ufile
=malloc(sizeof(*ufile
));
642 debuga(__FILE__
,__LINE__
,_("Not enough memory to store the user %s\n"),log_entry
.User
);
645 memset(ufile
,0,sizeof(*ufile
));
646 ufile
->next
=first_user_file
;
647 first_user_file
=ufile
;
649 * This id_is_ip stuff is just to store the string only once if the user is
650 * identified by its IP address instead of a distinct ID and IP address.
652 uinfo
=userinfo_create(log_entry
.User
,(id_is_ip
) ? NULL
: log_entry
.Ip
);
657 prev_ufile
->next
=ufile
->next
;
658 ufile
->next
=first_user_file
;
659 first_user_file
=ufile
;
662 #ifdef ENABLE_DOUBLE_CHECK_DATA
663 if (strcmp(log_entry
.HttpCode
,"TCP_DENIED/407")!=0) {
664 ufile
->user
->nbytes
+=log_entry
.DataSize
;
665 ufile
->user
->elap
+=log_entry
.ElapsedTime
;
669 if (ufile
->file
==NULL
) {
670 if (nopen
>=maxopenfiles
) {
672 for (ufile1
=first_user_file
; ufile1
; ufile1
=ufile1
->next
) {
673 if (ufile1
->file
!=NULL
) {
674 if (x
>=maxopenfiles
) {
675 if (fclose(ufile1
->file
)==EOF
) {
676 debuga(__FILE__
,__LINE__
,_("Write error in log file of user %s: %s\n"),ufile1
->user
->id
,strerror(errno
));
685 if (snprintf (tmp3
, sizeof(tmp3
), "%s/%s.user_unsort", tmp
, ufile
->user
->filename
)>=sizeof(tmp3
)) {
686 debuga(__FILE__
,__LINE__
,_("Temporary user file name too long: %s/%s.user_unsort\n"), tmp
, ufile
->user
->filename
);
689 if ((ufile
->file
= MY_FOPEN (tmp3
, "a")) == NULL
) {
690 debuga(__FILE__
,__LINE__
,_("(log) Cannot open temporary file %s: %s\n"), tmp3
, strerror(errno
));
695 strftime(dia
, sizeof(dia
), "%d/%m/%Y",&log_entry
.EntryTime
);
696 strftime(hora
,sizeof(hora
),"%H:%M:%S",&log_entry
.EntryTime
);
698 if (fprintf(ufile
->file
, "%s\t%s\t%s\t%s\t%"PRIu64
"\t%s\t%ld\t%s\n",dia
,hora
,
699 log_entry
.Ip
,url
,(uint64_t)log_entry
.DataSize
,
700 log_entry
.HttpCode
,log_entry
.ElapsedTime
,smartfilter
)<=0) {
701 debuga(__FILE__
,__LINE__
,_("Write error in the log file of user %s\n"),log_entry
.User
);
706 if (fp_log
&& log_line
.current_format
!=&ReadSargLog
) {
707 fprintf(fp_log
, "%s\t%s\t%s\t%s\t%s\t%"PRIu64
"\t%s\t%ld\t%s\n",dia
,hora
,
708 log_entry
.User
,log_entry
.Ip
,url
,(uint64_t)log_entry
.DataSize
,
709 log_entry
.HttpCode
,log_entry
.ElapsedTime
,smartfilter
);
714 denied_write(&log_entry
);
715 authfail_write(&log_entry
);
716 if (download_flag
) download_write(&log_entry
,download_url
);
718 if (log_line
.current_format
!=&ReadSargLog
) {
719 if (period
.start
.tm_year
==0 || idata
<mindate
|| compare_date(&period
.start
,&log_entry
.EntryTime
)>0){
721 memcpy(&period
.start
,&log_entry
.EntryTime
,sizeof(log_entry
.EntryTime
));
723 if (period
.end
.tm_year
==0 || idata
>maxdate
|| compare_date(&period
.end
,&log_entry
.EntryTime
)<0) {
725 memcpy(&period
.end
,&log_entry
.EntryTime
,sizeof(log_entry
.EntryTime
));
729 if (debugz
>=LogLevel_Data
){
730 printf("IP=\t%s\n",log_entry
.Ip
);
731 printf("USER=\t%s\n",log_entry
.User
);
732 printf("ELAP=\t%ld\n",log_entry
.ElapsedTime
);
733 printf("DATE=\t%s\n",dia
);
734 printf("TIME=\t%s\n",hora
);
735 //printf("FUNC=\t%s\n",fun);
736 printf("URL=\t%s\n",url
);
737 printf("CODE=\t%s\n",log_entry
.HttpCode
);
738 printf("LEN=\t%"PRIu64
"\n",(uint64_t)log_entry
.DataSize
);
741 longline_destroy(&line
);
743 if (FileObject_Close(fp_in
)) {
744 debuga(__FILE__
,__LINE__
,_("Read error in \"%s\": %s\n"),arq
,FileObject_GetLastCloseError());
747 if (ShowReadStatistics
) {
749 printf(_("SARG: Records in file: %lu, reading: %3.2f%%\n"),recs2
, (float) 100 );
751 printf(_("SARG: Records in file: %lu\n"),recs2
);
756 * Display a line with the excluded entries count.
758 * \param Explain A translated string explaining the excluded count.
759 * \param Reason The reason number.
761 static void DisplayExcludeCount(const char *Explain
,enum ExcludeReasonEnum Reason
)
763 if (excluded_count
[Reason
]>0) {
764 debuga(__FILE__
,__LINE__
," %s: %lu\n",Explain
,excluded_count
[Reason
]);
771 \param Filter The filtering parameters for the file to load.
773 \retval 1 Records found.
774 \retval 0 No record found.
776 int ReadLogFile(struct ReadLogDataStruct
*Filter
)
780 struct userfilestruct
*ufile
;
781 struct userfilestruct
*ufile1
;
782 FileListIterator FIter
;
785 for (x
=0 ; x
<sizeof(format_count
)/sizeof(*format_count
) ; x
++) format_count
[x
]=0;
786 for (x
=0 ; x
<sizeof(excluded_count
)/sizeof(*excluded_count
) ; x
++) excluded_count
[x
]=0;
787 first_user_file
=NULL
;
795 FIter
=FileListIter_Open(AccessLog
);
796 while ((file
=FileListIter_Next(FIter
))!=NULL
)
797 ReadOneLogFile(Filter
,file
);
798 FileListIter_Close(FIter
);
802 char val4
[4096];//val4 must not be bigger than SargLogFile without fixing the strcpy below
804 if (fclose(fp_log
)==EOF
) {
805 debuga(__FILE__
,__LINE__
,_("Write error in \"%s\": %s\n"),SargLogFile
,strerror(errno
));
808 strftime(val2
,sizeof(val2
),"%d%m%Y_%H%M",&period
.start
);
809 strftime(val1
,sizeof(val1
),"%d%m%Y_%H%M",&period
.end
);
810 if (snprintf(val4
,sizeof(val4
),"%s/sarg-%s-%s.log",ParsedOutputLog
,val2
,val1
)>=sizeof(val4
)) {
811 debuga(__FILE__
,__LINE__
,_("Path too long: "));
812 debuga_more("%s/sarg-%s-%s.log\n",ParsedOutputLog
,val2
,val1
);
815 if (rename(SargLogFile
,val4
)) {
816 debuga(__FILE__
,__LINE__
,_("failed to rename %s to %s - %s\n"),SargLogFile
,val4
,strerror(errno
));
818 strcpy(SargLogFile
,val4
);
820 if(strcmp(ParsedOutputLogCompress
,"nocompress") != 0 && ParsedOutputLogCompress
[0] != '\0') {
822 No double quotes around ParsedOutputLogCompress because it may contain command line options. If double quotes are
823 necessary around the command name, put them in the configuration file.
825 if (snprintf(val1
,sizeof(val1
),"%s \"%s\"",ParsedOutputLogCompress
,SargLogFile
)>=sizeof(val1
)) {
826 debuga(__FILE__
,__LINE__
,_("Command too long: %s \"%s\"\n"),ParsedOutputLogCompress
,SargLogFile
);
829 cstatus
=system(val1
);
830 if (!WIFEXITED(cstatus
) || WEXITSTATUS(cstatus
)) {
831 debuga(__FILE__
,__LINE__
,_("command return status %d\n"),WEXITSTATUS(cstatus
));
832 debuga(__FILE__
,__LINE__
,_("command: %s\n"),val1
);
838 debuga(__FILE__
,__LINE__
,_("Sarg parsed log saved as %s\n"),SargLogFile
);
845 for (ufile
=first_user_file
; ufile
; ufile
=ufile1
) {
847 if (ufile
->file
!=NULL
&& fclose(ufile
->file
)==EOF
) {
848 debuga(__FILE__
,__LINE__
,_("Write error in log file of user %s: %s\n"),ufile
->user
->id
,strerror(errno
));
855 unsigned long int totalcount
=0;
857 debuga(__FILE__
,__LINE__
,_(" Records read: %ld, written: %ld, excluded: %ld\n"),totregsl
,totregsg
,totregsx
);
859 for (x
=sizeof(excluded_count
)/sizeof(*excluded_count
)-1 ; x
>=0 && excluded_count
[x
]>0 ; x
--);
861 debuga(__FILE__
,__LINE__
,_("Reasons for excluded entries:\n"));
862 DisplayExcludeCount(_("User name too long"),ER_UserNameTooLong
);
863 DisplayExcludeCount(_("Squid logged an incomplete query received from the client"),ER_IncompleteQuery
);
864 DisplayExcludeCount(_("Log file turned over"),ER_LogfileTurnedOver
);
865 DisplayExcludeCount(_("Excluded by \"exclude_string\" in sarg.conf"),ER_ExcludeString
);
866 DisplayExcludeCount(_("Unknown input log file format"),ER_UnknownFormat
);
867 DisplayExcludeCount(_("Line ignored by the input log format"),ER_FormatData
);
868 DisplayExcludeCount(_("Time outside the requested date range (-d)"),ER_OutOfDateRange
);
869 DisplayExcludeCount(_("Ignored week day (\"weekdays\" parameter in sarg.conf)"),ER_OutOfWDayRange
);
870 DisplayExcludeCount(_("Ignored hour (\"hours\" parameter in sarg.conf)"),ER_OutOfHourRange
);
871 DisplayExcludeCount(_("User is not in the \"include_users\" list"),ER_User
);
872 DisplayExcludeCount(_("HTTP code excluded by \"exclude_code\" file"),ER_HttpCode
);
873 DisplayExcludeCount(_("Invalid character found in user name"),ER_InvalidUserChar
);
874 DisplayExcludeCount(_("No URL in entry"),ER_NoUrl
);
875 DisplayExcludeCount(_("Not the IP address requested with -a"),ER_UntrackedIpAddr
);
876 DisplayExcludeCount(_("URL excluded by -c or \"exclude_hosts\""),ER_Url
);
877 DisplayExcludeCount(_("Entry time outside of requested hour range (-t)"),ER_OutOfTimeRange
);
878 DisplayExcludeCount(_("Not the URL requested by -s"),ER_UntrackedUrl
);
879 DisplayExcludeCount(_("No user in entry"),ER_NoUser
);
880 DisplayExcludeCount(_("Not the user requested by -u"),ER_UntrackedUser
);
881 DisplayExcludeCount(_("System user as defined by \"password\" in sarg.conf"),ER_SysUser
);
882 DisplayExcludeCount(_("User ignored by \"exclude_users\""),ER_IgnoredUser
);
885 for (x
=0 ; x
<sizeof(LogFormats
)/sizeof(*LogFormats
) ; x
++) {
886 if (format_count
[x
]>0) {
887 /* TRANSLATORS: It displays the number of lines found in the input log files
888 * for each supported log format. The log format name is the %s and is a string
889 * you translate somewhere else. */
890 debuga(__FILE__
,__LINE__
,_("%s: %lu entries\n"),_(LogFormats
[x
]->Name
),format_count
[x
]);
891 totalcount
+=format_count
[x
];
895 if (totalcount
==0 && totregsg
)
896 debuga(__FILE__
,__LINE__
,_("Log with invalid format\n"));
899 return((totregsg
!=0) ? 1 : 0);
903 * Get the start and end date of the period covered by the log files.
905 bool GetLogPeriod(struct tm
*Start
,struct tm
*End
)
909 if (EarliestDate
>=0) {
910 memcpy(Start
,&EarliestDateTime
,sizeof(struct tm
));
913 memset(Start
,0,sizeof(struct tm
));
916 memcpy(End
,&LatestDateTime
,sizeof(struct tm
));
919 memset(End
,0,sizeof(struct tm
));