2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
6 * please look at http://sarg.sourceforge.net/donations.php
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
27 #include "include/conf.h"
28 #include "include/defs.h"
29 #include "include/readlog.h"
30 #include "include/filelist.h"
32 #define REPORT_EVERY_X_LINES 5000
33 #define MAX_OPEN_USER_FILES 10
37 struct userfilestruct
*next
;
38 struct userinfostruct
*user
;
42 enum ExcludeReasonEnum
44 //! User name too long.
46 //! Squid logged an incomplete query received from the client.
48 //! Log file turned over.
50 //! Excluded by exclude_string from sarg.conf.
52 //! Unknown input log file format.
54 //! Line to be ignored from the input log file.
56 //! Entry not within the requested date range.
62 //! User is not in the include_users list.
64 //! HTTP code excluded by exclude_code file.
66 //! Invalid character found in user name.
70 //! Not the IP address requested with -a.
72 //! URL excluded by -c or exclude_hosts.
74 //! Entry time outside of requested hour range.
76 //! Not the URL requested by -s.
80 //! Not the user requested by -u.
84 //! User ignored by exclude_users
87 ER_Last
//!< last entry of the list
90 numlist weekdays
= { { 0, 1, 2, 3, 4, 5, 6 }, 7 };
91 numlist hours
= { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 }, 24 };
92 //! Domain suffix to strip from the user name.
93 char StripUserSuffix
[MAX_USER_LEN
]="";
94 //! Length of the suffix to strip from the user name.
97 extern FileListObject AccessLog
;
99 extern const struct ReadLogProcessStruct ReadSquidLog
;
100 extern const struct ReadLogProcessStruct ReadCommonLog
;
101 extern const struct ReadLogProcessStruct ReadSargLog
;
102 extern const struct ReadLogProcessStruct ReadExtLog
;
104 //! The list of the supported log formats.
105 static const struct ReadLogProcessStruct
const *LogFormats
[]=
113 //! The path to the sarg log file.
114 static char SargLogFile
[4096]="";
115 //! Handle to the sarg log file. NULL if not created.
116 static FILE *fp_log
=NULL
;
117 //! The number of records read from the input logs.
118 static long int totregsl
=0;
119 //! The number of records kept.
120 static long int totregsg
=0;
121 //! The number of records excluded.
122 static long int totregsx
=0;
123 //! The beginning of a linked list of user's file.
124 static struct userfilestruct
*first_user_file
=NULL
;
125 //! Count the number of occurrences of each input log format.
126 static unsigned long int format_count
[sizeof(LogFormats
)/sizeof(*LogFormats
)];
127 //! The minimum date found in the input logs.
128 static int mindate
=0;
129 static int maxdate
=0;
130 //! Count the number of excluded records.
131 static unsigned long int excluded_count
[ER_Last
];
132 //! Earliest date found in the log.
133 static int EarliestDate
=-1;
134 //! The earliest date in time format.
135 static struct tm EarliestDateTime
;
136 //! Latest date found in the log.
137 static int LatestDate
=-1;
138 //! The latest date in time format.
139 static struct tm LatestDateTime
;
/*!
 * Read from standard input.
 *
 * The stream is not hard-coded: it is whatever stdio stream the caller
 * stored in \a Data.
 *
 * \param Data The file object. It is used as a <tt>FILE *</tt>.
 * \param Buffer The buffer to store the data read.
 * \param Size How many bytes to read. Nothing is read if it is zero or
 * negative.
 *
 * \return The number of bytes read. It is less than \a Size when the end of
 * the stream or a read error is encountered.
 */
static int Stdin_Read(void *Data,void *Buffer,int Size)
{
	/*
	 * fread() takes a size_t. A negative Size would silently wrap to a huge
	 * byte count and overrun Buffer, so reject it here.
	 */
	if (Size<=0) return(0);

	/* The count returned by fread() never exceeds Size, so it fits in an int. */
	return((int)fread(Buffer,1,(size_t)Size,(FILE *)Data));
}
/*!
 * Check if end of file is reached.
 *
 * \param Data The file object. It is used as a <tt>FILE *</tt>.
 *
 * \return \c True (non-zero) if end of file is reached, as reported by
 * feof() for that stream.
 */
static int Stdin_Eof(void *Data)
{
	FILE *Stream=(FILE *)Data;

	return(feof(Stream));
}
168 * Mimic a close of standard input but do nothing
170 * \param Data File to close.
172 * \return EOF on error.
174 static int Stdin_Close(void *Data
)
180 * Open a file object to read from standard input.
182 * \return The object to pass to the other functions in this module.
184 static FileObject
*Stdin_Open(void)
188 FileObject_SetLastOpenError(NULL
);
189 File
=calloc(1,sizeof(*File
));
192 FileObject_SetLastOpenError(_("Not enough memory"));
196 File
->Read
=Stdin_Read
;
199 File
->Close
=Stdin_Close
;
204 * Initialize the memory structure needed by LogLine_Parse() to parse
207 * \param log_line The structure to initialize.
209 void LogLine_Init(struct LogLineStruct
*log_line
)
211 log_line
->current_format
=NULL
;
212 log_line
->current_format_idx
=-1;
213 log_line
->file_name
="";
214 log_line
->successive_errors
=0;
215 log_line
->total_errors
=0;
219 * Set the name of the log file being parsed.
221 * \param log_line Data structure to parse the log line.
222 * \param file_name The name of the log file being read.
224 void LogLine_File(struct LogLineStruct
*log_line
,const char *file_name
)
226 log_line
->file_name
=file_name
;
230 * Parse the next line from a log file.
232 * \param log_line A buffer to store the data about the current parsing.
233 * \param log_entry The variable to store the parsed data.
234 * \param linebuf The text line read from the log file.
238 enum ReadLogReturnCodeEnum
LogLine_Parse(struct LogLineStruct
*log_line
,struct ReadLogStruct
*log_entry
,char *linebuf
)
240 enum ReadLogReturnCodeEnum log_entry_status
=RLRC_Unknown
;
243 if (log_line
->current_format
)
245 memset(log_entry
,0,sizeof(*log_entry
));
246 log_entry_status
=log_line
->current_format
->ReadEntry(linebuf
,log_entry
);
249 // find out what line format to use
250 if (log_entry_status
==RLRC_Unknown
)
252 for (x
=0 ; x
<(int)(sizeof(LogFormats
)/sizeof(*LogFormats
)) ; x
++)
254 if (LogFormats
[x
]==log_line
->current_format
) continue;
255 memset(log_entry
,0,sizeof(*log_entry
));
256 log_entry_status
=LogFormats
[x
]->ReadEntry(linebuf
,log_entry
);
257 if (log_entry_status
!=RLRC_Unknown
)
259 log_line
->current_format
=LogFormats
[x
];
260 log_line
->current_format_idx
=x
;
261 if (debugz
>=LogLevel_Process
)
263 /* TRANSLATORS: The argument is the log format name as translated by you. */
264 debuga(__FILE__
,__LINE__
,_("Log format identified as \"%s\" for %s\n"),_(log_line
->current_format
->Name
),log_line
->file_name
);
269 if (x
>=(int)(sizeof(LogFormats
)/sizeof(*LogFormats
)))
271 if (++log_line
->successive_errors
>NumLogSuccessiveErrors
) {
272 debuga(__FILE__
,__LINE__
,ngettext("%d consecutive error found in the input log file %s\n",
273 "%d consecutive errors found in the input log file %s\n",log_line
->successive_errors
),log_line
->successive_errors
,log_line
->file_name
);
276 if (NumLogTotalErrors
>=0 && ++log_line
->total_errors
>NumLogTotalErrors
) {
277 debuga(__FILE__
,__LINE__
,ngettext("%d error found in the input log file (last in %s)\n",
278 "%d errors found in the input log file (last in %s)\n",log_line
->total_errors
),log_line
->total_errors
,log_line
->file_name
);
281 debuga(__FILE__
,__LINE__
,_("The following line read from %s could not be parsed and is ignored\n%s\n"),log_line
->file_name
,linebuf
);
284 log_line
->successive_errors
=0;
287 if (log_line
->current_format_idx
<0 || log_line
->current_format
==NULL
) {
288 debuga(__FILE__
,__LINE__
,_("Sarg failed to determine the format of the input log file %s\n"),log_line
->file_name
);
291 if (log_entry_status
==RLRC_InternalError
) {
292 debuga(__FILE__
,__LINE__
,_("Internal error encountered while processing %s\nSee previous message to know the reason for that error.\n"),log_line
->file_name
);
295 return(log_entry_status
);
299 Read a single log file.
301 \param arq The log file name to read.
303 static void ReadOneLogFile(struct ReadLogDataStruct
*Filter
,const char *arq
)
310 char tmp3
[MAXLEN
]="";
311 char download_url
[MAXLEN
];
312 char smartfilter
[MAXLEN
];
314 int OutputNonZero
= REPORT_EVERY_X_LINES
;
319 int maxopenfiles
=MAX_OPEN_USER_FILES
;
320 unsigned long int recs1
=0UL;
321 unsigned long int recs2
=0UL;
322 FileObject
*fp_in
=NULL
;
323 bool download_flag
=false;
325 enum ReadLogReturnCodeEnum log_entry_status
;
326 enum UserProcessError PUser
;
328 struct getwordstruct gwarea
;
329 struct userfilestruct
*prev_ufile
;
330 struct userinfostruct
*uinfo
;
331 struct userfilestruct
*ufile
;
332 struct userfilestruct
*ufile1
;
333 struct ReadLogStruct log_entry
;
334 struct LogLineStruct log_line
;
335 FILE *UseragentLog
=NULL
;
337 LogLine_Init(&log_line
);
338 LogLine_File(&log_line
,arq
);
339 for (x
=0 ; x
<sizeof(LogFormats
)/sizeof(*LogFormats
) ; x
++)
340 if (LogFormats
[x
]->NewFile
)
341 LogFormats
[x
]->NewFile(arq
);
343 if (arq
[0]=='-' && arq
[1]=='\0') {
346 debuga(__FILE__
,__LINE__
,_("Reading access log file: from stdin\n"));
348 if (Filter
->DateRange
[0]!='\0') {
349 if (stat(arq
,&logstat
)!=0) {
350 debuga(__FILE__
,__LINE__
,_("Cannot get the modification time of input log file %s (%s). Processing it anyway\n"),arq
,strerror(errno
));
352 struct tm
*logtime
=localtime(&logstat
.st_mtime
);
353 if ((logtime
->tm_year
+1900)*10000+(logtime
->tm_mon
+1)*100+logtime
->tm_mday
<dfrom
) {
354 debuga(__FILE__
,__LINE__
,_("Ignoring old log file %s\n"),arq
);
361 debuga(__FILE__
,__LINE__
,_("Cannot open input log file \"%s\": %s\n"),arq
,FileObject_GetLastOpenError());
364 if (debug
) debuga(__FILE__
,__LINE__
,_("Reading access log file: %s\n"),arq
);
372 // pre-read the file only if we have to show stats
373 if (ShowReadStatistics
&& ShowReadPercent
&& fp_in
->Rewind
) {
378 while ((nread
=FileObject_Read(fp_in
,tmp4
,sizeof(tmp4
)))>0) {
379 for (i
=0 ; i
<nread
; i
++)
381 if (tmp4
[i
]!='\n' && tmp4
[i
]!='\r') {
385 if (tmp4
[i
]=='\n' || tmp4
[i
]=='\r') {
391 FileObject_Rewind(fp_in
);
392 printf(_("SARG: Records in file: %lu, reading: %3.2f%%"),recs1
,(float) 0);
397 if ((line
=longline_create())==NULL
) {
398 debuga(__FILE__
,__LINE__
,_("Not enough memory to read file \"%s\"\n"),arq
);
402 while ((linebuf
=longline_read(fp_in
,line
))!=NULL
) {
406 if (ShowReadStatistics
&& --OutputNonZero
<=0) {
408 double perc
= recs2
* 100. / recs1
;
409 printf(_("SARG: Records in file: %lu, reading: %3.2lf%%"),recs2
,perc
);
411 printf(_("SARG: Records in file: %lu"),recs2
);
415 OutputNonZero
= REPORT_EVERY_X_LINES
;
419 The following checks are retained here as I don't know to
420 what format they apply. They date back to pre 2.4 versions.
422 //if(blen < 58) continue; //this test conflict with the reading of the sarg log header line
423 if(strstr(linebuf
,"HTTP/0.0") != 0) {//recorded by squid when encountering an incomplete query
424 excluded_count
[ER_IncompleteQuery
]++;
427 if(strstr(linebuf
,"logfile turned over") != 0) {//reported by newsyslog
428 excluded_count
[ER_LogfileTurnedOver
]++;
433 if(ExcludeString
[0] != '\0') {
435 getword_start(&gwarea
,ExcludeString
);
436 while(strchr(gwarea
.current
,':') != 0) {
437 if (getword_multisep(val1
,sizeof(val1
),&gwarea
,':')<0) {
438 debuga(__FILE__
,__LINE__
,_("Invalid record in exclusion string\n"));
441 if((str
=(char *) strstr(linebuf
,val1
)) != (char *) NULL
) {
446 if(!exstring
&& (str
=(char *) strstr(linebuf
,gwarea
.current
)) != (char *) NULL
)
449 excluded_count
[ER_ExcludeString
]++;
455 if (debugz
>=LogLevel_Data
)
456 printf("BUF=%s\n",linebuf
);
459 log_entry_status
=LogLine_Parse(&log_line
,&log_entry
,linebuf
);
460 if (log_entry_status
==RLRC_Unknown
)
462 excluded_count
[ER_UnknownFormat
]++;
465 if (log_entry_status
==RLRC_Ignore
) {
466 excluded_count
[ER_FormatData
]++;
469 format_count
[log_line
.current_format_idx
]++;
471 if (!fp_log
&& ParsedOutputLog
[0] && log_line
.current_format
!=&ReadSargLog
) {
472 if(access(ParsedOutputLog
,R_OK
) != 0) {
473 my_mkdir(ParsedOutputLog
);
475 if (snprintf(SargLogFile
,sizeof(SargLogFile
),"%s/sarg_temp.log",ParsedOutputLog
)>=sizeof(SargLogFile
)) {
476 debuga(__FILE__
,__LINE__
,_("Path too long: "));
477 debuga_more("%s/sarg_temp.log\n",ParsedOutputLog
);
480 if((fp_log
=MY_FOPEN(SargLogFile
,"w"))==NULL
) {
481 debuga(__FILE__
,__LINE__
,_("Cannot open file \"%s\": %s\n"),SargLogFile
,strerror(errno
));
484 fputs("*** SARG Log ***\n",fp_log
);
487 if (log_entry
.Ip
==NULL
) {
488 debuga(__FILE__
,__LINE__
,_("Unknown input log file format: no IP addresses\n"));
491 if (log_entry
.User
==NULL
) {
492 debuga(__FILE__
,__LINE__
,_("Unknown input log file format: no user\n"));
495 if (log_entry
.Url
==NULL
) {
496 debuga(__FILE__
,__LINE__
,_("Unknown input log file format: no URL\n"));
500 idata
=builddia(log_entry
.EntryTime
.tm_mday
,log_entry
.EntryTime
.tm_mon
+1,log_entry
.EntryTime
.tm_year
+1900);
501 if (debugz
>=LogLevel_Data
)
502 printf("DATE=%s IDATA=%d DFROM=%d DUNTIL=%d\n",Filter
->DateRange
,idata
,dfrom
,duntil
);
504 if (EarliestDate
<0 || idata
<EarliestDate
) {
506 memcpy(&EarliestDateTime
,&log_entry
.EntryTime
,sizeof(struct tm
));
508 if (LatestDate
<0 || idata
>LatestDate
) {
510 memcpy(&LatestDateTime
,&log_entry
.EntryTime
,sizeof(struct tm
));
512 if(Filter
->DateRange
[0] != '\0'){
513 if(idata
< dfrom
|| idata
> duntil
) {
514 excluded_count
[ER_OutOfDateRange
]++;
519 // Record only hours usage which is required
520 if( bsearch( &( log_entry
.EntryTime
.tm_wday
), weekdays
.list
, weekdays
.len
, sizeof( int ), compar
) == NULL
) {
521 excluded_count
[ER_OutOfWDayRange
]++;
525 if( bsearch( &( log_entry
.EntryTime
.tm_hour
), hours
.list
, hours
.len
, sizeof( int ), compar
) == NULL
) {
526 excluded_count
[ER_OutOfHourRange
]++;
530 PUser
=process_user(&log_entry
.User
,log_entry
.Ip
,&id_is_ip
);
533 case USERERR_NoError
:
535 case USERERR_NameTooLong
:
536 if (debugz
>=LogLevel_Process
) debuga(__FILE__
,__LINE__
,_("User ID too long: %s\n"),log_entry
.User
);
537 excluded_count
[ER_UserNameTooLong
]++;
540 case USERERR_Excluded
:
541 excluded_count
[ER_User
]++;
543 case USERERR_InvalidChar
:
544 excluded_count
[ER_InvalidUserChar
]++;
546 case USERERR_EmptyUser
:
547 excluded_count
[ER_NoUser
]++;
549 case USERERR_SysUser
:
550 excluded_count
[ER_SysUser
]++;
552 case USERERR_Ignored
:
553 excluded_count
[ER_IgnoredUser
]++;
556 case USERERR_Untracked
:
557 excluded_count
[ER_UntrackedUser
]++;
561 if(vercode(log_entry
.HttpCode
)) {
562 if (debugz
>=LogLevel_Process
) debuga(__FILE__
,__LINE__
,_("Excluded code: %s\n"),log_entry
.HttpCode
);
563 excluded_count
[ER_HttpCode
]++;
568 // replace any tab by a single space
569 for (str
=log_entry
.Url
; *str
; str
++)
570 if (*str
=='\t') *str
=' ';
571 for (str
=log_entry
.HttpCode
; *str
; str
++)
572 if (*str
=='\t') *str
=' ';
574 if (log_line
.current_format
!=&ReadSargLog
) {
576 The full URL is not saved in sarg log. There is no point in testing the URL to detect
579 download_flag
=is_download_suffix(log_entry
.Url
);
581 safe_strcpy(download_url
,log_entry
.Url
,sizeof(download_url
));
586 url
=process_url(log_entry
.Url
,LongUrl
);
587 if (!url
|| url
[0] == '\0') {
588 excluded_count
[ER_NoUrl
]++;
593 if(strcmp(addr
,log_entry
.Ip
)!=0) {
594 excluded_count
[ER_UntrackedIpAddr
]++;
598 if(Filter
->HostFilter
) {
599 if(!vhexclude(url
)) {
600 if (debugz
>=LogLevel_Data
) debuga(__FILE__
,__LINE__
,_("Excluded site: %s\n"),url
);
601 excluded_count
[ER_Url
]++;
607 if(Filter
->StartTime
>= 0 && Filter
->EndTime
>= 0) {
608 hmr
=log_entry
.EntryTime
.tm_hour
*100+log_entry
.EntryTime
.tm_min
;
609 if(hmr
< Filter
->StartTime
|| hmr
> Filter
->EndTime
) {
610 excluded_count
[ER_OutOfTimeRange
]++;
616 if(strstr(url
,site
)==0) {
617 excluded_count
[ER_UntrackedUrl
]++;
622 if (log_entry
.DataSize
<0) log_entry
.DataSize
=0;
624 if (log_entry
.ElapsedTime
<0) log_entry
.ElapsedTime
=0;
625 if (Filter
->max_elapsed
>0 && log_entry
.ElapsedTime
>Filter
->max_elapsed
) {
626 log_entry
.ElapsedTime
=0;
629 if((str
=(char *) strstr(linebuf
, "[SmartFilter:")) != (char *) NULL
) {
631 snprintf(smartfilter
,sizeof(smartfilter
),"\"%s\"",str
+1);
632 } else strcpy(smartfilter
,"\"\"");
636 for (ufile
=first_user_file
; ufile
&& strcmp(log_entry
.User
,ufile
->user
->id
)!=0 ; ufile
=ufile
->next
) {
638 if (ufile
->file
) nopen
++;
641 ufile
=malloc(sizeof(*ufile
));
643 debuga(__FILE__
,__LINE__
,_("Not enough memory to store the user %s\n"),log_entry
.User
);
646 memset(ufile
,0,sizeof(*ufile
));
647 ufile
->next
=first_user_file
;
648 first_user_file
=ufile
;
650 * This id_is_ip stuff is just to store the string only once if the user is
651 * identified by its IP address instead of a distinct ID and IP address.
653 uinfo
=userinfo_create(log_entry
.User
,(id_is_ip
) ? NULL
: log_entry
.Ip
);
658 prev_ufile
->next
=ufile
->next
;
659 ufile
->next
=first_user_file
;
660 first_user_file
=ufile
;
663 #ifdef ENABLE_DOUBLE_CHECK_DATA
664 if (strcmp(log_entry
.HttpCode
,"TCP_DENIED/407")!=0) {
665 ufile
->user
->nbytes
+=log_entry
.DataSize
;
666 ufile
->user
->elap
+=log_entry
.ElapsedTime
;
670 if (ufile
->file
==NULL
) {
671 if (nopen
>=maxopenfiles
) {
673 for (ufile1
=first_user_file
; ufile1
; ufile1
=ufile1
->next
) {
674 if (ufile1
->file
!=NULL
) {
675 if (x
>=maxopenfiles
) {
676 if (fclose(ufile1
->file
)==EOF
) {
677 debuga(__FILE__
,__LINE__
,_("Write error in log file of user %s: %s\n"),ufile1
->user
->id
,strerror(errno
));
686 if (snprintf (tmp3
, sizeof(tmp3
), "%s/%s.user_unsort", tmp
, ufile
->user
->filename
)>=sizeof(tmp3
)) {
687 debuga(__FILE__
,__LINE__
,_("Temporary user file name too long: %s/%s.user_unsort\n"), tmp
, ufile
->user
->filename
);
690 if ((ufile
->file
= MY_FOPEN (tmp3
, "a")) == NULL
) {
691 debuga(__FILE__
,__LINE__
,_("(log) Cannot open temporary file %s: %s\n"), tmp3
, strerror(errno
));
696 strftime(dia
, sizeof(dia
), "%d/%m/%Y",&log_entry
.EntryTime
);
697 strftime(hora
,sizeof(hora
),"%H:%M:%S",&log_entry
.EntryTime
);
699 if (fprintf(ufile
->file
, "%s\t%s\t%s\t%s\t%"PRIu64
"\t%s\t%ld\t%s\n",dia
,hora
,
700 log_entry
.Ip
,url
,(uint64_t)log_entry
.DataSize
,
701 log_entry
.HttpCode
,log_entry
.ElapsedTime
,smartfilter
)<=0) {
702 debuga(__FILE__
,__LINE__
,_("Write error in the log file of user %s\n"),log_entry
.User
);
707 if (fp_log
&& log_line
.current_format
!=&ReadSargLog
) {
708 fprintf(fp_log
, "%s\t%s\t%s\t%s\t%s\t%"PRIu64
"\t%s\t%ld\t%s\n",dia
,hora
,
709 log_entry
.User
,log_entry
.Ip
,url
,(uint64_t)log_entry
.DataSize
,
710 log_entry
.HttpCode
,log_entry
.ElapsedTime
,smartfilter
);
715 denied_write(&log_entry
);
716 authfail_write(&log_entry
);
717 if (download_flag
) download_write(&log_entry
,download_url
);
718 if (log_entry
.UserAgent
)
721 UseragentLog
=UserAgent_Open();
722 UserAgent_Write(UseragentLog
,&log_entry
.EntryTime
,log_entry
.Ip
,log_entry
.User
,log_entry
.UserAgent
);
725 if (log_line
.current_format
!=&ReadSargLog
) {
726 if (period
.start
.tm_year
==0 || idata
<mindate
|| compare_date(&period
.start
,&log_entry
.EntryTime
)>0){
728 memcpy(&period
.start
,&log_entry
.EntryTime
,sizeof(log_entry
.EntryTime
));
730 if (period
.end
.tm_year
==0 || idata
>maxdate
|| compare_date(&period
.end
,&log_entry
.EntryTime
)<0) {
732 memcpy(&period
.end
,&log_entry
.EntryTime
,sizeof(log_entry
.EntryTime
));
736 if (debugz
>=LogLevel_Data
){
737 printf("IP=\t%s\n",log_entry
.Ip
);
738 printf("USER=\t%s\n",log_entry
.User
);
739 printf("ELAP=\t%ld\n",log_entry
.ElapsedTime
);
740 printf("DATE=\t%s\n",dia
);
741 printf("TIME=\t%s\n",hora
);
742 //printf("FUNC=\t%s\n",fun);
743 printf("URL=\t%s\n",url
);
744 printf("CODE=\t%s\n",log_entry
.HttpCode
);
745 printf("LEN=\t%"PRIu64
"\n",(uint64_t)log_entry
.DataSize
);
748 longline_destroy(&line
);
750 if (FileObject_Close(fp_in
)) {
751 debuga(__FILE__
,__LINE__
,_("Read error in \"%s\": %s\n"),arq
,FileObject_GetLastCloseError());
754 if (UseragentLog
) fclose(UseragentLog
);
755 if (ShowReadStatistics
) {
757 printf(_("SARG: Records in file: %lu, reading: %3.2f%%\n"),recs2
, (float) 100 );
759 printf(_("SARG: Records in file: %lu\n"),recs2
);
764 * Display a line with the excluded entries count.
766 * \param Explain A translated string explaining the exluded count.
767 * \param Reason The reason number.
769 static void DisplayExcludeCount(const char *Explain
,enum ExcludeReasonEnum Reason
)
771 if (excluded_count
[Reason
]>0) {
772 debuga(__FILE__
,__LINE__
," %s: %lu\n",Explain
,excluded_count
[Reason
]);
779 \param Filter The filtering parameters for the file to load.
781 \retval 1 Records found.
782 \retval 0 No record found.
784 int ReadLogFile(struct ReadLogDataStruct
*Filter
)
788 struct userfilestruct
*ufile
;
789 struct userfilestruct
*ufile1
;
790 FileListIterator FIter
;
793 for (x
=0 ; x
<sizeof(format_count
)/sizeof(*format_count
) ; x
++) format_count
[x
]=0;
794 for (x
=0 ; x
<sizeof(excluded_count
)/sizeof(*excluded_count
) ; x
++) excluded_count
[x
]=0;
795 first_user_file
=NULL
;
803 FIter
=FileListIter_Open(AccessLog
);
804 while ((file
=FileListIter_Next(FIter
))!=NULL
)
805 ReadOneLogFile(Filter
,file
);
806 FileListIter_Close(FIter
);
810 char val4
[4096];//val4 must not be bigger than SargLogFile without fixing the strcpy below
812 if (fclose(fp_log
)==EOF
) {
813 debuga(__FILE__
,__LINE__
,_("Write error in \"%s\": %s\n"),SargLogFile
,strerror(errno
));
816 strftime(val2
,sizeof(val2
),"%d%m%Y_%H%M",&period
.start
);
817 strftime(val1
,sizeof(val1
),"%d%m%Y_%H%M",&period
.end
);
818 if (snprintf(val4
,sizeof(val4
),"%s/sarg-%s-%s.log",ParsedOutputLog
,val2
,val1
)>=sizeof(val4
)) {
819 debuga(__FILE__
,__LINE__
,_("Path too long: "));
820 debuga_more("%s/sarg-%s-%s.log\n",ParsedOutputLog
,val2
,val1
);
823 if (rename(SargLogFile
,val4
)) {
824 debuga(__FILE__
,__LINE__
,_("failed to rename %s to %s - %s\n"),SargLogFile
,val4
,strerror(errno
));
826 strcpy(SargLogFile
,val4
);
828 if(strcmp(ParsedOutputLogCompress
,"nocompress") != 0 && ParsedOutputLogCompress
[0] != '\0') {
830 No double quotes around ParsedOutputLogCompress because it may contain command line options. If double quotes are
831 necessary around the command name, put them in the configuration file.
833 if (snprintf(val1
,sizeof(val1
),"%s \"%s\"",ParsedOutputLogCompress
,SargLogFile
)>=sizeof(val1
)) {
834 debuga(__FILE__
,__LINE__
,_("Command too long: %s \"%s\"\n"),ParsedOutputLogCompress
,SargLogFile
);
837 cstatus
=system(val1
);
838 if (!WIFEXITED(cstatus
) || WEXITSTATUS(cstatus
)) {
839 debuga(__FILE__
,__LINE__
,_("command return status %d\n"),WEXITSTATUS(cstatus
));
840 debuga(__FILE__
,__LINE__
,_("command: %s\n"),val1
);
846 debuga(__FILE__
,__LINE__
,_("Sarg parsed log saved as %s\n"),SargLogFile
);
853 for (ufile
=first_user_file
; ufile
; ufile
=ufile1
) {
855 if (ufile
->file
!=NULL
&& fclose(ufile
->file
)==EOF
) {
856 debuga(__FILE__
,__LINE__
,_("Write error in log file of user %s: %s\n"),ufile
->user
->id
,strerror(errno
));
863 unsigned long int totalcount
=0;
865 debuga(__FILE__
,__LINE__
,_(" Records read: %ld, written: %ld, excluded: %ld\n"),totregsl
,totregsg
,totregsx
);
867 for (x
=sizeof(excluded_count
)/sizeof(*excluded_count
)-1 ; x
>=0 && excluded_count
[x
]>0 ; x
--);
869 debuga(__FILE__
,__LINE__
,_("Reasons for excluded entries:\n"));
870 DisplayExcludeCount(_("User name too long"),ER_UserNameTooLong
);
871 DisplayExcludeCount(_("Squid logged an incomplete query received from the client"),ER_IncompleteQuery
);
872 DisplayExcludeCount(_("Log file turned over"),ER_LogfileTurnedOver
);
873 DisplayExcludeCount(_("Excluded by \"exclude_string\" in sarg.conf"),ER_ExcludeString
);
874 DisplayExcludeCount(_("Unknown input log file format"),ER_UnknownFormat
);
875 DisplayExcludeCount(_("Line ignored by the input log format"),ER_FormatData
);
876 DisplayExcludeCount(_("Time outside the requested date range (-d)"),ER_OutOfDateRange
);
877 DisplayExcludeCount(_("Ignored week day (\"weekdays\" parameter in sarg.conf)"),ER_OutOfWDayRange
);
878 DisplayExcludeCount(_("Ignored hour (\"hours\" parameter in sarg.conf)"),ER_OutOfHourRange
);
879 DisplayExcludeCount(_("User is not in the \"include_users\" list"),ER_User
);
880 DisplayExcludeCount(_("HTTP code excluded by \"exclude_code\" file"),ER_HttpCode
);
881 DisplayExcludeCount(_("Invalid character found in user name"),ER_InvalidUserChar
);
882 DisplayExcludeCount(_("No URL in entry"),ER_NoUrl
);
883 DisplayExcludeCount(_("Not the IP address requested with -a"),ER_UntrackedIpAddr
);
884 DisplayExcludeCount(_("URL excluded by -c or \"exclude_hosts\""),ER_Url
);
885 DisplayExcludeCount(_("Entry time outside of requested hour range (-t)"),ER_OutOfTimeRange
);
886 DisplayExcludeCount(_("Not the URL requested by -s"),ER_UntrackedUrl
);
887 DisplayExcludeCount(_("No user in entry"),ER_NoUser
);
888 DisplayExcludeCount(_("Not the user requested by -u"),ER_UntrackedUser
);
889 DisplayExcludeCount(_("System user as defined by \"password\" in sarg.conf"),ER_SysUser
);
890 DisplayExcludeCount(_("User ignored by \"exclude_users\""),ER_IgnoredUser
);
893 for (x
=0 ; x
<sizeof(LogFormats
)/sizeof(*LogFormats
) ; x
++) {
894 if (format_count
[x
]>0) {
895 /* TRANSLATORS: It displays the number of lines found in the input log files
896 * for each supported log format. The log format name is the %s and is a string
897 * you translate somewhere else. */
898 debuga(__FILE__
,__LINE__
,_("%s: %lu entries\n"),_(LogFormats
[x
]->Name
),format_count
[x
]);
899 totalcount
+=format_count
[x
];
903 if (totalcount
==0 && totregsg
)
904 debuga(__FILE__
,__LINE__
,_("Log with invalid format\n"));
907 return((totregsg
!=0) ? 1 : 0);
911 * Get the start and end date of the period covered by the log files.
913 bool GetLogPeriod(struct tm
*Start
,struct tm
*End
)
917 if (EarliestDate
>=0) {
918 memcpy(Start
,&EarliestDateTime
,sizeof(struct tm
));
921 memset(Start
,0,sizeof(struct tm
));
924 memcpy(End
,&LatestDateTime
,sizeof(struct tm
));
927 memset(End
,0,sizeof(struct tm
));