]> git.ipfire.org Git - thirdparty/sarg.git/blob - readlog.c
Improve the documentation for strip_user_suffix
[thirdparty/sarg.git] / readlog.c
1 /*
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
3 * 1998, 2015
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
27 #include "include/conf.h"
28 #include "include/defs.h"
29 #include "include/readlog.h"
30 #include "include/filelist.h"
31
32 #define REPORT_EVERY_X_LINES 5000
33 #define MAX_OPEN_USER_FILES 10
34
/*!
 * One node of the linked list of per-user temporary output files.
 */
struct userfilestruct
{
	//! The next node of the list or NULL at the end of the list.
	struct userfilestruct *next;
	//! The user whose log entries are written to \c file.
	struct userinfostruct *user;
	//! The output stream or NULL while the file of that user is not open.
	FILE *file;
};
41
/*!
 * The reasons why an entry of the input log is not kept. The values
 * are used as indexes in the array counting the excluded entries.
 */
enum ExcludeReasonEnum
{
	ER_UserNameTooLong,   //!< User name too long.
	ER_IncompleteQuery,   //!< Squid logged an incomplete query received from the client.
	ER_LogfileTurnedOver, //!< Log file turned over.
	ER_ExcludeString,     //!< Excluded by exclude_string from sarg.conf.
	ER_UnknownFormat,     //!< Unknown input log file format.
	ER_FormatData,        //!< Line to be ignored from the input log file.
	ER_OutOfDateRange,    //!< Entry not within the requested date range.
	ER_OutOfWDayRange,    //!< Ignored week day.
	ER_OutOfHourRange,    //!< Ignored hour.
	ER_User,              //!< User is not in the include_users list.
	ER_HttpCode,          //!< HTTP code excluded by exclude_code file.
	ER_InvalidUserChar,   //!< Invalid character found in user name.
	ER_NoUrl,             //!< No URL in entry.
	ER_UntrackedIpAddr,   //!< Not the IP address requested with -a.
	ER_Url,               //!< URL excluded by -c or exclude_hosts.
	ER_OutOfTimeRange,    //!< Entry time outside of requested hour range.
	ER_UntrackedUrl,      //!< Not the URL requested by -s.
	ER_NoUser,            //!< No user in entry.
	ER_UntrackedUser,     //!< Not the user requested by -u.
	ER_SysUser,           //!< System user.
	ER_IgnoredUser,       //!< User ignored by exclude_users.

	ER_Last               //!< Last entry of the list. It is the number of reasons.
};
89
90 numlist weekdays = { { 0, 1, 2, 3, 4, 5, 6 }, 7 };
91 numlist hours = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 }, 24 };
92 //! Domain suffix to strip from the user name.
93 char StripUserSuffix[MAX_USER_LEN]="";
94
95 extern char *userfile;
96 extern FileListObject AccessLog;
97
98 extern const struct ReadLogProcessStruct ReadSquidLog;
99 extern const struct ReadLogProcessStruct ReadCommonLog;
100 extern const struct ReadLogProcessStruct ReadSargLog;
101 extern const struct ReadLogProcessStruct ReadExtLog;
102
103 //! The list of the supported log formats.
104 static const struct ReadLogProcessStruct const *LogFormats[]=
105 {
106 &ReadSquidLog,
107 &ReadCommonLog,
108 &ReadSargLog,
109 &ReadExtLog
110 };
111
112 //! The path to the sarg log file.
113 static char SargLogFile[4096]="";
114 //! Handle to the sarg log file. NULL if not created.
115 static FILE *fp_log=NULL;
116 //! The number of records read from the input logs.
117 static long int totregsl=0;
118 //! The number of records kept.
119 static long int totregsg=0;
120 //! The number of records excluded.
121 static long int totregsx=0;
122 //! The beginning of a linked list of user's file.
123 static struct userfilestruct *first_user_file=NULL;
124 //! Count the number of occurence of each input log format.
125 static unsigned long int format_count[sizeof(LogFormats)/sizeof(*LogFormats)];
126 //! The minimum date found in the input logs.
127 static int mindate=0;
128 static int maxdate=0;
129 //! Count the number of excluded records.
130 static unsigned long int excluded_count[ER_Last];
131 //! Earliest date found in the log.
132 static int EarliestDate=-1;
133 //! The earliest date in time format.
134 static struct tm EarliestDateTime;
135 //! Latest date found in the log.
136 static int LatestDate=-1;
137 //! The latest date in time format.
138 static struct tm LatestDateTime;
139
/*!
 * Read from standard input.
 *
 * \param Data The file object. It is the \c FILE stream to read.
 * \param Buffer The buffer to store the data read.
 * \param Size How many bytes to read.
 *
 * \return The number of bytes read. It is zero if \a Size is not
 * positive, at the end of the file or if the read fails.
 */
static int Stdin_Read(void *Data,void *Buffer,int Size)
{
	/*
	 * A negative size would be converted to a huge unsigned value by
	 * fread() and overflow the buffer.
	 */
	if (Size<=0) return(0);
	return((int)fread(Buffer,1,(size_t)Size,(FILE *)Data));
}
153
/*!
 * Tell if the end of the input stream is reached.
 *
 * \param Data The file object. It is the \c FILE stream to test.
 *
 * \return The value returned by feof(): non zero if the end of file
 * indicator of the stream is set.
 */
static int Stdin_Eof(void *Data)
{
	FILE *Stream=Data;

	return feof(Stream);
}
165
/*!
 * Mimic a close of standard input but do nothing. The stream is
 * left open.
 *
 * \param Data File to close. It is not used.
 *
 * \return Always zero.
 */
static int Stdin_Close(void *Data)
{
	(void)Data;
	return 0;
}
177
178 /*!
179 * Open a file object to read from standard input.
180 *
181 * \return The object to pass to other function in this module.
182 */
183 static FileObject *Stdin_Open(void)
184 {
185 FileObject *File;
186
187 FileObject_SetLastOpenError(NULL);
188 File=calloc(1,sizeof(*File));
189 if (!File)
190 {
191 FileObject_SetLastOpenError(_("Not enough memory"));
192 return(NULL);
193 }
194 File->Data=stdin;
195 File->Read=Stdin_Read;
196 File->Eof=Stdin_Eof;
197 File->Rewind=NULL;
198 File->Close=Stdin_Close;
199 return(File);
200 }
201
202 /*!
203 * Initialize the memory structure needed by LogLine_Parse() to parse
204 * a log line.
205 *
206 * \param log_line The structure to initialize.
207 */
208 void LogLine_Init(struct LogLineStruct *log_line)
209 {
210 log_line->current_format=NULL;
211 log_line->current_format_idx=-1;
212 log_line->file_name="";
213 log_line->successive_errors=0;
214 log_line->total_errors=0;
215 }
216
217 /*!
218 * Set the name of the log file being parsed.
219 *
220 * \param log_line Data structure to parse the log line.
221 * \param file_name The name of the log file being read.
222 */
223 void LogLine_File(struct LogLineStruct *log_line,const char *file_name)
224 {
225 log_line->file_name=file_name;
226 }
227
228 /*!
229 * Parse the next line from a log file.
230 *
231 * \param log_line A buffer to store the data about the current parsing.
232 * \param log_entry The variable to store the parsed data.
233 * \param linebuf The text line read from the log file.
234 *
235 * \return
236 */
237 enum ReadLogReturnCodeEnum LogLine_Parse(struct LogLineStruct *log_line,struct ReadLogStruct *log_entry,char *linebuf)
238 {
239 enum ReadLogReturnCodeEnum log_entry_status=RLRC_Unknown;
240 int x;
241
242 if (log_line->current_format)
243 {
244 memset(log_entry,0,sizeof(*log_entry));
245 log_entry_status=log_line->current_format->ReadEntry(linebuf,log_entry);
246 }
247
248 // find out what line format to use
249 if (log_entry_status==RLRC_Unknown)
250 {
251 for (x=0 ; x<(int)(sizeof(LogFormats)/sizeof(*LogFormats)) ; x++)
252 {
253 if (LogFormats[x]==log_line->current_format) continue;
254 memset(log_entry,0,sizeof(*log_entry));
255 log_entry_status=LogFormats[x]->ReadEntry(linebuf,log_entry);
256 if (log_entry_status!=RLRC_Unknown)
257 {
258 log_line->current_format=LogFormats[x];
259 log_line->current_format_idx=x;
260 if (debugz>=LogLevel_Process)
261 {
262 /* TRANSLATORS: The argument is the log format name as translated by you. */
263 debuga(__FILE__,__LINE__,_("Log format identified as \"%s\" for %s\n"),_(log_line->current_format->Name),log_line->file_name);
264 }
265 break;
266 }
267 }
268 if (x>=(int)(sizeof(LogFormats)/sizeof(*LogFormats)))
269 {
270 if (++log_line->successive_errors>NumLogSuccessiveErrors) {
271 debuga(__FILE__,__LINE__,ngettext("%d consecutive error found in the input log file %s\n",
272 "%d consecutive errors found in the input log file %s\n",log_line->successive_errors),log_line->successive_errors,log_line->file_name);
273 exit(EXIT_FAILURE);
274 }
275 if (NumLogTotalErrors>=0 && ++log_line->total_errors>NumLogTotalErrors) {
276 debuga(__FILE__,__LINE__,ngettext("%d error found in the input log file (last in %s)\n",
277 "%d errors found in the input log file (last in %s)\n",log_line->total_errors),log_line->total_errors,log_line->file_name);
278 exit(EXIT_FAILURE);
279 }
280 debuga(__FILE__,__LINE__,_("The following line read from %s could not be parsed and is ignored\n%s\n"),log_line->file_name,linebuf);
281 }
282 else
283 log_line->successive_errors=0;
284 }
285
286 if (log_line->current_format_idx<0 || log_line->current_format==NULL) {
287 debuga(__FILE__,__LINE__,_("Sarg failed to determine the format of the input log file %s\n"),log_line->file_name);
288 exit(EXIT_FAILURE);
289 }
290 if (log_entry_status==RLRC_InternalError) {
291 debuga(__FILE__,__LINE__,_("Internal error encountered while processing %s\nSee previous message to know the reason for that error.\n"),log_line->file_name);
292 exit(EXIT_FAILURE);
293 }
294 return(log_entry_status);
295 }
296
297 /*!
298 Read a single log file.
299
300 \param arq The log file name to read.
301 */
302 static void ReadOneLogFile(struct ReadLogDataStruct *Filter,const char *arq)
303 {
304 longline line;
305 char *linebuf;
306 char *str;
307 char hora[30];
308 char dia[128]="";
309 char wuser[MAXLEN];
310 char tmp3[MAXLEN]="";
311 char download_url[MAXLEN];
312 char smartfilter[MAXLEN];
313 const char *url;
314 const char *user;
315 const char *UserPtr;
316 int OutputNonZero = REPORT_EVERY_X_LINES ;
317 int idata=0;
318 int x;
319 int hmr;
320 int nopen;
321 int StripSuffixLen;
322 int maxopenfiles=MAX_OPEN_USER_FILES;
323 unsigned long int recs1=0UL;
324 unsigned long int recs2=0UL;
325 FileObject *fp_in=NULL;
326 bool download_flag=false;
327 bool id_is_ip;
328 enum ReadLogReturnCodeEnum log_entry_status;
329 struct stat logstat;
330 struct getwordstruct gwarea;
331 struct userfilestruct *prev_ufile;
332 struct userinfostruct *uinfo;
333 struct userfilestruct *ufile;
334 struct userfilestruct *ufile1;
335 struct ReadLogStruct log_entry;
336 struct LogLineStruct log_line;
337
338 LogLine_Init(&log_line);
339 LogLine_File(&log_line,arq);
340 for (x=0 ; x<sizeof(LogFormats)/sizeof(*LogFormats) ; x++)
341 if (LogFormats[x]->NewFile)
342 LogFormats[x]->NewFile(arq);
343
344 if (arq[0]=='-' && arq[1]=='\0') {
345 fp_in=Stdin_Open();
346 if(debug)
347 debuga(__FILE__,__LINE__,_("Reading access log file: from stdin\n"));
348 } else {
349 if (Filter->DateRange[0]!='\0') {
350 if (stat(arq,&logstat)!=0) {
351 debuga(__FILE__,__LINE__,_("Cannot get the modification time of input log file %s (%s). Processing it anyway\n"),arq,strerror(errno));
352 } else {
353 struct tm *logtime=localtime(&logstat.st_mtime);
354 if ((logtime->tm_year+1900)*10000+(logtime->tm_mon+1)*100+logtime->tm_mday<dfrom) {
355 debuga(__FILE__,__LINE__,_("Ignoring old log file %s\n"),arq);
356 return;
357 }
358 }
359 }
360 fp_in=decomp(arq);
361 if (fp_in==NULL) {
362 debuga(__FILE__,__LINE__,_("Cannot open input log file \"%s\": %s\n"),arq,FileObject_GetLastOpenError());
363 exit(EXIT_FAILURE);
364 }
365 if (debug) debuga(__FILE__,__LINE__,_("Reading access log file: %s\n"),arq);
366 }
367
368 download_flag=false;
369
370 recs1=0UL;
371 recs2=0UL;
372 StripSuffixLen=strlen(StripUserSuffix);
373
374 // pre-read the file only if we have to show stats
375 if (ShowReadStatistics && ShowReadPercent && fp_in->Rewind) {
376 int nread,i;
377 bool skipcr=false;
378 char tmp4[MAXLEN];
379
380 while ((nread=FileObject_Read(fp_in,tmp4,sizeof(tmp4)))>0) {
381 for (i=0 ; i<nread ; i++)
382 if (skipcr) {
383 if (tmp4[i]!='\n' && tmp4[i]!='\r') {
384 skipcr=false;
385 }
386 } else {
387 if (tmp4[i]=='\n' || tmp4[i]=='\r') {
388 skipcr=true;
389 recs1++;
390 }
391 }
392 }
393 FileObject_Rewind(fp_in);
394 printf(_("SARG: Records in file: %lu, reading: %3.2f%%"),recs1,(float) 0);
395 putchar('\r');
396 fflush( stdout ) ;
397 }
398
399 if ((line=longline_create())==NULL) {
400 debuga(__FILE__,__LINE__,_("Not enough memory to read file \"%s\"\n"),arq);
401 exit(EXIT_FAILURE);
402 }
403
404 while ((linebuf=longline_read(fp_in,line))!=NULL) {
405 lines_read++;
406
407 recs2++;
408 if (ShowReadStatistics && --OutputNonZero<=0) {
409 if (recs1>0) {
410 double perc = recs2 * 100. / recs1 ;
411 printf(_("SARG: Records in file: %lu, reading: %3.2lf%%"),recs2,perc);
412 } else {
413 printf(_("SARG: Records in file: %lu"),recs2);
414 }
415 putchar('\r');
416 fflush (stdout);
417 OutputNonZero = REPORT_EVERY_X_LINES ;
418 }
419
420 /*
421 The following checks are retained here as I don't know to
422 what format they apply. They date back to pre 2.4 versions.
423 */
424 //if(blen < 58) continue; //this test conflict with the reading of the sarg log header line
425 if(strstr(linebuf,"HTTP/0.0") != 0) {//recorded by squid when encountering an incomplete query
426 excluded_count[ER_IncompleteQuery]++;
427 continue;
428 }
429 if(strstr(linebuf,"logfile turned over") != 0) {//reported by newsyslog
430 excluded_count[ER_LogfileTurnedOver]++;
431 continue;
432 }
433
434 // exclude_string
435 if(ExcludeString[0] != '\0') {
436 bool exstring=false;
437 getword_start(&gwarea,ExcludeString);
438 while(strchr(gwarea.current,':') != 0) {
439 if (getword_multisep(val1,sizeof(val1),&gwarea,':')<0) {
440 debuga(__FILE__,__LINE__,_("Invalid record in exclusion string\n"));
441 exit(EXIT_FAILURE);
442 }
443 if((str=(char *) strstr(linebuf,val1)) != (char *) NULL ) {
444 exstring=true;
445 break;
446 }
447 }
448 if(!exstring && (str=(char *) strstr(linebuf,gwarea.current)) != (char *) NULL )
449 exstring=true;
450 if(exstring) {
451 excluded_count[ER_ExcludeString]++;
452 continue;
453 }
454 }
455
456 totregsl++;
457 if (debugz>=LogLevel_Data)
458 printf("BUF=%s\n",linebuf);
459
460 // process the line
461 log_entry_status=LogLine_Parse(&log_line,&log_entry,linebuf);
462 if (log_entry_status==RLRC_Unknown)
463 {
464 excluded_count[ER_UnknownFormat]++;
465 continue;
466 }
467 if (log_entry_status==RLRC_Ignore) {
468 excluded_count[ER_FormatData]++;
469 continue;
470 }
471 format_count[log_line.current_format_idx]++;
472
473 if (!fp_log && ParsedOutputLog[0] && log_line.current_format!=&ReadSargLog) {
474 if(access(ParsedOutputLog,R_OK) != 0) {
475 my_mkdir(ParsedOutputLog);
476 }
477 if (snprintf(SargLogFile,sizeof(SargLogFile),"%s/sarg_temp.log",ParsedOutputLog)>=sizeof(SargLogFile)) {
478 debuga(__FILE__,__LINE__,_("Path too long: "));
479 debuga_more("%s/sarg_temp.log\n",ParsedOutputLog);
480 exit(EXIT_FAILURE);
481 }
482 if((fp_log=MY_FOPEN(SargLogFile,"w"))==NULL) {
483 debuga(__FILE__,__LINE__,_("Cannot open file \"%s\": %s\n"),SargLogFile,strerror(errno));
484 exit(EXIT_FAILURE);
485 }
486 fputs("*** SARG Log ***\n",fp_log);
487 }
488
489 if (log_entry.Ip==NULL) {
490 debuga(__FILE__,__LINE__,_("Unknown input log file format: no IP addresses\n"));
491 break;
492 }
493 if (log_entry.User==NULL) {
494 debuga(__FILE__,__LINE__,_("Unknown input log file format: no user\n"));
495 break;
496 }
497 if (log_entry.Url==NULL) {
498 debuga(__FILE__,__LINE__,_("Unknown input log file format: no URL\n"));
499 break;
500 }
501
502 idata=builddia(log_entry.EntryTime.tm_mday,log_entry.EntryTime.tm_mon+1,log_entry.EntryTime.tm_year+1900);
503 if (debugz>=LogLevel_Data)
504 printf("DATE=%s IDATA=%d DFROM=%d DUNTIL=%d\n",Filter->DateRange,idata,dfrom,duntil);
505
506 if (EarliestDate<0 || idata<EarliestDate) {
507 EarliestDate=idata;
508 memcpy(&EarliestDateTime,&log_entry.EntryTime,sizeof(struct tm));
509 }
510 if (LatestDate<0 || idata>LatestDate) {
511 LatestDate=idata;
512 memcpy(&LatestDateTime,&log_entry.EntryTime,sizeof(struct tm));
513 }
514 if(Filter->DateRange[0] != '\0'){
515 if(idata < dfrom || idata > duntil) {
516 excluded_count[ER_OutOfDateRange]++;
517 continue;
518 }
519 }
520
521 // Record only hours usage which is required
522 if( bsearch( &( log_entry.EntryTime.tm_wday ), weekdays.list, weekdays.len, sizeof( int ), compar ) == NULL ) {
523 excluded_count[ER_OutOfWDayRange]++;
524 continue;
525 }
526
527 if( bsearch( &( log_entry.EntryTime.tm_hour ), hours.list, hours.len, sizeof( int ), compar ) == NULL ) {
528 excluded_count[ER_OutOfHourRange]++;
529 continue;
530 }
531
532 if (StripSuffixLen>0)
533 {
534 x=strlen(log_entry.User);
535 if (x>StripSuffixLen && strcasecmp(log_entry.User+(x-StripSuffixLen),StripUserSuffix)==0)
536 log_entry.User[x-StripSuffixLen]='\0';
537 }
538 if(strlen(log_entry.User) > MAX_USER_LEN) {
539 if (debugz>=LogLevel_Process) debuga(__FILE__,__LINE__,_("User ID too long: %s\n"),log_entry.User);
540 excluded_count[ER_UserNameTooLong]++;
541 totregsx++;
542 continue;
543 }
544
545 // include_users
546 if(IncludeUsers[0] != '\0') {
547 snprintf(val1,sizeof(val1),":%s:",log_entry.User);
548 if((str=(char *) strstr(IncludeUsers,val1)) == (char *) NULL ) {
549 excluded_count[ER_User]++;
550 continue;
551 }
552 }
553
554 if(vercode(log_entry.HttpCode)) {
555 if (debugz>=LogLevel_Process) debuga(__FILE__,__LINE__,_("Excluded code: %s\n"),log_entry.HttpCode);
556 excluded_count[ER_HttpCode]++;
557 totregsx++;
558 continue;
559 }
560
561 if(testvaliduserchar(log_entry.User)) {
562 excluded_count[ER_InvalidUserChar]++;
563 continue;
564 }
565
566 // replace any tab by a single space
567 for (str=log_entry.Url ; *str ; str++)
568 if (*str=='\t') *str=' ';
569 for (str=log_entry.HttpCode ; *str ; str++)
570 if (*str=='\t') *str=' ';
571
572 if (log_line.current_format!=&ReadSargLog) {
573 /*
574 The full URL is not saved in sarg log. There is no point in testing the URL to detect
575 a downloaded file.
576 */
577 download_flag=is_download_suffix(log_entry.Url);
578 if (download_flag) {
579 safe_strcpy(download_url,log_entry.Url,sizeof(download_url));
580 }
581 } else
582 download_flag=false;
583
584 url=process_url(log_entry.Url,LongUrl);
585 if (!url || url[0] == '\0') {
586 excluded_count[ER_NoUrl]++;
587 continue;
588 }
589
590 if(addr[0] != '\0'){
591 if(strcmp(addr,log_entry.Ip)!=0) {
592 excluded_count[ER_UntrackedIpAddr]++;
593 continue;
594 }
595 }
596 if(Filter->HostFilter) {
597 if(!vhexclude(url)) {
598 if (debugz>=LogLevel_Data) debuga(__FILE__,__LINE__,_("Excluded site: %s\n"),url);
599 excluded_count[ER_Url]++;
600 totregsx++;
601 continue;
602 }
603 }
604
605 if(Filter->StartTime >= 0 && Filter->EndTime >= 0) {
606 hmr=log_entry.EntryTime.tm_hour*100+log_entry.EntryTime.tm_min;
607 if(hmr < Filter->StartTime || hmr > Filter->EndTime) {
608 excluded_count[ER_OutOfTimeRange]++;
609 continue;
610 }
611 }
612
613 if(site[0] != '\0'){
614 if(strstr(url,site)==0) {
615 excluded_count[ER_UntrackedUrl]++;
616 continue;
617 }
618 }
619
620 if(UserIp) {
621 UserPtr=log_entry.Ip;
622 id_is_ip=true;
623 } else {
624 UserPtr=log_entry.User;
625 id_is_ip=false;
626 if ((UserPtr[0]=='\0') || (UserPtr[1]=='\0' && (UserPtr[0]=='-' || UserPtr[0]==' '))) {
627 if(RecordsWithoutUser == RECORDWITHOUTUSER_IP) {
628 UserPtr=log_entry.Ip;
629 id_is_ip=true;
630 }
631 if(RecordsWithoutUser == RECORDWITHOUTUSER_IGNORE) {
632 excluded_count[ER_NoUser]++;
633 continue;
634 }
635 if(RecordsWithoutUser == RECORDWITHOUTUSER_EVERYBODY)
636 UserPtr="everybody";
637 } else {
638 if(NtlmUserFormat == NTLMUSERFORMAT_USER) {
639 if ((str=strchr(UserPtr,'+'))!=NULL || (str=strchr(UserPtr,'\\'))!=NULL || (str=strchr(UserPtr,'_'))!=NULL) {
640 UserPtr=str+1;
641 }
642 }
643 }
644 }
645
646 if(us[0] != '\0'){
647 if(strcmp(UserPtr,us)!=0) {
648 excluded_count[ER_UntrackedUser]++;
649 continue;
650 }
651 }
652
653 if(Filter->SysUsers) {
654 snprintf(wuser,sizeof(wuser),":%s:",UserPtr);
655 if(strstr(userfile, wuser) == 0) {
656 excluded_count[ER_SysUser]++;
657 continue;
658 }
659 }
660
661 if(Filter->UserFilter) {
662 if(!vuexclude(UserPtr)) {
663 if (debugz>=LogLevel_Process) debuga(__FILE__,__LINE__,_("Excluded user: %s\n"),UserPtr);
664 excluded_count[ER_IgnoredUser]++;
665 totregsx++;
666 continue;
667 }
668 }
669
670 user=process_user(UserPtr);
671 if (UserPtr!=user)
672 {
673 UserPtr=user;
674 id_is_ip=false;
675 }
676 if (UserPtr[0]=='\0' || (UserPtr[1]=='\0' && (UserPtr[0]=='-' || UserPtr[0]==' ' || UserPtr[0]==':'))) {
677 excluded_count[ER_NoUser]++;
678 continue;
679 }
680
681 if (log_entry.DataSize<0) log_entry.DataSize=0;
682
683 if (log_entry.ElapsedTime<0) log_entry.ElapsedTime=0;
684 if (Filter->max_elapsed>0 && log_entry.ElapsedTime>Filter->max_elapsed) {
685 log_entry.ElapsedTime=0;
686 }
687
688 if((str=(char *) strstr(linebuf, "[SmartFilter:")) != (char *) NULL ) {
689 fixendofline(str);
690 snprintf(smartfilter,sizeof(smartfilter),"\"%s\"",str+1);
691 } else strcpy(smartfilter,"\"\"");
692
693 nopen=0;
694 prev_ufile=NULL;
695 for (ufile=first_user_file ; ufile && strcmp(UserPtr,ufile->user->id)!=0 ; ufile=ufile->next) {
696 prev_ufile=ufile;
697 if (ufile->file) nopen++;
698 }
699 if (!ufile) {
700 ufile=malloc(sizeof(*ufile));
701 if (!ufile) {
702 debuga(__FILE__,__LINE__,_("Not enough memory to store the user %s\n"),UserPtr);
703 exit(EXIT_FAILURE);
704 }
705 memset(ufile,0,sizeof(*ufile));
706 ufile->next=first_user_file;
707 first_user_file=ufile;
708 /*
709 * This id_is_ip stuff is just to store the string only once if the user is
710 * identified by its IP address instead of a distinct ID and IP address.
711 */
712 uinfo=userinfo_create(UserPtr,(id_is_ip) ? NULL : log_entry.Ip);
713 ufile->user=uinfo;
714 nusers++;
715 } else {
716 if (prev_ufile) {
717 prev_ufile->next=ufile->next;
718 ufile->next=first_user_file;
719 first_user_file=ufile;
720 }
721 }
722 #ifdef ENABLE_DOUBLE_CHECK_DATA
723 if (strcmp(log_entry.HttpCode,"TCP_DENIED/407")!=0) {
724 ufile->user->nbytes+=log_entry.DataSize;
725 ufile->user->elap+=log_entry.ElapsedTime;
726 }
727 #endif
728
729 if (ufile->file==NULL) {
730 if (nopen>=maxopenfiles) {
731 x=0;
732 for (ufile1=first_user_file ; ufile1 ; ufile1=ufile1->next) {
733 if (ufile1->file!=NULL) {
734 if (x>=maxopenfiles) {
735 if (fclose(ufile1->file)==EOF) {
736 debuga(__FILE__,__LINE__,_("Write error in log file of user %s: %s\n"),ufile1->user->id,strerror(errno));
737 exit(EXIT_FAILURE);
738 }
739 ufile1->file=NULL;
740 }
741 x++;
742 }
743 }
744 }
745 if (snprintf (tmp3, sizeof(tmp3), "%s/%s.user_unsort", tmp, ufile->user->filename)>=sizeof(tmp3)) {
746 debuga(__FILE__,__LINE__,_("Temporary user file name too long: %s/%s.user_unsort\n"), tmp, ufile->user->filename);
747 exit(EXIT_FAILURE);
748 }
749 if ((ufile->file = MY_FOPEN (tmp3, "a")) == NULL) {
750 debuga(__FILE__,__LINE__,_("(log) Cannot open temporary file %s: %s\n"), tmp3, strerror(errno));
751 exit (1);
752 }
753 }
754
755 strftime(dia, sizeof(dia), "%d/%m/%Y",&log_entry.EntryTime);
756 strftime(hora,sizeof(hora),"%H:%M:%S",&log_entry.EntryTime);
757
758 if (fprintf(ufile->file, "%s\t%s\t%s\t%s\t%"PRIu64"\t%s\t%ld\t%s\n",dia,hora,
759 log_entry.Ip,url,(uint64_t)log_entry.DataSize,
760 log_entry.HttpCode,log_entry.ElapsedTime,smartfilter)<=0) {
761 debuga(__FILE__,__LINE__,_("Write error in the log file of user %s\n"),UserPtr);
762 exit(EXIT_FAILURE);
763 }
764 records_kept++;
765
766 if (fp_log && log_line.current_format!=&ReadSargLog) {
767 fprintf(fp_log, "%s\t%s\t%s\t%s\t%s\t%"PRIu64"\t%s\t%ld\t%s\n",dia,hora,
768 UserPtr,log_entry.Ip,url,(uint64_t)log_entry.DataSize,
769 log_entry.HttpCode,log_entry.ElapsedTime,smartfilter);
770 }
771
772 totregsg++;
773
774 denied_write(&log_entry);
775 authfail_write(&log_entry);
776 if (download_flag) download_write(&log_entry,download_url);
777
778 if (log_line.current_format!=&ReadSargLog) {
779 if (period.start.tm_year==0 || idata<mindate || compare_date(&period.start,&log_entry.EntryTime)>0){
780 mindate=idata;
781 memcpy(&period.start,&log_entry.EntryTime,sizeof(log_entry.EntryTime));
782 }
783 if (period.end.tm_year==0 || idata>maxdate || compare_date(&period.end,&log_entry.EntryTime)<0) {
784 maxdate=idata;
785 memcpy(&period.end,&log_entry.EntryTime,sizeof(log_entry.EntryTime));
786 }
787 }
788
789 if (debugz>=LogLevel_Data){
790 printf("IP=\t%s\n",log_entry.Ip);
791 printf("USER=\t%s\n",UserPtr);
792 printf("ELAP=\t%ld\n",log_entry.ElapsedTime);
793 printf("DATE=\t%s\n",dia);
794 printf("TIME=\t%s\n",hora);
795 //printf("FUNC=\t%s\n",fun);
796 printf("URL=\t%s\n",url);
797 printf("CODE=\t%s\n",log_entry.HttpCode);
798 printf("LEN=\t%"PRIu64"\n",(uint64_t)log_entry.DataSize);
799 }
800 }
801 longline_destroy(&line);
802
803 if (FileObject_Close(fp_in)) {
804 debuga(__FILE__,__LINE__,_("Read error in \"%s\": %s\n"),arq,FileObject_GetLastCloseError());
805 exit(EXIT_FAILURE);
806 }
807 if (ShowReadStatistics) {
808 if (ShowReadPercent)
809 printf(_("SARG: Records in file: %lu, reading: %3.2f%%\n"),recs2, (float) 100 );
810 else
811 printf(_("SARG: Records in file: %lu\n"),recs2);
812 }
813 }
814
815 /*!
816 * Display a line with the excluded entries count.
817 *
818 * \param Explain A translated string explaining the exluded count.
819 * \param Reason The reason number.
820 */
821 static void DisplayExcludeCount(const char *Explain,enum ExcludeReasonEnum Reason)
822 {
823 if (excluded_count[Reason]>0) {
824 debuga(__FILE__,__LINE__," %s: %lu\n",Explain,excluded_count[Reason]);
825 }
826 }
827
828 /*!
829 Read the log files.
830
831 \param Filter The filtering parameters for the file to load.
832
833 \retval 1 Records found.
834 \retval 0 No record found.
835 */
836 int ReadLogFile(struct ReadLogDataStruct *Filter)
837 {
838 int x;
839 int cstatus;
840 struct userfilestruct *ufile;
841 struct userfilestruct *ufile1;
842 FileListIterator FIter;
843 const char *file;
844
845 for (x=0 ; x<sizeof(format_count)/sizeof(*format_count) ; x++) format_count[x]=0;
846 for (x=0 ; x<sizeof(excluded_count)/sizeof(*excluded_count) ; x++) excluded_count[x]=0;
847 first_user_file=NULL;
848
849 if (!dataonly) {
850 denied_open();
851 authfail_open();
852 download_open();
853 }
854
855 FIter=FileListIter_Open(AccessLog);
856 while ((file=FileListIter_Next(FIter))!=NULL)
857 ReadOneLogFile(Filter,file);
858 FileListIter_Close(FIter);
859
860 if(fp_log != NULL) {
861 char val2[40];
862 char val4[4096];//val4 must not be bigger than SargLogFile without fixing the strcpy below
863
864 if (fclose(fp_log)==EOF) {
865 debuga(__FILE__,__LINE__,_("Write error in \"%s\": %s\n"),SargLogFile,strerror(errno));
866 exit(EXIT_FAILURE);
867 }
868 strftime(val2,sizeof(val2),"%d%m%Y_%H%M",&period.start);
869 strftime(val1,sizeof(val1),"%d%m%Y_%H%M",&period.end);
870 if (snprintf(val4,sizeof(val4),"%s/sarg-%s-%s.log",ParsedOutputLog,val2,val1)>=sizeof(val4)) {
871 debuga(__FILE__,__LINE__,_("Path too long: "));
872 debuga_more("%s/sarg-%s-%s.log\n",ParsedOutputLog,val2,val1);
873 exit(EXIT_FAILURE);
874 }
875 if (rename(SargLogFile,val4)) {
876 debuga(__FILE__,__LINE__,_("failed to rename %s to %s - %s\n"),SargLogFile,val4,strerror(errno));
877 } else {
878 strcpy(SargLogFile,val4);
879
880 if(strcmp(ParsedOutputLogCompress,"nocompress") != 0 && ParsedOutputLogCompress[0] != '\0') {
881 /*
882 No double quotes around ParsedOutputLogCompress because it may contain command line options. If double quotes are
883 necessary around the command name, put them in the configuration file.
884 */
885 if (snprintf(val1,sizeof(val1),"%s \"%s\"",ParsedOutputLogCompress,SargLogFile)>=sizeof(val1)) {
886 debuga(__FILE__,__LINE__,_("Command too long: %s \"%s\"\n"),ParsedOutputLogCompress,SargLogFile);
887 exit(EXIT_FAILURE);
888 }
889 cstatus=system(val1);
890 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
891 debuga(__FILE__,__LINE__,_("command return status %d\n"),WEXITSTATUS(cstatus));
892 debuga(__FILE__,__LINE__,_("command: %s\n"),val1);
893 exit(EXIT_FAILURE);
894 }
895 }
896 }
897 if(debug)
898 debuga(__FILE__,__LINE__,_("Sarg parsed log saved as %s\n"),SargLogFile);
899 }
900
901 denied_close();
902 authfail_close();
903 download_close();
904
905 for (ufile=first_user_file ; ufile ; ufile=ufile1) {
906 ufile1=ufile->next;
907 if (ufile->file!=NULL && fclose(ufile->file)==EOF) {
908 debuga(__FILE__,__LINE__,_("Write error in log file of user %s: %s\n"),ufile->user->id,strerror(errno));
909 exit(EXIT_FAILURE);
910 }
911 free(ufile);
912 }
913
914 if (debug) {
915 unsigned long int totalcount=0;
916
917 debuga(__FILE__,__LINE__,_(" Records read: %ld, written: %ld, excluded: %ld\n"),totregsl,totregsg,totregsx);
918
919 for (x=sizeof(excluded_count)/sizeof(*excluded_count)-1 ; x>=0 && excluded_count[x]>0 ; x--);
920 if (x>=0) {
921 debuga(__FILE__,__LINE__,_("Reasons for excluded entries:\n"));
922 DisplayExcludeCount(_("User name too long"),ER_UserNameTooLong);
923 DisplayExcludeCount(_("Squid logged an incomplete query received from the client"),ER_IncompleteQuery);
924 DisplayExcludeCount(_("Log file turned over"),ER_LogfileTurnedOver);
925 DisplayExcludeCount(_("Excluded by \"exclude_string\" in sarg.conf"),ER_ExcludeString);
926 DisplayExcludeCount(_("Unknown input log file format"),ER_UnknownFormat);
927 DisplayExcludeCount(_("Line ignored by the input log format"),ER_FormatData);
928 DisplayExcludeCount(_("Time outside the requested date range (-d)"),ER_OutOfDateRange);
929 DisplayExcludeCount(_("Ignored week day (\"weekdays\" parameter in sarg.conf)"),ER_OutOfWDayRange);
930 DisplayExcludeCount(_("Ignored hour (\"hours\" parameter in sarg.conf)"),ER_OutOfHourRange);
931 DisplayExcludeCount(_("User is not in the \"include_users\" list"),ER_User);
932 DisplayExcludeCount(_("HTTP code excluded by \"exclude_code\" file"),ER_HttpCode);
933 DisplayExcludeCount(_("Invalid character found in user name"),ER_InvalidUserChar);
934 DisplayExcludeCount(_("No URL in entry"),ER_NoUrl);
935 DisplayExcludeCount(_("Not the IP address requested with -a"),ER_UntrackedIpAddr);
936 DisplayExcludeCount(_("URL excluded by -c or \"exclude_hosts\""),ER_Url);
937 DisplayExcludeCount(_("Entry time outside of requested hour range (-t)"),ER_OutOfTimeRange);
938 DisplayExcludeCount(_("Not the URL requested by -s"),ER_UntrackedUrl);
939 DisplayExcludeCount(_("No user in entry"),ER_NoUser);
940 DisplayExcludeCount(_("Not the user requested by -u"),ER_UntrackedUser);
941 DisplayExcludeCount(_("System user as defined by \"password\" in sarg.conf"),ER_SysUser);
942 DisplayExcludeCount(_("User ignored by \"exclude_users\""),ER_IgnoredUser);
943 }
944
945 for (x=0 ; x<sizeof(LogFormats)/sizeof(*LogFormats) ; x++) {
946 if (format_count[x]>0) {
947 /* TRANSLATORS: It displays the number of lines found in the input log files
948 * for each supported log format. The log format name is the %s and is a string
949 * you translate somewhere else. */
950 debuga(__FILE__,__LINE__,_("%s: %lu entries\n"),_(LogFormats[x]->Name),format_count[x]);
951 totalcount+=format_count[x];
952 }
953 }
954
955 if (totalcount==0 && totregsg)
956 debuga(__FILE__,__LINE__,_("Log with invalid format\n"));
957 }
958
959 return((totregsg!=0) ? 1 : 0);
960 }
961
962 /*!
963 * Get the start and end date of the period covered by the log files.
964 */
965 bool GetLogPeriod(struct tm *Start,struct tm *End)
966 {
967 bool Valid=false;
968
969 if (EarliestDate>=0) {
970 memcpy(Start,&EarliestDateTime,sizeof(struct tm));
971 Valid=true;
972 } else {
973 memset(Start,0,sizeof(struct tm));
974 }
975 if (LatestDate>=0) {
976 memcpy(End,&LatestDateTime,sizeof(struct tm));
977 Valid=true;
978 } else {
979 memset(End,0,sizeof(struct tm));
980 }
981 return(Valid);
982 }