]> git.ipfire.org Git - thirdparty/sarg.git/blob - readlog.c
b306509787ef8127a37bd78faf73fc2ec7de6ec1
[thirdparty/sarg.git] / readlog.c
1 /*
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
3 * 1998, 2013
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
27 #include "include/conf.h"
28 #include "include/defs.h"
29 #include "include/readlog.h"
30 #include "include/filelist.h"
31
//! Number of input lines read between two refreshes of the reading statistics.
#define REPORT_EVERY_X_LINES   5000
//! Threshold on the number of temporary user files kept open simultaneously.
#define MAX_OPEN_USER_FILES    10
34
/*!
One node of the linked list associating a user with its temporary output file.
*/
struct userfilestruct
{
	struct userfilestruct *next;  //!< Next node of the list or NULL at the end.
	struct userinfostruct *user;  //!< The user this node belongs to.
	FILE *file;                   //!< Handle of the user's temporary file or NULL if it is not open.
};
41
/*!
The reasons why an entry of the input log may be excluded. Each value is
an index into the table counting the excluded records.
*/
enum ExcludeReasonEnum
{
	ER_UserNameTooLong,   //!< User name too long.
	ER_IncompleteQuery,   //!< Squid logged an incomplete query received from the client.
	ER_LogfileTurnedOver, //!< Log file turned over.
	ER_ExcludeString,     //!< Excluded by exclude_string from sarg.conf.
	ER_UnknownFormat,     //!< Unknown input log file format.
	ER_FormatData,        //!< Line to be ignored from the input log file.
	ER_OutOfDateRange,    //!< Entry not within the requested date range.
	ER_OutOfWDayRange,    //!< Ignored week day.
	ER_OutOfHourRange,    //!< Ignored hour.
	ER_User,              //!< User is not in the include_users list.
	ER_HttpCode,          //!< HTTP code excluded by exclude_code file.
	ER_InvalidUserChar,   //!< Invalid character found in user name.
	ER_NoUrl,             //!< No URL in entry.
	ER_UntrackedIpAddr,   //!< Not the IP address requested with -a.
	ER_Url,               //!< URL excluded by -c or exclude_hosts.
	ER_OutOfTimeRange,    //!< Entry time outside of requested hour range.
	ER_UntrackedUrl,      //!< Not the URL requested by -s.
	ER_NoUser,            //!< No user in entry.
	ER_UntrackedUser,     //!< Not the user requested by -u.
	ER_SysUser,           //!< System user.
	ER_IgnoredUser,       //!< User ignored by exclude_users.

	ER_Last               //!< Last entry of the list. It is the number of reasons.
};
89
90 numlist weekdays = { { 0, 1, 2, 3, 4, 5, 6 }, 7 };
91 numlist hours = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 }, 24 };
92
93 extern char *userfile;
94 extern FileListObject AccessLog;
95
96 extern const struct ReadLogProcessStruct ReadSquidLog;
97 extern const struct ReadLogProcessStruct ReadCommonLog;
98 extern const struct ReadLogProcessStruct ReadSargLog;
99 extern const struct ReadLogProcessStruct ReadExtLog;
100
101 //! The list of the supported log formats.
102 static const struct ReadLogProcessStruct const *LogFormats[]=
103 {
104 &ReadSquidLog,
105 &ReadCommonLog,
106 &ReadSargLog,
107 &ReadExtLog
108 };
109
110 //! The path to the sarg log file.
111 static char SargLogFile[4096]="";
112 //! Handle to the sarg log file. NULL if not created.
113 static FILE *fp_log=NULL;
114 //! The number of records read from the input logs.
115 static long int totregsl=0;
116 //! The number of records kept.
117 static long int totregsg=0;
118 //! The number of records excluded.
119 static long int totregsx=0;
120 //! The beginning of a linked list of user's file.
121 static struct userfilestruct *first_user_file=NULL;
122 //! Count the number of occurence of each input log format.
123 static unsigned long int format_count[sizeof(LogFormats)/sizeof(*LogFormats)];
124 //! The minimum date found in the input logs.
125 static int mindate=0;
126 static int maxdate=0;
127 //! Count the number of excluded records.
128 static unsigned long int excluded_count[ER_Last];
129 //! Earliest date found in the log.
130 static int EarliestDate=-1;
131 //! The earliest date in time format.
132 static struct tm EarliestDateTime;
133 //! Latest date found in the log.
134 static int LatestDate=-1;
135 //! The latest date in time format.
136 static struct tm LatestDateTime;
137
138 /*!
139 Read a single log file.
140
141 \param arq The log file name to read.
142 */
143 static void ReadOneLogFile(struct ReadLogDataStruct *Filter,const char *arq)
144 {
145 longline line;
146 char *linebuf;
147 char *str;
148 char hora[30];
149 char dia[128]="";
150 char wuser[MAXLEN];
151 char tmp3[MAXLEN]="";
152 char download_url[MAXLEN];
153 char smartfilter[MAXLEN];
154 const char *url;
155 const char *user;
156 int current_format_idx;
157 int OutputNonZero = REPORT_EVERY_X_LINES ;
158 int idata=0;
159 int x;
160 int hmr;
161 int nopen;
162 int maxopenfiles=MAX_OPEN_USER_FILES;
163 int successive_errors=0;
164 int total_errors=0;
165 unsigned long int recs1=0UL;
166 unsigned long int recs2=0UL;
167 FILE *fp_in=NULL;
168 bool from_pipe;
169 bool from_stdin;
170 bool download_flag=false;
171 bool id_is_ip;
172 enum ReadLogReturnCodeEnum log_entry_status;
173 struct stat logstat;
174 struct getwordstruct gwarea;
175 struct userfilestruct *prev_ufile;
176 struct userinfostruct *uinfo;
177 struct userfilestruct *ufile;
178 struct userfilestruct *ufile1;
179 struct ReadLogStruct log_entry;
180 const struct ReadLogProcessStruct *current_format=NULL;
181
182 current_format=NULL;
183 current_format_idx=-1;
184 for (x=0 ; x<sizeof(LogFormats)/sizeof(*LogFormats) ; x++)
185 if (LogFormats[x]->NewFile)
186 LogFormats[x]->NewFile(arq);
187
188 if (arq[0]=='-' && arq[1]=='\0') {
189 if(debug)
190 debuga(_("Reading access log file: from stdin\n"));
191 fp_in=stdin;
192 from_stdin=true;
193 } else {
194 if (Filter->DateRange[0]!='\0') {
195 if (stat(arq,&logstat)!=0) {
196 debuga(_("Cannot get the modification time of input log file %s (%s). Processing it anyway\n"),arq,strerror(errno));
197 } else {
198 struct tm *logtime=localtime(&logstat.st_mtime);
199 if ((logtime->tm_year+1900)*10000+(logtime->tm_mon+1)*100+logtime->tm_mday<dfrom) {
200 debuga(_("Ignoring old log file %s\n"),arq);
201 return;
202 }
203 }
204 }
205 fp_in=decomp(arq,&from_pipe);
206 if(fp_in==NULL) {
207 debuga(_("Cannot open input log file \"%s\": %s\n"),arq,strerror(errno));
208 exit(EXIT_FAILURE);
209 }
210 if(debug) debuga(_("Reading access log file: %s\n"),arq);
211 from_stdin=false;
212 }
213
214 download_flag=false;
215
216 recs1=0UL;
217 recs2=0UL;
218
219 // pre-read the file only if we have to show stats
220 if (ShowReadStatistics && ShowReadPercent && !from_stdin && !from_pipe) {
221 size_t nread,i;
222 bool skipcr=false;
223 char tmp4[MAXLEN];
224
225 while ((nread=fread(tmp4,1,sizeof(tmp4),fp_in))>0) {
226 for (i=0 ; i<nread ; i++)
227 if (skipcr) {
228 if (tmp4[i]!='\n' && tmp4[i]!='\r') {
229 skipcr=false;
230 }
231 } else {
232 if (tmp4[i]=='\n' || tmp4[i]=='\r') {
233 skipcr=true;
234 recs1++;
235 }
236 }
237 }
238 rewind(fp_in);
239 printf(_("SARG: Records in file: %lu, reading: %3.2f%%"),recs1,(float) 0);
240 putchar('\r');
241 fflush( stdout ) ;
242 }
243
244 if ((line=longline_create())==NULL) {
245 debuga(_("Not enough memory to read log file %s\n"),arq);
246 exit(EXIT_FAILURE);
247 }
248
249 while ((linebuf=longline_read(fp_in,line))!=NULL) {
250 lines_read++;
251
252 recs2++;
253 if (ShowReadStatistics && --OutputNonZero<=0) {
254 if (recs1>0) {
255 double perc = recs2 * 100. / recs1 ;
256 printf(_("SARG: Records in file: %lu, reading: %3.2lf%%"),recs2,perc);
257 } else {
258 printf(_("SARG: Records in file: %lu"),recs2);
259 }
260 putchar('\r');
261 fflush (stdout);
262 OutputNonZero = REPORT_EVERY_X_LINES ;
263 }
264
265 /*
266 The following checks are retained here as I don't know to
267 what format they apply. They date back to pre 2.4 versions.
268 */
269 //if(blen < 58) continue; //this test conflict with the reading of the sarg log header line
270 if(strstr(linebuf,"HTTP/0.0") != 0) {//recorded by squid when encountering an incomplete query
271 excluded_count[ER_IncompleteQuery]++;
272 continue;
273 }
274 if(strstr(linebuf,"logfile turned over") != 0) {//reported by newsyslog
275 excluded_count[ER_LogfileTurnedOver]++;
276 continue;
277 }
278
279 // exclude_string
280 if(ExcludeString[0] != '\0') {
281 bool exstring=false;
282 getword_start(&gwarea,ExcludeString);
283 while(strchr(gwarea.current,':') != 0) {
284 if (getword_multisep(val1,sizeof(val1),&gwarea,':')<0) {
285 debuga(_("Maybe you have a broken record or garbage in your exclusion string\n"));
286 exit(EXIT_FAILURE);
287 }
288 if((str=(char *) strstr(linebuf,val1)) != (char *) NULL ) {
289 exstring=true;
290 break;
291 }
292 }
293 if(!exstring && (str=(char *) strstr(linebuf,gwarea.current)) != (char *) NULL )
294 exstring=true;
295 if(exstring) {
296 excluded_count[ER_ExcludeString]++;
297 continue;
298 }
299 }
300
301 totregsl++;
302 if(debugm)
303 printf("BUF=%s\n",linebuf);
304
305 // process the line
306 log_entry_status=RLRC_Unknown;
307 memset(&log_entry,0,sizeof(log_entry));
308 if (current_format) {
309 log_entry_status=current_format->ReadEntry(linebuf,&log_entry);
310 }
311
312 // find out what line format to use
313 if (log_entry_status==RLRC_Unknown) {
314 for (x=0 ; x<(int)(sizeof(LogFormats)/sizeof(*LogFormats)) ; x++) {
315 if (LogFormats[x]==current_format) continue;
316 memset(&log_entry,0,sizeof(log_entry));
317 log_entry_status=LogFormats[x]->ReadEntry(linebuf,&log_entry);
318 if (log_entry_status!=RLRC_Unknown) break;
319 }
320 if (x>=(int)(sizeof(LogFormats)/sizeof(*LogFormats))) {
321 if (++successive_errors>NumLogSuccessiveErrors) {
322 debuga(ngettext("%d consecutive error found in the input log file %s\n",
323 "%d consecutive errors found in the input log file %s\n",successive_errors),successive_errors,arq);
324 exit(EXIT_FAILURE);
325 }
326 if (NumLogTotalErrors>=0 && ++total_errors>NumLogTotalErrors) {
327 debuga(ngettext("%d error found in the input log file (last in %s)\n",
328 "%d errors found in the input log file (last in %s)\n",total_errors),total_errors,arq);
329 exit(EXIT_FAILURE);
330 }
331 debuga(_("The following line read from %s could not be parsed and is ignored\n%s\n"),arq,linebuf);
332 excluded_count[ER_UnknownFormat]++;
333 continue;
334 }
335 current_format=LogFormats[x];
336 current_format_idx=x;
337 if (debugz>=LogLevel_Process) {
338 /* TRANSLATORS: The argument is the log format name as translated by you. */
339 debuga(_("Log format identified as \"%s\" for %s\n"),_(current_format->Name),arq);
340 }
341 successive_errors=0;
342 }
343 if (log_entry_status==RLRC_Ignore) {
344 excluded_count[ER_FormatData]++;
345 continue;
346 }
347 if (current_format_idx<0 || current_format==NULL) {
348 debuga(_("Sarg failed to determine the format of the input log file %s\n"),arq);
349 exit(EXIT_FAILURE);
350 }
351 if (log_entry_status==RLRC_InternalError) {
352 debuga(_("Internal error encountered while processing %s\nSee previous message to know the reason for that error.\n"),arq);
353 exit(EXIT_FAILURE);
354 }
355 format_count[current_format_idx]++;
356
357 if (!fp_log && ParsedOutputLog[0] && current_format!=&ReadSargLog) {
358 if(access(ParsedOutputLog,R_OK) != 0) {
359 my_mkdir(ParsedOutputLog);
360 }
361 if (snprintf(SargLogFile,sizeof(SargLogFile),"%s/sarg_temp.log",ParsedOutputLog)>=sizeof(SargLogFile)) {
362 debuga(_("File name too long: %s/sarg_temp.log\n"),ParsedOutputLog);
363 exit(EXIT_FAILURE);
364 }
365 if((fp_log=MY_FOPEN(SargLogFile,"w"))==NULL) {
366 debuga(_("(log) Cannot open log file %s: %s\n"),SargLogFile,strerror(errno));
367 exit(EXIT_FAILURE);
368 }
369 fputs("*** SARG Log ***\n",fp_log);
370 }
371
372 if (log_entry.Ip==NULL) {
373 debuga(_("Unknown input log file format: no IP addresses\n"));
374 break;
375 }
376 if (log_entry.User==NULL) {
377 debuga(_("Unknown input log file format: no user\n"));
378 break;
379 }
380 if (log_entry.Url==NULL) {
381 debuga(_("Unknown input log file format: no URL\n"));
382 break;
383 }
384
385 idata=builddia(log_entry.EntryTime.tm_mday,log_entry.EntryTime.tm_mon+1,log_entry.EntryTime.tm_year+1900);
386 if(debugm)
387 printf("DATE=%s IDATA=%d DFROM=%d DUNTIL=%d\n",Filter->DateRange,idata,dfrom,duntil);
388
389 if (EarliestDate<0 || idata<EarliestDate) {
390 EarliestDate=idata;
391 memcpy(&EarliestDateTime,&log_entry.EntryTime,sizeof(struct tm));
392 }
393 if (LatestDate<0 || idata>LatestDate) {
394 LatestDate=idata;
395 memcpy(&LatestDateTime,&log_entry.EntryTime,sizeof(struct tm));
396 }
397 if(Filter->DateRange[0] != '\0'){
398 if(idata < dfrom || idata > duntil) {
399 excluded_count[ER_OutOfDateRange]++;
400 continue;
401 }
402 }
403
404 // Record only hours usage which is required
405 if( bsearch( &( log_entry.EntryTime.tm_wday ), weekdays.list, weekdays.len, sizeof( int ), compar ) == NULL ) {
406 excluded_count[ER_OutOfWDayRange]++;
407 continue;
408 }
409
410 if( bsearch( &( log_entry.EntryTime.tm_hour ), hours.list, hours.len, sizeof( int ), compar ) == NULL ) {
411 excluded_count[ER_OutOfHourRange]++;
412 continue;
413 }
414
415
416 if(strlen(log_entry.User) > MAX_USER_LEN) {
417 if (debugz>=LogLevel_Process) debuga(_("User ID too long: %s\n"),log_entry.User);
418 excluded_count[ER_UserNameTooLong]++;
419 totregsx++;
420 continue;
421 }
422
423 // include_users
424 if(IncludeUsers[0] != '\0') {
425 snprintf(val1,sizeof(val1),":%s:",log_entry.User);
426 if((str=(char *) strstr(IncludeUsers,val1)) == (char *) NULL ) {
427 excluded_count[ER_User]++;
428 continue;
429 }
430 }
431
432 if(vercode(log_entry.HttpCode)) {
433 if (debugz>=LogLevel_Process) debuga(_("Excluded code: %s\n"),log_entry.HttpCode);
434 excluded_count[ER_HttpCode]++;
435 totregsx++;
436 continue;
437 }
438
439 if(testvaliduserchar(log_entry.User)) {
440 excluded_count[ER_InvalidUserChar]++;
441 continue;
442 }
443
444 // replace any tab by a single space
445 for (str=log_entry.Url ; *str ; str++)
446 if (*str=='\t') *str=' ';
447 for (str=log_entry.HttpCode ; *str ; str++)
448 if (*str=='\t') *str=' ';
449
450 if (current_format!=&ReadSargLog) {
451 /*
452 The full URL is not saved in sarg log. There is no point in testing the URL to detect
453 a downloaded file.
454 */
455 download_flag=is_download_suffix(log_entry.Url);
456 if (download_flag) {
457 safe_strcpy(download_url,log_entry.Url,sizeof(download_url));
458 }
459 } else
460 download_flag=false;
461
462 url=process_url(log_entry.Url,LongUrl);
463 if (!url || url[0] == '\0') {
464 excluded_count[ER_NoUrl]++;
465 continue;
466 }
467
468 if(addr[0] != '\0'){
469 if(strcmp(addr,log_entry.Ip)!=0) {
470 excluded_count[ER_UntrackedIpAddr]++;
471 continue;
472 }
473 }
474 if(Filter->HostFilter) {
475 if(!vhexclude(url)) {
476 if (debugz>=LogLevel_Process) debuga(_("Excluded site: %s\n"),url);
477 excluded_count[ER_Url]++;
478 totregsx++;
479 continue;
480 }
481 }
482
483 if(Filter->StartTime >= 0 && Filter->EndTime >= 0) {
484 hmr=log_entry.EntryTime.tm_hour*100+log_entry.EntryTime.tm_min;
485 if(hmr < Filter->StartTime || hmr > Filter->EndTime) {
486 excluded_count[ER_OutOfTimeRange]++;
487 continue;
488 }
489 }
490
491 if(site[0] != '\0'){
492 if(strstr(url,site)==0) {
493 excluded_count[ER_UntrackedUrl]++;
494 continue;
495 }
496 }
497
498 if(UserIp) {
499 log_entry.User=log_entry.Ip;
500 id_is_ip=true;
501 } else {
502 id_is_ip=false;
503 if ((log_entry.User[0]=='\0') || (log_entry.User[1]=='\0' && (log_entry.User[0]=='-' || log_entry.User[0]==' '))) {
504 if(RecordsWithoutUser == RECORDWITHOUTUSER_IP) {
505 log_entry.User=log_entry.Ip;
506 id_is_ip=true;
507 }
508 if(RecordsWithoutUser == RECORDWITHOUTUSER_IGNORE) {
509 excluded_count[ER_NoUser]++;
510 continue;
511 }
512 if(RecordsWithoutUser == RECORDWITHOUTUSER_EVERYBODY)
513 log_entry.User="everybody";
514 } else {
515 if(NtlmUserFormat == NTLMUSERFORMAT_USER) {
516 if ((str=strchr(log_entry.User,'+'))!=NULL || (str=strchr(log_entry.User,'\\'))!=NULL || (str=strchr(log_entry.User,'_'))!=NULL) {
517 log_entry.User=str+1;
518 }
519 }
520 }
521 }
522
523 if(us[0] != '\0'){
524 if(strcmp(log_entry.User,us)!=0) {
525 excluded_count[ER_UntrackedUser]++;
526 continue;
527 }
528 }
529
530 if(Filter->SysUsers) {
531 snprintf(wuser,sizeof(wuser),":%s:",log_entry.User);
532 if(strstr(userfile, wuser) == 0) {
533 excluded_count[ER_SysUser]++;
534 continue;
535 }
536 }
537
538 if(Filter->UserFilter) {
539 if(!vuexclude(log_entry.User)) {
540 if (debugz>=LogLevel_Process) debuga(_("Excluded user: %s\n"),log_entry.User);
541 excluded_count[ER_IgnoredUser]++;
542 totregsx++;
543 continue;
544 }
545 }
546
547 user=process_user(log_entry.User);
548 if (log_entry.User!=user) {
549 log_entry.User=user;
550 id_is_ip=false;
551 }
552 if (log_entry.User[0]=='\0' || (log_entry.User[1]=='\0' && (log_entry.User[0]=='-' ||
553 log_entry.User[0]==' ' || log_entry.User[0]==':'))) {
554 excluded_count[ER_NoUser]++;
555 continue;
556 }
557
558 if (log_entry.DataSize<0) log_entry.DataSize=0;
559
560 if (log_entry.ElapsedTime<0) log_entry.ElapsedTime=0;
561 if (Filter->max_elapsed>0 && log_entry.ElapsedTime>Filter->max_elapsed) {
562 log_entry.ElapsedTime=0;
563 }
564
565 if((str=(char *) strstr(linebuf, "[SmartFilter:")) != (char *) NULL ) {
566 fixendofline(str);
567 snprintf(smartfilter,sizeof(smartfilter),"\"%s\"",str+1);
568 } else strcpy(smartfilter,"\"\"");
569
570 nopen=0;
571 prev_ufile=NULL;
572 for (ufile=first_user_file ; ufile && strcmp(log_entry.User,ufile->user->id)!=0 ; ufile=ufile->next) {
573 prev_ufile=ufile;
574 if (ufile->file) nopen++;
575 }
576 if (!ufile) {
577 ufile=malloc(sizeof(*ufile));
578 if (!ufile) {
579 debuga(_("Not enough memory to store the user %s\n"),log_entry.User);
580 exit(EXIT_FAILURE);
581 }
582 memset(ufile,0,sizeof(*ufile));
583 ufile->next=first_user_file;
584 first_user_file=ufile;
585 /*
586 * This id_is_ip stuff is just to store the string only once if the user is
587 * identified by its IP address instead of a distinct ID and IP address.
588 */
589 uinfo=userinfo_create(log_entry.User,(id_is_ip) ? NULL : log_entry.Ip);
590 ufile->user=uinfo;
591 nusers++;
592 } else {
593 if (prev_ufile) {
594 prev_ufile->next=ufile->next;
595 ufile->next=first_user_file;
596 first_user_file=ufile;
597 }
598 }
599 #ifdef ENABLE_DOUBLE_CHECK_DATA
600 ufile->user->nbytes+=log_entry.DataSize;
601 ufile->user->elap+=log_entry.ElapsedTime;
602 #endif
603
604 if (ufile->file==NULL) {
605 if (nopen>=maxopenfiles) {
606 x=0;
607 for (ufile1=first_user_file ; ufile1 ; ufile1=ufile1->next) {
608 if (ufile1->file!=NULL) {
609 if (x>=maxopenfiles) {
610 if (fclose(ufile1->file)==EOF) {
611 debuga(_("Write error in the log file of user %s: %s\n"),ufile1->user->id,strerror(errno));
612 exit(EXIT_FAILURE);
613 }
614 ufile1->file=NULL;
615 }
616 x++;
617 }
618 }
619 }
620 if (snprintf (tmp3, sizeof(tmp3), "%s/%s.user_unsort", tmp, ufile->user->filename)>=sizeof(tmp3)) {
621 debuga(_("Temporary user file name too long: %s/%s.user_unsort\n"), tmp, ufile->user->filename);
622 exit(EXIT_FAILURE);
623 }
624 if ((ufile->file = MY_FOPEN (tmp3, "a")) == NULL) {
625 debuga(_("(log) Cannot open temporary file %s: %s\n"), tmp3, strerror(errno));
626 exit (1);
627 }
628 }
629
630 strftime(dia, sizeof(dia), "%d/%m/%Y",&log_entry.EntryTime);
631 strftime(hora,sizeof(hora),"%H:%M:%S",&log_entry.EntryTime);
632
633 if (fprintf(ufile->file, "%s\t%s\t%s\t%s\t%"PRIu64"\t%s\t%ld\t%s\n",dia,hora,
634 log_entry.Ip,url,(uint64_t)log_entry.DataSize,
635 log_entry.HttpCode,log_entry.ElapsedTime,smartfilter)<=0) {
636 debuga(_("Write error in the log file of user %s\n"),log_entry.User);
637 exit(EXIT_FAILURE);
638 }
639 records_kept++;
640
641 if (fp_log && current_format!=&ReadSargLog) {
642 fprintf(fp_log, "%s\t%s\t%s\t%s\t%s\t%"PRIu64"\t%s\t%ld\t%s\n",dia,hora,
643 log_entry.User,log_entry.Ip,url,(uint64_t)log_entry.DataSize,
644 log_entry.HttpCode,log_entry.ElapsedTime,smartfilter);
645 }
646
647 totregsg++;
648
649 denied_write(&log_entry);
650 authfail_write(&log_entry);
651 if (download_flag) download_write(&log_entry,download_url);
652
653 if (current_format!=&ReadSargLog) {
654 if (period.start.tm_year==0 || idata<mindate || compare_date(&period.start,&log_entry.EntryTime)>0){
655 mindate=idata;
656 memcpy(&period.start,&log_entry.EntryTime,sizeof(log_entry.EntryTime));
657 }
658 if (period.end.tm_year==0 || idata>maxdate || compare_date(&period.end,&log_entry.EntryTime)<0) {
659 maxdate=idata;
660 memcpy(&period.end,&log_entry.EntryTime,sizeof(log_entry.EntryTime));
661 }
662 }
663
664 if(debugm){
665 printf("IP=\t%s\n",log_entry.Ip);
666 printf("USER=\t%s\n",log_entry.User);
667 printf("ELAP=\t%ld\n",log_entry.ElapsedTime);
668 printf("DATE=\t%s\n",dia);
669 printf("TIME=\t%s\n",hora);
670 //printf("FUNC=\t%s\n",fun);
671 printf("URL=\t%s\n",url);
672 printf("CODE=\t%s\n",log_entry.HttpCode);
673 printf("LEN=\t%"PRIu64"\n",(uint64_t)log_entry.DataSize);
674 }
675 }
676 longline_destroy(&line);
677
678 if (!from_stdin) {
679 if (from_pipe)
680 pclose(fp_in);
681 else {
682 fclose(fp_in);
683 if (ShowReadStatistics) {
684 if (ShowReadPercent)
685 printf(_("SARG: Records in file: %lu, reading: %3.2f%%\n"),recs2, (float) 100 );
686 else
687 printf(_("SARG: Records in file: %lu\n"),recs2);
688 }
689 }
690 }
691 }
692
693 /*!
694 * Display a line with the excluded entries count.
695 *
696 * \param Explain A translated string explaining the exluded count.
697 * \param Reason The reason number.
698 */
699 static void DisplayExcludeCount(const char *Explain,enum ExcludeReasonEnum Reason)
700 {
701 if (excluded_count[Reason]>0) {
702 debuga(" %s: %lu\n",Explain,excluded_count[Reason]);
703 }
704 }
705
706 /*!
707 Read the log files.
708
709 \param Filter The filtering parameters for the file to load.
710
711 \retval 1 Records found.
712 \retval 0 No record found.
713 */
714 int ReadLogFile(struct ReadLogDataStruct *Filter)
715 {
716 int x;
717 int cstatus;
718 struct userfilestruct *ufile;
719 struct userfilestruct *ufile1;
720 FileListIterator FIter;
721 const char *file;
722
723 for (x=0 ; x<sizeof(format_count)/sizeof(*format_count) ; x++) format_count[x]=0;
724 for (x=0 ; x<sizeof(excluded_count)/sizeof(*excluded_count) ; x++) excluded_count[x]=0;
725 first_user_file=NULL;
726
727 if (!dataonly) {
728 denied_open();
729 authfail_open();
730 download_open();
731 }
732
733 FIter=FileListIter_Open(AccessLog);
734 while ((file=FileListIter_Next(FIter))!=NULL)
735 ReadOneLogFile(Filter,file);
736 FileListIter_Close(FIter);
737
738 if(fp_log != NULL) {
739 char val2[40];
740 char val4[4096];//val4 must not be bigger than SargLogFile without fixing the strcpy below
741
742 if (fclose(fp_log)==EOF) {
743 debuga(_("Write error in %s: %s\n"),SargLogFile,strerror(errno));
744 exit(EXIT_FAILURE);
745 }
746 strftime(val2,sizeof(val2),"%d%m%Y_%H%M",&period.start);
747 strftime(val1,sizeof(val1),"%d%m%Y_%H%M",&period.end);
748 if (snprintf(val4,sizeof(val4),"%s/sarg-%s-%s.log",ParsedOutputLog,val2,val1)>=sizeof(val4)) {
749 debuga(_("File name too long: %s/sarg-%s-%s.log\n"),ParsedOutputLog,val2,val1);
750 exit(EXIT_FAILURE);
751 }
752 if (rename(SargLogFile,val4)) {
753 debuga(_("failed to rename %s to %s - %s\n"),SargLogFile,val4,strerror(errno));
754 } else {
755 strcpy(SargLogFile,val4);
756
757 if(strcmp(ParsedOutputLogCompress,"nocompress") != 0 && ParsedOutputLogCompress[0] != '\0') {
758 /*
759 No double quotes around ParsedOutputLogCompress because it may contain command line options. If double quotes are
760 necessary around the command name, put them in the configuration file.
761 */
762 if (snprintf(val1,sizeof(val1),"%s \"%s\"",ParsedOutputLogCompress,SargLogFile)>=sizeof(val1)) {
763 debuga(_("Command too long: %s \"%s\"\n"),ParsedOutputLogCompress,SargLogFile);
764 exit(EXIT_FAILURE);
765 }
766 cstatus=system(val1);
767 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
768 debuga(_("command return status %d\n"),WEXITSTATUS(cstatus));
769 debuga(_("command: %s\n"),val1);
770 exit(EXIT_FAILURE);
771 }
772 }
773 }
774 if(debug)
775 debuga(_("Sarg parsed log saved as %s\n"),SargLogFile);
776 }
777
778 denied_close();
779 authfail_close();
780 download_close();
781
782 for (ufile=first_user_file ; ufile ; ufile=ufile1) {
783 ufile1=ufile->next;
784 if (ufile->file!=NULL && fclose(ufile->file)==EOF) {
785 debuga(_("Write error in the log file of user %s: %s\n"),ufile->user->id,strerror(errno));
786 exit(EXIT_FAILURE);
787 }
788 free(ufile);
789 }
790
791 if (debug) {
792 unsigned long int totalcount=0;
793
794 debuga(_(" Records read: %ld, written: %ld, excluded: %ld\n"),totregsl,totregsg,totregsx);
795
796 for (x=sizeof(excluded_count)/sizeof(*excluded_count)-1 ; x>=0 && excluded_count[x]>0 ; x--);
797 if (x>=0) {
798 debuga(_("Reasons for excluded entries:\n"));
799 DisplayExcludeCount(_("User name too long"),ER_UserNameTooLong);
800 DisplayExcludeCount(_("Squid logged an incomplete query received from the client"),ER_IncompleteQuery);
801 DisplayExcludeCount(_("Log file turned over"),ER_LogfileTurnedOver);
802 DisplayExcludeCount(_("Excluded by \"exclude_string\" in sarg.conf"),ER_ExcludeString);
803 DisplayExcludeCount(_("Unknown input log file format"),ER_UnknownFormat);
804 DisplayExcludeCount(_("Line ignored by the input log format"),ER_FormatData);
805 DisplayExcludeCount(_("Time outside the requested date range (-d)"),ER_OutOfDateRange);
806 DisplayExcludeCount(_("Ignored week day (\"weekdays\" parameter in sarg.conf)"),ER_OutOfWDayRange);
807 DisplayExcludeCount(_("Ignored hour (\"hours\" parameter in sarg.conf)"),ER_OutOfHourRange);
808 DisplayExcludeCount(_("User is not in the \"include_users\" list"),ER_User);
809 DisplayExcludeCount(_("HTTP code excluded by \"exclude_code\" file"),ER_HttpCode);
810 DisplayExcludeCount(_("Invalid character found in user name"),ER_InvalidUserChar);
811 DisplayExcludeCount(_("No URL in entry"),ER_NoUrl);
812 DisplayExcludeCount(_("Not the IP address requested with -a"),ER_UntrackedIpAddr);
813 DisplayExcludeCount(_("URL excluded by -c or \"exclude_hosts\""),ER_Url);
814 DisplayExcludeCount(_("Entry time outside of requested hour range (-t)"),ER_OutOfTimeRange);
815 DisplayExcludeCount(_("Not the URL requested by -s"),ER_UntrackedUrl);
816 DisplayExcludeCount(_("No user in entry"),ER_NoUser);
817 DisplayExcludeCount(_("Not the user requested by -u"),ER_UntrackedUser);
818 DisplayExcludeCount(_("System user as defined by \"password\" in sarg.conf"),ER_SysUser);
819 DisplayExcludeCount(_("User ignored by \"exclude_users\""),ER_IgnoredUser);
820 }
821
822 for (x=0 ; x<sizeof(LogFormats)/sizeof(*LogFormats) ; x++) {
823 if (format_count[x]>0) {
824 /* TRANSLATORS: It displays the number of lines found in the input log files
825 * for each supported log format. The log format name is the %s and is a string
826 * you translate somewhere else. */
827 debuga(_("%s: %lu entries\n"),_(LogFormats[x]->Name),format_count[x]);
828 totalcount+=format_count[x];
829 }
830 }
831
832 if (totalcount==0 && totregsg)
833 debuga(_("Log with invalid format\n"));
834 }
835
836 return((totregsg!=0) ? 1 : 0);
837 }
838
839 /*!
840 * Get the start and end date of the period covered by the log files.
841 */
842 void GetLogPeriod(struct tm *Start,struct tm *End)
843 {
844 if (EarliestDate>=0) {
845 memcpy(Start,&EarliestDateTime,sizeof(struct tm));
846 } else {
847 memset(Start,0,sizeof(struct tm));
848 }
849 if (LatestDate>=0) {
850 memcpy(End,&LatestDateTime,sizeof(struct tm));
851 } else {
852 memset(End,0,sizeof(struct tm));
853 }
854 }