]> git.ipfire.org Git - thirdparty/sarg.git/blame - readlog.c
Update the man page
[thirdparty/sarg.git] / readlog.c
CommitLineData
27d1fa35
FM
1/*
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
110ce984 3 * 1998, 2015
27d1fa35
FM
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
27#include "include/conf.h"
28#include "include/defs.h"
29#include "include/readlog.h"
6068ae56 30#include "include/filelist.h"
27d1fa35
FM
31
//! Number of input lines read between two refreshes of the reading statistics.
#define REPORT_EVERY_X_LINES 5000
//! Maximum number of per-user temporary files kept open at the same time.
#define MAX_OPEN_USER_FILES 10
34
/*!
 * Node of the linked list associating a user with the temporary file
 * receiving the entries kept for that user.
 */
struct userfilestruct
{
	//! Next node of the list or NULL for the last node.
	struct userfilestruct *next;
	//! The user the file belongs to.
	struct userinfostruct *user;
	//! Handle of the user's temporary file or NULL if it is not currently open.
	FILE *file;
};
41
7c8c06c5
FM
/*!
 * Reasons for which an entry of the input log is not kept.
 *
 * The values index the table counting the excluded entries, so the
 * order and the number of enumerators matter.
 */
enum ExcludeReasonEnum
{
	//! User name too long.
	ER_UserNameTooLong,
	//! Squid logged an incomplete query received from the client.
	ER_IncompleteQuery,
	//! Log file turned over.
	ER_LogfileTurnedOver,
	//! Excluded by exclude_string from sarg.conf.
	ER_ExcludeString,
	//! Unknown input log file format.
	ER_UnknownFormat,
	//! Line to be ignored from the input log file.
	ER_FormatData,
	//! Entry not within the requested date range.
	ER_OutOfDateRange,
	//! Ignored week day.
	ER_OutOfWDayRange,
	//! Ignored hour.
	ER_OutOfHourRange,
	//! User is not in the include_users list.
	ER_User,
	//! HTTP code excluded by exclude_code file.
	ER_HttpCode,
	//! Invalid character found in user name.
	ER_InvalidUserChar,
	//! No URL in entry.
	ER_NoUrl,
	//! Not the IP address requested with -a.
	ER_UntrackedIpAddr,
	//! URL excluded by -c or exclude_hosts.
	ER_Url,
	//! Entry time outside of requested hour range.
	ER_OutOfTimeRange,
	//! Not the URL requested by -s.
	ER_UntrackedUrl,
	//! No user in entry.
	ER_NoUser,
	//! Not the user requested by -u.
	ER_UntrackedUser,
	//! System user.
	ER_SysUser,
	//! User ignored by exclude_users.
	ER_IgnoredUser,

	ER_Last //!< Number of reasons. Must remain the last entry of the list.
};
89
27d1fa35
FM
90numlist weekdays = { { 0, 1, 2, 3, 4, 5, 6 }, 7 };
91numlist hours = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 }, 24 };
36a0b94c
FM
92//! Domain suffix to strip from the user name.
93char StripUserSuffix[MAX_USER_LEN]="";
2c058416
FM
94//! Length of the suffix to strip from the user name.
95int StripSuffixLen=0;
27d1fa35 96
6068ae56 97extern FileListObject AccessLog;
27d1fa35 98
1c91da07
FM
99extern const struct ReadLogProcessStruct ReadSquidLog;
100extern const struct ReadLogProcessStruct ReadCommonLog;
101extern const struct ReadLogProcessStruct ReadSargLog;
102extern const struct ReadLogProcessStruct ReadExtLog;
103
104//! The list of the supported log formats.
105static const struct ReadLogProcessStruct const *LogFormats[]=
106{
107 &ReadSquidLog,
108 &ReadCommonLog,
109 &ReadSargLog,
110 &ReadExtLog
111};
112
944cf283
FM
113//! The path to the sarg log file.
114static char SargLogFile[4096]="";
115//! Handle to the sarg log file. NULL if not created.
116static FILE *fp_log=NULL;
117//! The number of records read from the input logs.
118static long int totregsl=0;
119//! The number of records kept.
120static long int totregsg=0;
121//! The number of records excluded.
122static long int totregsx=0;
123//! The beginning of a linked list of user's file.
124static struct userfilestruct *first_user_file=NULL;
125//! Count the number of occurence of each input log format.
8e501bd6 126static unsigned long int format_count[sizeof(LogFormats)/sizeof(*LogFormats)];
944cf283
FM
127//! The minimum date found in the input logs.
128static int mindate=0;
129static int maxdate=0;
7c8c06c5
FM
130//! Count the number of excluded records.
131static unsigned long int excluded_count[ER_Last];
6a943fc1
FM
132//! Earliest date found in the log.
133static int EarliestDate=-1;
134//! The earliest date in time format.
135static struct tm EarliestDateTime;
136//! Latest date found in the log.
137static int LatestDate=-1;
138//! The latest date in time format.
139static struct tm LatestDateTime;
27d1fa35 140
800eafb8
FM
/*!
 * Read from standard input.
 *
 * \param Data The stdio stream to read from, stored as an opaque pointer.
 * \param Buffer The buffer to store the data read.
 * \param Size How many bytes to read.
 *
 * \return The number of bytes read.
 */
static int Stdin_Read(void *Data,void *Buffer,int Size)
{
	FILE *stream=(FILE *)Data;
	size_t nread;

	nread=fread(Buffer,1,Size,stream);
	return((int)nread);
}
154
/*!
 * Check if end of file is reached.
 *
 * \param Data The stdio stream to test, stored as an opaque pointer.
 *
 * \return Non-zero if the end of file indicator of the stream is set.
 */
static int Stdin_Eof(void *Data)
{
	FILE *stream=(FILE *)Data;

	return(feof(stream));
}
166
/*!
 * Mimic a close of standard input but do nothing: the stream is left open.
 *
 * \param Data File to close. It is not used.
 *
 * \return Always zero.
 */
static int Stdin_Close(void *Data)
{
	(void)Data;
	return(0);
}
178
179/*!
180 * Open a file object to read from standard input.
181 *
182 * \return The object to pass to other function in this module.
183 */
184static FileObject *Stdin_Open(void)
185{
186 FileObject *File;
187
188 FileObject_SetLastOpenError(NULL);
189 File=calloc(1,sizeof(*File));
190 if (!File)
191 {
192 FileObject_SetLastOpenError(_("Not enough memory"));
193 return(NULL);
194 }
195 File->Data=stdin;
196 File->Read=Stdin_Read;
197 File->Eof=Stdin_Eof;
198 File->Rewind=NULL;
199 File->Close=Stdin_Close;
200 return(File);
201}
202
8b4e9578
FM
203/*!
204 * Initialize the memory structure needed by LogLine_Parse() to parse
205 * a log line.
206 *
207 * \param log_line The structure to initialize.
208 */
209void LogLine_Init(struct LogLineStruct *log_line)
210{
211 log_line->current_format=NULL;
212 log_line->current_format_idx=-1;
213 log_line->file_name="";
214 log_line->successive_errors=0;
215 log_line->total_errors=0;
216}
217
218/*!
219 * Set the name of the log file being parsed.
220 *
221 * \param log_line Data structure to parse the log line.
222 * \param file_name The name of the log file being read.
223 */
224void LogLine_File(struct LogLineStruct *log_line,const char *file_name)
225{
226 log_line->file_name=file_name;
227}
228
229/*!
230 * Parse the next line from a log file.
231 *
232 * \param log_line A buffer to store the data about the current parsing.
233 * \param log_entry The variable to store the parsed data.
234 * \param linebuf The text line read from the log file.
235 *
236 * \return
237 */
238enum ReadLogReturnCodeEnum LogLine_Parse(struct LogLineStruct *log_line,struct ReadLogStruct *log_entry,char *linebuf)
239{
240 enum ReadLogReturnCodeEnum log_entry_status=RLRC_Unknown;
241 int x;
242
243 if (log_line->current_format)
244 {
245 memset(log_entry,0,sizeof(*log_entry));
246 log_entry_status=log_line->current_format->ReadEntry(linebuf,log_entry);
247 }
248
249 // find out what line format to use
250 if (log_entry_status==RLRC_Unknown)
251 {
252 for (x=0 ; x<(int)(sizeof(LogFormats)/sizeof(*LogFormats)) ; x++)
253 {
254 if (LogFormats[x]==log_line->current_format) continue;
255 memset(log_entry,0,sizeof(*log_entry));
256 log_entry_status=LogFormats[x]->ReadEntry(linebuf,log_entry);
257 if (log_entry_status!=RLRC_Unknown)
258 {
259 log_line->current_format=LogFormats[x];
260 log_line->current_format_idx=x;
261 if (debugz>=LogLevel_Process)
262 {
263 /* TRANSLATORS: The argument is the log format name as translated by you. */
af961877 264 debuga(__FILE__,__LINE__,_("Log format identified as \"%s\" for %s\n"),_(log_line->current_format->Name),log_line->file_name);
8b4e9578
FM
265 }
266 break;
267 }
268 }
269 if (x>=(int)(sizeof(LogFormats)/sizeof(*LogFormats)))
270 {
271 if (++log_line->successive_errors>NumLogSuccessiveErrors) {
af961877 272 debuga(__FILE__,__LINE__,ngettext("%d consecutive error found in the input log file %s\n",
8b4e9578
FM
273 "%d consecutive errors found in the input log file %s\n",log_line->successive_errors),log_line->successive_errors,log_line->file_name);
274 exit(EXIT_FAILURE);
275 }
276 if (NumLogTotalErrors>=0 && ++log_line->total_errors>NumLogTotalErrors) {
af961877 277 debuga(__FILE__,__LINE__,ngettext("%d error found in the input log file (last in %s)\n",
8b4e9578
FM
278 "%d errors found in the input log file (last in %s)\n",log_line->total_errors),log_line->total_errors,log_line->file_name);
279 exit(EXIT_FAILURE);
280 }
af961877 281 debuga(__FILE__,__LINE__,_("The following line read from %s could not be parsed and is ignored\n%s\n"),log_line->file_name,linebuf);
8b4e9578
FM
282 }
283 else
284 log_line->successive_errors=0;
285 }
286
287 if (log_line->current_format_idx<0 || log_line->current_format==NULL) {
af961877 288 debuga(__FILE__,__LINE__,_("Sarg failed to determine the format of the input log file %s\n"),log_line->file_name);
8b4e9578
FM
289 exit(EXIT_FAILURE);
290 }
291 if (log_entry_status==RLRC_InternalError) {
af961877 292 debuga(__FILE__,__LINE__,_("Internal error encountered while processing %s\nSee previous message to know the reason for that error.\n"),log_line->file_name);
8b4e9578
FM
293 exit(EXIT_FAILURE);
294 }
295 return(log_entry_status);
296}
297
944cf283
FM
298/*!
Read a single log file and append every kept entry to the temporary file of its user.

Each line is parsed with LogLine_Parse() then goes through the successive filters.
Every rejected line is counted in excluded_count under the reason of the rejection.

\param Filter The filtering parameters. The date range, the host filter, the time range
and the maximum elapsed time are read from it.
\param arq The log file name to read. A single dash reads the standard input.
*/
944cf283 303static void ReadOneLogFile(struct ReadLogDataStruct *Filter,const char *arq)
27d1fa35 304{
27d1fa35
FM
305 longline line;
306 char *linebuf;
307 char *str;
27d1fa35 308 char hora[30];
c5d6ef4b 309 char dia[128]="";
944cf283 310 char tmp3[MAXLEN]="";
27d1fa35
FM
311 char download_url[MAXLEN];
312 char smartfilter[MAXLEN];
27d1fa35 313 const char *url;
27d1fa35
FM
314 int OutputNonZero = REPORT_EVERY_X_LINES ;
315 int idata=0;
27d1fa35
FM
316 int x;
317 int hmr;
318 int nopen;
319 int maxopenfiles=MAX_OPEN_USER_FILES;
27d1fa35
FM
320 unsigned long int recs1=0UL;
321 unsigned long int recs2=0UL;
800eafb8 322 FileObject *fp_in=NULL;
27d1fa35
FM
323 bool download_flag=false;
324 bool id_is_ip;
1c91da07 325 enum ReadLogReturnCodeEnum log_entry_status;
074a6d8f 326 enum UserProcessError PUser;
27d1fa35
FM
327 struct stat logstat;
328 struct getwordstruct gwarea;
27d1fa35
FM
329 struct userfilestruct *prev_ufile;
330 struct userinfostruct *uinfo;
27d1fa35
FM
331 struct userfilestruct *ufile;
332 struct userfilestruct *ufile1;
c5d6ef4b 333 struct ReadLogStruct log_entry;
8b4e9578 334 struct LogLineStruct log_line;
468d879d 335 FILE *UseragentLog=NULL;
27d1fa35 336
8b4e9578
FM
// Reset the parser state and tell every log format providing a NewFile callback that a new file begins.
337 LogLine_Init(&log_line);
338 LogLine_File(&log_line,arq);
944cf283
FM
339 for (x=0 ; x<sizeof(LogFormats)/sizeof(*LogFormats) ; x++)
340 if (LogFormats[x]->NewFile)
341 LogFormats[x]->NewFile(arq);
27d1fa35 342
// Open the input: a lone "-" selects the standard input.
944cf283 343 if (arq[0]=='-' && arq[1]=='\0') {
800eafb8 344 fp_in=Stdin_Open();
944cf283 345 if(debug)
af961877 346 debuga(__FILE__,__LINE__,_("Reading access log file: from stdin\n"));
944cf283
FM
347 } else {
// When a date range is requested, a file last modified before dfrom is skipped without being opened.
348 if (Filter->DateRange[0]!='\0') {
349 if (stat(arq,&logstat)!=0) {
af961877 350 debuga(__FILE__,__LINE__,_("Cannot get the modification time of input log file %s (%s). Processing it anyway\n"),arq,strerror(errno));
944cf283
FM
351 } else {
352 struct tm *logtime=localtime(&logstat.st_mtime);
353 if ((logtime->tm_year+1900)*10000+(logtime->tm_mon+1)*100+logtime->tm_mday<dfrom) {
af961877 354 debuga(__FILE__,__LINE__,_("Ignoring old log file %s\n"),arq);
944cf283 355 return;
27d1fa35
FM
356 }
357 }
27d1fa35 358 }
800eafb8
FM
359 fp_in=decomp(arq);
360 if (fp_in==NULL) {
361 debuga(__FILE__,__LINE__,_("Cannot open input log file \"%s\": %s\n"),arq,FileObject_GetLastOpenError());
944cf283
FM
362 exit(EXIT_FAILURE);
363 }
800eafb8 364 if (debug) debuga(__FILE__,__LINE__,_("Reading access log file: %s\n"),arq);
944cf283 365 }
27d1fa35 366
944cf283 367 download_flag=false;
27d1fa35 368
944cf283
FM
369 recs1=0UL;
370 recs2=0UL;
2f4787e6 371
944cf283 372 // pre-read the file only if we have to show stats
// The lines are counted in recs1 (a run of CR/LF characters counts once) then the file is rewound.
800eafb8 373 if (ShowReadStatistics && ShowReadPercent && fp_in->Rewind) {
a1e4e370 374 int nread,i;
944cf283
FM
375 bool skipcr=false;
376 char tmp4[MAXLEN];
27d1fa35 377
800eafb8 378 while ((nread=FileObject_Read(fp_in,tmp4,sizeof(tmp4)))>0) {
944cf283
FM
379 for (i=0 ; i<nread ; i++)
380 if (skipcr) {
381 if (tmp4[i]!='\n' && tmp4[i]!='\r') {
382 skipcr=false;
27d1fa35 383 }
944cf283
FM
384 } else {
385 if (tmp4[i]=='\n' || tmp4[i]=='\r') {
386 skipcr=true;
387 recs1++;
388 }
389 }
27d1fa35 390 }
800eafb8 391 FileObject_Rewind(fp_in);
944cf283
FM
392 printf(_("SARG: Records in file: %lu, reading: %3.2f%%"),recs1,(float) 0);
393 putchar('\r');
394 fflush( stdout ) ;
395 }
27d1fa35 396
944cf283 397 if ((line=longline_create())==NULL) {
af961877 398 debuga(__FILE__,__LINE__,_("Not enough memory to read file \"%s\"\n"),arq);
944cf283
FM
399 exit(EXIT_FAILURE);
400 }
27d1fa35 401
// Main loop: one iteration per line of the input file.
944cf283 402 while ((linebuf=longline_read(fp_in,line))!=NULL) {
944cf283 403 lines_read++;
27d1fa35 404
944cf283
FM
405 recs2++;
// Refresh the progress display every REPORT_EVERY_X_LINES lines.
406 if (ShowReadStatistics && --OutputNonZero<=0) {
407 if (recs1>0) {
408 double perc = recs2 * 100. / recs1 ;
409 printf(_("SARG: Records in file: %lu, reading: %3.2lf%%"),recs2,perc);
410 } else {
411 printf(_("SARG: Records in file: %lu"),recs2);
27d1fa35 412 }
944cf283
FM
413 putchar('\r');
414 fflush (stdout);
415 OutputNonZero = REPORT_EVERY_X_LINES ;
416 }
0c87646f 417
944cf283
FM
418 /*
419 The following checks are retained here as I don't know to
420 what format they apply. They date back to pre 2.4 versions.
421 */
422 //if(blen < 58) continue; //this test conflict with the reading of the sarg log header line
7c8c06c5
FM
423 if(strstr(linebuf,"HTTP/0.0") != 0) {//recorded by squid when encountering an incomplete query
424 excluded_count[ER_IncompleteQuery]++;
425 continue;
426 }
427 if(strstr(linebuf,"logfile turned over") != 0) {//reported by newsyslog
428 excluded_count[ER_LogfileTurnedOver]++;
429 continue;
430 }
944cf283
FM
431
432 // exclude_string
// ExcludeString is a colon separated list: the line is dropped if it contains any item of the list.
433 if(ExcludeString[0] != '\0') {
434 bool exstring=false;
435 getword_start(&gwarea,ExcludeString);
436 while(strchr(gwarea.current,':') != 0) {
437 if (getword_multisep(val1,sizeof(val1),&gwarea,':')<0) {
af961877 438 debuga(__FILE__,__LINE__,_("Invalid record in exclusion string\n"));
944cf283 439 exit(EXIT_FAILURE);
27d1fa35 440 }
944cf283 441 if((str=(char *) strstr(linebuf,val1)) != (char *) NULL ) {
27d1fa35 442 exstring=true;
944cf283
FM
443 break;
444 }
27d1fa35 445 }
944cf283
FM
446 if(!exstring && (str=(char *) strstr(linebuf,gwarea.current)) != (char *) NULL )
447 exstring=true;
7c8c06c5
FM
448 if(exstring) {
449 excluded_count[ER_ExcludeString]++;
450 continue;
451 }
944cf283 452 }
27d1fa35 453
944cf283 454 totregsl++;
4246ae8d 455 if (debugz>=LogLevel_Data)
944cf283 456 printf("BUF=%s\n",linebuf);
27d1fa35 457
944cf283 458 // process the line
8b4e9578
FM
459 log_entry_status=LogLine_Parse(&log_line,&log_entry,linebuf);
460 if (log_entry_status==RLRC_Unknown)
461 {
462 excluded_count[ER_UnknownFormat]++;
463 continue;
944cf283
FM
464 }
465 if (log_entry_status==RLRC_Ignore) {
7c8c06c5 466 excluded_count[ER_FormatData]++;
944cf283
FM
467 continue;
468 }
8b4e9578 469 format_count[log_line.current_format_idx]++;
944cf283 470
// Create the sarg parsed log on the first entry that doesn't come from a sarg log, if ParsedOutputLog is set.
8b4e9578 471 if (!fp_log && ParsedOutputLog[0] && log_line.current_format!=&ReadSargLog) {
944cf283
FM
472 if(access(ParsedOutputLog,R_OK) != 0) {
473 my_mkdir(ParsedOutputLog);
474 }
475 if (snprintf(SargLogFile,sizeof(SargLogFile),"%s/sarg_temp.log",ParsedOutputLog)>=sizeof(SargLogFile)) {
af961877 476 debuga(__FILE__,__LINE__,_("Path too long: "));
041018b6 477 debuga_more("%s/sarg_temp.log\n",ParsedOutputLog);
1c91da07
FM
478 exit(EXIT_FAILURE);
479 }
944cf283 480 if((fp_log=MY_FOPEN(SargLogFile,"w"))==NULL) {
af961877 481 debuga(__FILE__,__LINE__,_("Cannot open file \"%s\": %s\n"),SargLogFile,strerror(errno));
1c91da07
FM
482 exit(EXIT_FAILURE);
483 }
944cf283
FM
484 fputs("*** SARG Log ***\n",fp_log);
485 }
1c91da07 486
// An entry without IP address, user or URL stops the reading of this file.
944cf283 487 if (log_entry.Ip==NULL) {
af961877 488 debuga(__FILE__,__LINE__,_("Unknown input log file format: no IP addresses\n"));
944cf283
FM
489 break;
490 }
491 if (log_entry.User==NULL) {
af961877 492 debuga(__FILE__,__LINE__,_("Unknown input log file format: no user\n"));
944cf283
FM
493 break;
494 }
495 if (log_entry.Url==NULL) {
af961877 496 debuga(__FILE__,__LINE__,_("Unknown input log file format: no URL\n"));
944cf283
FM
497 break;
498 }
1c91da07 499
944cf283 500 idata=builddia(log_entry.EntryTime.tm_mday,log_entry.EntryTime.tm_mon+1,log_entry.EntryTime.tm_year+1900);
4246ae8d 501 if (debugz>=LogLevel_Data)
944cf283 502 printf("DATE=%s IDATA=%d DFROM=%d DUNTIL=%d\n",Filter->DateRange,idata,dfrom,duntil);
27d1fa35 503
6a943fc1
FM
// Track the earliest and latest entry dates. This is done before the date range filter is applied.
504 if (EarliestDate<0 || idata<EarliestDate) {
505 EarliestDate=idata;
506 memcpy(&EarliestDateTime,&log_entry.EntryTime,sizeof(struct tm));
507 }
508 if (LatestDate<0 || idata>LatestDate) {
509 LatestDate=idata;
510 memcpy(&LatestDateTime,&log_entry.EntryTime,sizeof(struct tm));
511 }
944cf283 512 if(Filter->DateRange[0] != '\0'){
7c8c06c5
FM
513 if(idata < dfrom || idata > duntil) {
514 excluded_count[ER_OutOfDateRange]++;
515 continue;
516 }
944cf283 517 }
27d1fa35 518
944cf283 519 // Record only hours usage which is required
7c8c06c5
FM
520 if( bsearch( &( log_entry.EntryTime.tm_wday ), weekdays.list, weekdays.len, sizeof( int ), compar ) == NULL ) {
521 excluded_count[ER_OutOfWDayRange]++;
944cf283 522 continue;
7c8c06c5 523 }
27d1fa35 524
7c8c06c5
FM
525 if( bsearch( &( log_entry.EntryTime.tm_hour ), hours.list, hours.len, sizeof( int ), compar ) == NULL ) {
526 excluded_count[ER_OutOfHourRange]++;
944cf283 527 continue;
7c8c06c5 528 }
27d1fa35 529
074a6d8f
FM
// Check the user: every error but USERERR_NoError drops the entry under its own exclusion reason.
530 PUser=process_user(&log_entry.User,log_entry.Ip,&id_is_ip);
531 switch (PUser)
36a0b94c 532 {
074a6d8f
FM
533 case USERERR_NoError:
534 break;
535 case USERERR_NameTooLong:
536 if (debugz>=LogLevel_Process) debuga(__FILE__,__LINE__,_("User ID too long: %s\n"),log_entry.User);
537 excluded_count[ER_UserNameTooLong]++;
538 totregsx++;
539 continue;
540 case USERERR_Excluded:
7c8c06c5 541 excluded_count[ER_User]++;
27d1fa35 542 continue;
074a6d8f
FM
543 case USERERR_InvalidChar:
544 excluded_count[ER_InvalidUserChar]++;
545 continue;
546 case USERERR_EmptyUser:
547 excluded_count[ER_NoUser]++;
548 continue;
549 case USERERR_SysUser:
550 excluded_count[ER_SysUser]++;
551 continue;
552 case USERERR_Ignored:
553 excluded_count[ER_IgnoredUser]++;
554 totregsx++;
555 continue;
556 case USERERR_Untracked:
557 excluded_count[ER_UntrackedUser]++;
558 continue;
944cf283 559 }
27d1fa35 560
944cf283 561 if(vercode(log_entry.HttpCode)) {
af961877 562 if (debugz>=LogLevel_Process) debuga(__FILE__,__LINE__,_("Excluded code: %s\n"),log_entry.HttpCode);
7c8c06c5 563 excluded_count[ER_HttpCode]++;
944cf283
FM
564 totregsx++;
565 continue;
566 }
567
944cf283
FM
568 // replace any tab by a single space
// (the tab is the field separator of the lines written below)
569 for (str=log_entry.Url ; *str ; str++)
570 if (*str=='\t') *str=' ';
571 for (str=log_entry.HttpCode ; *str ; str++)
572 if (*str=='\t') *str=' ';
573
8b4e9578 574 if (log_line.current_format!=&ReadSargLog) {
944cf283
FM
575 /*
576 The full URL is not saved in sarg log. There is no point in testing the URL to detect
577 a downloaded file.
578 */
579 download_flag=is_download_suffix(log_entry.Url);
580 if (download_flag) {
581 safe_strcpy(download_url,log_entry.Url,sizeof(download_url));
27d1fa35 582 }
944cf283
FM
583 } else
584 download_flag=false;
27d1fa35 585
944cf283 586 url=process_url(log_entry.Url,LongUrl);
7c8c06c5
FM
587 if (!url || url[0] == '\0') {
588 excluded_count[ER_NoUrl]++;
589 continue;
590 }
944cf283
FM
591
592 if(addr[0] != '\0'){
7c8c06c5
FM
593 if(strcmp(addr,log_entry.Ip)!=0) {
594 excluded_count[ER_UntrackedIpAddr]++;
595 continue;
596 }
944cf283
FM
597 }
598 if(Filter->HostFilter) {
599 if(!vhexclude(url)) {
af961877 600 if (debugz>=LogLevel_Data) debuga(__FILE__,__LINE__,_("Excluded site: %s\n"),url);
7c8c06c5 601 excluded_count[ER_Url]++;
27d1fa35
FM
602 totregsx++;
603 continue;
604 }
944cf283 605 }
27d1fa35 606
944cf283
FM
// The time of the entry is compared as hour*100+minute with the requested time range.
607 if(Filter->StartTime >= 0 && Filter->EndTime >= 0) {
608 hmr=log_entry.EntryTime.tm_hour*100+log_entry.EntryTime.tm_min;
7c8c06c5
FM
609 if(hmr < Filter->StartTime || hmr > Filter->EndTime) {
610 excluded_count[ER_OutOfTimeRange]++;
611 continue;
612 }
944cf283 613 }
27d1fa35 614
944cf283 615 if(site[0] != '\0'){
7c8c06c5
FM
616 if(strstr(url,site)==0) {
617 excluded_count[ER_UntrackedUrl]++;
618 continue;
619 }
944cf283 620 }
27d1fa35 621
// Negative sizes and times are cleared. So is an elapsed time above the configured maximum.
944cf283 622 if (log_entry.DataSize<0) log_entry.DataSize=0;
0c87646f 623
944cf283
FM
624 if (log_entry.ElapsedTime<0) log_entry.ElapsedTime=0;
625 if (Filter->max_elapsed>0 && log_entry.ElapsedTime>Filter->max_elapsed) {
626 log_entry.ElapsedTime=0;
627 }
27d1fa35 628
944cf283
FM
629 if((str=(char *) strstr(linebuf, "[SmartFilter:")) != (char *) NULL ) {
630 fixendofline(str);
631 snprintf(smartfilter,sizeof(smartfilter),"\"%s\"",str+1);
632 } else strcpy(smartfilter,"\"\"");
27d1fa35 633
944cf283
FM
// Look for the user in the list of user files, counting the open files met before it.
634 nopen=0;
635 prev_ufile=NULL;
7799ec8d 636 for (ufile=first_user_file ; ufile && strcmp(log_entry.User,ufile->user->id)!=0 ; ufile=ufile->next) {
944cf283
FM
637 prev_ufile=ufile;
638 if (ufile->file) nopen++;
639 }
640 if (!ufile) {
// Unknown user: a new node is inserted at the head of the list.
641 ufile=malloc(sizeof(*ufile));
27d1fa35 642 if (!ufile) {
7799ec8d 643 debuga(__FILE__,__LINE__,_("Not enough memory to store the user %s\n"),log_entry.User);
944cf283
FM
644 exit(EXIT_FAILURE);
645 }
646 memset(ufile,0,sizeof(*ufile));
647 ufile->next=first_user_file;
648 first_user_file=ufile;
8e134f1a
FM
649 /*
650 * This id_is_ip stuff is just to store the string only once if the user is
651 * identified by its IP address instead of a distinct ID and IP address.
652 */
7799ec8d 653 uinfo=userinfo_create(log_entry.User,(id_is_ip) ? NULL : log_entry.Ip);
944cf283 654 ufile->user=uinfo;
944cf283
FM
655 nusers++;
656 } else {
// Known user: its node is moved to the head of the list unless it is already there.
657 if (prev_ufile) {
658 prev_ufile->next=ufile->next;
27d1fa35
FM
659 ufile->next=first_user_file;
660 first_user_file=ufile;
27d1fa35 661 }
944cf283 662 }
1c91da07 663#ifdef ENABLE_DOUBLE_CHECK_DATA
d6e3e724
FM
664 if (strcmp(log_entry.HttpCode,"TCP_DENIED/407")!=0) {
665 ufile->user->nbytes+=log_entry.DataSize;
666 ufile->user->elap+=log_entry.ElapsedTime;
667 }
1c91da07 668#endif
27d1fa35 669
944cf283
FM
// Open the file of the user if necessary. If too many files were counted open, the open files
// found after the first maxopenfiles open ones of the list are closed first.
670 if (ufile->file==NULL) {
671 if (nopen>=maxopenfiles) {
672 x=0;
673 for (ufile1=first_user_file ; ufile1 ; ufile1=ufile1->next) {
674 if (ufile1->file!=NULL) {
675 if (x>=maxopenfiles) {
676 if (fclose(ufile1->file)==EOF) {
af961877 677 debuga(__FILE__,__LINE__,_("Write error in log file of user %s: %s\n"),ufile1->user->id,strerror(errno));
944cf283 678 exit(EXIT_FAILURE);
27d1fa35 679 }
944cf283 680 ufile1->file=NULL;
27d1fa35 681 }
944cf283 682 x++;
27d1fa35
FM
683 }
684 }
27d1fa35 685 }
944cf283 686 if (snprintf (tmp3, sizeof(tmp3), "%s/%s.user_unsort", tmp, ufile->user->filename)>=sizeof(tmp3)) {
af961877 687 debuga(__FILE__,__LINE__,_("Temporary user file name too long: %s/%s.user_unsort\n"), tmp, ufile->user->filename);
27d1fa35
FM
688 exit(EXIT_FAILURE);
689 }
944cf283 690 if ((ufile->file = MY_FOPEN (tmp3, "a")) == NULL) {
af961877 691 debuga(__FILE__,__LINE__,_("(log) Cannot open temporary file %s: %s\n"), tmp3, strerror(errno));
2daef207 692 exit(EXIT_FAILURE);
1c91da07 693 }
944cf283 694 }
27d1fa35 695
944cf283
FM
696 strftime(dia, sizeof(dia), "%d/%m/%Y",&log_entry.EntryTime);
697 strftime(hora,sizeof(hora),"%H:%M:%S",&log_entry.EntryTime);
27d1fa35 698
944cf283
FM
// Write the entry as one tab separated line in the file of the user.
699 if (fprintf(ufile->file, "%s\t%s\t%s\t%s\t%"PRIu64"\t%s\t%ld\t%s\n",dia,hora,
700 log_entry.Ip,url,(uint64_t)log_entry.DataSize,
701 log_entry.HttpCode,log_entry.ElapsedTime,smartfilter)<=0) {
7799ec8d 702 debuga(__FILE__,__LINE__,_("Write error in the log file of user %s\n"),log_entry.User);
944cf283
FM
703 exit(EXIT_FAILURE);
704 }
705 records_kept++;
27d1fa35 706
// The same entry, with the user name added, goes to the sarg parsed log if one is open.
8b4e9578 707 if (fp_log && log_line.current_format!=&ReadSargLog) {
944cf283 708 fprintf(fp_log, "%s\t%s\t%s\t%s\t%s\t%"PRIu64"\t%s\t%ld\t%s\n",dia,hora,
7799ec8d 709 log_entry.User,log_entry.Ip,url,(uint64_t)log_entry.DataSize,
944cf283
FM
710 log_entry.HttpCode,log_entry.ElapsedTime,smartfilter);
711 }
712
713 totregsg++;
714
715 denied_write(&log_entry);
716 authfail_write(&log_entry);
88776d28 717 if (download_flag) download_write(&log_entry,download_url);
468d879d
FM
718 if (log_entry.UserAgent)
719 {
720 if (!UseragentLog)
721 UseragentLog=UserAgent_Open();
ba3b652e 722 UserAgent_Write(UseragentLog,&log_entry.EntryTime,log_entry.Ip,log_entry.User,log_entry.UserAgent);
468d879d 723 }
27d1fa35 724
// Extend the reported period with this entry, except when it was read from a sarg log.
8b4e9578 725 if (log_line.current_format!=&ReadSargLog) {
944cf283
FM
726 if (period.start.tm_year==0 || idata<mindate || compare_date(&period.start,&log_entry.EntryTime)>0){
727 mindate=idata;
728 memcpy(&period.start,&log_entry.EntryTime,sizeof(log_entry.EntryTime));
729 }
730 if (period.end.tm_year==0 || idata>maxdate || compare_date(&period.end,&log_entry.EntryTime)<0) {
731 maxdate=idata;
732 memcpy(&period.end,&log_entry.EntryTime,sizeof(log_entry.EntryTime));
27d1fa35
FM
733 }
734 }
735
4246ae8d 736 if (debugz>=LogLevel_Data){
944cf283 737 printf("IP=\t%s\n",log_entry.Ip);
7799ec8d 738 printf("USER=\t%s\n",log_entry.User);
944cf283
FM
739 printf("ELAP=\t%ld\n",log_entry.ElapsedTime);
740 printf("DATE=\t%s\n",dia);
741 printf("TIME=\t%s\n",hora);
742 //printf("FUNC=\t%s\n",fun);
743 printf("URL=\t%s\n",url);
744 printf("CODE=\t%s\n",log_entry.HttpCode);
745 printf("LEN=\t%"PRIu64"\n",(uint64_t)log_entry.DataSize);
27d1fa35
FM
746 }
747 }
748 longline_destroy(&line);
749
800eafb8
FM
// Close the input and the user agent log then display the final statistics.
750 if (FileObject_Close(fp_in)) {
751 debuga(__FILE__,__LINE__,_("Read error in \"%s\": %s\n"),arq,FileObject_GetLastCloseError());
752 exit(EXIT_FAILURE);
753 }
468d879d 754 if (UseragentLog) fclose(UseragentLog);
800eafb8
FM
755 if (ShowReadStatistics) {
756 if (ShowReadPercent)
757 printf(_("SARG: Records in file: %lu, reading: %3.2f%%\n"),recs2, (float) 100 );
758 else
759 printf(_("SARG: Records in file: %lu\n"),recs2);
944cf283
FM
760 }
761}
762
7c8c06c5
FM
763/*!
764 * Display a line with the excluded entries count.
765 *
766 * \param Explain A translated string explaining the exluded count.
767 * \param Reason The reason number.
768 */
769static void DisplayExcludeCount(const char *Explain,enum ExcludeReasonEnum Reason)
770{
771 if (excluded_count[Reason]>0) {
af961877 772 debuga(__FILE__,__LINE__," %s: %lu\n",Explain,excluded_count[Reason]);
7c8c06c5
FM
773 }
774}
775
944cf283
FM
776/*!
777Read the log files.
778
779\param Filter The filtering parameters for the file to load.
780
781\retval 1 Records found.
782\retval 0 No record found.
783*/
784int ReadLogFile(struct ReadLogDataStruct *Filter)
785{
944cf283
FM
786 int x;
787 int cstatus;
788 struct userfilestruct *ufile;
789 struct userfilestruct *ufile1;
6068ae56
FM
790 FileListIterator FIter;
791 const char *file;
944cf283
FM
792
793 for (x=0 ; x<sizeof(format_count)/sizeof(*format_count) ; x++) format_count[x]=0;
7c8c06c5 794 for (x=0 ; x<sizeof(excluded_count)/sizeof(*excluded_count) ; x++) excluded_count[x]=0;
944cf283
FM
795 first_user_file=NULL;
796
797 if (!dataonly) {
798 denied_open();
799 authfail_open();
800 download_open();
801 }
802
6068ae56
FM
803 FIter=FileListIter_Open(AccessLog);
804 while ((file=FileListIter_Next(FIter))!=NULL)
805 ReadOneLogFile(Filter,file);
806 FileListIter_Close(FIter);
944cf283 807
27d1fa35 808 if(fp_log != NULL) {
27d1fa35 809 char val2[40];
944cf283 810 char val4[4096];//val4 must not be bigger than SargLogFile without fixing the strcpy below
0c87646f 811
507460ae 812 if (fclose(fp_log)==EOF) {
af961877 813 debuga(__FILE__,__LINE__,_("Write error in \"%s\": %s\n"),SargLogFile,strerror(errno));
507460ae
FM
814 exit(EXIT_FAILURE);
815 }
944cf283
FM
816 strftime(val2,sizeof(val2),"%d%m%Y_%H%M",&period.start);
817 strftime(val1,sizeof(val1),"%d%m%Y_%H%M",&period.end);
818 if (snprintf(val4,sizeof(val4),"%s/sarg-%s-%s.log",ParsedOutputLog,val2,val1)>=sizeof(val4)) {
af961877 819 debuga(__FILE__,__LINE__,_("Path too long: "));
041018b6 820 debuga_more("%s/sarg-%s-%s.log\n",ParsedOutputLog,val2,val1);
27d1fa35
FM
821 exit(EXIT_FAILURE);
822 }
944cf283 823 if (rename(SargLogFile,val4)) {
af961877 824 debuga(__FILE__,__LINE__,_("failed to rename %s to %s - %s\n"),SargLogFile,val4,strerror(errno));
27d1fa35 825 } else {
944cf283 826 strcpy(SargLogFile,val4);
27d1fa35
FM
827
828 if(strcmp(ParsedOutputLogCompress,"nocompress") != 0 && ParsedOutputLogCompress[0] != '\0') {
829 /*
830 No double quotes around ParsedOutputLogCompress because it may contain command line options. If double quotes are
831 necessary around the command name, put them in the configuration file.
832 */
944cf283 833 if (snprintf(val1,sizeof(val1),"%s \"%s\"",ParsedOutputLogCompress,SargLogFile)>=sizeof(val1)) {
af961877 834 debuga(__FILE__,__LINE__,_("Command too long: %s \"%s\"\n"),ParsedOutputLogCompress,SargLogFile);
27d1fa35
FM
835 exit(EXIT_FAILURE);
836 }
837 cstatus=system(val1);
838 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
af961877
FM
839 debuga(__FILE__,__LINE__,_("command return status %d\n"),WEXITSTATUS(cstatus));
840 debuga(__FILE__,__LINE__,_("command: %s\n"),val1);
27d1fa35
FM
841 exit(EXIT_FAILURE);
842 }
843 }
844 }
845 if(debug)
af961877 846 debuga(__FILE__,__LINE__,_("Sarg parsed log saved as %s\n"),SargLogFile);
0c87646f 847 }
27d1fa35 848
8e53b2e7 849 denied_close();
16b013cc 850 authfail_close();
11284535 851 download_close();
27d1fa35
FM
852
853 for (ufile=first_user_file ; ufile ; ufile=ufile1) {
854 ufile1=ufile->next;
507460ae 855 if (ufile->file!=NULL && fclose(ufile->file)==EOF) {
af961877 856 debuga(__FILE__,__LINE__,_("Write error in log file of user %s: %s\n"),ufile->user->id,strerror(errno));
507460ae
FM
857 exit(EXIT_FAILURE);
858 }
27d1fa35
FM
859 free(ufile);
860 }
861
862 if (debug) {
8e501bd6 863 unsigned long int totalcount=0;
27d1fa35 864
af961877 865 debuga(__FILE__,__LINE__,_(" Records read: %ld, written: %ld, excluded: %ld\n"),totregsl,totregsg,totregsx);
27d1fa35 866
7c8c06c5
FM
867 for (x=sizeof(excluded_count)/sizeof(*excluded_count)-1 ; x>=0 && excluded_count[x]>0 ; x--);
868 if (x>=0) {
af961877 869 debuga(__FILE__,__LINE__,_("Reasons for excluded entries:\n"));
7c8c06c5
FM
870 DisplayExcludeCount(_("User name too long"),ER_UserNameTooLong);
871 DisplayExcludeCount(_("Squid logged an incomplete query received from the client"),ER_IncompleteQuery);
872 DisplayExcludeCount(_("Log file turned over"),ER_LogfileTurnedOver);
7c8c06c5
FM
873 DisplayExcludeCount(_("Excluded by \"exclude_string\" in sarg.conf"),ER_ExcludeString);
874 DisplayExcludeCount(_("Unknown input log file format"),ER_UnknownFormat);
875 DisplayExcludeCount(_("Line ignored by the input log format"),ER_FormatData);
876 DisplayExcludeCount(_("Time outside the requested date range (-d)"),ER_OutOfDateRange);
877 DisplayExcludeCount(_("Ignored week day (\"weekdays\" parameter in sarg.conf)"),ER_OutOfWDayRange);
878 DisplayExcludeCount(_("Ignored hour (\"hours\" parameter in sarg.conf)"),ER_OutOfHourRange);
879 DisplayExcludeCount(_("User is not in the \"include_users\" list"),ER_User);
880 DisplayExcludeCount(_("HTTP code excluded by \"exclude_code\" file"),ER_HttpCode);
881 DisplayExcludeCount(_("Invalid character found in user name"),ER_InvalidUserChar);
882 DisplayExcludeCount(_("No URL in entry"),ER_NoUrl);
883 DisplayExcludeCount(_("Not the IP address requested with -a"),ER_UntrackedIpAddr);
884 DisplayExcludeCount(_("URL excluded by -c or \"exclude_hosts\""),ER_Url);
885 DisplayExcludeCount(_("Entry time outside of requested hour range (-t)"),ER_OutOfTimeRange);
886 DisplayExcludeCount(_("Not the URL requested by -s"),ER_UntrackedUrl);
887 DisplayExcludeCount(_("No user in entry"),ER_NoUser);
888 DisplayExcludeCount(_("Not the user requested by -u"),ER_UntrackedUser);
889 DisplayExcludeCount(_("System user as defined by \"password\" in sarg.conf"),ER_SysUser);
890 DisplayExcludeCount(_("User ignored by \"exclude_users\""),ER_IgnoredUser);
891 }
892
1c91da07
FM
893 for (x=0 ; x<sizeof(LogFormats)/sizeof(*LogFormats) ; x++) {
894 if (format_count[x]>0) {
895 /* TRANSLATORS: It displays the number of lines found in the input log files
896 * for each supported log format. The log format name is the %s and is a string
897 * you translate somewhere else. */
af961877 898 debuga(__FILE__,__LINE__,_("%s: %lu entries\n"),_(LogFormats[x]->Name),format_count[x]);
1c91da07
FM
899 totalcount+=format_count[x];
900 }
901 }
27d1fa35 902
1c91da07 903 if (totalcount==0 && totregsg)
af961877 904 debuga(__FILE__,__LINE__,_("Log with invalid format\n"));
27d1fa35
FM
905 }
906
27d1fa35
FM
907 return((totregsg!=0) ? 1 : 0);
908}
6a943fc1
FM
909
910/*!
911 * Get the start and end date of the period covered by the log files.
912 */
b6fb8c79 913bool GetLogPeriod(struct tm *Start,struct tm *End)
6a943fc1 914{
b6fb8c79
FM
915 bool Valid=false;
916
6a943fc1
FM
917 if (EarliestDate>=0) {
918 memcpy(Start,&EarliestDateTime,sizeof(struct tm));
b6fb8c79 919 Valid=true;
6a943fc1
FM
920 } else {
921 memset(Start,0,sizeof(struct tm));
922 }
923 if (LatestDate>=0) {
924 memcpy(End,&LatestDateTime,sizeof(struct tm));
b6fb8c79 925 Valid=true;
6a943fc1
FM
926 } else {
927 memset(End,0,sizeof(struct tm));
928 }
b6fb8c79 929 return(Valid);
6a943fc1 930}