]> git.ipfire.org Git - thirdparty/sarg.git/blame - readlog.c
Fix an error introduced when stripping the user domain
[thirdparty/sarg.git] / readlog.c
CommitLineData
27d1fa35
FM
1/*
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
110ce984 3 * 1998, 2015
27d1fa35
FM
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
27#include "include/conf.h"
28#include "include/defs.h"
29#include "include/readlog.h"
6068ae56 30#include "include/filelist.h"
27d1fa35
FM
31
32#define REPORT_EVERY_X_LINES 5000
33#define MAX_OPEN_USER_FILES 10
34
/*!
 * One node of the linked list of per-user temporary files.
 */
struct userfilestruct
{
	//! The next node of the list or NULL if this is the last one.
	struct userfilestruct *next;
	//! The user whose entries are written to \c file.
	struct userinfostruct *user;
	//! The handle of the user's temporary file. NULL if the file is not currently open.
	FILE *file;
};
41
7c8c06c5
FM
/*!
 * The reasons why an entry of the input log is not kept. Each value is
 * an index into the table counting the excluded records.
 */
enum ExcludeReasonEnum
{
	//! User name too long.
	ER_UserNameTooLong,
	//! Squid logged an incomplete query received from the client.
	ER_IncompleteQuery,
	//! Log file turned over.
	ER_LogfileTurnedOver,
	//! Excluded by exclude_string from sarg.conf.
	ER_ExcludeString,
	//! Unknown input log file format.
	ER_UnknownFormat,
	//! Line to be ignored from the input log file.
	ER_FormatData,
	//! Entry not within the requested date range.
	ER_OutOfDateRange,
	//! Ignored week day.
	ER_OutOfWDayRange,
	//! Ignored hour.
	ER_OutOfHourRange,
	//! User is not in the include_users list.
	ER_User,
	//! HTTP code excluded by exclude_code file.
	ER_HttpCode,
	//! Invalid character found in user name.
	ER_InvalidUserChar,
	//! No URL in entry.
	ER_NoUrl,
	//! Not the IP address requested with -a.
	ER_UntrackedIpAddr,
	//! URL excluded by -c or exclude_hosts.
	ER_Url,
	//! Entry time outside of requested hour range.
	ER_OutOfTimeRange,
	//! Not the URL requested by -s.
	ER_UntrackedUrl,
	//! No user in entry.
	ER_NoUser,
	//! Not the user requested by -u.
	ER_UntrackedUser,
	//! System user.
	ER_SysUser,
	//! User ignored by exclude_users
	ER_IgnoredUser,

	ER_Last //!< last entry of the list
};
89
27d1fa35
FM
90numlist weekdays = { { 0, 1, 2, 3, 4, 5, 6 }, 7 };
91numlist hours = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 }, 24 };
36a0b94c
FM
92//! Domain suffix to strip from the user name.
93char StripUserSuffix[MAX_USER_LEN]="";
27d1fa35 94
27d1fa35 95extern char *userfile;
6068ae56 96extern FileListObject AccessLog;
27d1fa35 97
1c91da07
FM
extern const struct ReadLogProcessStruct ReadSquidLog;
extern const struct ReadLogProcessStruct ReadCommonLog;
extern const struct ReadLogProcessStruct ReadSargLog;
extern const struct ReadLogProcessStruct ReadExtLog;

/*!
 * The list of the supported log formats.
 *
 * Both the descriptors and the pointers stored in the table are
 * constant. The formats are tried in the order of this table when the
 * format of a line must be identified.
 */
static const struct ReadLogProcessStruct * const LogFormats[]=
{
	&ReadSquidLog,
	&ReadCommonLog,
	&ReadSargLog,
	&ReadExtLog
};
111
944cf283
FM
112//! The path to the sarg log file.
113static char SargLogFile[4096]="";
114//! Handle to the sarg log file. NULL if not created.
115static FILE *fp_log=NULL;
116//! The number of records read from the input logs.
117static long int totregsl=0;
118//! The number of records kept.
119static long int totregsg=0;
120//! The number of records excluded.
121static long int totregsx=0;
122//! The beginning of a linked list of user's file.
123static struct userfilestruct *first_user_file=NULL;
124//! Count the number of occurence of each input log format.
8e501bd6 125static unsigned long int format_count[sizeof(LogFormats)/sizeof(*LogFormats)];
944cf283
FM
126//! The minimum date found in the input logs.
127static int mindate=0;
128static int maxdate=0;
7c8c06c5
FM
129//! Count the number of excluded records.
130static unsigned long int excluded_count[ER_Last];
6a943fc1
FM
131//! Earliest date found in the log.
132static int EarliestDate=-1;
133//! The earliest date in time format.
134static struct tm EarliestDateTime;
135//! Latest date found in the log.
136static int LatestDate=-1;
137//! The latest date in time format.
138static struct tm LatestDateTime;
27d1fa35 139
800eafb8
FM
/*!
 * Read from standard input.
 *
 * \param Data The file object. It is the \c FILE stream to read from.
 * \param Buffer The buffer to store the data read.
 * \param Size How many bytes to read. Nothing is read if it is
 * zero or negative.
 *
 * \return The number of bytes read.
 */
static int Stdin_Read(void *Data,void *Buffer,int Size)
{
	// a negative size would be converted to a huge unsigned count by fread()
	if (Size<=0) return(0);
	return((int)fread(Buffer,1,(size_t)Size,(FILE *)Data));
}
153
/*!
 * Check if end of file is reached.
 *
 * \param Data The file object. It is the \c FILE stream to test.
 *
 * \return A non-zero value if the end of file indicator of the
 * stream is set or zero if it is not.
 */
static int Stdin_Eof(void *Data)
{
	FILE *stream=(FILE *)Data;

	return(feof(stream));
}
165
/*!
 * Mimic a close of standard input but do nothing.
 *
 * \param Data File to close. It is not used.
 *
 * \return Always zero as nothing is closed and, therefore, nothing can fail.
 */
static int Stdin_Close(void *Data)
{
	(void)Data;
	return(0);
}
177
178/*!
179 * Open a file object to read from standard input.
180 *
181 * \return The object to pass to other function in this module.
182 */
183static FileObject *Stdin_Open(void)
184{
185 FileObject *File;
186
187 FileObject_SetLastOpenError(NULL);
188 File=calloc(1,sizeof(*File));
189 if (!File)
190 {
191 FileObject_SetLastOpenError(_("Not enough memory"));
192 return(NULL);
193 }
194 File->Data=stdin;
195 File->Read=Stdin_Read;
196 File->Eof=Stdin_Eof;
197 File->Rewind=NULL;
198 File->Close=Stdin_Close;
199 return(File);
200}
201
8b4e9578
FM
202/*!
203 * Initialize the memory structure needed by LogLine_Parse() to parse
204 * a log line.
205 *
206 * \param log_line The structure to initialize.
207 */
208void LogLine_Init(struct LogLineStruct *log_line)
209{
210 log_line->current_format=NULL;
211 log_line->current_format_idx=-1;
212 log_line->file_name="";
213 log_line->successive_errors=0;
214 log_line->total_errors=0;
215}
216
217/*!
218 * Set the name of the log file being parsed.
219 *
220 * \param log_line Data structure to parse the log line.
221 * \param file_name The name of the log file being read.
222 */
223void LogLine_File(struct LogLineStruct *log_line,const char *file_name)
224{
225 log_line->file_name=file_name;
226}
227
228/*!
229 * Parse the next line from a log file.
230 *
231 * \param log_line A buffer to store the data about the current parsing.
232 * \param log_entry The variable to store the parsed data.
233 * \param linebuf The text line read from the log file.
234 *
235 * \return
236 */
237enum ReadLogReturnCodeEnum LogLine_Parse(struct LogLineStruct *log_line,struct ReadLogStruct *log_entry,char *linebuf)
238{
239 enum ReadLogReturnCodeEnum log_entry_status=RLRC_Unknown;
240 int x;
241
242 if (log_line->current_format)
243 {
244 memset(log_entry,0,sizeof(*log_entry));
245 log_entry_status=log_line->current_format->ReadEntry(linebuf,log_entry);
246 }
247
248 // find out what line format to use
249 if (log_entry_status==RLRC_Unknown)
250 {
251 for (x=0 ; x<(int)(sizeof(LogFormats)/sizeof(*LogFormats)) ; x++)
252 {
253 if (LogFormats[x]==log_line->current_format) continue;
254 memset(log_entry,0,sizeof(*log_entry));
255 log_entry_status=LogFormats[x]->ReadEntry(linebuf,log_entry);
256 if (log_entry_status!=RLRC_Unknown)
257 {
258 log_line->current_format=LogFormats[x];
259 log_line->current_format_idx=x;
260 if (debugz>=LogLevel_Process)
261 {
262 /* TRANSLATORS: The argument is the log format name as translated by you. */
af961877 263 debuga(__FILE__,__LINE__,_("Log format identified as \"%s\" for %s\n"),_(log_line->current_format->Name),log_line->file_name);
8b4e9578
FM
264 }
265 break;
266 }
267 }
268 if (x>=(int)(sizeof(LogFormats)/sizeof(*LogFormats)))
269 {
270 if (++log_line->successive_errors>NumLogSuccessiveErrors) {
af961877 271 debuga(__FILE__,__LINE__,ngettext("%d consecutive error found in the input log file %s\n",
8b4e9578
FM
272 "%d consecutive errors found in the input log file %s\n",log_line->successive_errors),log_line->successive_errors,log_line->file_name);
273 exit(EXIT_FAILURE);
274 }
275 if (NumLogTotalErrors>=0 && ++log_line->total_errors>NumLogTotalErrors) {
af961877 276 debuga(__FILE__,__LINE__,ngettext("%d error found in the input log file (last in %s)\n",
8b4e9578
FM
277 "%d errors found in the input log file (last in %s)\n",log_line->total_errors),log_line->total_errors,log_line->file_name);
278 exit(EXIT_FAILURE);
279 }
af961877 280 debuga(__FILE__,__LINE__,_("The following line read from %s could not be parsed and is ignored\n%s\n"),log_line->file_name,linebuf);
8b4e9578
FM
281 }
282 else
283 log_line->successive_errors=0;
284 }
285
286 if (log_line->current_format_idx<0 || log_line->current_format==NULL) {
af961877 287 debuga(__FILE__,__LINE__,_("Sarg failed to determine the format of the input log file %s\n"),log_line->file_name);
8b4e9578
FM
288 exit(EXIT_FAILURE);
289 }
290 if (log_entry_status==RLRC_InternalError) {
af961877 291 debuga(__FILE__,__LINE__,_("Internal error encountered while processing %s\nSee previous message to know the reason for that error.\n"),log_line->file_name);
8b4e9578
FM
292 exit(EXIT_FAILURE);
293 }
294 return(log_entry_status);
295}
296
944cf283
FM
297/*!
298Read a single log file.
27d1fa35 299
944cf283 300\param arq The log file name to read.
27d1fa35 301*/
944cf283 302static void ReadOneLogFile(struct ReadLogDataStruct *Filter,const char *arq)
27d1fa35 303{
27d1fa35
FM
304 longline line;
305 char *linebuf;
306 char *str;
27d1fa35 307 char hora[30];
c5d6ef4b 308 char dia[128]="";
27d1fa35 309 char wuser[MAXLEN];
944cf283 310 char tmp3[MAXLEN]="";
27d1fa35
FM
311 char download_url[MAXLEN];
312 char smartfilter[MAXLEN];
27d1fa35 313 const char *url;
dc34d345 314 const char *user;
7799ec8d 315 char UserBuffer[MAX_USER_LEN];
27d1fa35
FM
316 int OutputNonZero = REPORT_EVERY_X_LINES ;
317 int idata=0;
27d1fa35
FM
318 int x;
319 int hmr;
320 int nopen;
36a0b94c 321 int StripSuffixLen;
27d1fa35 322 int maxopenfiles=MAX_OPEN_USER_FILES;
27d1fa35
FM
323 unsigned long int recs1=0UL;
324 unsigned long int recs2=0UL;
800eafb8 325 FileObject *fp_in=NULL;
27d1fa35
FM
326 bool download_flag=false;
327 bool id_is_ip;
1c91da07 328 enum ReadLogReturnCodeEnum log_entry_status;
27d1fa35
FM
329 struct stat logstat;
330 struct getwordstruct gwarea;
27d1fa35
FM
331 struct userfilestruct *prev_ufile;
332 struct userinfostruct *uinfo;
27d1fa35
FM
333 struct userfilestruct *ufile;
334 struct userfilestruct *ufile1;
c5d6ef4b 335 struct ReadLogStruct log_entry;
8b4e9578 336 struct LogLineStruct log_line;
27d1fa35 337
8b4e9578
FM
338 LogLine_Init(&log_line);
339 LogLine_File(&log_line,arq);
944cf283
FM
340 for (x=0 ; x<sizeof(LogFormats)/sizeof(*LogFormats) ; x++)
341 if (LogFormats[x]->NewFile)
342 LogFormats[x]->NewFile(arq);
27d1fa35 343
944cf283 344 if (arq[0]=='-' && arq[1]=='\0') {
800eafb8 345 fp_in=Stdin_Open();
944cf283 346 if(debug)
af961877 347 debuga(__FILE__,__LINE__,_("Reading access log file: from stdin\n"));
944cf283
FM
348 } else {
349 if (Filter->DateRange[0]!='\0') {
350 if (stat(arq,&logstat)!=0) {
af961877 351 debuga(__FILE__,__LINE__,_("Cannot get the modification time of input log file %s (%s). Processing it anyway\n"),arq,strerror(errno));
944cf283
FM
352 } else {
353 struct tm *logtime=localtime(&logstat.st_mtime);
354 if ((logtime->tm_year+1900)*10000+(logtime->tm_mon+1)*100+logtime->tm_mday<dfrom) {
af961877 355 debuga(__FILE__,__LINE__,_("Ignoring old log file %s\n"),arq);
944cf283 356 return;
27d1fa35
FM
357 }
358 }
27d1fa35 359 }
800eafb8
FM
360 fp_in=decomp(arq);
361 if (fp_in==NULL) {
362 debuga(__FILE__,__LINE__,_("Cannot open input log file \"%s\": %s\n"),arq,FileObject_GetLastOpenError());
944cf283
FM
363 exit(EXIT_FAILURE);
364 }
800eafb8 365 if (debug) debuga(__FILE__,__LINE__,_("Reading access log file: %s\n"),arq);
944cf283 366 }
27d1fa35 367
944cf283 368 download_flag=false;
27d1fa35 369
944cf283
FM
370 recs1=0UL;
371 recs2=0UL;
36a0b94c 372 StripSuffixLen=strlen(StripUserSuffix);
2f4787e6 373
944cf283 374 // pre-read the file only if we have to show stats
800eafb8 375 if (ShowReadStatistics && ShowReadPercent && fp_in->Rewind) {
a1e4e370 376 int nread,i;
944cf283
FM
377 bool skipcr=false;
378 char tmp4[MAXLEN];
27d1fa35 379
800eafb8 380 while ((nread=FileObject_Read(fp_in,tmp4,sizeof(tmp4)))>0) {
944cf283
FM
381 for (i=0 ; i<nread ; i++)
382 if (skipcr) {
383 if (tmp4[i]!='\n' && tmp4[i]!='\r') {
384 skipcr=false;
27d1fa35 385 }
944cf283
FM
386 } else {
387 if (tmp4[i]=='\n' || tmp4[i]=='\r') {
388 skipcr=true;
389 recs1++;
390 }
391 }
27d1fa35 392 }
800eafb8 393 FileObject_Rewind(fp_in);
944cf283
FM
394 printf(_("SARG: Records in file: %lu, reading: %3.2f%%"),recs1,(float) 0);
395 putchar('\r');
396 fflush( stdout ) ;
397 }
27d1fa35 398
944cf283 399 if ((line=longline_create())==NULL) {
af961877 400 debuga(__FILE__,__LINE__,_("Not enough memory to read file \"%s\"\n"),arq);
944cf283
FM
401 exit(EXIT_FAILURE);
402 }
27d1fa35 403
944cf283 404 while ((linebuf=longline_read(fp_in,line))!=NULL) {
944cf283 405 lines_read++;
27d1fa35 406
944cf283
FM
407 recs2++;
408 if (ShowReadStatistics && --OutputNonZero<=0) {
409 if (recs1>0) {
410 double perc = recs2 * 100. / recs1 ;
411 printf(_("SARG: Records in file: %lu, reading: %3.2lf%%"),recs2,perc);
412 } else {
413 printf(_("SARG: Records in file: %lu"),recs2);
27d1fa35 414 }
944cf283
FM
415 putchar('\r');
416 fflush (stdout);
417 OutputNonZero = REPORT_EVERY_X_LINES ;
418 }
0c87646f 419
944cf283
FM
420 /*
421 The following checks are retained here as I don't know to
422 what format they apply. They date back to pre 2.4 versions.
423 */
424 //if(blen < 58) continue; //this test conflict with the reading of the sarg log header line
7c8c06c5
FM
425 if(strstr(linebuf,"HTTP/0.0") != 0) {//recorded by squid when encountering an incomplete query
426 excluded_count[ER_IncompleteQuery]++;
427 continue;
428 }
429 if(strstr(linebuf,"logfile turned over") != 0) {//reported by newsyslog
430 excluded_count[ER_LogfileTurnedOver]++;
431 continue;
432 }
944cf283
FM
433
434 // exclude_string
435 if(ExcludeString[0] != '\0') {
436 bool exstring=false;
437 getword_start(&gwarea,ExcludeString);
438 while(strchr(gwarea.current,':') != 0) {
439 if (getword_multisep(val1,sizeof(val1),&gwarea,':')<0) {
af961877 440 debuga(__FILE__,__LINE__,_("Invalid record in exclusion string\n"));
944cf283 441 exit(EXIT_FAILURE);
27d1fa35 442 }
944cf283 443 if((str=(char *) strstr(linebuf,val1)) != (char *) NULL ) {
27d1fa35 444 exstring=true;
944cf283
FM
445 break;
446 }
27d1fa35 447 }
944cf283
FM
448 if(!exstring && (str=(char *) strstr(linebuf,gwarea.current)) != (char *) NULL )
449 exstring=true;
7c8c06c5
FM
450 if(exstring) {
451 excluded_count[ER_ExcludeString]++;
452 continue;
453 }
944cf283 454 }
27d1fa35 455
944cf283 456 totregsl++;
4246ae8d 457 if (debugz>=LogLevel_Data)
944cf283 458 printf("BUF=%s\n",linebuf);
27d1fa35 459
944cf283 460 // process the line
8b4e9578
FM
461 log_entry_status=LogLine_Parse(&log_line,&log_entry,linebuf);
462 if (log_entry_status==RLRC_Unknown)
463 {
464 excluded_count[ER_UnknownFormat]++;
465 continue;
944cf283
FM
466 }
467 if (log_entry_status==RLRC_Ignore) {
7c8c06c5 468 excluded_count[ER_FormatData]++;
944cf283
FM
469 continue;
470 }
8b4e9578 471 format_count[log_line.current_format_idx]++;
944cf283 472
8b4e9578 473 if (!fp_log && ParsedOutputLog[0] && log_line.current_format!=&ReadSargLog) {
944cf283
FM
474 if(access(ParsedOutputLog,R_OK) != 0) {
475 my_mkdir(ParsedOutputLog);
476 }
477 if (snprintf(SargLogFile,sizeof(SargLogFile),"%s/sarg_temp.log",ParsedOutputLog)>=sizeof(SargLogFile)) {
af961877 478 debuga(__FILE__,__LINE__,_("Path too long: "));
041018b6 479 debuga_more("%s/sarg_temp.log\n",ParsedOutputLog);
1c91da07
FM
480 exit(EXIT_FAILURE);
481 }
944cf283 482 if((fp_log=MY_FOPEN(SargLogFile,"w"))==NULL) {
af961877 483 debuga(__FILE__,__LINE__,_("Cannot open file \"%s\": %s\n"),SargLogFile,strerror(errno));
1c91da07
FM
484 exit(EXIT_FAILURE);
485 }
944cf283
FM
486 fputs("*** SARG Log ***\n",fp_log);
487 }
1c91da07 488
944cf283 489 if (log_entry.Ip==NULL) {
af961877 490 debuga(__FILE__,__LINE__,_("Unknown input log file format: no IP addresses\n"));
944cf283
FM
491 break;
492 }
493 if (log_entry.User==NULL) {
af961877 494 debuga(__FILE__,__LINE__,_("Unknown input log file format: no user\n"));
944cf283
FM
495 break;
496 }
497 if (log_entry.Url==NULL) {
af961877 498 debuga(__FILE__,__LINE__,_("Unknown input log file format: no URL\n"));
944cf283
FM
499 break;
500 }
1c91da07 501
944cf283 502 idata=builddia(log_entry.EntryTime.tm_mday,log_entry.EntryTime.tm_mon+1,log_entry.EntryTime.tm_year+1900);
4246ae8d 503 if (debugz>=LogLevel_Data)
944cf283 504 printf("DATE=%s IDATA=%d DFROM=%d DUNTIL=%d\n",Filter->DateRange,idata,dfrom,duntil);
27d1fa35 505
6a943fc1
FM
506 if (EarliestDate<0 || idata<EarliestDate) {
507 EarliestDate=idata;
508 memcpy(&EarliestDateTime,&log_entry.EntryTime,sizeof(struct tm));
509 }
510 if (LatestDate<0 || idata>LatestDate) {
511 LatestDate=idata;
512 memcpy(&LatestDateTime,&log_entry.EntryTime,sizeof(struct tm));
513 }
944cf283 514 if(Filter->DateRange[0] != '\0'){
7c8c06c5
FM
515 if(idata < dfrom || idata > duntil) {
516 excluded_count[ER_OutOfDateRange]++;
517 continue;
518 }
944cf283 519 }
27d1fa35 520
944cf283 521 // Record only hours usage which is required
7c8c06c5
FM
522 if( bsearch( &( log_entry.EntryTime.tm_wday ), weekdays.list, weekdays.len, sizeof( int ), compar ) == NULL ) {
523 excluded_count[ER_OutOfWDayRange]++;
944cf283 524 continue;
7c8c06c5 525 }
27d1fa35 526
7c8c06c5
FM
527 if( bsearch( &( log_entry.EntryTime.tm_hour ), hours.list, hours.len, sizeof( int ), compar ) == NULL ) {
528 excluded_count[ER_OutOfHourRange]++;
944cf283 529 continue;
7c8c06c5 530 }
27d1fa35 531
36a0b94c
FM
532 if (StripSuffixLen>0)
533 {
534 x=strlen(log_entry.User);
535 if (x>StripSuffixLen && strcasecmp(log_entry.User+(x-StripSuffixLen),StripUserSuffix)==0)
7799ec8d
FM
536 {
537 if (x-StripSuffixLen>=sizeof(UserBuffer)) {
538 if (debugz>=LogLevel_Process) debuga(__FILE__,__LINE__,_("User ID too long: %s\n"),log_entry.User);
539 excluded_count[ER_UserNameTooLong]++;
540 totregsx++;
541 continue;
542 }
543 safe_strcpy(UserBuffer,log_entry.User,sizeof(UserBuffer));
544 UserBuffer[x-StripSuffixLen]='\0';
545 log_entry.User=UserBuffer;
546 }
36a0b94c 547 }
944cf283 548 if(strlen(log_entry.User) > MAX_USER_LEN) {
af961877 549 if (debugz>=LogLevel_Process) debuga(__FILE__,__LINE__,_("User ID too long: %s\n"),log_entry.User);
7c8c06c5 550 excluded_count[ER_UserNameTooLong]++;
944cf283
FM
551 totregsx++;
552 continue;
553 }
27d1fa35 554
944cf283
FM
555 // include_users
556 if(IncludeUsers[0] != '\0') {
557 snprintf(val1,sizeof(val1),":%s:",log_entry.User);
7c8c06c5
FM
558 if((str=(char *) strstr(IncludeUsers,val1)) == (char *) NULL ) {
559 excluded_count[ER_User]++;
27d1fa35 560 continue;
7c8c06c5 561 }
944cf283 562 }
27d1fa35 563
944cf283 564 if(vercode(log_entry.HttpCode)) {
af961877 565 if (debugz>=LogLevel_Process) debuga(__FILE__,__LINE__,_("Excluded code: %s\n"),log_entry.HttpCode);
7c8c06c5 566 excluded_count[ER_HttpCode]++;
944cf283
FM
567 totregsx++;
568 continue;
569 }
570
7c8c06c5
FM
571 if(testvaliduserchar(log_entry.User)) {
572 excluded_count[ER_InvalidUserChar]++;
944cf283 573 continue;
7c8c06c5 574 }
944cf283
FM
575
576 // replace any tab by a single space
577 for (str=log_entry.Url ; *str ; str++)
578 if (*str=='\t') *str=' ';
579 for (str=log_entry.HttpCode ; *str ; str++)
580 if (*str=='\t') *str=' ';
581
8b4e9578 582 if (log_line.current_format!=&ReadSargLog) {
944cf283
FM
583 /*
584 The full URL is not saved in sarg log. There is no point in testing the URL to detect
585 a downloaded file.
586 */
587 download_flag=is_download_suffix(log_entry.Url);
588 if (download_flag) {
589 safe_strcpy(download_url,log_entry.Url,sizeof(download_url));
27d1fa35 590 }
944cf283
FM
591 } else
592 download_flag=false;
27d1fa35 593
944cf283 594 url=process_url(log_entry.Url,LongUrl);
7c8c06c5
FM
595 if (!url || url[0] == '\0') {
596 excluded_count[ER_NoUrl]++;
597 continue;
598 }
944cf283
FM
599
600 if(addr[0] != '\0'){
7c8c06c5
FM
601 if(strcmp(addr,log_entry.Ip)!=0) {
602 excluded_count[ER_UntrackedIpAddr]++;
603 continue;
604 }
944cf283
FM
605 }
606 if(Filter->HostFilter) {
607 if(!vhexclude(url)) {
af961877 608 if (debugz>=LogLevel_Data) debuga(__FILE__,__LINE__,_("Excluded site: %s\n"),url);
7c8c06c5 609 excluded_count[ER_Url]++;
27d1fa35
FM
610 totregsx++;
611 continue;
612 }
944cf283 613 }
27d1fa35 614
944cf283
FM
615 if(Filter->StartTime >= 0 && Filter->EndTime >= 0) {
616 hmr=log_entry.EntryTime.tm_hour*100+log_entry.EntryTime.tm_min;
7c8c06c5
FM
617 if(hmr < Filter->StartTime || hmr > Filter->EndTime) {
618 excluded_count[ER_OutOfTimeRange]++;
619 continue;
620 }
944cf283 621 }
27d1fa35 622
944cf283 623 if(site[0] != '\0'){
7c8c06c5
FM
624 if(strstr(url,site)==0) {
625 excluded_count[ER_UntrackedUrl]++;
626 continue;
627 }
944cf283 628 }
27d1fa35 629
944cf283 630 if(UserIp) {
7799ec8d 631 log_entry.User=log_entry.Ip;
944cf283
FM
632 id_is_ip=true;
633 } else {
7799ec8d 634 log_entry.User=log_entry.User;
944cf283 635 id_is_ip=false;
7799ec8d 636 if ((log_entry.User[0]=='\0') || (log_entry.User[1]=='\0' && (log_entry.User[0]=='-' || log_entry.User[0]==' '))) {
944cf283 637 if(RecordsWithoutUser == RECORDWITHOUTUSER_IP) {
7799ec8d 638 log_entry.User=log_entry.Ip;
944cf283 639 id_is_ip=true;
27d1fa35 640 }
7c8c06c5
FM
641 if(RecordsWithoutUser == RECORDWITHOUTUSER_IGNORE) {
642 excluded_count[ER_NoUser]++;
27d1fa35 643 continue;
7c8c06c5 644 }
944cf283 645 if(RecordsWithoutUser == RECORDWITHOUTUSER_EVERYBODY)
7799ec8d 646 log_entry.User="everybody";
27d1fa35 647 } else {
944cf283 648 if(NtlmUserFormat == NTLMUSERFORMAT_USER) {
7799ec8d
FM
649 if ((str=strchr(log_entry.User,'+'))!=NULL || (str=strchr(log_entry.User,'\\'))!=NULL || (str=strchr(log_entry.User,'_'))!=NULL) {
650 log_entry.User=str+1;
27d1fa35
FM
651 }
652 }
653 }
944cf283 654 }
27d1fa35 655
944cf283 656 if(us[0] != '\0'){
7799ec8d 657 if(strcmp(log_entry.User,us)!=0) {
7c8c06c5
FM
658 excluded_count[ER_UntrackedUser]++;
659 continue;
660 }
944cf283 661 }
27d1fa35 662
944cf283 663 if(Filter->SysUsers) {
7799ec8d 664 snprintf(wuser,sizeof(wuser),":%s:",log_entry.User);
7c8c06c5
FM
665 if(strstr(userfile, wuser) == 0) {
666 excluded_count[ER_SysUser]++;
944cf283 667 continue;
7c8c06c5 668 }
944cf283 669 }
27d1fa35 670
944cf283 671 if(Filter->UserFilter) {
7799ec8d
FM
672 if(!vuexclude(log_entry.User)) {
673 if (debugz>=LogLevel_Process) debuga(__FILE__,__LINE__,_("Excluded user: %s\n"),log_entry.User);
7c8c06c5 674 excluded_count[ER_IgnoredUser]++;
944cf283
FM
675 totregsx++;
676 continue;
27d1fa35 677 }
944cf283 678 }
27d1fa35 679
7799ec8d
FM
680 user=process_user(log_entry.User);
681 if (log_entry.User!=user)
36a0b94c 682 {
7799ec8d 683 log_entry.User=user;
dc34d345
FM
684 id_is_ip=false;
685 }
7799ec8d 686 if (log_entry.User[0]=='\0' || (log_entry.User[1]=='\0' && (log_entry.User[0]=='-' || log_entry.User[0]==' ' || log_entry.User[0]==':'))) {
7c8c06c5 687 excluded_count[ER_NoUser]++;
944cf283 688 continue;
7c8c06c5 689 }
27d1fa35 690
944cf283 691 if (log_entry.DataSize<0) log_entry.DataSize=0;
0c87646f 692
944cf283
FM
693 if (log_entry.ElapsedTime<0) log_entry.ElapsedTime=0;
694 if (Filter->max_elapsed>0 && log_entry.ElapsedTime>Filter->max_elapsed) {
695 log_entry.ElapsedTime=0;
696 }
27d1fa35 697
944cf283
FM
698 if((str=(char *) strstr(linebuf, "[SmartFilter:")) != (char *) NULL ) {
699 fixendofline(str);
700 snprintf(smartfilter,sizeof(smartfilter),"\"%s\"",str+1);
701 } else strcpy(smartfilter,"\"\"");
27d1fa35 702
944cf283
FM
703 nopen=0;
704 prev_ufile=NULL;
7799ec8d 705 for (ufile=first_user_file ; ufile && strcmp(log_entry.User,ufile->user->id)!=0 ; ufile=ufile->next) {
944cf283
FM
706 prev_ufile=ufile;
707 if (ufile->file) nopen++;
708 }
709 if (!ufile) {
710 ufile=malloc(sizeof(*ufile));
27d1fa35 711 if (!ufile) {
7799ec8d 712 debuga(__FILE__,__LINE__,_("Not enough memory to store the user %s\n"),log_entry.User);
944cf283
FM
713 exit(EXIT_FAILURE);
714 }
715 memset(ufile,0,sizeof(*ufile));
716 ufile->next=first_user_file;
717 first_user_file=ufile;
8e134f1a
FM
718 /*
719 * This id_is_ip stuff is just to store the string only once if the user is
720 * identified by its IP address instead of a distinct ID and IP address.
721 */
7799ec8d 722 uinfo=userinfo_create(log_entry.User,(id_is_ip) ? NULL : log_entry.Ip);
944cf283 723 ufile->user=uinfo;
944cf283
FM
724 nusers++;
725 } else {
726 if (prev_ufile) {
727 prev_ufile->next=ufile->next;
27d1fa35
FM
728 ufile->next=first_user_file;
729 first_user_file=ufile;
27d1fa35 730 }
944cf283 731 }
1c91da07 732#ifdef ENABLE_DOUBLE_CHECK_DATA
d6e3e724
FM
733 if (strcmp(log_entry.HttpCode,"TCP_DENIED/407")!=0) {
734 ufile->user->nbytes+=log_entry.DataSize;
735 ufile->user->elap+=log_entry.ElapsedTime;
736 }
1c91da07 737#endif
27d1fa35 738
944cf283
FM
739 if (ufile->file==NULL) {
740 if (nopen>=maxopenfiles) {
741 x=0;
742 for (ufile1=first_user_file ; ufile1 ; ufile1=ufile1->next) {
743 if (ufile1->file!=NULL) {
744 if (x>=maxopenfiles) {
745 if (fclose(ufile1->file)==EOF) {
af961877 746 debuga(__FILE__,__LINE__,_("Write error in log file of user %s: %s\n"),ufile1->user->id,strerror(errno));
944cf283 747 exit(EXIT_FAILURE);
27d1fa35 748 }
944cf283 749 ufile1->file=NULL;
27d1fa35 750 }
944cf283 751 x++;
27d1fa35
FM
752 }
753 }
27d1fa35 754 }
944cf283 755 if (snprintf (tmp3, sizeof(tmp3), "%s/%s.user_unsort", tmp, ufile->user->filename)>=sizeof(tmp3)) {
af961877 756 debuga(__FILE__,__LINE__,_("Temporary user file name too long: %s/%s.user_unsort\n"), tmp, ufile->user->filename);
27d1fa35
FM
757 exit(EXIT_FAILURE);
758 }
944cf283 759 if ((ufile->file = MY_FOPEN (tmp3, "a")) == NULL) {
af961877 760 debuga(__FILE__,__LINE__,_("(log) Cannot open temporary file %s: %s\n"), tmp3, strerror(errno));
944cf283 761 exit (1);
1c91da07 762 }
944cf283 763 }
27d1fa35 764
944cf283
FM
765 strftime(dia, sizeof(dia), "%d/%m/%Y",&log_entry.EntryTime);
766 strftime(hora,sizeof(hora),"%H:%M:%S",&log_entry.EntryTime);
27d1fa35 767
944cf283
FM
768 if (fprintf(ufile->file, "%s\t%s\t%s\t%s\t%"PRIu64"\t%s\t%ld\t%s\n",dia,hora,
769 log_entry.Ip,url,(uint64_t)log_entry.DataSize,
770 log_entry.HttpCode,log_entry.ElapsedTime,smartfilter)<=0) {
7799ec8d 771 debuga(__FILE__,__LINE__,_("Write error in the log file of user %s\n"),log_entry.User);
944cf283
FM
772 exit(EXIT_FAILURE);
773 }
774 records_kept++;
27d1fa35 775
8b4e9578 776 if (fp_log && log_line.current_format!=&ReadSargLog) {
944cf283 777 fprintf(fp_log, "%s\t%s\t%s\t%s\t%s\t%"PRIu64"\t%s\t%ld\t%s\n",dia,hora,
7799ec8d 778 log_entry.User,log_entry.Ip,url,(uint64_t)log_entry.DataSize,
944cf283
FM
779 log_entry.HttpCode,log_entry.ElapsedTime,smartfilter);
780 }
781
782 totregsg++;
783
784 denied_write(&log_entry);
785 authfail_write(&log_entry);
88776d28 786 if (download_flag) download_write(&log_entry,download_url);
27d1fa35 787
8b4e9578 788 if (log_line.current_format!=&ReadSargLog) {
944cf283
FM
789 if (period.start.tm_year==0 || idata<mindate || compare_date(&period.start,&log_entry.EntryTime)>0){
790 mindate=idata;
791 memcpy(&period.start,&log_entry.EntryTime,sizeof(log_entry.EntryTime));
792 }
793 if (period.end.tm_year==0 || idata>maxdate || compare_date(&period.end,&log_entry.EntryTime)<0) {
794 maxdate=idata;
795 memcpy(&period.end,&log_entry.EntryTime,sizeof(log_entry.EntryTime));
27d1fa35
FM
796 }
797 }
798
4246ae8d 799 if (debugz>=LogLevel_Data){
944cf283 800 printf("IP=\t%s\n",log_entry.Ip);
7799ec8d 801 printf("USER=\t%s\n",log_entry.User);
944cf283
FM
802 printf("ELAP=\t%ld\n",log_entry.ElapsedTime);
803 printf("DATE=\t%s\n",dia);
804 printf("TIME=\t%s\n",hora);
805 //printf("FUNC=\t%s\n",fun);
806 printf("URL=\t%s\n",url);
807 printf("CODE=\t%s\n",log_entry.HttpCode);
808 printf("LEN=\t%"PRIu64"\n",(uint64_t)log_entry.DataSize);
27d1fa35
FM
809 }
810 }
811 longline_destroy(&line);
812
800eafb8
FM
813 if (FileObject_Close(fp_in)) {
814 debuga(__FILE__,__LINE__,_("Read error in \"%s\": %s\n"),arq,FileObject_GetLastCloseError());
815 exit(EXIT_FAILURE);
816 }
817 if (ShowReadStatistics) {
818 if (ShowReadPercent)
819 printf(_("SARG: Records in file: %lu, reading: %3.2f%%\n"),recs2, (float) 100 );
820 else
821 printf(_("SARG: Records in file: %lu\n"),recs2);
944cf283
FM
822 }
823}
824
7c8c06c5
FM
825/*!
826 * Display a line with the excluded entries count.
827 *
828 * \param Explain A translated string explaining the exluded count.
829 * \param Reason The reason number.
830 */
831static void DisplayExcludeCount(const char *Explain,enum ExcludeReasonEnum Reason)
832{
833 if (excluded_count[Reason]>0) {
af961877 834 debuga(__FILE__,__LINE__," %s: %lu\n",Explain,excluded_count[Reason]);
7c8c06c5
FM
835 }
836}
837
944cf283
FM
838/*!
839Read the log files.
840
841\param Filter The filtering parameters for the file to load.
842
843\retval 1 Records found.
844\retval 0 No record found.
845*/
846int ReadLogFile(struct ReadLogDataStruct *Filter)
847{
944cf283
FM
848 int x;
849 int cstatus;
850 struct userfilestruct *ufile;
851 struct userfilestruct *ufile1;
6068ae56
FM
852 FileListIterator FIter;
853 const char *file;
944cf283
FM
854
855 for (x=0 ; x<sizeof(format_count)/sizeof(*format_count) ; x++) format_count[x]=0;
7c8c06c5 856 for (x=0 ; x<sizeof(excluded_count)/sizeof(*excluded_count) ; x++) excluded_count[x]=0;
944cf283
FM
857 first_user_file=NULL;
858
859 if (!dataonly) {
860 denied_open();
861 authfail_open();
862 download_open();
863 }
864
6068ae56
FM
865 FIter=FileListIter_Open(AccessLog);
866 while ((file=FileListIter_Next(FIter))!=NULL)
867 ReadOneLogFile(Filter,file);
868 FileListIter_Close(FIter);
944cf283 869
27d1fa35 870 if(fp_log != NULL) {
27d1fa35 871 char val2[40];
944cf283 872 char val4[4096];//val4 must not be bigger than SargLogFile without fixing the strcpy below
0c87646f 873
507460ae 874 if (fclose(fp_log)==EOF) {
af961877 875 debuga(__FILE__,__LINE__,_("Write error in \"%s\": %s\n"),SargLogFile,strerror(errno));
507460ae
FM
876 exit(EXIT_FAILURE);
877 }
944cf283
FM
878 strftime(val2,sizeof(val2),"%d%m%Y_%H%M",&period.start);
879 strftime(val1,sizeof(val1),"%d%m%Y_%H%M",&period.end);
880 if (snprintf(val4,sizeof(val4),"%s/sarg-%s-%s.log",ParsedOutputLog,val2,val1)>=sizeof(val4)) {
af961877 881 debuga(__FILE__,__LINE__,_("Path too long: "));
041018b6 882 debuga_more("%s/sarg-%s-%s.log\n",ParsedOutputLog,val2,val1);
27d1fa35
FM
883 exit(EXIT_FAILURE);
884 }
944cf283 885 if (rename(SargLogFile,val4)) {
af961877 886 debuga(__FILE__,__LINE__,_("failed to rename %s to %s - %s\n"),SargLogFile,val4,strerror(errno));
27d1fa35 887 } else {
944cf283 888 strcpy(SargLogFile,val4);
27d1fa35
FM
889
890 if(strcmp(ParsedOutputLogCompress,"nocompress") != 0 && ParsedOutputLogCompress[0] != '\0') {
891 /*
892 No double quotes around ParsedOutputLogCompress because it may contain command line options. If double quotes are
893 necessary around the command name, put them in the configuration file.
894 */
944cf283 895 if (snprintf(val1,sizeof(val1),"%s \"%s\"",ParsedOutputLogCompress,SargLogFile)>=sizeof(val1)) {
af961877 896 debuga(__FILE__,__LINE__,_("Command too long: %s \"%s\"\n"),ParsedOutputLogCompress,SargLogFile);
27d1fa35
FM
897 exit(EXIT_FAILURE);
898 }
899 cstatus=system(val1);
900 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
af961877
FM
901 debuga(__FILE__,__LINE__,_("command return status %d\n"),WEXITSTATUS(cstatus));
902 debuga(__FILE__,__LINE__,_("command: %s\n"),val1);
27d1fa35
FM
903 exit(EXIT_FAILURE);
904 }
905 }
906 }
907 if(debug)
af961877 908 debuga(__FILE__,__LINE__,_("Sarg parsed log saved as %s\n"),SargLogFile);
0c87646f 909 }
27d1fa35 910
8e53b2e7 911 denied_close();
16b013cc 912 authfail_close();
11284535 913 download_close();
27d1fa35
FM
914
915 for (ufile=first_user_file ; ufile ; ufile=ufile1) {
916 ufile1=ufile->next;
507460ae 917 if (ufile->file!=NULL && fclose(ufile->file)==EOF) {
af961877 918 debuga(__FILE__,__LINE__,_("Write error in log file of user %s: %s\n"),ufile->user->id,strerror(errno));
507460ae
FM
919 exit(EXIT_FAILURE);
920 }
27d1fa35
FM
921 free(ufile);
922 }
923
924 if (debug) {
8e501bd6 925 unsigned long int totalcount=0;
27d1fa35 926
af961877 927 debuga(__FILE__,__LINE__,_(" Records read: %ld, written: %ld, excluded: %ld\n"),totregsl,totregsg,totregsx);
27d1fa35 928
7c8c06c5
FM
929 for (x=sizeof(excluded_count)/sizeof(*excluded_count)-1 ; x>=0 && excluded_count[x]>0 ; x--);
930 if (x>=0) {
af961877 931 debuga(__FILE__,__LINE__,_("Reasons for excluded entries:\n"));
7c8c06c5
FM
932 DisplayExcludeCount(_("User name too long"),ER_UserNameTooLong);
933 DisplayExcludeCount(_("Squid logged an incomplete query received from the client"),ER_IncompleteQuery);
934 DisplayExcludeCount(_("Log file turned over"),ER_LogfileTurnedOver);
7c8c06c5
FM
935 DisplayExcludeCount(_("Excluded by \"exclude_string\" in sarg.conf"),ER_ExcludeString);
936 DisplayExcludeCount(_("Unknown input log file format"),ER_UnknownFormat);
937 DisplayExcludeCount(_("Line ignored by the input log format"),ER_FormatData);
938 DisplayExcludeCount(_("Time outside the requested date range (-d)"),ER_OutOfDateRange);
939 DisplayExcludeCount(_("Ignored week day (\"weekdays\" parameter in sarg.conf)"),ER_OutOfWDayRange);
940 DisplayExcludeCount(_("Ignored hour (\"hours\" parameter in sarg.conf)"),ER_OutOfHourRange);
941 DisplayExcludeCount(_("User is not in the \"include_users\" list"),ER_User);
942 DisplayExcludeCount(_("HTTP code excluded by \"exclude_code\" file"),ER_HttpCode);
943 DisplayExcludeCount(_("Invalid character found in user name"),ER_InvalidUserChar);
944 DisplayExcludeCount(_("No URL in entry"),ER_NoUrl);
945 DisplayExcludeCount(_("Not the IP address requested with -a"),ER_UntrackedIpAddr);
946 DisplayExcludeCount(_("URL excluded by -c or \"exclude_hosts\""),ER_Url);
947 DisplayExcludeCount(_("Entry time outside of requested hour range (-t)"),ER_OutOfTimeRange);
948 DisplayExcludeCount(_("Not the URL requested by -s"),ER_UntrackedUrl);
949 DisplayExcludeCount(_("No user in entry"),ER_NoUser);
950 DisplayExcludeCount(_("Not the user requested by -u"),ER_UntrackedUser);
951 DisplayExcludeCount(_("System user as defined by \"password\" in sarg.conf"),ER_SysUser);
952 DisplayExcludeCount(_("User ignored by \"exclude_users\""),ER_IgnoredUser);
953 }
954
1c91da07
FM
955 for (x=0 ; x<sizeof(LogFormats)/sizeof(*LogFormats) ; x++) {
956 if (format_count[x]>0) {
957 /* TRANSLATORS: It displays the number of lines found in the input log files
958 * for each supported log format. The log format name is the %s and is a string
959 * you translate somewhere else. */
af961877 960 debuga(__FILE__,__LINE__,_("%s: %lu entries\n"),_(LogFormats[x]->Name),format_count[x]);
1c91da07
FM
961 totalcount+=format_count[x];
962 }
963 }
27d1fa35 964
1c91da07 965 if (totalcount==0 && totregsg)
af961877 966 debuga(__FILE__,__LINE__,_("Log with invalid format\n"));
27d1fa35
FM
967 }
968
27d1fa35
FM
969 return((totregsg!=0) ? 1 : 0);
970}
6a943fc1
FM
971
972/*!
973 * Get the start and end date of the period covered by the log files.
974 */
b6fb8c79 975bool GetLogPeriod(struct tm *Start,struct tm *End)
6a943fc1 976{
b6fb8c79
FM
977 bool Valid=false;
978
6a943fc1
FM
979 if (EarliestDate>=0) {
980 memcpy(Start,&EarliestDateTime,sizeof(struct tm));
b6fb8c79 981 Valid=true;
6a943fc1
FM
982 } else {
983 memset(Start,0,sizeof(struct tm));
984 }
985 if (LatestDate>=0) {
986 memcpy(End,&LatestDateTime,sizeof(struct tm));
b6fb8c79 987 Valid=true;
6a943fc1
FM
988 } else {
989 memset(End,0,sizeof(struct tm));
990 }
b6fb8c79 991 return(Valid);
6a943fc1 992}