]> git.ipfire.org Git - thirdparty/sarg.git/blame - readlog.c
Merge messages about IP addresses.
[thirdparty/sarg.git] / readlog.c
CommitLineData
27d1fa35
FM
1/*
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
110ce984 3 * 1998, 2015
27d1fa35
FM
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
27#include "include/conf.h"
28#include "include/defs.h"
29#include "include/readlog.h"
6068ae56 30#include "include/filelist.h"
27d1fa35
FM
31
//! Number of lines read between two refreshes of the reading statistics.
#define REPORT_EVERY_X_LINES 5000
//! Initial limit on the number of per-user temporary files kept open at the same time.
#define MAX_OPEN_USER_FILES 10
34
/*!
 * One node of the linked list of the per-user temporary output files.
 */
struct userfilestruct
{
	//! The next node of the list or NULL at the end of the list.
	struct userfilestruct *next;
	//! The user whose entries are written to the file.
	struct userinfostruct *user;
	//! The stream to the user's temporary file or NULL if it is not currently open.
	FILE *file;
};
41
7c8c06c5
FM
/*!
 * The reasons why an entry of the input log is excluded. Each value
 * is an index in the excluded_count array.
 */
enum ExcludeReasonEnum
{
	//! User name too long.
	ER_UserNameTooLong,
	//! Squid logged an incomplete query received from the client.
	ER_IncompleteQuery,
	//! Log file turned over.
	ER_LogfileTurnedOver,
	//! Excluded by exclude_string from sarg.conf.
	ER_ExcludeString,
	//! Unknown input log file format.
	ER_UnknownFormat,
	//! Line to be ignored from the input log file.
	ER_FormatData,
	//! Entry not within the requested date range.
	ER_OutOfDateRange,
	//! Ignored week day.
	ER_OutOfWDayRange,
	//! Ignored hour.
	ER_OutOfHourRange,
	//! User is not in the include_users list.
	ER_User,
	//! HTTP code excluded by exclude_code file.
	ER_HttpCode,
	//! Invalid character found in user name.
	ER_InvalidUserChar,
	//! No URL in entry.
	ER_NoUrl,
	//! Not the IP address requested with -a.
	ER_UntrackedIpAddr,
	//! URL excluded by -c or exclude_hosts.
	ER_Url,
	//! Entry time outside of requested hour range.
	ER_OutOfTimeRange,
	//! Not the URL requested by -s.
	ER_UntrackedUrl,
	//! No user in entry.
	ER_NoUser,
	//! Not the user requested by -u.
	ER_UntrackedUser,
	//! System user.
	ER_SysUser,
	//! User ignored by exclude_users
	ER_IgnoredUser,

	ER_Last //!< last entry of the list, used to size the excluded_count array
};
89
27d1fa35
FM
//! The week days whose entries are kept. Initialized with the seven days of the week.
numlist weekdays = { { 0, 1, 2, 3, 4, 5, 6 }, 7 };
//! The hours whose entries are kept. Initialized with the twenty-four hours of the day.
numlist hours = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 }, 24 };
92
//! String searched for ":user:" when the system users filter is enabled. Defined elsewhere.
extern char *userfile;
//! The list of the input log files to read. Defined elsewhere.
extern FileListObject AccessLog;

// The readers of the supported input log formats. They are defined elsewhere.
extern const struct ReadLogProcessStruct ReadSquidLog;
extern const struct ReadLogProcessStruct ReadCommonLog;
extern const struct ReadLogProcessStruct ReadSargLog;
extern const struct ReadLogProcessStruct ReadExtLog;
100
101//! The list of the supported log formats.
102static const struct ReadLogProcessStruct const *LogFormats[]=
103{
104 &ReadSquidLog,
105 &ReadCommonLog,
106 &ReadSargLog,
107 &ReadExtLog
108};
109
944cf283
FM
//! The path to the sarg log file.
static char SargLogFile[4096]="";
//! Handle to the sarg log file. NULL if not created.
static FILE *fp_log=NULL;
//! The number of records read from the input logs.
static long int totregsl=0;
//! The number of records kept.
static long int totregsg=0;
//! The number of records excluded. Only some of the exclusion reasons increment it. See excluded_count for the count per reason.
static long int totregsx=0;
//! The beginning of a linked list of user's file.
static struct userfilestruct *first_user_file=NULL;
//! Count the number of occurrences of each input log format.
static unsigned long int format_count[sizeof(LogFormats)/sizeof(*LogFormats)];
//! The minimum date found in the input logs.
static int mindate=0;
//! The maximum date found in the input logs.
static int maxdate=0;
//! Count the number of excluded records. It is indexed by ::ExcludeReasonEnum.
static unsigned long int excluded_count[ER_Last];
//! Earliest date found in the log. Negative as long as no entry has been read.
static int EarliestDate=-1;
//! The earliest date in time format.
static struct tm EarliestDateTime;
//! Latest date found in the log. Negative as long as no entry has been read.
static int LatestDate=-1;
//! The latest date in time format.
static struct tm LatestDateTime;
27d1fa35 137
8b4e9578
FM
138/*!
139 * Initialize the memory structure needed by LogLine_Parse() to parse
140 * a log line.
141 *
142 * \param log_line The structure to initialize.
143 */
144void LogLine_Init(struct LogLineStruct *log_line)
145{
146 log_line->current_format=NULL;
147 log_line->current_format_idx=-1;
148 log_line->file_name="";
149 log_line->successive_errors=0;
150 log_line->total_errors=0;
151}
152
153/*!
154 * Set the name of the log file being parsed.
155 *
156 * \param log_line Data structure to parse the log line.
157 * \param file_name The name of the log file being read.
158 */
159void LogLine_File(struct LogLineStruct *log_line,const char *file_name)
160{
161 log_line->file_name=file_name;
162}
163
164/*!
165 * Parse the next line from a log file.
166 *
167 * \param log_line A buffer to store the data about the current parsing.
168 * \param log_entry The variable to store the parsed data.
169 * \param linebuf The text line read from the log file.
170 *
171 * \return
172 */
173enum ReadLogReturnCodeEnum LogLine_Parse(struct LogLineStruct *log_line,struct ReadLogStruct *log_entry,char *linebuf)
174{
175 enum ReadLogReturnCodeEnum log_entry_status=RLRC_Unknown;
176 int x;
177
178 if (log_line->current_format)
179 {
180 memset(log_entry,0,sizeof(*log_entry));
181 log_entry_status=log_line->current_format->ReadEntry(linebuf,log_entry);
182 }
183
184 // find out what line format to use
185 if (log_entry_status==RLRC_Unknown)
186 {
187 for (x=0 ; x<(int)(sizeof(LogFormats)/sizeof(*LogFormats)) ; x++)
188 {
189 if (LogFormats[x]==log_line->current_format) continue;
190 memset(log_entry,0,sizeof(*log_entry));
191 log_entry_status=LogFormats[x]->ReadEntry(linebuf,log_entry);
192 if (log_entry_status!=RLRC_Unknown)
193 {
194 log_line->current_format=LogFormats[x];
195 log_line->current_format_idx=x;
196 if (debugz>=LogLevel_Process)
197 {
198 /* TRANSLATORS: The argument is the log format name as translated by you. */
199 debuga(_("Log format identified as \"%s\" for %s\n"),_(log_line->current_format->Name),log_line->file_name);
200 }
201 break;
202 }
203 }
204 if (x>=(int)(sizeof(LogFormats)/sizeof(*LogFormats)))
205 {
206 if (++log_line->successive_errors>NumLogSuccessiveErrors) {
207 debuga(ngettext("%d consecutive error found in the input log file %s\n",
208 "%d consecutive errors found in the input log file %s\n",log_line->successive_errors),log_line->successive_errors,log_line->file_name);
209 exit(EXIT_FAILURE);
210 }
211 if (NumLogTotalErrors>=0 && ++log_line->total_errors>NumLogTotalErrors) {
212 debuga(ngettext("%d error found in the input log file (last in %s)\n",
213 "%d errors found in the input log file (last in %s)\n",log_line->total_errors),log_line->total_errors,log_line->file_name);
214 exit(EXIT_FAILURE);
215 }
216 debuga(_("The following line read from %s could not be parsed and is ignored\n%s\n"),log_line->file_name,linebuf);
217 }
218 else
219 log_line->successive_errors=0;
220 }
221
222 if (log_line->current_format_idx<0 || log_line->current_format==NULL) {
223 debuga(_("Sarg failed to determine the format of the input log file %s\n"),log_line->file_name);
224 exit(EXIT_FAILURE);
225 }
226 if (log_entry_status==RLRC_InternalError) {
227 debuga(_("Internal error encountered while processing %s\nSee previous message to know the reason for that error.\n"),log_line->file_name);
228 exit(EXIT_FAILURE);
229 }
230 return(log_entry_status);
231}
232
944cf283
FM
233/*!
234Read a single log file.
27d1fa35 235
944cf283 236\param arq The log file name to read.
27d1fa35 237*/
944cf283 238static void ReadOneLogFile(struct ReadLogDataStruct *Filter,const char *arq)
27d1fa35 239{
27d1fa35
FM
240 longline line;
241 char *linebuf;
242 char *str;
27d1fa35 243 char hora[30];
c5d6ef4b 244 char dia[128]="";
27d1fa35 245 char wuser[MAXLEN];
944cf283 246 char tmp3[MAXLEN]="";
27d1fa35
FM
247 char download_url[MAXLEN];
248 char smartfilter[MAXLEN];
27d1fa35 249 const char *url;
dc34d345 250 const char *user;
27d1fa35
FM
251 int OutputNonZero = REPORT_EVERY_X_LINES ;
252 int idata=0;
27d1fa35
FM
253 int x;
254 int hmr;
255 int nopen;
256 int maxopenfiles=MAX_OPEN_USER_FILES;
27d1fa35
FM
257 unsigned long int recs1=0UL;
258 unsigned long int recs2=0UL;
27d1fa35 259 FILE *fp_in=NULL;
27d1fa35
FM
260 bool from_pipe;
261 bool from_stdin;
262 bool download_flag=false;
263 bool id_is_ip;
1c91da07 264 enum ReadLogReturnCodeEnum log_entry_status;
27d1fa35
FM
265 struct stat logstat;
266 struct getwordstruct gwarea;
27d1fa35
FM
267 struct userfilestruct *prev_ufile;
268 struct userinfostruct *uinfo;
27d1fa35
FM
269 struct userfilestruct *ufile;
270 struct userfilestruct *ufile1;
c5d6ef4b 271 struct ReadLogStruct log_entry;
8b4e9578 272 struct LogLineStruct log_line;
27d1fa35 273
8b4e9578
FM
274 LogLine_Init(&log_line);
275 LogLine_File(&log_line,arq);
944cf283
FM
276 for (x=0 ; x<sizeof(LogFormats)/sizeof(*LogFormats) ; x++)
277 if (LogFormats[x]->NewFile)
278 LogFormats[x]->NewFile(arq);
27d1fa35 279
944cf283
FM
280 if (arq[0]=='-' && arq[1]=='\0') {
281 if(debug)
282 debuga(_("Reading access log file: from stdin\n"));
283 fp_in=stdin;
284 from_stdin=true;
285 } else {
286 if (Filter->DateRange[0]!='\0') {
287 if (stat(arq,&logstat)!=0) {
288 debuga(_("Cannot get the modification time of input log file %s (%s). Processing it anyway\n"),arq,strerror(errno));
289 } else {
290 struct tm *logtime=localtime(&logstat.st_mtime);
291 if ((logtime->tm_year+1900)*10000+(logtime->tm_mon+1)*100+logtime->tm_mday<dfrom) {
292 debuga(_("Ignoring old log file %s\n"),arq);
293 return;
27d1fa35
FM
294 }
295 }
27d1fa35 296 }
944cf283
FM
297 fp_in=decomp(arq,&from_pipe);
298 if(fp_in==NULL) {
d2cd218e 299 debuga(_("Cannot open input log file \"%s\": %s\n"),arq,strerror(errno));
944cf283
FM
300 exit(EXIT_FAILURE);
301 }
302 if(debug) debuga(_("Reading access log file: %s\n"),arq);
303 from_stdin=false;
304 }
27d1fa35 305
944cf283 306 download_flag=false;
27d1fa35 307
944cf283
FM
308 recs1=0UL;
309 recs2=0UL;
2f4787e6 310
944cf283
FM
311 // pre-read the file only if we have to show stats
312 if (ShowReadStatistics && ShowReadPercent && !from_stdin && !from_pipe) {
313 size_t nread,i;
314 bool skipcr=false;
315 char tmp4[MAXLEN];
27d1fa35 316
944cf283
FM
317 while ((nread=fread(tmp4,1,sizeof(tmp4),fp_in))>0) {
318 for (i=0 ; i<nread ; i++)
319 if (skipcr) {
320 if (tmp4[i]!='\n' && tmp4[i]!='\r') {
321 skipcr=false;
27d1fa35 322 }
944cf283
FM
323 } else {
324 if (tmp4[i]=='\n' || tmp4[i]=='\r') {
325 skipcr=true;
326 recs1++;
327 }
328 }
27d1fa35 329 }
944cf283
FM
330 rewind(fp_in);
331 printf(_("SARG: Records in file: %lu, reading: %3.2f%%"),recs1,(float) 0);
332 putchar('\r');
333 fflush( stdout ) ;
334 }
27d1fa35 335
944cf283
FM
336 if ((line=longline_create())==NULL) {
337 debuga(_("Not enough memory to read log file %s\n"),arq);
338 exit(EXIT_FAILURE);
339 }
27d1fa35 340
944cf283 341 while ((linebuf=longline_read(fp_in,line))!=NULL) {
944cf283 342 lines_read++;
27d1fa35 343
944cf283
FM
344 recs2++;
345 if (ShowReadStatistics && --OutputNonZero<=0) {
346 if (recs1>0) {
347 double perc = recs2 * 100. / recs1 ;
348 printf(_("SARG: Records in file: %lu, reading: %3.2lf%%"),recs2,perc);
349 } else {
350 printf(_("SARG: Records in file: %lu"),recs2);
27d1fa35 351 }
944cf283
FM
352 putchar('\r');
353 fflush (stdout);
354 OutputNonZero = REPORT_EVERY_X_LINES ;
355 }
0c87646f 356
944cf283
FM
357 /*
358 The following checks are retained here as I don't know to
359 what format they apply. They date back to pre 2.4 versions.
360 */
361 //if(blen < 58) continue; //this test conflict with the reading of the sarg log header line
7c8c06c5
FM
362 if(strstr(linebuf,"HTTP/0.0") != 0) {//recorded by squid when encountering an incomplete query
363 excluded_count[ER_IncompleteQuery]++;
364 continue;
365 }
366 if(strstr(linebuf,"logfile turned over") != 0) {//reported by newsyslog
367 excluded_count[ER_LogfileTurnedOver]++;
368 continue;
369 }
944cf283
FM
370
371 // exclude_string
372 if(ExcludeString[0] != '\0') {
373 bool exstring=false;
374 getword_start(&gwarea,ExcludeString);
375 while(strchr(gwarea.current,':') != 0) {
376 if (getword_multisep(val1,sizeof(val1),&gwarea,':')<0) {
00d1f9ed 377 debuga(_("Invalid record in exclusion string\n"));
944cf283 378 exit(EXIT_FAILURE);
27d1fa35 379 }
944cf283 380 if((str=(char *) strstr(linebuf,val1)) != (char *) NULL ) {
27d1fa35 381 exstring=true;
944cf283
FM
382 break;
383 }
27d1fa35 384 }
944cf283
FM
385 if(!exstring && (str=(char *) strstr(linebuf,gwarea.current)) != (char *) NULL )
386 exstring=true;
7c8c06c5
FM
387 if(exstring) {
388 excluded_count[ER_ExcludeString]++;
389 continue;
390 }
944cf283 391 }
27d1fa35 392
944cf283 393 totregsl++;
4246ae8d 394 if (debugz>=LogLevel_Data)
944cf283 395 printf("BUF=%s\n",linebuf);
27d1fa35 396
944cf283 397 // process the line
8b4e9578
FM
398 log_entry_status=LogLine_Parse(&log_line,&log_entry,linebuf);
399 if (log_entry_status==RLRC_Unknown)
400 {
401 excluded_count[ER_UnknownFormat]++;
402 continue;
944cf283
FM
403 }
404 if (log_entry_status==RLRC_Ignore) {
7c8c06c5 405 excluded_count[ER_FormatData]++;
944cf283
FM
406 continue;
407 }
8b4e9578 408 format_count[log_line.current_format_idx]++;
944cf283 409
8b4e9578 410 if (!fp_log && ParsedOutputLog[0] && log_line.current_format!=&ReadSargLog) {
944cf283
FM
411 if(access(ParsedOutputLog,R_OK) != 0) {
412 my_mkdir(ParsedOutputLog);
413 }
414 if (snprintf(SargLogFile,sizeof(SargLogFile),"%s/sarg_temp.log",ParsedOutputLog)>=sizeof(SargLogFile)) {
041018b6
FM
415 debuga(_("Path too long: "));
416 debuga_more("%s/sarg_temp.log\n",ParsedOutputLog);
1c91da07
FM
417 exit(EXIT_FAILURE);
418 }
944cf283 419 if((fp_log=MY_FOPEN(SargLogFile,"w"))==NULL) {
96dadc9f 420 debuga(_("Cannot open file \"%s\": %s\n"),SargLogFile,strerror(errno));
1c91da07
FM
421 exit(EXIT_FAILURE);
422 }
944cf283
FM
423 fputs("*** SARG Log ***\n",fp_log);
424 }
1c91da07 425
944cf283
FM
426 if (log_entry.Ip==NULL) {
427 debuga(_("Unknown input log file format: no IP addresses\n"));
428 break;
429 }
430 if (log_entry.User==NULL) {
431 debuga(_("Unknown input log file format: no user\n"));
432 break;
433 }
434 if (log_entry.Url==NULL) {
435 debuga(_("Unknown input log file format: no URL\n"));
436 break;
437 }
1c91da07 438
944cf283 439 idata=builddia(log_entry.EntryTime.tm_mday,log_entry.EntryTime.tm_mon+1,log_entry.EntryTime.tm_year+1900);
4246ae8d 440 if (debugz>=LogLevel_Data)
944cf283 441 printf("DATE=%s IDATA=%d DFROM=%d DUNTIL=%d\n",Filter->DateRange,idata,dfrom,duntil);
27d1fa35 442
6a943fc1
FM
443 if (EarliestDate<0 || idata<EarliestDate) {
444 EarliestDate=idata;
445 memcpy(&EarliestDateTime,&log_entry.EntryTime,sizeof(struct tm));
446 }
447 if (LatestDate<0 || idata>LatestDate) {
448 LatestDate=idata;
449 memcpy(&LatestDateTime,&log_entry.EntryTime,sizeof(struct tm));
450 }
944cf283 451 if(Filter->DateRange[0] != '\0'){
7c8c06c5
FM
452 if(idata < dfrom || idata > duntil) {
453 excluded_count[ER_OutOfDateRange]++;
454 continue;
455 }
944cf283 456 }
27d1fa35 457
944cf283 458 // Record only hours usage which is required
7c8c06c5
FM
459 if( bsearch( &( log_entry.EntryTime.tm_wday ), weekdays.list, weekdays.len, sizeof( int ), compar ) == NULL ) {
460 excluded_count[ER_OutOfWDayRange]++;
944cf283 461 continue;
7c8c06c5 462 }
27d1fa35 463
7c8c06c5
FM
464 if( bsearch( &( log_entry.EntryTime.tm_hour ), hours.list, hours.len, sizeof( int ), compar ) == NULL ) {
465 excluded_count[ER_OutOfHourRange]++;
944cf283 466 continue;
7c8c06c5 467 }
27d1fa35 468
27d1fa35 469
944cf283 470 if(strlen(log_entry.User) > MAX_USER_LEN) {
cb59dc47 471 if (debugz>=LogLevel_Process) debuga(_("User ID too long: %s\n"),log_entry.User);
7c8c06c5 472 excluded_count[ER_UserNameTooLong]++;
944cf283
FM
473 totregsx++;
474 continue;
475 }
27d1fa35 476
944cf283
FM
477 // include_users
478 if(IncludeUsers[0] != '\0') {
479 snprintf(val1,sizeof(val1),":%s:",log_entry.User);
7c8c06c5
FM
480 if((str=(char *) strstr(IncludeUsers,val1)) == (char *) NULL ) {
481 excluded_count[ER_User]++;
27d1fa35 482 continue;
7c8c06c5 483 }
944cf283 484 }
27d1fa35 485
944cf283 486 if(vercode(log_entry.HttpCode)) {
cb59dc47 487 if (debugz>=LogLevel_Process) debuga(_("Excluded code: %s\n"),log_entry.HttpCode);
7c8c06c5 488 excluded_count[ER_HttpCode]++;
944cf283
FM
489 totregsx++;
490 continue;
491 }
492
7c8c06c5
FM
493 if(testvaliduserchar(log_entry.User)) {
494 excluded_count[ER_InvalidUserChar]++;
944cf283 495 continue;
7c8c06c5 496 }
944cf283
FM
497
498 // replace any tab by a single space
499 for (str=log_entry.Url ; *str ; str++)
500 if (*str=='\t') *str=' ';
501 for (str=log_entry.HttpCode ; *str ; str++)
502 if (*str=='\t') *str=' ';
503
8b4e9578 504 if (log_line.current_format!=&ReadSargLog) {
944cf283
FM
505 /*
506 The full URL is not saved in sarg log. There is no point in testing the URL to detect
507 a downloaded file.
508 */
509 download_flag=is_download_suffix(log_entry.Url);
510 if (download_flag) {
511 safe_strcpy(download_url,log_entry.Url,sizeof(download_url));
27d1fa35 512 }
944cf283
FM
513 } else
514 download_flag=false;
27d1fa35 515
944cf283 516 url=process_url(log_entry.Url,LongUrl);
7c8c06c5
FM
517 if (!url || url[0] == '\0') {
518 excluded_count[ER_NoUrl]++;
519 continue;
520 }
944cf283
FM
521
522 if(addr[0] != '\0'){
7c8c06c5
FM
523 if(strcmp(addr,log_entry.Ip)!=0) {
524 excluded_count[ER_UntrackedIpAddr]++;
525 continue;
526 }
944cf283
FM
527 }
528 if(Filter->HostFilter) {
529 if(!vhexclude(url)) {
87d42c97 530 if (debugz>=LogLevel_Data) debuga(_("Excluded site: %s\n"),url);
7c8c06c5 531 excluded_count[ER_Url]++;
27d1fa35
FM
532 totregsx++;
533 continue;
534 }
944cf283 535 }
27d1fa35 536
944cf283
FM
537 if(Filter->StartTime >= 0 && Filter->EndTime >= 0) {
538 hmr=log_entry.EntryTime.tm_hour*100+log_entry.EntryTime.tm_min;
7c8c06c5
FM
539 if(hmr < Filter->StartTime || hmr > Filter->EndTime) {
540 excluded_count[ER_OutOfTimeRange]++;
541 continue;
542 }
944cf283 543 }
27d1fa35 544
944cf283 545 if(site[0] != '\0'){
7c8c06c5
FM
546 if(strstr(url,site)==0) {
547 excluded_count[ER_UntrackedUrl]++;
548 continue;
549 }
944cf283 550 }
27d1fa35 551
944cf283
FM
552 if(UserIp) {
553 log_entry.User=log_entry.Ip;
554 id_is_ip=true;
555 } else {
556 id_is_ip=false;
557 if ((log_entry.User[0]=='\0') || (log_entry.User[1]=='\0' && (log_entry.User[0]=='-' || log_entry.User[0]==' '))) {
558 if(RecordsWithoutUser == RECORDWITHOUTUSER_IP) {
559 log_entry.User=log_entry.Ip;
560 id_is_ip=true;
27d1fa35 561 }
7c8c06c5
FM
562 if(RecordsWithoutUser == RECORDWITHOUTUSER_IGNORE) {
563 excluded_count[ER_NoUser]++;
27d1fa35 564 continue;
7c8c06c5 565 }
944cf283
FM
566 if(RecordsWithoutUser == RECORDWITHOUTUSER_EVERYBODY)
567 log_entry.User="everybody";
27d1fa35 568 } else {
944cf283 569 if(NtlmUserFormat == NTLMUSERFORMAT_USER) {
e9ecdd7f 570 if ((str=strchr(log_entry.User,'+'))!=NULL || (str=strchr(log_entry.User,'\\'))!=NULL || (str=strchr(log_entry.User,'_'))!=NULL) {
944cf283 571 log_entry.User=str+1;
27d1fa35
FM
572 }
573 }
574 }
944cf283 575 }
27d1fa35 576
944cf283 577 if(us[0] != '\0'){
7c8c06c5
FM
578 if(strcmp(log_entry.User,us)!=0) {
579 excluded_count[ER_UntrackedUser]++;
580 continue;
581 }
944cf283 582 }
27d1fa35 583
944cf283
FM
584 if(Filter->SysUsers) {
585 snprintf(wuser,sizeof(wuser),":%s:",log_entry.User);
7c8c06c5
FM
586 if(strstr(userfile, wuser) == 0) {
587 excluded_count[ER_SysUser]++;
944cf283 588 continue;
7c8c06c5 589 }
944cf283 590 }
27d1fa35 591
944cf283
FM
592 if(Filter->UserFilter) {
593 if(!vuexclude(log_entry.User)) {
cb59dc47 594 if (debugz>=LogLevel_Process) debuga(_("Excluded user: %s\n"),log_entry.User);
7c8c06c5 595 excluded_count[ER_IgnoredUser]++;
944cf283
FM
596 totregsx++;
597 continue;
27d1fa35 598 }
944cf283 599 }
27d1fa35 600
dc34d345
FM
601 user=process_user(log_entry.User);
602 if (log_entry.User!=user) {
603 log_entry.User=user;
604 id_is_ip=false;
605 }
944cf283 606 if (log_entry.User[0]=='\0' || (log_entry.User[1]=='\0' && (log_entry.User[0]=='-' ||
7c8c06c5
FM
607 log_entry.User[0]==' ' || log_entry.User[0]==':'))) {
608 excluded_count[ER_NoUser]++;
944cf283 609 continue;
7c8c06c5 610 }
27d1fa35 611
944cf283 612 if (log_entry.DataSize<0) log_entry.DataSize=0;
0c87646f 613
944cf283
FM
614 if (log_entry.ElapsedTime<0) log_entry.ElapsedTime=0;
615 if (Filter->max_elapsed>0 && log_entry.ElapsedTime>Filter->max_elapsed) {
616 log_entry.ElapsedTime=0;
617 }
27d1fa35 618
944cf283
FM
619 if((str=(char *) strstr(linebuf, "[SmartFilter:")) != (char *) NULL ) {
620 fixendofline(str);
621 snprintf(smartfilter,sizeof(smartfilter),"\"%s\"",str+1);
622 } else strcpy(smartfilter,"\"\"");
27d1fa35 623
944cf283
FM
624 nopen=0;
625 prev_ufile=NULL;
626 for (ufile=first_user_file ; ufile && strcmp(log_entry.User,ufile->user->id)!=0 ; ufile=ufile->next) {
627 prev_ufile=ufile;
628 if (ufile->file) nopen++;
629 }
630 if (!ufile) {
631 ufile=malloc(sizeof(*ufile));
27d1fa35 632 if (!ufile) {
944cf283
FM
633 debuga(_("Not enough memory to store the user %s\n"),log_entry.User);
634 exit(EXIT_FAILURE);
635 }
636 memset(ufile,0,sizeof(*ufile));
637 ufile->next=first_user_file;
638 first_user_file=ufile;
8e134f1a
FM
639 /*
640 * This id_is_ip stuff is just to store the string only once if the user is
641 * identified by its IP address instead of a distinct ID and IP address.
642 */
aa6ac9f2 643 uinfo=userinfo_create(log_entry.User,(id_is_ip) ? NULL : log_entry.Ip);
944cf283 644 ufile->user=uinfo;
944cf283
FM
645 nusers++;
646 } else {
647 if (prev_ufile) {
648 prev_ufile->next=ufile->next;
27d1fa35
FM
649 ufile->next=first_user_file;
650 first_user_file=ufile;
27d1fa35 651 }
944cf283 652 }
1c91da07 653#ifdef ENABLE_DOUBLE_CHECK_DATA
d6e3e724
FM
654 if (strcmp(log_entry.HttpCode,"TCP_DENIED/407")!=0) {
655 ufile->user->nbytes+=log_entry.DataSize;
656 ufile->user->elap+=log_entry.ElapsedTime;
657 }
1c91da07 658#endif
27d1fa35 659
944cf283
FM
660 if (ufile->file==NULL) {
661 if (nopen>=maxopenfiles) {
662 x=0;
663 for (ufile1=first_user_file ; ufile1 ; ufile1=ufile1->next) {
664 if (ufile1->file!=NULL) {
665 if (x>=maxopenfiles) {
666 if (fclose(ufile1->file)==EOF) {
204781f4 667 debuga(_("Write error in log file of user %s: %s\n"),ufile1->user->id,strerror(errno));
944cf283 668 exit(EXIT_FAILURE);
27d1fa35 669 }
944cf283 670 ufile1->file=NULL;
27d1fa35 671 }
944cf283 672 x++;
27d1fa35
FM
673 }
674 }
27d1fa35 675 }
944cf283
FM
676 if (snprintf (tmp3, sizeof(tmp3), "%s/%s.user_unsort", tmp, ufile->user->filename)>=sizeof(tmp3)) {
677 debuga(_("Temporary user file name too long: %s/%s.user_unsort\n"), tmp, ufile->user->filename);
27d1fa35
FM
678 exit(EXIT_FAILURE);
679 }
944cf283
FM
680 if ((ufile->file = MY_FOPEN (tmp3, "a")) == NULL) {
681 debuga(_("(log) Cannot open temporary file %s: %s\n"), tmp3, strerror(errno));
682 exit (1);
1c91da07 683 }
944cf283 684 }
27d1fa35 685
944cf283
FM
686 strftime(dia, sizeof(dia), "%d/%m/%Y",&log_entry.EntryTime);
687 strftime(hora,sizeof(hora),"%H:%M:%S",&log_entry.EntryTime);
27d1fa35 688
944cf283
FM
689 if (fprintf(ufile->file, "%s\t%s\t%s\t%s\t%"PRIu64"\t%s\t%ld\t%s\n",dia,hora,
690 log_entry.Ip,url,(uint64_t)log_entry.DataSize,
691 log_entry.HttpCode,log_entry.ElapsedTime,smartfilter)<=0) {
692 debuga(_("Write error in the log file of user %s\n"),log_entry.User);
693 exit(EXIT_FAILURE);
694 }
695 records_kept++;
27d1fa35 696
8b4e9578 697 if (fp_log && log_line.current_format!=&ReadSargLog) {
944cf283
FM
698 fprintf(fp_log, "%s\t%s\t%s\t%s\t%s\t%"PRIu64"\t%s\t%ld\t%s\n",dia,hora,
699 log_entry.User,log_entry.Ip,url,(uint64_t)log_entry.DataSize,
700 log_entry.HttpCode,log_entry.ElapsedTime,smartfilter);
701 }
702
703 totregsg++;
704
705 denied_write(&log_entry);
706 authfail_write(&log_entry);
88776d28 707 if (download_flag) download_write(&log_entry,download_url);
27d1fa35 708
8b4e9578 709 if (log_line.current_format!=&ReadSargLog) {
944cf283
FM
710 if (period.start.tm_year==0 || idata<mindate || compare_date(&period.start,&log_entry.EntryTime)>0){
711 mindate=idata;
712 memcpy(&period.start,&log_entry.EntryTime,sizeof(log_entry.EntryTime));
713 }
714 if (period.end.tm_year==0 || idata>maxdate || compare_date(&period.end,&log_entry.EntryTime)<0) {
715 maxdate=idata;
716 memcpy(&period.end,&log_entry.EntryTime,sizeof(log_entry.EntryTime));
27d1fa35
FM
717 }
718 }
719
4246ae8d 720 if (debugz>=LogLevel_Data){
944cf283
FM
721 printf("IP=\t%s\n",log_entry.Ip);
722 printf("USER=\t%s\n",log_entry.User);
723 printf("ELAP=\t%ld\n",log_entry.ElapsedTime);
724 printf("DATE=\t%s\n",dia);
725 printf("TIME=\t%s\n",hora);
726 //printf("FUNC=\t%s\n",fun);
727 printf("URL=\t%s\n",url);
728 printf("CODE=\t%s\n",log_entry.HttpCode);
729 printf("LEN=\t%"PRIu64"\n",(uint64_t)log_entry.DataSize);
27d1fa35
FM
730 }
731 }
732 longline_destroy(&line);
733
944cf283
FM
734 if (!from_stdin) {
735 if (from_pipe)
736 pclose(fp_in);
737 else {
738 fclose(fp_in);
739 if (ShowReadStatistics) {
740 if (ShowReadPercent)
741 printf(_("SARG: Records in file: %lu, reading: %3.2f%%\n"),recs2, (float) 100 );
742 else
743 printf(_("SARG: Records in file: %lu\n"),recs2);
744 }
745 }
746 }
747}
748
7c8c06c5
FM
749/*!
750 * Display a line with the excluded entries count.
751 *
752 * \param Explain A translated string explaining the exluded count.
753 * \param Reason The reason number.
754 */
755static void DisplayExcludeCount(const char *Explain,enum ExcludeReasonEnum Reason)
756{
757 if (excluded_count[Reason]>0) {
758 debuga(" %s: %lu\n",Explain,excluded_count[Reason]);
759 }
760}
761
944cf283
FM
762/*!
763Read the log files.
764
765\param Filter The filtering parameters for the file to load.
766
767\retval 1 Records found.
768\retval 0 No record found.
769*/
770int ReadLogFile(struct ReadLogDataStruct *Filter)
771{
944cf283
FM
772 int x;
773 int cstatus;
774 struct userfilestruct *ufile;
775 struct userfilestruct *ufile1;
6068ae56
FM
776 FileListIterator FIter;
777 const char *file;
944cf283
FM
778
779 for (x=0 ; x<sizeof(format_count)/sizeof(*format_count) ; x++) format_count[x]=0;
7c8c06c5 780 for (x=0 ; x<sizeof(excluded_count)/sizeof(*excluded_count) ; x++) excluded_count[x]=0;
944cf283
FM
781 first_user_file=NULL;
782
783 if (!dataonly) {
784 denied_open();
785 authfail_open();
786 download_open();
787 }
788
6068ae56
FM
789 FIter=FileListIter_Open(AccessLog);
790 while ((file=FileListIter_Next(FIter))!=NULL)
791 ReadOneLogFile(Filter,file);
792 FileListIter_Close(FIter);
944cf283 793
27d1fa35 794 if(fp_log != NULL) {
27d1fa35 795 char val2[40];
944cf283 796 char val4[4096];//val4 must not be bigger than SargLogFile without fixing the strcpy below
0c87646f 797
507460ae 798 if (fclose(fp_log)==EOF) {
96dadc9f 799 debuga(_("Write error in \"%s\": %s\n"),SargLogFile,strerror(errno));
507460ae
FM
800 exit(EXIT_FAILURE);
801 }
944cf283
FM
802 strftime(val2,sizeof(val2),"%d%m%Y_%H%M",&period.start);
803 strftime(val1,sizeof(val1),"%d%m%Y_%H%M",&period.end);
804 if (snprintf(val4,sizeof(val4),"%s/sarg-%s-%s.log",ParsedOutputLog,val2,val1)>=sizeof(val4)) {
041018b6
FM
805 debuga(_("Path too long: "));
806 debuga_more("%s/sarg-%s-%s.log\n",ParsedOutputLog,val2,val1);
27d1fa35
FM
807 exit(EXIT_FAILURE);
808 }
944cf283
FM
809 if (rename(SargLogFile,val4)) {
810 debuga(_("failed to rename %s to %s - %s\n"),SargLogFile,val4,strerror(errno));
27d1fa35 811 } else {
944cf283 812 strcpy(SargLogFile,val4);
27d1fa35
FM
813
814 if(strcmp(ParsedOutputLogCompress,"nocompress") != 0 && ParsedOutputLogCompress[0] != '\0') {
815 /*
816 No double quotes around ParsedOutputLogCompress because it may contain command line options. If double quotes are
817 necessary around the command name, put them in the configuration file.
818 */
944cf283
FM
819 if (snprintf(val1,sizeof(val1),"%s \"%s\"",ParsedOutputLogCompress,SargLogFile)>=sizeof(val1)) {
820 debuga(_("Command too long: %s \"%s\"\n"),ParsedOutputLogCompress,SargLogFile);
27d1fa35
FM
821 exit(EXIT_FAILURE);
822 }
823 cstatus=system(val1);
824 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
825 debuga(_("command return status %d\n"),WEXITSTATUS(cstatus));
826 debuga(_("command: %s\n"),val1);
827 exit(EXIT_FAILURE);
828 }
829 }
830 }
831 if(debug)
944cf283 832 debuga(_("Sarg parsed log saved as %s\n"),SargLogFile);
0c87646f 833 }
27d1fa35 834
8e53b2e7 835 denied_close();
16b013cc 836 authfail_close();
11284535 837 download_close();
27d1fa35
FM
838
839 for (ufile=first_user_file ; ufile ; ufile=ufile1) {
840 ufile1=ufile->next;
507460ae 841 if (ufile->file!=NULL && fclose(ufile->file)==EOF) {
204781f4 842 debuga(_("Write error in log file of user %s: %s\n"),ufile->user->id,strerror(errno));
507460ae
FM
843 exit(EXIT_FAILURE);
844 }
27d1fa35
FM
845 free(ufile);
846 }
847
848 if (debug) {
8e501bd6 849 unsigned long int totalcount=0;
27d1fa35
FM
850
851 debuga(_(" Records read: %ld, written: %ld, excluded: %ld\n"),totregsl,totregsg,totregsx);
852
7c8c06c5
FM
853 for (x=sizeof(excluded_count)/sizeof(*excluded_count)-1 ; x>=0 && excluded_count[x]>0 ; x--);
854 if (x>=0) {
855 debuga(_("Reasons for excluded entries:\n"));
856 DisplayExcludeCount(_("User name too long"),ER_UserNameTooLong);
857 DisplayExcludeCount(_("Squid logged an incomplete query received from the client"),ER_IncompleteQuery);
858 DisplayExcludeCount(_("Log file turned over"),ER_LogfileTurnedOver);
7c8c06c5
FM
859 DisplayExcludeCount(_("Excluded by \"exclude_string\" in sarg.conf"),ER_ExcludeString);
860 DisplayExcludeCount(_("Unknown input log file format"),ER_UnknownFormat);
861 DisplayExcludeCount(_("Line ignored by the input log format"),ER_FormatData);
862 DisplayExcludeCount(_("Time outside the requested date range (-d)"),ER_OutOfDateRange);
863 DisplayExcludeCount(_("Ignored week day (\"weekdays\" parameter in sarg.conf)"),ER_OutOfWDayRange);
864 DisplayExcludeCount(_("Ignored hour (\"hours\" parameter in sarg.conf)"),ER_OutOfHourRange);
865 DisplayExcludeCount(_("User is not in the \"include_users\" list"),ER_User);
866 DisplayExcludeCount(_("HTTP code excluded by \"exclude_code\" file"),ER_HttpCode);
867 DisplayExcludeCount(_("Invalid character found in user name"),ER_InvalidUserChar);
868 DisplayExcludeCount(_("No URL in entry"),ER_NoUrl);
869 DisplayExcludeCount(_("Not the IP address requested with -a"),ER_UntrackedIpAddr);
870 DisplayExcludeCount(_("URL excluded by -c or \"exclude_hosts\""),ER_Url);
871 DisplayExcludeCount(_("Entry time outside of requested hour range (-t)"),ER_OutOfTimeRange);
872 DisplayExcludeCount(_("Not the URL requested by -s"),ER_UntrackedUrl);
873 DisplayExcludeCount(_("No user in entry"),ER_NoUser);
874 DisplayExcludeCount(_("Not the user requested by -u"),ER_UntrackedUser);
875 DisplayExcludeCount(_("System user as defined by \"password\" in sarg.conf"),ER_SysUser);
876 DisplayExcludeCount(_("User ignored by \"exclude_users\""),ER_IgnoredUser);
877 }
878
1c91da07
FM
879 for (x=0 ; x<sizeof(LogFormats)/sizeof(*LogFormats) ; x++) {
880 if (format_count[x]>0) {
881 /* TRANSLATORS: It displays the number of lines found in the input log files
882 * for each supported log format. The log format name is the %s and is a string
883 * you translate somewhere else. */
8e501bd6 884 debuga(_("%s: %lu entries\n"),_(LogFormats[x]->Name),format_count[x]);
1c91da07
FM
885 totalcount+=format_count[x];
886 }
887 }
27d1fa35 888
1c91da07 889 if (totalcount==0 && totregsg)
27d1fa35
FM
890 debuga(_("Log with invalid format\n"));
891 }
892
27d1fa35
FM
893 return((totregsg!=0) ? 1 : 0);
894}
6a943fc1
FM
895
896/*!
897 * Get the start and end date of the period covered by the log files.
898 */
899void GetLogPeriod(struct tm *Start,struct tm *End)
900{
901 if (EarliestDate>=0) {
902 memcpy(Start,&EarliestDateTime,sizeof(struct tm));
903 } else {
904 memset(Start,0,sizeof(struct tm));
905 }
906 if (LatestDate>=0) {
907 memcpy(End,&LatestDateTime,sizeof(struct tm));
908 } else {
909 memset(End,0,sizeof(struct tm));
910 }
911}