]> git.ipfire.org Git - thirdparty/sarg.git/blame - readlog.c
Fix several possible sprintf buffer overflows
[thirdparty/sarg.git] / readlog.c
CommitLineData
/*
 * SARG Squid Analysis Report Generator      http://sarg.sourceforge.net
 *                                                            1998, 2013
 *
 * SARG donations:
 *      please look at http://sarg.sourceforge.net/donations.php
 * Support:
 *     http://sourceforge.net/projects/sarg/forums/forum/363374
 * ---------------------------------------------------------------------
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111, USA.
 *
 */
26
27#include "include/conf.h"
28#include "include/defs.h"
29#include "include/readlog.h"
6068ae56 30#include "include/filelist.h"
27d1fa35
FM
31
//! Number of input lines read between two updates of the progress message.
#define REPORT_EVERY_X_LINES 5000
//! Number of per-user temporary files kept open at the same time.
#define MAX_OPEN_USER_FILES 10
34
/*!
One node of the linked list of the per-user temporary files.
*/
struct userfilestruct
{
	//! The next node of the list or NULL at the end of the list.
	struct userfilestruct *next;
	//! The user whose entries are stored in the file.
	struct userinfostruct *user;
	//! The handle of the user's temporary file. NULL while the file is closed.
	FILE *file;
};
41
7c8c06c5
FM
/*!
The reasons why an entry of the input log may be excluded from the report.

The values are used as indexes into an array of counters, therefore ER_Last
must remain the last item of the list.
*/
enum ExcludeReasonEnum
{
	//! User name too long.
	ER_UserNameTooLong,
	//! Squid logged an incomplete query received from the client.
	ER_IncompleteQuery,
	//! Log file turned over.
	ER_LogfileTurnedOver,
	//! Excluded by exclude_string from sarg.conf.
	ER_ExcludeString,
	//! Unknown input log file format.
	ER_UnknownFormat,
	//! Line to be ignored from the input log file.
	ER_FormatData,
	//! Entry not within the requested date range.
	ER_OutOfDateRange,
	//! Ignored week day.
	ER_OutOfWDayRange,
	//! Ignored hour.
	ER_OutOfHourRange,
	//! User is not in the include_users list.
	ER_User,
	//! HTTP code excluded by exclude_code file.
	ER_HttpCode,
	//! Invalid character found in user name.
	ER_InvalidUserChar,
	//! No URL in entry.
	ER_NoUrl,
	//! Not the IP address requested with -a.
	ER_UntrackedIpAddr,
	//! URL excluded by -c or exclude_hosts.
	ER_Url,
	//! Entry time outside of requested hour range.
	ER_OutOfTimeRange,
	//! Not the URL requested by -s.
	ER_UntrackedUrl,
	//! No user in entry.
	ER_NoUser,
	//! Not the user requested by -u.
	ER_UntrackedUser,
	//! System user.
	ER_SysUser,
	//! User ignored by exclude_users
	ER_IgnoredUser,

	ER_Last //!< last entry of the list
};
89
27d1fa35
FM
90numlist weekdays = { { 0, 1, 2, 3, 4, 5, 6 }, 7 };
91numlist hours = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 }, 24 };
92
27d1fa35 93extern char *userfile;
6068ae56 94extern FileListObject AccessLog;
27d1fa35 95
1c91da07
FM
96extern const struct ReadLogProcessStruct ReadSquidLog;
97extern const struct ReadLogProcessStruct ReadCommonLog;
98extern const struct ReadLogProcessStruct ReadSargLog;
99extern const struct ReadLogProcessStruct ReadExtLog;
100
101//! The list of the supported log formats.
102static const struct ReadLogProcessStruct const *LogFormats[]=
103{
104 &ReadSquidLog,
105 &ReadCommonLog,
106 &ReadSargLog,
107 &ReadExtLog
108};
109
944cf283
FM
110//! The path to the sarg log file.
111static char SargLogFile[4096]="";
112//! Handle to the sarg log file. NULL if not created.
113static FILE *fp_log=NULL;
114//! The number of records read from the input logs.
115static long int totregsl=0;
116//! The number of records kept.
117static long int totregsg=0;
118//! The number of records excluded.
119static long int totregsx=0;
120//! The beginning of a linked list of user's file.
121static struct userfilestruct *first_user_file=NULL;
122//! Count the number of occurence of each input log format.
8e501bd6 123static unsigned long int format_count[sizeof(LogFormats)/sizeof(*LogFormats)];
944cf283
FM
124//! The minimum date found in the input logs.
125static int mindate=0;
126static int maxdate=0;
7c8c06c5
FM
127//! Count the number of excluded records.
128static unsigned long int excluded_count[ER_Last];
27d1fa35 129
944cf283
FM
130/*!
131Read a single log file.
27d1fa35 132
944cf283 133\param arq The log file name to read.
27d1fa35 134*/
944cf283 135static void ReadOneLogFile(struct ReadLogDataStruct *Filter,const char *arq)
27d1fa35 136{
27d1fa35
FM
137 longline line;
138 char *linebuf;
139 char *str;
27d1fa35 140 char hora[30];
c5d6ef4b 141 char dia[128]="";
27d1fa35 142 char wuser[MAXLEN];
944cf283 143 char tmp3[MAXLEN]="";
27d1fa35
FM
144 char download_url[MAXLEN];
145 char smartfilter[MAXLEN];
27d1fa35 146 const char *url;
dc34d345 147 const char *user;
944cf283 148 int current_format_idx;
27d1fa35
FM
149 int OutputNonZero = REPORT_EVERY_X_LINES ;
150 int idata=0;
27d1fa35
FM
151 int x;
152 int hmr;
153 int nopen;
154 int maxopenfiles=MAX_OPEN_USER_FILES;
4d62cb0a
FM
155 int successive_errors=0;
156 int total_errors=0;
27d1fa35
FM
157 unsigned long int recs1=0UL;
158 unsigned long int recs2=0UL;
27d1fa35 159 FILE *fp_in=NULL;
27d1fa35
FM
160 bool from_pipe;
161 bool from_stdin;
162 bool download_flag=false;
163 bool id_is_ip;
1c91da07 164 enum ReadLogReturnCodeEnum log_entry_status;
27d1fa35
FM
165 struct stat logstat;
166 struct getwordstruct gwarea;
27d1fa35
FM
167 struct userfilestruct *prev_ufile;
168 struct userinfostruct *uinfo;
27d1fa35
FM
169 struct userfilestruct *ufile;
170 struct userfilestruct *ufile1;
c5d6ef4b 171 struct ReadLogStruct log_entry;
1c91da07 172 const struct ReadLogProcessStruct *current_format=NULL;
27d1fa35 173
944cf283
FM
174 current_format=NULL;
175 current_format_idx=-1;
176 for (x=0 ; x<sizeof(LogFormats)/sizeof(*LogFormats) ; x++)
177 if (LogFormats[x]->NewFile)
178 LogFormats[x]->NewFile(arq);
27d1fa35 179
944cf283
FM
180 if (arq[0]=='-' && arq[1]=='\0') {
181 if(debug)
182 debuga(_("Reading access log file: from stdin\n"));
183 fp_in=stdin;
184 from_stdin=true;
185 } else {
186 if (Filter->DateRange[0]!='\0') {
187 if (stat(arq,&logstat)!=0) {
188 debuga(_("Cannot get the modification time of input log file %s (%s). Processing it anyway\n"),arq,strerror(errno));
189 } else {
190 struct tm *logtime=localtime(&logstat.st_mtime);
191 if ((logtime->tm_year+1900)*10000+(logtime->tm_mon+1)*100+logtime->tm_mday<dfrom) {
192 debuga(_("Ignoring old log file %s\n"),arq);
193 return;
27d1fa35
FM
194 }
195 }
27d1fa35 196 }
944cf283
FM
197 fp_in=decomp(arq,&from_pipe);
198 if(fp_in==NULL) {
d2cd218e 199 debuga(_("Cannot open input log file \"%s\": %s\n"),arq,strerror(errno));
944cf283
FM
200 exit(EXIT_FAILURE);
201 }
202 if(debug) debuga(_("Reading access log file: %s\n"),arq);
203 from_stdin=false;
204 }
27d1fa35 205
944cf283 206 download_flag=false;
27d1fa35 207
944cf283
FM
208 recs1=0UL;
209 recs2=0UL;
2f4787e6 210
944cf283
FM
211 // pre-read the file only if we have to show stats
212 if (ShowReadStatistics && ShowReadPercent && !from_stdin && !from_pipe) {
213 size_t nread,i;
214 bool skipcr=false;
215 char tmp4[MAXLEN];
27d1fa35 216
944cf283
FM
217 while ((nread=fread(tmp4,1,sizeof(tmp4),fp_in))>0) {
218 for (i=0 ; i<nread ; i++)
219 if (skipcr) {
220 if (tmp4[i]!='\n' && tmp4[i]!='\r') {
221 skipcr=false;
27d1fa35 222 }
944cf283
FM
223 } else {
224 if (tmp4[i]=='\n' || tmp4[i]=='\r') {
225 skipcr=true;
226 recs1++;
227 }
228 }
27d1fa35 229 }
944cf283
FM
230 rewind(fp_in);
231 printf(_("SARG: Records in file: %lu, reading: %3.2f%%"),recs1,(float) 0);
232 putchar('\r');
233 fflush( stdout ) ;
234 }
27d1fa35 235
944cf283
FM
236 if ((line=longline_create())==NULL) {
237 debuga(_("Not enough memory to read log file %s\n"),arq);
238 exit(EXIT_FAILURE);
239 }
27d1fa35 240
944cf283 241 while ((linebuf=longline_read(fp_in,line))!=NULL) {
944cf283 242 lines_read++;
27d1fa35 243
944cf283
FM
244 recs2++;
245 if (ShowReadStatistics && --OutputNonZero<=0) {
246 if (recs1>0) {
247 double perc = recs2 * 100. / recs1 ;
248 printf(_("SARG: Records in file: %lu, reading: %3.2lf%%"),recs2,perc);
249 } else {
250 printf(_("SARG: Records in file: %lu"),recs2);
27d1fa35 251 }
944cf283
FM
252 putchar('\r');
253 fflush (stdout);
254 OutputNonZero = REPORT_EVERY_X_LINES ;
255 }
0c87646f 256
944cf283
FM
257 /*
258 The following checks are retained here as I don't know to
259 what format they apply. They date back to pre 2.4 versions.
260 */
261 //if(blen < 58) continue; //this test conflict with the reading of the sarg log header line
7c8c06c5
FM
262 if(strstr(linebuf,"HTTP/0.0") != 0) {//recorded by squid when encountering an incomplete query
263 excluded_count[ER_IncompleteQuery]++;
264 continue;
265 }
266 if(strstr(linebuf,"logfile turned over") != 0) {//reported by newsyslog
267 excluded_count[ER_LogfileTurnedOver]++;
268 continue;
269 }
944cf283
FM
270
271 // exclude_string
272 if(ExcludeString[0] != '\0') {
273 bool exstring=false;
274 getword_start(&gwarea,ExcludeString);
275 while(strchr(gwarea.current,':') != 0) {
276 if (getword_multisep(val1,sizeof(val1),&gwarea,':')<0) {
277 debuga(_("Maybe you have a broken record or garbage in your exclusion string\n"));
278 exit(EXIT_FAILURE);
27d1fa35 279 }
944cf283 280 if((str=(char *) strstr(linebuf,val1)) != (char *) NULL ) {
27d1fa35 281 exstring=true;
944cf283
FM
282 break;
283 }
27d1fa35 284 }
944cf283
FM
285 if(!exstring && (str=(char *) strstr(linebuf,gwarea.current)) != (char *) NULL )
286 exstring=true;
7c8c06c5
FM
287 if(exstring) {
288 excluded_count[ER_ExcludeString]++;
289 continue;
290 }
944cf283 291 }
27d1fa35 292
944cf283
FM
293 totregsl++;
294 if(debugm)
295 printf("BUF=%s\n",linebuf);
27d1fa35 296
944cf283
FM
297 // process the line
298 log_entry_status=RLRC_Unknown;
299 memset(&log_entry,0,sizeof(log_entry));
300 if (current_format) {
301 log_entry_status=current_format->ReadEntry(linebuf,&log_entry);
302 }
0c87646f 303
944cf283
FM
304 // find out what line format to use
305 if (log_entry_status==RLRC_Unknown) {
306 for (x=0 ; x<(int)(sizeof(LogFormats)/sizeof(*LogFormats)) ; x++) {
307 if (LogFormats[x]==current_format) continue;
308 memset(&log_entry,0,sizeof(log_entry));
309 log_entry_status=LogFormats[x]->ReadEntry(linebuf,&log_entry);
310 if (log_entry_status!=RLRC_Unknown) break;
311 }
312 if (x>=(int)(sizeof(LogFormats)/sizeof(*LogFormats))) {
313 if (++successive_errors>NumLogSuccessiveErrors) {
314 debuga(ngettext("%d consecutive error found in the input log file %s\n",
315 "%d consecutive errors found in the input log file %s\n",successive_errors),successive_errors,arq);
316 exit(EXIT_FAILURE);
1c91da07 317 }
944cf283
FM
318 if (NumLogTotalErrors>=0 && ++total_errors>NumLogTotalErrors) {
319 debuga(ngettext("%d error found in the input log file (last in %s)\n",
320 "%d errors found in the input log file (last in %s)\n",total_errors),total_errors,arq);
321 exit(EXIT_FAILURE);
1c91da07 322 }
944cf283 323 debuga(_("The following line read from %s could not be parsed and is ignored\n%s\n"),arq,linebuf);
7c8c06c5 324 excluded_count[ER_UnknownFormat]++;
1c91da07
FM
325 continue;
326 }
944cf283
FM
327 current_format=LogFormats[x];
328 current_format_idx=x;
329 if (debugz) {
330 /* TRANSLATORS: The argument is the log format name as translated by you. */
331 debuga(_("Log format identified as \"%s\" for %s\n"),_(current_format->Name),arq);
332 }
333 successive_errors=0;
334 }
335 if (log_entry_status==RLRC_Ignore) {
7c8c06c5 336 excluded_count[ER_FormatData]++;
944cf283
FM
337 continue;
338 }
339 if (current_format_idx<0 || current_format==NULL) {
340 debuga(_("Sarg failed to determine the format of the input log file %s\n"),arq);
341 exit(EXIT_FAILURE);
342 }
343 if (log_entry_status==RLRC_InternalError) {
344 debuga(_("Internal error encountered while processing %s\nSee previous message to know the reason for that error.\n"),arq);
345 exit(EXIT_FAILURE);
346 }
347 format_count[current_format_idx]++;
348
349 if (!fp_log && ParsedOutputLog[0] && current_format!=&ReadSargLog) {
350 if(access(ParsedOutputLog,R_OK) != 0) {
351 my_mkdir(ParsedOutputLog);
352 }
353 if (snprintf(SargLogFile,sizeof(SargLogFile),"%s/sarg_temp.log",ParsedOutputLog)>=sizeof(SargLogFile)) {
354 debuga(_("File name too long: %s/sarg_temp.log\n"),ParsedOutputLog);
1c91da07
FM
355 exit(EXIT_FAILURE);
356 }
944cf283
FM
357 if((fp_log=MY_FOPEN(SargLogFile,"w"))==NULL) {
358 debuga(_("(log) Cannot open log file %s: %s\n"),SargLogFile,strerror(errno));
1c91da07
FM
359 exit(EXIT_FAILURE);
360 }
944cf283
FM
361 fputs("*** SARG Log ***\n",fp_log);
362 }
1c91da07 363
944cf283
FM
364 if (log_entry.Ip==NULL) {
365 debuga(_("Unknown input log file format: no IP addresses\n"));
366 break;
367 }
368 if (log_entry.User==NULL) {
369 debuga(_("Unknown input log file format: no user\n"));
370 break;
371 }
372 if (log_entry.Url==NULL) {
373 debuga(_("Unknown input log file format: no URL\n"));
374 break;
375 }
1c91da07 376
944cf283
FM
377 idata=builddia(log_entry.EntryTime.tm_mday,log_entry.EntryTime.tm_mon+1,log_entry.EntryTime.tm_year+1900);
378 if(debugm)
379 printf("DATE=%s IDATA=%d DFROM=%d DUNTIL=%d\n",Filter->DateRange,idata,dfrom,duntil);
27d1fa35 380
944cf283 381 if(Filter->DateRange[0] != '\0'){
7c8c06c5
FM
382 if(idata < dfrom || idata > duntil) {
383 excluded_count[ER_OutOfDateRange]++;
384 continue;
385 }
944cf283 386 }
27d1fa35 387
944cf283 388 // Record only hours usage which is required
7c8c06c5
FM
389 if( bsearch( &( log_entry.EntryTime.tm_wday ), weekdays.list, weekdays.len, sizeof( int ), compar ) == NULL ) {
390 excluded_count[ER_OutOfWDayRange]++;
944cf283 391 continue;
7c8c06c5 392 }
27d1fa35 393
7c8c06c5
FM
394 if( bsearch( &( log_entry.EntryTime.tm_hour ), hours.list, hours.len, sizeof( int ), compar ) == NULL ) {
395 excluded_count[ER_OutOfHourRange]++;
944cf283 396 continue;
7c8c06c5 397 }
27d1fa35 398
27d1fa35 399
944cf283
FM
400 if(strlen(log_entry.User) > MAX_USER_LEN) {
401 if (debugm) printf(_("User ID too long: %s\n"),log_entry.User);
7c8c06c5 402 excluded_count[ER_UserNameTooLong]++;
944cf283
FM
403 totregsx++;
404 continue;
405 }
27d1fa35 406
944cf283
FM
407 // include_users
408 if(IncludeUsers[0] != '\0') {
409 snprintf(val1,sizeof(val1),":%s:",log_entry.User);
7c8c06c5
FM
410 if((str=(char *) strstr(IncludeUsers,val1)) == (char *) NULL ) {
411 excluded_count[ER_User]++;
27d1fa35 412 continue;
7c8c06c5 413 }
944cf283 414 }
27d1fa35 415
944cf283
FM
416 if(vercode(log_entry.HttpCode)) {
417 if (debugm) printf(_("Excluded code: %s\n"),log_entry.HttpCode);
7c8c06c5 418 excluded_count[ER_HttpCode]++;
944cf283
FM
419 totregsx++;
420 continue;
421 }
422
7c8c06c5
FM
423 if(testvaliduserchar(log_entry.User)) {
424 excluded_count[ER_InvalidUserChar]++;
944cf283 425 continue;
7c8c06c5 426 }
944cf283
FM
427
428 // replace any tab by a single space
429 for (str=log_entry.Url ; *str ; str++)
430 if (*str=='\t') *str=' ';
431 for (str=log_entry.HttpCode ; *str ; str++)
432 if (*str=='\t') *str=' ';
433
434 if (current_format!=&ReadSargLog) {
435 /*
436 The full URL is not saved in sarg log. There is no point in testing the URL to detect
437 a downloaded file.
438 */
439 download_flag=is_download_suffix(log_entry.Url);
440 if (download_flag) {
441 safe_strcpy(download_url,log_entry.Url,sizeof(download_url));
27d1fa35 442 }
944cf283
FM
443 } else
444 download_flag=false;
27d1fa35 445
944cf283 446 url=process_url(log_entry.Url,LongUrl);
7c8c06c5
FM
447 if (!url || url[0] == '\0') {
448 excluded_count[ER_NoUrl]++;
449 continue;
450 }
944cf283
FM
451
452 if(addr[0] != '\0'){
7c8c06c5
FM
453 if(strcmp(addr,log_entry.Ip)!=0) {
454 excluded_count[ER_UntrackedIpAddr]++;
455 continue;
456 }
944cf283
FM
457 }
458 if(Filter->HostFilter) {
459 if(!vhexclude(url)) {
460 if (debugm) printf(_("Excluded site: %s\n"),url);
7c8c06c5 461 excluded_count[ER_Url]++;
27d1fa35
FM
462 totregsx++;
463 continue;
464 }
944cf283 465 }
27d1fa35 466
944cf283
FM
467 if(Filter->StartTime >= 0 && Filter->EndTime >= 0) {
468 hmr=log_entry.EntryTime.tm_hour*100+log_entry.EntryTime.tm_min;
7c8c06c5
FM
469 if(hmr < Filter->StartTime || hmr > Filter->EndTime) {
470 excluded_count[ER_OutOfTimeRange]++;
471 continue;
472 }
944cf283 473 }
27d1fa35 474
944cf283 475 if(site[0] != '\0'){
7c8c06c5
FM
476 if(strstr(url,site)==0) {
477 excluded_count[ER_UntrackedUrl]++;
478 continue;
479 }
944cf283 480 }
27d1fa35 481
944cf283
FM
482 if(UserIp) {
483 log_entry.User=log_entry.Ip;
484 id_is_ip=true;
485 } else {
486 id_is_ip=false;
487 if ((log_entry.User[0]=='\0') || (log_entry.User[1]=='\0' && (log_entry.User[0]=='-' || log_entry.User[0]==' '))) {
488 if(RecordsWithoutUser == RECORDWITHOUTUSER_IP) {
489 log_entry.User=log_entry.Ip;
490 id_is_ip=true;
27d1fa35 491 }
7c8c06c5
FM
492 if(RecordsWithoutUser == RECORDWITHOUTUSER_IGNORE) {
493 excluded_count[ER_NoUser]++;
27d1fa35 494 continue;
7c8c06c5 495 }
944cf283
FM
496 if(RecordsWithoutUser == RECORDWITHOUTUSER_EVERYBODY)
497 log_entry.User="everybody";
27d1fa35 498 } else {
944cf283 499 if(NtlmUserFormat == NTLMUSERFORMAT_USER) {
e9ecdd7f 500 if ((str=strchr(log_entry.User,'+'))!=NULL || (str=strchr(log_entry.User,'\\'))!=NULL || (str=strchr(log_entry.User,'_'))!=NULL) {
944cf283 501 log_entry.User=str+1;
27d1fa35
FM
502 }
503 }
504 }
944cf283 505 }
27d1fa35 506
944cf283 507 if(us[0] != '\0'){
7c8c06c5
FM
508 if(strcmp(log_entry.User,us)!=0) {
509 excluded_count[ER_UntrackedUser]++;
510 continue;
511 }
944cf283 512 }
27d1fa35 513
944cf283
FM
514 if(Filter->SysUsers) {
515 snprintf(wuser,sizeof(wuser),":%s:",log_entry.User);
7c8c06c5
FM
516 if(strstr(userfile, wuser) == 0) {
517 excluded_count[ER_SysUser]++;
944cf283 518 continue;
7c8c06c5 519 }
944cf283 520 }
27d1fa35 521
944cf283
FM
522 if(Filter->UserFilter) {
523 if(!vuexclude(log_entry.User)) {
524 if (debugm) printf(_("Excluded user: %s\n"),log_entry.User);
7c8c06c5 525 excluded_count[ER_IgnoredUser]++;
944cf283
FM
526 totregsx++;
527 continue;
27d1fa35 528 }
944cf283 529 }
27d1fa35 530
dc34d345
FM
531 user=process_user(log_entry.User);
532 if (log_entry.User!=user) {
533 log_entry.User=user;
534 id_is_ip=false;
535 }
944cf283 536 if (log_entry.User[0]=='\0' || (log_entry.User[1]=='\0' && (log_entry.User[0]=='-' ||
7c8c06c5
FM
537 log_entry.User[0]==' ' || log_entry.User[0]==':'))) {
538 excluded_count[ER_NoUser]++;
944cf283 539 continue;
7c8c06c5 540 }
27d1fa35 541
944cf283 542 if (log_entry.DataSize<0) log_entry.DataSize=0;
0c87646f 543
944cf283
FM
544 if (log_entry.ElapsedTime<0) log_entry.ElapsedTime=0;
545 if (Filter->max_elapsed>0 && log_entry.ElapsedTime>Filter->max_elapsed) {
546 log_entry.ElapsedTime=0;
547 }
27d1fa35 548
944cf283
FM
549 if((str=(char *) strstr(linebuf, "[SmartFilter:")) != (char *) NULL ) {
550 fixendofline(str);
551 snprintf(smartfilter,sizeof(smartfilter),"\"%s\"",str+1);
552 } else strcpy(smartfilter,"\"\"");
27d1fa35 553
944cf283
FM
554 nopen=0;
555 prev_ufile=NULL;
556 for (ufile=first_user_file ; ufile && strcmp(log_entry.User,ufile->user->id)!=0 ; ufile=ufile->next) {
557 prev_ufile=ufile;
558 if (ufile->file) nopen++;
559 }
560 if (!ufile) {
561 ufile=malloc(sizeof(*ufile));
27d1fa35 562 if (!ufile) {
944cf283
FM
563 debuga(_("Not enough memory to store the user %s\n"),log_entry.User);
564 exit(EXIT_FAILURE);
565 }
566 memset(ufile,0,sizeof(*ufile));
567 ufile->next=first_user_file;
568 first_user_file=ufile;
aa6ac9f2 569 uinfo=userinfo_create(log_entry.User,(id_is_ip) ? NULL : log_entry.Ip);
944cf283 570 ufile->user=uinfo;
944cf283
FM
571 nusers++;
572 } else {
573 if (prev_ufile) {
574 prev_ufile->next=ufile->next;
27d1fa35
FM
575 ufile->next=first_user_file;
576 first_user_file=ufile;
27d1fa35 577 }
944cf283 578 }
1c91da07 579#ifdef ENABLE_DOUBLE_CHECK_DATA
944cf283
FM
580 ufile->user->nbytes+=log_entry.DataSize;
581 ufile->user->elap+=log_entry.ElapsedTime;
1c91da07 582#endif
27d1fa35 583
944cf283
FM
584 if (ufile->file==NULL) {
585 if (nopen>=maxopenfiles) {
586 x=0;
587 for (ufile1=first_user_file ; ufile1 ; ufile1=ufile1->next) {
588 if (ufile1->file!=NULL) {
589 if (x>=maxopenfiles) {
590 if (fclose(ufile1->file)==EOF) {
591 debuga(_("Write error in the log file of user %s: %s\n"),ufile1->user->id,strerror(errno));
592 exit(EXIT_FAILURE);
27d1fa35 593 }
944cf283 594 ufile1->file=NULL;
27d1fa35 595 }
944cf283 596 x++;
27d1fa35
FM
597 }
598 }
27d1fa35 599 }
944cf283
FM
600 if (snprintf (tmp3, sizeof(tmp3), "%s/%s.user_unsort", tmp, ufile->user->filename)>=sizeof(tmp3)) {
601 debuga(_("Temporary user file name too long: %s/%s.user_unsort\n"), tmp, ufile->user->filename);
27d1fa35
FM
602 exit(EXIT_FAILURE);
603 }
944cf283
FM
604 if ((ufile->file = MY_FOPEN (tmp3, "a")) == NULL) {
605 debuga(_("(log) Cannot open temporary file %s: %s\n"), tmp3, strerror(errno));
606 exit (1);
1c91da07 607 }
944cf283 608 }
27d1fa35 609
944cf283
FM
610 strftime(dia, sizeof(dia), "%d/%m/%Y",&log_entry.EntryTime);
611 strftime(hora,sizeof(hora),"%H:%M:%S",&log_entry.EntryTime);
27d1fa35 612
944cf283
FM
613 if (fprintf(ufile->file, "%s\t%s\t%s\t%s\t%"PRIu64"\t%s\t%ld\t%s\n",dia,hora,
614 log_entry.Ip,url,(uint64_t)log_entry.DataSize,
615 log_entry.HttpCode,log_entry.ElapsedTime,smartfilter)<=0) {
616 debuga(_("Write error in the log file of user %s\n"),log_entry.User);
617 exit(EXIT_FAILURE);
618 }
619 records_kept++;
27d1fa35 620
944cf283
FM
621 if (fp_log && current_format!=&ReadSargLog) {
622 fprintf(fp_log, "%s\t%s\t%s\t%s\t%s\t%"PRIu64"\t%s\t%ld\t%s\n",dia,hora,
623 log_entry.User,log_entry.Ip,url,(uint64_t)log_entry.DataSize,
624 log_entry.HttpCode,log_entry.ElapsedTime,smartfilter);
625 }
626
627 totregsg++;
628
629 denied_write(&log_entry);
630 authfail_write(&log_entry);
88776d28 631 if (download_flag) download_write(&log_entry,download_url);
27d1fa35 632
944cf283
FM
633 if (current_format!=&ReadSargLog) {
634 if (period.start.tm_year==0 || idata<mindate || compare_date(&period.start,&log_entry.EntryTime)>0){
635 mindate=idata;
636 memcpy(&period.start,&log_entry.EntryTime,sizeof(log_entry.EntryTime));
637 }
638 if (period.end.tm_year==0 || idata>maxdate || compare_date(&period.end,&log_entry.EntryTime)<0) {
639 maxdate=idata;
640 memcpy(&period.end,&log_entry.EntryTime,sizeof(log_entry.EntryTime));
27d1fa35
FM
641 }
642 }
643
944cf283
FM
644 if(debugm){
645 printf("IP=\t%s\n",log_entry.Ip);
646 printf("USER=\t%s\n",log_entry.User);
647 printf("ELAP=\t%ld\n",log_entry.ElapsedTime);
648 printf("DATE=\t%s\n",dia);
649 printf("TIME=\t%s\n",hora);
650 //printf("FUNC=\t%s\n",fun);
651 printf("URL=\t%s\n",url);
652 printf("CODE=\t%s\n",log_entry.HttpCode);
653 printf("LEN=\t%"PRIu64"\n",(uint64_t)log_entry.DataSize);
27d1fa35
FM
654 }
655 }
656 longline_destroy(&line);
657
944cf283
FM
658 if (!from_stdin) {
659 if (from_pipe)
660 pclose(fp_in);
661 else {
662 fclose(fp_in);
663 if (ShowReadStatistics) {
664 if (ShowReadPercent)
665 printf(_("SARG: Records in file: %lu, reading: %3.2f%%\n"),recs2, (float) 100 );
666 else
667 printf(_("SARG: Records in file: %lu\n"),recs2);
668 }
669 }
670 }
671}
672
7c8c06c5
FM
673/*!
674 * Display a line with the excluded entries count.
675 *
676 * \param Explain A translated string explaining the exluded count.
677 * \param Reason The reason number.
678 */
679static void DisplayExcludeCount(const char *Explain,enum ExcludeReasonEnum Reason)
680{
681 if (excluded_count[Reason]>0) {
682 debuga(" %s: %lu\n",Explain,excluded_count[Reason]);
683 }
684}
685
944cf283
FM
686/*!
687Read the log files.
688
689\param Filter The filtering parameters for the file to load.
690
691\retval 1 Records found.
692\retval 0 No record found.
693*/
694int ReadLogFile(struct ReadLogDataStruct *Filter)
695{
944cf283
FM
696 int x;
697 int cstatus;
698 struct userfilestruct *ufile;
699 struct userfilestruct *ufile1;
6068ae56
FM
700 FileListIterator FIter;
701 const char *file;
944cf283
FM
702
703 for (x=0 ; x<sizeof(format_count)/sizeof(*format_count) ; x++) format_count[x]=0;
7c8c06c5 704 for (x=0 ; x<sizeof(excluded_count)/sizeof(*excluded_count) ; x++) excluded_count[x]=0;
944cf283
FM
705 first_user_file=NULL;
706
707 if (!dataonly) {
708 denied_open();
709 authfail_open();
710 download_open();
711 }
712
6068ae56
FM
713 FIter=FileListIter_Open(AccessLog);
714 while ((file=FileListIter_Next(FIter))!=NULL)
715 ReadOneLogFile(Filter,file);
716 FileListIter_Close(FIter);
944cf283 717
27d1fa35 718 if(fp_log != NULL) {
27d1fa35 719 char val2[40];
944cf283 720 char val4[4096];//val4 must not be bigger than SargLogFile without fixing the strcpy below
0c87646f 721
507460ae 722 if (fclose(fp_log)==EOF) {
944cf283 723 debuga(_("Write error in %s: %s\n"),SargLogFile,strerror(errno));
507460ae
FM
724 exit(EXIT_FAILURE);
725 }
944cf283
FM
726 strftime(val2,sizeof(val2),"%d%m%Y_%H%M",&period.start);
727 strftime(val1,sizeof(val1),"%d%m%Y_%H%M",&period.end);
728 if (snprintf(val4,sizeof(val4),"%s/sarg-%s-%s.log",ParsedOutputLog,val2,val1)>=sizeof(val4)) {
729 debuga(_("File name too long: %s/sarg-%s-%s.log\n"),ParsedOutputLog,val2,val1);
27d1fa35
FM
730 exit(EXIT_FAILURE);
731 }
944cf283
FM
732 if (rename(SargLogFile,val4)) {
733 debuga(_("failed to rename %s to %s - %s\n"),SargLogFile,val4,strerror(errno));
27d1fa35 734 } else {
944cf283 735 strcpy(SargLogFile,val4);
27d1fa35
FM
736
737 if(strcmp(ParsedOutputLogCompress,"nocompress") != 0 && ParsedOutputLogCompress[0] != '\0') {
738 /*
739 No double quotes around ParsedOutputLogCompress because it may contain command line options. If double quotes are
740 necessary around the command name, put them in the configuration file.
741 */
944cf283
FM
742 if (snprintf(val1,sizeof(val1),"%s \"%s\"",ParsedOutputLogCompress,SargLogFile)>=sizeof(val1)) {
743 debuga(_("Command too long: %s \"%s\"\n"),ParsedOutputLogCompress,SargLogFile);
27d1fa35
FM
744 exit(EXIT_FAILURE);
745 }
746 cstatus=system(val1);
747 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
748 debuga(_("command return status %d\n"),WEXITSTATUS(cstatus));
749 debuga(_("command: %s\n"),val1);
750 exit(EXIT_FAILURE);
751 }
752 }
753 }
754 if(debug)
944cf283 755 debuga(_("Sarg parsed log saved as %s\n"),SargLogFile);
0c87646f 756 }
27d1fa35 757
8e53b2e7 758 denied_close();
16b013cc 759 authfail_close();
11284535 760 download_close();
27d1fa35
FM
761
762 for (ufile=first_user_file ; ufile ; ufile=ufile1) {
763 ufile1=ufile->next;
507460ae
FM
764 if (ufile->file!=NULL && fclose(ufile->file)==EOF) {
765 debuga(_("Write error in the log file of user %s: %s\n"),ufile->user->id,strerror(errno));
766 exit(EXIT_FAILURE);
767 }
27d1fa35
FM
768 free(ufile);
769 }
770
771 if (debug) {
8e501bd6 772 unsigned long int totalcount=0;
27d1fa35
FM
773
774 debuga(_(" Records read: %ld, written: %ld, excluded: %ld\n"),totregsl,totregsg,totregsx);
775
7c8c06c5
FM
776 for (x=sizeof(excluded_count)/sizeof(*excluded_count)-1 ; x>=0 && excluded_count[x]>0 ; x--);
777 if (x>=0) {
778 debuga(_("Reasons for excluded entries:\n"));
779 DisplayExcludeCount(_("User name too long"),ER_UserNameTooLong);
780 DisplayExcludeCount(_("Squid logged an incomplete query received from the client"),ER_IncompleteQuery);
781 DisplayExcludeCount(_("Log file turned over"),ER_LogfileTurnedOver);
7c8c06c5
FM
782 DisplayExcludeCount(_("Excluded by \"exclude_string\" in sarg.conf"),ER_ExcludeString);
783 DisplayExcludeCount(_("Unknown input log file format"),ER_UnknownFormat);
784 DisplayExcludeCount(_("Line ignored by the input log format"),ER_FormatData);
785 DisplayExcludeCount(_("Time outside the requested date range (-d)"),ER_OutOfDateRange);
786 DisplayExcludeCount(_("Ignored week day (\"weekdays\" parameter in sarg.conf)"),ER_OutOfWDayRange);
787 DisplayExcludeCount(_("Ignored hour (\"hours\" parameter in sarg.conf)"),ER_OutOfHourRange);
788 DisplayExcludeCount(_("User is not in the \"include_users\" list"),ER_User);
789 DisplayExcludeCount(_("HTTP code excluded by \"exclude_code\" file"),ER_HttpCode);
790 DisplayExcludeCount(_("Invalid character found in user name"),ER_InvalidUserChar);
791 DisplayExcludeCount(_("No URL in entry"),ER_NoUrl);
792 DisplayExcludeCount(_("Not the IP address requested with -a"),ER_UntrackedIpAddr);
793 DisplayExcludeCount(_("URL excluded by -c or \"exclude_hosts\""),ER_Url);
794 DisplayExcludeCount(_("Entry time outside of requested hour range (-t)"),ER_OutOfTimeRange);
795 DisplayExcludeCount(_("Not the URL requested by -s"),ER_UntrackedUrl);
796 DisplayExcludeCount(_("No user in entry"),ER_NoUser);
797 DisplayExcludeCount(_("Not the user requested by -u"),ER_UntrackedUser);
798 DisplayExcludeCount(_("System user as defined by \"password\" in sarg.conf"),ER_SysUser);
799 DisplayExcludeCount(_("User ignored by \"exclude_users\""),ER_IgnoredUser);
800 }
801
1c91da07
FM
802 for (x=0 ; x<sizeof(LogFormats)/sizeof(*LogFormats) ; x++) {
803 if (format_count[x]>0) {
804 /* TRANSLATORS: It displays the number of lines found in the input log files
805 * for each supported log format. The log format name is the %s and is a string
806 * you translate somewhere else. */
8e501bd6 807 debuga(_("%s: %lu entries\n"),_(LogFormats[x]->Name),format_count[x]);
1c91da07
FM
808 totalcount+=format_count[x];
809 }
810 }
27d1fa35 811
1c91da07 812 if (totalcount==0 && totregsg)
27d1fa35
FM
813 debuga(_("Log with invalid format\n"));
814 }
815
944cf283 816 if (debugz)
27d1fa35 817 debugaz(_("period=%s\n"),period.text);
27d1fa35 818
27d1fa35
FM
819 return((totregsg!=0) ? 1 : 0);
820}