/*
 * Extracted from the blame view of readlog.c at git.ipfire.org
 * (repository thirdparty/sarg.git), commit "Store the HTTP code inside
 * the structure". First blamed commit: 27d1fa35 (author initials FM).
 */
/*
 * SARG Squid Analysis Report Generator      http://sarg.sourceforge.net
 *                                                            1998, 2012
 *
 * SARG donations:
 *      please look at http://sarg.sourceforge.net/donations.php
 * Support:
 *     http://sourceforge.net/projects/sarg/forums/forum/363374
 * ---------------------------------------------------------------------
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111, USA.
 *
 */
26
#include "include/conf.h"
#include "include/defs.h"
#include "include/readlog.h"
30
/* Number of log lines read between two refreshes of the progress message. */
#define REPORT_EVERY_X_LINES 5000
/* Upper bound on the number of per-user temporary files kept open at once. */
#define MAX_OPEN_USER_FILES 10
33
/*
One node of the singly linked list of per-user temporary output files.
*/
struct userfilestruct
{
	/* Next node of the list or NULL for the last one. */
	struct userfilestruct *next;
	/* The user whose records are written to ::file. */
	struct userinfostruct *user;
	/* The open temporary file or NULL if it is currently closed. */
	FILE *file;
};
40
41numlist weekdays = { { 0, 1, 2, 3, 4, 5, 6 }, 7 };
42numlist hours = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 }, 24 };
43
27d1fa35
FM
44extern char *userfile;
45
46/*!
47Read the log files.
48
49\param Filter The filtering parameters for the file to load.
50
51\retval 1 Records found.
52\retval 0 No record found.
53*/
54int ReadLogFile(struct ReadLogDataStruct *Filter)
55{
56 enum isa_col_id {
57 ISACOL_Ip,
58 ISACOL_UserName,
59 ISACOL_Date,
60 ISACOL_Time,
61 ISACOL_TimeTaken,
62 ISACOL_Bytes,
63 ISACOL_Uri,
64 ISACOL_Status,
65 ISACOL_Last //last entry of the list !
66 };
67 enum InputLogFormat {
68 ILF_Unknown,
69 ILF_Squid,
70 ILF_Common,
71 ILF_Sarg,
72 ILF_Isa,
73 ILF_Last //last entry of the list !
74 };
75
76 enum InputLogFormat ilf;
77 int ilf_count[ILF_Last];
78 longline line;
79 char *linebuf;
80 char *str;
81 char *full_url;
82 char arq_log[255];
83 char fun[MAXLEN];
84 char elap[255];
85 char user[MAX_USER_LEN];
86 char data[255];
6debcf3d 87 char ip[60];
27d1fa35
FM
88 char hora[30];
89 char mes[30];
90 char tbuf2[128];
c5d6ef4b 91 char dia[128]="";
27d1fa35
FM
92 char wuser[MAXLEN];
93 char tmp3[MAXLEN];
94 char sz_Download_Unsort[20000];
95 char start_hour[128];
27d1fa35 96 char authfail_unsort[MAXLEN];
27d1fa35
FM
97 char download_url[MAXLEN];
98 char smartfilter[MAXLEN];
99 const char *arq;
100 const char *url;
101 int iarq;
102 int blen;
103 int OutputNonZero = REPORT_EVERY_X_LINES ;
104 int idata=0;
105 int isa_ncols=0,isa_cols[ISACOL_Last];
106 int x;
107 int hmr;
108 int nopen;
109 int maxopenfiles=MAX_OPEN_USER_FILES;
110 int mindate=0;
111 int maxdate=0;
112 int cstatus;
113 unsigned long int recs1=0UL;
114 unsigned long int recs2=0UL;
115 long int totregsl=0;
116 long int totregsg=0;
117 long int totregsx=0;
27d1fa35
FM
118 long long int iyear, imonth, iday;
119 FILE *fp_in=NULL;
120 FILE *fp_log=NULL;
121 FILE *fp_denied=NULL;
122 FILE *fp_authfail=NULL;
123 FILE *fp_Download_Unsort=NULL;
124 bool from_pipe;
125 bool from_stdin;
126 bool download_flag=false;
127 bool id_is_ip;
128 bool totper=false;
129 struct stat logstat;
130 struct getwordstruct gwarea;
27d1fa35
FM
131 struct tm tt;
132 struct userfilestruct *prev_ufile;
133 struct userinfostruct *uinfo;
134 struct userfilestruct *first_user_file=NULL;
135 struct userfilestruct *ufile;
136 struct userfilestruct *ufile1;
c5d6ef4b 137 struct ReadLogStruct log_entry;
27d1fa35
FM
138 time_t tnum;
139
140 for (ilf=0 ; ilf<ILF_Last ; ilf++) ilf_count[ilf]=0;
141 tmp3[0]='\0';
142 start_hour[0]='\0';
143 first_user_file=NULL;
144
145 snprintf(sz_Download_Unsort,sizeof(sz_Download_Unsort),"%s/download.int_unsort", tmp);
27d1fa35
FM
146 snprintf(authfail_unsort,sizeof(authfail_unsort),"%s/authfail.int_unsort",tmp);
147
148 if(DataFile[0]=='\0') {
149 if((ReportType & REPORT_TYPE_DENIED) != 0) {
60b48ae5 150 fp_denied=denied_open();
27d1fa35
FM
151 }
152
153 if((ReportType & REPORT_TYPE_DENIED) != 0 || (ReportType & REPORT_TYPE_AUTH_FAILURES) != 0) {
154 if((fp_authfail=MY_FOPEN(authfail_unsort,"w"))==NULL) {
155 debuga(_("(log) Cannot open file: %s - %s\n"),authfail_unsort,strerror(errno));
156 exit(EXIT_FAILURE);
157 }
158 }
159 }
160
161 if ((line=longline_create())==NULL) {
162 debuga(_("Not enough memory to read a log file\n"));
163 exit(EXIT_FAILURE);
164 }
165
166 for (iarq=0 ; iarq<NAccessLog ; iarq++) {
167 arq=AccessLog[iarq];
168
169 if(strcmp(arq,"-")==0) {
170 if(debug)
171 debuga(_("Reading access log file: from stdin\n"));
172 fp_in=stdin;
173 from_stdin=true;
174 } else {
175 if (Filter->DateRange[0]!='\0') {
176 if (stat(arq,&logstat)!=0) {
177 debuga(_("Cannot get the modification time of input log file %s (%s). Processing it anyway\n"),arq,strerror(errno));
178 } else {
179 struct tm *logtime=localtime(&logstat.st_mtime);
180 if ((logtime->tm_year+1900)*10000+(logtime->tm_mon+1)*100+logtime->tm_mday<dfrom) {
181 debuga(_("Ignoring old log file %s\n"),arq);
182 continue;
183 }
184 }
185 }
186 fp_in=decomp(arq,&from_pipe);
187 if(fp_in==NULL) {
188 debuga(_("(log) Cannot open log file: %s - %s\n"),arq,strerror(errno));
189 exit(EXIT_FAILURE);
190 }
191 if(debug) debuga(_("Reading access log file: %s\n"),arq);
192 from_stdin=false;
193 }
194
195 ilf=ILF_Unknown;
196 download_flag=false;
197
198 // pre-read the file only if we have to show stats
199 if (ShowReadStatistics && !from_stdin && !from_pipe) {
200 size_t nread,i;
201 bool skipcr=false;
202 char tmp4[MAXLEN];
203
204 recs1=0UL;
205 recs2=0UL;
206
207 while ((nread=fread(tmp4,1,sizeof(tmp4),fp_in))>0) {
208 for (i=0 ; i<nread ; i++)
209 if (skipcr) {
210 if (tmp4[i]!='\n' && tmp4[i]!='\r') {
211 skipcr=false;
212 }
213 } else {
214 if (tmp4[i]=='\n' || tmp4[i]=='\r') {
215 skipcr=true;
216 recs1++;
217 }
218 }
219 }
220 rewind(fp_in);
221 printf(_("SARG: Records in file: %lu, reading: %3.2f%%"),recs1,(float) 0);
222 putchar('\r');
223 fflush( stdout ) ;
224 }
225
226 longline_reset(line);
227
228 while ((linebuf=longline_read(fp_in,line))!=NULL) {
229 blen=strlen(linebuf);
230
231 if (ilf==ILF_Unknown) {
232 if(strncmp(linebuf,"#Software: Mic",14) == 0) {
233 fixendofline(linebuf);
234 if (debug)
235 debuga(_("Log is from Microsoft ISA: %s\n"),linebuf);
236 ilf=ILF_Isa;
237 ilf_count[ilf]++;
238 continue;
239 }
240
241 if(strncmp(linebuf,"*** SARG Log ***",16) == 0) {
242 if (getperiod_fromsarglog(arq,&period)<0) {
243 debuga(_("The name of the file is invalid: %s\n"),arq);
244 exit(EXIT_FAILURE);
245 }
246 ilf=ILF_Sarg;
247 ilf_count[ilf]++;
248 continue;
249 }
250 }
251
252 if(!fp_log && ParsedOutputLog[0] && ilf!=ILF_Sarg) {
253 if(access(ParsedOutputLog,R_OK) != 0) {
254 my_mkdir(ParsedOutputLog);
255 }
256 if (snprintf(arq_log,sizeof(arq_log),"%s/sarg_temp.log",ParsedOutputLog)>=sizeof(arq_log)) {
257 debuga(_("File name too long: %s/sarg_temp.log\n"),ParsedOutputLog);
258 exit(EXIT_FAILURE);
259 }
260 if((fp_log=MY_FOPEN(arq_log,"w"))==NULL) {
261 debuga(_("(log) Cannot open log file: %s - %s\n"),arq_log,strerror(errno));
262 exit(EXIT_FAILURE);
263 }
264 fputs("*** SARG Log ***\n",fp_log);
265 }
266
267 recs2++;
268 if( ShowReadStatistics && !from_stdin && !from_pipe && --OutputNonZero<=0) {
269 double perc = recs2 * 100. / recs1 ;
270 printf(_("SARG: Records in file: %lu, reading: %3.2lf%%"),recs2,perc);
271 putchar('\r');
272 fflush (stdout);
273 OutputNonZero = REPORT_EVERY_X_LINES ;
274 }
275 if(blen < 58) continue;
276 if(strstr(linebuf,"HTTP/0.0") != 0) continue;
277 if(strstr(linebuf,"logfile turned over") != 0) continue;
278 if(linebuf[0] == ' ') continue;
279
280 // exclude_string
281 if(ExcludeString[0] != '\0') {
282 bool exstring=false;
283 getword_start(&gwarea,ExcludeString);
284 while(strchr(gwarea.current,':') != 0) {
285 if (getword_multisep(val1,sizeof(val1),&gwarea,':')<0) {
286 debuga(_("Maybe you have a broken record or garbage in your exclusion string\n"));
287 exit(EXIT_FAILURE);
288 }
289 if((str=(char *) strstr(linebuf,val1)) != (char *) NULL ) {
290 exstring=true;
291 break;
292 }
293 }
294 if(!exstring && (str=(char *) strstr(linebuf,gwarea.current)) != (char *) NULL )
295 exstring=true;
296 if(exstring) continue;
297 }
298
299 totregsl++;
300 if(debugm)
301 printf("BUF=%s\n",linebuf);
302
6debcf3d 303 memset(&log_entry,0,sizeof(log_entry));
27d1fa35
FM
304 if (ilf==ILF_Squid || ilf==ILF_Common || ilf==ILF_Unknown) {
305 getword_start(&gwarea,linebuf);
306 if (getword(data,sizeof(data),&gwarea,' ')<0) {
307 debuga(_("Maybe you have a broken time in your access.log file\n"));
308 exit(EXIT_FAILURE);
309 }
310 if((str=(char *) strchr(data, '.')) != (char *) NULL && (str=(char *) strchr(str+1, '.')) != (char *) NULL ) {
311 strcpy(ip,data);
6debcf3d 312 log_entry.Ip=ip;
27d1fa35
FM
313 if(squid24) {
314 if (getword(user,sizeof(user),&gwarea,' ')<0 || getword_skip(255,&gwarea,' ')<0) {
315 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
316 exit(EXIT_FAILURE);
317 }
318 } else {
319 if (getword_skip(255,&gwarea,' ')<0 || getword(user,sizeof(user),&gwarea,' ')<0) {
320 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
321 exit(EXIT_FAILURE);
322 }
323 }
525f6fa7 324 log_entry.User=user;
27d1fa35
FM
325 if (getword(data,sizeof(data),&gwarea,']')<0 || getword_skip(MAXLEN,&gwarea,'"')<0 ||
326 getword(fun,sizeof(fun),&gwarea,' ')<0) {
327 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
328 exit(EXIT_FAILURE);
329 }
330 if (getword_ptr(linebuf,&full_url,&gwarea,' ')<0) {
331 debuga(_("Maybe you have a broken url in your %s file\n"),arq);
332 exit(EXIT_FAILURE);
333 }
334 if (getword_skip(MAXLEN,&gwarea,' ')<0) {
335 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
336 exit(EXIT_FAILURE);
337 }
338 if (getword(code2,sizeof(code2),&gwarea,' ')<0) {
339 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
340 exit(EXIT_FAILURE);
341 }
2c4bc22b 342 if (getword_atoll(&log_entry.DataSize,&gwarea,' ')<0) {
27d1fa35
FM
343 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
344 exit(EXIT_FAILURE);
345 }
346 if((str=(char *) strchr(gwarea.current, ' ')) != (char *) NULL ) {
347 if (getword(code,sizeof(code),&gwarea,' ')<0) {
348 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
349 exit(EXIT_FAILURE);
350 }
351 } else {
352 if (getword(code,sizeof(code),&gwarea,'\0')<0) {
353 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
354 exit(EXIT_FAILURE);
355 }
356 }
357
358 if ((str = strchr(code, ':')) != NULL)
359 *str = '/';
ae412eca 360 log_entry.HttpCode=code;
27d1fa35 361
27d1fa35
FM
362 ilf=ILF_Common;
363 ilf_count[ilf]++;
364
365 getword_start(&gwarea,data+1);
366 if (getword_multisep(data,sizeof(data),&gwarea,':')<0){
367 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
368 exit(EXIT_FAILURE);
369 }
370 if (getword_multisep(hora,sizeof(hora),&gwarea,' ')<0){
371 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
372 exit(EXIT_FAILURE);
373 }
374 getword_start(&gwarea,data);
375 if (getword_atoll(&iday,&gwarea,'/')<0){
376 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
377 exit(EXIT_FAILURE);
378 }
379 if (getword(mes,sizeof(mes),&gwarea,'/')<0){
380 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
381 exit(EXIT_FAILURE);
382 }
383 if (getword_atoll(&iyear,&gwarea,'/')<0){
384 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
385 exit(EXIT_FAILURE);
386 }
387
388 imonth=month2num(mes)+1;
389 idata=builddia(iday,imonth,iyear);
390 computedate(iyear,imonth,iday,&tt);
391 if (sscanf(hora,"%d:%d:%d",&tt.tm_hour,&tt.tm_min,&tt.tm_sec)!=3 || tt.tm_hour<0 || tt.tm_hour>=24 ||
392 tt.tm_min<0 || tt.tm_min>=60 || tt.tm_sec<0 || tt.tm_sec>=60) {
393 debuga(_("Invalid time found in %s\n"),arq);
394 exit(EXIT_FAILURE);
395 }
c5d6ef4b 396 log_entry.EntryTime=&tt;
27d1fa35
FM
397 }
398
399 if(ilf==ILF_Unknown || ilf==ILF_Squid) {
400 if (getword(elap,sizeof(elap),&gwarea,' ')<0) {
401 debuga(_("Maybe you have a broken elapsed time in your %s file\n"),arq);
402 exit(EXIT_FAILURE);
403 }
404 while(strcmp(elap,"") == 0 && gwarea.current[0] != '\0')
405 if (getword(elap,sizeof(elap),&gwarea,' ')<0) {
406 debuga(_("Maybe you have a broken elapsed time in your %s file\n"),arq);
407 exit(EXIT_FAILURE);
408 }
409 if(strlen(elap) < 1) continue;
2c4bc22b 410 log_entry.ElapsedTime=atol(elap);
27d1fa35
FM
411 if (getword(ip,sizeof(ip),&gwarea,' ')<0){
412 debuga(_("Maybe you have a broken client IP address in your %s file\n"),arq);
413 exit(EXIT_FAILURE);
414 }
6debcf3d 415 log_entry.Ip=ip;
27d1fa35
FM
416 if (getword(code,sizeof(code),&gwarea,' ')<0){
417 debuga(_("Maybe you have a broken result code in your %s file\n"),arq);
418 exit(EXIT_FAILURE);
419 }
ae412eca 420 log_entry.HttpCode=code;
2c4bc22b 421 if (getword_atoll(&log_entry.DataSize,&gwarea,' ')<0){
27d1fa35
FM
422 debuga(_("Maybe you have a broken amount of data in your %s file\n"),arq);
423 exit(EXIT_FAILURE);
424 }
425 if (getword(fun,sizeof(fun),&gwarea,' ')<0){
426 debuga(_("Maybe you have a broken request method in your %s file\n"),arq);
427 exit(EXIT_FAILURE);
428 }
429 if (getword_ptr(linebuf,&full_url,&gwarea,' ')<0){
430 debuga(_("Maybe you have a broken url in your %s file\n"),arq);
431 exit(EXIT_FAILURE);
432 }
433 if (getword(user,sizeof(user),&gwarea,' ')<0){
434 debuga(_("Maybe you have a broken user ID in your %s file\n"),arq);
435 exit(EXIT_FAILURE);
436 }
525f6fa7 437 log_entry.User=user;
27d1fa35
FM
438 ilf=ILF_Squid;
439 ilf_count[ilf]++;
440
441 tnum=atoi(data);
c5d6ef4b
FM
442 log_entry.EntryTime=localtime(&tnum);
443 if (log_entry.EntryTime == NULL) {
27d1fa35
FM
444 debuga(_("Cannot convert the timestamp from the squid log file\n"));
445 exit(EXIT_FAILURE);
446 }
447
c5d6ef4b 448 strftime(tbuf2, sizeof(tbuf2), "%H%M", log_entry.EntryTime);
27d1fa35 449
c5d6ef4b 450 idata=(log_entry.EntryTime->tm_year+1900)*10000+(log_entry.EntryTime->tm_mon+1)*100+log_entry.EntryTime->tm_mday;
27d1fa35
FM
451 }
452 }
453 if (ilf==ILF_Sarg) {
454 getword_start(&gwarea,linebuf);
455 if (getword(data,sizeof(data),&gwarea,'\t')<0){
456 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
457 exit(EXIT_FAILURE);
458 }
459 if (getword(hora,sizeof(hora),&gwarea,'\t')<0) {
460 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
461 exit(EXIT_FAILURE);
462 }
463 if (getword(user,sizeof(user),&gwarea,'\t')<0) {
464 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
465 exit(EXIT_FAILURE);
466 }
525f6fa7 467 log_entry.User=user;
27d1fa35
FM
468 if (getword(ip,sizeof(ip),&gwarea,'\t')<0) {
469 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
470 exit(EXIT_FAILURE);
471 }
6debcf3d 472 log_entry.Ip=ip;
27d1fa35
FM
473 if (getword_ptr(linebuf,&full_url,&gwarea,'\t')<0){
474 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
475 exit(EXIT_FAILURE);
476 }
2c4bc22b 477 if (getword_atoll(&log_entry.DataSize,&gwarea,'\t')<0){
27d1fa35
FM
478 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
479 exit(EXIT_FAILURE);
480 }
481 if (getword(code,sizeof(code),&gwarea,'\t')<0){
482 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
483 exit(EXIT_FAILURE);
484 }
ae412eca 485 log_entry.HttpCode=code;
2c4bc22b 486 if (getword_atol(&log_entry.ElapsedTime,&gwarea,'\t')<0){
27d1fa35
FM
487 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
488 exit(EXIT_FAILURE);
489 }
490 if (getword(smartfilter,sizeof(smartfilter),&gwarea,'\0')<0){
491 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
492 exit(EXIT_FAILURE);
493 }
494 getword_start(&gwarea,data);
495 if (getword_atoll(&iday,&gwarea,'/')<0 || iday<1 || iday>31){
496 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
497 exit(EXIT_FAILURE);
498 }
499 if (getword_atoll(&imonth,&gwarea,'/')<0 || imonth<1 || imonth>12){
500 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
501 exit(EXIT_FAILURE);
502 }
503 if (getword_atoll(&iyear,&gwarea,'\0')<0){
504 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
505 exit(EXIT_FAILURE);
506 }
507 idata=builddia(iday,imonth,iyear);
508 computedate(iyear,imonth,iday,&tt);
509 if (sscanf(hora,"%d:%d:%d",&tt.tm_hour,&tt.tm_min,&tt.tm_sec)!=3 || tt.tm_hour<0 || tt.tm_hour>=24 ||
510 tt.tm_min<0 || tt.tm_min>=60 || tt.tm_sec<0 || tt.tm_sec>=60) {
511 debuga(_("Invalid time found in %s\n"),arq);
512 exit(EXIT_FAILURE);
513 }
c5d6ef4b 514 log_entry.EntryTime=&tt;
27d1fa35
FM
515 }
516 if (ilf==ILF_Isa) {
517 if (linebuf[0] == '#') {
518 int ncols,cols[ISACOL_Last];
519
520 fixendofline(linebuf);
521 getword_start(&gwarea,linebuf);
522 // remove the #Fields: column at the beginning of the line
523 if (getword_skip(1000,&gwarea,' ')<0){
524 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
525 exit(EXIT_FAILURE);
526 }
527 for (ncols=0 ; ncols<ISACOL_Last ; ncols++) cols[ncols]=-1;
528 ncols=0;
529 while(gwarea.current[0] != '\0') {
530 if (getword(val1,sizeof(val1),&gwarea,'\t')<0){
531 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
532 exit(EXIT_FAILURE);
533 }
534 if(strcmp(val1,"c-ip") == 0) cols[ISACOL_Ip]=ncols;
535 if(strcmp(val1,"cs-username") == 0) cols[ISACOL_UserName]=ncols;
536 if(strcmp(val1,"date") == 0) cols[ISACOL_Date]=ncols;
537 if(strcmp(val1,"time") == 0) cols[ISACOL_Time]=ncols;
538 if(strcmp(val1,"time-taken") == 0) cols[ISACOL_TimeTaken]=ncols;
539 if(strcmp(val1,"sc-bytes") == 0) cols[ISACOL_Bytes]=ncols;
540 if(strcmp(val1,"cs-uri") == 0) cols[ISACOL_Uri]=ncols;
541 if(strcmp(val1,"sc-status") == 0) cols[ISACOL_Status]=ncols;
542 ncols++;
543 }
544 if (cols[ISACOL_Ip]>=0) {
545 isa_ncols=ncols;
546 for (ncols=0 ; ncols<ISACOL_Last ; ncols++)
547 isa_cols[ncols]=cols[ncols];
548 }
549 continue;
550 }
551 if (!isa_ncols) continue;
552 getword_start(&gwarea,linebuf);
553 for (x=0 ; x<isa_ncols ; x++) {
554 if (getword_ptr(linebuf,&str,&gwarea,'\t')<0) {
555 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
556 exit(EXIT_FAILURE);
557 }
558 if (x==isa_cols[ISACOL_Ip]) {
559 if (strlen(str)>=sizeof(ip)) {
560 debuga(_("Maybe you have a broken IP in your %s file\n"),arq);
561 exit(EXIT_FAILURE);
562 }
563 strcpy(ip,str);
6debcf3d 564 log_entry.Ip=ip;
27d1fa35
FM
565 } else if (x==isa_cols[ISACOL_UserName]) {
566 if (strlen(str)>=sizeof(user)) {
567 debuga(_("Maybe you have a broken user ID in your %s file\n"),arq);
568 exit(EXIT_FAILURE);
569 }
570 strcpy(user,str);
525f6fa7 571 log_entry.User=user;
27d1fa35
FM
572 } else if (x==isa_cols[ISACOL_Date]) {
573 if (strlen(str)>=sizeof(data)) {
574 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
575 exit(EXIT_FAILURE);
576 }
577 strcpy(data,str);
578 } else if (x==isa_cols[ISACOL_Time]) {
579 if (strlen(str)>=sizeof(hora)) {
580 debuga(_("Maybe you have a broken time in your %s file\n"),arq);
581 exit(EXIT_FAILURE);
582 }
583 strcpy(hora,str);
584 } else if (x==isa_cols[ISACOL_TimeTaken]) {
2c4bc22b 585 log_entry.ElapsedTime=atol(str);
27d1fa35 586 } else if (x==isa_cols[ISACOL_Bytes]) {
2c4bc22b 587 log_entry.DataSize=atoll(str);
27d1fa35
FM
588 } else if (x==isa_cols[ISACOL_Uri]) {
589 full_url=str;
590 } else if (x==isa_cols[ISACOL_Status]) {
591 if (strlen(str)>=sizeof(code)) {
592 debuga(_("Maybe you have a broken access code in your %s file\n"),arq);
593 exit(EXIT_FAILURE);
594 }
595 strcpy(code,str);
596 }
597 }
598
599 if(strcmp(code,"401") == 0 || strcmp(code,"403") == 0 || strcmp(code,"407") == 0) {
ae412eca
FM
600 static char valcode[12];
601 sprintf(valcode,"DENIED/%s",code);
602 log_entry.HttpCode=valcode;
603 } else {
604 log_entry.HttpCode=code;
27d1fa35
FM
605 }
606 getword_start(&gwarea,data);
607 if (getword_atoll(&iyear,&gwarea,'-')<0){
608 debuga(_("Maybe you have a broken year in your %s file\n"),arq);
609 exit(EXIT_FAILURE);
610 }
611 if (getword_atoll(&imonth,&gwarea,'-')<0){
612 debuga(_("Maybe you have a broken month in your %s file\n"),arq);
613 exit(EXIT_FAILURE);
614 }
615 if (getword_atoll(&iday,&gwarea,'\0')<0){
616 debuga(_("Maybe you have a broken day in your %s file\n"),arq);
617 exit(EXIT_FAILURE);
618 }
619
620 idata=builddia(iday,imonth,iyear);
621 computedate(iyear,imonth,iday,&tt);
622 if (isa_cols[ISACOL_Time]>=0) {
623 if (sscanf(hora,"%d:%d:%d",&tt.tm_hour,&tt.tm_min,&tt.tm_sec)!=3 || tt.tm_hour<0 || tt.tm_hour>=24 ||
624 tt.tm_min<0 || tt.tm_min>=60 || tt.tm_sec<0 || tt.tm_sec>=60) {
625 debuga(_("Invalid time found in %s\n"),arq);
626 exit(EXIT_FAILURE);
627 }
628 }
c5d6ef4b 629 log_entry.EntryTime=&tt;
27d1fa35 630 }
c5d6ef4b 631 if (log_entry.EntryTime==NULL) {
6debcf3d
FM
632 debuga(_("Unknown input log file format: no time\n"));
633 break;
634 }
635 if (log_entry.Ip==NULL) {
636 debuga(_("Unknown input log file format: no IP addresses\n"));
27d1fa35
FM
637 break;
638 }
525f6fa7
FM
639 if (log_entry.User==NULL) {
640 debuga(_("Unknown input log file format: no user\n"));
641 break;
642 }
27d1fa35 643
27d1fa35
FM
644 if(debugm)
645 printf("DATE=%s IDATA=%d DFROM=%d DUNTIL=%d\n",Filter->DateRange,idata,dfrom,duntil);
646
647 if(Filter->DateRange[0] != '\0'){
648 if(idata < dfrom || idata > duntil) continue;
649 }
650
651 // Record only hours usage which is required
c5d6ef4b
FM
652 if( bsearch( &( log_entry.EntryTime->tm_wday ), weekdays.list, weekdays.len, sizeof( int ), compar ) == NULL )
653 continue;
27d1fa35 654
c5d6ef4b
FM
655 if( bsearch( &( log_entry.EntryTime->tm_hour ), hours.list, hours.len, sizeof( int ), compar ) == NULL )
656 continue;
27d1fa35
FM
657
658
525f6fa7
FM
659 if(strlen(log_entry.User) > MAX_USER_LEN) {
660 if (debugm) printf(_("User ID too long: %s\n"),log_entry.User);
27d1fa35
FM
661 totregsx++;
662 continue;
663 }
664
665 // include_users
666 if(IncludeUsers[0] != '\0') {
525f6fa7 667 snprintf(val1,sizeof(val1),":%s:",log_entry.User);
27d1fa35
FM
668 if((str=(char *) strstr(IncludeUsers,val1)) == (char *) NULL )
669 continue;
670 }
671
ae412eca
FM
672 if(vercode(log_entry.HttpCode)) {
673 if (debugm) printf(_("Excluded code: %s\n"),log_entry.HttpCode);
27d1fa35
FM
674 totregsx++;
675 continue;
676 }
677
525f6fa7 678 if(testvaliduserchar(log_entry.User))
27d1fa35
FM
679 continue;
680
27d1fa35
FM
681 // replace any tab by a single space
682 for (str=full_url ; *str ; str++)
683 if (*str=='\t') *str=' ';
ae412eca 684 for (str=log_entry.HttpCode ; *str ; str++)
27d1fa35
FM
685 if (*str=='\t') *str=' ';
686
687 if(ilf!=ILF_Sarg) {
688 /*
689 The full URL is not saved in sarg log. There is no point in testing the URL to detect
690 a downloaded file.
691 */
692 download_flag=is_download_suffix(full_url);
693 if (download_flag) {
694 safe_strcpy(download_url,full_url,sizeof(download_url));
695 download_count++;
696 }
697 } else
698 download_flag=false;
699
700 url=process_url(full_url,LongUrl);
701 if (!url || url[0] == '\0') continue;
702
703 if(addr[0] != '\0'){
6debcf3d 704 if(strcmp(addr,log_entry.Ip)!=0) continue;
27d1fa35
FM
705 }
706 if(Filter->HostFilter) {
707 if(!vhexclude(url)) {
708 if (debugm) printf(_("Excluded site: %s\n"),url);
709 totregsx++;
710 continue;
711 }
712 }
713
714 if(Filter->StartTime >= 0 && Filter->EndTime >= 0) {
c5d6ef4b 715 hmr=log_entry.EntryTime->tm_hour*100+log_entry.EntryTime->tm_min;
27d1fa35
FM
716 if(hmr < Filter->StartTime || hmr > Filter->EndTime) continue;
717 }
718
719 if(site[0] != '\0'){
720 if(strstr(url,site)==0) continue;
721 }
722
723 if(UserIp) {
525f6fa7 724 log_entry.User=log_entry.Ip;
27d1fa35
FM
725 id_is_ip=true;
726 } else {
727 id_is_ip=false;
525f6fa7 728 if(strcmp(log_entry.User,"-") == 0 || strcmp(log_entry.User," ") == 0 || strcmp(log_entry.User,"") == 0) {
27d1fa35 729 if(RecordsWithoutUser == RECORDWITHOUTUSER_IP) {
525f6fa7 730 log_entry.User=log_entry.Ip;
27d1fa35
FM
731 id_is_ip=true;
732 }
733 if(RecordsWithoutUser == RECORDWITHOUTUSER_IGNORE)
734 continue;
735 if(RecordsWithoutUser == RECORDWITHOUTUSER_EVERYBODY)
525f6fa7 736 log_entry.User="everybody";
27d1fa35 737 } else {
525f6fa7 738 strlow(log_entry.User);
27d1fa35 739 if(NtlmUserFormat == NTLMUSERFORMAT_USER) {
525f6fa7
FM
740 if((str = strchr(log_entry.User,'_')) != 0) {
741 log_entry.User=str+1;
27d1fa35 742 }
525f6fa7
FM
743 if((str = strchr(log_entry.User,'+')) != 0) {
744 log_entry.User=str+1;
27d1fa35
FM
745 }
746 }
747 }
748 }
749
750 if(us[0] != '\0'){
525f6fa7 751 if(strcmp(log_entry.User,us)!=0) continue;
27d1fa35
FM
752 }
753
754 if(Filter->SysUsers) {
525f6fa7 755 snprintf(wuser,sizeof(wuser),":%s:",log_entry.User);
27d1fa35
FM
756 if(strstr(userfile, wuser) == 0)
757 continue;
758 }
759
760 if(Filter->UserFilter) {
525f6fa7
FM
761 if(!vuexclude(log_entry.User)) {
762 if (debugm) printf(_("Excluded user: %s\n"),log_entry.User);
27d1fa35
FM
763 totregsx++;
764 continue;
765 }
766 }
767
525f6fa7
FM
768 if(strcmp(log_entry.User,"-") ==0 || strcmp(log_entry.User," ") ==0 ||
769 strcmp(log_entry.User,"") ==0 || strcmp(log_entry.User,":") ==0)
27d1fa35
FM
770 continue;
771
2c4bc22b 772 if (log_entry.DataSize<0) log_entry.DataSize=0;
27d1fa35 773
2c4bc22b
FM
774 if (log_entry.ElapsedTime<0) log_entry.ElapsedTime=0;
775 if (Filter->max_elapsed>0 && log_entry.ElapsedTime>Filter->max_elapsed) {
776 log_entry.ElapsedTime=0;
27d1fa35
FM
777 }
778
779 if((str=(char *) strstr(linebuf, "[SmartFilter:")) != (char *) NULL ) {
780 fixendofline(str);
781 snprintf(smartfilter,sizeof(smartfilter),"\"%s\"",str+1);
782 } else strcpy(smartfilter,"\"\"");
783
784 nopen=0;
785 prev_ufile=NULL;
525f6fa7 786 for (ufile=first_user_file ; ufile && strcmp(log_entry.User,ufile->user->id)!=0 ; ufile=ufile->next) {
27d1fa35
FM
787 prev_ufile=ufile;
788 if (ufile->file) nopen++;
789 }
790 if (!ufile) {
791 ufile=malloc(sizeof(*ufile));
792 if (!ufile) {
525f6fa7 793 debuga(_("Not enough memory to store the user %s\n"),log_entry.User);
27d1fa35
FM
794 exit(EXIT_FAILURE);
795 }
796 memset(ufile,0,sizeof(*ufile));
797 ufile->next=first_user_file;
798 first_user_file=ufile;
525f6fa7 799 uinfo=userinfo_create(log_entry.User);
27d1fa35
FM
800 ufile->user=uinfo;
801 uinfo->id_is_ip=id_is_ip;
802 } else {
803 if (prev_ufile) {
804 prev_ufile->next=ufile->next;
805 ufile->next=first_user_file;
806 first_user_file=ufile;
807 }
808 }
809 #ifdef ENABLE_DOUBLE_CHECK_DATA
2c4bc22b
FM
810 ufile->user->nbytes+=log_entry.DataSize;
811 ufile->user->elap+=log_entry.ElapsedTime;
27d1fa35
FM
812 #endif
813
814 if (ufile->file==NULL) {
815 if (nopen>=maxopenfiles) {
816 x=0;
817 for (ufile1=first_user_file ; ufile1 ; ufile1=ufile1->next) {
818 if (ufile1->file!=NULL) {
819 if (x>=maxopenfiles) {
820 if (fclose(ufile1->file)==EOF) {
821 debuga(_("Failed to close the log file of user %s - %s\n"),ufile1->user->id,strerror(errno));
822 exit(EXIT_FAILURE);
823 }
824 ufile1->file=NULL;
825 }
826 x++;
827 }
828 }
829 }
830 if (snprintf (tmp3, sizeof(tmp3), "%s/%s.unsort", tmp, ufile->user->filename)>=sizeof(tmp3)) {
831 debuga(_("Temporary user file name too long: %s/%s.unsort\n"), tmp, ufile->user->filename);
832 exit(EXIT_FAILURE);
833 }
834 if ((ufile->file = MY_FOPEN (tmp3, "a")) == NULL) {
835 debuga(_("(log) Cannot open temporary file: %s - %s\n"), tmp3, strerror(errno));
836 exit (1);
837 }
838 }
839
c5d6ef4b
FM
840 strftime(dia, sizeof(dia), "%d/%m/%Y", log_entry.EntryTime);
841 strftime(hora,sizeof(hora),"%H:%M:%S",log_entry.EntryTime);
842
ae412eca 843 if (fprintf(ufile->file, "%s\t%s\t%s\t%s\t%"PRIu64"\t%s\t%ld\t%s\n",dia,hora,log_entry.Ip,url,(uint64_t)log_entry.DataSize,log_entry.HttpCode,log_entry.ElapsedTime,smartfilter)<=0) {
525f6fa7 844 debuga(_("Write error in the log file of user %s\n"),log_entry.User);
27d1fa35
FM
845 exit(EXIT_FAILURE);
846 }
847
848 if(fp_log && ilf!=ILF_Sarg)
ae412eca 849 fprintf(fp_log, "%s\t%s\t%s\t%s\t%s\t%"PRIu64"\t%s\t%ld\t%s\n",dia,hora,log_entry.User,log_entry.Ip,url,(uint64_t)log_entry.DataSize,log_entry.HttpCode,log_entry.ElapsedTime,smartfilter);
27d1fa35
FM
850
851 totregsg++;
852
ae412eca 853 if(!dataonly && download_flag && strstr(log_entry.HttpCode,"DENIED") == 0) {
27d1fa35
FM
854 ndownload = 1;
855
856 if ( ! fp_Download_Unsort ) {
857 if ((fp_Download_Unsort = MY_FOPEN ( sz_Download_Unsort, "a")) == NULL) {
858 debuga(_("(log) Cannot open temporary file: %s - %s\n"),sz_Download_Unsort, strerror(errno));
859 exit (1);
860 }
861 }
525f6fa7 862 fprintf(fp_Download_Unsort,"%s\t%s\t%s\t%s\t%s\n",dia,hora,log_entry.User,log_entry.Ip,download_url);
27d1fa35
FM
863 }
864
ae412eca 865 if(fp_denied && strstr(log_entry.HttpCode,"DENIED/403") != 0) {
525f6fa7 866 fprintf(fp_denied, "%s\t%s\t%s\t%s\t%s\n",dia,hora,log_entry.User,log_entry.Ip,full_url);
60b48ae5 867 denied_count++;
27d1fa35
FM
868 }
869 if((ReportType & REPORT_TYPE_AUTH_FAILURES) != 0) {
ae412eca 870 if(fp_authfail && (strstr(log_entry.HttpCode,"DENIED/401") != 0 || strstr(log_entry.HttpCode,"DENIED/407") != 0)) {
525f6fa7 871 fprintf(fp_authfail, "%s\t%s\t%s\t%s\t%s\n",dia,hora,log_entry.User,log_entry.Ip,full_url);
27d1fa35
FM
872 authfail_count++;
873 }
874 }
875
876 if (ilf!=ILF_Sarg) {
877 if(!totper || idata<mindate){
878 mindate=idata;
c5d6ef4b 879 memcpy(&period.start,log_entry.EntryTime,sizeof(*log_entry.EntryTime));
27d1fa35
FM
880 strcpy(start_hour,tbuf2);
881 }
882 if (!totper || idata>maxdate) {
883 maxdate=idata;
c5d6ef4b 884 memcpy(&period.end,log_entry.EntryTime,sizeof(*log_entry.EntryTime));
27d1fa35
FM
885 }
886 totper=true;
887 }
888
889 if(debugm){
6debcf3d 890 printf("IP=\t%s\n",log_entry.Ip);
525f6fa7 891 printf("USER=\t%s\n",log_entry.User);
2c4bc22b 892 printf("ELAP=\t%ld\n",log_entry.ElapsedTime);
27d1fa35
FM
893 printf("DATE=\t%s\n",dia);
894 printf("TIME=\t%s\n",hora);
895 printf("FUNC=\t%s\n",fun);
896 printf("URL=\t%s\n",url);
ae412eca 897 printf("CODE=\t%s\n",log_entry.HttpCode);
2c4bc22b 898 printf("LEN=\t%"PRIu64"\n",(uint64_t)log_entry.DataSize);
27d1fa35
FM
899 }
900 }
901
902 if (!from_stdin) {
903 if (from_pipe)
904 pclose(fp_in);
905 else {
906 fclose(fp_in);
907 if( ShowReadStatistics )
908 printf(_("SARG: Records in file: %lu, reading: %3.2f%%\n"),recs1, (float) 100 );
909 }
910 }
911 }
912 longline_destroy(&line);
913
914 if(fp_log != NULL) {
915 char end_hour[128];
916 char val2[40];
917 char val4[255];//val4 must not be bigger than arq_log without fixing the strcpy below
918
919 fclose(fp_log);
920 safe_strcpy(end_hour,tbuf2,sizeof(end_hour));
921 strftime(val2,sizeof(val2),"%d%m%Y",&period.start);
922 strftime(val1,sizeof(val1),"%d%m%Y",&period.end);
923 if (snprintf(val4,sizeof(val4),"%s/sarg-%s_%s-%s_%s.log",ParsedOutputLog,val2,start_hour,val1,end_hour)>=sizeof(val4)) {
924 debuga(_("File name too long: %s/sarg-%s_%s-%s_%s.log\n"),ParsedOutputLog,val2,start_hour,val1,end_hour);
925 exit(EXIT_FAILURE);
926 }
927 if (rename(arq_log,val4)) {
928 debuga(_("failed to rename %s to %s - %s\n"),arq_log,val4,strerror(errno));
929 } else {
930 strcpy(arq_log,val4);
931
932 if(strcmp(ParsedOutputLogCompress,"nocompress") != 0 && ParsedOutputLogCompress[0] != '\0') {
933 /*
934 No double quotes around ParsedOutputLogCompress because it may contain command line options. If double quotes are
935 necessary around the command name, put them in the configuration file.
936 */
937 if (snprintf(val1,sizeof(val1),"%s \"%s\"",ParsedOutputLogCompress,arq_log)>=sizeof(val1)) {
938 debuga(_("Command too long: %s \"%s\"\n"),ParsedOutputLogCompress,arq_log);
939 exit(EXIT_FAILURE);
940 }
941 cstatus=system(val1);
942 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
943 debuga(_("command return status %d\n"),WEXITSTATUS(cstatus));
944 debuga(_("command: %s\n"),val1);
945 exit(EXIT_FAILURE);
946 }
947 }
948 }
949 if(debug)
950 debuga(_("Sarg parsed log saved as %s\n"),arq_log);
951 }
952
953 if (fp_denied) fclose(fp_denied);
954 if (fp_authfail) fclose(fp_authfail);
955 if (fp_Download_Unsort) fclose (fp_Download_Unsort);
956
957 for (ufile=first_user_file ; ufile ; ufile=ufile1) {
958 ufile1=ufile->next;
959 if (ufile->file!=NULL) fclose(ufile->file);
960 free(ufile);
961 }
962
963 if (debug) {
964 int totalcount=0;
965
966 debuga(_(" Records read: %ld, written: %ld, excluded: %ld\n"),totregsl,totregsg,totregsx);
967
968 for (ilf=0 ; ilf<ILF_Last ; ilf++) totalcount+=ilf_count[ilf];
969
970 if(ilf_count[ILF_Common]>0 && ilf_count[ILF_Squid]>0)
971 debuga(_("Log with mixed records format (squid and common log)\n"));
972
973 if(ilf_count[ILF_Common]>0 && ilf_count[ILF_Squid]==0)
974 debuga(_("Common log format\n"));
975
976 if(ilf_count[ILF_Common]==0 && ilf_count[ILF_Squid]>0)
977 debuga(_("Squid log format\n"));
978
979 if(ilf_count[ILF_Sarg]>0)
980 debuga(_("Sarg log format\n"));
981
982 if(totalcount==0 && totregsg)
983 debuga(_("Log with invalid format\n"));
984 }
985
986 if(debugz){
987 debugaz(_("date=%s\n"),dia);
988 debugaz(_("period=%s\n"),period.text);
989 }
990
27d1fa35
FM
991 return((totregsg!=0) ? 1 : 0);
992}