]>
Commit | Line | Data |
---|---|---|
27d1fa35 FM |
1 | /* |
2 | * SARG Squid Analysis Report Generator http://sarg.sourceforge.net | |
67302a9e | 3 | * 1998, 2013 |
27d1fa35 FM |
4 | * |
5 | * SARG donations: | |
6 | * please look at http://sarg.sourceforge.net/donations.php | |
7 | * Support: | |
8 | * http://sourceforge.net/projects/sarg/forums/forum/363374 | |
9 | * --------------------------------------------------------------------- | |
10 | * | |
11 | * This program is free software; you can redistribute it and/or modify | |
12 | * it under the terms of the GNU General Public License as published by | |
13 | * the Free Software Foundation; either version 2 of the License, or | |
14 | * (at your option) any later version. | |
15 | * | |
16 | * This program is distributed in the hope that it will be useful, | |
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 | * GNU General Public License for more details. | |
20 | * | |
21 | * You should have received a copy of the GNU General Public License | |
22 | * along with this program; if not, write to the Free Software | |
23 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. | |
24 | * | |
25 | */ | |
26 | ||
27 | #include "include/conf.h" | |
28 | #include "include/defs.h" | |
29 | #include "include/readlog.h" | |
6068ae56 | 30 | #include "include/filelist.h" |
27d1fa35 FM |
31 | |
32 | #define REPORT_EVERY_X_LINES 5000 | |
33 | #define MAX_OPEN_USER_FILES 10 | |
34 | ||
/*!
One element of the linked list of the temporary user files.
*/
struct userfilestruct
{
	//! The next element of the list or NULL at the end of the list.
	struct userfilestruct *next;
	//! The user whose entries are stored in the file.
	struct userinfostruct *user;
	//! The handle of the temporary file of the user. It is NULL while the file is not open.
	FILE *file;
};
41 | ||
7c8c06c5 FM |
/*!
The reasons why an entry of an input log is not kept.

The values are used as indexes in the array counting the excluded entries,
hence ER_Last must stay the last item.
*/
enum ExcludeReasonEnum
{
	ER_UserNameTooLong, //!< User name too long.
	ER_IncompleteQuery, //!< Squid logged an incomplete query received from the client.
	ER_LogfileTurnedOver, //!< Log file turned over.
	ER_ExcludeString, //!< Excluded by exclude_string from sarg.conf.
	ER_UnknownFormat, //!< Unknown input log file format.
	ER_FormatData, //!< Line to be ignored from the input log file.
	ER_OutOfDateRange, //!< Entry not within the requested date range.
	ER_OutOfWDayRange, //!< Ignored week day.
	ER_OutOfHourRange, //!< Ignored hour.
	ER_User, //!< User is not in the include_users list.
	ER_HttpCode, //!< HTTP code excluded by exclude_code file.
	ER_InvalidUserChar, //!< Invalid character found in user name.
	ER_NoUrl, //!< No URL in entry.
	ER_UntrackedIpAddr, //!< Not the IP address requested with -a.
	ER_Url, //!< URL excluded by -c or exclude_hosts.
	ER_OutOfTimeRange, //!< Entry time outside of requested hour range.
	ER_UntrackedUrl, //!< Not the URL requested by -s.
	ER_NoUser, //!< No user in entry.
	ER_UntrackedUser, //!< Not the user requested by -u.
	ER_SysUser, //!< System user.
	ER_IgnoredUser, //!< User ignored by exclude_users

	ER_Last //!< last entry of the list
};
89 | ||
27d1fa35 FM |
90 | numlist weekdays = { { 0, 1, 2, 3, 4, 5, 6 }, 7 }; |
91 | numlist hours = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 }, 24 }; | |
92 | ||
27d1fa35 | 93 | extern char *userfile; |
6068ae56 | 94 | extern FileListObject AccessLog; |
27d1fa35 | 95 | |
1c91da07 FM |
//! The descriptor of the squid log format.
extern const struct ReadLogProcessStruct ReadSquidLog;
//! The descriptor of the common log format.
extern const struct ReadLogProcessStruct ReadCommonLog;
//! The descriptor of the sarg parsed log format.
extern const struct ReadLogProcessStruct ReadSargLog;
//! The descriptor of the extended log format.
extern const struct ReadLogProcessStruct ReadExtLog;

/*!
The list of the supported log formats.

The formats are tried in this order when the format of an input line is not known yet.
Neither the descriptors nor the pointers stored in this table may be modified, hence
the const qualifier on both sides of the star.
*/
static const struct ReadLogProcessStruct *const LogFormats[]=
{
	&ReadSquidLog,
	&ReadCommonLog,
	&ReadSargLog,
	&ReadExtLog
};
109 | ||
944cf283 FM |
110 | //! The path to the sarg log file. |
111 | static char SargLogFile[4096]=""; | |
112 | //! Handle to the sarg log file. NULL if not created. | |
113 | static FILE *fp_log=NULL; | |
114 | //! The number of records read from the input logs. | |
115 | static long int totregsl=0; | |
116 | //! The number of records kept. | |
117 | static long int totregsg=0; | |
118 | //! The number of records excluded. | |
119 | static long int totregsx=0; | |
120 | //! The beginning of a linked list of user's file. | |
121 | static struct userfilestruct *first_user_file=NULL; | |
122 | //! Count the number of occurence of each input log format. | |
8e501bd6 | 123 | static unsigned long int format_count[sizeof(LogFormats)/sizeof(*LogFormats)]; |
944cf283 FM |
124 | //! The minimum date found in the input logs. |
125 | static int mindate=0; | |
126 | static int maxdate=0; | |
7c8c06c5 FM |
127 | //! Count the number of excluded records. |
128 | static unsigned long int excluded_count[ER_Last]; | |
27d1fa35 | 129 | |
944cf283 FM |
130 | /*! |
131 | Read a single log file. |
Each line is matched against the early filters, parsed by one of the formats listed in
LogFormats, checked against the remaining filters and, if it is kept, appended to the
temporary file of its user. Every line skipped by a filter increments one entry of
excluded_count.
27d1fa35 | 132 |
\param Filter The filtering parameters to apply to the entries of the file.
944cf283 | 133 | \param arq The log file name to read. |
A name made of a single dash reads the standard input.
27d1fa35 | 134 | */ |
944cf283 | 135 | static void ReadOneLogFile(struct ReadLogDataStruct *Filter,const char *arq) |
27d1fa35 | 136 | { |
27d1fa35 FM |
137 | longline line; |
138 | char *linebuf; | |
139 | char *str; | |
27d1fa35 | 140 | char hora[30]; |
c5d6ef4b | 141 | char dia[128]=""; |
27d1fa35 | 142 | char wuser[MAXLEN]; |
944cf283 | 143 | char tmp3[MAXLEN]=""; |
27d1fa35 FM |
144 | char download_url[MAXLEN]; |
145 | char smartfilter[MAXLEN]; | |
27d1fa35 | 146 | const char *url; |
dc34d345 | 147 | const char *user; |
944cf283 | 148 | int current_format_idx; |
27d1fa35 FM |
149 | int OutputNonZero = REPORT_EVERY_X_LINES ; |
150 | int idata=0; | |
27d1fa35 FM |
151 | int x; |
152 | int hmr; | |
153 | int nopen; | |
154 | int maxopenfiles=MAX_OPEN_USER_FILES; | |
4d62cb0a FM |
155 | int successive_errors=0; |
156 | int total_errors=0; | |
27d1fa35 FM |
157 | unsigned long int recs1=0UL; |
158 | unsigned long int recs2=0UL; | |
27d1fa35 | 159 | FILE *fp_in=NULL; |
27d1fa35 FM |
160 | bool from_pipe; |
161 | bool from_stdin; | |
162 | bool download_flag=false; | |
163 | bool id_is_ip; | |
1c91da07 | 164 | enum ReadLogReturnCodeEnum log_entry_status; |
27d1fa35 FM |
165 | struct stat logstat; |
166 | struct getwordstruct gwarea; | |
27d1fa35 FM |
167 | struct userfilestruct *prev_ufile; |
168 | struct userinfostruct *uinfo; | |
27d1fa35 FM |
169 | struct userfilestruct *ufile; |
170 | struct userfilestruct *ufile1; | |
c5d6ef4b | 171 | struct ReadLogStruct log_entry; |
1c91da07 | 172 | const struct ReadLogProcessStruct *current_format=NULL; |
27d1fa35 | 173 | |
944cf283 FM |
174 | current_format=NULL; |
175 | current_format_idx=-1; | |
// Call the optional NewFile callback of every supported format with the name of the file about to be read.
176 | for (x=0 ; x<sizeof(LogFormats)/sizeof(*LogFormats) ; x++) | |
177 | if (LogFormats[x]->NewFile) | |
178 | LogFormats[x]->NewFile(arq); | |
27d1fa35 | 179 | |
// A file name made of a single dash means that the log is read from the standard input.
944cf283 FM |
180 | if (arq[0]=='-' && arq[1]=='\0') { |
181 | if(debug) | |
182 | debuga(_("Reading access log file: from stdin\n")); | |
183 | fp_in=stdin; | |
184 | from_stdin=true; | |
185 | } else { | |
// When a date range is requested, the modification date of the file is converted to YYYYMMDD
// and the file is skipped if it was last modified before dfrom.
186 | if (Filter->DateRange[0]!='\0') { | |
187 | if (stat(arq,&logstat)!=0) { | |
188 | debuga(_("Cannot get the modification time of input log file %s (%s). Processing it anyway\n"),arq,strerror(errno)); | |
189 | } else { | |
190 | struct tm *logtime=localtime(&logstat.st_mtime); | |
191 | if ((logtime->tm_year+1900)*10000+(logtime->tm_mon+1)*100+logtime->tm_mday<dfrom) { | |
192 | debuga(_("Ignoring old log file %s\n"),arq); | |
193 | return; | |
27d1fa35 FM |
194 | } |
195 | } | |
27d1fa35 | 196 | } |
944cf283 FM |
197 | fp_in=decomp(arq,&from_pipe); |
198 | if(fp_in==NULL) { | |
d2cd218e | 199 | debuga(_("Cannot open input log file \"%s\": %s\n"),arq,strerror(errno)); |
944cf283 FM |
200 | exit(EXIT_FAILURE); |
201 | } | |
202 | if(debug) debuga(_("Reading access log file: %s\n"),arq); | |
203 | from_stdin=false; | |
204 | } | |
27d1fa35 | 205 | |
944cf283 | 206 | download_flag=false; |
27d1fa35 | 207 | |
944cf283 FM |
208 | recs1=0UL; |
209 | recs2=0UL; | |
2f4787e6 | 210 | |
944cf283 FM |
211 | // pre-read the file only if we have to show stats |
212 | if (ShowReadStatistics && ShowReadPercent && !from_stdin && !from_pipe) { | |
213 | size_t nread,i; | |
214 | bool skipcr=false; | |
215 | char tmp4[MAXLEN]; | |
27d1fa35 | 216 | |
// Count the lines of the file in recs1. A run of consecutive \n and \r characters counts as one end of line.
944cf283 FM |
217 | while ((nread=fread(tmp4,1,sizeof(tmp4),fp_in))>0) { |
218 | for (i=0 ; i<nread ; i++) | |
219 | if (skipcr) { | |
220 | if (tmp4[i]!='\n' && tmp4[i]!='\r') { | |
221 | skipcr=false; | |
27d1fa35 | 222 | } |
944cf283 FM |
223 | } else { |
224 | if (tmp4[i]=='\n' || tmp4[i]=='\r') { | |
225 | skipcr=true; | |
226 | recs1++; | |
227 | } | |
228 | } | |
27d1fa35 | 229 | } |
944cf283 FM |
230 | rewind(fp_in); |
231 | printf(_("SARG: Records in file: %lu, reading: %3.2f%%"),recs1,(float) 0); | |
232 | putchar('\r'); | |
233 | fflush( stdout ) ; | |
234 | } | |
27d1fa35 | 235 | |
944cf283 FM |
236 | if ((line=longline_create())==NULL) { |
237 | debuga(_("Not enough memory to read log file %s\n"),arq); | |
238 | exit(EXIT_FAILURE); | |
239 | } | |
27d1fa35 | 240 | |
944cf283 | 241 | while ((linebuf=longline_read(fp_in,line))!=NULL) { |
944cf283 | 242 | lines_read++; |
27d1fa35 | 243 | |
944cf283 FM |
244 | recs2++; |
// Refresh the progress indicator once every REPORT_EVERY_X_LINES lines.
245 | if (ShowReadStatistics && --OutputNonZero<=0) { | |
246 | if (recs1>0) { | |
247 | double perc = recs2 * 100. / recs1 ; | |
248 | printf(_("SARG: Records in file: %lu, reading: %3.2lf%%"),recs2,perc); | |
249 | } else { | |
250 | printf(_("SARG: Records in file: %lu"),recs2); | |
27d1fa35 | 251 | } |
944cf283 FM |
252 | putchar('\r'); |
253 | fflush (stdout); | |
254 | OutputNonZero = REPORT_EVERY_X_LINES ; | |
255 | } | |
0c87646f | 256 | |
944cf283 FM |
257 | /* |
258 | The following checks are retained here as I don't know to | |
259 | what format they apply. They date back to pre 2.4 versions. | |
260 | */ | |
261 | //if(blen < 58) continue; //this test conflict with the reading of the sarg log header line | |
7c8c06c5 FM |
262 | if(strstr(linebuf,"HTTP/0.0") != 0) {//recorded by squid when encountering an incomplete query |
263 | excluded_count[ER_IncompleteQuery]++; | |
264 | continue; | |
265 | } | |
266 | if(strstr(linebuf,"logfile turned over") != 0) {//reported by newsyslog | |
267 | excluded_count[ER_LogfileTurnedOver]++; | |
268 | continue; | |
269 | } | |
944cf283 FM |
270 | |
271 | // exclude_string | |
// ExcludeString is split on the colons and the line is dropped if it contains any of the resulting items.
272 | if(ExcludeString[0] != '\0') { | |
273 | bool exstring=false; | |
274 | getword_start(&gwarea,ExcludeString); | |
275 | while(strchr(gwarea.current,':') != 0) { | |
276 | if (getword_multisep(val1,sizeof(val1),&gwarea,':')<0) { | |
277 | debuga(_("Maybe you have a broken record or garbage in your exclusion string\n")); | |
278 | exit(EXIT_FAILURE); | |
27d1fa35 | 279 | } |
944cf283 | 280 | if((str=(char *) strstr(linebuf,val1)) != (char *) NULL ) { |
27d1fa35 | 281 | exstring=true; |
944cf283 FM |
282 | break; |
283 | } | |
27d1fa35 | 284 | } |
944cf283 FM |
285 | if(!exstring && (str=(char *) strstr(linebuf,gwarea.current)) != (char *) NULL ) |
286 | exstring=true; | |
7c8c06c5 FM |
287 | if(exstring) { |
288 | excluded_count[ER_ExcludeString]++; | |
289 | continue; | |
290 | } | |
944cf283 | 291 | } |
27d1fa35 | 292 | |
944cf283 FM |
293 | totregsl++; |
294 | if(debugm) | |
295 | printf("BUF=%s\n",linebuf); | |
27d1fa35 | 296 | |
944cf283 FM |
297 | // process the line |
298 | log_entry_status=RLRC_Unknown; | |
299 | memset(&log_entry,0,sizeof(log_entry)); | |
// Try first the format that parsed the previous line, if any.
300 | if (current_format) { | |
301 | log_entry_status=current_format->ReadEntry(linebuf,&log_entry); | |
302 | } | |
0c87646f | 303 | |
944cf283 FM |
304 | // find out what line format to use |
// Every other format is tried in turn. The first one that does not return RLRC_Unknown becomes the current format.
// A line that no format recognizes is counted as an error and the program stops when too many errors are found.
305 | if (log_entry_status==RLRC_Unknown) { | |
306 | for (x=0 ; x<(int)(sizeof(LogFormats)/sizeof(*LogFormats)) ; x++) { | |
307 | if (LogFormats[x]==current_format) continue; | |
308 | memset(&log_entry,0,sizeof(log_entry)); | |
309 | log_entry_status=LogFormats[x]->ReadEntry(linebuf,&log_entry); | |
310 | if (log_entry_status!=RLRC_Unknown) break; | |
311 | } | |
312 | if (x>=(int)(sizeof(LogFormats)/sizeof(*LogFormats))) { | |
313 | if (++successive_errors>NumLogSuccessiveErrors) { | |
314 | debuga(ngettext("%d consecutive error found in the input log file %s\n", | |
315 | "%d consecutive errors found in the input log file %s\n",successive_errors),successive_errors,arq); | |
316 | exit(EXIT_FAILURE); | |
1c91da07 | 317 | } |
944cf283 FM |
318 | if (NumLogTotalErrors>=0 && ++total_errors>NumLogTotalErrors) { |
319 | debuga(ngettext("%d error found in the input log file (last in %s)\n", | |
320 | "%d errors found in the input log file (last in %s)\n",total_errors),total_errors,arq); | |
321 | exit(EXIT_FAILURE); | |
1c91da07 | 322 | } |
944cf283 | 323 | debuga(_("The following line read from %s could not be parsed and is ignored\n%s\n"),arq,linebuf); |
7c8c06c5 | 324 | excluded_count[ER_UnknownFormat]++; |
1c91da07 FM |
325 | continue; |
326 | } | |
944cf283 FM |
327 | current_format=LogFormats[x]; |
328 | current_format_idx=x; | |
329 | if (debugz) { | |
330 | /* TRANSLATORS: The argument is the log format name as translated by you. */ | |
331 | debuga(_("Log format identified as \"%s\" for %s\n"),_(current_format->Name),arq); | |
332 | } | |
333 | successive_errors=0; | |
334 | } | |
335 | if (log_entry_status==RLRC_Ignore) { | |
7c8c06c5 | 336 | excluded_count[ER_FormatData]++; |
944cf283 FM |
337 | continue; |
338 | } | |
339 | if (current_format_idx<0 || current_format==NULL) { | |
340 | debuga(_("Sarg failed to determine the format of the input log file %s\n"),arq); | |
341 | exit(EXIT_FAILURE); | |
342 | } | |
343 | if (log_entry_status==RLRC_InternalError) { | |
344 | debuga(_("Internal error encountered while processing %s\nSee previous message to know the reason for that error.\n"),arq); | |
345 | exit(EXIT_FAILURE); | |
346 | } | |
347 | format_count[current_format_idx]++; | |
348 | ||
// The sarg parsed log is created when the first entry is accepted by a format,
// but only if ParsedOutputLog is set and the input is not itself in the sarg log format.
349 | if (!fp_log && ParsedOutputLog[0] && current_format!=&ReadSargLog) { | |
350 | if(access(ParsedOutputLog,R_OK) != 0) { | |
351 | my_mkdir(ParsedOutputLog); | |
352 | } | |
353 | if (snprintf(SargLogFile,sizeof(SargLogFile),"%s/sarg_temp.log",ParsedOutputLog)>=sizeof(SargLogFile)) { | |
354 | debuga(_("File name too long: %s/sarg_temp.log\n"),ParsedOutputLog); | |
1c91da07 FM |
355 | exit(EXIT_FAILURE); |
356 | } | |
944cf283 FM |
357 | if((fp_log=MY_FOPEN(SargLogFile,"w"))==NULL) { |
358 | debuga(_("(log) Cannot open log file %s: %s\n"),SargLogFile,strerror(errno)); | |
1c91da07 FM |
359 | exit(EXIT_FAILURE); |
360 | } | |
944cf283 FM |
361 | fputs("*** SARG Log ***\n",fp_log); |
362 | } | |
1c91da07 | 363 | |
// An entry without IP address, user or URL pointer stops the reading of this file (break, not continue).
944cf283 FM |
364 | if (log_entry.Ip==NULL) { |
365 | debuga(_("Unknown input log file format: no IP addresses\n")); | |
366 | break; | |
367 | } | |
368 | if (log_entry.User==NULL) { | |
369 | debuga(_("Unknown input log file format: no user\n")); | |
370 | break; | |
371 | } | |
372 | if (log_entry.Url==NULL) { | |
373 | debuga(_("Unknown input log file format: no URL\n")); | |
374 | break; | |
375 | } | |
1c91da07 | 376 | |
944cf283 FM |
377 | idata=builddia(log_entry.EntryTime.tm_mday,log_entry.EntryTime.tm_mon+1,log_entry.EntryTime.tm_year+1900); |
378 | if(debugm) | |
379 | printf("DATE=%s IDATA=%d DFROM=%d DUNTIL=%d\n",Filter->DateRange,idata,dfrom,duntil); | |
27d1fa35 | 380 | |
944cf283 | 381 | if(Filter->DateRange[0] != '\0'){ |
7c8c06c5 FM |
382 | if(idata < dfrom || idata > duntil) { |
383 | excluded_count[ER_OutOfDateRange]++; | |
384 | continue; | |
385 | } | |
944cf283 | 386 | } |
27d1fa35 | 387 | |
944cf283 | 388 | // Record only hours usage which is required |
7c8c06c5 FM |
389 | if( bsearch( &( log_entry.EntryTime.tm_wday ), weekdays.list, weekdays.len, sizeof( int ), compar ) == NULL ) { |
390 | excluded_count[ER_OutOfWDayRange]++; | |
944cf283 | 391 | continue; |
7c8c06c5 | 392 | } |
27d1fa35 | 393 | |
7c8c06c5 FM |
394 | if( bsearch( &( log_entry.EntryTime.tm_hour ), hours.list, hours.len, sizeof( int ), compar ) == NULL ) { |
395 | excluded_count[ER_OutOfHourRange]++; | |
944cf283 | 396 | continue; |
7c8c06c5 | 397 | } |
27d1fa35 | 398 | |
27d1fa35 | 399 | |
944cf283 FM |
400 | if(strlen(log_entry.User) > MAX_USER_LEN) { |
401 | if (debugm) printf(_("User ID too long: %s\n"),log_entry.User); | |
7c8c06c5 | 402 | excluded_count[ER_UserNameTooLong]++; |
944cf283 FM |
403 | totregsx++; |
404 | continue; | |
405 | } | |
27d1fa35 | 406 | |
944cf283 FM |
407 | // include_users |
408 | if(IncludeUsers[0] != '\0') { | |
409 | snprintf(val1,sizeof(val1),":%s:",log_entry.User); | |
7c8c06c5 FM |
410 | if((str=(char *) strstr(IncludeUsers,val1)) == (char *) NULL ) { |
411 | excluded_count[ER_User]++; | |
27d1fa35 | 412 | continue; |
7c8c06c5 | 413 | } |
944cf283 | 414 | } |
27d1fa35 | 415 | |
944cf283 FM |
416 | if(vercode(log_entry.HttpCode)) { |
417 | if (debugm) printf(_("Excluded code: %s\n"),log_entry.HttpCode); | |
7c8c06c5 | 418 | excluded_count[ER_HttpCode]++; |
944cf283 FM |
419 | totregsx++; |
420 | continue; | |
421 | } | |
422 | ||
7c8c06c5 FM |
423 | if(testvaliduserchar(log_entry.User)) { |
424 | excluded_count[ER_InvalidUserChar]++; | |
944cf283 | 425 | continue; |
7c8c06c5 | 426 | } |
944cf283 FM |
427 | |
428 | // replace any tab by a single space | |
429 | for (str=log_entry.Url ; *str ; str++) | |
430 | if (*str=='\t') *str=' '; | |
431 | for (str=log_entry.HttpCode ; *str ; str++) | |
432 | if (*str=='\t') *str=' '; | |
433 | ||
434 | if (current_format!=&ReadSargLog) { | |
435 | /* | |
436 | The full URL is not saved in sarg log. There is no point in testing the URL to detect | |
437 | a downloaded file. | |
438 | */ | |
439 | download_flag=is_download_suffix(log_entry.Url); | |
440 | if (download_flag) { | |
441 | safe_strcpy(download_url,log_entry.Url,sizeof(download_url)); | |
27d1fa35 | 442 | } |
944cf283 FM |
443 | } else |
444 | download_flag=false; | |
27d1fa35 | 445 | |
944cf283 | 446 | url=process_url(log_entry.Url,LongUrl); |
7c8c06c5 FM |
447 | if (!url || url[0] == '\0') { |
448 | excluded_count[ER_NoUrl]++; | |
449 | continue; | |
450 | } | |
944cf283 FM |
451 | |
452 | if(addr[0] != '\0'){ | |
7c8c06c5 FM |
453 | if(strcmp(addr,log_entry.Ip)!=0) { |
454 | excluded_count[ER_UntrackedIpAddr]++; | |
455 | continue; | |
456 | } | |
944cf283 FM |
457 | } |
458 | if(Filter->HostFilter) { | |
459 | if(!vhexclude(url)) { | |
460 | if (debugm) printf(_("Excluded site: %s\n"),url); | |
7c8c06c5 | 461 | excluded_count[ER_Url]++; |
27d1fa35 FM |
462 | totregsx++; |
463 | continue; | |
464 | } | |
944cf283 | 465 | } |
27d1fa35 | 466 | |
944cf283 FM |
467 | if(Filter->StartTime >= 0 && Filter->EndTime >= 0) { |
468 | hmr=log_entry.EntryTime.tm_hour*100+log_entry.EntryTime.tm_min; | |
7c8c06c5 FM |
469 | if(hmr < Filter->StartTime || hmr > Filter->EndTime) { |
470 | excluded_count[ER_OutOfTimeRange]++; | |
471 | continue; | |
472 | } | |
944cf283 | 473 | } |
27d1fa35 | 474 | |
944cf283 | 475 | if(site[0] != '\0'){ |
7c8c06c5 FM |
476 | if(strstr(url,site)==0) { |
477 | excluded_count[ER_UntrackedUrl]++; | |
478 | continue; | |
479 | } | |
944cf283 | 480 | } |
27d1fa35 | 481 | |
// Select the identifier of the user: the IP address when UserIp is set, the user name otherwise.
// An empty name, or a name reduced to a dash or a space, is handled according to RecordsWithoutUser.
944cf283 FM |
482 | if(UserIp) { |
483 | log_entry.User=log_entry.Ip; | |
484 | id_is_ip=true; | |
485 | } else { | |
486 | id_is_ip=false; | |
487 | if ((log_entry.User[0]=='\0') || (log_entry.User[1]=='\0' && (log_entry.User[0]=='-' || log_entry.User[0]==' '))) { | |
488 | if(RecordsWithoutUser == RECORDWITHOUTUSER_IP) { | |
489 | log_entry.User=log_entry.Ip; | |
490 | id_is_ip=true; | |
27d1fa35 | 491 | } |
7c8c06c5 FM |
492 | if(RecordsWithoutUser == RECORDWITHOUTUSER_IGNORE) { |
493 | excluded_count[ER_NoUser]++; | |
27d1fa35 | 494 | continue; |
7c8c06c5 | 495 | } |
944cf283 FM |
496 | if(RecordsWithoutUser == RECORDWITHOUTUSER_EVERYBODY) |
497 | log_entry.User="everybody"; | |
27d1fa35 | 498 | } else { |
944cf283 | 499 | if(NtlmUserFormat == NTLMUSERFORMAT_USER) { |
e9ecdd7f | 500 | if ((str=strchr(log_entry.User,'+'))!=NULL || (str=strchr(log_entry.User,'\\'))!=NULL || (str=strchr(log_entry.User,'_'))!=NULL) { |
944cf283 | 501 | log_entry.User=str+1; |
27d1fa35 FM |
502 | } |
503 | } | |
504 | } | |
944cf283 | 505 | } |
27d1fa35 | 506 | |
944cf283 | 507 | if(us[0] != '\0'){ |
7c8c06c5 FM |
508 | if(strcmp(log_entry.User,us)!=0) { |
509 | excluded_count[ER_UntrackedUser]++; | |
510 | continue; | |
511 | } | |
944cf283 | 512 | } |
27d1fa35 | 513 | |
944cf283 FM |
514 | if(Filter->SysUsers) { |
515 | snprintf(wuser,sizeof(wuser),":%s:",log_entry.User); | |
7c8c06c5 FM |
516 | if(strstr(userfile, wuser) == 0) { |
517 | excluded_count[ER_SysUser]++; | |
944cf283 | 518 | continue; |
7c8c06c5 | 519 | } |
944cf283 | 520 | } |
27d1fa35 | 521 | |
944cf283 FM |
522 | if(Filter->UserFilter) { |
523 | if(!vuexclude(log_entry.User)) { | |
524 | if (debugm) printf(_("Excluded user: %s\n"),log_entry.User); | |
7c8c06c5 | 525 | excluded_count[ER_IgnoredUser]++; |
944cf283 FM |
526 | totregsx++; |
527 | continue; | |
27d1fa35 | 528 | } |
944cf283 | 529 | } |
27d1fa35 | 530 | |
dc34d345 FM |
531 | user=process_user(log_entry.User); |
532 | if (log_entry.User!=user) { | |
533 | log_entry.User=user; | |
534 | id_is_ip=false; | |
535 | } | |
944cf283 | 536 | if (log_entry.User[0]=='\0' || (log_entry.User[1]=='\0' && (log_entry.User[0]=='-' || |
7c8c06c5 FM |
537 | log_entry.User[0]==' ' || log_entry.User[0]==':'))) { |
538 | excluded_count[ER_NoUser]++; | |
944cf283 | 539 | continue; |
7c8c06c5 | 540 | } |
27d1fa35 | 541 | |
944cf283 | 542 | if (log_entry.DataSize<0) log_entry.DataSize=0; |
0c87646f | 543 | |
944cf283 FM |
544 | if (log_entry.ElapsedTime<0) log_entry.ElapsedTime=0; |
545 | if (Filter->max_elapsed>0 && log_entry.ElapsedTime>Filter->max_elapsed) { | |
546 | log_entry.ElapsedTime=0; | |
547 | } | |
27d1fa35 | 548 | |
944cf283 FM |
549 | if((str=(char *) strstr(linebuf, "[SmartFilter:")) != (char *) NULL ) { |
550 | fixendofline(str); | |
551 | snprintf(smartfilter,sizeof(smartfilter),"\"%s\"",str+1); | |
552 | } else strcpy(smartfilter,"\"\""); | |
27d1fa35 | 553 | |
// Search the user in the list of the user files and count the open files found before it.
944cf283 FM |
554 | nopen=0; |
555 | prev_ufile=NULL; | |
556 | for (ufile=first_user_file ; ufile && strcmp(log_entry.User,ufile->user->id)!=0 ; ufile=ufile->next) { | |
557 | prev_ufile=ufile; | |
558 | if (ufile->file) nopen++; | |
559 | } | |
560 | if (!ufile) { | |
561 | ufile=malloc(sizeof(*ufile)); | |
27d1fa35 | 562 | if (!ufile) { |
944cf283 FM |
563 | debuga(_("Not enough memory to store the user %s\n"),log_entry.User); |
564 | exit(EXIT_FAILURE); | |
565 | } | |
566 | memset(ufile,0,sizeof(*ufile)); | |
567 | ufile->next=first_user_file; | |
568 | first_user_file=ufile; | |
aa6ac9f2 | 569 | uinfo=userinfo_create(log_entry.User,(id_is_ip) ? NULL : log_entry.Ip); |
944cf283 | 570 | ufile->user=uinfo; |
944cf283 FM |
571 | nusers++; |
572 | } else { | |
// Known user: move its element to the head of the list unless it is already the first one.
573 | if (prev_ufile) { | |
574 | prev_ufile->next=ufile->next; | |
27d1fa35 FM |
575 | ufile->next=first_user_file; |
576 | first_user_file=ufile; | |
27d1fa35 | 577 | } |
944cf283 | 578 | } |
1c91da07 | 579 | #ifdef ENABLE_DOUBLE_CHECK_DATA |
944cf283 FM |
580 | ufile->user->nbytes+=log_entry.DataSize; |
581 | ufile->user->elap+=log_entry.ElapsedTime; | |
1c91da07 | 582 | #endif |
27d1fa35 | 583 | |
944cf283 FM |
584 | if (ufile->file==NULL) { |
// Too many open files: the first maxopenfiles open files of the list stay open and the following ones are closed.
585 | if (nopen>=maxopenfiles) { | |
586 | x=0; | |
587 | for (ufile1=first_user_file ; ufile1 ; ufile1=ufile1->next) { | |
588 | if (ufile1->file!=NULL) { | |
589 | if (x>=maxopenfiles) { | |
590 | if (fclose(ufile1->file)==EOF) { | |
591 | debuga(_("Write error in the log file of user %s: %s\n"),ufile1->user->id,strerror(errno)); | |
592 | exit(EXIT_FAILURE); | |
27d1fa35 | 593 | } |
944cf283 | 594 | ufile1->file=NULL; |
27d1fa35 | 595 | } |
944cf283 | 596 | x++; |
27d1fa35 FM |
597 | } |
598 | } | |
27d1fa35 | 599 | } |
944cf283 FM |
600 | if (snprintf (tmp3, sizeof(tmp3), "%s/%s.user_unsort", tmp, ufile->user->filename)>=sizeof(tmp3)) { |
601 | debuga(_("Temporary user file name too long: %s/%s.user_unsort\n"), tmp, ufile->user->filename); | |
27d1fa35 FM |
602 | exit(EXIT_FAILURE); |
603 | } | |
944cf283 FM |
604 | if ((ufile->file = MY_FOPEN (tmp3, "a")) == NULL) { |
605 | debuga(_("(log) Cannot open temporary file %s: %s\n"), tmp3, strerror(errno)); | |
606 | exit (1); | |
1c91da07 | 607 | } |
944cf283 | 608 | } |
27d1fa35 | 609 | |
944cf283 FM |
610 | strftime(dia, sizeof(dia), "%d/%m/%Y",&log_entry.EntryTime); |
611 | strftime(hora,sizeof(hora),"%H:%M:%S",&log_entry.EntryTime); | |
27d1fa35 | 612 | |
944cf283 FM |
613 | if (fprintf(ufile->file, "%s\t%s\t%s\t%s\t%"PRIu64"\t%s\t%ld\t%s\n",dia,hora, |
614 | log_entry.Ip,url,(uint64_t)log_entry.DataSize, | |
615 | log_entry.HttpCode,log_entry.ElapsedTime,smartfilter)<=0) { | |
616 | debuga(_("Write error in the log file of user %s\n"),log_entry.User); | |
617 | exit(EXIT_FAILURE); | |
618 | } | |
619 | records_kept++; | |
27d1fa35 | 620 | |
944cf283 FM |
621 | if (fp_log && current_format!=&ReadSargLog) { |
622 | fprintf(fp_log, "%s\t%s\t%s\t%s\t%s\t%"PRIu64"\t%s\t%ld\t%s\n",dia,hora, | |
623 | log_entry.User,log_entry.Ip,url,(uint64_t)log_entry.DataSize, | |
624 | log_entry.HttpCode,log_entry.ElapsedTime,smartfilter); | |
625 | } | |
626 | ||
627 | totregsg++; | |
628 | ||
629 | denied_write(&log_entry); | |
630 | authfail_write(&log_entry); | |
88776d28 | 631 | if (download_flag) download_write(&log_entry,download_url); |
27d1fa35 | 632 | |
// Extend the period covered by the report (period.start and period.end) to include this entry.
// This is not done for entries read from a sarg parsed log.
944cf283 FM |
633 | if (current_format!=&ReadSargLog) { |
634 | if (period.start.tm_year==0 || idata<mindate || compare_date(&period.start,&log_entry.EntryTime)>0){ | |
635 | mindate=idata; | |
636 | memcpy(&period.start,&log_entry.EntryTime,sizeof(log_entry.EntryTime)); | |
637 | } | |
638 | if (period.end.tm_year==0 || idata>maxdate || compare_date(&period.end,&log_entry.EntryTime)<0) { | |
639 | maxdate=idata; | |
640 | memcpy(&period.end,&log_entry.EntryTime,sizeof(log_entry.EntryTime)); | |
27d1fa35 FM |
641 | } |
642 | } | |
643 | ||
944cf283 FM |
644 | if(debugm){ |
645 | printf("IP=\t%s\n",log_entry.Ip); | |
646 | printf("USER=\t%s\n",log_entry.User); | |
647 | printf("ELAP=\t%ld\n",log_entry.ElapsedTime); | |
648 | printf("DATE=\t%s\n",dia); | |
649 | printf("TIME=\t%s\n",hora); | |
650 | //printf("FUNC=\t%s\n",fun); | |
651 | printf("URL=\t%s\n",url); | |
652 | printf("CODE=\t%s\n",log_entry.HttpCode); | |
653 | printf("LEN=\t%"PRIu64"\n",(uint64_t)log_entry.DataSize); | |
27d1fa35 FM |
654 | } |
655 | } | |
656 | longline_destroy(&line); | |
657 | ||
944cf283 FM |
658 | if (!from_stdin) { |
659 | if (from_pipe) | |
660 | pclose(fp_in); | |
661 | else { | |
662 | fclose(fp_in); | |
663 | if (ShowReadStatistics) { | |
664 | if (ShowReadPercent) | |
665 | printf(_("SARG: Records in file: %lu, reading: %3.2f%%\n"),recs2, (float) 100 ); | |
666 | else | |
667 | printf(_("SARG: Records in file: %lu\n"),recs2); | |
668 | } | |
669 | } | |
670 | } | |
671 | } | |
672 | ||
7c8c06c5 FM |
673 | /*! |
674 | * Display a line with the excluded entries count. | |
675 | * | |
676 | * \param Explain A translated string explaining the exluded count. | |
677 | * \param Reason The reason number. | |
678 | */ | |
679 | static void DisplayExcludeCount(const char *Explain,enum ExcludeReasonEnum Reason) | |
680 | { | |
681 | if (excluded_count[Reason]>0) { | |
682 | debuga(" %s: %lu\n",Explain,excluded_count[Reason]); | |
683 | } | |
684 | } | |
685 | ||
944cf283 FM |
686 | /*! |
687 | Read the log files. | |
688 | ||
689 | \param Filter The filtering parameters for the file to load. | |
690 | ||
691 | \retval 1 Records found. | |
692 | \retval 0 No record found. | |
693 | */ | |
694 | int ReadLogFile(struct ReadLogDataStruct *Filter) | |
695 | { | |
944cf283 FM |
696 | int x; |
697 | int cstatus; | |
698 | struct userfilestruct *ufile; | |
699 | struct userfilestruct *ufile1; | |
6068ae56 FM |
700 | FileListIterator FIter; |
701 | const char *file; | |
944cf283 FM |
702 | |
703 | for (x=0 ; x<sizeof(format_count)/sizeof(*format_count) ; x++) format_count[x]=0; | |
7c8c06c5 | 704 | for (x=0 ; x<sizeof(excluded_count)/sizeof(*excluded_count) ; x++) excluded_count[x]=0; |
944cf283 FM |
705 | first_user_file=NULL; |
706 | ||
707 | if (!dataonly) { | |
708 | denied_open(); | |
709 | authfail_open(); | |
710 | download_open(); | |
711 | } | |
712 | ||
6068ae56 FM |
713 | FIter=FileListIter_Open(AccessLog); |
714 | while ((file=FileListIter_Next(FIter))!=NULL) | |
715 | ReadOneLogFile(Filter,file); | |
716 | FileListIter_Close(FIter); | |
944cf283 | 717 | |
27d1fa35 | 718 | if(fp_log != NULL) { |
27d1fa35 | 719 | char val2[40]; |
944cf283 | 720 | char val4[4096];//val4 must not be bigger than SargLogFile without fixing the strcpy below |
0c87646f | 721 | |
507460ae | 722 | if (fclose(fp_log)==EOF) { |
944cf283 | 723 | debuga(_("Write error in %s: %s\n"),SargLogFile,strerror(errno)); |
507460ae FM |
724 | exit(EXIT_FAILURE); |
725 | } | |
944cf283 FM |
726 | strftime(val2,sizeof(val2),"%d%m%Y_%H%M",&period.start); |
727 | strftime(val1,sizeof(val1),"%d%m%Y_%H%M",&period.end); | |
728 | if (snprintf(val4,sizeof(val4),"%s/sarg-%s-%s.log",ParsedOutputLog,val2,val1)>=sizeof(val4)) { | |
729 | debuga(_("File name too long: %s/sarg-%s-%s.log\n"),ParsedOutputLog,val2,val1); | |
27d1fa35 FM |
730 | exit(EXIT_FAILURE); |
731 | } | |
944cf283 FM |
732 | if (rename(SargLogFile,val4)) { |
733 | debuga(_("failed to rename %s to %s - %s\n"),SargLogFile,val4,strerror(errno)); | |
27d1fa35 | 734 | } else { |
944cf283 | 735 | strcpy(SargLogFile,val4); |
27d1fa35 FM |
736 | |
737 | if(strcmp(ParsedOutputLogCompress,"nocompress") != 0 && ParsedOutputLogCompress[0] != '\0') { | |
738 | /* | |
739 | No double quotes around ParsedOutputLogCompress because it may contain command line options. If double quotes are | |
740 | necessary around the command name, put them in the configuration file. | |
741 | */ | |
944cf283 FM |
742 | if (snprintf(val1,sizeof(val1),"%s \"%s\"",ParsedOutputLogCompress,SargLogFile)>=sizeof(val1)) { |
743 | debuga(_("Command too long: %s \"%s\"\n"),ParsedOutputLogCompress,SargLogFile); | |
27d1fa35 FM |
744 | exit(EXIT_FAILURE); |
745 | } | |
746 | cstatus=system(val1); | |
747 | if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) { | |
748 | debuga(_("command return status %d\n"),WEXITSTATUS(cstatus)); | |
749 | debuga(_("command: %s\n"),val1); | |
750 | exit(EXIT_FAILURE); | |
751 | } | |
752 | } | |
753 | } | |
754 | if(debug) | |
944cf283 | 755 | debuga(_("Sarg parsed log saved as %s\n"),SargLogFile); |
0c87646f | 756 | } |
27d1fa35 | 757 | |
8e53b2e7 | 758 | denied_close(); |
16b013cc | 759 | authfail_close(); |
11284535 | 760 | download_close(); |
27d1fa35 FM |
761 | |
762 | for (ufile=first_user_file ; ufile ; ufile=ufile1) { | |
763 | ufile1=ufile->next; | |
507460ae FM |
764 | if (ufile->file!=NULL && fclose(ufile->file)==EOF) { |
765 | debuga(_("Write error in the log file of user %s: %s\n"),ufile->user->id,strerror(errno)); | |
766 | exit(EXIT_FAILURE); | |
767 | } | |
27d1fa35 FM |
768 | free(ufile); |
769 | } | |
770 | ||
771 | if (debug) { | |
8e501bd6 | 772 | unsigned long int totalcount=0; |
27d1fa35 FM |
773 | |
774 | debuga(_(" Records read: %ld, written: %ld, excluded: %ld\n"),totregsl,totregsg,totregsx); | |
775 | ||
7c8c06c5 FM |
776 | for (x=sizeof(excluded_count)/sizeof(*excluded_count)-1 ; x>=0 && excluded_count[x]>0 ; x--); |
777 | if (x>=0) { | |
778 | debuga(_("Reasons for excluded entries:\n")); | |
779 | DisplayExcludeCount(_("User name too long"),ER_UserNameTooLong); | |
780 | DisplayExcludeCount(_("Squid logged an incomplete query received from the client"),ER_IncompleteQuery); | |
781 | DisplayExcludeCount(_("Log file turned over"),ER_LogfileTurnedOver); | |
7c8c06c5 FM |
782 | DisplayExcludeCount(_("Excluded by \"exclude_string\" in sarg.conf"),ER_ExcludeString); |
783 | DisplayExcludeCount(_("Unknown input log file format"),ER_UnknownFormat); | |
784 | DisplayExcludeCount(_("Line ignored by the input log format"),ER_FormatData); | |
785 | DisplayExcludeCount(_("Time outside the requested date range (-d)"),ER_OutOfDateRange); | |
786 | DisplayExcludeCount(_("Ignored week day (\"weekdays\" parameter in sarg.conf)"),ER_OutOfWDayRange); | |
787 | DisplayExcludeCount(_("Ignored hour (\"hours\" parameter in sarg.conf)"),ER_OutOfHourRange); | |
788 | DisplayExcludeCount(_("User is not in the \"include_users\" list"),ER_User); | |
789 | DisplayExcludeCount(_("HTTP code excluded by \"exclude_code\" file"),ER_HttpCode); | |
790 | DisplayExcludeCount(_("Invalid character found in user name"),ER_InvalidUserChar); | |
791 | DisplayExcludeCount(_("No URL in entry"),ER_NoUrl); | |
792 | DisplayExcludeCount(_("Not the IP address requested with -a"),ER_UntrackedIpAddr); | |
793 | DisplayExcludeCount(_("URL excluded by -c or \"exclude_hosts\""),ER_Url); | |
794 | DisplayExcludeCount(_("Entry time outside of requested hour range (-t)"),ER_OutOfTimeRange); | |
795 | DisplayExcludeCount(_("Not the URL requested by -s"),ER_UntrackedUrl); | |
796 | DisplayExcludeCount(_("No user in entry"),ER_NoUser); | |
797 | DisplayExcludeCount(_("Not the user requested by -u"),ER_UntrackedUser); | |
798 | DisplayExcludeCount(_("System user as defined by \"password\" in sarg.conf"),ER_SysUser); | |
799 | DisplayExcludeCount(_("User ignored by \"exclude_users\""),ER_IgnoredUser); | |
800 | } | |
801 | ||
1c91da07 FM |
802 | for (x=0 ; x<sizeof(LogFormats)/sizeof(*LogFormats) ; x++) { |
803 | if (format_count[x]>0) { | |
804 | /* TRANSLATORS: It displays the number of lines found in the input log files | |
805 | * for each supported log format. The log format name is the %s and is a string | |
806 | * you translate somewhere else. */ | |
8e501bd6 | 807 | debuga(_("%s: %lu entries\n"),_(LogFormats[x]->Name),format_count[x]); |
1c91da07 FM |
808 | totalcount+=format_count[x]; |
809 | } | |
810 | } | |
27d1fa35 | 811 | |
1c91da07 | 812 | if (totalcount==0 && totregsg) |
27d1fa35 FM |
813 | debuga(_("Log with invalid format\n")); |
814 | } | |
815 | ||
944cf283 | 816 | if (debugz) |
27d1fa35 | 817 | debugaz(_("period=%s\n"),period.text); |
27d1fa35 | 818 | |
27d1fa35 FM |
819 | return((totregsg!=0) ? 1 : 0); |
820 | } |