]>
Commit | Line | Data |
---|---|---|
27d1fa35 FM |
1 | /* |
2 | * SARG Squid Analysis Report Generator http://sarg.sourceforge.net | |
67302a9e | 3 | * 1998, 2013 |
27d1fa35 FM |
4 | * |
5 | * SARG donations: | |
6 | * please look at http://sarg.sourceforge.net/donations.php | |
7 | * Support: | |
8 | * http://sourceforge.net/projects/sarg/forums/forum/363374 | |
9 | * --------------------------------------------------------------------- | |
10 | * | |
11 | * This program is free software; you can redistribute it and/or modify | |
12 | * it under the terms of the GNU General Public License as published by | |
13 | * the Free Software Foundation; either version 2 of the License, or | |
14 | * (at your option) any later version. | |
15 | * | |
16 | * This program is distributed in the hope that it will be useful, | |
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 | * GNU General Public License for more details. | |
20 | * | |
21 | * You should have received a copy of the GNU General Public License | |
22 | * along with this program; if not, write to the Free Software | |
23 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. | |
24 | * | |
25 | */ | |
26 | ||
27 | #include "include/conf.h" | |
28 | #include "include/defs.h" | |
29 | #include "include/readlog.h" | |
6068ae56 | 30 | #include "include/filelist.h" |
27d1fa35 FM |
31 | |
32 | #define REPORT_EVERY_X_LINES 5000 | |
33 | #define MAX_OPEN_USER_FILES 10 | |
34 | ||
/*!
 * One node of the linked list of per-user temporary files.
 */
struct userfilestruct
{
	//! The next node of the list or NULL at the end of the list.
	struct userfilestruct *next;
	//! The user whose entries are written to the file.
	struct userinfostruct *user;
	//! The handle of the user's file or NULL if it is not currently open.
	FILE *file;
};
41 | ||
7c8c06c5 FM |
/*!
 * The reasons why an entry of the input log may be left out of the report.
 * Each value is an index into the table of excluded record counters.
 */
enum ExcludeReasonEnum
{
	ER_UserNameTooLong,   //!< The user name is too long.
	ER_IncompleteQuery,   //!< Squid logged an incomplete query received from the client.
	ER_LogfileTurnedOver, //!< The log file was turned over.
	ER_ExcludeString,     //!< Excluded by exclude_string from sarg.conf.
	ER_UnknownFormat,     //!< Unknown input log file format.
	ER_FormatData,        //!< Line to be ignored from the input log file.
	ER_OutOfDateRange,    //!< Entry not within the requested date range.
	ER_OutOfWDayRange,    //!< Ignored week day.
	ER_OutOfHourRange,    //!< Ignored hour.
	ER_User,              //!< User is not in the include_users list.
	ER_HttpCode,          //!< HTTP code excluded by the exclude_code file.
	ER_InvalidUserChar,   //!< Invalid character found in the user name.
	ER_NoUrl,             //!< No URL in the entry.
	ER_UntrackedIpAddr,   //!< Not the IP address requested with -a.
	ER_Url,               //!< URL excluded by -c or exclude_hosts.
	ER_OutOfTimeRange,    //!< Entry time outside of the requested hour range.
	ER_UntrackedUrl,      //!< Not the URL requested by -s.
	ER_NoUser,            //!< No user in the entry.
	ER_UntrackedUser,     //!< Not the user requested by -u.
	ER_SysUser,           //!< System user.
	ER_IgnoredUser,       //!< User ignored by exclude_users.

	ER_Last               //!< Number of reasons. It must stay the last entry of the list.
};
89 | ||
27d1fa35 FM |
90 | numlist weekdays = { { 0, 1, 2, 3, 4, 5, 6 }, 7 }; |
91 | numlist hours = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 }, 24 }; | |
92 | ||
27d1fa35 | 93 | extern char *userfile; |
6068ae56 | 94 | extern FileListObject AccessLog; |
27d1fa35 | 95 | |
1c91da07 FM |
96 | extern const struct ReadLogProcessStruct ReadSquidLog; |
97 | extern const struct ReadLogProcessStruct ReadCommonLog; | |
98 | extern const struct ReadLogProcessStruct ReadSargLog; | |
99 | extern const struct ReadLogProcessStruct ReadExtLog; | |
100 | ||
101 | //! The list of the supported log formats. | |
102 | static const struct ReadLogProcessStruct const *LogFormats[]= | |
103 | { | |
104 | &ReadSquidLog, | |
105 | &ReadCommonLog, | |
106 | &ReadSargLog, | |
107 | &ReadExtLog | |
108 | }; | |
109 | ||
944cf283 FM |
110 | //! The path to the sarg log file. |
111 | static char SargLogFile[4096]=""; | |
112 | //! Handle to the sarg log file. NULL if not created. | |
113 | static FILE *fp_log=NULL; | |
114 | //! The number of records read from the input logs. | |
115 | static long int totregsl=0; | |
116 | //! The number of records kept. | |
117 | static long int totregsg=0; | |
118 | //! The number of records excluded. | |
119 | static long int totregsx=0; | |
120 | //! The beginning of a linked list of user's file. | |
121 | static struct userfilestruct *first_user_file=NULL; | |
122 | //! Count the number of occurence of each input log format. | |
8e501bd6 | 123 | static unsigned long int format_count[sizeof(LogFormats)/sizeof(*LogFormats)]; |
944cf283 FM |
124 | //! The minimum date found in the input logs. |
125 | static int mindate=0; | |
126 | static int maxdate=0; | |
7c8c06c5 FM |
127 | //! Count the number of excluded records. |
128 | static unsigned long int excluded_count[ER_Last]; | |
6a943fc1 FM |
129 | //! Earliest date found in the log. |
130 | static int EarliestDate=-1; | |
131 | //! The earliest date in time format. | |
132 | static struct tm EarliestDateTime; | |
133 | //! Latest date found in the log. | |
134 | static int LatestDate=-1; | |
135 | //! The latest date in time format. | |
136 | static struct tm LatestDateTime; | |
27d1fa35 | 137 | |
944cf283 FM |
138 | /*! |
139 | Read a single log file. | |
27d1fa35 | 140 | |
944cf283 | 141 | \param arq The log file name to read. |
27d1fa35 | 142 | */ |
944cf283 | 143 | static void ReadOneLogFile(struct ReadLogDataStruct *Filter,const char *arq) |
27d1fa35 | 144 | { |
27d1fa35 FM |
145 | longline line; |
146 | char *linebuf; | |
147 | char *str; | |
27d1fa35 | 148 | char hora[30]; |
c5d6ef4b | 149 | char dia[128]=""; |
27d1fa35 | 150 | char wuser[MAXLEN]; |
944cf283 | 151 | char tmp3[MAXLEN]=""; |
27d1fa35 FM |
152 | char download_url[MAXLEN]; |
153 | char smartfilter[MAXLEN]; | |
27d1fa35 | 154 | const char *url; |
dc34d345 | 155 | const char *user; |
944cf283 | 156 | int current_format_idx; |
27d1fa35 FM |
157 | int OutputNonZero = REPORT_EVERY_X_LINES ; |
158 | int idata=0; | |
27d1fa35 FM |
159 | int x; |
160 | int hmr; | |
161 | int nopen; | |
162 | int maxopenfiles=MAX_OPEN_USER_FILES; | |
4d62cb0a FM |
163 | int successive_errors=0; |
164 | int total_errors=0; | |
27d1fa35 FM |
165 | unsigned long int recs1=0UL; |
166 | unsigned long int recs2=0UL; | |
27d1fa35 | 167 | FILE *fp_in=NULL; |
27d1fa35 FM |
168 | bool from_pipe; |
169 | bool from_stdin; | |
170 | bool download_flag=false; | |
171 | bool id_is_ip; | |
1c91da07 | 172 | enum ReadLogReturnCodeEnum log_entry_status; |
27d1fa35 FM |
173 | struct stat logstat; |
174 | struct getwordstruct gwarea; | |
27d1fa35 FM |
175 | struct userfilestruct *prev_ufile; |
176 | struct userinfostruct *uinfo; | |
27d1fa35 FM |
177 | struct userfilestruct *ufile; |
178 | struct userfilestruct *ufile1; | |
c5d6ef4b | 179 | struct ReadLogStruct log_entry; |
1c91da07 | 180 | const struct ReadLogProcessStruct *current_format=NULL; |
27d1fa35 | 181 | |
944cf283 FM |
182 | current_format=NULL; |
183 | current_format_idx=-1; | |
184 | for (x=0 ; x<sizeof(LogFormats)/sizeof(*LogFormats) ; x++) | |
185 | if (LogFormats[x]->NewFile) | |
186 | LogFormats[x]->NewFile(arq); | |
27d1fa35 | 187 | |
944cf283 FM |
188 | if (arq[0]=='-' && arq[1]=='\0') { |
189 | if(debug) | |
190 | debuga(_("Reading access log file: from stdin\n")); | |
191 | fp_in=stdin; | |
192 | from_stdin=true; | |
193 | } else { | |
194 | if (Filter->DateRange[0]!='\0') { | |
195 | if (stat(arq,&logstat)!=0) { | |
196 | debuga(_("Cannot get the modification time of input log file %s (%s). Processing it anyway\n"),arq,strerror(errno)); | |
197 | } else { | |
198 | struct tm *logtime=localtime(&logstat.st_mtime); | |
199 | if ((logtime->tm_year+1900)*10000+(logtime->tm_mon+1)*100+logtime->tm_mday<dfrom) { | |
200 | debuga(_("Ignoring old log file %s\n"),arq); | |
201 | return; | |
27d1fa35 FM |
202 | } |
203 | } | |
27d1fa35 | 204 | } |
944cf283 FM |
205 | fp_in=decomp(arq,&from_pipe); |
206 | if(fp_in==NULL) { | |
d2cd218e | 207 | debuga(_("Cannot open input log file \"%s\": %s\n"),arq,strerror(errno)); |
944cf283 FM |
208 | exit(EXIT_FAILURE); |
209 | } | |
210 | if(debug) debuga(_("Reading access log file: %s\n"),arq); | |
211 | from_stdin=false; | |
212 | } | |
27d1fa35 | 213 | |
944cf283 | 214 | download_flag=false; |
27d1fa35 | 215 | |
944cf283 FM |
216 | recs1=0UL; |
217 | recs2=0UL; | |
2f4787e6 | 218 | |
944cf283 FM |
219 | // pre-read the file only if we have to show stats |
220 | if (ShowReadStatistics && ShowReadPercent && !from_stdin && !from_pipe) { | |
221 | size_t nread,i; | |
222 | bool skipcr=false; | |
223 | char tmp4[MAXLEN]; | |
27d1fa35 | 224 | |
944cf283 FM |
225 | while ((nread=fread(tmp4,1,sizeof(tmp4),fp_in))>0) { |
226 | for (i=0 ; i<nread ; i++) | |
227 | if (skipcr) { | |
228 | if (tmp4[i]!='\n' && tmp4[i]!='\r') { | |
229 | skipcr=false; | |
27d1fa35 | 230 | } |
944cf283 FM |
231 | } else { |
232 | if (tmp4[i]=='\n' || tmp4[i]=='\r') { | |
233 | skipcr=true; | |
234 | recs1++; | |
235 | } | |
236 | } | |
27d1fa35 | 237 | } |
944cf283 FM |
238 | rewind(fp_in); |
239 | printf(_("SARG: Records in file: %lu, reading: %3.2f%%"),recs1,(float) 0); | |
240 | putchar('\r'); | |
241 | fflush( stdout ) ; | |
242 | } | |
27d1fa35 | 243 | |
944cf283 FM |
244 | if ((line=longline_create())==NULL) { |
245 | debuga(_("Not enough memory to read log file %s\n"),arq); | |
246 | exit(EXIT_FAILURE); | |
247 | } | |
27d1fa35 | 248 | |
944cf283 | 249 | while ((linebuf=longline_read(fp_in,line))!=NULL) { |
944cf283 | 250 | lines_read++; |
27d1fa35 | 251 | |
944cf283 FM |
252 | recs2++; |
253 | if (ShowReadStatistics && --OutputNonZero<=0) { | |
254 | if (recs1>0) { | |
255 | double perc = recs2 * 100. / recs1 ; | |
256 | printf(_("SARG: Records in file: %lu, reading: %3.2lf%%"),recs2,perc); | |
257 | } else { | |
258 | printf(_("SARG: Records in file: %lu"),recs2); | |
27d1fa35 | 259 | } |
944cf283 FM |
260 | putchar('\r'); |
261 | fflush (stdout); | |
262 | OutputNonZero = REPORT_EVERY_X_LINES ; | |
263 | } | |
0c87646f | 264 | |
944cf283 FM |
265 | /* |
266 | The following checks are retained here as I don't know to | |
267 | what format they apply. They date back to pre 2.4 versions. | |
268 | */ | |
269 | //if(blen < 58) continue; //this test conflict with the reading of the sarg log header line | |
7c8c06c5 FM |
270 | if(strstr(linebuf,"HTTP/0.0") != 0) {//recorded by squid when encountering an incomplete query |
271 | excluded_count[ER_IncompleteQuery]++; | |
272 | continue; | |
273 | } | |
274 | if(strstr(linebuf,"logfile turned over") != 0) {//reported by newsyslog | |
275 | excluded_count[ER_LogfileTurnedOver]++; | |
276 | continue; | |
277 | } | |
944cf283 FM |
278 | |
279 | // exclude_string | |
280 | if(ExcludeString[0] != '\0') { | |
281 | bool exstring=false; | |
282 | getword_start(&gwarea,ExcludeString); | |
283 | while(strchr(gwarea.current,':') != 0) { | |
284 | if (getword_multisep(val1,sizeof(val1),&gwarea,':')<0) { | |
285 | debuga(_("Maybe you have a broken record or garbage in your exclusion string\n")); | |
286 | exit(EXIT_FAILURE); | |
27d1fa35 | 287 | } |
944cf283 | 288 | if((str=(char *) strstr(linebuf,val1)) != (char *) NULL ) { |
27d1fa35 | 289 | exstring=true; |
944cf283 FM |
290 | break; |
291 | } | |
27d1fa35 | 292 | } |
944cf283 FM |
293 | if(!exstring && (str=(char *) strstr(linebuf,gwarea.current)) != (char *) NULL ) |
294 | exstring=true; | |
7c8c06c5 FM |
295 | if(exstring) { |
296 | excluded_count[ER_ExcludeString]++; | |
297 | continue; | |
298 | } | |
944cf283 | 299 | } |
27d1fa35 | 300 | |
944cf283 FM |
301 | totregsl++; |
302 | if(debugm) | |
303 | printf("BUF=%s\n",linebuf); | |
27d1fa35 | 304 | |
944cf283 FM |
305 | // process the line |
306 | log_entry_status=RLRC_Unknown; | |
307 | memset(&log_entry,0,sizeof(log_entry)); | |
308 | if (current_format) { | |
309 | log_entry_status=current_format->ReadEntry(linebuf,&log_entry); | |
310 | } | |
0c87646f | 311 | |
944cf283 FM |
312 | // find out what line format to use |
313 | if (log_entry_status==RLRC_Unknown) { | |
314 | for (x=0 ; x<(int)(sizeof(LogFormats)/sizeof(*LogFormats)) ; x++) { | |
315 | if (LogFormats[x]==current_format) continue; | |
316 | memset(&log_entry,0,sizeof(log_entry)); | |
317 | log_entry_status=LogFormats[x]->ReadEntry(linebuf,&log_entry); | |
318 | if (log_entry_status!=RLRC_Unknown) break; | |
319 | } | |
320 | if (x>=(int)(sizeof(LogFormats)/sizeof(*LogFormats))) { | |
321 | if (++successive_errors>NumLogSuccessiveErrors) { | |
322 | debuga(ngettext("%d consecutive error found in the input log file %s\n", | |
323 | "%d consecutive errors found in the input log file %s\n",successive_errors),successive_errors,arq); | |
324 | exit(EXIT_FAILURE); | |
1c91da07 | 325 | } |
944cf283 FM |
326 | if (NumLogTotalErrors>=0 && ++total_errors>NumLogTotalErrors) { |
327 | debuga(ngettext("%d error found in the input log file (last in %s)\n", | |
328 | "%d errors found in the input log file (last in %s)\n",total_errors),total_errors,arq); | |
329 | exit(EXIT_FAILURE); | |
1c91da07 | 330 | } |
944cf283 | 331 | debuga(_("The following line read from %s could not be parsed and is ignored\n%s\n"),arq,linebuf); |
7c8c06c5 | 332 | excluded_count[ER_UnknownFormat]++; |
1c91da07 FM |
333 | continue; |
334 | } | |
944cf283 FM |
335 | current_format=LogFormats[x]; |
336 | current_format_idx=x; | |
337 | if (debugz) { | |
338 | /* TRANSLATORS: The argument is the log format name as translated by you. */ | |
339 | debuga(_("Log format identified as \"%s\" for %s\n"),_(current_format->Name),arq); | |
340 | } | |
341 | successive_errors=0; | |
342 | } | |
343 | if (log_entry_status==RLRC_Ignore) { | |
7c8c06c5 | 344 | excluded_count[ER_FormatData]++; |
944cf283 FM |
345 | continue; |
346 | } | |
347 | if (current_format_idx<0 || current_format==NULL) { | |
348 | debuga(_("Sarg failed to determine the format of the input log file %s\n"),arq); | |
349 | exit(EXIT_FAILURE); | |
350 | } | |
351 | if (log_entry_status==RLRC_InternalError) { | |
352 | debuga(_("Internal error encountered while processing %s\nSee previous message to know the reason for that error.\n"),arq); | |
353 | exit(EXIT_FAILURE); | |
354 | } | |
355 | format_count[current_format_idx]++; | |
356 | ||
357 | if (!fp_log && ParsedOutputLog[0] && current_format!=&ReadSargLog) { | |
358 | if(access(ParsedOutputLog,R_OK) != 0) { | |
359 | my_mkdir(ParsedOutputLog); | |
360 | } | |
361 | if (snprintf(SargLogFile,sizeof(SargLogFile),"%s/sarg_temp.log",ParsedOutputLog)>=sizeof(SargLogFile)) { | |
362 | debuga(_("File name too long: %s/sarg_temp.log\n"),ParsedOutputLog); | |
1c91da07 FM |
363 | exit(EXIT_FAILURE); |
364 | } | |
944cf283 FM |
365 | if((fp_log=MY_FOPEN(SargLogFile,"w"))==NULL) { |
366 | debuga(_("(log) Cannot open log file %s: %s\n"),SargLogFile,strerror(errno)); | |
1c91da07 FM |
367 | exit(EXIT_FAILURE); |
368 | } | |
944cf283 FM |
369 | fputs("*** SARG Log ***\n",fp_log); |
370 | } | |
1c91da07 | 371 | |
944cf283 FM |
372 | if (log_entry.Ip==NULL) { |
373 | debuga(_("Unknown input log file format: no IP addresses\n")); | |
374 | break; | |
375 | } | |
376 | if (log_entry.User==NULL) { | |
377 | debuga(_("Unknown input log file format: no user\n")); | |
378 | break; | |
379 | } | |
380 | if (log_entry.Url==NULL) { | |
381 | debuga(_("Unknown input log file format: no URL\n")); | |
382 | break; | |
383 | } | |
1c91da07 | 384 | |
944cf283 FM |
385 | idata=builddia(log_entry.EntryTime.tm_mday,log_entry.EntryTime.tm_mon+1,log_entry.EntryTime.tm_year+1900); |
386 | if(debugm) | |
387 | printf("DATE=%s IDATA=%d DFROM=%d DUNTIL=%d\n",Filter->DateRange,idata,dfrom,duntil); | |
27d1fa35 | 388 | |
6a943fc1 FM |
389 | if (EarliestDate<0 || idata<EarliestDate) { |
390 | EarliestDate=idata; | |
391 | memcpy(&EarliestDateTime,&log_entry.EntryTime,sizeof(struct tm)); | |
392 | } | |
393 | if (LatestDate<0 || idata>LatestDate) { | |
394 | LatestDate=idata; | |
395 | memcpy(&LatestDateTime,&log_entry.EntryTime,sizeof(struct tm)); | |
396 | } | |
944cf283 | 397 | if(Filter->DateRange[0] != '\0'){ |
7c8c06c5 FM |
398 | if(idata < dfrom || idata > duntil) { |
399 | excluded_count[ER_OutOfDateRange]++; | |
400 | continue; | |
401 | } | |
944cf283 | 402 | } |
27d1fa35 | 403 | |
944cf283 | 404 | // Record only hours usage which is required |
7c8c06c5 FM |
405 | if( bsearch( &( log_entry.EntryTime.tm_wday ), weekdays.list, weekdays.len, sizeof( int ), compar ) == NULL ) { |
406 | excluded_count[ER_OutOfWDayRange]++; | |
944cf283 | 407 | continue; |
7c8c06c5 | 408 | } |
27d1fa35 | 409 | |
7c8c06c5 FM |
410 | if( bsearch( &( log_entry.EntryTime.tm_hour ), hours.list, hours.len, sizeof( int ), compar ) == NULL ) { |
411 | excluded_count[ER_OutOfHourRange]++; | |
944cf283 | 412 | continue; |
7c8c06c5 | 413 | } |
27d1fa35 | 414 | |
27d1fa35 | 415 | |
944cf283 FM |
416 | if(strlen(log_entry.User) > MAX_USER_LEN) { |
417 | if (debugm) printf(_("User ID too long: %s\n"),log_entry.User); | |
7c8c06c5 | 418 | excluded_count[ER_UserNameTooLong]++; |
944cf283 FM |
419 | totregsx++; |
420 | continue; | |
421 | } | |
27d1fa35 | 422 | |
944cf283 FM |
423 | // include_users |
424 | if(IncludeUsers[0] != '\0') { | |
425 | snprintf(val1,sizeof(val1),":%s:",log_entry.User); | |
7c8c06c5 FM |
426 | if((str=(char *) strstr(IncludeUsers,val1)) == (char *) NULL ) { |
427 | excluded_count[ER_User]++; | |
27d1fa35 | 428 | continue; |
7c8c06c5 | 429 | } |
944cf283 | 430 | } |
27d1fa35 | 431 | |
944cf283 FM |
432 | if(vercode(log_entry.HttpCode)) { |
433 | if (debugm) printf(_("Excluded code: %s\n"),log_entry.HttpCode); | |
7c8c06c5 | 434 | excluded_count[ER_HttpCode]++; |
944cf283 FM |
435 | totregsx++; |
436 | continue; | |
437 | } | |
438 | ||
7c8c06c5 FM |
439 | if(testvaliduserchar(log_entry.User)) { |
440 | excluded_count[ER_InvalidUserChar]++; | |
944cf283 | 441 | continue; |
7c8c06c5 | 442 | } |
944cf283 FM |
443 | |
444 | // replace any tab by a single space | |
445 | for (str=log_entry.Url ; *str ; str++) | |
446 | if (*str=='\t') *str=' '; | |
447 | for (str=log_entry.HttpCode ; *str ; str++) | |
448 | if (*str=='\t') *str=' '; | |
449 | ||
450 | if (current_format!=&ReadSargLog) { | |
451 | /* | |
452 | The full URL is not saved in sarg log. There is no point in testing the URL to detect | |
453 | a downloaded file. | |
454 | */ | |
455 | download_flag=is_download_suffix(log_entry.Url); | |
456 | if (download_flag) { | |
457 | safe_strcpy(download_url,log_entry.Url,sizeof(download_url)); | |
27d1fa35 | 458 | } |
944cf283 FM |
459 | } else |
460 | download_flag=false; | |
27d1fa35 | 461 | |
944cf283 | 462 | url=process_url(log_entry.Url,LongUrl); |
7c8c06c5 FM |
463 | if (!url || url[0] == '\0') { |
464 | excluded_count[ER_NoUrl]++; | |
465 | continue; | |
466 | } | |
944cf283 FM |
467 | |
468 | if(addr[0] != '\0'){ | |
7c8c06c5 FM |
469 | if(strcmp(addr,log_entry.Ip)!=0) { |
470 | excluded_count[ER_UntrackedIpAddr]++; | |
471 | continue; | |
472 | } | |
944cf283 FM |
473 | } |
474 | if(Filter->HostFilter) { | |
475 | if(!vhexclude(url)) { | |
476 | if (debugm) printf(_("Excluded site: %s\n"),url); | |
7c8c06c5 | 477 | excluded_count[ER_Url]++; |
27d1fa35 FM |
478 | totregsx++; |
479 | continue; | |
480 | } | |
944cf283 | 481 | } |
27d1fa35 | 482 | |
944cf283 FM |
483 | if(Filter->StartTime >= 0 && Filter->EndTime >= 0) { |
484 | hmr=log_entry.EntryTime.tm_hour*100+log_entry.EntryTime.tm_min; | |
7c8c06c5 FM |
485 | if(hmr < Filter->StartTime || hmr > Filter->EndTime) { |
486 | excluded_count[ER_OutOfTimeRange]++; | |
487 | continue; | |
488 | } | |
944cf283 | 489 | } |
27d1fa35 | 490 | |
944cf283 | 491 | if(site[0] != '\0'){ |
7c8c06c5 FM |
492 | if(strstr(url,site)==0) { |
493 | excluded_count[ER_UntrackedUrl]++; | |
494 | continue; | |
495 | } | |
944cf283 | 496 | } |
27d1fa35 | 497 | |
944cf283 FM |
498 | if(UserIp) { |
499 | log_entry.User=log_entry.Ip; | |
500 | id_is_ip=true; | |
501 | } else { | |
502 | id_is_ip=false; | |
503 | if ((log_entry.User[0]=='\0') || (log_entry.User[1]=='\0' && (log_entry.User[0]=='-' || log_entry.User[0]==' '))) { | |
504 | if(RecordsWithoutUser == RECORDWITHOUTUSER_IP) { | |
505 | log_entry.User=log_entry.Ip; | |
506 | id_is_ip=true; | |
27d1fa35 | 507 | } |
7c8c06c5 FM |
508 | if(RecordsWithoutUser == RECORDWITHOUTUSER_IGNORE) { |
509 | excluded_count[ER_NoUser]++; | |
27d1fa35 | 510 | continue; |
7c8c06c5 | 511 | } |
944cf283 FM |
512 | if(RecordsWithoutUser == RECORDWITHOUTUSER_EVERYBODY) |
513 | log_entry.User="everybody"; | |
27d1fa35 | 514 | } else { |
944cf283 | 515 | if(NtlmUserFormat == NTLMUSERFORMAT_USER) { |
e9ecdd7f | 516 | if ((str=strchr(log_entry.User,'+'))!=NULL || (str=strchr(log_entry.User,'\\'))!=NULL || (str=strchr(log_entry.User,'_'))!=NULL) { |
944cf283 | 517 | log_entry.User=str+1; |
27d1fa35 FM |
518 | } |
519 | } | |
520 | } | |
944cf283 | 521 | } |
27d1fa35 | 522 | |
944cf283 | 523 | if(us[0] != '\0'){ |
7c8c06c5 FM |
524 | if(strcmp(log_entry.User,us)!=0) { |
525 | excluded_count[ER_UntrackedUser]++; | |
526 | continue; | |
527 | } | |
944cf283 | 528 | } |
27d1fa35 | 529 | |
944cf283 FM |
530 | if(Filter->SysUsers) { |
531 | snprintf(wuser,sizeof(wuser),":%s:",log_entry.User); | |
7c8c06c5 FM |
532 | if(strstr(userfile, wuser) == 0) { |
533 | excluded_count[ER_SysUser]++; | |
944cf283 | 534 | continue; |
7c8c06c5 | 535 | } |
944cf283 | 536 | } |
27d1fa35 | 537 | |
944cf283 FM |
538 | if(Filter->UserFilter) { |
539 | if(!vuexclude(log_entry.User)) { | |
540 | if (debugm) printf(_("Excluded user: %s\n"),log_entry.User); | |
7c8c06c5 | 541 | excluded_count[ER_IgnoredUser]++; |
944cf283 FM |
542 | totregsx++; |
543 | continue; | |
27d1fa35 | 544 | } |
944cf283 | 545 | } |
27d1fa35 | 546 | |
dc34d345 FM |
547 | user=process_user(log_entry.User); |
548 | if (log_entry.User!=user) { | |
549 | log_entry.User=user; | |
550 | id_is_ip=false; | |
551 | } | |
944cf283 | 552 | if (log_entry.User[0]=='\0' || (log_entry.User[1]=='\0' && (log_entry.User[0]=='-' || |
7c8c06c5 FM |
553 | log_entry.User[0]==' ' || log_entry.User[0]==':'))) { |
554 | excluded_count[ER_NoUser]++; | |
944cf283 | 555 | continue; |
7c8c06c5 | 556 | } |
27d1fa35 | 557 | |
944cf283 | 558 | if (log_entry.DataSize<0) log_entry.DataSize=0; |
0c87646f | 559 | |
944cf283 FM |
560 | if (log_entry.ElapsedTime<0) log_entry.ElapsedTime=0; |
561 | if (Filter->max_elapsed>0 && log_entry.ElapsedTime>Filter->max_elapsed) { | |
562 | log_entry.ElapsedTime=0; | |
563 | } | |
27d1fa35 | 564 | |
944cf283 FM |
565 | if((str=(char *) strstr(linebuf, "[SmartFilter:")) != (char *) NULL ) { |
566 | fixendofline(str); | |
567 | snprintf(smartfilter,sizeof(smartfilter),"\"%s\"",str+1); | |
568 | } else strcpy(smartfilter,"\"\""); | |
27d1fa35 | 569 | |
944cf283 FM |
570 | nopen=0; |
571 | prev_ufile=NULL; | |
572 | for (ufile=first_user_file ; ufile && strcmp(log_entry.User,ufile->user->id)!=0 ; ufile=ufile->next) { | |
573 | prev_ufile=ufile; | |
574 | if (ufile->file) nopen++; | |
575 | } | |
576 | if (!ufile) { | |
577 | ufile=malloc(sizeof(*ufile)); | |
27d1fa35 | 578 | if (!ufile) { |
944cf283 FM |
579 | debuga(_("Not enough memory to store the user %s\n"),log_entry.User); |
580 | exit(EXIT_FAILURE); | |
581 | } | |
582 | memset(ufile,0,sizeof(*ufile)); | |
583 | ufile->next=first_user_file; | |
584 | first_user_file=ufile; | |
8e134f1a FM |
585 | /* |
586 | * This id_is_ip stuff is just to store the string only once if the user is | |
587 | * identified by its IP address instead of a distinct ID and IP address. | |
588 | */ | |
aa6ac9f2 | 589 | uinfo=userinfo_create(log_entry.User,(id_is_ip) ? NULL : log_entry.Ip); |
944cf283 | 590 | ufile->user=uinfo; |
944cf283 FM |
591 | nusers++; |
592 | } else { | |
593 | if (prev_ufile) { | |
594 | prev_ufile->next=ufile->next; | |
27d1fa35 FM |
595 | ufile->next=first_user_file; |
596 | first_user_file=ufile; | |
27d1fa35 | 597 | } |
944cf283 | 598 | } |
1c91da07 | 599 | #ifdef ENABLE_DOUBLE_CHECK_DATA |
944cf283 FM |
600 | ufile->user->nbytes+=log_entry.DataSize; |
601 | ufile->user->elap+=log_entry.ElapsedTime; | |
1c91da07 | 602 | #endif |
27d1fa35 | 603 | |
944cf283 FM |
604 | if (ufile->file==NULL) { |
605 | if (nopen>=maxopenfiles) { | |
606 | x=0; | |
607 | for (ufile1=first_user_file ; ufile1 ; ufile1=ufile1->next) { | |
608 | if (ufile1->file!=NULL) { | |
609 | if (x>=maxopenfiles) { | |
610 | if (fclose(ufile1->file)==EOF) { | |
611 | debuga(_("Write error in the log file of user %s: %s\n"),ufile1->user->id,strerror(errno)); | |
612 | exit(EXIT_FAILURE); | |
27d1fa35 | 613 | } |
944cf283 | 614 | ufile1->file=NULL; |
27d1fa35 | 615 | } |
944cf283 | 616 | x++; |
27d1fa35 FM |
617 | } |
618 | } | |
27d1fa35 | 619 | } |
944cf283 FM |
620 | if (snprintf (tmp3, sizeof(tmp3), "%s/%s.user_unsort", tmp, ufile->user->filename)>=sizeof(tmp3)) { |
621 | debuga(_("Temporary user file name too long: %s/%s.user_unsort\n"), tmp, ufile->user->filename); | |
27d1fa35 FM |
622 | exit(EXIT_FAILURE); |
623 | } | |
944cf283 FM |
624 | if ((ufile->file = MY_FOPEN (tmp3, "a")) == NULL) { |
625 | debuga(_("(log) Cannot open temporary file %s: %s\n"), tmp3, strerror(errno)); | |
626 | exit (1); | |
1c91da07 | 627 | } |
944cf283 | 628 | } |
27d1fa35 | 629 | |
944cf283 FM |
630 | strftime(dia, sizeof(dia), "%d/%m/%Y",&log_entry.EntryTime); |
631 | strftime(hora,sizeof(hora),"%H:%M:%S",&log_entry.EntryTime); | |
27d1fa35 | 632 | |
944cf283 FM |
633 | if (fprintf(ufile->file, "%s\t%s\t%s\t%s\t%"PRIu64"\t%s\t%ld\t%s\n",dia,hora, |
634 | log_entry.Ip,url,(uint64_t)log_entry.DataSize, | |
635 | log_entry.HttpCode,log_entry.ElapsedTime,smartfilter)<=0) { | |
636 | debuga(_("Write error in the log file of user %s\n"),log_entry.User); | |
637 | exit(EXIT_FAILURE); | |
638 | } | |
639 | records_kept++; | |
27d1fa35 | 640 | |
944cf283 FM |
641 | if (fp_log && current_format!=&ReadSargLog) { |
642 | fprintf(fp_log, "%s\t%s\t%s\t%s\t%s\t%"PRIu64"\t%s\t%ld\t%s\n",dia,hora, | |
643 | log_entry.User,log_entry.Ip,url,(uint64_t)log_entry.DataSize, | |
644 | log_entry.HttpCode,log_entry.ElapsedTime,smartfilter); | |
645 | } | |
646 | ||
647 | totregsg++; | |
648 | ||
649 | denied_write(&log_entry); | |
650 | authfail_write(&log_entry); | |
88776d28 | 651 | if (download_flag) download_write(&log_entry,download_url); |
27d1fa35 | 652 | |
944cf283 FM |
653 | if (current_format!=&ReadSargLog) { |
654 | if (period.start.tm_year==0 || idata<mindate || compare_date(&period.start,&log_entry.EntryTime)>0){ | |
655 | mindate=idata; | |
656 | memcpy(&period.start,&log_entry.EntryTime,sizeof(log_entry.EntryTime)); | |
657 | } | |
658 | if (period.end.tm_year==0 || idata>maxdate || compare_date(&period.end,&log_entry.EntryTime)<0) { | |
659 | maxdate=idata; | |
660 | memcpy(&period.end,&log_entry.EntryTime,sizeof(log_entry.EntryTime)); | |
27d1fa35 FM |
661 | } |
662 | } | |
663 | ||
944cf283 FM |
664 | if(debugm){ |
665 | printf("IP=\t%s\n",log_entry.Ip); | |
666 | printf("USER=\t%s\n",log_entry.User); | |
667 | printf("ELAP=\t%ld\n",log_entry.ElapsedTime); | |
668 | printf("DATE=\t%s\n",dia); | |
669 | printf("TIME=\t%s\n",hora); | |
670 | //printf("FUNC=\t%s\n",fun); | |
671 | printf("URL=\t%s\n",url); | |
672 | printf("CODE=\t%s\n",log_entry.HttpCode); | |
673 | printf("LEN=\t%"PRIu64"\n",(uint64_t)log_entry.DataSize); | |
27d1fa35 FM |
674 | } |
675 | } | |
676 | longline_destroy(&line); | |
677 | ||
944cf283 FM |
678 | if (!from_stdin) { |
679 | if (from_pipe) | |
680 | pclose(fp_in); | |
681 | else { | |
682 | fclose(fp_in); | |
683 | if (ShowReadStatistics) { | |
684 | if (ShowReadPercent) | |
685 | printf(_("SARG: Records in file: %lu, reading: %3.2f%%\n"),recs2, (float) 100 ); | |
686 | else | |
687 | printf(_("SARG: Records in file: %lu\n"),recs2); | |
688 | } | |
689 | } | |
690 | } | |
691 | } | |
692 | ||
7c8c06c5 FM |
693 | /*! |
694 | * Display a line with the excluded entries count. | |
695 | * | |
696 | * \param Explain A translated string explaining the exluded count. | |
697 | * \param Reason The reason number. | |
698 | */ | |
699 | static void DisplayExcludeCount(const char *Explain,enum ExcludeReasonEnum Reason) | |
700 | { | |
701 | if (excluded_count[Reason]>0) { | |
702 | debuga(" %s: %lu\n",Explain,excluded_count[Reason]); | |
703 | } | |
704 | } | |
705 | ||
944cf283 FM |
706 | /*! |
707 | Read the log files. | |
708 | ||
709 | \param Filter The filtering parameters for the file to load. | |
710 | ||
711 | \retval 1 Records found. | |
712 | \retval 0 No record found. | |
713 | */ | |
714 | int ReadLogFile(struct ReadLogDataStruct *Filter) | |
715 | { | |
944cf283 FM |
716 | int x; |
717 | int cstatus; | |
718 | struct userfilestruct *ufile; | |
719 | struct userfilestruct *ufile1; | |
6068ae56 FM |
720 | FileListIterator FIter; |
721 | const char *file; | |
944cf283 FM |
722 | |
723 | for (x=0 ; x<sizeof(format_count)/sizeof(*format_count) ; x++) format_count[x]=0; | |
7c8c06c5 | 724 | for (x=0 ; x<sizeof(excluded_count)/sizeof(*excluded_count) ; x++) excluded_count[x]=0; |
944cf283 FM |
725 | first_user_file=NULL; |
726 | ||
727 | if (!dataonly) { | |
728 | denied_open(); | |
729 | authfail_open(); | |
730 | download_open(); | |
731 | } | |
732 | ||
6068ae56 FM |
733 | FIter=FileListIter_Open(AccessLog); |
734 | while ((file=FileListIter_Next(FIter))!=NULL) | |
735 | ReadOneLogFile(Filter,file); | |
736 | FileListIter_Close(FIter); | |
944cf283 | 737 | |
27d1fa35 | 738 | if(fp_log != NULL) { |
27d1fa35 | 739 | char val2[40]; |
944cf283 | 740 | char val4[4096];//val4 must not be bigger than SargLogFile without fixing the strcpy below |
0c87646f | 741 | |
507460ae | 742 | if (fclose(fp_log)==EOF) { |
944cf283 | 743 | debuga(_("Write error in %s: %s\n"),SargLogFile,strerror(errno)); |
507460ae FM |
744 | exit(EXIT_FAILURE); |
745 | } | |
944cf283 FM |
746 | strftime(val2,sizeof(val2),"%d%m%Y_%H%M",&period.start); |
747 | strftime(val1,sizeof(val1),"%d%m%Y_%H%M",&period.end); | |
748 | if (snprintf(val4,sizeof(val4),"%s/sarg-%s-%s.log",ParsedOutputLog,val2,val1)>=sizeof(val4)) { | |
749 | debuga(_("File name too long: %s/sarg-%s-%s.log\n"),ParsedOutputLog,val2,val1); | |
27d1fa35 FM |
750 | exit(EXIT_FAILURE); |
751 | } | |
944cf283 FM |
752 | if (rename(SargLogFile,val4)) { |
753 | debuga(_("failed to rename %s to %s - %s\n"),SargLogFile,val4,strerror(errno)); | |
27d1fa35 | 754 | } else { |
944cf283 | 755 | strcpy(SargLogFile,val4); |
27d1fa35 FM |
756 | |
757 | if(strcmp(ParsedOutputLogCompress,"nocompress") != 0 && ParsedOutputLogCompress[0] != '\0') { | |
758 | /* | |
759 | No double quotes around ParsedOutputLogCompress because it may contain command line options. If double quotes are | |
760 | necessary around the command name, put them in the configuration file. | |
761 | */ | |
944cf283 FM |
762 | if (snprintf(val1,sizeof(val1),"%s \"%s\"",ParsedOutputLogCompress,SargLogFile)>=sizeof(val1)) { |
763 | debuga(_("Command too long: %s \"%s\"\n"),ParsedOutputLogCompress,SargLogFile); | |
27d1fa35 FM |
764 | exit(EXIT_FAILURE); |
765 | } | |
766 | cstatus=system(val1); | |
767 | if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) { | |
768 | debuga(_("command return status %d\n"),WEXITSTATUS(cstatus)); | |
769 | debuga(_("command: %s\n"),val1); | |
770 | exit(EXIT_FAILURE); | |
771 | } | |
772 | } | |
773 | } | |
774 | if(debug) | |
944cf283 | 775 | debuga(_("Sarg parsed log saved as %s\n"),SargLogFile); |
0c87646f | 776 | } |
27d1fa35 | 777 | |
8e53b2e7 | 778 | denied_close(); |
16b013cc | 779 | authfail_close(); |
11284535 | 780 | download_close(); |
27d1fa35 FM |
781 | |
782 | for (ufile=first_user_file ; ufile ; ufile=ufile1) { | |
783 | ufile1=ufile->next; | |
507460ae FM |
784 | if (ufile->file!=NULL && fclose(ufile->file)==EOF) { |
785 | debuga(_("Write error in the log file of user %s: %s\n"),ufile->user->id,strerror(errno)); | |
786 | exit(EXIT_FAILURE); | |
787 | } | |
27d1fa35 FM |
788 | free(ufile); |
789 | } | |
790 | ||
791 | if (debug) { | |
8e501bd6 | 792 | unsigned long int totalcount=0; |
27d1fa35 FM |
793 | |
794 | debuga(_(" Records read: %ld, written: %ld, excluded: %ld\n"),totregsl,totregsg,totregsx); | |
795 | ||
7c8c06c5 FM |
796 | for (x=sizeof(excluded_count)/sizeof(*excluded_count)-1 ; x>=0 && excluded_count[x]>0 ; x--); |
797 | if (x>=0) { | |
798 | debuga(_("Reasons for excluded entries:\n")); | |
799 | DisplayExcludeCount(_("User name too long"),ER_UserNameTooLong); | |
800 | DisplayExcludeCount(_("Squid logged an incomplete query received from the client"),ER_IncompleteQuery); | |
801 | DisplayExcludeCount(_("Log file turned over"),ER_LogfileTurnedOver); | |
7c8c06c5 FM |
802 | DisplayExcludeCount(_("Excluded by \"exclude_string\" in sarg.conf"),ER_ExcludeString); |
803 | DisplayExcludeCount(_("Unknown input log file format"),ER_UnknownFormat); | |
804 | DisplayExcludeCount(_("Line ignored by the input log format"),ER_FormatData); | |
805 | DisplayExcludeCount(_("Time outside the requested date range (-d)"),ER_OutOfDateRange); | |
806 | DisplayExcludeCount(_("Ignored week day (\"weekdays\" parameter in sarg.conf)"),ER_OutOfWDayRange); | |
807 | DisplayExcludeCount(_("Ignored hour (\"hours\" parameter in sarg.conf)"),ER_OutOfHourRange); | |
808 | DisplayExcludeCount(_("User is not in the \"include_users\" list"),ER_User); | |
809 | DisplayExcludeCount(_("HTTP code excluded by \"exclude_code\" file"),ER_HttpCode); | |
810 | DisplayExcludeCount(_("Invalid character found in user name"),ER_InvalidUserChar); | |
811 | DisplayExcludeCount(_("No URL in entry"),ER_NoUrl); | |
812 | DisplayExcludeCount(_("Not the IP address requested with -a"),ER_UntrackedIpAddr); | |
813 | DisplayExcludeCount(_("URL excluded by -c or \"exclude_hosts\""),ER_Url); | |
814 | DisplayExcludeCount(_("Entry time outside of requested hour range (-t)"),ER_OutOfTimeRange); | |
815 | DisplayExcludeCount(_("Not the URL requested by -s"),ER_UntrackedUrl); | |
816 | DisplayExcludeCount(_("No user in entry"),ER_NoUser); | |
817 | DisplayExcludeCount(_("Not the user requested by -u"),ER_UntrackedUser); | |
818 | DisplayExcludeCount(_("System user as defined by \"password\" in sarg.conf"),ER_SysUser); | |
819 | DisplayExcludeCount(_("User ignored by \"exclude_users\""),ER_IgnoredUser); | |
820 | } | |
821 | ||
1c91da07 FM |
822 | for (x=0 ; x<sizeof(LogFormats)/sizeof(*LogFormats) ; x++) { |
823 | if (format_count[x]>0) { | |
824 | /* TRANSLATORS: It displays the number of lines found in the input log files | |
825 | * for each supported log format. The log format name is the %s and is a string | |
826 | * you translate somewhere else. */ | |
8e501bd6 | 827 | debuga(_("%s: %lu entries\n"),_(LogFormats[x]->Name),format_count[x]); |
1c91da07 FM |
828 | totalcount+=format_count[x]; |
829 | } | |
830 | } | |
27d1fa35 | 831 | |
1c91da07 | 832 | if (totalcount==0 && totregsg) |
27d1fa35 FM |
833 | debuga(_("Log with invalid format\n")); |
834 | } | |
835 | ||
27d1fa35 FM |
836 | return((totregsg!=0) ? 1 : 0); |
837 | } | |
6a943fc1 FM |
838 | |
839 | /*! | |
840 | * Get the start and end date of the period covered by the log files. | |
841 | */ | |
842 | void GetLogPeriod(struct tm *Start,struct tm *End) | |
843 | { | |
844 | if (EarliestDate>=0) { | |
845 | memcpy(Start,&EarliestDateTime,sizeof(struct tm)); | |
846 | } else { | |
847 | memset(Start,0,sizeof(struct tm)); | |
848 | } | |
849 | if (LatestDate>=0) { | |
850 | memcpy(End,&LatestDateTime,sizeof(struct tm)); | |
851 | } else { | |
852 | memset(End,0,sizeof(struct tm)); | |
853 | } | |
854 | } |