--- /dev/null
- if (snprintf (tmp3, sizeof(tmp3), "%s/%s.unsort", tmp, ufile->user->filename)>=sizeof(tmp3)) {
- debuga(_("Temporary user file name too long: %s/%s.unsort\n"), tmp, ufile->user->filename);
+/*
+ * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
+ * 1998, 2012
+ *
+ * SARG donations:
+ * please look at http://sarg.sourceforge.net/donations.php
+ * Support:
+ * http://sourceforge.net/projects/sarg/forums/forum/363374
+ * ---------------------------------------------------------------------
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
+ *
+ */
+
+#include "include/conf.h"
+#include "include/defs.h"
+#include "include/readlog.h"
+
+#define REPORT_EVERY_X_LINES 5000 // number of input lines read between two refreshes of the progress message
+#define MAX_OPEN_USER_FILES 10 // initial value of maxopenfiles: how many per-user temporary files are kept open
+
+struct userfilestruct // node of the linked list of per-user temporary files built by ReadLogFile()
+{
+ struct userfilestruct *next; // next node of the list, NULL on the last node
+ struct userinfostruct *user; // user owning this file, as returned by userinfo_create()
+ FILE *file; // stream of the temporary file, NULL while the file is closed
+};
+
+numlist weekdays = { { 0, 1, 2, 3, 4, 5, 6 }, 7 }; // tm_wday values accepted by ReadLogFile(); all seven days by default
+numlist hours = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 }, 24 }; // tm_hour values accepted by ReadLogFile(); all 24 hours by default
+
+extern char *userfile; // defined elsewhere; ReadLogFile() searches it for ":user:" when Filter->SysUsers is set
+
+/*!
+Read the log files.
+
+Every file listed in AccessLog (NAccessLog entries, "-" meaning stdin) is read line
+by line. The format of each file is detected on the fly: a line starting with
+"#Software: Mic" selects the Microsoft ISA parser, a line starting with
+"*** SARG Log ***" selects the sarg parsed log parser, a first word containing two
+dots selects the common log parser and anything else is parsed as a squid native log.
+
+Each entry that passes the filters is appended to the temporary file
+<tmp>/<user file name>.user_unsort of its user. Downloads are also appended to
+<tmp>/download.int_unsort and, if ParsedOutputLog is set, every retained entry is
+copied to a sarg parsed log in that directory. No more than maxopenfiles user files
+are kept open after a new one has to be opened.
+
+A malformed record is fatal: a message is written with debuga() and the program
+exits with EXIT_FAILURE.
+
+\param Filter The filtering parameters for the file to load.
+
+\retval 1 Records found.
+\retval 0 No record found.
+*/
+int ReadLogFile(struct ReadLogDataStruct *Filter)
+{
+ // columns of interest in a Microsoft ISA log
+ enum isa_col_id {
+ ISACOL_Ip,
+ ISACOL_UserName,
+ ISACOL_Date,
+ ISACOL_Time,
+ ISACOL_TimeTaken,
+ ISACOL_Bytes,
+ ISACOL_Uri,
+ ISACOL_Status,
+ ISACOL_Last //last entry of the list !
+ };
+ // input log formats recognized by this function
+ enum InputLogFormat {
+ ILF_Unknown,
+ ILF_Squid,
+ ILF_Common,
+ ILF_Sarg,
+ ILF_Isa,
+ ILF_Last //last entry of the list !
+ };
+
+ enum InputLogFormat ilf;
+ int ilf_count[ILF_Last];
+ longline line;
+ char *linebuf;
+ char *str;
+ char arq_log[255];
+ char fun[MAXLEN];
+ char elap[255];
+ char user[MAX_USER_LEN];
+ char data[255];
+ char ip[60];
+ char hora[30];
+ char mes[30];
+ char tbuf2[128];
+ char dia[128]="";
+ char wuser[MAXLEN];
+ char tmp3[MAXLEN];
+ char sz_Download_Unsort[20000];
+ char start_hour[128];
+ char download_url[MAXLEN];
+ char smartfilter[MAXLEN];
+ const char *arq;
+ const char *url;
+ int iarq;
+ int blen;
+ int OutputNonZero = REPORT_EVERY_X_LINES ;
+ int idata=0;
+ int isa_ncols=0,isa_cols[ISACOL_Last];
+ int x;
+ int hmr;
+ int nopen;
+ int maxopenfiles=MAX_OPEN_USER_FILES;
+ int mindate=0;
+ int maxdate=0;
+ int cstatus;
+ unsigned long int recs1=0UL;
+ unsigned long int recs2=0UL;
+ long int totregsl=0;
+ long int totregsg=0;
+ long int totregsx=0;
+ long long int iyear, imonth, iday;
+ FILE *fp_in=NULL;
+ FILE *fp_log=NULL;
+ FILE *fp_Download_Unsort=NULL;
+ bool from_pipe;
+ bool from_stdin;
+ bool download_flag=false;
+ bool id_is_ip;
+ bool totper=false;
+ struct stat logstat;
+ struct getwordstruct gwarea;
+ struct tm tt;
+ struct userfilestruct *prev_ufile;
+ struct userinfostruct *uinfo;
+ struct userfilestruct *first_user_file=NULL;
+ struct userfilestruct *ufile;
+ struct userfilestruct *ufile1;
+ struct ReadLogStruct log_entry;
+ time_t tnum;
+
+ for (ilf=0 ; ilf<ILF_Last ; ilf++) ilf_count[ilf]=0;
+ tmp3[0]='\0';
+ start_hour[0]='\0';
+ // tbuf2 is copied into start_hour/end_hour later on: never leave it uninitialized
+ tbuf2[0]='\0';
+ first_user_file=NULL;
+
+ snprintf(sz_Download_Unsort,sizeof(sz_Download_Unsort),"%s/download.int_unsort", tmp);
+
+ if(DataFile[0]=='\0') {
+ denied_open();
+ authfail_open();
+ }
+
+ if ((line=longline_create())==NULL) {
+ debuga(_("Not enough memory to read a log file\n"));
+ exit(EXIT_FAILURE);
+ }
+
+ for (iarq=0 ; iarq<NAccessLog ; iarq++) {
+ arq=AccessLog[iarq];
+
+ if(strcmp(arq,"-")==0) {
+ if(debug)
+ debuga(_("Reading access log file: from stdin\n"));
+ fp_in=stdin;
+ from_stdin=true;
+ } else {
+ // with a date range, a file last modified before the range starts cannot contain useful entries
+ if (Filter->DateRange[0]!='\0') {
+ if (stat(arq,&logstat)!=0) {
+ debuga(_("Cannot get the modification time of input log file %s (%s). Processing it anyway\n"),arq,strerror(errno));
+ } else {
+ struct tm *logtime=localtime(&logstat.st_mtime);
+ if ((logtime->tm_year+1900)*10000+(logtime->tm_mon+1)*100+logtime->tm_mday<dfrom) {
+ debuga(_("Ignoring old log file %s\n"),arq);
+ continue;
+ }
+ }
+ }
+ fp_in=decomp(arq,&from_pipe);
+ if(fp_in==NULL) {
+ debuga(_("(log) Cannot open log file: %s - %s\n"),arq,strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+ if(debug) debuga(_("Reading access log file: %s\n"),arq);
+ from_stdin=false;
+ }
+
+ ilf=ILF_Unknown;
+ download_flag=false;
+
+ // pre-read the file only if we have to show stats
+ if (ShowReadStatistics && !from_stdin && !from_pipe) {
+ size_t nread,i;
+ bool skipcr=false;
+ char tmp4[MAXLEN];
+
+ recs1=0UL;
+ recs2=0UL;
+
+ // count the lines: a run of consecutive CR/LF characters counts as one end of line
+ while ((nread=fread(tmp4,1,sizeof(tmp4),fp_in))>0) {
+ for (i=0 ; i<nread ; i++)
+ if (skipcr) {
+ if (tmp4[i]!='\n' && tmp4[i]!='\r') {
+ skipcr=false;
+ }
+ } else {
+ if (tmp4[i]=='\n' || tmp4[i]=='\r') {
+ skipcr=true;
+ recs1++;
+ }
+ }
+ }
+ rewind(fp_in);
+ printf(_("SARG: Records in file: %lu, reading: %3.2f%%"),recs1,(float) 0);
+ putchar('\r');
+ fflush( stdout ) ;
+ }
+
+ longline_reset(line);
+
+ while ((linebuf=longline_read(fp_in,line))!=NULL) {
+ blen=strlen(linebuf);
+
+ // the ISA and sarg formats are identified by a header line
+ if (ilf==ILF_Unknown) {
+ if(strncmp(linebuf,"#Software: Mic",14) == 0) {
+ fixendofline(linebuf);
+ if (debug)
+ debuga(_("Log is from Microsoft ISA: %s\n"),linebuf);
+ ilf=ILF_Isa;
+ ilf_count[ilf]++;
+ continue;
+ }
+
+ if(strncmp(linebuf,"*** SARG Log ***",16) == 0) {
+ if (getperiod_fromsarglog(arq,&period)<0) {
+ debuga(_("The name of the file is invalid: %s\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ ilf=ILF_Sarg;
+ ilf_count[ilf]++;
+ continue;
+ }
+ }
+
+ // open the parsed output log the first time it is needed
+ if(!fp_log && ParsedOutputLog[0] && ilf!=ILF_Sarg) {
+ if(access(ParsedOutputLog,R_OK) != 0) {
+ my_mkdir(ParsedOutputLog);
+ }
+ if (snprintf(arq_log,sizeof(arq_log),"%s/sarg_temp.log",ParsedOutputLog)>=sizeof(arq_log)) {
+ debuga(_("File name too long: %s/sarg_temp.log\n"),ParsedOutputLog);
+ exit(EXIT_FAILURE);
+ }
+ if((fp_log=MY_FOPEN(arq_log,"w"))==NULL) {
+ debuga(_("(log) Cannot open log file: %s - %s\n"),arq_log,strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+ fputs("*** SARG Log ***\n",fp_log);
+ }
+
+ recs2++;
+ if( ShowReadStatistics && !from_stdin && !from_pipe && --OutputNonZero<=0) {
+ // OutputNonZero is not reset between files so recs1 may still be zero here
+ double perc = (recs1!=0UL) ? recs2 * 100. / recs1 : 100. ;
+ printf(_("SARG: Records in file: %lu, reading: %3.2lf%%"),recs2,perc);
+ putchar('\r');
+ fflush (stdout);
+ OutputNonZero = REPORT_EVERY_X_LINES ;
+ }
+ if(blen < 58) continue;
+ if(strstr(linebuf,"HTTP/0.0") != 0) continue;
+ if(strstr(linebuf,"logfile turned over") != 0) continue;
+ if(linebuf[0] == ' ') continue;
+
+ // exclude_string
+ if(ExcludeString[0] != '\0') {
+ bool exstring=false;
+ getword_start(&gwarea,ExcludeString);
+ while(strchr(gwarea.current,':') != 0) {
+ if (getword_multisep(val1,sizeof(val1),&gwarea,':')<0) {
+ debuga(_("Maybe you have a broken record or garbage in your exclusion string\n"));
+ exit(EXIT_FAILURE);
+ }
+ if((str=(char *) strstr(linebuf,val1)) != (char *) NULL ) {
+ exstring=true;
+ break;
+ }
+ }
+ if(!exstring && (str=(char *) strstr(linebuf,gwarea.current)) != (char *) NULL )
+ exstring=true;
+ if(exstring) continue;
+ }
+
+ totregsl++;
+ if(debugm)
+ printf("BUF=%s\n",linebuf);
+
+ memset(&log_entry,0,sizeof(log_entry));
+ if (ilf==ILF_Squid || ilf==ILF_Common || ilf==ILF_Unknown) {
+ getword_start(&gwarea,linebuf);
+ if (getword(data,sizeof(data),&gwarea,' ')<0) {
+ debuga(_("Maybe you have a broken time in your access.log file\n"));
+ exit(EXIT_FAILURE);
+ }
+ // a first word with at least two dots is taken as the client address of a common log entry
+ if((str=(char *) strchr(data, '.')) != (char *) NULL && (str=(char *) strchr(str+1, '.')) != (char *) NULL ) {
+ // data can hold many more characters than ip: refuse the entry instead of overflowing ip
+ if (strlen(data)>=sizeof(ip)) {
+ debuga(_("Maybe you have a broken IP in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ strcpy(ip,data);
+ log_entry.Ip=ip;
+ if(squid24) {
+ if (getword(user,sizeof(user),&gwarea,' ')<0 || getword_skip(255,&gwarea,' ')<0) {
+ debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ } else {
+ if (getword_skip(255,&gwarea,' ')<0 || getword(user,sizeof(user),&gwarea,' ')<0) {
+ debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ }
+ log_entry.User=user;
+ if (getword(data,sizeof(data),&gwarea,']')<0 || getword_skip(MAXLEN,&gwarea,'"')<0 ||
+ getword(fun,sizeof(fun),&gwarea,' ')<0) {
+ debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ if (getword_ptr(linebuf,&log_entry.Url,&gwarea,' ')<0) {
+ debuga(_("Maybe you have a broken url in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ if (getword_skip(MAXLEN,&gwarea,' ')<0) {
+ debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ if (getword(code2,sizeof(code2),&gwarea,' ')<0) {
+ debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ if (getword_atoll(&log_entry.DataSize,&gwarea,' ')<0) {
+ debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ if((str=(char *) strchr(gwarea.current, ' ')) != (char *) NULL ) {
+ if (getword(code,sizeof(code),&gwarea,' ')<0) {
+ debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ } else {
+ if (getword(code,sizeof(code),&gwarea,'\0')<0) {
+ debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ if ((str = strchr(code, ':')) != NULL)
+ *str = '/';
+ log_entry.HttpCode=code;
+
+ ilf=ILF_Common;
+ ilf_count[ilf]++;
+
+ // data+1 skips the first character of the date field read up to ']'
+ getword_start(&gwarea,data+1);
+ if (getword_multisep(data,sizeof(data),&gwarea,':')<0){
+ debuga(_("Maybe you have a broken date in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ if (getword_multisep(hora,sizeof(hora),&gwarea,' ')<0){
+ debuga(_("Maybe you have a broken date in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ getword_start(&gwarea,data);
+ if (getword_atoll(&iday,&gwarea,'/')<0){
+ debuga(_("Maybe you have a broken date in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ if (getword(mes,sizeof(mes),&gwarea,'/')<0){
+ debuga(_("Maybe you have a broken date in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ if (getword_atoll(&iyear,&gwarea,'/')<0){
+ debuga(_("Maybe you have a broken date in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+
+ imonth=month2num(mes)+1;
+ idata=builddia(iday,imonth,iyear);
+ computedate(iyear,imonth,iday,&tt);
+ if (sscanf(hora,"%d:%d:%d",&tt.tm_hour,&tt.tm_min,&tt.tm_sec)!=3 || tt.tm_hour<0 || tt.tm_hour>=24 ||
+ tt.tm_min<0 || tt.tm_min>=60 || tt.tm_sec<0 || tt.tm_sec>=60) {
+ debuga(_("Invalid time found in %s\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ log_entry.EntryTime=&tt;
+ // keep the hour of the entry for the name of the parsed output log, as the squid parser does
+ strftime(tbuf2, sizeof(tbuf2), "%H%M", &tt);
+ }
+
+ if(ilf==ILF_Unknown || ilf==ILF_Squid) {
+ if (getword(elap,sizeof(elap),&gwarea,' ')<0) {
+ debuga(_("Maybe you have a broken elapsed time in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ // skip the empty words produced by consecutive spaces before the elapsed time
+ while(strcmp(elap,"") == 0 && gwarea.current[0] != '\0')
+ if (getword(elap,sizeof(elap),&gwarea,' ')<0) {
+ debuga(_("Maybe you have a broken elapsed time in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ if(strlen(elap) < 1) continue;
+ log_entry.ElapsedTime=atol(elap);
+ if (getword(ip,sizeof(ip),&gwarea,' ')<0){
+ debuga(_("Maybe you have a broken client IP address in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ log_entry.Ip=ip;
+ if (getword(code,sizeof(code),&gwarea,' ')<0){
+ debuga(_("Maybe you have a broken result code in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ log_entry.HttpCode=code;
+ if (getword_atoll(&log_entry.DataSize,&gwarea,' ')<0){
+ debuga(_("Maybe you have a broken amount of data in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ if (getword(fun,sizeof(fun),&gwarea,' ')<0){
+ debuga(_("Maybe you have a broken request method in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ if (getword_ptr(linebuf,&log_entry.Url,&gwarea,' ')<0){
+ debuga(_("Maybe you have a broken url in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ if (getword(user,sizeof(user),&gwarea,' ')<0){
+ debuga(_("Maybe you have a broken user ID in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ log_entry.User=user;
+ ilf=ILF_Squid;
+ ilf_count[ilf]++;
+
+ // data still holds the first word of the line: the timestamp of the entry
+ tnum=atoi(data);
+ log_entry.EntryTime=localtime(&tnum);
+ if (log_entry.EntryTime == NULL) {
+ debuga(_("Cannot convert the timestamp from the squid log file\n"));
+ exit(EXIT_FAILURE);
+ }
+
+ strftime(tbuf2, sizeof(tbuf2), "%H%M", log_entry.EntryTime);
+
+ idata=(log_entry.EntryTime->tm_year+1900)*10000+(log_entry.EntryTime->tm_mon+1)*100+log_entry.EntryTime->tm_mday;
+ }
+ }
+ if (ilf==ILF_Sarg) {
+ // tab separated columns: date, time, user, ip, url, size, code, elapsed time, smartfilter
+ getword_start(&gwarea,linebuf);
+ if (getword(data,sizeof(data),&gwarea,'\t')<0){
+ debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ if (getword(hora,sizeof(hora),&gwarea,'\t')<0) {
+ debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ if (getword(user,sizeof(user),&gwarea,'\t')<0) {
+ debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ log_entry.User=user;
+ if (getword(ip,sizeof(ip),&gwarea,'\t')<0) {
+ debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ log_entry.Ip=ip;
+ if (getword_ptr(linebuf,&log_entry.Url,&gwarea,'\t')<0){
+ debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ if (getword_atoll(&log_entry.DataSize,&gwarea,'\t')<0){
+ debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ if (getword(code,sizeof(code),&gwarea,'\t')<0){
+ debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ log_entry.HttpCode=code;
+ if (getword_atol(&log_entry.ElapsedTime,&gwarea,'\t')<0){
+ debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ if (getword(smartfilter,sizeof(smartfilter),&gwarea,'\0')<0){
+ debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ getword_start(&gwarea,data);
+ if (getword_atoll(&iday,&gwarea,'/')<0 || iday<1 || iday>31){
+ debuga(_("Maybe you have a broken date in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ if (getword_atoll(&imonth,&gwarea,'/')<0 || imonth<1 || imonth>12){
+ debuga(_("Maybe you have a broken date in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ if (getword_atoll(&iyear,&gwarea,'\0')<0){
+ debuga(_("Maybe you have a broken date in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ idata=builddia(iday,imonth,iyear);
+ computedate(iyear,imonth,iday,&tt);
+ if (sscanf(hora,"%d:%d:%d",&tt.tm_hour,&tt.tm_min,&tt.tm_sec)!=3 || tt.tm_hour<0 || tt.tm_hour>=24 ||
+ tt.tm_min<0 || tt.tm_min>=60 || tt.tm_sec<0 || tt.tm_sec>=60) {
+ debuga(_("Invalid time found in %s\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ log_entry.EntryTime=&tt;
+ }
+ if (ilf==ILF_Isa) {
+ if (linebuf[0] == '#') {
+ // header line: find the position of each column we need
+ int ncols,cols[ISACOL_Last];
+
+ fixendofline(linebuf);
+ getword_start(&gwarea,linebuf);
+ // remove the #Fields: column at the beginning of the line
+ if (getword_skip(1000,&gwarea,' ')<0){
+ debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ for (ncols=0 ; ncols<ISACOL_Last ; ncols++) cols[ncols]=-1;
+ ncols=0;
+ while(gwarea.current[0] != '\0') {
+ if (getword(val1,sizeof(val1),&gwarea,'\t')<0){
+ debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ if(strcmp(val1,"c-ip") == 0) cols[ISACOL_Ip]=ncols;
+ if(strcmp(val1,"cs-username") == 0) cols[ISACOL_UserName]=ncols;
+ if(strcmp(val1,"date") == 0) cols[ISACOL_Date]=ncols;
+ if(strcmp(val1,"time") == 0) cols[ISACOL_Time]=ncols;
+ if(strcmp(val1,"time-taken") == 0) cols[ISACOL_TimeTaken]=ncols;
+ if(strcmp(val1,"sc-bytes") == 0) cols[ISACOL_Bytes]=ncols;
+ if(strcmp(val1,"cs-uri") == 0) cols[ISACOL_Uri]=ncols;
+ if(strcmp(val1,"sc-status") == 0) cols[ISACOL_Status]=ncols;
+ ncols++;
+ }
+ // only a header line containing the client IP column replaces the current layout
+ if (cols[ISACOL_Ip]>=0) {
+ isa_ncols=ncols;
+ for (ncols=0 ; ncols<ISACOL_Last ; ncols++)
+ isa_cols[ncols]=cols[ncols];
+ }
+ continue;
+ }
+ if (!isa_ncols) continue;
+ getword_start(&gwarea,linebuf);
+ for (x=0 ; x<isa_ncols ; x++) {
+ if (getword_ptr(linebuf,&str,&gwarea,'\t')<0) {
+ debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ if (x==isa_cols[ISACOL_Ip]) {
+ if (strlen(str)>=sizeof(ip)) {
+ debuga(_("Maybe you have a broken IP in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ strcpy(ip,str);
+ log_entry.Ip=ip;
+ } else if (x==isa_cols[ISACOL_UserName]) {
+ if (strlen(str)>=sizeof(user)) {
+ debuga(_("Maybe you have a broken user ID in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ strcpy(user,str);
+ log_entry.User=user;
+ } else if (x==isa_cols[ISACOL_Date]) {
+ if (strlen(str)>=sizeof(data)) {
+ debuga(_("Maybe you have a broken date in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ strcpy(data,str);
+ } else if (x==isa_cols[ISACOL_Time]) {
+ if (strlen(str)>=sizeof(hora)) {
+ debuga(_("Maybe you have a broken time in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ strcpy(hora,str);
+ } else if (x==isa_cols[ISACOL_TimeTaken]) {
+ log_entry.ElapsedTime=atol(str);
+ } else if (x==isa_cols[ISACOL_Bytes]) {
+ log_entry.DataSize=atoll(str);
+ } else if (x==isa_cols[ISACOL_Uri]) {
+ log_entry.Url=str;
+ } else if (x==isa_cols[ISACOL_Status]) {
+ if (strlen(str)>=sizeof(code)) {
+ debuga(_("Maybe you have a broken access code in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ strcpy(code,str);
+ }
+ }
+
+ // the three digit status leaves room for "DENIED/" and the terminating zero in valcode
+ if(strcmp(code,"401") == 0 || strcmp(code,"403") == 0 || strcmp(code,"407") == 0) {
+ static char valcode[12];
+ sprintf(valcode,"DENIED/%s",code);
+ log_entry.HttpCode=valcode;
+ } else {
+ log_entry.HttpCode=code;
+ }
+ getword_start(&gwarea,data);
+ if (getword_atoll(&iyear,&gwarea,'-')<0){
+ debuga(_("Maybe you have a broken year in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ if (getword_atoll(&imonth,&gwarea,'-')<0){
+ debuga(_("Maybe you have a broken month in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ if (getword_atoll(&iday,&gwarea,'\0')<0){
+ debuga(_("Maybe you have a broken day in your %s file\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+
+ idata=builddia(iday,imonth,iyear);
+ computedate(iyear,imonth,iday,&tt);
+ if (isa_cols[ISACOL_Time]>=0) {
+ if (sscanf(hora,"%d:%d:%d",&tt.tm_hour,&tt.tm_min,&tt.tm_sec)!=3 || tt.tm_hour<0 || tt.tm_hour>=24 ||
+ tt.tm_min<0 || tt.tm_min>=60 || tt.tm_sec<0 || tt.tm_sec>=60) {
+ debuga(_("Invalid time found in %s\n"),arq);
+ exit(EXIT_FAILURE);
+ }
+ }
+ log_entry.EntryTime=&tt;
+ // keep the hour of the entry for the name of the parsed output log, as the squid parser does
+ strftime(tbuf2, sizeof(tbuf2), "%H%M", &tt);
+ }
+ // stop reading this file if the parsers left a mandatory field empty
+ if (log_entry.EntryTime==NULL) {
+ debuga(_("Unknown input log file format: no time\n"));
+ break;
+ }
+ if (log_entry.Ip==NULL) {
+ debuga(_("Unknown input log file format: no IP addresses\n"));
+ break;
+ }
+ if (log_entry.User==NULL) {
+ debuga(_("Unknown input log file format: no user\n"));
+ break;
+ }
+ if (log_entry.Url==NULL) {
+ debuga(_("Unknown input log file format: no URL\n"));
+ break;
+ }
+
+ if(debugm)
+ printf("DATE=%s IDATA=%d DFROM=%d DUNTIL=%d\n",Filter->DateRange,idata,dfrom,duntil);
+
+ if(Filter->DateRange[0] != '\0'){
+ if(idata < dfrom || idata > duntil) continue;
+ }
+
+ // Record only hours usage which is required
+ if( bsearch( &( log_entry.EntryTime->tm_wday ), weekdays.list, weekdays.len, sizeof( int ), compar ) == NULL )
+ continue;
+
+ if( bsearch( &( log_entry.EntryTime->tm_hour ), hours.list, hours.len, sizeof( int ), compar ) == NULL )
+ continue;
+
+
+ if(strlen(log_entry.User) > MAX_USER_LEN) {
+ if (debugm) printf(_("User ID too long: %s\n"),log_entry.User);
+ totregsx++;
+ continue;
+ }
+
+ // include_users
+ if(IncludeUsers[0] != '\0') {
+ snprintf(val1,sizeof(val1),":%s:",log_entry.User);
+ if((str=(char *) strstr(IncludeUsers,val1)) == (char *) NULL )
+ continue;
+ }
+
+ if(vercode(log_entry.HttpCode)) {
+ if (debugm) printf(_("Excluded code: %s\n"),log_entry.HttpCode);
+ totregsx++;
+ continue;
+ }
+
+ if(testvaliduserchar(log_entry.User))
+ continue;
+
+ // replace any tab by a single space
+ for (str=log_entry.Url ; *str ; str++)
+ if (*str=='\t') *str=' ';
+ for (str=log_entry.HttpCode ; *str ; str++)
+ if (*str=='\t') *str=' ';
+
+ if(ilf!=ILF_Sarg) {
+ /*
+ The full URL is not saved in sarg log. There is no point in testing the URL to detect
+ a downloaded file.
+ */
+ download_flag=is_download_suffix(log_entry.Url);
+ if (download_flag) {
+ safe_strcpy(download_url,log_entry.Url,sizeof(download_url));
+ download_count++;
+ }
+ } else
+ download_flag=false;
+
+ url=process_url(log_entry.Url,LongUrl);
+ if (!url || url[0] == '\0') continue;
+
+ if(addr[0] != '\0'){
+ if(strcmp(addr,log_entry.Ip)!=0) continue;
+ }
+ if(Filter->HostFilter) {
+ if(!vhexclude(url)) {
+ if (debugm) printf(_("Excluded site: %s\n"),url);
+ totregsx++;
+ continue;
+ }
+ }
+
+ if(Filter->StartTime >= 0 && Filter->EndTime >= 0) {
+ hmr=log_entry.EntryTime->tm_hour*100+log_entry.EntryTime->tm_min;
+ if(hmr < Filter->StartTime || hmr > Filter->EndTime) continue;
+ }
+
+ if(site[0] != '\0'){
+ if(strstr(url,site)==0) continue;
+ }
+
+ // select the identifier under which the entry is reported
+ if(UserIp) {
+ log_entry.User=log_entry.Ip;
+ id_is_ip=true;
+ } else {
+ id_is_ip=false;
+ if(strcmp(log_entry.User,"-") == 0 || strcmp(log_entry.User," ") == 0 || strcmp(log_entry.User,"") == 0) {
+ if(RecordsWithoutUser == RECORDWITHOUTUSER_IP) {
+ log_entry.User=log_entry.Ip;
+ id_is_ip=true;
+ }
+ if(RecordsWithoutUser == RECORDWITHOUTUSER_IGNORE)
+ continue;
+ if(RecordsWithoutUser == RECORDWITHOUTUSER_EVERYBODY)
+ log_entry.User="everybody";
+ } else {
+ strlow(log_entry.User);
+ if(NtlmUserFormat == NTLMUSERFORMAT_USER) {
+ // keep what follows the first '+', '\' or '_' found (tried in that order)
+ if ((str=strchr(user,'+'))!=NULL || (str=strchr(user,'\\'))!=NULL || (str=strchr(user,'_'))!=NULL) {
+ log_entry.User=str+1;
+ }
+ }
+ }
+ }
+
+ if(us[0] != '\0'){
+ if(strcmp(log_entry.User,us)!=0) continue;
+ }
+
+ if(Filter->SysUsers) {
+ snprintf(wuser,sizeof(wuser),":%s:",log_entry.User);
+ if(strstr(userfile, wuser) == 0)
+ continue;
+ }
+
+ if(Filter->UserFilter) {
+ if(!vuexclude(log_entry.User)) {
+ if (debugm) printf(_("Excluded user: %s\n"),log_entry.User);
+ totregsx++;
+ continue;
+ }
+ }
+
+ if(strcmp(log_entry.User,"-") ==0 || strcmp(log_entry.User," ") ==0 ||
+ strcmp(log_entry.User,"") ==0 || strcmp(log_entry.User,":") ==0)
+ continue;
+
+ if (log_entry.DataSize<0) log_entry.DataSize=0;
+
+ if (log_entry.ElapsedTime<0) log_entry.ElapsedTime=0;
+ if (Filter->max_elapsed>0 && log_entry.ElapsedTime>Filter->max_elapsed) {
+ log_entry.ElapsedTime=0;
+ }
+
+ if((str=(char *) strstr(linebuf, "[SmartFilter:")) != (char *) NULL ) {
+ fixendofline(str);
+ snprintf(smartfilter,sizeof(smartfilter),"\"%s\"",str+1);
+ } else strcpy(smartfilter,"\"\"");
+
+ // look for the user in the list, counting the open files met before it
+ nopen=0;
+ prev_ufile=NULL;
+ for (ufile=first_user_file ; ufile && strcmp(log_entry.User,ufile->user->id)!=0 ; ufile=ufile->next) {
+ prev_ufile=ufile;
+ if (ufile->file) nopen++;
+ }
+ if (!ufile) {
+ ufile=malloc(sizeof(*ufile));
+ if (!ufile) {
+ debuga(_("Not enough memory to store the user %s\n"),log_entry.User);
+ exit(EXIT_FAILURE);
+ }
+ memset(ufile,0,sizeof(*ufile));
+ ufile->next=first_user_file;
+ first_user_file=ufile;
+ uinfo=userinfo_create(log_entry.User);
+ ufile->user=uinfo;
+ uinfo->id_is_ip=id_is_ip;
+ } else {
+ // move the user to the head of the list so the most recently used users come first
+ if (prev_ufile) {
+ prev_ufile->next=ufile->next;
+ ufile->next=first_user_file;
+ first_user_file=ufile;
+ }
+ }
+ #ifdef ENABLE_DOUBLE_CHECK_DATA
+ ufile->user->nbytes+=log_entry.DataSize;
+ ufile->user->elap+=log_entry.ElapsedTime;
+ #endif
+
+ if (ufile->file==NULL) {
+ if (nopen>=maxopenfiles) {
+ // too many open files: close those found after the first maxopenfiles open ones
+ x=0;
+ for (ufile1=first_user_file ; ufile1 ; ufile1=ufile1->next) {
+ if (ufile1->file!=NULL) {
+ if (x>=maxopenfiles) {
+ if (fclose(ufile1->file)==EOF) {
+ debuga(_("Failed to close the log file of user %s - %s\n"),ufile1->user->id,strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+ ufile1->file=NULL;
+ }
+ x++;
+ }
+ }
+ }
++ if (snprintf (tmp3, sizeof(tmp3), "%s/%s.user_unsort", tmp, ufile->user->filename)>=sizeof(tmp3)) {
++ debuga(_("Temporary user file name too long: %s/%s.user_unsort\n"), tmp, ufile->user->filename);
+ exit(EXIT_FAILURE);
+ }
+ if ((ufile->file = MY_FOPEN (tmp3, "a")) == NULL) {
+ debuga(_("(log) Cannot open temporary file: %s - %s\n"), tmp3, strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ strftime(dia, sizeof(dia), "%d/%m/%Y", log_entry.EntryTime);
+ strftime(hora,sizeof(hora),"%H:%M:%S",log_entry.EntryTime);
+
+ if (fprintf(ufile->file, "%s\t%s\t%s\t%s\t%"PRIu64"\t%s\t%ld\t%s\n",dia,hora,log_entry.Ip,url,(uint64_t)log_entry.DataSize,log_entry.HttpCode,log_entry.ElapsedTime,smartfilter)<=0) {
+ debuga(_("Write error in the log file of user %s\n"),log_entry.User);
+ exit(EXIT_FAILURE);
+ }
+
+ if(fp_log && ilf!=ILF_Sarg)
+ fprintf(fp_log, "%s\t%s\t%s\t%s\t%s\t%"PRIu64"\t%s\t%ld\t%s\n",dia,hora,log_entry.User,log_entry.Ip,url,(uint64_t)log_entry.DataSize,log_entry.HttpCode,log_entry.ElapsedTime,smartfilter);
+
+ totregsg++;
+
+ if(!dataonly && download_flag && strstr(log_entry.HttpCode,"DENIED") == 0) {
+ ndownload = 1;
+
+ if ( ! fp_Download_Unsort ) {
+ if ((fp_Download_Unsort = MY_FOPEN ( sz_Download_Unsort, "a")) == NULL) {
+ debuga(_("(log) Cannot open temporary file: %s - %s\n"),sz_Download_Unsort, strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+ }
+ fprintf(fp_Download_Unsort,"%s\t%s\t%s\t%s\t%s\n",dia,hora,log_entry.User,log_entry.Ip,download_url);
+ }
+
+ denied_write(&log_entry);
+ authfail_write(&log_entry);
+
+ // track the first and last day found in the logs to set the reported period
+ if (ilf!=ILF_Sarg) {
+ if(!totper || idata<mindate){
+ mindate=idata;
+ memcpy(&period.start,log_entry.EntryTime,sizeof(*log_entry.EntryTime));
+ strcpy(start_hour,tbuf2);
+ }
+ if (!totper || idata>maxdate) {
+ maxdate=idata;
+ memcpy(&period.end,log_entry.EntryTime,sizeof(*log_entry.EntryTime));
+ }
+ totper=true;
+ }
+
+ if(debugm){
+ printf("IP=\t%s\n",log_entry.Ip);
+ printf("USER=\t%s\n",log_entry.User);
+ printf("ELAP=\t%ld\n",log_entry.ElapsedTime);
+ printf("DATE=\t%s\n",dia);
+ printf("TIME=\t%s\n",hora);
+ printf("FUNC=\t%s\n",fun);
+ printf("URL=\t%s\n",url);
+ printf("CODE=\t%s\n",log_entry.HttpCode);
+ printf("LEN=\t%"PRIu64"\n",(uint64_t)log_entry.DataSize);
+ }
+ }
+
+ if (!from_stdin) {
+ if (from_pipe)
+ pclose(fp_in);
+ else {
+ fclose(fp_in);
+ if( ShowReadStatistics )
+ printf(_("SARG: Records in file: %lu, reading: %3.2f%%\n"),recs1, (float) 100 );
+ }
+ }
+ }
+ longline_destroy(&line);
+
+ // give the parsed output log its final name and compress it if requested
+ if(fp_log != NULL) {
+ char end_hour[128];
+ char val2[40];
+ char val4[255];//val4 must not be bigger than arq_log without fixing the strcpy below
+
+ fclose(fp_log);
+ safe_strcpy(end_hour,tbuf2,sizeof(end_hour));
+ strftime(val2,sizeof(val2),"%d%m%Y",&period.start);
+ strftime(val1,sizeof(val1),"%d%m%Y",&period.end);
+ if (snprintf(val4,sizeof(val4),"%s/sarg-%s_%s-%s_%s.log",ParsedOutputLog,val2,start_hour,val1,end_hour)>=sizeof(val4)) {
+ debuga(_("File name too long: %s/sarg-%s_%s-%s_%s.log\n"),ParsedOutputLog,val2,start_hour,val1,end_hour);
+ exit(EXIT_FAILURE);
+ }
+ if (rename(arq_log,val4)) {
+ debuga(_("failed to rename %s to %s - %s\n"),arq_log,val4,strerror(errno));
+ } else {
+ strcpy(arq_log,val4);
+
+ if(strcmp(ParsedOutputLogCompress,"nocompress") != 0 && ParsedOutputLogCompress[0] != '\0') {
+ /*
+ No double quotes around ParsedOutputLogCompress because it may contain command line options. If double quotes are
+ necessary around the command name, put them in the configuration file.
+ */
+ if (snprintf(val1,sizeof(val1),"%s \"%s\"",ParsedOutputLogCompress,arq_log)>=sizeof(val1)) {
+ debuga(_("Command too long: %s \"%s\"\n"),ParsedOutputLogCompress,arq_log);
+ exit(EXIT_FAILURE);
+ }
+ cstatus=system(val1);
+ if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
+ debuga(_("command return status %d\n"),WEXITSTATUS(cstatus));
+ debuga(_("command: %s\n"),val1);
+ exit(EXIT_FAILURE);
+ }
+ }
+ }
+ if(debug)
+ debuga(_("Sarg parsed log saved as %s\n"),arq_log);
+ }
+
+ denied_close();
+ authfail_close();
+ if (fp_Download_Unsort) fclose (fp_Download_Unsort);
+
+ for (ufile=first_user_file ; ufile ; ufile=ufile1) {
+ ufile1=ufile->next;
+ // a failure here means buffered records were not written: report it like the close in the read loop
+ if (ufile->file!=NULL && fclose(ufile->file)==EOF) {
+ debuga(_("Failed to close the log file of user %s - %s\n"),ufile->user->id,strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+ free(ufile);
+ }
+
+ if (debug) {
+ int totalcount=0;
+
+ debuga(_(" Records read: %ld, written: %ld, excluded: %ld\n"),totregsl,totregsg,totregsx);
+
+ for (ilf=0 ; ilf<ILF_Last ; ilf++) totalcount+=ilf_count[ilf];
+
+ if(ilf_count[ILF_Common]>0 && ilf_count[ILF_Squid]>0)
+ debuga(_("Log with mixed records format (squid and common log)\n"));
+
+ if(ilf_count[ILF_Common]>0 && ilf_count[ILF_Squid]==0)
+ debuga(_("Common log format\n"));
+
+ if(ilf_count[ILF_Common]==0 && ilf_count[ILF_Squid]>0)
+ debuga(_("Squid log format\n"));
+
+ if(ilf_count[ILF_Sarg]>0)
+ debuga(_("Sarg log format\n"));
+
+ if(totalcount==0 && totregsg)
+ debuga(_("Log with invalid format\n"));
+ }
+
+ if(debugz){
+ debugaz(_("date=%s\n"),dia);
+ debugaz(_("period=%s\n"),period.text);
+ }
+
+ return((totregsg!=0) ? 1 : 0);
+}