2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
6 * please look at http://sarg.sourceforge.net/donations.php
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
27 #include "include/conf.h"
28 #include "include/defs.h"
29 #include "include/readlog.h"
32 A new file is being read. The name of the file is \a FileName.
34 static void Common_NewFile(const char *FileName
)
39 Extract a column containing a long long int from \a Line.
41 The extracted value is stored in \a Value.
43 The pointer to the next byte just after the number is returned
46 static char *Common_GetLongLongInt(char *Line
,long long int *Value
)
52 while (isdigit(*Line
)) *Value
=*Value
*10+(*Line
++-'0');
58 Read one entry from a standard squid log format.
60 \param Line One line from the input log file.
61 \param Entry Where to store the information parsed from the line.
63 \retval RLRC_NoError One valid entry is parsed.
64 \retval RLRC_Unknown The line is invalid.
65 \retval RLRC_InternalError An internal error was encountered.
67 static enum ReadLogReturnCodeEnum
Common_ReadEntry(char *Line
,struct ReadLogStruct
*Entry
)
86 for (IpLen
=0 ; *Line
&& *Line
!=' ' ; IpLen
++) Line
++;
87 if (*Line
!=' ' || IpLen
==0) return(RLRC_Unknown
);
90 // squid versions <= 2.4 store the user ID in the second column: skip the first column here
92 while (*Line
&& *Line
!=' ') Line
++;
93 if (*Line
!=' '|| Line
==Begin
) return(RLRC_Unknown
);
96 // the ID of the user or - if the user is unidentified
98 for (UserLen
=0 ; *Line
&& *Line
!=' ' ; UserLen
++) Line
++;
99 if (*Line
!=' ' || UserLen
==0) return(RLRC_Unknown
);
102 // squid versions > 2.4 store the user ID in the first column: skip the second column here
104 while (*Line
&& *Line
!=' ') Line
++;
105 if (*Line
!=' '|| Line
==Begin
) return(RLRC_Unknown
);
108 // get the date enclosed within square brackets
110 if (*Line
!='[') return(RLRC_Unknown
);
113 while (isdigit(*Line
)) Day
=Day
*10+(*Line
++-'0');
114 if (*Line
!='/' || Day
<1 || Day
>31) return(RLRC_Unknown
);
117 for (MonthNameLen
=0 ; MonthNameLen
<sizeof(MonthName
)-1 && isalpha(*Line
) ; MonthNameLen
++) MonthName
[MonthNameLen
]=*Line
++;
118 if (*Line
!='/') return(RLRC_Unknown
);
119 MonthName
[MonthNameLen
]='\0';
120 Month
=month2num(MonthName
);
121 if (Month
>=12) return(RLRC_Unknown
);
125 while (isdigit(*Line
)) Year
=Year
*10+(*Line
++-'0');
126 if (*Line
!=':' || Year
<1900 || Year
>2200) return(RLRC_Unknown
);
131 while (isdigit(*Line
)) Hour
=Hour
*10+(*Line
++-'0');
132 if (*Line
!=':' || Hour
>=24) return(RLRC_Unknown
);
135 while (isdigit(*Line
)) Minute
=Minute
*10+(*Line
++-'0');
136 if (*Line
!=':' || Minute
>=60) return(RLRC_Unknown
);
139 while (isdigit(*Line
)) Second
=Second
*10+(*Line
++-'0');
140 if (*Line
!=' ' || Second
>60) return(RLRC_Unknown
); //second can be 60 due to a leap second
142 // skip the timezone up to the closing ]
143 while (*Line
&& *Line
!=']') Line
++;
144 if (*Line
!=']') return(RLRC_Unknown
);
146 Entry
->EntryTime
.tm_year
=Year
-1900;
147 Entry
->EntryTime
.tm_mon
=Month
;
148 Entry
->EntryTime
.tm_mday
=Day
;
149 Entry
->EntryTime
.tm_hour
=Hour
;
150 Entry
->EntryTime
.tm_min
=Minute
;
151 Entry
->EntryTime
.tm_sec
=Second
;
152 Entry
->EntryTime
.tm_isdst
=-1;
154 // the URL is enclosed between double quotes
156 if (*Line
!=' ') return(RLRC_Unknown
);
158 if (*Line
!='\"') return(RLRC_Unknown
);
160 // skip the HTTP function
162 while (isalpha(*Line
)) Line
++;
163 if (*Line
!=' ' || Line
==Begin
) return(RLRC_Unknown
);
167 for (UrlLen
=0 ; *Line
&& *Line
!=' ' ; UrlLen
++) Line
++;
168 if (*Line
!=' ' || UrlLen
==0) return(RLRC_Unknown
);
172 while (*Line
&& *Line
!='\"') Line
++;
173 if (*Line
!='\"') return(RLRC_Unknown
);
175 if (*Line
!=' ') return(RLRC_Unknown
);
177 // get the HTTP code.
178 Entry
->HttpCode
=++Line
;
179 for (HttpCodeLen
=0 ; *Line
&& *Line
!=' ' ; HttpCodeLen
++) Line
++;
180 if (*Line
!=' ' || HttpCodeLen
==0) return(RLRC_Unknown
);
182 // get the number of transferred bytes.
184 Line
=Common_GetLongLongInt(Line
,&Entry
->DataSize
);
185 // some logs contain more columns
186 if ((*Line
&& *Line
!=' ') || Begin
==Line
) return(RLRC_Unknown
);
188 // check the entry time
189 if (mktime(&Entry
->EntryTime
)==-1) {
190 debuga(__FILE__
,__LINE__
,_("Invalid date or time found in the common log file\n"));
191 return(RLRC_InternalError
);
194 // it is safe to alter the line buffer now that we are returning a valid entry
196 Entry
->HttpCode
[HttpCodeLen
]='\0';
197 Entry
->Url
[UrlLen
]='\0';
198 Entry
->User
[UserLen
]='\0';
200 return(RLRC_NoError
);
203 //! \brief Object to read a standard common log format.
204 const struct ReadLogProcessStruct ReadCommonLog
=
206 /* TRANSLATORS: This is the name of the log format displayed when this format is detected in an input log file. */
207 N_("common log format"),