]> git.ipfire.org Git - thirdparty/sarg.git/blob - readlog_common.c
Strip the user suffix from the redirector log
[thirdparty/sarg.git] / readlog_common.c
1 /*
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
3 * 1998, 2015
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
#include "include/conf.h"
#include "include/defs.h"
#include "include/readlog.h"

#include <limits.h>
30
/*!
Notification that a new log file is about to be read.

Nothing is done here for the common log format: the function body is
intentionally empty.

\param FileName The name of the file being read. It is not used.
*/
static void Common_NewFile(const char *FileName)
{
	(void)FileName;
}
37
/*!
Extract a column containing a long long int from \a Line.

The column is either a single dash, which is skipped and leaves the value
at zero, or a run of decimal digits. A number too big to fit in a long long
int is clamped to LLONG_MAX instead of overflowing; all of its digits are
consumed nonetheless.

\param Line The NUL-terminated text to parse.
\param Value Where to store the extracted value.

\return The pointer to the next byte just after the number. It is equal to
\a Line when the text starts with neither a dash nor a digit.
*/
static char *Common_GetLongLongInt(char *Line,long long int *Value)
{
	long long int Result=0LL;

	if (*Line=='-') {
		// a dash is skipped and the value stays at zero
		++Line;
	} else {
		// isdigit() is undefined for negative values: go through unsigned char
		while (isdigit((unsigned char)*Line)) {
			const int Digit=*Line++-'0';

			// would Result*10+Digit exceed LLONG_MAX?
			if (Result>(LLONG_MAX-Digit)/10)
				Result=LLONG_MAX;
			else
				Result=Result*10+Digit;
		}
	}
	*Value=Result;
	return(Line);
}
56
57 /*!
58 Read one entry from a standard squid log format.
59
60 \param Line One line from the input log file.
61 \param Entry Where to store the information parsed from the line.
62
63 \retval RLRC_NoError One valid entry is parsed.
64 \retval RLRC_Unknown The line is invalid.
65 \retval RLRC_InternalError An internal error was encountered.
66 */
67 static enum ReadLogReturnCodeEnum Common_ReadEntry(char *Line,struct ReadLogStruct *Entry)
68 {
69 const char *Begin;
70 int IpLen;
71 int HttpCodeLen;
72 int UrlLen;
73 int UserLen;
74 int Day;
75 char MonthName[4];
76 int MonthNameLen;
77 int Month;
78 int Year;
79 int Hour;
80 int Minute;
81 int Second;
82 char *Ip;
83
84 // get IP address
85 Entry->Ip=Ip=Line;
86 for (IpLen=0 ; *Line && *Line!=' ' ; IpLen++) Line++;
87 if (*Line!=' ' || IpLen==0) return(RLRC_Unknown);
88
89 if (!squid24) {
90 // squid version <= 2.4 store the user ID in the second column: skip the first column here
91 Begin=++Line;
92 while (*Line && *Line!=' ') Line++;
93 if (*Line!=' '|| Line==Begin) return(RLRC_Unknown);
94 }
95
96 // the ID of the user or - if the user is unidentified
97 Entry->User=++Line;
98 for (UserLen=0 ; *Line && *Line!=' ' ; UserLen++) Line++;
99 if (*Line!=' ' || UserLen==0) return(RLRC_Unknown);
100
101 if (squid24) {
102 // squid version > 2.4 store the user ID in the first column: skip the second column here
103 Begin=++Line;
104 while (*Line && *Line!=' ') Line++;
105 if (*Line!=' '|| Line==Begin) return(RLRC_Unknown);
106 }
107
108 // get the date enclosed within square brackets
109 ++Line;
110 if (*Line!='[') return(RLRC_Unknown);
111 ++Line;
112 Day=0;
113 while (isdigit(*Line)) Day=Day*10+(*Line++-'0');
114 if (*Line!='/' || Day<1 || Day>31) return(RLRC_Unknown);
115
116 ++Line;
117 for (MonthNameLen=0 ; MonthNameLen<sizeof(MonthName)-1 && isalpha(*Line) ; MonthNameLen++) MonthName[MonthNameLen]=*Line++;
118 if (*Line!='/') return(RLRC_Unknown);
119 MonthName[MonthNameLen]='\0';
120 Month=month2num(MonthName);
121 if (Month>=12) return(RLRC_Unknown);
122
123 ++Line;
124 Year=0;
125 while (isdigit(*Line)) Year=Year*10+(*Line++-'0');
126 if (*Line!=':' || Year<1900 || Year>2200) return(RLRC_Unknown);
127
128 // get the time
129 ++Line;
130 Hour=0;
131 while (isdigit(*Line)) Hour=Hour*10+(*Line++-'0');
132 if (*Line!=':' || Hour>=24) return(RLRC_Unknown);
133 ++Line;
134 Minute=0;
135 while (isdigit(*Line)) Minute=Minute*10+(*Line++-'0');
136 if (*Line!=':' || Minute>=60) return(RLRC_Unknown);
137 ++Line;
138 Second=0;
139 while (isdigit(*Line)) Second=Second*10+(*Line++-'0');
140 if (*Line!=' ' || Second>60) return(RLRC_Unknown); //second can be 60 due to a leap second
141
142 // skip the timezone up to the closing ]
143 while (*Line && *Line!=']') Line++;
144 if (*Line!=']') return(RLRC_Unknown);
145
146 Entry->EntryTime.tm_year=Year-1900;
147 Entry->EntryTime.tm_mon=Month;
148 Entry->EntryTime.tm_mday=Day;
149 Entry->EntryTime.tm_hour=Hour;
150 Entry->EntryTime.tm_min=Minute;
151 Entry->EntryTime.tm_sec=Second;
152 Entry->EntryTime.tm_isdst=-1;
153
154 // the URL is enclosed between double qhotes
155 ++Line;
156 if (*Line!=' ') return(RLRC_Unknown);
157 ++Line;
158 if (*Line!='\"') return(RLRC_Unknown);
159
160 // skip the HTTP function
161 Begin=++Line;
162 while (isalpha(*Line)) Line++;
163 if (*Line!=' ' || Line==Begin) return(RLRC_Unknown);
164
165 // get the URL
166 Entry->Url=++Line;
167 for (UrlLen=0 ; *Line && *Line!=' ' ; UrlLen++) Line++;
168 if (*Line!=' ' || UrlLen==0) return(RLRC_Unknown);
169
170 // skip the HTTP/...
171 ++Line;
172 while (*Line && *Line!='\"') Line++;
173 if (*Line!='\"') return(RLRC_Unknown);
174 ++Line;
175 if (*Line!=' ') return(RLRC_Unknown);
176
177 // get the HTTP code.
178 Entry->HttpCode=++Line;
179 for (HttpCodeLen=0 ; *Line && *Line!=' ' ; HttpCodeLen++) Line++;
180 if (*Line!=' ' || HttpCodeLen==0) return(RLRC_Unknown);
181
182 // get the number of transfered bytes.
183 Begin=++Line;
184 Line=Common_GetLongLongInt(Line,&Entry->DataSize);
185 // some log contains more columns
186 if ((*Line && *Line!=' ') || Begin==Line) return(RLRC_Unknown);
187
188 // check the entry time
189 if (mktime(&Entry->EntryTime)==-1) {
190 debuga(__FILE__,__LINE__,_("Invalid date or time found in the common log file\n"));
191 return(RLRC_InternalError);
192 }
193
194 // it is safe to alter the line buffer now that we are returning a valid entry
195 Ip[IpLen]='\0';
196 Entry->HttpCode[HttpCodeLen]='\0';
197 Entry->Url[UrlLen]='\0';
198 Entry->User[UserLen]='\0';
199
200 return(RLRC_NoError);
201 }
202
//! \brief Object to read a standard common log format.
//!
//! It gathers the translatable name of the format and the two functions of
//! this file: Common_NewFile() and Common_ReadEntry(). The meaning of each
//! member is set by struct ReadLogProcessStruct, which is declared outside
//! of this file.
const struct ReadLogProcessStruct ReadCommonLog=
{
	/* TRANSLATORS: This is the name of the log format displayed when this format is detected in an input log file. */
	N_("common log format"),
	// function to run when a new file is being read (it does nothing for this format)
	Common_NewFile,
	// function parsing one line of the log file
	Common_ReadEntry
};