]> git.ipfire.org Git - thirdparty/squid.git/blob - tools/purge/copyout.cc
Renamed squid.h to squid-old.h and config.h to squid.h
[thirdparty/squid.git] / tools / purge / copyout.cc
1 //
2 // $Id$
3 //
4 // Author: Jens-S. Vöckler <voeckler@rvs.uni-hannover.de>
5 //
6 // File: copyout.cc
7 // Tue Jun 15 1999
8 //
9 // (c) 1999 Lehrgebiet Rechnernetze und Verteilte Systeme
10 // Universität Hannover, Germany
11 //
12 // Permission to use, copy, modify, distribute, and sell this software
13 // and its documentation for any purpose is hereby granted without fee,
14 // provided that (i) the above copyright notices and this permission
15 // notice appear in all copies of the software and related documentation,
16 // and (ii) the names of the Lehrgebiet Rechnernetze und Verteilte
17 // Systeme and the University of Hannover may not be used in any
18 // advertising or publicity relating to the software without the
19 // specific, prior written permission of Lehrgebiet Rechnernetze und
20 // Verteilte Systeme and the University of Hannover.
21 //
22 // THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND,
23 // EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
24 // WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
25 //
26 // IN NO EVENT SHALL THE LEHRGEBIET RECHNERNETZE UND VERTEILTE SYSTEME OR
27 // THE UNIVERSITY OF HANNOVER BE LIABLE FOR ANY SPECIAL, INCIDENTAL,
28 // INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, OR ANY DAMAGES
29 // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER OR NOT
30 // ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF LIABILITY,
31 // ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
32 // SOFTWARE.
33 //
34 // Revision 1.2 1999/06/16 13:05:26 voeckler
35 // mmap file copying on Solaris.
36 //
37 // Revision 1.1 1999/06/15 21:10:47 voeckler
38 // Initial revision
39 //
40 //
41 #if (defined(__GNUC__) || defined(__GNUG__)) && !defined(__clang__)
42 #pragma implementation
43 #endif
44
45 #include "squid.h"
46 #include "copyout.hh"
47
48 //#include <assert.h>
49 //#include <sys/types.h>
50 #include <sys/stat.h>
51 #include <stdio.h>
52 #include <string.h>
53 #include <fcntl.h>
54 #include <errno.h>
55 #include <unistd.h>
56 #include <sys/mman.h>
57
58 #ifndef MAP_FILE
59 #define MAP_FILE 0
60 #endif // MAP_FILE
61
int
assert_copydir( const char* copydir )
// purpose: check that copydir is a directory we may write into, creating
//          it (mode 0750) if it does not exist yet.
// paramtr: copydir (IN): name of directory for copying bodies.
// returns: 0 if ok, -1 otherwise.
// further: errors are reported on stderr from within this function.
//          Writability is judged from the permission bits and the
//          effective uid/gid only; supplementary groups are not consulted.
{
    struct stat st;

    if ( stat( copydir, &st ) == -1 ) {
        // 'st' is not filled in when stat() fails, so it must not be
        // inspected on this path. 'File not found' is the only legal error.
        if ( errno != ENOENT ) {
            // not a 'file not found' error, so this is a hard error.
            fprintf( stderr, "accessing copy-out dir \"%s\": %s\n",
                     copydir, strerror(errno) );
            return -1;
        }

        // directory does not exist. Try to create it.
        if ( mkdir( copydir, 0750 ) == -1 ) {
            fprintf( stderr, "mkdir(%s): %s\n", copydir, strerror(errno) );
            return -1;
        }

        // postcondition: copydir was freshly created as a directory.
        return 0;
    }

    // stat() succeeded: the name exists, but it has to be a directory ...
    if ( ! S_ISDIR(st.st_mode) ) {
        fprintf( stderr, "copy dir \"%s\" is a file!\n", copydir );
        return -1;
    }

    // ... and at least one write bit has to apply to the effective ids.
    int writable =
        ( st.st_uid == geteuid() && (st.st_mode & S_IWUSR) != 0 ) ||
        ( st.st_gid == getegid() && (st.st_mode & S_IWGRP) != 0 ) ||
        ( (st.st_mode & S_IWOTH) != 0 );
    if ( ! writable ) {
        fprintf( stderr, "copy dir \"%s\" is not accessible to me\n", copydir );
        return -1;
    }

    // postcondition: copydir exists and is a directory.
    return 0;
}
105
inline
unsigned
xlate( char ch )
// purpose: classify a character as input symbol for the header-end
//          state machine in copy_out().
// paramtr: ch (IN): character to classify.
// returns: 0 for carriage return, 1 for line feed, 2 for anything else.
{
    switch ( ch ) {
    case '\r':
        return 0u;
    case '\n':
        return 1u;
    default:
        return 2u;
    }
}
114
// Shortcut for the repetitive exit path of copy_out(): releases the
// filename buffer, closes both descriptors and returns (x). It expects the
// locals 'filename', 'input' and 'out' to be in scope. Note that (x) is
// evaluated after the cleanup, so it must not refer to those resources.
// Wrapped in do/while(0) so it acts as one statement even after an
// unbraced if/else.
#define BAUTZ(x) do { delete[] filename; close(input); close(out); return (x); } while (0)
117
118 bool
119 copy_out( size_t filesize, size_t metasize, unsigned debug,
120 const char* fn, const char* url, const char* copydir,
121 bool copyHdr )
122 // purpose: copy content from squid disk file into separate file
123 // paramtr: metasize (IN): size of metadata to skip
124 // fn (IN): current filename of squid disk file
125 // url (IN): currently looked at URL to generate separate file
126 // copydir (IN): base directory where to generate the file
127 // copyHdr (IN): copy HTTP header, too, if set to true.
128 // returns: true, if successful, false otherwise.
129 {
130 static const char* index = "index.html";
131
132 // find hostname part after the scheme (okok, not counting port, etc.)
133 const char* ptr = strstr( url, "://" );
134 if ( ptr == 0 || strlen(ptr) < 4 ) return false;
135
136 // create filename to store contents into
137 char *filename = new char[ strlen(url) + strlen(copydir) + strlen(index) ];
138 assert( filename != 0 );
139 strcpy( filename, copydir );
140 strcat( filename, "/" );
141 char* here = filename + strlen(filename);
142 strcat( filename, ptr+3 );
143
144 // handle server root (e.g. "http://www.focus.de" )
145 if ( strchr( ptr+3, '/' ) == 0 ) strcat( filename, "/" );
146
147 // handle directories (e.g. "http://www.focus.de/A/" )
148 if ( filename[strlen(filename)-1] == '/' ) strcat( filename, index );
149
150 // create subdirectory structure
151 for ( char* t = strchr(here,'/'); t; t = strchr(t,'/') ) {
152 *t = 0;
153 if ( mkdir( filename, 0775 ) == -1 && errno != EEXIST ) {
154 fprintf( stderr, "mkdir(%s): %s\n", filename, strerror(errno) );
155 delete[] filename;
156 return false;
157 } else if ( debug & 0x02 ) {
158 fprintf( stderr, "# creating %s\n", filename );
159 }
160 *t++ = '/';
161 }
162
163 // create file
164 int out = open( filename, O_CREAT | O_RDWR | O_TRUNC, 0664 );
165 if ( out == -1 ) {
166 fprintf( stderr, "open(%s,RDWR): %s\n", filename, strerror(errno) );
167 delete[] filename;
168 return false;
169 } else if ( debug & 0x02 ) {
170 fprintf( stderr, "# creating %s\n", filename );
171 }
172
173 // (re)open cache file
174 int input = open( fn, O_RDONLY );
175 if ( input == -1 ) {
176 fprintf( stderr, "open(%s,RDONLY): %s\n", fn, strerror(errno) );
177 delete[] filename;
178 close(out);
179 return false;
180 }
181
182 // find double CRLF sequence (actually, look at the FSM below)
183 // FIXME: this only looks at the already known buffer read previously,
184 // which is globally passed (yuck)! As a limitation, the content data
185 // *must* begin within the buffer size (that is: 16k)!
186 if ( ! copyHdr ) {
187 extern char* linebuffer; // import from purge.cc
188 extern size_t buffersize; // import from purge.cc
189
190 unsigned state = 0;
191 char* s = linebuffer + metasize;
192 while ( s < linebuffer + buffersize && state < 4 ) {
193 // state transition machine
194 static unsigned table[4][3] = { {3,2,0}, {0,4,0}, {1,4,0}, {4,2,0} };
195 // old || \r | \n |else|
196 // =====++====+====+====+
197 // 0 || 3 | 2 | 0 |
198 // 1 || 0 | 4 | 0 |
199 // 2 || 1 | 4 | 0 |
200 // 3 || 4 | 2 | 0 |
201 state = table[ state ][ xlate(*s++) ];
202 }
203
204 if ( state < 4 )
205 // complain bitterly, if the HTTP header was too large ( > 16k ).
206 fprintf( stderr, "WARNING: %s will contain partial HTTP header data!\n",
207 filename );
208
209 // adjust to different seek size
210 metasize = s - linebuffer;
211 }
212
213 // no need to copy zero content files
214 if ( filesize - metasize <= 0 ) {
215 BAUTZ( filesize-metasize == 0 );
216 }
217
218 #ifdef USE_REGULAR_COPY
219 // position input at start of server answer (contains HTTP headers)
220 if ( lseek( input, metasize, SEEK_SET ) == -1 ) {
221 fprintf( stderr, "lseek(%s,%lu): %s\n", fn, metasize, strerror(errno) );
222 BAUTZ(false);
223 }
224
225 // file copy input into output via buffer (regular io)
226 char buffer[32768];
227 int rsize, wsize;
228 while ( (rsize=read(input,buffer,sizeof(buffer))) > 0 ) {
229 if ( (wsize=write(out,buffer,rsize)) <= 0 ) break;
230 }
231 if ( rsize < 0 || wsize < 0 ) perror( "while copying" );
232 #else // use mmap copy (compare: Stevens APUE 12.9)
233 // precondition: filesize-metasize > 0
234 // seek end of output file ...
235 off_t position = lseek( out, filesize-metasize-1, SEEK_SET );
236 if ( position == -1 ) {
237 fprintf( stderr, "lseek(%s,%lu): %s\n", filename,
238 (unsigned long)filesize-metasize,
239 strerror(errno) );
240 BAUTZ(false);
241 } else if ( debug & 0x02 ) {
242 fprintf( stderr, "# filesize=%lu, metasize=%lu, filepos=%ld\n",
243 (unsigned long)filesize, (unsigned long)metasize,
244 (long)position );
245 }
246
247 // ...and write 1 byte there (create a file that length)
248 if ( write( out, "", 1 ) != 1 ) {
249 perror( "write to output" );
250 BAUTZ(false);
251 }
252
253 // create source mmap to copy from (mmap complete file)
254 caddr_t src = (caddr_t) mmap( 0, filesize, PROT_READ,
255 MAP_FILE | MAP_SHARED, input, 0 );
256 if ( src == (caddr_t) -1 ) {
257 perror( "mmap input" );
258 BAUTZ(false);
259 }
260
261 // create destination mmap to copy into (mmap data portion)
262 caddr_t dst = (caddr_t) mmap( 0, filesize-metasize, PROT_READ | PROT_WRITE,
263 MAP_FILE | MAP_SHARED, out, 0 );
264 if ( dst == (caddr_t) -1 ) {
265 perror( "mmap output" );
266 munmap( src, filesize );
267 BAUTZ(false);
268 }
269
270 // copy file (beware of offset into wanted data, skip meta data)
271 memcpy( dst, src+metasize, filesize-metasize );
272
273 // clean up
274 munmap( dst, filesize-metasize );
275 munmap( src, filesize );
276 #endif // USE_REGULAR_COPY
277
278 BAUTZ(true);
279 }