]>
Commit | Line | Data |
---|---|---|
5f623035 | 1 | /* |
77b1029d | 2 | * Copyright (C) 1996-2020 The Squid Software Foundation and contributors |
5f623035 AJ |
3 | * |
4 | * Squid software is distributed under GPLv2+ license and includes | |
5 | * contributions from numerous individuals and organizations. | |
6 | * Please see the COPYING and CONTRIBUTORS files for details. | |
7 | */ | |
8 | ||
0b96a9b3 | 9 | // Author: Jens-S. Vöckler <voeckler@rvs.uni-hannover.de> |
eb1f6bfa AJ |
10 | // |
11 | // File: copyout.cc | |
12 | // Tue Jun 15 1999 | |
13 | // | |
14 | // (c) 1999 Lehrgebiet Rechnernetze und Verteilte Systeme | |
0b96a9b3 | 15 | // Universität Hannover, Germany |
eb1f6bfa AJ |
16 | // |
17 | // Permission to use, copy, modify, distribute, and sell this software | |
18 | // and its documentation for any purpose is hereby granted without fee, | |
19 | // provided that (i) the above copyright notices and this permission | |
20 | // notice appear in all copies of the software and related documentation, | |
21 | // and (ii) the names of the Lehrgebiet Rechnernetze und Verteilte | |
22 | // Systeme and the University of Hannover may not be used in any | |
23 | // advertising or publicity relating to the software without the | |
24 | // specific, prior written permission of Lehrgebiet Rechnernetze und | |
25 | // Verteilte Systeme and the University of Hannover. | |
26 | // | |
27 | // THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, | |
28 | // EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY | |
29 | // WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. | |
30 | // | |
31 | // IN NO EVENT SHALL THE LEHRGEBIET RECHNERNETZE UND VERTEILTE SYSTEME OR | |
32 | // THE UNIVERSITY OF HANNOVER BE LIABLE FOR ANY SPECIAL, INCIDENTAL, | |
33 | // INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, OR ANY DAMAGES | |
34 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER OR NOT | |
35 | // ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF LIABILITY, | |
36 | // ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS | |
37 | // SOFTWARE. | |
38 | // | |
eb1f6bfa AJ |
39 | // Revision 1.2 1999/06/16 13:05:26 voeckler |
40 | // mmap file copying on Solaris. | |
41 | // | |
42 | // Revision 1.1 1999/06/15 21:10:47 voeckler | |
43 | // Initial revision | |
44 | // | |
45 | // | |
f7f3304a | 46 | #include "squid.h" |
2ccf2eb2 AJ |
47 | #include "copyout.hh" |
48 | ||
eb1f6bfa | 49 | #include <sys/stat.h> |
074d6a40 AJ |
50 | #include <cerrno> |
51 | #include <cstring> | |
eb1f6bfa | 52 | #include <fcntl.h> |
eb1f6bfa | 53 | #include <unistd.h> |
c060699c | 54 | #include <sys/mman.h> |
2ccf2eb2 | 55 | |
eb1f6bfa AJ |
56 | #ifndef MAP_FILE |
57 | #define MAP_FILE 0 | |
58 | #endif // MAP_FILE | |
59 | ||
eb1f6bfa AJ |
int
assert_copydir( const char* copydir )
// purpose: check that copydir is a directory and that we can write into it.
// paramtr: copydir (IN): name of directory for copying bodies.
// returns: 0 if ok, -1 otherwise.
// further: errors are reported on stderr from within. If the directory
//          does not exist, the assertion function will try to create it
//          with mode 0750.
{
    struct stat st;

    if ( stat( copydir, &st ) == -1 ) {
        // stat() failed, so the contents of 'st' are indeterminate and
        // must not be inspected. 'File not found' is a legal error.
        if ( errno != ENOENT ) {
            // not a 'file not found' error, so this is a hard error.
            fprintf( stderr, "accessing copy-out dir \"%s\": %s\n",
                     copydir, strerror(errno) );
            return -1;
        }

        // directory does not exist. Try to create it.
        if ( mkdir( copydir, 0750 ) == -1 ) {
            fprintf( stderr, "mkdir(%s): %s\n", copydir, strerror(errno) );
            return -1;
        }

        // postcondition: copydir was freshly created by us.
        return 0;
    }

    // stat() succeeded: from here on 'st' is valid.
    if ( ! S_ISDIR(st.st_mode) ) {
        // the name exists, but does not point to a directory
        fprintf( stderr, "copy dir \"%s\" is a file!\n", copydir );
        return -1;
    }

    // write permission via owner, group or "other" permission bits
    const int ownerWrite = ( st.st_uid == geteuid() && (st.st_mode & S_IWUSR) != 0 );
    const int groupWrite = ( st.st_gid == getegid() && (st.st_mode & S_IWGRP) != 0 );
    const int otherWrite = ( (st.st_mode & S_IWOTH) != 0 );
    if ( !( ownerWrite || groupWrite || otherWrite ) ) {
        fprintf( stderr, "copy dir \"%s\" is not accessible to me\n", copydir );
        return -1;
    }

    // postcondition: copydir exists and is a directory.
    return 0;
}
103 | ||
inline
unsigned
xlate( char ch )
// purpose: map a character onto its column index in a three-column table.
// paramtr: ch (IN): character to classify.
// returns: 0 for carriage return, 1 for line feed, 2 for anything else.
{
    switch ( ch ) {
    case '\r':
        return 0u;
    case '\n':
        return 1u;
    default:
        return 2u;
    }
}
112 | ||
// shortcut for the monotonous clean-up sequence: release 'filename',
// close the 'input' and 'out' descriptors, then return (x).
// The do { } while (0) wrapper makes the expansion a single statement,
// so the macro stays correct when used as the body of an unbraced if/else.
#define BAUTZ(x) do { delete[] filename; close(input); close(out); return (x); } while (0)
115 | ||
116 | bool | |
117 | copy_out( size_t filesize, size_t metasize, unsigned debug, | |
feec68a0 A |
118 | const char* fn, const char* url, const char* copydir, |
119 | bool copyHdr ) | |
120 | // purpose: copy content from squid disk file into separate file | |
121 | // paramtr: metasize (IN): size of metadata to skip | |
122 | // fn (IN): current filename of squid disk file | |
123 | // url (IN): currently looked at URL to generate separate file | |
124 | // copydir (IN): base directory where to generate the file | |
125 | // copyHdr (IN): copy HTTP header, too, if set to true. | |
126 | // returns: true, if successful, false otherwise. | |
eb1f6bfa | 127 | { |
feec68a0 | 128 | static const char* index = "index.html"; |
eb1f6bfa | 129 | |
feec68a0 | 130 | // find hostname part after the scheme (okok, not counting port, etc.) |
6e2aefad | 131 | const char* ptr = strstr( url, "://" ); |
feec68a0 | 132 | if ( ptr == 0 || strlen(ptr) < 4 ) return false; |
eb1f6bfa | 133 | |
feec68a0 | 134 | // create filename to store contents into |
cc9a7c80 AJ |
135 | // NP: magic extra 5 bytes for the component delimiter and termination octets |
136 | char *filename = new char[ strlen(ptr) + strlen(copydir) + strlen(index) +5 ]; | |
feec68a0 A |
137 | assert( filename != 0 ); |
138 | strcpy( filename, copydir ); | |
139 | strcat( filename, "/" ); | |
140 | char* here = filename + strlen(filename); | |
141 | strcat( filename, ptr+3 ); | |
eb1f6bfa | 142 | |
feec68a0 A |
143 | // handle server root (e.g. "http://www.focus.de" ) |
144 | if ( strchr( ptr+3, '/' ) == 0 ) strcat( filename, "/" ); | |
eb1f6bfa | 145 | |
feec68a0 A |
146 | // handle directories (e.g. "http://www.focus.de/A/" ) |
147 | if ( filename[strlen(filename)-1] == '/' ) strcat( filename, index ); | |
148 | ||
149 | // create subdirectory structure | |
150 | for ( char* t = strchr(here,'/'); t; t = strchr(t,'/') ) { | |
151 | *t = 0; | |
152 | if ( mkdir( filename, 0775 ) == -1 && errno != EEXIST ) { | |
153 | fprintf( stderr, "mkdir(%s): %s\n", filename, strerror(errno) ); | |
154 | delete[] filename; | |
155 | return false; | |
156 | } else if ( debug & 0x02 ) { | |
157 | fprintf( stderr, "# creating %s\n", filename ); | |
158 | } | |
14942edd FC |
159 | *t = '/'; |
160 | ++t; | |
eb1f6bfa | 161 | } |
eb1f6bfa | 162 | |
feec68a0 A |
163 | // create file |
164 | int out = open( filename, O_CREAT | O_RDWR | O_TRUNC, 0664 ); | |
165 | if ( out == -1 ) { | |
166 | fprintf( stderr, "open(%s,RDWR): %s\n", filename, strerror(errno) ); | |
167 | delete[] filename; | |
168 | return false; | |
169 | } else if ( debug & 0x02 ) { | |
170 | fprintf( stderr, "# creating %s\n", filename ); | |
171 | } | |
eb1f6bfa | 172 | |
feec68a0 A |
173 | // (re)open cache file |
174 | int input = open( fn, O_RDONLY ); | |
175 | if ( input == -1 ) { | |
176 | fprintf( stderr, "open(%s,RDONLY): %s\n", fn, strerror(errno) ); | |
177 | delete[] filename; | |
178 | close(out); | |
179 | return false; | |
180 | } | |
eb1f6bfa | 181 | |
feec68a0 | 182 | // find double CRLF sequence (actually, look at the FSM below) |
9837567d | 183 | // XXX: this only looks at the already known buffer read previously, |
feec68a0 A |
184 | // which is globally passed (yuck)! As a limitation, the content data |
185 | // *must* begin within the buffer size (that is: 16k)! | |
186 | if ( ! copyHdr ) { | |
187 | extern char* linebuffer; // import from purge.cc | |
188 | extern size_t buffersize; // import from purge.cc | |
eb1f6bfa | 189 | |
feec68a0 A |
190 | unsigned state = 0; |
191 | char* s = linebuffer + metasize; | |
192 | while ( s < linebuffer + buffersize && state < 4 ) { | |
193 | // state transition machine | |
194 | static unsigned table[4][3] = { {3,2,0}, {0,4,0}, {1,4,0}, {4,2,0} }; | |
195 | // old || \r | \n |else| | |
196 | // =====++====+====+====+ | |
197 | // 0 || 3 | 2 | 0 | | |
198 | // 1 || 0 | 4 | 0 | | |
199 | // 2 || 1 | 4 | 0 | | |
200 | // 3 || 4 | 2 | 0 | | |
aec55359 FC |
201 | state = table[ state ][ xlate(*s) ]; |
202 | ++s; | |
feec68a0 | 203 | } |
eb1f6bfa | 204 | |
feec68a0 A |
205 | if ( state < 4 ) |
206 | // complain bitterly, if the HTTP header was too large ( > 16k ). | |
207 | fprintf( stderr, "WARNING: %s will contain partial HTTP header data!\n", | |
208 | filename ); | |
eb1f6bfa | 209 | |
feec68a0 A |
210 | // adjust to different seek size |
211 | metasize = s - linebuffer; | |
212 | } | |
eb1f6bfa | 213 | |
feec68a0 A |
214 | // no need to copy zero content files |
215 | if ( filesize - metasize <= 0 ) { | |
216 | BAUTZ( filesize-metasize == 0 ); | |
217 | } | |
eb1f6bfa AJ |
218 | |
219 | #ifdef USE_REGULAR_COPY | |
feec68a0 A |
220 | // position input at start of server answer (contains HTTP headers) |
221 | if ( lseek( input, metasize, SEEK_SET ) == -1 ) { | |
222 | fprintf( stderr, "lseek(%s,%lu): %s\n", fn, metasize, strerror(errno) ); | |
223 | BAUTZ(false); | |
224 | } | |
eb1f6bfa | 225 | |
feec68a0 A |
226 | // file copy input into output via buffer (regular io) |
227 | char buffer[32768]; | |
228 | int rsize, wsize; | |
229 | while ( (rsize=read(input,buffer,sizeof(buffer))) > 0 ) { | |
230 | if ( (wsize=write(out,buffer,rsize)) <= 0 ) break; | |
231 | } | |
232 | if ( rsize < 0 || wsize < 0 ) perror( "while copying" ); | |
eb1f6bfa | 233 | #else // use mmap copy (compare: Stevens APUE 12.9) |
feec68a0 A |
234 | // precondition: filesize-metasize > 0 |
235 | // seek end of output file ... | |
236 | off_t position = lseek( out, filesize-metasize-1, SEEK_SET ); | |
237 | if ( position == -1 ) { | |
8978bd9d | 238 | fprintf( stderr, "lseek(%s,%lu): %s\n", filename, |
ec3c3187 | 239 | (unsigned long)filesize-metasize, |
feec68a0 A |
240 | strerror(errno) ); |
241 | BAUTZ(false); | |
242 | } else if ( debug & 0x02 ) { | |
243 | fprintf( stderr, "# filesize=%lu, metasize=%lu, filepos=%ld\n", | |
ec3c3187 FC |
244 | (unsigned long)filesize, (unsigned long)metasize, |
245 | (long)position ); | |
feec68a0 | 246 | } |
eb1f6bfa | 247 | |
feec68a0 A |
248 | // ...and write 1 byte there (create a file that length) |
249 | if ( write( out, "", 1 ) != 1 ) { | |
250 | perror( "write to output" ); | |
251 | BAUTZ(false); | |
252 | } | |
eb1f6bfa | 253 | |
feec68a0 A |
254 | // create source mmap to copy from (mmap complete file) |
255 | caddr_t src = (caddr_t) mmap( 0, filesize, PROT_READ, | |
256 | MAP_FILE | MAP_SHARED, input, 0 ); | |
257 | if ( src == (caddr_t) -1 ) { | |
258 | perror( "mmap input" ); | |
259 | BAUTZ(false); | |
260 | } | |
261 | ||
262 | // create destination mmap to copy into (mmap data portion) | |
263 | caddr_t dst = (caddr_t) mmap( 0, filesize-metasize, PROT_READ | PROT_WRITE, | |
264 | MAP_FILE | MAP_SHARED, out, 0 ); | |
265 | if ( dst == (caddr_t) -1 ) { | |
266 | perror( "mmap output" ); | |
267 | munmap( src, filesize ); | |
268 | BAUTZ(false); | |
269 | } | |
eb1f6bfa | 270 | |
feec68a0 A |
271 | // copy file (beware of offset into wanted data, skip meta data) |
272 | memcpy( dst, src+metasize, filesize-metasize ); | |
eb1f6bfa | 273 | |
feec68a0 A |
274 | // clean up |
275 | munmap( dst, filesize-metasize ); | |
276 | munmap( src, filesize ); | |
eb1f6bfa AJ |
277 | #endif // USE_REGULAR_COPY |
278 | ||
feec68a0 | 279 | BAUTZ(true); |
eb1f6bfa | 280 | } |
f53969cc | 281 |