]> git.ipfire.org Git - thirdparty/squid.git/blob - src/disk.cc
Summary: Synced with libecap, adopted pass-all-changes-through transactions
[thirdparty/squid.git] / src / disk.cc
1
2 /*
3 * $Id: disk.cc,v 1.173 2007/04/30 16:56:09 wessels Exp $
4 *
5 * DEBUG: section 6 Disk I/O Routines
6 * AUTHOR: Harvest Derived
7 *
8 * SQUID Web Proxy Cache http://www.squid-cache.org/
9 * ----------------------------------------------------------
10 *
11 * Squid is the result of efforts by numerous individuals from
12 * the Internet community; see the CONTRIBUTORS file for full
13 * details. Many organizations have provided support for Squid's
14 * development; see the SPONSORS file for full details. Squid is
15 * Copyrighted (C) 2001 by the Regents of the University of
16 * California; see the COPYRIGHT file for full details. Squid
17 * incorporates software developed and/or copyrighted by other
18 * sources; see the CREDITS file for full details.
19 *
20 * This program is free software; you can redistribute it and/or modify
21 * it under the terms of the GNU General Public License as published by
22 * the Free Software Foundation; either version 2 of the License, or
23 * (at your option) any later version.
24 *
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
29 *
30 * You should have received a copy of the GNU General Public License
31 * along with this program; if not, write to the Free Software
32 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
33 *
34 */
35
36 #include "squid.h"
37 #include "fde.h"
38 #include "MemBuf.h"
39
40 static PF diskHandleRead;
41 static PF diskHandleWrite;
42
43 #if defined(_SQUID_WIN32_) || defined(_SQUID_OS2_)
44 static int
45 diskWriteIsComplete(int fd)
46 {
47 return fd_table[fd].disk.write_q ? 0 : 1;
48 }
49
50 #endif
51
/*
 * Initialization hook for the disk I/O routines.
 * Nothing needs to be set up, so this is a deliberate no-op.
 */
void
disk_init(void)
{
    /* intentionally empty */
    (void) 0;
}
57
58 /*
59 * opens a disk file specified by 'path'. This function always
60 * blocks! There is no callback.
61 */
62 int
63 file_open(const char *path, int mode)
64 {
65 int fd;
66 PROF_start(file_open);
67
68 if (FILE_MODE(mode) == O_WRONLY)
69 mode |= O_APPEND;
70
71 errno = 0;
72
73 fd = open(path, mode, 0644);
74
75 statCounter.syscalls.disk.opens++;
76
77 if (fd < 0) {
78 debugs(50, 3, "file_open: error opening file " << path << ": " << xstrerror());
79 fd = DISK_ERROR;
80 } else {
81 debugs(6, 5, "file_open: FD " << fd);
82 commSetCloseOnExec(fd);
83 fd_open(fd, FD_FILE, path);
84 }
85
86 PROF_stop(file_open);
87 return fd;
88 }
89
90
91 /* close a disk file. */
92 void
93 file_close(int fd)
94 {
95 fde *F = &fd_table[fd];
96 PF *read_callback;
97 PROF_start(file_close);
98 assert(fd >= 0);
99 assert(F->flags.open);
100
101 if ((read_callback = F->read_handler)) {
102 F->read_handler = NULL;
103 read_callback(-1, F->read_data);
104 }
105
106 if (F->flags.write_daemon) {
107 #if defined(_SQUID_WIN32_) || defined(_SQUID_OS2_)
108 /*
109 * on some operating systems, you can not delete or rename
110 * open files, so we won't allow delayed close.
111 */
112
113 while (!diskWriteIsComplete(fd))
114 diskHandleWrite(fd, NULL);
115
116 #else
117
118 F->flags.close_request = 1;
119
120 debugs(6, 2, "file_close: FD " << fd << ", delaying close");
121
122 PROF_stop(file_close);
123
124 return;
125
126 #endif
127
128 }
129
130 /*
131 * Assert there is no write callback. Otherwise we might be
132 * leaking write state data by closing the descriptor
133 */
134 assert(F->write_handler == NULL);
135
136 F->flags.closing = 1;
137
138 #if CALL_FSYNC_BEFORE_CLOSE
139
140 fsync(fd);
141
142 #endif
143
144 close(fd);
145
146 debugs(6, F->flags.close_request ? 2 : 5, "file_close: FD " << fd << " really closing\n");
147
148 fd_close(fd);
149
150 statCounter.syscalls.disk.closes++;
151
152 PROF_stop(file_close);
153 }
154
155 /*
156 * This function has the purpose of combining multiple writes. This is
157 * to facilitate the ASYNC_IO option since it can only guarantee 1
158 * write to a file per trip around the comm.c select() loop. That's bad
159 * because more than 1 write can be made to the access.log file per
160 * trip, and so this code is purely designed to help batch multiple
161 * sequential writes to the access.log file. Squid will never issue
162 * multiple writes for any other file type during 1 trip around the
163 * select() loop. --SLF
164 */
165 static void
166
167 diskCombineWrites(struct _fde_disk *fdd)
168 {
169 int len = 0;
170 dwrite_q *q = NULL;
171 dwrite_q *wq = NULL;
172 /*
173 * We need to combine multiple write requests on an FD's write
174 * queue But only if we don't need to seek() in between them, ugh!
175 * XXX This currently ignores any seeks (file_offset)
176 */
177
178 if (fdd->write_q != NULL && fdd->write_q->next != NULL)
179 {
180 len = 0;
181
182 for (q = fdd->write_q; q != NULL; q = q->next)
183 len += q->len - q->buf_offset;
184
185 wq = (dwrite_q *)memAllocate(MEM_DWRITE_Q);
186
187 wq->buf = (char *)xmalloc(len);
188
189 wq->len = 0;
190
191 wq->buf_offset = 0;
192
193 wq->next = NULL;
194
195 wq->free_func = xfree;
196
197 do {
198 q = fdd->write_q;
199 len = q->len - q->buf_offset;
200 xmemcpy(wq->buf + wq->len, q->buf + q->buf_offset, len);
201 wq->len += len;
202 fdd->write_q = q->next;
203
204 if (q->free_func)
205 (q->free_func) (q->buf);
206
207 if (q) {
208 memFree(q, MEM_DWRITE_Q);
209 q = NULL;
210 }
211 } while (fdd->write_q != NULL);
212
213 fdd->write_q_tail = wq;
214
215 fdd->write_q = wq;
216 }
217 }
218
219 /* write handler */
220 static void
221 diskHandleWrite(int fd, void *notused)
222 {
223 int len = 0;
224 fde *F = &fd_table[fd];
225
226 struct _fde_disk *fdd = &F->disk;
227 dwrite_q *q = fdd->write_q;
228 int status = DISK_OK;
229 int do_close;
230
231 if (NULL == q)
232 return;
233
234 PROF_start(diskHandleWrite);
235
236 debugs(6, 3, "diskHandleWrite: FD " << fd);
237
238 F->flags.write_daemon = 0;
239
240 assert(fdd->write_q != NULL);
241
242 assert(fdd->write_q->len > fdd->write_q->buf_offset);
243
244 debugs(6, 3, "diskHandleWrite: FD " << fd << " writing " << (fdd->write_q->len - fdd->write_q->buf_offset) << " bytes");
245
246 errno = 0;
247
248 if (fdd->write_q->file_offset != -1)
249 lseek(fd, fdd->write_q->file_offset, SEEK_SET);
250
251 len = FD_WRITE_METHOD(fd,
252 fdd->write_q->buf + fdd->write_q->buf_offset,
253 fdd->write_q->len - fdd->write_q->buf_offset);
254
255 debugs(6, 3, "diskHandleWrite: FD " << fd << " len = " << len);
256
257 statCounter.syscalls.disk.writes++;
258
259 fd_bytes(fd, len, FD_WRITE);
260
261 if (len < 0) {
262 if (!ignoreErrno(errno)) {
263 status = errno == ENOSPC ? DISK_NO_SPACE_LEFT : DISK_ERROR;
264 debugs(50, 1, "diskHandleWrite: FD " << fd << ": disk write error: " << xstrerror());
265
266 /*
267 * If there is no write callback, then this file is
268 * most likely something important like a log file, or
269 * an interprocess pipe. Its not a swapfile. We feel
270 * that a write failure on a log file is rather important,
271 * and Squid doesn't otherwise deal with this condition.
272 * So to get the administrators attention, we exit with
273 * a fatal message.
274 */
275
276 if (fdd->wrt_handle == NULL)
277 fatal("Write failure -- check your disk space and cache.log");
278
279 /*
280 * If there is a write failure, then we notify the
281 * upper layer via the callback, at the end of this
282 * function. Meanwhile, flush all pending buffers
283 * here. Let the upper layer decide how to handle the
284 * failure. This will prevent experiencing multiple,
285 * repeated write failures for the same FD because of
286 * the queued data.
287 */
288 do {
289 fdd->write_q = q->next;
290
291 if (q->free_func)
292 (q->free_func) (q->buf);
293
294 if (q) {
295 memFree(q, MEM_DWRITE_Q);
296 q = NULL;
297 }
298 } while ((q = fdd->write_q));
299 }
300
301 len = 0;
302 }
303
304 if (q != NULL) {
305 /* q might become NULL from write failure above */
306 q->buf_offset += len;
307
308 if (q->buf_offset > q->len)
309 debugs(50, 1, "diskHandleWriteComplete: q->buf_offset > q->len (" <<
310 q << "," << (int) q->buf_offset << ", " << q->len << ", " <<
311 len << " FD " << fd << ")");
312
313
314 assert(q->buf_offset <= q->len);
315
316 if (q->buf_offset == q->len) {
317 /* complete write */
318 fdd->write_q = q->next;
319
320 if (q->free_func)
321 (q->free_func) (q->buf);
322
323 if (q) {
324 memFree(q, MEM_DWRITE_Q);
325 q = NULL;
326 }
327 }
328 }
329
330 if (fdd->write_q == NULL) {
331 /* no more data */
332 fdd->write_q_tail = NULL;
333 } else {
334 /* another block is queued */
335 diskCombineWrites(fdd);
336 commSetSelect(fd, COMM_SELECT_WRITE, diskHandleWrite, NULL, 0);
337 F->flags.write_daemon = 1;
338 }
339
340 do_close = F->flags.close_request;
341
342 if (fdd->wrt_handle) {
343 DWCB *callback = fdd->wrt_handle;
344 void *cbdata;
345 fdd->wrt_handle = NULL;
346
347 if (cbdataReferenceValidDone(fdd->wrt_handle_data, &cbdata)) {
348 callback(fd, status, len, cbdata);
349 /*
350 * NOTE, this callback can close the FD, so we must
351 * not touch 'F', 'fdd', etc. after this.
352 */
353 PROF_stop(diskHandleWrite);
354 return;
355 /* XXX But what about close_request??? */
356 }
357 }
358
359 if (do_close)
360 file_close(fd);
361
362 PROF_stop(diskHandleWrite);
363 }
364
365
366 /* write block to a file */
367 /* write back queue. Only one writer at a time. */
368 /* call a handle when writing is complete. */
369 void
370 file_write(int fd,
371 off_t file_offset,
372 void const *ptr_to_buf,
373 int len,
374 DWCB * handle,
375 void *handle_data,
376 FREE * free_func)
377 {
378 dwrite_q *wq = NULL;
379 fde *F = &fd_table[fd];
380 PROF_start(file_write);
381 assert(fd >= 0);
382 assert(F->flags.open);
383 /* if we got here. Caller is eligible to write. */
384 wq = (dwrite_q *)memAllocate(MEM_DWRITE_Q);
385 wq->file_offset = file_offset;
386 wq->buf = (char *)ptr_to_buf;
387 wq->len = len;
388 wq->buf_offset = 0;
389 wq->next = NULL;
390 wq->free_func = free_func;
391
392 if (!F->disk.wrt_handle_data) {
393 F->disk.wrt_handle = handle;
394 F->disk.wrt_handle_data = cbdataReference(handle_data);
395 } else {
396 /* Detect if there is multiple concurrent users of this fd.. we only support one callback */
397 assert(F->disk.wrt_handle_data == handle_data && F->disk.wrt_handle == handle);
398 }
399
400 /* add to queue */
401 if (F->disk.write_q == NULL) {
402 /* empty queue */
403 F->disk.write_q = F->disk.write_q_tail = wq;
404 } else {
405 F->disk.write_q_tail->next = wq;
406 F->disk.write_q_tail = wq;
407 }
408
409 if (!F->flags.write_daemon) {
410 diskHandleWrite(fd, NULL);
411 }
412
413 PROF_stop(file_write);
414 }
415
416 /*
417 * a wrapper around file_write to allow for MemBuf to be file_written
418 * in a snap
419 */
420 void
421 file_write_mbuf(int fd, off_t off, MemBuf mb, DWCB * handler, void *handler_data)
422 {
423 file_write(fd, off, mb.buf, mb.size, handler, handler_data, mb.freeFunc());
424 }
425
426 /* Read from FD */
427 static void
428 diskHandleRead(int fd, void *data)
429 {
430 dread_ctrl *ctrl_dat = (dread_ctrl *)data;
431 fde *F = &fd_table[fd];
432 int len;
433 int rc = DISK_OK;
434 /*
435 * FD < 0 indicates premature close; we just have to free
436 * the state data.
437 */
438
439 if (fd < 0) {
440 memFree(ctrl_dat, MEM_DREAD_CTRL);
441 return;
442 }
443
444 PROF_start(diskHandleRead);
445
446 if (F->disk.offset != ctrl_dat->offset) {
447 debugs(6, 3, "diskHandleRead: FD " << fd << " seeking to offset " << ctrl_dat->offset);
448 lseek(fd, ctrl_dat->offset, SEEK_SET); /* XXX ignore return? */
449 statCounter.syscalls.disk.seeks++;
450 F->disk.offset = ctrl_dat->offset;
451 }
452
453 errno = 0;
454 len = FD_READ_METHOD(fd, ctrl_dat->buf, ctrl_dat->req_len);
455
456 if (len > 0)
457 F->disk.offset += len;
458
459 statCounter.syscalls.disk.reads++;
460
461 fd_bytes(fd, len, FD_READ);
462
463 if (len < 0) {
464 if (ignoreErrno(errno)) {
465 commSetSelect(fd, COMM_SELECT_READ, diskHandleRead, ctrl_dat, 0);
466 PROF_stop(diskHandleRead);
467 return;
468 }
469
470 debugs(50, 1, "diskHandleRead: FD " << fd << ": " << xstrerror());
471 len = 0;
472 rc = DISK_ERROR;
473 } else if (len == 0) {
474 rc = DISK_EOF;
475 }
476
477 if (cbdataReferenceValid(ctrl_dat->client_data))
478 ctrl_dat->handler(fd, ctrl_dat->buf, len, rc, ctrl_dat->client_data);
479
480 cbdataReferenceDone(ctrl_dat->client_data);
481
482 memFree(ctrl_dat, MEM_DREAD_CTRL);
483
484 PROF_stop(diskHandleRead);
485 }
486
487
488 /* start read operation */
489 /* buffer must be allocated from the caller.
490 * It must have at least req_len space in there.
491 * call handler when a reading is complete. */
492 void
493 file_read(int fd, char *buf, int req_len, off_t offset, DRCB * handler, void *client_data)
494 {
495 dread_ctrl *ctrl_dat;
496 PROF_start(file_read);
497 assert(fd >= 0);
498 ctrl_dat = (dread_ctrl *)memAllocate(MEM_DREAD_CTRL);
499 ctrl_dat->fd = fd;
500 ctrl_dat->offset = offset;
501 ctrl_dat->req_len = req_len;
502 ctrl_dat->buf = buf;
503 ctrl_dat->end_of_file = 0;
504 ctrl_dat->handler = handler;
505 ctrl_dat->client_data = cbdataReference(client_data);
506 diskHandleRead(fd, ctrl_dat);
507 PROF_stop(file_read);
508 }
509
510 void
511 safeunlink(const char *s, int quiet)
512 {
513 statCounter.syscalls.disk.unlinks++;
514
515 if (unlink(s) < 0 && !quiet)
516 debugs(50, 1, "safeunlink: Couldn't delete " << s << ": " << xstrerror());
517 }
518
519 /*
520 * Same as rename(2) but complains if something goes wrong;
521 * the caller is responsible for handing and explaining the
522 * consequences of errors.
523 */
524 int
525 xrename(const char *from, const char *to)
526 {
527 debugs(21, 2, "xrename: renaming " << from << " to " << to);
528 #if defined (_SQUID_OS2_) || defined (_SQUID_WIN32_)
529
530 remove
531 (to);
532
533 #endif
534
535 if (0 == rename(from, to))
536 return 0;
537
538 debugs(21, errno == ENOENT ? 2 : 1, "xrename: Cannot rename " << from << " to " << to << ": " << xstrerror());
539
540 return -1;
541 }
542