]> git.ipfire.org Git - thirdparty/squid.git/blob - src/disk.cc
Prep for 3.3.12 and 3.4.4
[thirdparty/squid.git] / src / disk.cc
1 /*
2 * DEBUG: section 06 Disk I/O Routines
3 * AUTHOR: Harvest Derived
4 *
5 * SQUID Web Proxy Cache http://www.squid-cache.org/
6 * ----------------------------------------------------------
7 *
8 * Squid is the result of efforts by numerous individuals from
9 * the Internet community; see the CONTRIBUTORS file for full
10 * details. Many organizations have provided support for Squid's
11 * development; see the SPONSORS file for full details. Squid is
12 * Copyrighted (C) 2001 by the Regents of the University of
13 * California; see the COPYRIGHT file for full details. Squid
14 * incorporates software developed and/or copyrighted by other
15 * sources; see the CREDITS file for full details.
16 *
17 * This program is free software; you can redistribute it and/or modify
18 * it under the terms of the GNU General Public License as published by
19 * the Free Software Foundation; either version 2 of the License, or
20 * (at your option) any later version.
21 *
22 * This program is distributed in the hope that it will be useful,
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 * GNU General Public License for more details.
26 *
27 * You should have received a copy of the GNU General Public License
28 * along with this program; if not, write to the Free Software
29 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
30 *
31 */
32
33 #include "squid.h"
34 #include "comm/Loops.h"
35 #include "disk.h"
36 #include "fd.h"
37 #include "fde.h"
38 #include "globals.h"
39 #include "Mem.h"
40 #include "MemBuf.h"
41 #include "profiler/Profiler.h"
42 #include "StatCounters.h"
43
44 #if HAVE_ERRNO_H
45 #include <errno.h>
46 #endif
47
48 static PF diskHandleRead;
49 static PF diskHandleWrite;
50
51 #if _SQUID_WINDOWS_ || _SQUID_OS2_
52 static int
53 diskWriteIsComplete(int fd)
54 {
55 return fd_table[fd].disk.write_q ? 0 : 1;
56 }
57
58 #endif
59
/*
 * Initialization hook for the disk I/O routines.
 * The body is a deliberate no-op.
 */
void
disk_init(void)
{
    /* nothing to do */
}
65
66 /* hack needed on SunStudio to avoid linkage convention mismatch */
67 static void cxx_xfree(void *ptr)
68 {
69 xfree(ptr);
70 }
71
72 /*
73 * opens a disk file specified by 'path'. This function always
74 * blocks! There is no callback.
75 */
76 int
77 file_open(const char *path, int mode)
78 {
79 int fd;
80 PROF_start(file_open);
81
82 if (FILE_MODE(mode) == O_WRONLY)
83 mode |= O_APPEND;
84
85 errno = 0;
86
87 fd = open(path, mode, 0644);
88
89 ++ statCounter.syscalls.disk.opens;
90
91 if (fd < 0) {
92 debugs(50, 3, "file_open: error opening file " << path << ": " << xstrerror());
93 fd = DISK_ERROR;
94 } else {
95 debugs(6, 5, "file_open: FD " << fd);
96 commSetCloseOnExec(fd);
97 fd_open(fd, FD_FILE, path);
98 }
99
100 PROF_stop(file_open);
101 return fd;
102 }
103
104 /* close a disk file. */
105 void
106 file_close(int fd)
107 {
108 fde *F = &fd_table[fd];
109 PF *read_callback;
110 PROF_start(file_close);
111 assert(fd >= 0);
112 assert(F->flags.open);
113
114 if ((read_callback = F->read_handler)) {
115 F->read_handler = NULL;
116 read_callback(-1, F->read_data);
117 }
118
119 if (F->flags.write_daemon) {
120 #if _SQUID_WINDOWS_ || _SQUID_OS2_
121 /*
122 * on some operating systems, you can not delete or rename
123 * open files, so we won't allow delayed close.
124 */
125 while (!diskWriteIsComplete(fd))
126 diskHandleWrite(fd, NULL);
127 #else
128 F->flags.close_request = true;
129 debugs(6, 2, "file_close: FD " << fd << ", delaying close");
130 PROF_stop(file_close);
131 return;
132 #endif
133
134 }
135
136 /*
137 * Assert there is no write callback. Otherwise we might be
138 * leaking write state data by closing the descriptor
139 */
140 assert(F->write_handler == NULL);
141
142 #if CALL_FSYNC_BEFORE_CLOSE
143
144 fsync(fd);
145
146 #endif
147
148 close(fd);
149
150 debugs(6, F->flags.close_request ? 2 : 5, "file_close: FD " << fd << " really closing\n");
151
152 fd_close(fd);
153
154 ++ statCounter.syscalls.disk.closes;
155
156 PROF_stop(file_close);
157 }
158
159 /*
160 * This function has the purpose of combining multiple writes. This is
161 * to facilitate the ASYNC_IO option since it can only guarantee 1
162 * write to a file per trip around the comm.c select() loop. That's bad
163 * because more than 1 write can be made to the access.log file per
164 * trip, and so this code is purely designed to help batch multiple
165 * sequential writes to the access.log file. Squid will never issue
166 * multiple writes for any other file type during 1 trip around the
167 * select() loop. --SLF
168 */
169 static void
170 diskCombineWrites(_fde_disk *fdd)
171 {
172 /*
173 * We need to combine multiple write requests on an FD's write
174 * queue But only if we don't need to seek() in between them, ugh!
175 * XXX This currently ignores any seeks (file_offset)
176 */
177
178 if (fdd->write_q != NULL && fdd->write_q->next != NULL) {
179 int len = 0;
180
181 for (dwrite_q *q = fdd->write_q; q != NULL; q = q->next)
182 len += q->len - q->buf_offset;
183
184 dwrite_q *wq = (dwrite_q *)memAllocate(MEM_DWRITE_Q);
185
186 wq->buf = (char *)xmalloc(len);
187
188 wq->len = 0;
189
190 wq->buf_offset = 0;
191
192 wq->next = NULL;
193
194 wq->free_func = cxx_xfree;
195
196 while (fdd->write_q != NULL) {
197 dwrite_q *q = fdd->write_q;
198
199 len = q->len - q->buf_offset;
200 memcpy(wq->buf + wq->len, q->buf + q->buf_offset, len);
201 wq->len += len;
202 fdd->write_q = q->next;
203
204 if (q->free_func)
205 q->free_func(q->buf);
206
207 memFree(q, MEM_DWRITE_Q);
208 };
209
210 fdd->write_q_tail = wq;
211
212 fdd->write_q = wq;
213 }
214 }
215
216 /* write handler */
217 static void
218 diskHandleWrite(int fd, void *notused)
219 {
220 int len = 0;
221 fde *F = &fd_table[fd];
222
223 _fde_disk *fdd = &F->disk;
224 dwrite_q *q = fdd->write_q;
225 int status = DISK_OK;
226 bool do_close;
227
228 if (NULL == q)
229 return;
230
231 PROF_start(diskHandleWrite);
232
233 debugs(6, 3, "diskHandleWrite: FD " << fd);
234
235 F->flags.write_daemon = false;
236
237 assert(fdd->write_q != NULL);
238
239 assert(fdd->write_q->len > fdd->write_q->buf_offset);
240
241 debugs(6, 3, "diskHandleWrite: FD " << fd << " writing " <<
242 (fdd->write_q->len - fdd->write_q->buf_offset) << " bytes at " <<
243 fdd->write_q->file_offset);
244
245 errno = 0;
246
247 if (fdd->write_q->file_offset != -1)
248 lseek(fd, fdd->write_q->file_offset, SEEK_SET); /* XXX ignore return? */
249
250 len = FD_WRITE_METHOD(fd,
251 fdd->write_q->buf + fdd->write_q->buf_offset,
252 fdd->write_q->len - fdd->write_q->buf_offset);
253
254 debugs(6, 3, "diskHandleWrite: FD " << fd << " len = " << len);
255
256 ++ statCounter.syscalls.disk.writes;
257
258 fd_bytes(fd, len, FD_WRITE);
259
260 if (len < 0) {
261 if (!ignoreErrno(errno)) {
262 status = errno == ENOSPC ? DISK_NO_SPACE_LEFT : DISK_ERROR;
263 debugs(50, DBG_IMPORTANT, "diskHandleWrite: FD " << fd << ": disk write error: " << xstrerror());
264
265 /*
266 * If there is no write callback, then this file is
267 * most likely something important like a log file, or
268 * an interprocess pipe. Its not a swapfile. We feel
269 * that a write failure on a log file is rather important,
270 * and Squid doesn't otherwise deal with this condition.
271 * So to get the administrators attention, we exit with
272 * a fatal message.
273 */
274
275 if (fdd->wrt_handle == NULL)
276 fatal("Write failure -- check your disk space and cache.log");
277
278 /*
279 * If there is a write failure, then we notify the
280 * upper layer via the callback, at the end of this
281 * function. Meanwhile, flush all pending buffers
282 * here. Let the upper layer decide how to handle the
283 * failure. This will prevent experiencing multiple,
284 * repeated write failures for the same FD because of
285 * the queued data.
286 */
287 do {
288 fdd->write_q = q->next;
289
290 if (q->free_func)
291 q->free_func(q->buf);
292
293 if (q) {
294 memFree(q, MEM_DWRITE_Q);
295 q = NULL;
296 }
297 } while ((q = fdd->write_q));
298 }
299
300 len = 0;
301 }
302
303 if (q != NULL) {
304 /* q might become NULL from write failure above */
305 q->buf_offset += len;
306
307 if (q->buf_offset > q->len)
308 debugs(50, DBG_IMPORTANT, "diskHandleWriteComplete: q->buf_offset > q->len (" <<
309 q << "," << (int) q->buf_offset << ", " << q->len << ", " <<
310 len << " FD " << fd << ")");
311
312 assert(q->buf_offset <= q->len);
313
314 if (q->buf_offset == q->len) {
315 /* complete write */
316 fdd->write_q = q->next;
317
318 if (q->free_func)
319 q->free_func(q->buf);
320
321 if (q) {
322 memFree(q, MEM_DWRITE_Q);
323 q = NULL;
324 }
325 }
326 }
327
328 if (fdd->write_q == NULL) {
329 /* no more data */
330 fdd->write_q_tail = NULL;
331 } else {
332 /* another block is queued */
333 diskCombineWrites(fdd);
334 Comm::SetSelect(fd, COMM_SELECT_WRITE, diskHandleWrite, NULL, 0);
335 F->flags.write_daemon = true;
336 }
337
338 do_close = F->flags.close_request;
339
340 if (fdd->wrt_handle) {
341 DWCB *callback = fdd->wrt_handle;
342 void *cbdata;
343 fdd->wrt_handle = NULL;
344
345 if (cbdataReferenceValidDone(fdd->wrt_handle_data, &cbdata)) {
346 callback(fd, status, len, cbdata);
347 /*
348 * NOTE, this callback can close the FD, so we must
349 * not touch 'F', 'fdd', etc. after this.
350 */
351 PROF_stop(diskHandleWrite);
352 return;
353 /* XXX But what about close_request??? */
354 }
355 }
356
357 if (do_close)
358 file_close(fd);
359
360 PROF_stop(diskHandleWrite);
361 }
362
363 /* write block to a file */
364 /* write back queue. Only one writer at a time. */
365 /* call a handle when writing is complete. */
366 void
367 file_write(int fd,
368 off_t file_offset,
369 void const *ptr_to_buf,
370 int len,
371 DWCB * handle,
372 void *handle_data,
373 FREE * free_func)
374 {
375 dwrite_q *wq = NULL;
376 fde *F = &fd_table[fd];
377 PROF_start(file_write);
378 assert(fd >= 0);
379 assert(F->flags.open);
380 /* if we got here. Caller is eligible to write. */
381 wq = (dwrite_q *)memAllocate(MEM_DWRITE_Q);
382 wq->file_offset = file_offset;
383 wq->buf = (char *)ptr_to_buf;
384 wq->len = len;
385 wq->buf_offset = 0;
386 wq->next = NULL;
387 wq->free_func = free_func;
388
389 if (!F->disk.wrt_handle_data) {
390 F->disk.wrt_handle = handle;
391 F->disk.wrt_handle_data = cbdataReference(handle_data);
392 } else {
393 /* Detect if there is multiple concurrent users of this fd.. we only support one callback */
394 assert(F->disk.wrt_handle_data == handle_data && F->disk.wrt_handle == handle);
395 }
396
397 /* add to queue */
398 if (F->disk.write_q == NULL) {
399 /* empty queue */
400 F->disk.write_q = F->disk.write_q_tail = wq;
401 } else {
402 F->disk.write_q_tail->next = wq;
403 F->disk.write_q_tail = wq;
404 }
405
406 if (!F->flags.write_daemon) {
407 diskHandleWrite(fd, NULL);
408 }
409
410 PROF_stop(file_write);
411 }
412
413 /*
414 * a wrapper around file_write to allow for MemBuf to be file_written
415 * in a snap
416 */
417 void
418 file_write_mbuf(int fd, off_t off, MemBuf mb, DWCB * handler, void *handler_data)
419 {
420 file_write(fd, off, mb.buf, mb.size, handler, handler_data, mb.freeFunc());
421 }
422
423 /* Read from FD */
424 static void
425 diskHandleRead(int fd, void *data)
426 {
427 dread_ctrl *ctrl_dat = (dread_ctrl *)data;
428 fde *F = &fd_table[fd];
429 int len;
430 int rc = DISK_OK;
431 /*
432 * FD < 0 indicates premature close; we just have to free
433 * the state data.
434 */
435
436 if (fd < 0) {
437 memFree(ctrl_dat, MEM_DREAD_CTRL);
438 return;
439 }
440
441 PROF_start(diskHandleRead);
442
443 #if WRITES_MAINTAIN_DISK_OFFSET
444 if (F->disk.offset != ctrl_dat->offset) {
445 #else
446 {
447 #endif
448 debugs(6, 3, "diskHandleRead: FD " << fd << " seeking to offset " << ctrl_dat->offset);
449 lseek(fd, ctrl_dat->offset, SEEK_SET); /* XXX ignore return? */
450 ++ statCounter.syscalls.disk.seeks;
451 F->disk.offset = ctrl_dat->offset;
452 }
453
454 errno = 0;
455 len = FD_READ_METHOD(fd, ctrl_dat->buf, ctrl_dat->req_len);
456
457 if (len > 0)
458 F->disk.offset += len;
459
460 ++ statCounter.syscalls.disk.reads;
461
462 fd_bytes(fd, len, FD_READ);
463
464 if (len < 0) {
465 if (ignoreErrno(errno)) {
466 Comm::SetSelect(fd, COMM_SELECT_READ, diskHandleRead, ctrl_dat, 0);
467 PROF_stop(diskHandleRead);
468 return;
469 }
470
471 debugs(50, DBG_IMPORTANT, "diskHandleRead: FD " << fd << ": " << xstrerror());
472 len = 0;
473 rc = DISK_ERROR;
474 } else if (len == 0) {
475 rc = DISK_EOF;
476 }
477
478 if (cbdataReferenceValid(ctrl_dat->client_data))
479 ctrl_dat->handler(fd, ctrl_dat->buf, len, rc, ctrl_dat->client_data);
480
481 cbdataReferenceDone(ctrl_dat->client_data);
482
483 memFree(ctrl_dat, MEM_DREAD_CTRL);
484
485 PROF_stop(diskHandleRead);
486 }
487
488 /* start read operation */
489 /* buffer must be allocated from the caller.
490 * It must have at least req_len space in there.
491 * call handler when a reading is complete. */
492 void
493 file_read(int fd, char *buf, int req_len, off_t offset, DRCB * handler, void *client_data)
494 {
495 dread_ctrl *ctrl_dat;
496 PROF_start(file_read);
497 assert(fd >= 0);
498 ctrl_dat = (dread_ctrl *)memAllocate(MEM_DREAD_CTRL);
499 ctrl_dat->fd = fd;
500 ctrl_dat->offset = offset;
501 ctrl_dat->req_len = req_len;
502 ctrl_dat->buf = buf;
503 ctrl_dat->end_of_file = 0;
504 ctrl_dat->handler = handler;
505 ctrl_dat->client_data = cbdataReference(client_data);
506 diskHandleRead(fd, ctrl_dat);
507 PROF_stop(file_read);
508 }
509
510 void
511 safeunlink(const char *s, int quiet)
512 {
513 ++ statCounter.syscalls.disk.unlinks;
514
515 if (unlink(s) < 0 && !quiet)
516 debugs(50, DBG_IMPORTANT, "safeunlink: Couldn't delete " << s << ": " << xstrerror());
517 }
518
519 /*
520 * Same as rename(2) but complains if something goes wrong;
521 * the caller is responsible for handing and explaining the
522 * consequences of errors.
523 */
524 int
525 xrename(const char *from, const char *to)
526 {
527 debugs(21, 2, "xrename: renaming " << from << " to " << to);
528 #if _SQUID_OS2_ || _SQUID_WINDOWS_
529 remove(to);
530 #endif
531
532 if (0 == rename(from, to))
533 return 0;
534
535 debugs(21, errno == ENOENT ? 2 : 1, "xrename: Cannot rename " << from << " to " << to << ": " << xstrerror());
536
537 return -1;
538 }
539