]> git.ipfire.org Git - thirdparty/squid.git/blob - src/fs_io.cc
ba5c6c6664a8945be9c432978d96953d1f98335d
[thirdparty/squid.git] / src / fs_io.cc
1 /*
2 * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9 /* DEBUG: section 06 Disk I/O Routines */
10
11 #include "squid.h"
12 #include "comm/Loops.h"
13 #include "fd.h"
14 #include "fde.h"
15 #include "fs_io.h"
16 #include "globals.h"
17 #include "MemBuf.h"
18 #include "profiler/Profiler.h"
19 #include "StatCounters.h"
20
21 #include <cerrno>
22
23 static PF diskHandleRead;
24 static PF diskHandleWrite;
25
26 #if _SQUID_WINDOWS_ || _SQUID_OS2_
27 static int
28 diskWriteIsComplete(int fd)
29 {
30 return fd_table[fd].disk.write_q ? 0 : 1;
31 }
32
33 #endif
34
35 /* hack needed on SunStudio to avoid linkage convention mismatch */
36 static void cxx_xfree(void *ptr)
37 {
38 xfree(ptr);
39 }
40
41 /*
42 * opens a disk file specified by 'path'. This function always
43 * blocks! There is no callback.
44 */
45 int
46 file_open(const char *path, int mode)
47 {
48 int fd;
49 PROF_start(file_open);
50
51 if (FILE_MODE(mode) == O_WRONLY)
52 mode |= O_APPEND;
53
54 errno = 0;
55
56 fd = open(path, mode, 0644);
57
58 ++ statCounter.syscalls.disk.opens;
59
60 if (fd < 0) {
61 debugs(50, 3, "file_open: error opening file " << path << ": " << xstrerror());
62 fd = DISK_ERROR;
63 } else {
64 debugs(6, 5, "file_open: FD " << fd);
65 commSetCloseOnExec(fd);
66 fd_open(fd, FD_FILE, path);
67 }
68
69 PROF_stop(file_open);
70 return fd;
71 }
72
73 /* close a disk file. */
74 void
75 file_close(int fd)
76 {
77 fde *F = &fd_table[fd];
78 PF *read_callback;
79 PROF_start(file_close);
80 assert(fd >= 0);
81 assert(F->flags.open);
82
83 if ((read_callback = F->read_handler)) {
84 F->read_handler = NULL;
85 read_callback(-1, F->read_data);
86 }
87
88 if (F->flags.write_daemon) {
89 #if _SQUID_WINDOWS_ || _SQUID_OS2_
90 /*
91 * on some operating systems, you can not delete or rename
92 * open files, so we won't allow delayed close.
93 */
94 while (!diskWriteIsComplete(fd))
95 diskHandleWrite(fd, NULL);
96 #else
97 F->flags.close_request = true;
98 debugs(6, 2, "file_close: FD " << fd << ", delaying close");
99 PROF_stop(file_close);
100 return;
101 #endif
102
103 }
104
105 /*
106 * Assert there is no write callback. Otherwise we might be
107 * leaking write state data by closing the descriptor
108 */
109 assert(F->write_handler == NULL);
110
111 #if CALL_FSYNC_BEFORE_CLOSE
112
113 fsync(fd);
114
115 #endif
116
117 close(fd);
118
119 debugs(6, F->flags.close_request ? 2 : 5, "file_close: FD " << fd << " really closing\n");
120
121 fd_close(fd);
122
123 ++ statCounter.syscalls.disk.closes;
124
125 PROF_stop(file_close);
126 }
127
128 /*
129 * This function has the purpose of combining multiple writes. This is
130 * to facilitate the ASYNC_IO option since it can only guarantee 1
131 * write to a file per trip around the comm.c select() loop. That's bad
132 * because more than 1 write can be made to the access.log file per
133 * trip, and so this code is purely designed to help batch multiple
134 * sequential writes to the access.log file. Squid will never issue
135 * multiple writes for any other file type during 1 trip around the
136 * select() loop. --SLF
137 */
138 static void
139 diskCombineWrites(_fde_disk *fdd)
140 {
141 /*
142 * We need to combine multiple write requests on an FD's write
143 * queue But only if we don't need to seek() in between them, ugh!
144 * XXX This currently ignores any seeks (file_offset)
145 */
146
147 if (fdd->write_q != NULL && fdd->write_q->next != NULL) {
148 int len = 0;
149
150 for (dwrite_q *q = fdd->write_q; q != NULL; q = q->next)
151 len += q->len - q->buf_offset;
152
153 dwrite_q *wq = (dwrite_q *)memAllocate(MEM_DWRITE_Q);
154
155 wq->buf = (char *)xmalloc(len);
156
157 wq->len = 0;
158
159 wq->buf_offset = 0;
160
161 wq->next = NULL;
162
163 wq->free_func = cxx_xfree;
164
165 while (fdd->write_q != NULL) {
166 dwrite_q *q = fdd->write_q;
167
168 len = q->len - q->buf_offset;
169 memcpy(wq->buf + wq->len, q->buf + q->buf_offset, len);
170 wq->len += len;
171 fdd->write_q = q->next;
172
173 if (q->free_func)
174 q->free_func(q->buf);
175
176 memFree(q, MEM_DWRITE_Q);
177 };
178
179 fdd->write_q_tail = wq;
180
181 fdd->write_q = wq;
182 }
183 }
184
185 /* write handler */
186 static void
187 diskHandleWrite(int fd, void *)
188 {
189 int len = 0;
190 fde *F = &fd_table[fd];
191
192 _fde_disk *fdd = &F->disk;
193 dwrite_q *q = fdd->write_q;
194 int status = DISK_OK;
195 bool do_close;
196
197 if (NULL == q)
198 return;
199
200 PROF_start(diskHandleWrite);
201
202 debugs(6, 3, "diskHandleWrite: FD " << fd);
203
204 F->flags.write_daemon = false;
205
206 assert(fdd->write_q != NULL);
207
208 assert(fdd->write_q->len > fdd->write_q->buf_offset);
209
210 debugs(6, 3, "diskHandleWrite: FD " << fd << " writing " <<
211 (fdd->write_q->len - fdd->write_q->buf_offset) << " bytes at " <<
212 fdd->write_q->file_offset);
213
214 errno = 0;
215
216 if (fdd->write_q->file_offset != -1) {
217 errno = 0;
218 if (lseek(fd, fdd->write_q->file_offset, SEEK_SET) == -1) {
219 debugs(50, DBG_IMPORTANT, "error in seek for fd " << fd << ": " << xstrerror());
220 // XXX: handle error?
221 }
222 }
223
224 len = FD_WRITE_METHOD(fd,
225 fdd->write_q->buf + fdd->write_q->buf_offset,
226 fdd->write_q->len - fdd->write_q->buf_offset);
227
228 debugs(6, 3, "diskHandleWrite: FD " << fd << " len = " << len);
229
230 ++ statCounter.syscalls.disk.writes;
231
232 fd_bytes(fd, len, FD_WRITE);
233
234 if (len < 0) {
235 if (!ignoreErrno(errno)) {
236 status = errno == ENOSPC ? DISK_NO_SPACE_LEFT : DISK_ERROR;
237 debugs(50, DBG_IMPORTANT, "diskHandleWrite: FD " << fd << ": disk write error: " << xstrerror());
238
239 /*
240 * If there is no write callback, then this file is
241 * most likely something important like a log file, or
242 * an interprocess pipe. Its not a swapfile. We feel
243 * that a write failure on a log file is rather important,
244 * and Squid doesn't otherwise deal with this condition.
245 * So to get the administrators attention, we exit with
246 * a fatal message.
247 */
248
249 if (fdd->wrt_handle == NULL)
250 fatal("Write failure -- check your disk space and cache.log");
251
252 /*
253 * If there is a write failure, then we notify the
254 * upper layer via the callback, at the end of this
255 * function. Meanwhile, flush all pending buffers
256 * here. Let the upper layer decide how to handle the
257 * failure. This will prevent experiencing multiple,
258 * repeated write failures for the same FD because of
259 * the queued data.
260 */
261 do {
262 fdd->write_q = q->next;
263
264 if (q->free_func)
265 q->free_func(q->buf);
266
267 if (q) {
268 memFree(q, MEM_DWRITE_Q);
269 q = NULL;
270 }
271 } while ((q = fdd->write_q));
272 }
273
274 len = 0;
275 }
276
277 if (q != NULL) {
278 /* q might become NULL from write failure above */
279 q->buf_offset += len;
280
281 if (q->buf_offset > q->len)
282 debugs(50, DBG_IMPORTANT, "diskHandleWriteComplete: q->buf_offset > q->len (" <<
283 q << "," << (int) q->buf_offset << ", " << q->len << ", " <<
284 len << " FD " << fd << ")");
285
286 assert(q->buf_offset <= q->len);
287
288 if (q->buf_offset == q->len) {
289 /* complete write */
290 fdd->write_q = q->next;
291
292 if (q->free_func)
293 q->free_func(q->buf);
294
295 if (q) {
296 memFree(q, MEM_DWRITE_Q);
297 q = NULL;
298 }
299 }
300 }
301
302 if (fdd->write_q == NULL) {
303 /* no more data */
304 fdd->write_q_tail = NULL;
305 } else {
306 /* another block is queued */
307 diskCombineWrites(fdd);
308 Comm::SetSelect(fd, COMM_SELECT_WRITE, diskHandleWrite, NULL, 0);
309 F->flags.write_daemon = true;
310 }
311
312 do_close = F->flags.close_request;
313
314 if (fdd->wrt_handle) {
315 DWCB *callback = fdd->wrt_handle;
316 void *cbdata;
317 fdd->wrt_handle = NULL;
318
319 if (cbdataReferenceValidDone(fdd->wrt_handle_data, &cbdata)) {
320 callback(fd, status, len, cbdata);
321 /*
322 * NOTE, this callback can close the FD, so we must
323 * not touch 'F', 'fdd', etc. after this.
324 */
325 PROF_stop(diskHandleWrite);
326 return;
327 /* XXX But what about close_request??? */
328 }
329 }
330
331 if (do_close)
332 file_close(fd);
333
334 PROF_stop(diskHandleWrite);
335 }
336
337 /* write block to a file */
338 /* write back queue. Only one writer at a time. */
339 /* call a handle when writing is complete. */
340 void
341 file_write(int fd,
342 off_t file_offset,
343 void const *ptr_to_buf,
344 int len,
345 DWCB * handle,
346 void *handle_data,
347 FREE * free_func)
348 {
349 dwrite_q *wq = NULL;
350 fde *F = &fd_table[fd];
351 PROF_start(file_write);
352 assert(fd >= 0);
353 assert(F->flags.open);
354 /* if we got here. Caller is eligible to write. */
355 wq = (dwrite_q *)memAllocate(MEM_DWRITE_Q);
356 wq->file_offset = file_offset;
357 wq->buf = (char *)ptr_to_buf;
358 wq->len = len;
359 wq->buf_offset = 0;
360 wq->next = NULL;
361 wq->free_func = free_func;
362
363 if (!F->disk.wrt_handle_data) {
364 F->disk.wrt_handle = handle;
365 F->disk.wrt_handle_data = cbdataReference(handle_data);
366 } else {
367 /* Detect if there is multiple concurrent users of this fd.. we only support one callback */
368 assert(F->disk.wrt_handle_data == handle_data && F->disk.wrt_handle == handle);
369 }
370
371 /* add to queue */
372 if (F->disk.write_q == NULL) {
373 /* empty queue */
374 F->disk.write_q = F->disk.write_q_tail = wq;
375 } else {
376 F->disk.write_q_tail->next = wq;
377 F->disk.write_q_tail = wq;
378 }
379
380 if (!F->flags.write_daemon) {
381 diskHandleWrite(fd, NULL);
382 }
383
384 PROF_stop(file_write);
385 }
386
387 /*
388 * a wrapper around file_write to allow for MemBuf to be file_written
389 * in a snap
390 */
391 void
392 file_write_mbuf(int fd, off_t off, MemBuf mb, DWCB * handler, void *handler_data)
393 {
394 file_write(fd, off, mb.buf, mb.size, handler, handler_data, mb.freeFunc());
395 }
396
397 /* Read from FD */
398 static void
399 diskHandleRead(int fd, void *data)
400 {
401 dread_ctrl *ctrl_dat = (dread_ctrl *)data;
402 fde *F = &fd_table[fd];
403 int len;
404 int rc = DISK_OK;
405 /*
406 * FD < 0 indicates premature close; we just have to free
407 * the state data.
408 */
409
410 if (fd < 0) {
411 memFree(ctrl_dat, MEM_DREAD_CTRL);
412 return;
413 }
414
415 PROF_start(diskHandleRead);
416
417 #if WRITES_MAINTAIN_DISK_OFFSET
418 if (F->disk.offset != ctrl_dat->offset) {
419 #else
420 {
421 #endif
422 debugs(6, 3, "diskHandleRead: FD " << fd << " seeking to offset " << ctrl_dat->offset);
423 errno = 0;
424 if (lseek(fd, ctrl_dat->offset, SEEK_SET) == -1) {
425 // shouldn't happen, let's detect that
426 debugs(50, DBG_IMPORTANT, "error in seek for fd " << fd << ": " << xstrerror());
427 // XXX handle failures?
428 }
429 ++ statCounter.syscalls.disk.seeks;
430 F->disk.offset = ctrl_dat->offset;
431 }
432
433 errno = 0;
434 len = FD_READ_METHOD(fd, ctrl_dat->buf, ctrl_dat->req_len);
435
436 if (len > 0)
437 F->disk.offset += len;
438
439 ++ statCounter.syscalls.disk.reads;
440
441 fd_bytes(fd, len, FD_READ);
442
443 if (len < 0) {
444 if (ignoreErrno(errno)) {
445 Comm::SetSelect(fd, COMM_SELECT_READ, diskHandleRead, ctrl_dat, 0);
446 PROF_stop(diskHandleRead);
447 return;
448 }
449
450 debugs(50, DBG_IMPORTANT, "diskHandleRead: FD " << fd << ": " << xstrerror());
451 len = 0;
452 rc = DISK_ERROR;
453 } else if (len == 0) {
454 rc = DISK_EOF;
455 }
456
457 if (cbdataReferenceValid(ctrl_dat->client_data))
458 ctrl_dat->handler(fd, ctrl_dat->buf, len, rc, ctrl_dat->client_data);
459
460 cbdataReferenceDone(ctrl_dat->client_data);
461
462 memFree(ctrl_dat, MEM_DREAD_CTRL);
463
464 PROF_stop(diskHandleRead);
465 }
466
467 /* start read operation */
468 /* buffer must be allocated from the caller.
469 * It must have at least req_len space in there.
470 * call handler when a reading is complete. */
471 void
472 file_read(int fd, char *buf, int req_len, off_t offset, DRCB * handler, void *client_data)
473 {
474 dread_ctrl *ctrl_dat;
475 PROF_start(file_read);
476 assert(fd >= 0);
477 ctrl_dat = (dread_ctrl *)memAllocate(MEM_DREAD_CTRL);
478 ctrl_dat->fd = fd;
479 ctrl_dat->offset = offset;
480 ctrl_dat->req_len = req_len;
481 ctrl_dat->buf = buf;
482 ctrl_dat->end_of_file = 0;
483 ctrl_dat->handler = handler;
484 ctrl_dat->client_data = cbdataReference(client_data);
485 diskHandleRead(fd, ctrl_dat);
486 PROF_stop(file_read);
487 }
488
489 void
490 safeunlink(const char *s, int quiet)
491 {
492 ++ statCounter.syscalls.disk.unlinks;
493
494 if (unlink(s) < 0 && !quiet)
495 debugs(50, DBG_IMPORTANT, "safeunlink: Couldn't delete " << s << ": " << xstrerror());
496 }
497
498 /*
499 * Same as rename(2) but complains if something goes wrong;
500 * the caller is responsible for handing and explaining the
501 * consequences of errors.
502 */
503 int
504 xrename(const char *from, const char *to)
505 {
506 debugs(21, 2, "xrename: renaming " << from << " to " << to);
507 #if _SQUID_OS2_ || _SQUID_WINDOWS_
508 remove(to);
509 #endif
510
511 if (0 == rename(from, to))
512 return 0;
513
514 debugs(21, errno == ENOENT ? 2 : 1, "xrename: Cannot rename " << from << " to " << to << ": " << xstrerror());
515
516 return -1;
517 }
518
519 int
520 fsBlockSize(const char *path, int *blksize)
521 {
522 struct statvfs sfs;
523
524 if (xstatvfs(path, &sfs)) {
525 debugs(50, DBG_IMPORTANT, "" << path << ": " << xstrerror());
526 *blksize = 2048;
527 return 1;
528 }
529
530 *blksize = (int) sfs.f_frsize;
531
532 // Sanity check; make sure we have a meaningful value.
533 if (*blksize < 512)
534 *blksize = 2048;
535
536 return 0;
537 }
538
/*
 * Converts 'num' filesystem blocks of 'fsbs' bytes each into a number of
 * blocks of 'bs' bytes, using integer arithmetic only: the count is
 * divided by bs/fsbs when the filesystem blocks are smaller than 'bs',
 * and multiplied by fsbs/bs otherwise (a zero 'fsbs' therefore yields 0).
 * Arguments may be evaluated more than once.
 */
#define fsbtoblk(num, fsbs, bs) \
    (((fsbs) == 0 || (fsbs) >= (bs)) ? \
     (num) * ((fsbs) / (bs)) : (num) / ((bs) / (fsbs)))
542 int
543 fsStats(const char *path, int *totl_kb, int *free_kb, int *totl_in, int *free_in)
544 {
545 struct statvfs sfs;
546
547 if (xstatvfs(path, &sfs)) {
548 debugs(50, DBG_IMPORTANT, "" << path << ": " << xstrerror());
549 return 1;
550 }
551
552 *totl_kb = (int) fsbtoblk(sfs.f_blocks, sfs.f_frsize, 1024);
553 *free_kb = (int) fsbtoblk(sfs.f_bfree, sfs.f_frsize, 1024);
554 *totl_in = (int) sfs.f_files;
555 *free_in = (int) sfs.f_ffree;
556 return 0;
557 }
558