From: Alex Rousskov Date: Mon, 21 Jan 2013 23:04:45 +0000 (-0700) Subject: Report more detail about disk errors, especially the disker ID and db path. X-Git-Tag: SQUID_3_5_0_1~444^2~71 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=4e4f7fd92d664af9928060a3f36873bc22ff8162;p=thirdparty%2Fsquid.git Report more detail about disk errors, especially the disker ID and db path. On partial writes, try to write the leftovers a few times before giving up. The caller is unlikely to employ a different strategy, and we can do that faster. Current callers simply give up and declare an error. Partial disk writes seem to be typical on overflowing disks, but there may be other conditions we have not observed in the lab yet. --- diff --git a/src/DiskIO/IpcIo/IpcIoFile.cc b/src/DiskIO/IpcIo/IpcIoFile.cc index 52ea41136d..921c3e72c5 100644 --- a/src/DiskIO/IpcIo/IpcIoFile.cc +++ b/src/DiskIO/IpcIo/IpcIoFile.cc @@ -270,13 +270,19 @@ IpcIoFile::writeCompleted(WriteRequest *writeRequest, { bool ioError = false; if (!response) { - debugs(79, 3, HERE << "error: timeout"); + debugs(79, 3, "disker " << diskId << " timeout"); ioError = true; // I/O timeout does not warrant setting error_? } else if (response->xerrno) { - debugs(79, DBG_IMPORTANT, HERE << "error: " << xstrerr(response->xerrno)); + debugs(79, DBG_IMPORTANT, "ERROR: disker " << diskId << + " error writing " << writeRequest->len << " bytes at " << + writeRequest->offset << ": " << xstrerr(response->xerrno) << + "; this worker will stop using " << dbName); ioError = error_ = true; } else if (response->len != writeRequest->len) { - debugs(79, DBG_IMPORTANT, HERE << "problem: " << response->len << " < " << writeRequest->len); + debugs(79, DBG_IMPORTANT, "ERROR: disker " << diskId << " wrote " << + response->len << " instead of " << writeRequest->len << + " bytes (offset " << writeRequest->offset << "); " << + "this worker will stop using " << dbName); error_ = true; } @@ -648,27 +654,68 @@ diskerRead(IpcIoMsg &ipcIo) } } +/// Tries to write buffer to disk (a few times if needed); +/// sets ipcIo results, but does no cleanup. The caller must cleanup. static void -diskerWrite(IpcIoMsg &ipcIo) -{ - const char *const buf = Ipc::Mem::PagePointer(ipcIo.page); - const ssize_t wrote = pwrite(TheFile, buf, min(ipcIo.len, Ipc::Mem::PageSize()), ipcIo.offset); - ++statCounter.syscalls.disk.writes; - fd_bytes(TheFile, wrote, FD_WRITE); +diskerWriteAttempts(IpcIoMsg &ipcIo) +{ + const char *buf = Ipc::Mem::PagePointer(ipcIo.page); + size_t toWrite = min(ipcIo.len, Ipc::Mem::PageSize()); + size_t wroteSoFar = 0; + off_t offset = ipcIo.offset; + // Partial writes to disk do happen. It is unlikely that the caller can + // handle partial writes by doing something other than writing leftovers + // again, so we try to write them ourselves to minimize overheads. + const int attemptLimit = 10; + for (int attempts = 1; attempts <= attemptLimit; ++attempts) { + const ssize_t result = pwrite(TheFile, buf, toWrite, offset); + ++statCounter.syscalls.disk.writes; + fd_bytes(TheFile, result, FD_WRITE); + + if (result < 0) { + ipcIo.xerrno = errno; + assert(ipcIo.xerrno); + debugs(47, DBG_IMPORTANT, "disker" << KidIdentifier << + " error writing " << toWrite << '/' << ipcIo.len << + " at " << ipcIo.offset << '+' << wroteSoFar << + " on " << attempts << " try: " << xstrerr(ipcIo.xerrno)); + ipcIo.len = wroteSoFar; + return; // bail on error + } - if (wrote >= 0) { + const size_t wroteNow = static_cast(result); // result >= 0 ipcIo.xerrno = 0; - const size_t len = static_cast(wrote); // safe because wrote > 0 - debugs(47,8, HERE << "disker" << KidIdentifier << " wrote " << - (len == ipcIo.len ? "all " : "just ") << wrote); - ipcIo.len = len; - } else { - ipcIo.xerrno = errno; - ipcIo.len = 0; - debugs(47,5, HERE << "disker" << KidIdentifier << " write error: " << - ipcIo.xerrno); + + debugs(47,3, "disker" << KidIdentifier << " wrote " << + (wroteNow >= toWrite ? "all " : "just ") << wroteNow << + " out of " << toWrite << '/' << ipcIo.len << " at " << + ipcIo.offset << '+' << wroteSoFar << " on " << attempts << + " try"); + + wroteSoFar += wroteNow; + + if (wroteNow >= toWrite) { + ipcIo.xerrno = 0; + ipcIo.len = wroteSoFar; + return; // wrote everything there was to write + } + + buf += wroteNow; + offset += wroteNow; + toWrite -= wroteNow; } + debugs(47, DBG_IMPORTANT, "disker" << KidIdentifier << + " exhausted all " << attemptLimit << " attempts while writing " << + toWrite << '/' << ipcIo.len << " at " << ipcIo.offset << '+' << + wroteSoFar); + return; // not a fatal I/O error, unless the caller treats it as such +} + +static void +diskerWrite(IpcIoMsg &ipcIo) +{ + diskerWriteAttempts(ipcIo); // may fail Ipc::Mem::PutPage(ipcIo.page); }