static bool coroutine_fn sgio_path_error(int ret, sg_io_hdr_t *io_hdr)
{
if (ret < 0) {
- switch (ret) {
- case -ENODEV:
- return true;
- case -EAGAIN:
- /*
- * The device is probably suspended. This happens while the dm table
- * is reloaded, e.g. because a path is added or removed. This is an
- * operation that should complete within 1ms, so just wait a bit and
- * retry.
- *
- * If the device was suspended for another reason, we'll wait and
- * retry SG_IO_MAX_RETRIES times. This is a tolerable delay before
- * we return an error and potentially stop the VM.
- */
- qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 1000000);
- return true;
- default:
- return false;
- }
+ /* Path errors sometimes result in -ENODEV */
+ return ret == -ENODEV;
}
if (io_hdr->host_status != SCSI_HOST_OK) {
{
BDRVRawState *s = bs->opaque;
RawPosixAIOData acb;
+ uint64_t eagain_sleep_ns = 1 * SCALE_MS;
int retries = SG_IO_MAX_RETRIES;
int ret;
},
};
- do {
- ret = raw_thread_pool_submit(handle_aiocb_ioctl, &acb);
- } while (req == SG_IO && retries-- && hdev_co_ioctl_sgio_retry(&acb, ret));
+retry:
+ ret = raw_thread_pool_submit(handle_aiocb_ioctl, &acb);
+ if (req == SG_IO && s->use_mpath) {
+ if (ret == -EAGAIN && eagain_sleep_ns < NANOSECONDS_PER_SECOND) {
+ /*
+ * If this is a multipath device, it is probably suspended.
+ *
+ * This can happen while the dm table is reloaded, e.g. because a
+ * path is added or removed. This is an operation that should
+ * complete within 1ms, so just wait a bit and retry.
+ *
+ * There are also some cases in which libmpathpersist must recover
+ * from path failure during its operation, which can leave the
+ * device suspended for a bit longer while the library brings back
+ * reservations into the expected state.
+ *
+ * Use increasing delays to cover both cases without waiting
+ * excessively, and stop after a bit more than a second (1023 ms).
+ * This is a tolerable delay before we return an error and
+ * potentially stop the VM.
+ */
+ qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, eagain_sleep_ns);
+ eagain_sleep_ns *= 2;
+ goto retry;
+ }
+
+ /* Even for ret == 0, the SG_IO header can contain an error */
+ if (retries-- && hdev_co_ioctl_sgio_retry(&acb, ret)) {
+ goto retry;
+ }
+ }
return ret;
}