/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2011 IBM Corp.
 * Copyright (c) 2012 Red Hat, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
26 #include "qemu/osdep.h"
27 #include "qemu-common.h"
28 #include "block/block.h"
29 #include "block/blockjob_int.h"
30 #include "block/block_int.h"
31 #include "block/trace.h"
32 #include "sysemu/block-backend.h"
33 #include "qapi/error.h"
34 #include "qapi/qapi-events-block-core.h"
35 #include "qapi/qmp/qerror.h"
36 #include "qemu/coroutine.h"
37 #include "qemu/timer.h"
39 /* Transactional group of block jobs */
42 /* Is this txn being cancelled? */
46 QLIST_HEAD(, Job
) jobs
;
/*
 * The block job API is composed of two categories of functions.
 *
 * The first includes functions used by the monitor. The monitor is
 * peculiar in that it accesses the block job list with block_job_get, and
 * therefore needs consistency across block_job_get and the actual operation
 * (e.g. block_job_set_speed). The consistency is achieved with
 * aio_context_acquire/release. These functions are declared in blockjob.h.
 *
 * The second includes functions used by the block job drivers and sometimes
 * by the core block layer. These do not care about locking, because the
 * whole coroutine runs under the AioContext lock, and are declared in
 * blockjob_int.h.
 */
67 static bool is_block_job(Job
*job
)
69 return job_type(job
) == JOB_TYPE_BACKUP
||
70 job_type(job
) == JOB_TYPE_COMMIT
||
71 job_type(job
) == JOB_TYPE_MIRROR
||
72 job_type(job
) == JOB_TYPE_STREAM
;
75 BlockJob
*block_job_next(BlockJob
*bjob
)
77 Job
*job
= bjob
? &bjob
->job
: NULL
;
81 } while (job
&& !is_block_job(job
));
83 return job
? container_of(job
, BlockJob
, job
) : NULL
;
86 BlockJob
*block_job_get(const char *id
)
88 Job
*job
= job_get(id
);
90 if (job
&& is_block_job(job
)) {
91 return container_of(job
, BlockJob
, job
);
97 JobTxn
*block_job_txn_new(void)
99 JobTxn
*txn
= g_new0(JobTxn
, 1);
100 QLIST_INIT(&txn
->jobs
);
105 static void block_job_txn_ref(JobTxn
*txn
)
110 void block_job_txn_unref(JobTxn
*txn
)
112 if (txn
&& --txn
->refcnt
== 0) {
117 void block_job_txn_add_job(JobTxn
*txn
, BlockJob
*job
)
126 QLIST_INSERT_HEAD(&txn
->jobs
, &job
->job
, txn_list
);
127 block_job_txn_ref(txn
);
130 void block_job_txn_del_job(BlockJob
*job
)
133 QLIST_REMOVE(&job
->job
, txn_list
);
134 block_job_txn_unref(job
->txn
);
139 static void block_job_attached_aio_context(AioContext
*new_context
,
141 static void block_job_detach_aio_context(void *opaque
);
143 void block_job_free(Job
*job
)
145 BlockJob
*bjob
= container_of(job
, BlockJob
, job
);
146 BlockDriverState
*bs
= blk_bs(bjob
->blk
);
151 block_job_remove_all_bdrv(bjob
);
152 blk_remove_aio_context_notifier(bjob
->blk
,
153 block_job_attached_aio_context
,
154 block_job_detach_aio_context
, bjob
);
155 blk_unref(bjob
->blk
);
156 error_free(bjob
->blocker
);
159 static void block_job_attached_aio_context(AioContext
*new_context
,
162 BlockJob
*job
= opaque
;
164 job
->job
.aio_context
= new_context
;
165 if (job
->driver
->attached_aio_context
) {
166 job
->driver
->attached_aio_context(job
, new_context
);
169 job_resume(&job
->job
);
172 void block_job_drain(Job
*job
)
174 BlockJob
*bjob
= container_of(job
, BlockJob
, job
);
176 blk_drain(bjob
->blk
);
177 if (bjob
->driver
->drain
) {
178 bjob
->driver
->drain(bjob
);
182 static void block_job_detach_aio_context(void *opaque
)
184 BlockJob
*job
= opaque
;
186 /* In case the job terminates during aio_poll()... */
189 job_pause(&job
->job
);
191 while (!job
->job
.paused
&& !job_is_completed(&job
->job
)) {
192 job_drain(&job
->job
);
195 job
->job
.aio_context
= NULL
;
196 job_unref(&job
->job
);
199 static char *child_job_get_parent_desc(BdrvChild
*c
)
201 BlockJob
*job
= c
->opaque
;
202 return g_strdup_printf("%s job '%s'", job_type_str(&job
->job
), job
->job
.id
);
205 static void child_job_drained_begin(BdrvChild
*c
)
207 BlockJob
*job
= c
->opaque
;
208 job_pause(&job
->job
);
211 static void child_job_drained_end(BdrvChild
*c
)
213 BlockJob
*job
= c
->opaque
;
214 job_resume(&job
->job
);
217 static const BdrvChildRole child_job
= {
218 .get_parent_desc
= child_job_get_parent_desc
,
219 .drained_begin
= child_job_drained_begin
,
220 .drained_end
= child_job_drained_end
,
221 .stay_at_node
= true,
224 void block_job_remove_all_bdrv(BlockJob
*job
)
227 for (l
= job
->nodes
; l
; l
= l
->next
) {
228 BdrvChild
*c
= l
->data
;
229 bdrv_op_unblock_all(c
->bs
, job
->blocker
);
230 bdrv_root_unref_child(c
);
232 g_slist_free(job
->nodes
);
236 int block_job_add_bdrv(BlockJob
*job
, const char *name
, BlockDriverState
*bs
,
237 uint64_t perm
, uint64_t shared_perm
, Error
**errp
)
241 c
= bdrv_root_attach_child(bs
, name
, &child_job
, perm
, shared_perm
,
247 job
->nodes
= g_slist_prepend(job
->nodes
, c
);
249 bdrv_op_block_all(bs
, job
->blocker
);
254 bool block_job_is_internal(BlockJob
*job
)
256 return (job
->job
.id
== NULL
);
259 const BlockJobDriver
*block_job_driver(BlockJob
*job
)
264 static int block_job_prepare(BlockJob
*job
)
266 if (job
->job
.ret
== 0 && job
->driver
->prepare
) {
267 job
->job
.ret
= job
->driver
->prepare(job
);
272 static void job_cancel_async(Job
*job
, bool force
)
274 if (job
->user_paused
) {
275 /* Do not call job_enter here, the caller will handle it. */
276 job
->user_paused
= false;
277 if (job
->driver
->user_resume
) {
278 job
->driver
->user_resume(job
);
280 assert(job
->pause_count
> 0);
283 job
->cancelled
= true;
284 /* To prevent 'force == false' overriding a previous 'force == true' */
285 job
->force_cancel
|= force
;
288 static int block_job_txn_apply(JobTxn
*txn
, int fn(BlockJob
*), bool lock
)
295 QLIST_FOREACH_SAFE(job
, &txn
->jobs
, txn_list
, next
) {
296 assert(is_block_job(job
));
297 bjob
= container_of(job
, BlockJob
, job
);
300 ctx
= job
->aio_context
;
301 aio_context_acquire(ctx
);
305 aio_context_release(ctx
);
314 static void block_job_completed_txn_abort(BlockJob
*job
)
317 JobTxn
*txn
= job
->txn
;
322 * We are cancelled by another job, which will handle everything.
326 txn
->aborting
= true;
327 block_job_txn_ref(txn
);
329 /* We are the first failed job. Cancel other jobs. */
330 QLIST_FOREACH(other_job
, &txn
->jobs
, txn_list
) {
331 ctx
= other_job
->aio_context
;
332 aio_context_acquire(ctx
);
335 /* Other jobs are effectively cancelled by us, set the status for
336 * them; this job, however, may or may not be cancelled, depending
337 * on the caller, so leave it. */
338 QLIST_FOREACH(other_job
, &txn
->jobs
, txn_list
) {
339 if (other_job
!= &job
->job
) {
340 job_cancel_async(other_job
, false);
343 while (!QLIST_EMPTY(&txn
->jobs
)) {
344 other_job
= QLIST_FIRST(&txn
->jobs
);
345 ctx
= other_job
->aio_context
;
346 if (!job_is_completed(other_job
)) {
347 assert(job_is_cancelled(other_job
));
348 job_finish_sync(other_job
, NULL
, NULL
);
350 job_finalize_single(other_job
);
351 aio_context_release(ctx
);
354 block_job_txn_unref(txn
);
357 static int block_job_needs_finalize(BlockJob
*job
)
359 return !job
->job
.auto_finalize
;
362 static int block_job_finalize_single(BlockJob
*job
)
364 return job_finalize_single(&job
->job
);
367 static void block_job_do_finalize(BlockJob
*job
)
370 assert(job
&& job
->txn
);
372 /* prepare the transaction to complete */
373 rc
= block_job_txn_apply(job
->txn
, block_job_prepare
, true);
375 block_job_completed_txn_abort(job
);
377 block_job_txn_apply(job
->txn
, block_job_finalize_single
, true);
381 static int block_job_transition_to_pending(BlockJob
*job
)
383 job_state_transition(&job
->job
, JOB_STATUS_PENDING
);
384 if (!job
->job
.auto_finalize
) {
385 job_event_pending(&job
->job
);
390 static void block_job_completed_txn_success(BlockJob
*job
)
392 JobTxn
*txn
= job
->txn
;
395 job_state_transition(&job
->job
, JOB_STATUS_WAITING
);
398 * Successful completion, see if there are other running jobs in this
401 QLIST_FOREACH(other_job
, &txn
->jobs
, txn_list
) {
402 if (!job_is_completed(other_job
)) {
405 assert(other_job
->ret
== 0);
408 block_job_txn_apply(txn
, block_job_transition_to_pending
, false);
410 /* If no jobs need manual finalization, automatically do so */
411 if (block_job_txn_apply(txn
, block_job_needs_finalize
, false) == 0) {
412 block_job_do_finalize(job
);
416 /* Assumes the job_mutex is held */
417 static bool job_timer_pending(Job
*job
)
419 return timer_pending(&job
->sleep_timer
);
422 void block_job_set_speed(BlockJob
*job
, int64_t speed
, Error
**errp
)
424 int64_t old_speed
= job
->speed
;
426 if (job_apply_verb(&job
->job
, JOB_VERB_SET_SPEED
, errp
)) {
430 error_setg(errp
, QERR_INVALID_PARAMETER
, "speed");
434 ratelimit_set_speed(&job
->limit
, speed
, BLOCK_JOB_SLICE_TIME
);
437 if (speed
&& speed
<= old_speed
) {
441 /* kick only if a timer is pending */
442 job_enter_cond(&job
->job
, job_timer_pending
);
445 int64_t block_job_ratelimit_get_delay(BlockJob
*job
, uint64_t n
)
451 return ratelimit_calculate_delay(&job
->limit
, n
);
454 void block_job_finalize(BlockJob
*job
, Error
**errp
)
456 assert(job
&& job
->job
.id
);
457 if (job_apply_verb(&job
->job
, JOB_VERB_FINALIZE
, errp
)) {
460 block_job_do_finalize(job
);
463 void block_job_dismiss(BlockJob
**jobptr
, Error
**errp
)
465 BlockJob
*job
= *jobptr
;
466 /* similarly to _complete, this is QMP-interface only. */
468 if (job_apply_verb(&job
->job
, JOB_VERB_DISMISS
, errp
)) {
472 job_do_dismiss(&job
->job
);
476 void block_job_cancel(BlockJob
*job
, bool force
)
478 if (job
->job
.status
== JOB_STATUS_CONCLUDED
) {
479 job_do_dismiss(&job
->job
);
482 job_cancel_async(&job
->job
, force
);
483 if (!job_started(&job
->job
)) {
484 block_job_completed(job
, -ECANCELED
);
485 } else if (job
->job
.deferred_to_main_loop
) {
486 block_job_completed_txn_abort(job
);
488 block_job_enter(job
);
492 void block_job_user_cancel(BlockJob
*job
, bool force
, Error
**errp
)
494 if (job_apply_verb(&job
->job
, JOB_VERB_CANCEL
, errp
)) {
497 block_job_cancel(job
, force
);
500 /* A wrapper around block_job_cancel() taking an Error ** parameter so it may be
501 * used with job_finish_sync() without the need for (rather nasty) function
502 * pointer casts there. */
503 static void block_job_cancel_err(Job
*job
, Error
**errp
)
505 BlockJob
*bjob
= container_of(job
, BlockJob
, job
);
506 assert(is_block_job(job
));
507 block_job_cancel(bjob
, false);
510 int block_job_cancel_sync(BlockJob
*job
)
512 return job_finish_sync(&job
->job
, &block_job_cancel_err
, NULL
);
515 void block_job_cancel_sync_all(void)
518 AioContext
*aio_context
;
520 while ((job
= block_job_next(NULL
))) {
521 aio_context
= blk_get_aio_context(job
->blk
);
522 aio_context_acquire(aio_context
);
523 block_job_cancel_sync(job
);
524 aio_context_release(aio_context
);
528 int block_job_complete_sync(BlockJob
*job
, Error
**errp
)
530 return job_finish_sync(&job
->job
, job_complete
, errp
);
533 void block_job_progress_update(BlockJob
*job
, uint64_t done
)
538 void block_job_progress_set_remaining(BlockJob
*job
, uint64_t remaining
)
540 job
->len
= job
->offset
+ remaining
;
543 BlockJobInfo
*block_job_query(BlockJob
*job
, Error
**errp
)
547 if (block_job_is_internal(job
)) {
548 error_setg(errp
, "Cannot query QEMU internal jobs");
551 info
= g_new0(BlockJobInfo
, 1);
552 info
->type
= g_strdup(job_type_str(&job
->job
));
553 info
->device
= g_strdup(job
->job
.id
);
554 info
->len
= job
->len
;
555 info
->busy
= atomic_read(&job
->job
.busy
);
556 info
->paused
= job
->job
.pause_count
> 0;
557 info
->offset
= job
->offset
;
558 info
->speed
= job
->speed
;
559 info
->io_status
= job
->iostatus
;
560 info
->ready
= job
->ready
;
561 info
->status
= job
->job
.status
;
562 info
->auto_finalize
= job
->job
.auto_finalize
;
563 info
->auto_dismiss
= job
->job
.auto_dismiss
;
564 info
->has_error
= job
->job
.ret
!= 0;
565 info
->error
= job
->job
.ret
? g_strdup(strerror(-job
->job
.ret
)) : NULL
;
569 static void block_job_iostatus_set_err(BlockJob
*job
, int error
)
571 if (job
->iostatus
== BLOCK_DEVICE_IO_STATUS_OK
) {
572 job
->iostatus
= error
== ENOSPC
? BLOCK_DEVICE_IO_STATUS_NOSPACE
:
573 BLOCK_DEVICE_IO_STATUS_FAILED
;
577 static void block_job_event_cancelled(Notifier
*n
, void *opaque
)
579 BlockJob
*job
= opaque
;
581 if (block_job_is_internal(job
)) {
585 qapi_event_send_block_job_cancelled(job_type(&job
->job
),
593 static void block_job_event_completed(Notifier
*n
, void *opaque
)
595 BlockJob
*job
= opaque
;
596 const char *msg
= NULL
;
598 if (block_job_is_internal(job
)) {
602 if (job
->job
.ret
< 0) {
603 msg
= strerror(-job
->job
.ret
);
606 qapi_event_send_block_job_completed(job_type(&job
->job
),
616 static void block_job_event_pending(Notifier
*n
, void *opaque
)
618 BlockJob
*job
= opaque
;
620 if (block_job_is_internal(job
)) {
624 qapi_event_send_block_job_pending(job_type(&job
->job
),
/*
 * API for block job drivers and the block layer. These functions are
 * declared in blockjob_int.h.
 */
634 void *block_job_create(const char *job_id
, const BlockJobDriver
*driver
,
635 JobTxn
*txn
, BlockDriverState
*bs
, uint64_t perm
,
636 uint64_t shared_perm
, int64_t speed
, int flags
,
637 BlockCompletionFunc
*cb
, void *opaque
, Error
**errp
)
644 error_setg(errp
, QERR_DEVICE_IN_USE
, bdrv_get_device_name(bs
));
648 if (job_id
== NULL
&& !(flags
& JOB_INTERNAL
)) {
649 job_id
= bdrv_get_device_name(bs
);
652 blk
= blk_new(perm
, shared_perm
);
653 ret
= blk_insert_bs(blk
, bs
, errp
);
659 job
= job_create(job_id
, &driver
->job_driver
, blk_get_aio_context(blk
),
660 flags
, cb
, opaque
, errp
);
666 assert(is_block_job(&job
->job
));
667 assert(job
->job
.driver
->free
== &block_job_free
);
668 assert(job
->job
.driver
->user_resume
== &block_job_user_resume
);
669 assert(job
->job
.driver
->drain
== &block_job_drain
);
671 job
->driver
= driver
;
674 job
->finalize_cancelled_notifier
.notify
= block_job_event_cancelled
;
675 job
->finalize_completed_notifier
.notify
= block_job_event_completed
;
676 job
->pending_notifier
.notify
= block_job_event_pending
;
678 notifier_list_add(&job
->job
.on_finalize_cancelled
,
679 &job
->finalize_cancelled_notifier
);
680 notifier_list_add(&job
->job
.on_finalize_completed
,
681 &job
->finalize_completed_notifier
);
682 notifier_list_add(&job
->job
.on_pending
, &job
->pending_notifier
);
684 error_setg(&job
->blocker
, "block device is in use by block job: %s",
685 job_type_str(&job
->job
));
686 block_job_add_bdrv(job
, "main node", bs
, 0, BLK_PERM_ALL
, &error_abort
);
689 bdrv_op_unblock(bs
, BLOCK_OP_TYPE_DATAPLANE
, job
->blocker
);
691 blk_add_aio_context_notifier(blk
, block_job_attached_aio_context
,
692 block_job_detach_aio_context
, job
);
694 /* Only set speed when necessary to avoid NotSupported error */
696 Error
*local_err
= NULL
;
698 block_job_set_speed(job
, speed
, &local_err
);
700 job_early_fail(&job
->job
);
701 error_propagate(errp
, local_err
);
706 /* Single jobs are modeled as single-job transactions for sake of
707 * consolidating the job management logic */
709 txn
= block_job_txn_new();
710 block_job_txn_add_job(txn
, job
);
711 block_job_txn_unref(txn
);
713 block_job_txn_add_job(txn
, job
);
719 void block_job_completed(BlockJob
*job
, int ret
)
721 assert(job
&& job
->txn
&& !job_is_completed(&job
->job
));
722 assert(blk_bs(job
->blk
)->job
== job
);
724 job_update_rc(&job
->job
);
725 trace_block_job_completed(job
, ret
, job
->job
.ret
);
727 block_job_completed_txn_abort(job
);
729 block_job_completed_txn_success(job
);
733 void block_job_enter(BlockJob
*job
)
735 job_enter_cond(&job
->job
, NULL
);
738 void block_job_yield(BlockJob
*job
)
740 assert(job
->job
.busy
);
742 /* Check cancellation *before* setting busy = false, too! */
743 if (job_is_cancelled(&job
->job
)) {
747 if (!job_should_pause(&job
->job
)) {
748 job_do_yield(&job
->job
, -1);
751 job_pause_point(&job
->job
);
754 void block_job_iostatus_reset(BlockJob
*job
)
756 if (job
->iostatus
== BLOCK_DEVICE_IO_STATUS_OK
) {
759 assert(job
->job
.user_paused
&& job
->job
.pause_count
> 0);
760 job
->iostatus
= BLOCK_DEVICE_IO_STATUS_OK
;
763 void block_job_user_resume(Job
*job
)
765 BlockJob
*bjob
= container_of(job
, BlockJob
, job
);
766 block_job_iostatus_reset(bjob
);
769 void block_job_event_ready(BlockJob
*job
)
771 job_state_transition(&job
->job
, JOB_STATUS_READY
);
774 if (block_job_is_internal(job
)) {
778 qapi_event_send_block_job_ready(job_type(&job
->job
),
782 job
->speed
, &error_abort
);
785 BlockErrorAction
block_job_error_action(BlockJob
*job
, BlockdevOnError on_err
,
786 int is_read
, int error
)
788 BlockErrorAction action
;
791 case BLOCKDEV_ON_ERROR_ENOSPC
:
792 case BLOCKDEV_ON_ERROR_AUTO
:
793 action
= (error
== ENOSPC
) ?
794 BLOCK_ERROR_ACTION_STOP
: BLOCK_ERROR_ACTION_REPORT
;
796 case BLOCKDEV_ON_ERROR_STOP
:
797 action
= BLOCK_ERROR_ACTION_STOP
;
799 case BLOCKDEV_ON_ERROR_REPORT
:
800 action
= BLOCK_ERROR_ACTION_REPORT
;
802 case BLOCKDEV_ON_ERROR_IGNORE
:
803 action
= BLOCK_ERROR_ACTION_IGNORE
;
808 if (!block_job_is_internal(job
)) {
809 qapi_event_send_block_job_error(job
->job
.id
,
810 is_read
? IO_OPERATION_TYPE_READ
:
811 IO_OPERATION_TYPE_WRITE
,
812 action
, &error_abort
);
814 if (action
== BLOCK_ERROR_ACTION_STOP
) {
815 job_pause(&job
->job
);
816 /* make the pause user visible, which will be resumed from QMP. */
817 job
->job
.user_paused
= true;
818 block_job_iostatus_set_err(job
, error
);