2 * QEMU System Emulator block driver
4 * Copyright (c) 2003 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 #include "config-host.h"
25 #include "qemu-common.h"
27 #include "block/block_int.h"
28 #include "block/blockjob.h"
29 #include "qemu/module.h"
30 #include "qapi/qmp/qjson.h"
31 #include "sysemu/sysemu.h"
32 #include "qemu/notify.h"
33 #include "block/coroutine.h"
34 #include "block/qapi.h"
35 #include "qmp-commands.h"
36 #include "qemu/timer.h"
37 #include "qapi-event.h"
40 #include <sys/types.h>
42 #include <sys/ioctl.h>
43 #include <sys/queue.h>
53 struct BdrvDirtyBitmap
{
55 QLIST_ENTRY(BdrvDirtyBitmap
) list
;
58 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
60 static void bdrv_dev_change_media_cb(BlockDriverState
*bs
, bool load
);
61 static BlockDriverAIOCB
*bdrv_aio_readv_em(BlockDriverState
*bs
,
62 int64_t sector_num
, QEMUIOVector
*qiov
, int nb_sectors
,
63 BlockDriverCompletionFunc
*cb
, void *opaque
);
64 static BlockDriverAIOCB
*bdrv_aio_writev_em(BlockDriverState
*bs
,
65 int64_t sector_num
, QEMUIOVector
*qiov
, int nb_sectors
,
66 BlockDriverCompletionFunc
*cb
, void *opaque
);
67 static int coroutine_fn
bdrv_co_readv_em(BlockDriverState
*bs
,
68 int64_t sector_num
, int nb_sectors
,
70 static int coroutine_fn
bdrv_co_writev_em(BlockDriverState
*bs
,
71 int64_t sector_num
, int nb_sectors
,
73 static int coroutine_fn
bdrv_co_do_preadv(BlockDriverState
*bs
,
74 int64_t offset
, unsigned int bytes
, QEMUIOVector
*qiov
,
75 BdrvRequestFlags flags
);
76 static int coroutine_fn
bdrv_co_do_pwritev(BlockDriverState
*bs
,
77 int64_t offset
, unsigned int bytes
, QEMUIOVector
*qiov
,
78 BdrvRequestFlags flags
);
79 static BlockDriverAIOCB
*bdrv_co_aio_rw_vector(BlockDriverState
*bs
,
83 BdrvRequestFlags flags
,
84 BlockDriverCompletionFunc
*cb
,
87 static void coroutine_fn
bdrv_co_do_rw(void *opaque
);
88 static int coroutine_fn
bdrv_co_do_write_zeroes(BlockDriverState
*bs
,
89 int64_t sector_num
, int nb_sectors
, BdrvRequestFlags flags
);
91 static QTAILQ_HEAD(, BlockDriverState
) bdrv_states
=
92 QTAILQ_HEAD_INITIALIZER(bdrv_states
);
94 static QTAILQ_HEAD(, BlockDriverState
) graph_bdrv_states
=
95 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states
);
97 static QLIST_HEAD(, BlockDriver
) bdrv_drivers
=
98 QLIST_HEAD_INITIALIZER(bdrv_drivers
);
100 /* If non-zero, use only whitelisted block drivers */
101 static int use_bdrv_whitelist
;
104 static int is_windows_drive_prefix(const char *filename
)
106 return (((filename
[0] >= 'a' && filename
[0] <= 'z') ||
107 (filename
[0] >= 'A' && filename
[0] <= 'Z')) &&
111 int is_windows_drive(const char *filename
)
113 if (is_windows_drive_prefix(filename
) &&
116 if (strstart(filename
, "\\\\.\\", NULL
) ||
117 strstart(filename
, "//./", NULL
))
123 /* throttling disk I/O limits */
124 void bdrv_set_io_limits(BlockDriverState
*bs
,
129 throttle_config(&bs
->throttle_state
, cfg
);
131 for (i
= 0; i
< 2; i
++) {
132 qemu_co_enter_next(&bs
->throttled_reqs
[i
]);
136 /* this function drain all the throttled IOs */
137 static bool bdrv_start_throttled_reqs(BlockDriverState
*bs
)
139 bool drained
= false;
140 bool enabled
= bs
->io_limits_enabled
;
143 bs
->io_limits_enabled
= false;
145 for (i
= 0; i
< 2; i
++) {
146 while (qemu_co_enter_next(&bs
->throttled_reqs
[i
])) {
151 bs
->io_limits_enabled
= enabled
;
156 void bdrv_io_limits_disable(BlockDriverState
*bs
)
158 bs
->io_limits_enabled
= false;
160 bdrv_start_throttled_reqs(bs
);
162 throttle_destroy(&bs
->throttle_state
);
165 static void bdrv_throttle_read_timer_cb(void *opaque
)
167 BlockDriverState
*bs
= opaque
;
168 qemu_co_enter_next(&bs
->throttled_reqs
[0]);
171 static void bdrv_throttle_write_timer_cb(void *opaque
)
173 BlockDriverState
*bs
= opaque
;
174 qemu_co_enter_next(&bs
->throttled_reqs
[1]);
177 /* should be called before bdrv_set_io_limits if a limit is set */
178 void bdrv_io_limits_enable(BlockDriverState
*bs
)
180 assert(!bs
->io_limits_enabled
);
181 throttle_init(&bs
->throttle_state
,
182 bdrv_get_aio_context(bs
),
184 bdrv_throttle_read_timer_cb
,
185 bdrv_throttle_write_timer_cb
,
187 bs
->io_limits_enabled
= true;
190 /* This function makes an IO wait if needed
192 * @nb_sectors: the number of sectors of the IO
193 * @is_write: is the IO a write
195 static void bdrv_io_limits_intercept(BlockDriverState
*bs
,
199 /* does this io must wait */
200 bool must_wait
= throttle_schedule_timer(&bs
->throttle_state
, is_write
);
202 /* if must wait or any request of this type throttled queue the IO */
204 !qemu_co_queue_empty(&bs
->throttled_reqs
[is_write
])) {
205 qemu_co_queue_wait(&bs
->throttled_reqs
[is_write
]);
208 /* the IO will be executed, do the accounting */
209 throttle_account(&bs
->throttle_state
, is_write
, bytes
);
212 /* if the next request must wait -> do nothing */
213 if (throttle_schedule_timer(&bs
->throttle_state
, is_write
)) {
217 /* else queue next request for execution */
218 qemu_co_queue_next(&bs
->throttled_reqs
[is_write
]);
221 size_t bdrv_opt_mem_align(BlockDriverState
*bs
)
223 if (!bs
|| !bs
->drv
) {
224 /* 4k should be on the safe side */
228 return bs
->bl
.opt_mem_alignment
;
231 /* check if the path starts with "<protocol>:" */
232 static int path_has_protocol(const char *path
)
237 if (is_windows_drive(path
) ||
238 is_windows_drive_prefix(path
)) {
241 p
= path
+ strcspn(path
, ":/\\");
243 p
= path
+ strcspn(path
, ":/");
249 int path_is_absolute(const char *path
)
252 /* specific case for names like: "\\.\d:" */
253 if (is_windows_drive(path
) || is_windows_drive_prefix(path
)) {
256 return (*path
== '/' || *path
== '\\');
258 return (*path
== '/');
262 /* if filename is absolute, just copy it to dest. Otherwise, build a
263 path to it by considering it is relative to base_path. URL are
265 void path_combine(char *dest
, int dest_size
,
266 const char *base_path
,
267 const char *filename
)
274 if (path_is_absolute(filename
)) {
275 pstrcpy(dest
, dest_size
, filename
);
277 p
= strchr(base_path
, ':');
282 p1
= strrchr(base_path
, '/');
286 p2
= strrchr(base_path
, '\\');
298 if (len
> dest_size
- 1)
300 memcpy(dest
, base_path
, len
);
302 pstrcat(dest
, dest_size
, filename
);
306 void bdrv_get_full_backing_filename(BlockDriverState
*bs
, char *dest
, size_t sz
)
308 if (bs
->backing_file
[0] == '\0' || path_has_protocol(bs
->backing_file
)) {
309 pstrcpy(dest
, sz
, bs
->backing_file
);
311 path_combine(dest
, sz
, bs
->filename
, bs
->backing_file
);
315 void bdrv_register(BlockDriver
*bdrv
)
317 /* Block drivers without coroutine functions need emulation */
318 if (!bdrv
->bdrv_co_readv
) {
319 bdrv
->bdrv_co_readv
= bdrv_co_readv_em
;
320 bdrv
->bdrv_co_writev
= bdrv_co_writev_em
;
322 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
323 * the block driver lacks aio we need to emulate that too.
325 if (!bdrv
->bdrv_aio_readv
) {
326 /* add AIO emulation layer */
327 bdrv
->bdrv_aio_readv
= bdrv_aio_readv_em
;
328 bdrv
->bdrv_aio_writev
= bdrv_aio_writev_em
;
332 QLIST_INSERT_HEAD(&bdrv_drivers
, bdrv
, list
);
335 /* create a new block device (by default it is empty) */
336 BlockDriverState
*bdrv_new(const char *device_name
, Error
**errp
)
338 BlockDriverState
*bs
;
341 if (bdrv_find(device_name
)) {
342 error_setg(errp
, "Device with id '%s' already exists",
346 if (bdrv_find_node(device_name
)) {
347 error_setg(errp
, "Device with node-name '%s' already exists",
352 bs
= g_malloc0(sizeof(BlockDriverState
));
353 QLIST_INIT(&bs
->dirty_bitmaps
);
354 pstrcpy(bs
->device_name
, sizeof(bs
->device_name
), device_name
);
355 if (device_name
[0] != '\0') {
356 QTAILQ_INSERT_TAIL(&bdrv_states
, bs
, device_list
);
358 for (i
= 0; i
< BLOCK_OP_TYPE_MAX
; i
++) {
359 QLIST_INIT(&bs
->op_blockers
[i
]);
361 bdrv_iostatus_disable(bs
);
362 notifier_list_init(&bs
->close_notifiers
);
363 notifier_with_return_list_init(&bs
->before_write_notifiers
);
364 qemu_co_queue_init(&bs
->throttled_reqs
[0]);
365 qemu_co_queue_init(&bs
->throttled_reqs
[1]);
367 bs
->aio_context
= qemu_get_aio_context();
372 void bdrv_add_close_notifier(BlockDriverState
*bs
, Notifier
*notify
)
374 notifier_list_add(&bs
->close_notifiers
, notify
);
377 BlockDriver
*bdrv_find_format(const char *format_name
)
380 QLIST_FOREACH(drv1
, &bdrv_drivers
, list
) {
381 if (!strcmp(drv1
->format_name
, format_name
)) {
388 static int bdrv_is_whitelisted(BlockDriver
*drv
, bool read_only
)
390 static const char *whitelist_rw
[] = {
391 CONFIG_BDRV_RW_WHITELIST
393 static const char *whitelist_ro
[] = {
394 CONFIG_BDRV_RO_WHITELIST
398 if (!whitelist_rw
[0] && !whitelist_ro
[0]) {
399 return 1; /* no whitelist, anything goes */
402 for (p
= whitelist_rw
; *p
; p
++) {
403 if (!strcmp(drv
->format_name
, *p
)) {
408 for (p
= whitelist_ro
; *p
; p
++) {
409 if (!strcmp(drv
->format_name
, *p
)) {
417 BlockDriver
*bdrv_find_whitelisted_format(const char *format_name
,
420 BlockDriver
*drv
= bdrv_find_format(format_name
);
421 return drv
&& bdrv_is_whitelisted(drv
, read_only
) ? drv
: NULL
;
424 typedef struct CreateCo
{
432 static void coroutine_fn
bdrv_create_co_entry(void *opaque
)
434 Error
*local_err
= NULL
;
437 CreateCo
*cco
= opaque
;
440 ret
= cco
->drv
->bdrv_create(cco
->filename
, cco
->opts
, &local_err
);
442 error_propagate(&cco
->err
, local_err
);
447 int bdrv_create(BlockDriver
*drv
, const char* filename
,
448 QemuOpts
*opts
, Error
**errp
)
455 .filename
= g_strdup(filename
),
461 if (!drv
->bdrv_create
) {
462 error_setg(errp
, "Driver '%s' does not support image creation", drv
->format_name
);
467 if (qemu_in_coroutine()) {
468 /* Fast-path if already in coroutine context */
469 bdrv_create_co_entry(&cco
);
471 co
= qemu_coroutine_create(bdrv_create_co_entry
);
472 qemu_coroutine_enter(co
, &cco
);
473 while (cco
.ret
== NOT_DONE
) {
481 error_propagate(errp
, cco
.err
);
483 error_setg_errno(errp
, -ret
, "Could not create image");
488 g_free(cco
.filename
);
492 int bdrv_create_file(const char *filename
, QemuOpts
*opts
, Error
**errp
)
495 Error
*local_err
= NULL
;
498 drv
= bdrv_find_protocol(filename
, true);
500 error_setg(errp
, "Could not find protocol for file '%s'", filename
);
504 ret
= bdrv_create(drv
, filename
, opts
, &local_err
);
506 error_propagate(errp
, local_err
);
511 int bdrv_refresh_limits(BlockDriverState
*bs
)
513 BlockDriver
*drv
= bs
->drv
;
515 memset(&bs
->bl
, 0, sizeof(bs
->bl
));
521 /* Take some limits from the children as a default */
523 bdrv_refresh_limits(bs
->file
);
524 bs
->bl
.opt_transfer_length
= bs
->file
->bl
.opt_transfer_length
;
525 bs
->bl
.opt_mem_alignment
= bs
->file
->bl
.opt_mem_alignment
;
527 bs
->bl
.opt_mem_alignment
= 512;
530 if (bs
->backing_hd
) {
531 bdrv_refresh_limits(bs
->backing_hd
);
532 bs
->bl
.opt_transfer_length
=
533 MAX(bs
->bl
.opt_transfer_length
,
534 bs
->backing_hd
->bl
.opt_transfer_length
);
535 bs
->bl
.opt_mem_alignment
=
536 MAX(bs
->bl
.opt_mem_alignment
,
537 bs
->backing_hd
->bl
.opt_mem_alignment
);
540 /* Then let the driver override it */
541 if (drv
->bdrv_refresh_limits
) {
542 return drv
->bdrv_refresh_limits(bs
);
549 * Create a uniquely-named empty temporary file.
550 * Return 0 upon success, otherwise a negative errno value.
552 int get_tmp_filename(char *filename
, int size
)
555 char temp_dir
[MAX_PATH
];
556 /* GetTempFileName requires that its output buffer (4th param)
557 have length MAX_PATH or greater. */
558 assert(size
>= MAX_PATH
);
559 return (GetTempPath(MAX_PATH
, temp_dir
)
560 && GetTempFileName(temp_dir
, "qem", 0, filename
)
561 ? 0 : -GetLastError());
565 tmpdir
= getenv("TMPDIR");
569 if (snprintf(filename
, size
, "%s/vl.XXXXXX", tmpdir
) >= size
) {
572 fd
= mkstemp(filename
);
576 if (close(fd
) != 0) {
585 * Detect host devices. By convention, /dev/cdrom[N] is always
586 * recognized as a host CDROM.
588 static BlockDriver
*find_hdev_driver(const char *filename
)
590 int score_max
= 0, score
;
591 BlockDriver
*drv
= NULL
, *d
;
593 QLIST_FOREACH(d
, &bdrv_drivers
, list
) {
594 if (d
->bdrv_probe_device
) {
595 score
= d
->bdrv_probe_device(filename
);
596 if (score
> score_max
) {
606 BlockDriver
*bdrv_find_protocol(const char *filename
,
607 bool allow_protocol_prefix
)
614 /* TODO Drivers without bdrv_file_open must be specified explicitly */
617 * XXX(hch): we really should not let host device detection
618 * override an explicit protocol specification, but moving this
619 * later breaks access to device names with colons in them.
620 * Thanks to the brain-dead persistent naming schemes on udev-
621 * based Linux systems those actually are quite common.
623 drv1
= find_hdev_driver(filename
);
628 if (!path_has_protocol(filename
) || !allow_protocol_prefix
) {
629 return bdrv_find_format("file");
632 p
= strchr(filename
, ':');
635 if (len
> sizeof(protocol
) - 1)
636 len
= sizeof(protocol
) - 1;
637 memcpy(protocol
, filename
, len
);
638 protocol
[len
] = '\0';
639 QLIST_FOREACH(drv1
, &bdrv_drivers
, list
) {
640 if (drv1
->protocol_name
&&
641 !strcmp(drv1
->protocol_name
, protocol
)) {
648 static int find_image_format(BlockDriverState
*bs
, const char *filename
,
649 BlockDriver
**pdrv
, Error
**errp
)
651 int score
, score_max
;
652 BlockDriver
*drv1
, *drv
;
656 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
657 if (bs
->sg
|| !bdrv_is_inserted(bs
) || bdrv_getlength(bs
) == 0) {
658 drv
= bdrv_find_format("raw");
660 error_setg(errp
, "Could not find raw image format");
667 ret
= bdrv_pread(bs
, 0, buf
, sizeof(buf
));
669 error_setg_errno(errp
, -ret
, "Could not read image for determining its "
677 QLIST_FOREACH(drv1
, &bdrv_drivers
, list
) {
678 if (drv1
->bdrv_probe
) {
679 score
= drv1
->bdrv_probe(buf
, ret
, filename
);
680 if (score
> score_max
) {
687 error_setg(errp
, "Could not determine image format: No compatible "
696 * Set the current 'total_sectors' value
698 static int refresh_total_sectors(BlockDriverState
*bs
, int64_t hint
)
700 BlockDriver
*drv
= bs
->drv
;
702 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
706 /* query actual device if possible, otherwise just trust the hint */
707 if (drv
->bdrv_getlength
) {
708 int64_t length
= drv
->bdrv_getlength(bs
);
712 hint
= DIV_ROUND_UP(length
, BDRV_SECTOR_SIZE
);
715 bs
->total_sectors
= hint
;
720 * Set open flags for a given discard mode
722 * Return 0 on success, -1 if the discard mode was invalid.
724 int bdrv_parse_discard_flags(const char *mode
, int *flags
)
726 *flags
&= ~BDRV_O_UNMAP
;
728 if (!strcmp(mode
, "off") || !strcmp(mode
, "ignore")) {
730 } else if (!strcmp(mode
, "on") || !strcmp(mode
, "unmap")) {
731 *flags
|= BDRV_O_UNMAP
;
740 * Set open flags for a given cache mode
742 * Return 0 on success, -1 if the cache mode was invalid.
744 int bdrv_parse_cache_flags(const char *mode
, int *flags
)
746 *flags
&= ~BDRV_O_CACHE_MASK
;
748 if (!strcmp(mode
, "off") || !strcmp(mode
, "none")) {
749 *flags
|= BDRV_O_NOCACHE
| BDRV_O_CACHE_WB
;
750 } else if (!strcmp(mode
, "directsync")) {
751 *flags
|= BDRV_O_NOCACHE
;
752 } else if (!strcmp(mode
, "writeback")) {
753 *flags
|= BDRV_O_CACHE_WB
;
754 } else if (!strcmp(mode
, "unsafe")) {
755 *flags
|= BDRV_O_CACHE_WB
;
756 *flags
|= BDRV_O_NO_FLUSH
;
757 } else if (!strcmp(mode
, "writethrough")) {
758 /* this is the default */
767 * The copy-on-read flag is actually a reference count so multiple users may
768 * use the feature without worrying about clobbering its previous state.
769 * Copy-on-read stays enabled until all users have called to disable it.
771 void bdrv_enable_copy_on_read(BlockDriverState
*bs
)
776 void bdrv_disable_copy_on_read(BlockDriverState
*bs
)
778 assert(bs
->copy_on_read
> 0);
783 * Returns the flags that a temporary snapshot should get, based on the
784 * originally requested flags (the originally requested image will have flags
785 * like a backing file)
787 static int bdrv_temp_snapshot_flags(int flags
)
789 return (flags
& ~BDRV_O_SNAPSHOT
) | BDRV_O_TEMPORARY
;
793 * Returns the flags that bs->file should get, based on the given flags for
796 static int bdrv_inherited_flags(int flags
)
798 /* Enable protocol handling, disable format probing for bs->file */
799 flags
|= BDRV_O_PROTOCOL
;
801 /* Our block drivers take care to send flushes and respect unmap policy,
802 * so we can enable both unconditionally on lower layers. */
803 flags
|= BDRV_O_CACHE_WB
| BDRV_O_UNMAP
;
805 /* Clear flags that only apply to the top layer */
806 flags
&= ~(BDRV_O_SNAPSHOT
| BDRV_O_NO_BACKING
| BDRV_O_COPY_ON_READ
);
812 * Returns the flags that bs->backing_hd should get, based on the given flags
815 static int bdrv_backing_flags(int flags
)
817 /* backing files always opened read-only */
818 flags
&= ~(BDRV_O_RDWR
| BDRV_O_COPY_ON_READ
);
820 /* snapshot=on is handled on the top layer */
821 flags
&= ~(BDRV_O_SNAPSHOT
| BDRV_O_TEMPORARY
);
826 static int bdrv_open_flags(BlockDriverState
*bs
, int flags
)
828 int open_flags
= flags
| BDRV_O_CACHE_WB
;
831 * Clear flags that are internal to the block layer before opening the
834 open_flags
&= ~(BDRV_O_SNAPSHOT
| BDRV_O_NO_BACKING
);
837 * Snapshots should be writable.
839 if (flags
& BDRV_O_TEMPORARY
) {
840 open_flags
|= BDRV_O_RDWR
;
846 static void bdrv_assign_node_name(BlockDriverState
*bs
,
847 const char *node_name
,
854 /* empty string node name is invalid */
855 if (node_name
[0] == '\0') {
856 error_setg(errp
, "Empty node name");
860 /* takes care of avoiding namespaces collisions */
861 if (bdrv_find(node_name
)) {
862 error_setg(errp
, "node-name=%s is conflicting with a device id",
867 /* takes care of avoiding duplicates node names */
868 if (bdrv_find_node(node_name
)) {
869 error_setg(errp
, "Duplicate node name");
873 /* copy node name into the bs and insert it into the graph list */
874 pstrcpy(bs
->node_name
, sizeof(bs
->node_name
), node_name
);
875 QTAILQ_INSERT_TAIL(&graph_bdrv_states
, bs
, node_list
);
879 * Common part for opening disk images and files
881 * Removes all processed options from *options.
883 static int bdrv_open_common(BlockDriverState
*bs
, BlockDriverState
*file
,
884 QDict
*options
, int flags
, BlockDriver
*drv
, Error
**errp
)
887 const char *filename
;
888 const char *node_name
= NULL
;
889 Error
*local_err
= NULL
;
892 assert(bs
->file
== NULL
);
893 assert(options
!= NULL
&& bs
->options
!= options
);
896 filename
= file
->filename
;
898 filename
= qdict_get_try_str(options
, "filename");
901 if (drv
->bdrv_needs_filename
&& !filename
) {
902 error_setg(errp
, "The '%s' block driver requires a file name",
907 trace_bdrv_open_common(bs
, filename
?: "", flags
, drv
->format_name
);
909 node_name
= qdict_get_try_str(options
, "node-name");
910 bdrv_assign_node_name(bs
, node_name
, &local_err
);
912 error_propagate(errp
, local_err
);
915 qdict_del(options
, "node-name");
917 /* bdrv_open() with directly using a protocol as drv. This layer is already
918 * opened, so assign it to bs (while file becomes a closed BlockDriverState)
919 * and return immediately. */
920 if (file
!= NULL
&& drv
->bdrv_file_open
) {
925 bs
->open_flags
= flags
;
926 bs
->guest_block_size
= 512;
927 bs
->request_alignment
= 512;
928 bs
->zero_beyond_eof
= true;
929 open_flags
= bdrv_open_flags(bs
, flags
);
930 bs
->read_only
= !(open_flags
& BDRV_O_RDWR
);
932 if (use_bdrv_whitelist
&& !bdrv_is_whitelisted(drv
, bs
->read_only
)) {
934 !bs
->read_only
&& bdrv_is_whitelisted(drv
, true)
935 ? "Driver '%s' can only be used for read-only devices"
936 : "Driver '%s' is not whitelisted",
941 assert(bs
->copy_on_read
== 0); /* bdrv_new() and bdrv_close() make it so */
942 if (flags
& BDRV_O_COPY_ON_READ
) {
943 if (!bs
->read_only
) {
944 bdrv_enable_copy_on_read(bs
);
946 error_setg(errp
, "Can't use copy-on-read on read-only device");
951 if (filename
!= NULL
) {
952 pstrcpy(bs
->filename
, sizeof(bs
->filename
), filename
);
954 bs
->filename
[0] = '\0';
958 bs
->opaque
= g_malloc0(drv
->instance_size
);
960 bs
->enable_write_cache
= !!(flags
& BDRV_O_CACHE_WB
);
962 /* Open the image, either directly or using a protocol */
963 if (drv
->bdrv_file_open
) {
964 assert(file
== NULL
);
965 assert(!drv
->bdrv_needs_filename
|| filename
!= NULL
);
966 ret
= drv
->bdrv_file_open(bs
, options
, open_flags
, &local_err
);
969 error_setg(errp
, "Can't use '%s' as a block driver for the "
970 "protocol level", drv
->format_name
);
975 ret
= drv
->bdrv_open(bs
, options
, open_flags
, &local_err
);
980 error_propagate(errp
, local_err
);
981 } else if (bs
->filename
[0]) {
982 error_setg_errno(errp
, -ret
, "Could not open '%s'", bs
->filename
);
984 error_setg_errno(errp
, -ret
, "Could not open image");
989 ret
= refresh_total_sectors(bs
, bs
->total_sectors
);
991 error_setg_errno(errp
, -ret
, "Could not refresh total sector count");
995 bdrv_refresh_limits(bs
);
996 assert(bdrv_opt_mem_align(bs
) != 0);
997 assert((bs
->request_alignment
!= 0) || bs
->sg
);
1008 static QDict
*parse_json_filename(const char *filename
, Error
**errp
)
1010 QObject
*options_obj
;
1014 ret
= strstart(filename
, "json:", &filename
);
1017 options_obj
= qobject_from_json(filename
);
1019 error_setg(errp
, "Could not parse the JSON options");
1023 if (qobject_type(options_obj
) != QTYPE_QDICT
) {
1024 qobject_decref(options_obj
);
1025 error_setg(errp
, "Invalid JSON object given");
1029 options
= qobject_to_qdict(options_obj
);
1030 qdict_flatten(options
);
1036 * Fills in default options for opening images and converts the legacy
1037 * filename/flags pair to option QDict entries.
1039 static int bdrv_fill_options(QDict
**options
, const char **pfilename
, int flags
,
1042 const char *filename
= *pfilename
;
1043 const char *drvname
;
1044 bool protocol
= flags
& BDRV_O_PROTOCOL
;
1045 bool parse_filename
= false;
1046 Error
*local_err
= NULL
;
1049 /* Parse json: pseudo-protocol */
1050 if (filename
&& g_str_has_prefix(filename
, "json:")) {
1051 QDict
*json_options
= parse_json_filename(filename
, &local_err
);
1053 error_propagate(errp
, local_err
);
1057 /* Options given in the filename have lower priority than options
1058 * specified directly */
1059 qdict_join(*options
, json_options
, false);
1060 QDECREF(json_options
);
1061 *pfilename
= filename
= NULL
;
1068 /* Fetch the file name from the options QDict if necessary */
1070 if (!qdict_haskey(*options
, "filename")) {
1071 qdict_put(*options
, "filename", qstring_from_str(filename
));
1072 parse_filename
= true;
1074 error_setg(errp
, "Can't specify 'file' and 'filename' options at "
1080 /* Find the right block driver */
1081 filename
= qdict_get_try_str(*options
, "filename");
1082 drvname
= qdict_get_try_str(*options
, "driver");
1086 drv
= bdrv_find_protocol(filename
, parse_filename
);
1088 error_setg(errp
, "Unknown protocol");
1092 drvname
= drv
->format_name
;
1093 qdict_put(*options
, "driver", qstring_from_str(drvname
));
1095 error_setg(errp
, "Must specify either driver or file");
1100 drv
= bdrv_find_format(drvname
);
1102 error_setg(errp
, "Unknown driver '%s'", drvname
);
1106 /* Driver-specific filename parsing */
1107 if (drv
->bdrv_parse_filename
&& parse_filename
) {
1108 drv
->bdrv_parse_filename(filename
, *options
, &local_err
);
1110 error_propagate(errp
, local_err
);
1114 if (!drv
->bdrv_needs_filename
) {
1115 qdict_del(*options
, "filename");
1123 * Opens a file using a protocol (file, host_device, nbd, ...)
1125 * options is an indirect pointer to a QDict of options to pass to the block
1126 * drivers, or pointer to NULL for an empty set of options. If this function
1127 * takes ownership of the QDict reference, it will set *options to NULL;
1128 * otherwise, it will contain unused/unrecognized options after this function
1129 * returns. Then, the caller is responsible for freeing it. If it intends to
1130 * reuse the QDict, QINCREF() should be called beforehand.
1132 static int bdrv_file_open(BlockDriverState
*bs
, QDict
**options
, int flags
,
1136 const char *filename
;
1137 const char *drvname
;
1138 Error
*local_err
= NULL
;
1141 filename
= qdict_get_try_str(*options
, "filename");
1142 drvname
= qdict_get_str(*options
, "driver");
1144 drv
= bdrv_find_format(drvname
);
1146 qdict_del(*options
, "driver");
1149 if (!drv
->bdrv_file_open
) {
1150 ret
= bdrv_open(&bs
, filename
, NULL
, *options
, flags
, drv
, &local_err
);
1153 ret
= bdrv_open_common(bs
, NULL
, *options
, flags
, drv
, &local_err
);
1156 error_propagate(errp
, local_err
);
1167 void bdrv_set_backing_hd(BlockDriverState
*bs
, BlockDriverState
*backing_hd
)
1170 if (bs
->backing_hd
) {
1171 assert(bs
->backing_blocker
);
1172 bdrv_op_unblock_all(bs
->backing_hd
, bs
->backing_blocker
);
1173 } else if (backing_hd
) {
1174 error_setg(&bs
->backing_blocker
,
1175 "device is used as backing hd of '%s'",
1179 bs
->backing_hd
= backing_hd
;
1181 error_free(bs
->backing_blocker
);
1182 bs
->backing_blocker
= NULL
;
1185 bs
->open_flags
&= ~BDRV_O_NO_BACKING
;
1186 pstrcpy(bs
->backing_file
, sizeof(bs
->backing_file
), backing_hd
->filename
);
1187 pstrcpy(bs
->backing_format
, sizeof(bs
->backing_format
),
1188 backing_hd
->drv
? backing_hd
->drv
->format_name
: "");
1190 bdrv_op_block_all(bs
->backing_hd
, bs
->backing_blocker
);
1191 /* Otherwise we won't be able to commit due to check in bdrv_commit */
1192 bdrv_op_unblock(bs
->backing_hd
, BLOCK_OP_TYPE_COMMIT
,
1193 bs
->backing_blocker
);
1195 bdrv_refresh_limits(bs
);
1199 * Opens the backing file for a BlockDriverState if not yet open
1201 * options is a QDict of options to pass to the block drivers, or NULL for an
1202 * empty set of options. The reference to the QDict is transferred to this
1203 * function (even on failure), so if the caller intends to reuse the dictionary,
1204 * it needs to use QINCREF() before calling bdrv_file_open.
1206 int bdrv_open_backing_file(BlockDriverState
*bs
, QDict
*options
, Error
**errp
)
1208 char *backing_filename
= g_malloc0(PATH_MAX
);
1210 BlockDriver
*back_drv
= NULL
;
1211 BlockDriverState
*backing_hd
;
1212 Error
*local_err
= NULL
;
1214 if (bs
->backing_hd
!= NULL
) {
1219 /* NULL means an empty set of options */
1220 if (options
== NULL
) {
1221 options
= qdict_new();
1224 bs
->open_flags
&= ~BDRV_O_NO_BACKING
;
1225 if (qdict_haskey(options
, "file.filename")) {
1226 backing_filename
[0] = '\0';
1227 } else if (bs
->backing_file
[0] == '\0' && qdict_size(options
) == 0) {
1231 bdrv_get_full_backing_filename(bs
, backing_filename
, PATH_MAX
);
1234 backing_hd
= bdrv_new("", errp
);
1236 if (bs
->backing_format
[0] != '\0') {
1237 back_drv
= bdrv_find_format(bs
->backing_format
);
1240 assert(bs
->backing_hd
== NULL
);
1241 ret
= bdrv_open(&backing_hd
,
1242 *backing_filename
? backing_filename
: NULL
, NULL
, options
,
1243 bdrv_backing_flags(bs
->open_flags
), back_drv
, &local_err
);
1245 bdrv_unref(backing_hd
);
1247 bs
->open_flags
|= BDRV_O_NO_BACKING
;
1248 error_setg(errp
, "Could not open backing file: %s",
1249 error_get_pretty(local_err
));
1250 error_free(local_err
);
1253 bdrv_set_backing_hd(bs
, backing_hd
);
1256 g_free(backing_filename
);
1261 * Opens a disk image whose options are given as BlockdevRef in another block
1264 * If allow_none is true, no image will be opened if filename is false and no
1265 * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
1267 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1268 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1269 * itself, all options starting with "${bdref_key}." are considered part of the
1272 * The BlockdevRef will be removed from the options QDict.
1274 * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
1276 int bdrv_open_image(BlockDriverState
**pbs
, const char *filename
,
1277 QDict
*options
, const char *bdref_key
, int flags
,
1278 bool allow_none
, Error
**errp
)
1280 QDict
*image_options
;
1282 char *bdref_key_dot
;
1283 const char *reference
;
1286 assert(*pbs
== NULL
);
1288 bdref_key_dot
= g_strdup_printf("%s.", bdref_key
);
1289 qdict_extract_subqdict(options
, &image_options
, bdref_key_dot
);
1290 g_free(bdref_key_dot
);
1292 reference
= qdict_get_try_str(options
, bdref_key
);
1293 if (!filename
&& !reference
&& !qdict_size(image_options
)) {
1297 error_setg(errp
, "A block device must be specified for \"%s\"",
1301 QDECREF(image_options
);
1305 ret
= bdrv_open(pbs
, filename
, reference
, image_options
, flags
, NULL
, errp
);
1308 qdict_del(options
, bdref_key
);
1312 void bdrv_append_temp_snapshot(BlockDriverState
*bs
, int flags
, Error
**errp
)
1314 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1315 char *tmp_filename
= g_malloc0(PATH_MAX
+ 1);
1317 BlockDriver
*bdrv_qcow2
;
1318 QemuOpts
*opts
= NULL
;
1319 QDict
*snapshot_options
;
1320 BlockDriverState
*bs_snapshot
;
1324 /* if snapshot, we create a temporary backing file and open it
1325 instead of opening 'filename' directly */
1327 /* Get the required size from the image */
1328 total_size
= bdrv_getlength(bs
);
1329 if (total_size
< 0) {
1330 error_setg_errno(errp
, -total_size
, "Could not get image size");
1333 total_size
&= BDRV_SECTOR_MASK
;
1335 /* Create the temporary image */
1336 ret
= get_tmp_filename(tmp_filename
, PATH_MAX
+ 1);
1338 error_setg_errno(errp
, -ret
, "Could not get temporary filename");
1342 bdrv_qcow2
= bdrv_find_format("qcow2");
1343 opts
= qemu_opts_create(bdrv_qcow2
->create_opts
, NULL
, 0,
1345 qemu_opt_set_number(opts
, BLOCK_OPT_SIZE
, total_size
);
1346 ret
= bdrv_create(bdrv_qcow2
, tmp_filename
, opts
, &local_err
);
1347 qemu_opts_del(opts
);
1349 error_setg_errno(errp
, -ret
, "Could not create temporary overlay "
1350 "'%s': %s", tmp_filename
,
1351 error_get_pretty(local_err
));
1352 error_free(local_err
);
1356 /* Prepare a new options QDict for the temporary file */
1357 snapshot_options
= qdict_new();
1358 qdict_put(snapshot_options
, "file.driver",
1359 qstring_from_str("file"));
1360 qdict_put(snapshot_options
, "file.filename",
1361 qstring_from_str(tmp_filename
));
1363 bs_snapshot
= bdrv_new("", &error_abort
);
1365 ret
= bdrv_open(&bs_snapshot
, NULL
, NULL
, snapshot_options
,
1366 flags
, bdrv_qcow2
, &local_err
);
1368 error_propagate(errp
, local_err
);
1372 bdrv_append(bs_snapshot
, bs
);
1375 g_free(tmp_filename
);
1379 * Opens a disk image (raw, qcow2, vmdk, ...)
1381 * options is a QDict of options to pass to the block drivers, or NULL for an
1382 * empty set of options. The reference to the QDict belongs to the block layer
1383 * after the call (even on failure), so if the caller intends to reuse the
1384 * dictionary, it needs to use QINCREF() before calling bdrv_open.
1386 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1387 * If it is not NULL, the referenced BDS will be reused.
1389 * The reference parameter may be used to specify an existing block device which
1390 * should be opened. If specified, neither options nor a filename may be given,
1391 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
1393 int bdrv_open(BlockDriverState
**pbs
, const char *filename
,
1394 const char *reference
, QDict
*options
, int flags
,
1395 BlockDriver
*drv
, Error
**errp
)
1398 BlockDriverState
*file
= NULL
, *bs
;
1399 const char *drvname
;
1400 Error
*local_err
= NULL
;
1401 int snapshot_flags
= 0;
1406 bool options_non_empty
= options
? qdict_size(options
) : false;
1410 error_setg(errp
, "Cannot reuse an existing BDS when referencing "
1411 "another block device");
1415 if (filename
|| options_non_empty
) {
1416 error_setg(errp
, "Cannot reference an existing block device with "
1417 "additional options or a new filename");
1421 bs
= bdrv_lookup_bs(reference
, reference
, errp
);
1433 bs
= bdrv_new("", &error_abort
);
1436 /* NULL means an empty set of options */
1437 if (options
== NULL
) {
1438 options
= qdict_new();
1441 ret
= bdrv_fill_options(&options
, &filename
, flags
, &local_err
);
1446 bs
->options
= options
;
1447 options
= qdict_clone_shallow(options
);
1449 if (flags
& BDRV_O_PROTOCOL
) {
1451 ret
= bdrv_file_open(bs
, &options
, flags
& ~BDRV_O_PROTOCOL
,
1456 } else if (bs
->drv
) {
1457 goto close_and_fail
;
1463 /* Open image file without format layer */
1464 if (flags
& BDRV_O_RDWR
) {
1465 flags
|= BDRV_O_ALLOW_RDWR
;
1467 if (flags
& BDRV_O_SNAPSHOT
) {
1468 snapshot_flags
= bdrv_temp_snapshot_flags(flags
);
1469 flags
= bdrv_backing_flags(flags
);
1472 assert(file
== NULL
);
1473 ret
= bdrv_open_image(&file
, filename
, options
, "file",
1474 bdrv_inherited_flags(flags
),
1480 /* Find the right image format driver */
1481 drvname
= qdict_get_try_str(options
, "driver");
1483 drv
= bdrv_find_format(drvname
);
1484 qdict_del(options
, "driver");
1486 error_setg(errp
, "Invalid driver: '%s'", drvname
);
1494 ret
= find_image_format(file
, filename
, &drv
, &local_err
);
1496 error_setg(errp
, "Must specify either driver or file");
1506 /* Open the image */
1507 ret
= bdrv_open_common(bs
, file
, options
, flags
, drv
, &local_err
);
1512 if (file
&& (bs
->file
!= file
)) {
1517 /* If there is a backing file, use it */
1518 if ((flags
& BDRV_O_NO_BACKING
) == 0) {
1519 QDict
*backing_options
;
1521 qdict_extract_subqdict(options
, &backing_options
, "backing.");
1522 ret
= bdrv_open_backing_file(bs
, backing_options
, &local_err
);
1524 goto close_and_fail
;
1528 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1529 * temporary snapshot afterwards. */
1530 if (snapshot_flags
) {
1531 bdrv_append_temp_snapshot(bs
, snapshot_flags
, &local_err
);
1533 error_propagate(errp
, local_err
);
1534 goto close_and_fail
;
1540 /* Check if any unknown options were used */
1541 if (options
&& (qdict_size(options
) != 0)) {
1542 const QDictEntry
*entry
= qdict_first(options
);
1543 if (flags
& BDRV_O_PROTOCOL
) {
1544 error_setg(errp
, "Block protocol '%s' doesn't support the option "
1545 "'%s'", drv
->format_name
, entry
->key
);
1547 error_setg(errp
, "Block format '%s' used by device '%s' doesn't "
1548 "support the option '%s'", drv
->format_name
,
1549 bs
->device_name
, entry
->key
);
1553 goto close_and_fail
;
1556 if (!bdrv_key_required(bs
)) {
1557 bdrv_dev_change_media_cb(bs
, true);
1558 } else if (!runstate_check(RUN_STATE_PRELAUNCH
)
1559 && !runstate_check(RUN_STATE_INMIGRATE
)
1560 && !runstate_check(RUN_STATE_PAUSED
)) { /* HACK */
1562 "Guest must be stopped for opening of encrypted image");
1564 goto close_and_fail
;
1575 QDECREF(bs
->options
);
1579 /* If *pbs is NULL, a new BDS has been created in this function and
1580 needs to be freed now. Otherwise, it does not need to be closed,
1581 since it has not really been opened yet. */
1585 error_propagate(errp
, local_err
);
1590 /* See fail path, but now the BDS has to be always closed */
1598 error_propagate(errp
, local_err
);
1603 typedef struct BlockReopenQueueEntry
{
1605 BDRVReopenState state
;
1606 QSIMPLEQ_ENTRY(BlockReopenQueueEntry
) entry
;
1607 } BlockReopenQueueEntry
;
1610 * Adds a BlockDriverState to a simple queue for an atomic, transactional
1611 * reopen of multiple devices.
1613 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLEQ_INIT
1614 * already performed, or alternatively may be NULL, in which case a new BlockReopenQueue will
1615 * be created and initialized. This newly created BlockReopenQueue should be
1616 * passed back in for subsequent calls that are intended to be of the same
1619 * bs is the BlockDriverState to add to the reopen queue.
1621 * flags contains the open flags for the associated bs
1623 * returns a pointer to bs_queue, which is either the newly allocated
1624 * bs_queue, or the existing bs_queue being used.
1627 BlockReopenQueue
*bdrv_reopen_queue(BlockReopenQueue
*bs_queue
,
1628 BlockDriverState
*bs
, int flags
)
1632 BlockReopenQueueEntry
*bs_entry
;
1633 if (bs_queue
== NULL
) {
1634 bs_queue
= g_new0(BlockReopenQueue
, 1);
1635 QSIMPLEQ_INIT(bs_queue
);
1638 /* bdrv_open() masks this flag out */
1639 flags
&= ~BDRV_O_PROTOCOL
;
1642 bdrv_reopen_queue(bs_queue
, bs
->file
, bdrv_inherited_flags(flags
));
1645 bs_entry
= g_new0(BlockReopenQueueEntry
, 1);
1646 QSIMPLEQ_INSERT_TAIL(bs_queue
, bs_entry
, entry
);
1648 bs_entry
->state
.bs
= bs
;
1649 bs_entry
->state
.flags
= flags
;
1655 * Reopen multiple BlockDriverStates atomically & transactionally.
1657 * The queue passed in (bs_queue) must have been built up previously
1658 * via bdrv_reopen_queue().
1660 * Reopens all BDS specified in the queue, with the appropriate
1661 * flags. All devices are prepared for reopen, and failure of any
1662 * device will cause all device changes to be abandoned, and intermediate
1665 * If all devices prepare successfully, then the changes are committed
1669 int bdrv_reopen_multiple(BlockReopenQueue
*bs_queue
, Error
**errp
)
1672 BlockReopenQueueEntry
*bs_entry
, *next
;
1673 Error
*local_err
= NULL
;
1675 assert(bs_queue
!= NULL
);
1679 QSIMPLEQ_FOREACH(bs_entry
, bs_queue
, entry
) {
1680 if (bdrv_reopen_prepare(&bs_entry
->state
, bs_queue
, &local_err
)) {
1681 error_propagate(errp
, local_err
);
1684 bs_entry
->prepared
= true;
1687 /* If we reach this point, we have success and just need to apply the
1690 QSIMPLEQ_FOREACH(bs_entry
, bs_queue
, entry
) {
1691 bdrv_reopen_commit(&bs_entry
->state
);
1697 QSIMPLEQ_FOREACH_SAFE(bs_entry
, bs_queue
, entry
, next
) {
1698 if (ret
&& bs_entry
->prepared
) {
1699 bdrv_reopen_abort(&bs_entry
->state
);
1708 /* Reopen a single BlockDriverState with the specified flags. */
1709 int bdrv_reopen(BlockDriverState
*bs
, int bdrv_flags
, Error
**errp
)
1712 Error
*local_err
= NULL
;
1713 BlockReopenQueue
*queue
= bdrv_reopen_queue(NULL
, bs
, bdrv_flags
);
1715 ret
= bdrv_reopen_multiple(queue
, &local_err
);
1716 if (local_err
!= NULL
) {
1717 error_propagate(errp
, local_err
);
1724 * Prepares a BlockDriverState for reopen. All changes are staged in the
1725 * 'opaque' field of the BDRVReopenState, which is used and allocated by
1726 * the block driver layer .bdrv_reopen_prepare()
1728 * bs is the BlockDriverState to reopen
1729 * flags are the new open flags
1730 * queue is the reopen queue
1732 * Returns 0 on success, non-zero on error. On error errp will be set
1735 * On failure, bdrv_reopen_abort() will be called to clean up any data.
1736 * It is the responsibility of the caller to then call the abort() or
1737 * commit() for any other BDS that have been left in a prepare() state
1740 int bdrv_reopen_prepare(BDRVReopenState
*reopen_state
, BlockReopenQueue
*queue
,
1744 Error
*local_err
= NULL
;
1747 assert(reopen_state
!= NULL
);
1748 assert(reopen_state
->bs
->drv
!= NULL
);
1749 drv
= reopen_state
->bs
->drv
;
1751 /* if we are to stay read-only, do not allow permission change
1753 if (!(reopen_state
->bs
->open_flags
& BDRV_O_ALLOW_RDWR
) &&
1754 reopen_state
->flags
& BDRV_O_RDWR
) {
1755 error_set(errp
, QERR_DEVICE_IS_READ_ONLY
,
1756 reopen_state
->bs
->device_name
);
1761 ret
= bdrv_flush(reopen_state
->bs
);
1763 error_set(errp
, ERROR_CLASS_GENERIC_ERROR
, "Error (%s) flushing drive",
1768 if (drv
->bdrv_reopen_prepare
) {
1769 ret
= drv
->bdrv_reopen_prepare(reopen_state
, queue
, &local_err
);
1771 if (local_err
!= NULL
) {
1772 error_propagate(errp
, local_err
);
1774 error_setg(errp
, "failed while preparing to reopen image '%s'",
1775 reopen_state
->bs
->filename
);
1780 /* It is currently mandatory to have a bdrv_reopen_prepare()
1781 * handler for each supported drv. */
1782 error_set(errp
, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED
,
1783 drv
->format_name
, reopen_state
->bs
->device_name
,
1784 "reopening of file");
1796 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1797 * makes them final by swapping the staging BlockDriverState contents into
1798 * the active BlockDriverState contents.
1800 void bdrv_reopen_commit(BDRVReopenState
*reopen_state
)
1804 assert(reopen_state
!= NULL
);
1805 drv
= reopen_state
->bs
->drv
;
1806 assert(drv
!= NULL
);
1808 /* If there are any driver level actions to take */
1809 if (drv
->bdrv_reopen_commit
) {
1810 drv
->bdrv_reopen_commit(reopen_state
);
1813 /* set BDS specific flags now */
1814 reopen_state
->bs
->open_flags
= reopen_state
->flags
;
1815 reopen_state
->bs
->enable_write_cache
= !!(reopen_state
->flags
&
1817 reopen_state
->bs
->read_only
= !(reopen_state
->flags
& BDRV_O_RDWR
);
1819 bdrv_refresh_limits(reopen_state
->bs
);
1823 * Abort the reopen, and delete and free the staged changes in
1826 void bdrv_reopen_abort(BDRVReopenState
*reopen_state
)
1830 assert(reopen_state
!= NULL
);
1831 drv
= reopen_state
->bs
->drv
;
1832 assert(drv
!= NULL
);
1834 if (drv
->bdrv_reopen_abort
) {
1835 drv
->bdrv_reopen_abort(reopen_state
);
1840 void bdrv_close(BlockDriverState
*bs
)
1843 block_job_cancel_sync(bs
->job
);
1845 bdrv_drain_all(); /* complete I/O */
1847 bdrv_drain_all(); /* in case flush left pending I/O */
1848 notifier_list_notify(&bs
->close_notifiers
, bs
);
1851 if (bs
->backing_hd
) {
1852 BlockDriverState
*backing_hd
= bs
->backing_hd
;
1853 bdrv_set_backing_hd(bs
, NULL
);
1854 bdrv_unref(backing_hd
);
1856 bs
->drv
->bdrv_close(bs
);
1860 bs
->copy_on_read
= 0;
1861 bs
->backing_file
[0] = '\0';
1862 bs
->backing_format
[0] = '\0';
1863 bs
->total_sectors
= 0;
1868 bs
->zero_beyond_eof
= false;
1869 QDECREF(bs
->options
);
1872 if (bs
->file
!= NULL
) {
1873 bdrv_unref(bs
->file
);
1878 bdrv_dev_change_media_cb(bs
, false);
1880 /*throttling disk I/O limits*/
1881 if (bs
->io_limits_enabled
) {
1882 bdrv_io_limits_disable(bs
);
1886 void bdrv_close_all(void)
1888 BlockDriverState
*bs
;
1890 QTAILQ_FOREACH(bs
, &bdrv_states
, device_list
) {
1891 AioContext
*aio_context
= bdrv_get_aio_context(bs
);
1893 aio_context_acquire(aio_context
);
1895 aio_context_release(aio_context
);
1899 /* Check if any requests are in-flight (including throttled requests) */
1900 static bool bdrv_requests_pending(BlockDriverState
*bs
)
1902 if (!QLIST_EMPTY(&bs
->tracked_requests
)) {
1905 if (!qemu_co_queue_empty(&bs
->throttled_reqs
[0])) {
1908 if (!qemu_co_queue_empty(&bs
->throttled_reqs
[1])) {
1911 if (bs
->file
&& bdrv_requests_pending(bs
->file
)) {
1914 if (bs
->backing_hd
&& bdrv_requests_pending(bs
->backing_hd
)) {
1921 * Wait for pending requests to complete across all BlockDriverStates
1923 * This function does not flush data to disk, use bdrv_flush_all() for that
1924 * after calling this function.
1926 * Note that completion of an asynchronous I/O operation can trigger any
1927 * number of other I/O operations on other devices---for example a coroutine
1928 * can be arbitrarily complex and a constant flow of I/O can come until the
1929 * coroutine is complete. Because of this, it is not possible to have a
1930 * function to drain a single device's I/O queue.
1932 void bdrv_drain_all(void)
1934 /* Always run first iteration so any pending completion BHs run */
1936 BlockDriverState
*bs
;
1941 QTAILQ_FOREACH(bs
, &bdrv_states
, device_list
) {
1942 AioContext
*aio_context
= bdrv_get_aio_context(bs
);
1945 aio_context_acquire(aio_context
);
1946 bdrv_start_throttled_reqs(bs
);
1947 bs_busy
= bdrv_requests_pending(bs
);
1948 bs_busy
|= aio_poll(aio_context
, bs_busy
);
1949 aio_context_release(aio_context
);
1956 /* make a BlockDriverState anonymous by removing it from the bdrv_states and
1957 * graph_bdrv_states lists.
1958 Also, clear the device_name and node_name to prevent a double remove */
1959 void bdrv_make_anon(BlockDriverState
*bs
)
1961 if (bs
->device_name
[0] != '\0') {
1962 QTAILQ_REMOVE(&bdrv_states
, bs
, device_list
);
1964 bs
->device_name
[0] = '\0';
1965 if (bs
->node_name
[0] != '\0') {
1966 QTAILQ_REMOVE(&graph_bdrv_states
, bs
, node_list
);
1968 bs
->node_name
[0] = '\0';
1971 static void bdrv_rebind(BlockDriverState
*bs
)
1973 if (bs
->drv
&& bs
->drv
->bdrv_rebind
) {
1974 bs
->drv
->bdrv_rebind(bs
);
1978 static void bdrv_move_feature_fields(BlockDriverState
*bs_dest
,
1979 BlockDriverState
*bs_src
)
1981 /* move some fields that need to stay attached to the device */
1984 bs_dest
->dev_ops
= bs_src
->dev_ops
;
1985 bs_dest
->dev_opaque
= bs_src
->dev_opaque
;
1986 bs_dest
->dev
= bs_src
->dev
;
1987 bs_dest
->guest_block_size
= bs_src
->guest_block_size
;
1988 bs_dest
->copy_on_read
= bs_src
->copy_on_read
;
1990 bs_dest
->enable_write_cache
= bs_src
->enable_write_cache
;
1992 /* i/o throttled req */
1993 memcpy(&bs_dest
->throttle_state
,
1994 &bs_src
->throttle_state
,
1995 sizeof(ThrottleState
));
1996 bs_dest
->throttled_reqs
[0] = bs_src
->throttled_reqs
[0];
1997 bs_dest
->throttled_reqs
[1] = bs_src
->throttled_reqs
[1];
1998 bs_dest
->io_limits_enabled
= bs_src
->io_limits_enabled
;
2001 bs_dest
->on_read_error
= bs_src
->on_read_error
;
2002 bs_dest
->on_write_error
= bs_src
->on_write_error
;
2005 bs_dest
->iostatus_enabled
= bs_src
->iostatus_enabled
;
2006 bs_dest
->iostatus
= bs_src
->iostatus
;
2009 bs_dest
->dirty_bitmaps
= bs_src
->dirty_bitmaps
;
2011 /* reference count */
2012 bs_dest
->refcnt
= bs_src
->refcnt
;
2015 bs_dest
->job
= bs_src
->job
;
2017 /* keep the same entry in bdrv_states */
2018 pstrcpy(bs_dest
->device_name
, sizeof(bs_dest
->device_name
),
2019 bs_src
->device_name
);
2020 bs_dest
->device_list
= bs_src
->device_list
;
2021 memcpy(bs_dest
->op_blockers
, bs_src
->op_blockers
,
2022 sizeof(bs_dest
->op_blockers
));
2026 * Swap bs contents for two image chains while they are live,
2027 * while keeping required fields on the BlockDriverState that is
2028 * actually attached to a device.
2030 * This will modify the BlockDriverState fields, and swap contents
2031 * between bs_new and bs_old. Both bs_new and bs_old are modified.
2033 * bs_new is required to be anonymous.
2035 * This function does not create any image files.
2037 void bdrv_swap(BlockDriverState
*bs_new
, BlockDriverState
*bs_old
)
2039 BlockDriverState tmp
;
2041 /* The code needs to swap the node_name but simply swapping node_list won't
2042 * work so first remove the nodes from the graph list, do the swap then
2043 * insert them back if needed.
2045 if (bs_new
->node_name
[0] != '\0') {
2046 QTAILQ_REMOVE(&graph_bdrv_states
, bs_new
, node_list
);
2048 if (bs_old
->node_name
[0] != '\0') {
2049 QTAILQ_REMOVE(&graph_bdrv_states
, bs_old
, node_list
);
2052 /* bs_new must be anonymous and shouldn't have anything fancy enabled */
2053 assert(bs_new
->device_name
[0] == '\0');
2054 assert(QLIST_EMPTY(&bs_new
->dirty_bitmaps
));
2055 assert(bs_new
->job
== NULL
);
2056 assert(bs_new
->dev
== NULL
);
2057 assert(bs_new
->io_limits_enabled
== false);
2058 assert(!throttle_have_timer(&bs_new
->throttle_state
));
2064 /* there are some fields that should not be swapped, move them back */
2065 bdrv_move_feature_fields(&tmp
, bs_old
);
2066 bdrv_move_feature_fields(bs_old
, bs_new
);
2067 bdrv_move_feature_fields(bs_new
, &tmp
);
2069 /* bs_new shouldn't be in bdrv_states even after the swap! */
2070 assert(bs_new
->device_name
[0] == '\0');
2072 /* Check a few fields that should remain attached to the device */
2073 assert(bs_new
->dev
== NULL
);
2074 assert(bs_new
->job
== NULL
);
2075 assert(bs_new
->io_limits_enabled
== false);
2076 assert(!throttle_have_timer(&bs_new
->throttle_state
));
2078 /* insert the nodes back into the graph node list if needed */
2079 if (bs_new
->node_name
[0] != '\0') {
2080 QTAILQ_INSERT_TAIL(&graph_bdrv_states
, bs_new
, node_list
);
2082 if (bs_old
->node_name
[0] != '\0') {
2083 QTAILQ_INSERT_TAIL(&graph_bdrv_states
, bs_old
, node_list
);
2086 bdrv_rebind(bs_new
);
2087 bdrv_rebind(bs_old
);
2091 * Add new bs contents at the top of an image chain while the chain is
2092 * live, while keeping required fields on the top layer.
2094 * This will modify the BlockDriverState fields, and swap contents
2095 * between bs_new and bs_top. Both bs_new and bs_top are modified.
2097 * bs_new is required to be anonymous.
2099 * This function does not create any image files.
2101 void bdrv_append(BlockDriverState
*bs_new
, BlockDriverState
*bs_top
)
2103 bdrv_swap(bs_new
, bs_top
);
2105 /* The contents of 'tmp' will become bs_top, as we are
2106 * swapping bs_new and bs_top contents. */
2107 bdrv_set_backing_hd(bs_top
, bs_new
);
2110 static void bdrv_delete(BlockDriverState
*bs
)
2114 assert(bdrv_op_blocker_is_empty(bs
));
2115 assert(!bs
->refcnt
);
2116 assert(QLIST_EMPTY(&bs
->dirty_bitmaps
));
2120 /* remove from list, if necessary */
2126 int bdrv_attach_dev(BlockDriverState
*bs
, void *dev
)
2127 /* TODO change to DeviceState *dev when all users are qdevified */
2133 bdrv_iostatus_reset(bs
);
2137 /* TODO qdevified devices don't use this, remove when devices are qdevified */
2138 void bdrv_attach_dev_nofail(BlockDriverState
*bs
, void *dev
)
2140 if (bdrv_attach_dev(bs
, dev
) < 0) {
2145 void bdrv_detach_dev(BlockDriverState
*bs
, void *dev
)
2146 /* TODO change to DeviceState *dev when all users are qdevified */
2148 assert(bs
->dev
== dev
);
2151 bs
->dev_opaque
= NULL
;
2152 bs
->guest_block_size
= 512;
2155 /* TODO change to return DeviceState * when all users are qdevified */
2156 void *bdrv_get_attached_dev(BlockDriverState
*bs
)
2161 void bdrv_set_dev_ops(BlockDriverState
*bs
, const BlockDevOps
*ops
,
2165 bs
->dev_opaque
= opaque
;
2168 static void bdrv_dev_change_media_cb(BlockDriverState
*bs
, bool load
)
2170 if (bs
->dev_ops
&& bs
->dev_ops
->change_media_cb
) {
2171 bool tray_was_closed
= !bdrv_dev_is_tray_open(bs
);
2172 bs
->dev_ops
->change_media_cb(bs
->dev_opaque
, load
);
2173 if (tray_was_closed
) {
2175 qapi_event_send_device_tray_moved(bdrv_get_device_name(bs
),
2176 true, &error_abort
);
2180 qapi_event_send_device_tray_moved(bdrv_get_device_name(bs
),
2181 false, &error_abort
);
2186 bool bdrv_dev_has_removable_media(BlockDriverState
*bs
)
2188 return !bs
->dev
|| (bs
->dev_ops
&& bs
->dev_ops
->change_media_cb
);
2191 void bdrv_dev_eject_request(BlockDriverState
*bs
, bool force
)
2193 if (bs
->dev_ops
&& bs
->dev_ops
->eject_request_cb
) {
2194 bs
->dev_ops
->eject_request_cb(bs
->dev_opaque
, force
);
2198 bool bdrv_dev_is_tray_open(BlockDriverState
*bs
)
2200 if (bs
->dev_ops
&& bs
->dev_ops
->is_tray_open
) {
2201 return bs
->dev_ops
->is_tray_open(bs
->dev_opaque
);
2206 static void bdrv_dev_resize_cb(BlockDriverState
*bs
)
2208 if (bs
->dev_ops
&& bs
->dev_ops
->resize_cb
) {
2209 bs
->dev_ops
->resize_cb(bs
->dev_opaque
);
2213 bool bdrv_dev_is_medium_locked(BlockDriverState
*bs
)
2215 if (bs
->dev_ops
&& bs
->dev_ops
->is_medium_locked
) {
2216 return bs
->dev_ops
->is_medium_locked(bs
->dev_opaque
);
2222 * Run consistency checks on an image
2224 * Returns 0 if the check could be completed (it doesn't mean that the image is
2225 * free of errors) or -errno when an internal error occurred. The results of the
2226 * check are stored in res.
2228 int bdrv_check(BlockDriverState
*bs
, BdrvCheckResult
*res
, BdrvCheckMode fix
)
2230 if (bs
->drv
->bdrv_check
== NULL
) {
2234 memset(res
, 0, sizeof(*res
));
2235 return bs
->drv
->bdrv_check(bs
, res
, fix
);
2238 #define COMMIT_BUF_SECTORS 2048
2240 /* commit COW file into the raw image */
2241 int bdrv_commit(BlockDriverState
*bs
)
2243 BlockDriver
*drv
= bs
->drv
;
2244 int64_t sector
, total_sectors
, length
, backing_length
;
2245 int n
, ro
, open_flags
;
2247 uint8_t *buf
= NULL
;
2248 char filename
[PATH_MAX
];
2253 if (!bs
->backing_hd
) {
2257 if (bdrv_op_is_blocked(bs
, BLOCK_OP_TYPE_COMMIT
, NULL
) ||
2258 bdrv_op_is_blocked(bs
->backing_hd
, BLOCK_OP_TYPE_COMMIT
, NULL
)) {
2262 ro
= bs
->backing_hd
->read_only
;
2263 /* Use pstrcpy (not strncpy): filename must be NUL-terminated. */
2264 pstrcpy(filename
, sizeof(filename
), bs
->backing_hd
->filename
);
2265 open_flags
= bs
->backing_hd
->open_flags
;
2268 if (bdrv_reopen(bs
->backing_hd
, open_flags
| BDRV_O_RDWR
, NULL
)) {
2273 length
= bdrv_getlength(bs
);
2279 backing_length
= bdrv_getlength(bs
->backing_hd
);
2280 if (backing_length
< 0) {
2281 ret
= backing_length
;
2285 /* If our top snapshot is larger than the backing file image,
2286 * grow the backing file image if possible. If not possible,
2287 * we must return an error */
2288 if (length
> backing_length
) {
2289 ret
= bdrv_truncate(bs
->backing_hd
, length
);
2295 total_sectors
= length
>> BDRV_SECTOR_BITS
;
2296 buf
= g_malloc(COMMIT_BUF_SECTORS
* BDRV_SECTOR_SIZE
);
2298 for (sector
= 0; sector
< total_sectors
; sector
+= n
) {
2299 ret
= bdrv_is_allocated(bs
, sector
, COMMIT_BUF_SECTORS
, &n
);
2304 ret
= bdrv_read(bs
, sector
, buf
, n
);
2309 ret
= bdrv_write(bs
->backing_hd
, sector
, buf
, n
);
2316 if (drv
->bdrv_make_empty
) {
2317 ret
= drv
->bdrv_make_empty(bs
);
2325 * Make sure all data we wrote to the backing device is actually
2328 if (bs
->backing_hd
) {
2329 bdrv_flush(bs
->backing_hd
);
2337 /* ignoring error return here */
2338 bdrv_reopen(bs
->backing_hd
, open_flags
& ~BDRV_O_RDWR
, NULL
);
2344 int bdrv_commit_all(void)
2346 BlockDriverState
*bs
;
2348 QTAILQ_FOREACH(bs
, &bdrv_states
, device_list
) {
2349 AioContext
*aio_context
= bdrv_get_aio_context(bs
);
2351 aio_context_acquire(aio_context
);
2352 if (bs
->drv
&& bs
->backing_hd
) {
2353 int ret
= bdrv_commit(bs
);
2355 aio_context_release(aio_context
);
2359 aio_context_release(aio_context
);
2365 * Remove an active request from the tracked requests list
2367 * This function should be called when a tracked request is completing.
2369 static void tracked_request_end(BdrvTrackedRequest
*req
)
2371 if (req
->serialising
) {
2372 req
->bs
->serialising_in_flight
--;
2375 QLIST_REMOVE(req
, list
);
2376 qemu_co_queue_restart_all(&req
->wait_queue
);
2380 * Add an active request to the tracked requests list
2382 static void tracked_request_begin(BdrvTrackedRequest
*req
,
2383 BlockDriverState
*bs
,
2385 unsigned int bytes
, bool is_write
)
2387 *req
= (BdrvTrackedRequest
){
2391 .is_write
= is_write
,
2392 .co
= qemu_coroutine_self(),
2393 .serialising
= false,
2394 .overlap_offset
= offset
,
2395 .overlap_bytes
= bytes
,
2398 qemu_co_queue_init(&req
->wait_queue
);
2400 QLIST_INSERT_HEAD(&bs
->tracked_requests
, req
, list
);
2403 static void mark_request_serialising(BdrvTrackedRequest
*req
, uint64_t align
)
2405 int64_t overlap_offset
= req
->offset
& ~(align
- 1);
2406 unsigned int overlap_bytes
= ROUND_UP(req
->offset
+ req
->bytes
, align
)
2409 if (!req
->serialising
) {
2410 req
->bs
->serialising_in_flight
++;
2411 req
->serialising
= true;
2414 req
->overlap_offset
= MIN(req
->overlap_offset
, overlap_offset
);
2415 req
->overlap_bytes
= MAX(req
->overlap_bytes
, overlap_bytes
);
2419 * Round a region to cluster boundaries
2421 void bdrv_round_to_clusters(BlockDriverState
*bs
,
2422 int64_t sector_num
, int nb_sectors
,
2423 int64_t *cluster_sector_num
,
2424 int *cluster_nb_sectors
)
2426 BlockDriverInfo bdi
;
2428 if (bdrv_get_info(bs
, &bdi
) < 0 || bdi
.cluster_size
== 0) {
2429 *cluster_sector_num
= sector_num
;
2430 *cluster_nb_sectors
= nb_sectors
;
2432 int64_t c
= bdi
.cluster_size
/ BDRV_SECTOR_SIZE
;
2433 *cluster_sector_num
= QEMU_ALIGN_DOWN(sector_num
, c
);
2434 *cluster_nb_sectors
= QEMU_ALIGN_UP(sector_num
- *cluster_sector_num
+
2439 static int bdrv_get_cluster_size(BlockDriverState
*bs
)
2441 BlockDriverInfo bdi
;
2444 ret
= bdrv_get_info(bs
, &bdi
);
2445 if (ret
< 0 || bdi
.cluster_size
== 0) {
2446 return bs
->request_alignment
;
2448 return bdi
.cluster_size
;
2452 static bool tracked_request_overlaps(BdrvTrackedRequest
*req
,
2453 int64_t offset
, unsigned int bytes
)
2456 if (offset
>= req
->overlap_offset
+ req
->overlap_bytes
) {
2460 if (req
->overlap_offset
>= offset
+ bytes
) {
2466 static bool coroutine_fn
wait_serialising_requests(BdrvTrackedRequest
*self
)
2468 BlockDriverState
*bs
= self
->bs
;
2469 BdrvTrackedRequest
*req
;
2471 bool waited
= false;
2473 if (!bs
->serialising_in_flight
) {
2479 QLIST_FOREACH(req
, &bs
->tracked_requests
, list
) {
2480 if (req
== self
|| (!req
->serialising
&& !self
->serialising
)) {
2483 if (tracked_request_overlaps(req
, self
->overlap_offset
,
2484 self
->overlap_bytes
))
2486 /* Hitting this means there was a reentrant request, for
2487 * example, a block driver issuing nested requests. This must
2488 * never happen since it means deadlock.
2490 assert(qemu_coroutine_self() != req
->co
);
2492 /* If the request is already (indirectly) waiting for us, or
2493 * will wait for us as soon as it wakes up, then just go on
2494 * (instead of producing a deadlock in the former case). */
2495 if (!req
->waiting_for
) {
2496 self
->waiting_for
= req
;
2497 qemu_co_queue_wait(&req
->wait_queue
);
2498 self
->waiting_for
= NULL
;
2513 * -EINVAL - backing format specified, but no file
2514 * -ENOSPC - can't update the backing file because no space is left in the
2516 * -ENOTSUP - format driver doesn't support changing the backing file
2518 int bdrv_change_backing_file(BlockDriverState
*bs
,
2519 const char *backing_file
, const char *backing_fmt
)
2521 BlockDriver
*drv
= bs
->drv
;
2524 /* Backing file format doesn't make sense without a backing file */
2525 if (backing_fmt
&& !backing_file
) {
2529 if (drv
->bdrv_change_backing_file
!= NULL
) {
2530 ret
= drv
->bdrv_change_backing_file(bs
, backing_file
, backing_fmt
);
2536 pstrcpy(bs
->backing_file
, sizeof(bs
->backing_file
), backing_file
?: "");
2537 pstrcpy(bs
->backing_format
, sizeof(bs
->backing_format
), backing_fmt
?: "");
2543 * Finds the image layer in the chain that has 'bs' as its backing file.
2545 * active is the current topmost image.
2547 * Returns NULL if bs is not found in active's image chain,
2548 * or if active == bs.
2550 BlockDriverState
*bdrv_find_overlay(BlockDriverState
*active
,
2551 BlockDriverState
*bs
)
2553 BlockDriverState
*overlay
= NULL
;
2554 BlockDriverState
*intermediate
;
2556 assert(active
!= NULL
);
2559 /* if bs is the same as active, then by definition it has no overlay
2565 intermediate
= active
;
2566 while (intermediate
->backing_hd
) {
2567 if (intermediate
->backing_hd
== bs
) {
2568 overlay
= intermediate
;
2571 intermediate
= intermediate
->backing_hd
;
2577 typedef struct BlkIntermediateStates
{
2578 BlockDriverState
*bs
;
2579 QSIMPLEQ_ENTRY(BlkIntermediateStates
) entry
;
2580 } BlkIntermediateStates
;
2584 * Drops images above 'base' up to and including 'top', and sets the image
2585 * above 'top' to have base as its backing file.
2587 * Requires that the overlay to 'top' is opened r/w, so that the backing file
2588 * information in 'bs' can be properly updated.
2590 * E.g., this will convert the following chain:
2591 * bottom <- base <- intermediate <- top <- active
2595 * bottom <- base <- active
2597 * It is allowed for bottom==base, in which case it converts:
2599 * base <- intermediate <- top <- active
2606 * if active == top, that is considered an error
2609 int bdrv_drop_intermediate(BlockDriverState
*active
, BlockDriverState
*top
,
2610 BlockDriverState
*base
)
2612 BlockDriverState
*intermediate
;
2613 BlockDriverState
*base_bs
= NULL
;
2614 BlockDriverState
*new_top_bs
= NULL
;
2615 BlkIntermediateStates
*intermediate_state
, *next
;
2618 QSIMPLEQ_HEAD(states_to_delete
, BlkIntermediateStates
) states_to_delete
;
2619 QSIMPLEQ_INIT(&states_to_delete
);
2621 if (!top
->drv
|| !base
->drv
) {
2625 new_top_bs
= bdrv_find_overlay(active
, top
);
2627 if (new_top_bs
== NULL
) {
2628 /* we could not find the image above 'top', this is an error */
2632 /* special case of new_top_bs->backing_hd already pointing to base - nothing
2633 * to do, no intermediate images */
2634 if (new_top_bs
->backing_hd
== base
) {
2641 /* now we will go down through the list, and add each BDS we find
2642 * into our deletion queue, until we hit the 'base'
2644 while (intermediate
) {
2645 intermediate_state
= g_malloc0(sizeof(BlkIntermediateStates
));
2646 intermediate_state
->bs
= intermediate
;
2647 QSIMPLEQ_INSERT_TAIL(&states_to_delete
, intermediate_state
, entry
);
2649 if (intermediate
->backing_hd
== base
) {
2650 base_bs
= intermediate
->backing_hd
;
2653 intermediate
= intermediate
->backing_hd
;
2655 if (base_bs
== NULL
) {
2656 /* something went wrong, we did not end at the base. safely
2657 * unravel everything, and exit with error */
2661 /* success - we can delete the intermediate states, and link top->base */
2662 ret
= bdrv_change_backing_file(new_top_bs
, base_bs
->filename
,
2663 base_bs
->drv
? base_bs
->drv
->format_name
: "");
2667 bdrv_set_backing_hd(new_top_bs
, base_bs
);
2669 QSIMPLEQ_FOREACH_SAFE(intermediate_state
, &states_to_delete
, entry
, next
) {
2670 /* so that bdrv_close() does not recursively close the chain */
2671 bdrv_set_backing_hd(intermediate_state
->bs
, NULL
);
2672 bdrv_unref(intermediate_state
->bs
);
2677 QSIMPLEQ_FOREACH_SAFE(intermediate_state
, &states_to_delete
, entry
, next
) {
2678 g_free(intermediate_state
);
2684 static int bdrv_check_byte_request(BlockDriverState
*bs
, int64_t offset
,
2689 if (size
> INT_MAX
) {
2693 if (!bdrv_is_inserted(bs
))
2699 len
= bdrv_getlength(bs
);
2704 if ((offset
> len
) || (len
- offset
< size
))
2710 static int bdrv_check_request(BlockDriverState
*bs
, int64_t sector_num
,
2713 if (nb_sectors
< 0 || nb_sectors
> INT_MAX
/ BDRV_SECTOR_SIZE
) {
2717 return bdrv_check_byte_request(bs
, sector_num
* BDRV_SECTOR_SIZE
,
2718 nb_sectors
* BDRV_SECTOR_SIZE
);
2721 typedef struct RwCo
{
2722 BlockDriverState
*bs
;
2727 BdrvRequestFlags flags
;
2730 static void coroutine_fn
bdrv_rw_co_entry(void *opaque
)
2732 RwCo
*rwco
= opaque
;
2734 if (!rwco
->is_write
) {
2735 rwco
->ret
= bdrv_co_do_preadv(rwco
->bs
, rwco
->offset
,
2736 rwco
->qiov
->size
, rwco
->qiov
,
2739 rwco
->ret
= bdrv_co_do_pwritev(rwco
->bs
, rwco
->offset
,
2740 rwco
->qiov
->size
, rwco
->qiov
,
2746 * Process a vectored synchronous request using coroutines
2748 static int bdrv_prwv_co(BlockDriverState
*bs
, int64_t offset
,
2749 QEMUIOVector
*qiov
, bool is_write
,
2750 BdrvRequestFlags flags
)
2757 .is_write
= is_write
,
2763 * In sync call context, when the vcpu is blocked, this throttling timer
2764 * will not fire; so the I/O throttling function has to be disabled here
2765 * if it has been enabled.
2767 if (bs
->io_limits_enabled
) {
2768 fprintf(stderr
, "Disabling I/O throttling on '%s' due "
2769 "to synchronous I/O.\n", bdrv_get_device_name(bs
));
2770 bdrv_io_limits_disable(bs
);
2773 if (qemu_in_coroutine()) {
2774 /* Fast-path if already in coroutine context */
2775 bdrv_rw_co_entry(&rwco
);
2777 AioContext
*aio_context
= bdrv_get_aio_context(bs
);
2779 co
= qemu_coroutine_create(bdrv_rw_co_entry
);
2780 qemu_coroutine_enter(co
, &rwco
);
2781 while (rwco
.ret
== NOT_DONE
) {
2782 aio_poll(aio_context
, true);
2789 * Process a synchronous request using coroutines
2791 static int bdrv_rw_co(BlockDriverState
*bs
, int64_t sector_num
, uint8_t *buf
,
2792 int nb_sectors
, bool is_write
, BdrvRequestFlags flags
)
2795 struct iovec iov
= {
2796 .iov_base
= (void *)buf
,
2797 .iov_len
= nb_sectors
* BDRV_SECTOR_SIZE
,
2800 if (nb_sectors
< 0 || nb_sectors
> INT_MAX
/ BDRV_SECTOR_SIZE
) {
2804 qemu_iovec_init_external(&qiov
, &iov
, 1);
2805 return bdrv_prwv_co(bs
, sector_num
<< BDRV_SECTOR_BITS
,
2806 &qiov
, is_write
, flags
);
2809 /* return < 0 if error. See bdrv_write() for the return codes */
2810 int bdrv_read(BlockDriverState
*bs
, int64_t sector_num
,
2811 uint8_t *buf
, int nb_sectors
)
2813 return bdrv_rw_co(bs
, sector_num
, buf
, nb_sectors
, false, 0);
2816 /* Just like bdrv_read(), but with I/O throttling temporarily disabled */
2817 int bdrv_read_unthrottled(BlockDriverState
*bs
, int64_t sector_num
,
2818 uint8_t *buf
, int nb_sectors
)
2823 enabled
= bs
->io_limits_enabled
;
2824 bs
->io_limits_enabled
= false;
2825 ret
= bdrv_read(bs
, sector_num
, buf
, nb_sectors
);
2826 bs
->io_limits_enabled
= enabled
;
2830 /* Return < 0 if error. Important errors are:
2831 -EIO generic I/O error (may happen for all errors)
2832 -ENOMEDIUM No media inserted.
2833 -EINVAL Invalid sector number or nb_sectors
2834 -EACCES Trying to write a read-only device
2836 int bdrv_write(BlockDriverState
*bs
, int64_t sector_num
,
2837 const uint8_t *buf
, int nb_sectors
)
2839 return bdrv_rw_co(bs
, sector_num
, (uint8_t *)buf
, nb_sectors
, true, 0);
2842 int bdrv_write_zeroes(BlockDriverState
*bs
, int64_t sector_num
,
2843 int nb_sectors
, BdrvRequestFlags flags
)
2845 return bdrv_rw_co(bs
, sector_num
, NULL
, nb_sectors
, true,
2846 BDRV_REQ_ZERO_WRITE
| flags
);
2850 * Completely zero out a block device with the help of bdrv_write_zeroes.
2851 * The operation is sped up by checking the block status and only writing
2852 * zeroes to the sectors that do not already read as zeroes. Optional
2853 * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
2855 * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
2857 int bdrv_make_zero(BlockDriverState
*bs
, BdrvRequestFlags flags
)
2859 int64_t target_size
;
2860 int64_t ret
, nb_sectors
, sector_num
= 0;
2863 target_size
= bdrv_getlength(bs
);
2864 if (target_size
< 0) {
2867 target_size
/= BDRV_SECTOR_SIZE
;
2870 nb_sectors
= target_size
- sector_num
;
2871 if (nb_sectors
<= 0) {
2874 if (nb_sectors
> INT_MAX
) {
2875 nb_sectors
= INT_MAX
;
2877 ret
= bdrv_get_block_status(bs
, sector_num
, nb_sectors
, &n
);
2879 error_report("error getting block status at sector %" PRId64
": %s",
2880 sector_num
, strerror(-ret
));
2883 if (ret
& BDRV_BLOCK_ZERO
) {
2887 ret
= bdrv_write_zeroes(bs
, sector_num
, n
, flags
);
2889 error_report("error writing zeroes at sector %" PRId64
": %s",
2890 sector_num
, strerror(-ret
));
2897 int bdrv_pread(BlockDriverState
*bs
, int64_t offset
, void *buf
, int bytes
)
2900 struct iovec iov
= {
2901 .iov_base
= (void *)buf
,
2910 qemu_iovec_init_external(&qiov
, &iov
, 1);
2911 ret
= bdrv_prwv_co(bs
, offset
, &qiov
, false, 0);
2919 int bdrv_pwritev(BlockDriverState
*bs
, int64_t offset
, QEMUIOVector
*qiov
)
2923 ret
= bdrv_prwv_co(bs
, offset
, qiov
, true, 0);
2931 int bdrv_pwrite(BlockDriverState
*bs
, int64_t offset
,
2932 const void *buf
, int bytes
)
2935 struct iovec iov
= {
2936 .iov_base
= (void *) buf
,
2944 qemu_iovec_init_external(&qiov
, &iov
, 1);
2945 return bdrv_pwritev(bs
, offset
, &qiov
);
2949 * Writes to the file and ensures that no writes are reordered across this
2950 * request (acts as a barrier)
2952 * Returns 0 on success, -errno in error cases.
2954 int bdrv_pwrite_sync(BlockDriverState
*bs
, int64_t offset
,
2955 const void *buf
, int count
)
2959 ret
= bdrv_pwrite(bs
, offset
, buf
, count
);
2964 /* No flush needed for cache modes that already do it */
2965 if (bs
->enable_write_cache
) {
2972 static int coroutine_fn
bdrv_co_do_copy_on_readv(BlockDriverState
*bs
,
2973 int64_t sector_num
, int nb_sectors
, QEMUIOVector
*qiov
)
2975 /* Perform I/O through a temporary buffer so that users who scribble over
2976 * their read buffer while the operation is in progress do not end up
2977 * modifying the image file. This is critical for zero-copy guest I/O
2978 * where anything might happen inside guest memory.
2980 void *bounce_buffer
;
2982 BlockDriver
*drv
= bs
->drv
;
2984 QEMUIOVector bounce_qiov
;
2985 int64_t cluster_sector_num
;
2986 int cluster_nb_sectors
;
2990 /* Cover entire cluster so no additional backing file I/O is required when
2991 * allocating cluster in the image file.
2993 bdrv_round_to_clusters(bs
, sector_num
, nb_sectors
,
2994 &cluster_sector_num
, &cluster_nb_sectors
);
2996 trace_bdrv_co_do_copy_on_readv(bs
, sector_num
, nb_sectors
,
2997 cluster_sector_num
, cluster_nb_sectors
);
2999 iov
.iov_len
= cluster_nb_sectors
* BDRV_SECTOR_SIZE
;
3000 iov
.iov_base
= bounce_buffer
= qemu_blockalign(bs
, iov
.iov_len
);
3001 qemu_iovec_init_external(&bounce_qiov
, &iov
, 1);
3003 ret
= drv
->bdrv_co_readv(bs
, cluster_sector_num
, cluster_nb_sectors
,
3009 if (drv
->bdrv_co_write_zeroes
&&
3010 buffer_is_zero(bounce_buffer
, iov
.iov_len
)) {
3011 ret
= bdrv_co_do_write_zeroes(bs
, cluster_sector_num
,
3012 cluster_nb_sectors
, 0);
3014 /* This does not change the data on the disk, it is not necessary
3015 * to flush even in cache=writethrough mode.
3017 ret
= drv
->bdrv_co_writev(bs
, cluster_sector_num
, cluster_nb_sectors
,
3022 /* It might be okay to ignore write errors for guest requests. If this
3023 * is a deliberate copy-on-read then we don't want to ignore the error.
3024 * Simply report it in all cases.
3029 skip_bytes
= (sector_num
- cluster_sector_num
) * BDRV_SECTOR_SIZE
;
3030 qemu_iovec_from_buf(qiov
, 0, bounce_buffer
+ skip_bytes
,
3031 nb_sectors
* BDRV_SECTOR_SIZE
);
3034 qemu_vfree(bounce_buffer
);
3039 * Forwards an already correctly aligned request to the BlockDriver. This
3040 * handles copy on read and zeroing after EOF; any other features must be
3041 * implemented by the caller.
3043 static int coroutine_fn
bdrv_aligned_preadv(BlockDriverState
*bs
,
3044 BdrvTrackedRequest
*req
, int64_t offset
, unsigned int bytes
,
3045 int64_t align
, QEMUIOVector
*qiov
, int flags
)
3047 BlockDriver
*drv
= bs
->drv
;
3050 int64_t sector_num
= offset
>> BDRV_SECTOR_BITS
;
3051 unsigned int nb_sectors
= bytes
>> BDRV_SECTOR_BITS
;
3053 assert((offset
& (BDRV_SECTOR_SIZE
- 1)) == 0);
3054 assert((bytes
& (BDRV_SECTOR_SIZE
- 1)) == 0);
3056 /* Handle Copy on Read and associated serialisation */
3057 if (flags
& BDRV_REQ_COPY_ON_READ
) {
3058 /* If we touch the same cluster it counts as an overlap. This
3059 * guarantees that allocating writes will be serialized and not race
3060 * with each other for the same cluster. For example, in copy-on-read
3061 * it ensures that the CoR read and write operations are atomic and
3062 * guest writes cannot interleave between them. */
3063 mark_request_serialising(req
, bdrv_get_cluster_size(bs
));
3066 wait_serialising_requests(req
);
3068 if (flags
& BDRV_REQ_COPY_ON_READ
) {
3071 ret
= bdrv_is_allocated(bs
, sector_num
, nb_sectors
, &pnum
);
3076 if (!ret
|| pnum
!= nb_sectors
) {
3077 ret
= bdrv_co_do_copy_on_readv(bs
, sector_num
, nb_sectors
, qiov
);
3082 /* Forward the request to the BlockDriver */
3083 if (!(bs
->zero_beyond_eof
&& bs
->growable
)) {
3084 ret
= drv
->bdrv_co_readv(bs
, sector_num
, nb_sectors
, qiov
);
3086 /* Read zeros after EOF of growable BDSes */
3087 int64_t len
, total_sectors
, max_nb_sectors
;
3089 len
= bdrv_getlength(bs
);
3095 total_sectors
= DIV_ROUND_UP(len
, BDRV_SECTOR_SIZE
);
3096 max_nb_sectors
= ROUND_UP(MAX(0, total_sectors
- sector_num
),
3097 align
>> BDRV_SECTOR_BITS
);
3098 if (max_nb_sectors
> 0) {
3099 ret
= drv
->bdrv_co_readv(bs
, sector_num
,
3100 MIN(nb_sectors
, max_nb_sectors
), qiov
);
3105 /* Reading beyond end of file is supposed to produce zeroes */
3106 if (ret
== 0 && total_sectors
< sector_num
+ nb_sectors
) {
3107 uint64_t offset
= MAX(0, total_sectors
- sector_num
);
3108 uint64_t bytes
= (sector_num
+ nb_sectors
- offset
) *
3110 qemu_iovec_memset(qiov
, offset
* BDRV_SECTOR_SIZE
, 0, bytes
);
3119 * Handle a read request in coroutine context
3121 static int coroutine_fn
bdrv_co_do_preadv(BlockDriverState
*bs
,
3122 int64_t offset
, unsigned int bytes
, QEMUIOVector
*qiov
,
3123 BdrvRequestFlags flags
)
3125 BlockDriver
*drv
= bs
->drv
;
3126 BdrvTrackedRequest req
;
3128 /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
3129 uint64_t align
= MAX(BDRV_SECTOR_SIZE
, bs
->request_alignment
);
3130 uint8_t *head_buf
= NULL
;
3131 uint8_t *tail_buf
= NULL
;
3132 QEMUIOVector local_qiov
;
3133 bool use_local_qiov
= false;
3139 if (bdrv_check_byte_request(bs
, offset
, bytes
)) {
3143 if (bs
->copy_on_read
) {
3144 flags
|= BDRV_REQ_COPY_ON_READ
;
3147 /* throttling disk I/O */
3148 if (bs
->io_limits_enabled
) {
3149 bdrv_io_limits_intercept(bs
, bytes
, false);
3152 /* Align read if necessary by padding qiov */
3153 if (offset
& (align
- 1)) {
3154 head_buf
= qemu_blockalign(bs
, align
);
3155 qemu_iovec_init(&local_qiov
, qiov
->niov
+ 2);
3156 qemu_iovec_add(&local_qiov
, head_buf
, offset
& (align
- 1));
3157 qemu_iovec_concat(&local_qiov
, qiov
, 0, qiov
->size
);
3158 use_local_qiov
= true;
3160 bytes
+= offset
& (align
- 1);
3161 offset
= offset
& ~(align
- 1);
3164 if ((offset
+ bytes
) & (align
- 1)) {
3165 if (!use_local_qiov
) {
3166 qemu_iovec_init(&local_qiov
, qiov
->niov
+ 1);
3167 qemu_iovec_concat(&local_qiov
, qiov
, 0, qiov
->size
);
3168 use_local_qiov
= true;
3170 tail_buf
= qemu_blockalign(bs
, align
);
3171 qemu_iovec_add(&local_qiov
, tail_buf
,
3172 align
- ((offset
+ bytes
) & (align
- 1)));
3174 bytes
= ROUND_UP(bytes
, align
);
3177 tracked_request_begin(&req
, bs
, offset
, bytes
, false);
3178 ret
= bdrv_aligned_preadv(bs
, &req
, offset
, bytes
, align
,
3179 use_local_qiov
? &local_qiov
: qiov
,
3181 tracked_request_end(&req
);
3183 if (use_local_qiov
) {
3184 qemu_iovec_destroy(&local_qiov
);
3185 qemu_vfree(head_buf
);
3186 qemu_vfree(tail_buf
);
3192 static int coroutine_fn
bdrv_co_do_readv(BlockDriverState
*bs
,
3193 int64_t sector_num
, int nb_sectors
, QEMUIOVector
*qiov
,
3194 BdrvRequestFlags flags
)
3196 if (nb_sectors
< 0 || nb_sectors
> (UINT_MAX
>> BDRV_SECTOR_BITS
)) {
3200 return bdrv_co_do_preadv(bs
, sector_num
<< BDRV_SECTOR_BITS
,
3201 nb_sectors
<< BDRV_SECTOR_BITS
, qiov
, flags
);
3204 int coroutine_fn
bdrv_co_readv(BlockDriverState
*bs
, int64_t sector_num
,
3205 int nb_sectors
, QEMUIOVector
*qiov
)
3207 trace_bdrv_co_readv(bs
, sector_num
, nb_sectors
);
3209 return bdrv_co_do_readv(bs
, sector_num
, nb_sectors
, qiov
, 0);
3212 int coroutine_fn
bdrv_co_copy_on_readv(BlockDriverState
*bs
,
3213 int64_t sector_num
, int nb_sectors
, QEMUIOVector
*qiov
)
3215 trace_bdrv_co_copy_on_readv(bs
, sector_num
, nb_sectors
);
3217 return bdrv_co_do_readv(bs
, sector_num
, nb_sectors
, qiov
,
3218 BDRV_REQ_COPY_ON_READ
);
3221 /* if no limit is specified in the BlockLimits use a default
3222 * of 32768 512-byte sectors (16 MiB) per request.
3224 #define MAX_WRITE_ZEROES_DEFAULT 32768
3226 static int coroutine_fn
bdrv_co_do_write_zeroes(BlockDriverState
*bs
,
3227 int64_t sector_num
, int nb_sectors
, BdrvRequestFlags flags
)
3229 BlockDriver
*drv
= bs
->drv
;
3231 struct iovec iov
= {0};
3234 int max_write_zeroes
= bs
->bl
.max_write_zeroes
?
3235 bs
->bl
.max_write_zeroes
: MAX_WRITE_ZEROES_DEFAULT
;
3237 while (nb_sectors
> 0 && !ret
) {
3238 int num
= nb_sectors
;
3240 /* Align request. Block drivers can expect the "bulk" of the request
3243 if (bs
->bl
.write_zeroes_alignment
3244 && num
> bs
->bl
.write_zeroes_alignment
) {
3245 if (sector_num
% bs
->bl
.write_zeroes_alignment
!= 0) {
3246 /* Make a small request up to the first aligned sector. */
3247 num
= bs
->bl
.write_zeroes_alignment
;
3248 num
-= sector_num
% bs
->bl
.write_zeroes_alignment
;
3249 } else if ((sector_num
+ num
) % bs
->bl
.write_zeroes_alignment
!= 0) {
3250 /* Shorten the request to the last aligned sector. num cannot
3251 * underflow because num > bs->bl.write_zeroes_alignment.
3253 num
-= (sector_num
+ num
) % bs
->bl
.write_zeroes_alignment
;
3257 /* limit request size */
3258 if (num
> max_write_zeroes
) {
3259 num
= max_write_zeroes
;
3263 /* First try the efficient write zeroes operation */
3264 if (drv
->bdrv_co_write_zeroes
) {
3265 ret
= drv
->bdrv_co_write_zeroes(bs
, sector_num
, num
, flags
);
3268 if (ret
== -ENOTSUP
) {
3269 /* Fall back to bounce buffer if write zeroes is unsupported */
3270 iov
.iov_len
= num
* BDRV_SECTOR_SIZE
;
3271 if (iov
.iov_base
== NULL
) {
3272 iov
.iov_base
= qemu_blockalign(bs
, num
* BDRV_SECTOR_SIZE
);
3273 memset(iov
.iov_base
, 0, num
* BDRV_SECTOR_SIZE
);
3275 qemu_iovec_init_external(&qiov
, &iov
, 1);
3277 ret
= drv
->bdrv_co_writev(bs
, sector_num
, num
, &qiov
);
3279 /* Keep bounce buffer around if it is big enough for all
3280 * future requests.
3282 if (num
< max_write_zeroes
) {
3283 qemu_vfree(iov
.iov_base
);
3284 iov
.iov_base
= NULL
;
3292 qemu_vfree(iov
.iov_base
);
3297 * Forwards an already correctly aligned write request to the BlockDriver.
3299 static int coroutine_fn
bdrv_aligned_pwritev(BlockDriverState
*bs
,
3300 BdrvTrackedRequest
*req
, int64_t offset
, unsigned int bytes
,
3301 QEMUIOVector
*qiov
, int flags
)
3303 BlockDriver
*drv
= bs
->drv
;
3307 int64_t sector_num
= offset
>> BDRV_SECTOR_BITS
;
3308 unsigned int nb_sectors
= bytes
>> BDRV_SECTOR_BITS
;
3310 assert((offset
& (BDRV_SECTOR_SIZE
- 1)) == 0);
3311 assert((bytes
& (BDRV_SECTOR_SIZE
- 1)) == 0);
3313 waited
= wait_serialising_requests(req
);
3314 assert(!waited
|| !req
->serialising
);
3315 assert(req
->overlap_offset
<= offset
);
3316 assert(offset
+ bytes
<= req
->overlap_offset
+ req
->overlap_bytes
);
3318 ret
= notifier_with_return_list_notify(&bs
->before_write_notifiers
, req
);
3320 if (!ret
&& bs
->detect_zeroes
!= BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF
&&
3321 !(flags
& BDRV_REQ_ZERO_WRITE
) && drv
->bdrv_co_write_zeroes
&&
3322 qemu_iovec_is_zero(qiov
)) {
3323 flags
|= BDRV_REQ_ZERO_WRITE
;
3324 if (bs
->detect_zeroes
== BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP
) {
3325 flags
|= BDRV_REQ_MAY_UNMAP
;
3330 /* Do nothing, write notifier decided to fail this request */
3331 } else if (flags
& BDRV_REQ_ZERO_WRITE
) {
3332 BLKDBG_EVENT(bs
, BLKDBG_PWRITEV_ZERO
);
3333 ret
= bdrv_co_do_write_zeroes(bs
, sector_num
, nb_sectors
, flags
);
3335 BLKDBG_EVENT(bs
, BLKDBG_PWRITEV
);
3336 ret
= drv
->bdrv_co_writev(bs
, sector_num
, nb_sectors
, qiov
);
3338 BLKDBG_EVENT(bs
, BLKDBG_PWRITEV_DONE
);
3340 if (ret
== 0 && !bs
->enable_write_cache
) {
3341 ret
= bdrv_co_flush(bs
);
3344 bdrv_set_dirty(bs
, sector_num
, nb_sectors
);
3346 if (bs
->wr_highest_sector
< sector_num
+ nb_sectors
- 1) {
3347 bs
->wr_highest_sector
= sector_num
+ nb_sectors
- 1;
3349 if (bs
->growable
&& ret
>= 0) {
3350 bs
->total_sectors
= MAX(bs
->total_sectors
, sector_num
+ nb_sectors
);
3357 * Handle a write request in coroutine context
3359 static int coroutine_fn
bdrv_co_do_pwritev(BlockDriverState
*bs
,
3360 int64_t offset
, unsigned int bytes
, QEMUIOVector
*qiov
,
3361 BdrvRequestFlags flags
)
3363 BdrvTrackedRequest req
;
3364 /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
3365 uint64_t align
= MAX(BDRV_SECTOR_SIZE
, bs
->request_alignment
);
3366 uint8_t *head_buf
= NULL
;
3367 uint8_t *tail_buf
= NULL
;
3368 QEMUIOVector local_qiov
;
3369 bool use_local_qiov
= false;
3375 if (bs
->read_only
) {
3378 if (bdrv_check_byte_request(bs
, offset
, bytes
)) {
3382 /* throttling disk I/O */
3383 if (bs
->io_limits_enabled
) {
3384 bdrv_io_limits_intercept(bs
, bytes
, true);
3388 * Align write if necessary by performing a read-modify-write cycle.
3389 * Pad qiov with the read parts and be sure to have a tracked request not
3390 * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
3392 tracked_request_begin(&req
, bs
, offset
, bytes
, true);
3394 if (offset
& (align
- 1)) {
3395 QEMUIOVector head_qiov
;
3396 struct iovec head_iov
;
3398 mark_request_serialising(&req
, align
);
3399 wait_serialising_requests(&req
);
3401 head_buf
= qemu_blockalign(bs
, align
);
3402 head_iov
= (struct iovec
) {
3403 .iov_base
= head_buf
,
3406 qemu_iovec_init_external(&head_qiov
, &head_iov
, 1);
3408 BLKDBG_EVENT(bs
, BLKDBG_PWRITEV_RMW_HEAD
);
3409 ret
= bdrv_aligned_preadv(bs
, &req
, offset
& ~(align
- 1), align
,
3410 align
, &head_qiov
, 0);
3414 BLKDBG_EVENT(bs
, BLKDBG_PWRITEV_RMW_AFTER_HEAD
);
3416 qemu_iovec_init(&local_qiov
, qiov
->niov
+ 2);
3417 qemu_iovec_add(&local_qiov
, head_buf
, offset
& (align
- 1));
3418 qemu_iovec_concat(&local_qiov
, qiov
, 0, qiov
->size
);
3419 use_local_qiov
= true;
3421 bytes
+= offset
& (align
- 1);
3422 offset
= offset
& ~(align
- 1);
3425 if ((offset
+ bytes
) & (align
- 1)) {
3426 QEMUIOVector tail_qiov
;
3427 struct iovec tail_iov
;
3431 mark_request_serialising(&req
, align
);
3432 waited
= wait_serialising_requests(&req
);
3433 assert(!waited
|| !use_local_qiov
);
3435 tail_buf
= qemu_blockalign(bs
, align
);
3436 tail_iov
= (struct iovec
) {
3437 .iov_base
= tail_buf
,
3440 qemu_iovec_init_external(&tail_qiov
, &tail_iov
, 1);
3442 BLKDBG_EVENT(bs
, BLKDBG_PWRITEV_RMW_TAIL
);
3443 ret
= bdrv_aligned_preadv(bs
, &req
, (offset
+ bytes
) & ~(align
- 1), align
,
3444 align
, &tail_qiov
, 0);
3448 BLKDBG_EVENT(bs
, BLKDBG_PWRITEV_RMW_AFTER_TAIL
);
3450 if (!use_local_qiov
) {
3451 qemu_iovec_init(&local_qiov
, qiov
->niov
+ 1);
3452 qemu_iovec_concat(&local_qiov
, qiov
, 0, qiov
->size
);
3453 use_local_qiov
= true;
3456 tail_bytes
= (offset
+ bytes
) & (align
- 1);
3457 qemu_iovec_add(&local_qiov
, tail_buf
+ tail_bytes
, align
- tail_bytes
);
3459 bytes
= ROUND_UP(bytes
, align
);
3462 ret
= bdrv_aligned_pwritev(bs
, &req
, offset
, bytes
,
3463 use_local_qiov
? &local_qiov
: qiov
,
3467 tracked_request_end(&req
);
3469 if (use_local_qiov
) {
3470 qemu_iovec_destroy(&local_qiov
);
3472 qemu_vfree(head_buf
);
3473 qemu_vfree(tail_buf
);
3478 static int coroutine_fn
bdrv_co_do_writev(BlockDriverState
*bs
,
3479 int64_t sector_num
, int nb_sectors
, QEMUIOVector
*qiov
,
3480 BdrvRequestFlags flags
)
3482 if (nb_sectors
< 0 || nb_sectors
> (INT_MAX
>> BDRV_SECTOR_BITS
)) {
3486 return bdrv_co_do_pwritev(bs
, sector_num
<< BDRV_SECTOR_BITS
,
3487 nb_sectors
<< BDRV_SECTOR_BITS
, qiov
, flags
);
3490 int coroutine_fn
bdrv_co_writev(BlockDriverState
*bs
, int64_t sector_num
,
3491 int nb_sectors
, QEMUIOVector
*qiov
)
3493 trace_bdrv_co_writev(bs
, sector_num
, nb_sectors
);
3495 return bdrv_co_do_writev(bs
, sector_num
, nb_sectors
, qiov
, 0);
3498 int coroutine_fn
bdrv_co_write_zeroes(BlockDriverState
*bs
,
3499 int64_t sector_num
, int nb_sectors
,
3500 BdrvRequestFlags flags
)
3502 trace_bdrv_co_write_zeroes(bs
, sector_num
, nb_sectors
, flags
);
3504 if (!(bs
->open_flags
& BDRV_O_UNMAP
)) {
3505 flags
&= ~BDRV_REQ_MAY_UNMAP
;
3508 return bdrv_co_do_writev(bs
, sector_num
, nb_sectors
, NULL
,
3509 BDRV_REQ_ZERO_WRITE
| flags
);
3513 * Truncate file to 'offset' bytes (needed only for file protocols)
3515 int bdrv_truncate(BlockDriverState
*bs
, int64_t offset
)
3517 BlockDriver
*drv
= bs
->drv
;
3521 if (!drv
->bdrv_truncate
)
3525 if (bdrv_op_is_blocked(bs
, BLOCK_OP_TYPE_RESIZE
, NULL
)) {
3528 ret
= drv
->bdrv_truncate(bs
, offset
);
3530 ret
= refresh_total_sectors(bs
, offset
>> BDRV_SECTOR_BITS
);
3531 bdrv_dev_resize_cb(bs
);
3537 * Length of an allocated file in bytes. Sparse files are counted by actual
3538 * allocated space. Return < 0 if error or unknown.
3540 int64_t bdrv_get_allocated_file_size(BlockDriverState
*bs
)
3542 BlockDriver
*drv
= bs
->drv
;
3546 if (drv
->bdrv_get_allocated_file_size
) {
3547 return drv
->bdrv_get_allocated_file_size(bs
);
3550 return bdrv_get_allocated_file_size(bs
->file
);
3556 * Length of a file in bytes. Return < 0 if error or unknown.
3558 int64_t bdrv_getlength(BlockDriverState
*bs
)
3560 BlockDriver
*drv
= bs
->drv
;
3564 if (drv
->has_variable_length
) {
3565 int ret
= refresh_total_sectors(bs
, bs
->total_sectors
);
3570 return bs
->total_sectors
* BDRV_SECTOR_SIZE
;
3573 /* return 0 as number of sectors if no device present or error */
3574 void bdrv_get_geometry(BlockDriverState
*bs
, uint64_t *nb_sectors_ptr
)
3577 length
= bdrv_getlength(bs
);
3581 length
= length
>> BDRV_SECTOR_BITS
;
3582 *nb_sectors_ptr
= length
;
3585 void bdrv_set_on_error(BlockDriverState
*bs
, BlockdevOnError on_read_error
,
3586 BlockdevOnError on_write_error
)
3588 bs
->on_read_error
= on_read_error
;
3589 bs
->on_write_error
= on_write_error
;
3592 BlockdevOnError
bdrv_get_on_error(BlockDriverState
*bs
, bool is_read
)
3594 return is_read
? bs
->on_read_error
: bs
->on_write_error
;
3597 BlockErrorAction
bdrv_get_error_action(BlockDriverState
*bs
, bool is_read
, int error
)
3599 BlockdevOnError on_err
= is_read
? bs
->on_read_error
: bs
->on_write_error
;
3602 case BLOCKDEV_ON_ERROR_ENOSPC
:
3603 return (error
== ENOSPC
) ?
3604 BLOCK_ERROR_ACTION_STOP
: BLOCK_ERROR_ACTION_REPORT
;
3605 case BLOCKDEV_ON_ERROR_STOP
:
3606 return BLOCK_ERROR_ACTION_STOP
;
3607 case BLOCKDEV_ON_ERROR_REPORT
:
3608 return BLOCK_ERROR_ACTION_REPORT
;
3609 case BLOCKDEV_ON_ERROR_IGNORE
:
3610 return BLOCK_ERROR_ACTION_IGNORE
;
3616 /* This is done by device models because, while the block layer knows
3617 * about the error, it does not know whether an operation comes from
3618 * the device or the block layer (from a job, for example).
3620 void bdrv_error_action(BlockDriverState
*bs
, BlockErrorAction action
,
3621 bool is_read
, int error
)
3625 if (action
== BLOCK_ERROR_ACTION_STOP
) {
3626 /* First set the iostatus, so that "info block" returns an iostatus
3627 * that matches the events raised so far (an additional error iostatus
3628 * is fine, but not a lost one).
3630 bdrv_iostatus_set_err(bs
, error
);
3632 /* Then raise the request to stop the VM and the event.
3633 * qemu_system_vmstop_request_prepare has two effects. First,
3634 * it ensures that the STOP event always comes after the
3635 * BLOCK_IO_ERROR event. Second, it ensures that even if management
3636 * can observe the STOP event and do a "cont" before the STOP
3637 * event is issued, the VM will not stop. In this case, vm_start()
3638 * also ensures that the STOP/RESUME pair of events is emitted.
3640 qemu_system_vmstop_request_prepare();
3641 qapi_event_send_block_io_error(bdrv_get_device_name(bs
),
3642 is_read
? IO_OPERATION_TYPE_READ
:
3643 IO_OPERATION_TYPE_WRITE
,
3644 action
, &error_abort
);
3645 qemu_system_vmstop_request(RUN_STATE_IO_ERROR
);
3647 qapi_event_send_block_io_error(bdrv_get_device_name(bs
),
3648 is_read
? IO_OPERATION_TYPE_READ
:
3649 IO_OPERATION_TYPE_WRITE
,
3650 action
, &error_abort
);
3654 int bdrv_is_read_only(BlockDriverState
*bs
)
3656 return bs
->read_only
;
3659 int bdrv_is_sg(BlockDriverState
*bs
)
3664 int bdrv_enable_write_cache(BlockDriverState
*bs
)
3666 return bs
->enable_write_cache
;
3669 void bdrv_set_enable_write_cache(BlockDriverState
*bs
, bool wce
)
3671 bs
->enable_write_cache
= wce
;
3673 /* so a reopen() will preserve wce */
3675 bs
->open_flags
|= BDRV_O_CACHE_WB
;
3677 bs
->open_flags
&= ~BDRV_O_CACHE_WB
;
3681 int bdrv_is_encrypted(BlockDriverState
*bs
)
3683 if (bs
->backing_hd
&& bs
->backing_hd
->encrypted
)
3685 return bs
->encrypted
;
3688 int bdrv_key_required(BlockDriverState
*bs
)
3690 BlockDriverState
*backing_hd
= bs
->backing_hd
;
3692 if (backing_hd
&& backing_hd
->encrypted
&& !backing_hd
->valid_key
)
3694 return (bs
->encrypted
&& !bs
->valid_key
);
3697 int bdrv_set_key(BlockDriverState
*bs
, const char *key
)
3700 if (bs
->backing_hd
&& bs
->backing_hd
->encrypted
) {
3701 ret
= bdrv_set_key(bs
->backing_hd
, key
);
3707 if (!bs
->encrypted
) {
3709 } else if (!bs
->drv
|| !bs
->drv
->bdrv_set_key
) {
3712 ret
= bs
->drv
->bdrv_set_key(bs
, key
);
3715 } else if (!bs
->valid_key
) {
3717 /* call the change callback now, we skipped it on open */
3718 bdrv_dev_change_media_cb(bs
, true);
3723 const char *bdrv_get_format_name(BlockDriverState
*bs
)
3725 return bs
->drv
? bs
->drv
->format_name
: NULL
;
3728 void bdrv_iterate_format(void (*it
)(void *opaque
, const char *name
),
3733 const char **formats
= NULL
;
3735 QLIST_FOREACH(drv
, &bdrv_drivers
, list
) {
3736 if (drv
->format_name
) {
3739 while (formats
&& i
&& !found
) {
3740 found
= !strcmp(formats
[--i
], drv
->format_name
);
3744 formats
= g_realloc(formats
, (count
+ 1) * sizeof(char *));
3745 formats
[count
++] = drv
->format_name
;
3746 it(opaque
, drv
->format_name
);
3753 /* This function is to find block backend bs */
3754 BlockDriverState
*bdrv_find(const char *name
)
3756 BlockDriverState
*bs
;
3758 QTAILQ_FOREACH(bs
, &bdrv_states
, device_list
) {
3759 if (!strcmp(name
, bs
->device_name
)) {
3766 /* This function is to find a node in the bs graph */
3767 BlockDriverState
*bdrv_find_node(const char *node_name
)
3769 BlockDriverState
*bs
;
3773 QTAILQ_FOREACH(bs
, &graph_bdrv_states
, node_list
) {
3774 if (!strcmp(node_name
, bs
->node_name
)) {
3781 /* Put this QMP function here so it can access the static graph_bdrv_states. */
3782 BlockDeviceInfoList
*bdrv_named_nodes_list(void)
3784 BlockDeviceInfoList
*list
, *entry
;
3785 BlockDriverState
*bs
;
3788 QTAILQ_FOREACH(bs
, &graph_bdrv_states
, node_list
) {
3789 entry
= g_malloc0(sizeof(*entry
));
3790 entry
->value
= bdrv_block_device_info(bs
);
3798 BlockDriverState
*bdrv_lookup_bs(const char *device
,
3799 const char *node_name
,
3802 BlockDriverState
*bs
= NULL
;
3805 bs
= bdrv_find(device
);
3813 bs
= bdrv_find_node(node_name
);
3820 error_setg(errp
, "Cannot find device=%s nor node_name=%s",
3821 device
? device
: "",
3822 node_name
? node_name
: "");
3826 BlockDriverState
*bdrv_next(BlockDriverState
*bs
)
3829 return QTAILQ_FIRST(&bdrv_states
);
3831 return QTAILQ_NEXT(bs
, device_list
);
3834 void bdrv_iterate(void (*it
)(void *opaque
, BlockDriverState
*bs
), void *opaque
)
3836 BlockDriverState
*bs
;
3838 QTAILQ_FOREACH(bs
, &bdrv_states
, device_list
) {
3843 const char *bdrv_get_device_name(BlockDriverState
*bs
)
3845 return bs
->device_name
;
3848 int bdrv_get_flags(BlockDriverState
*bs
)
3850 return bs
->open_flags
;
3853 int bdrv_flush_all(void)
3855 BlockDriverState
*bs
;
3858 QTAILQ_FOREACH(bs
, &bdrv_states
, device_list
) {
3859 AioContext
*aio_context
= bdrv_get_aio_context(bs
);
3862 aio_context_acquire(aio_context
);
3863 ret
= bdrv_flush(bs
);
3864 if (ret
< 0 && !result
) {
3867 aio_context_release(aio_context
);
3873 int bdrv_has_zero_init_1(BlockDriverState
*bs
)
3878 int bdrv_has_zero_init(BlockDriverState
*bs
)
3882 /* If BS is a copy on write image, it is initialized to
3883 the contents of the base image, which may not be zeroes. */
3884 if (bs
->backing_hd
) {
3887 if (bs
->drv
->bdrv_has_zero_init
) {
3888 return bs
->drv
->bdrv_has_zero_init(bs
);
3895 bool bdrv_unallocated_blocks_are_zero(BlockDriverState
*bs
)
3897 BlockDriverInfo bdi
;
3899 if (bs
->backing_hd
) {
3903 if (bdrv_get_info(bs
, &bdi
) == 0) {
3904 return bdi
.unallocated_blocks_are_zero
;
3910 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState
*bs
)
3912 BlockDriverInfo bdi
;
3914 if (bs
->backing_hd
|| !(bs
->open_flags
& BDRV_O_UNMAP
)) {
3918 if (bdrv_get_info(bs
, &bdi
) == 0) {
3919 return bdi
.can_write_zeroes_with_unmap
;
3925 typedef struct BdrvCoGetBlockStatusData
{
3926 BlockDriverState
*bs
;
3927 BlockDriverState
*base
;
3933 } BdrvCoGetBlockStatusData
;
3936 * Returns true iff the specified sector is present in the disk image. Drivers
3937 * not implementing the functionality are assumed to not support backing files,
3938 * hence all their sectors are reported as allocated.
3940 * If 'sector_num' is beyond the end of the disk image the return value is 0
3941 * and 'pnum' is set to 0.
3943 * 'pnum' is set to the number of sectors (including and immediately following
3944 * the specified sector) that are known to be in the same
3945 * allocated/unallocated state.
3947 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
3948 * beyond the end of the disk image it will be clamped.
3950 static int64_t coroutine_fn
bdrv_co_get_block_status(BlockDriverState
*bs
,
3952 int nb_sectors
, int *pnum
)
3958 length
= bdrv_getlength(bs
);
3963 if (sector_num
>= (length
>> BDRV_SECTOR_BITS
)) {
3968 n
= bs
->total_sectors
- sector_num
;
3969 if (n
< nb_sectors
) {
3973 if (!bs
->drv
->bdrv_co_get_block_status
) {
3975 ret
= BDRV_BLOCK_DATA
| BDRV_BLOCK_ALLOCATED
;
3976 if (bs
->drv
->protocol_name
) {
3977 ret
|= BDRV_BLOCK_OFFSET_VALID
| (sector_num
* BDRV_SECTOR_SIZE
);
3982 ret
= bs
->drv
->bdrv_co_get_block_status(bs
, sector_num
, nb_sectors
, pnum
);
3988 if (ret
& BDRV_BLOCK_RAW
) {
3989 assert(ret
& BDRV_BLOCK_OFFSET_VALID
);
3990 return bdrv_get_block_status(bs
->file
, ret
>> BDRV_SECTOR_BITS
,
3994 if (ret
& (BDRV_BLOCK_DATA
| BDRV_BLOCK_ZERO
)) {
3995 ret
|= BDRV_BLOCK_ALLOCATED
;
3998 if (!(ret
& BDRV_BLOCK_DATA
) && !(ret
& BDRV_BLOCK_ZERO
)) {
3999 if (bdrv_unallocated_blocks_are_zero(bs
)) {
4000 ret
|= BDRV_BLOCK_ZERO
;
4001 } else if (bs
->backing_hd
) {
4002 BlockDriverState
*bs2
= bs
->backing_hd
;
4003 int64_t length2
= bdrv_getlength(bs2
);
4004 if (length2
>= 0 && sector_num
>= (length2
>> BDRV_SECTOR_BITS
)) {
4005 ret
|= BDRV_BLOCK_ZERO
;
4011 (ret
& BDRV_BLOCK_DATA
) && !(ret
& BDRV_BLOCK_ZERO
) &&
4012 (ret
& BDRV_BLOCK_OFFSET_VALID
)) {
4013 ret2
= bdrv_co_get_block_status(bs
->file
, ret
>> BDRV_SECTOR_BITS
,
4016 /* Ignore errors. This is just providing extra information, it
4017 * is useful but not necessary.
4019 ret
|= (ret2
& BDRV_BLOCK_ZERO
);
4026 /* Coroutine wrapper for bdrv_get_block_status() */
4027 static void coroutine_fn
bdrv_get_block_status_co_entry(void *opaque
)
4029 BdrvCoGetBlockStatusData
*data
= opaque
;
4030 BlockDriverState
*bs
= data
->bs
;
4032 data
->ret
= bdrv_co_get_block_status(bs
, data
->sector_num
, data
->nb_sectors
,
4038 * Synchronous wrapper around bdrv_co_get_block_status().
4040 * See bdrv_co_get_block_status() for details.
4042 int64_t bdrv_get_block_status(BlockDriverState
*bs
, int64_t sector_num
,
4043 int nb_sectors
, int *pnum
)
4046 BdrvCoGetBlockStatusData data
= {
4048 .sector_num
= sector_num
,
4049 .nb_sectors
= nb_sectors
,
4054 if (qemu_in_coroutine()) {
4055 /* Fast-path if already in coroutine context */
4056 bdrv_get_block_status_co_entry(&data
);
4058 AioContext
*aio_context
= bdrv_get_aio_context(bs
);
4060 co
= qemu_coroutine_create(bdrv_get_block_status_co_entry
);
4061 qemu_coroutine_enter(co
, &data
);
4062 while (!data
.done
) {
4063 aio_poll(aio_context
, true);
4069 int coroutine_fn
bdrv_is_allocated(BlockDriverState
*bs
, int64_t sector_num
,
4070 int nb_sectors
, int *pnum
)
4072 int64_t ret
= bdrv_get_block_status(bs
, sector_num
, nb_sectors
, pnum
);
4076 return (ret
& BDRV_BLOCK_ALLOCATED
);
4080 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
4082 * Return true if the given sector is allocated in any image between
4083 * BASE and TOP (inclusive). BASE can be NULL to check if the given
4084 * sector is allocated in any image of the chain. Return false otherwise.
4086 * 'pnum' is set to the number of sectors (including and immediately following
4087 * the specified sector) that are known to be in the same
4088 * allocated/unallocated state.
4091 int bdrv_is_allocated_above(BlockDriverState
*top
,
4092 BlockDriverState
*base
,
4094 int nb_sectors
, int *pnum
)
4096 BlockDriverState
*intermediate
;
4097 int ret
, n
= nb_sectors
;
4100 while (intermediate
&& intermediate
!= base
) {
4102 ret
= bdrv_is_allocated(intermediate
, sector_num
, nb_sectors
,
4112 * [sector_num, nb_sectors] is unallocated on top but intermediate
4115 * [sector_num+x, nb_sectors] allocated.
4117 if (n
> pnum_inter
&&
4118 (intermediate
== top
||
4119 sector_num
+ pnum_inter
< intermediate
->total_sectors
)) {
4123 intermediate
= intermediate
->backing_hd
;
4130 const char *bdrv_get_encrypted_filename(BlockDriverState
*bs
)
4132 if (bs
->backing_hd
&& bs
->backing_hd
->encrypted
)
4133 return bs
->backing_file
;
4134 else if (bs
->encrypted
)
4135 return bs
->filename
;
4140 void bdrv_get_backing_filename(BlockDriverState
*bs
,
4141 char *filename
, int filename_size
)
4143 pstrcpy(filename
, filename_size
, bs
->backing_file
);
4146 int bdrv_write_compressed(BlockDriverState
*bs
, int64_t sector_num
,
4147 const uint8_t *buf
, int nb_sectors
)
4149 BlockDriver
*drv
= bs
->drv
;
4152 if (!drv
->bdrv_write_compressed
)
4154 if (bdrv_check_request(bs
, sector_num
, nb_sectors
))
4157 assert(QLIST_EMPTY(&bs
->dirty_bitmaps
));
4159 return drv
->bdrv_write_compressed(bs
, sector_num
, buf
, nb_sectors
);
4162 int bdrv_get_info(BlockDriverState
*bs
, BlockDriverInfo
*bdi
)
4164 BlockDriver
*drv
= bs
->drv
;
4167 if (!drv
->bdrv_get_info
)
4169 memset(bdi
, 0, sizeof(*bdi
));
4170 return drv
->bdrv_get_info(bs
, bdi
);
4173 ImageInfoSpecific
*bdrv_get_specific_info(BlockDriverState
*bs
)
4175 BlockDriver
*drv
= bs
->drv
;
4176 if (drv
&& drv
->bdrv_get_specific_info
) {
4177 return drv
->bdrv_get_specific_info(bs
);
4182 int bdrv_save_vmstate(BlockDriverState
*bs
, const uint8_t *buf
,
4183 int64_t pos
, int size
)
4186 struct iovec iov
= {
4187 .iov_base
= (void *) buf
,
4191 qemu_iovec_init_external(&qiov
, &iov
, 1);
4192 return bdrv_writev_vmstate(bs
, &qiov
, pos
);
4195 int bdrv_writev_vmstate(BlockDriverState
*bs
, QEMUIOVector
*qiov
, int64_t pos
)
4197 BlockDriver
*drv
= bs
->drv
;
4201 } else if (drv
->bdrv_save_vmstate
) {
4202 return drv
->bdrv_save_vmstate(bs
, qiov
, pos
);
4203 } else if (bs
->file
) {
4204 return bdrv_writev_vmstate(bs
->file
, qiov
, pos
);
4210 int bdrv_load_vmstate(BlockDriverState
*bs
, uint8_t *buf
,
4211 int64_t pos
, int size
)
4213 BlockDriver
*drv
= bs
->drv
;
4216 if (drv
->bdrv_load_vmstate
)
4217 return drv
->bdrv_load_vmstate(bs
, buf
, pos
, size
);
4219 return bdrv_load_vmstate(bs
->file
, buf
, pos
, size
);
4223 void bdrv_debug_event(BlockDriverState
*bs
, BlkDebugEvent event
)
4225 if (!bs
|| !bs
->drv
|| !bs
->drv
->bdrv_debug_event
) {
4229 bs
->drv
->bdrv_debug_event(bs
, event
);
4232 int bdrv_debug_breakpoint(BlockDriverState
*bs
, const char *event
,
4235 while (bs
&& bs
->drv
&& !bs
->drv
->bdrv_debug_breakpoint
) {
4239 if (bs
&& bs
->drv
&& bs
->drv
->bdrv_debug_breakpoint
) {
4240 return bs
->drv
->bdrv_debug_breakpoint(bs
, event
, tag
);
4246 int bdrv_debug_remove_breakpoint(BlockDriverState
*bs
, const char *tag
)
4248 while (bs
&& bs
->drv
&& !bs
->drv
->bdrv_debug_remove_breakpoint
) {
4252 if (bs
&& bs
->drv
&& bs
->drv
->bdrv_debug_remove_breakpoint
) {
4253 return bs
->drv
->bdrv_debug_remove_breakpoint(bs
, tag
);
4259 int bdrv_debug_resume(BlockDriverState
*bs
, const char *tag
)
4261 while (bs
&& (!bs
->drv
|| !bs
->drv
->bdrv_debug_resume
)) {
4265 if (bs
&& bs
->drv
&& bs
->drv
->bdrv_debug_resume
) {
4266 return bs
->drv
->bdrv_debug_resume(bs
, tag
);
4272 bool bdrv_debug_is_suspended(BlockDriverState
*bs
, const char *tag
)
4274 while (bs
&& bs
->drv
&& !bs
->drv
->bdrv_debug_is_suspended
) {
4278 if (bs
&& bs
->drv
&& bs
->drv
->bdrv_debug_is_suspended
) {
4279 return bs
->drv
->bdrv_debug_is_suspended(bs
, tag
);
4285 int bdrv_is_snapshot(BlockDriverState
*bs
)
4287 return !!(bs
->open_flags
& BDRV_O_SNAPSHOT
);
4290 /* backing_file can either be relative, or absolute, or a protocol. If it is
4291 * relative, it must be relative to the chain. So, passing in bs->filename
4292 * from a BDS as backing_file should not be done, as that may be relative to
4293 * the CWD rather than the chain. */
4294 BlockDriverState
*bdrv_find_backing_image(BlockDriverState
*bs
,
4295 const char *backing_file
)
4297 char *filename_full
= NULL
;
4298 char *backing_file_full
= NULL
;
4299 char *filename_tmp
= NULL
;
4300 int is_protocol
= 0;
4301 BlockDriverState
*curr_bs
= NULL
;
4302 BlockDriverState
*retval
= NULL
;
4304 if (!bs
|| !bs
->drv
|| !backing_file
) {
4308 filename_full
= g_malloc(PATH_MAX
);
4309 backing_file_full
= g_malloc(PATH_MAX
);
4310 filename_tmp
= g_malloc(PATH_MAX
);
4312 is_protocol
= path_has_protocol(backing_file
);
4314 for (curr_bs
= bs
; curr_bs
->backing_hd
; curr_bs
= curr_bs
->backing_hd
) {
4316 /* If either of the filename paths is actually a protocol, then
4317 * compare unmodified paths; otherwise make paths relative */
4318 if (is_protocol
|| path_has_protocol(curr_bs
->backing_file
)) {
4319 if (strcmp(backing_file
, curr_bs
->backing_file
) == 0) {
4320 retval
= curr_bs
->backing_hd
;
4324 /* If not an absolute filename path, make it relative to the current
4325 * image's filename path */
4326 path_combine(filename_tmp
, PATH_MAX
, curr_bs
->filename
,
4329 /* We are going to compare absolute pathnames */
4330 if (!realpath(filename_tmp
, filename_full
)) {
4334 /* We need to make sure the backing filename we are comparing against
4335 * is relative to the current image filename (or absolute) */
4336 path_combine(filename_tmp
, PATH_MAX
, curr_bs
->filename
,
4337 curr_bs
->backing_file
);
4339 if (!realpath(filename_tmp
, backing_file_full
)) {
4343 if (strcmp(backing_file_full
, filename_full
) == 0) {
4344 retval
= curr_bs
->backing_hd
;
4350 g_free(filename_full
);
4351 g_free(backing_file_full
);
4352 g_free(filename_tmp
);
4356 int bdrv_get_backing_file_depth(BlockDriverState
*bs
)
4362 if (!bs
->backing_hd
) {
4366 return 1 + bdrv_get_backing_file_depth(bs
->backing_hd
);
4369 BlockDriverState
*bdrv_find_base(BlockDriverState
*bs
)
4371 BlockDriverState
*curr_bs
= NULL
;
4379 while (curr_bs
->backing_hd
) {
4380 curr_bs
= curr_bs
->backing_hd
;
4385 /**************************************************************/
4388 BlockDriverAIOCB
*bdrv_aio_readv(BlockDriverState
*bs
, int64_t sector_num
,
4389 QEMUIOVector
*qiov
, int nb_sectors
,
4390 BlockDriverCompletionFunc
*cb
, void *opaque
)
4392 trace_bdrv_aio_readv(bs
, sector_num
, nb_sectors
, opaque
);
4394 return bdrv_co_aio_rw_vector(bs
, sector_num
, qiov
, nb_sectors
, 0,
4398 BlockDriverAIOCB
*bdrv_aio_writev(BlockDriverState
*bs
, int64_t sector_num
,
4399 QEMUIOVector
*qiov
, int nb_sectors
,
4400 BlockDriverCompletionFunc
*cb
, void *opaque
)
4402 trace_bdrv_aio_writev(bs
, sector_num
, nb_sectors
, opaque
);
4404 return bdrv_co_aio_rw_vector(bs
, sector_num
, qiov
, nb_sectors
, 0,
4408 BlockDriverAIOCB
*bdrv_aio_write_zeroes(BlockDriverState
*bs
,
4409 int64_t sector_num
, int nb_sectors
, BdrvRequestFlags flags
,
4410 BlockDriverCompletionFunc
*cb
, void *opaque
)
4412 trace_bdrv_aio_write_zeroes(bs
, sector_num
, nb_sectors
, flags
, opaque
);
4414 return bdrv_co_aio_rw_vector(bs
, sector_num
, NULL
, nb_sectors
,
4415 BDRV_REQ_ZERO_WRITE
| flags
,
4420 typedef struct MultiwriteCB
{
4425 BlockDriverCompletionFunc
*cb
;
4427 QEMUIOVector
*free_qiov
;
4431 static void multiwrite_user_cb(MultiwriteCB
*mcb
)
4435 for (i
= 0; i
< mcb
->num_callbacks
; i
++) {
4436 mcb
->callbacks
[i
].cb(mcb
->callbacks
[i
].opaque
, mcb
->error
);
4437 if (mcb
->callbacks
[i
].free_qiov
) {
4438 qemu_iovec_destroy(mcb
->callbacks
[i
].free_qiov
);
4440 g_free(mcb
->callbacks
[i
].free_qiov
);
4444 static void multiwrite_cb(void *opaque
, int ret
)
4446 MultiwriteCB
*mcb
= opaque
;
4448 trace_multiwrite_cb(mcb
, ret
);
4450 if (ret
< 0 && !mcb
->error
) {
4454 mcb
->num_requests
--;
4455 if (mcb
->num_requests
== 0) {
4456 multiwrite_user_cb(mcb
);
4461 static int multiwrite_req_compare(const void *a
, const void *b
)
4463 const BlockRequest
*req1
= a
, *req2
= b
;
4466 * Note that we can't simply subtract req2->sector from req1->sector
4467 * here as that could overflow the return value.
4469 if (req1
->sector
> req2
->sector
) {
4471 } else if (req1
->sector
< req2
->sector
) {
4479 * Takes a bunch of requests and tries to merge them. Returns the number of
4480 * requests that remain after merging.
4482 static int multiwrite_merge(BlockDriverState
*bs
, BlockRequest
*reqs
,
4483 int num_reqs
, MultiwriteCB
*mcb
)
4487 // Sort requests by start sector
4488 qsort(reqs
, num_reqs
, sizeof(*reqs
), &multiwrite_req_compare
);
4490 // Check if adjacent requests touch the same clusters. If so, combine them,
4491 // filling up gaps with zero sectors.
4493 for (i
= 1; i
< num_reqs
; i
++) {
4495 int64_t oldreq_last
= reqs
[outidx
].sector
+ reqs
[outidx
].nb_sectors
;
4497 // Handle exactly sequential writes and overlapping writes.
4498 if (reqs
[i
].sector
<= oldreq_last
) {
4502 if (reqs
[outidx
].qiov
->niov
+ reqs
[i
].qiov
->niov
+ 1 > IOV_MAX
) {
4508 QEMUIOVector
*qiov
= g_malloc0(sizeof(*qiov
));
4509 qemu_iovec_init(qiov
,
4510 reqs
[outidx
].qiov
->niov
+ reqs
[i
].qiov
->niov
+ 1);
4512 // Add the first request to the merged one. If the requests are
4513 // overlapping, drop the last sectors of the first request.
4514 size
= (reqs
[i
].sector
- reqs
[outidx
].sector
) << 9;
4515 qemu_iovec_concat(qiov
, reqs
[outidx
].qiov
, 0, size
);
4517 // We should need to add any zeros between the two requests
4518 assert (reqs
[i
].sector
<= oldreq_last
);
4520 // Add the second request
4521 qemu_iovec_concat(qiov
, reqs
[i
].qiov
, 0, reqs
[i
].qiov
->size
);
4523 reqs
[outidx
].nb_sectors
= qiov
->size
>> 9;
4524 reqs
[outidx
].qiov
= qiov
;
4526 mcb
->callbacks
[i
].free_qiov
= reqs
[outidx
].qiov
;
4529 reqs
[outidx
].sector
= reqs
[i
].sector
;
4530 reqs
[outidx
].nb_sectors
= reqs
[i
].nb_sectors
;
4531 reqs
[outidx
].qiov
= reqs
[i
].qiov
;
4539 * Submit multiple AIO write requests at once.
4541 * On success, the function returns 0 and all requests in the reqs array have
4542 * been submitted. In error case this function returns -1, and any of the
4543 * requests may or may not be submitted yet. In particular, this means that the
4544 * callback will be called for some of the requests, for others it won't. The
4545 * caller must check the error field of the BlockRequest to wait for the right
4546 * callbacks (if error != 0, no callback will be called).
4548 * The implementation may modify the contents of the reqs array, e.g. to merge
4549 * requests. However, the fields opaque and error are left unmodified as they
4550 * are used to signal failure for a single request to the caller.
4552 int bdrv_aio_multiwrite(BlockDriverState
*bs
, BlockRequest
*reqs
, int num_reqs
)
4557 /* don't submit writes if we don't have a medium */
4558 if (bs
->drv
== NULL
) {
4559 for (i
= 0; i
< num_reqs
; i
++) {
4560 reqs
[i
].error
= -ENOMEDIUM
;
4565 if (num_reqs
== 0) {
4569 // Create MultiwriteCB structure
4570 mcb
= g_malloc0(sizeof(*mcb
) + num_reqs
* sizeof(*mcb
->callbacks
));
4571 mcb
->num_requests
= 0;
4572 mcb
->num_callbacks
= num_reqs
;
4574 for (i
= 0; i
< num_reqs
; i
++) {
4575 mcb
->callbacks
[i
].cb
= reqs
[i
].cb
;
4576 mcb
->callbacks
[i
].opaque
= reqs
[i
].opaque
;
4579 // Check for mergable requests
4580 num_reqs
= multiwrite_merge(bs
, reqs
, num_reqs
, mcb
);
4582 trace_bdrv_aio_multiwrite(mcb
, mcb
->num_callbacks
, num_reqs
);
4584 /* Run the aio requests. */
4585 mcb
->num_requests
= num_reqs
;
4586 for (i
= 0; i
< num_reqs
; i
++) {
4587 bdrv_co_aio_rw_vector(bs
, reqs
[i
].sector
, reqs
[i
].qiov
,
4588 reqs
[i
].nb_sectors
, reqs
[i
].flags
,
4596 void bdrv_aio_cancel(BlockDriverAIOCB
*acb
)
4598 acb
->aiocb_info
->cancel(acb
);
4601 /**************************************************************/
4602 /* async block device emulation */
4604 typedef struct BlockDriverAIOCBSync
{
4605 BlockDriverAIOCB common
;
4608 /* vector translation state */
4612 } BlockDriverAIOCBSync
;
4614 static void bdrv_aio_cancel_em(BlockDriverAIOCB
*blockacb
)
4616 BlockDriverAIOCBSync
*acb
=
4617 container_of(blockacb
, BlockDriverAIOCBSync
, common
);
4618 qemu_bh_delete(acb
->bh
);
4620 qemu_aio_release(acb
);
4623 static const AIOCBInfo bdrv_em_aiocb_info
= {
4624 .aiocb_size
= sizeof(BlockDriverAIOCBSync
),
4625 .cancel
= bdrv_aio_cancel_em
,
4628 static void bdrv_aio_bh_cb(void *opaque
)
4630 BlockDriverAIOCBSync
*acb
= opaque
;
4633 qemu_iovec_from_buf(acb
->qiov
, 0, acb
->bounce
, acb
->qiov
->size
);
4634 qemu_vfree(acb
->bounce
);
4635 acb
->common
.cb(acb
->common
.opaque
, acb
->ret
);
4636 qemu_bh_delete(acb
->bh
);
4638 qemu_aio_release(acb
);
4641 static BlockDriverAIOCB
*bdrv_aio_rw_vector(BlockDriverState
*bs
,
4645 BlockDriverCompletionFunc
*cb
,
4650 BlockDriverAIOCBSync
*acb
;
4652 acb
= qemu_aio_get(&bdrv_em_aiocb_info
, bs
, cb
, opaque
);
4653 acb
->is_write
= is_write
;
4655 acb
->bounce
= qemu_blockalign(bs
, qiov
->size
);
4656 acb
->bh
= aio_bh_new(bdrv_get_aio_context(bs
), bdrv_aio_bh_cb
, acb
);
4659 qemu_iovec_to_buf(acb
->qiov
, 0, acb
->bounce
, qiov
->size
);
4660 acb
->ret
= bs
->drv
->bdrv_write(bs
, sector_num
, acb
->bounce
, nb_sectors
);
4662 acb
->ret
= bs
->drv
->bdrv_read(bs
, sector_num
, acb
->bounce
, nb_sectors
);
4665 qemu_bh_schedule(acb
->bh
);
4667 return &acb
->common
;
4670 static BlockDriverAIOCB
*bdrv_aio_readv_em(BlockDriverState
*bs
,
4671 int64_t sector_num
, QEMUIOVector
*qiov
, int nb_sectors
,
4672 BlockDriverCompletionFunc
*cb
, void *opaque
)
4674 return bdrv_aio_rw_vector(bs
, sector_num
, qiov
, nb_sectors
, cb
, opaque
, 0);
4677 static BlockDriverAIOCB
*bdrv_aio_writev_em(BlockDriverState
*bs
,
4678 int64_t sector_num
, QEMUIOVector
*qiov
, int nb_sectors
,
4679 BlockDriverCompletionFunc
*cb
, void *opaque
)
4681 return bdrv_aio_rw_vector(bs
, sector_num
, qiov
, nb_sectors
, cb
, opaque
, 1);
4685 typedef struct BlockDriverAIOCBCoroutine
{
4686 BlockDriverAIOCB common
;
4691 } BlockDriverAIOCBCoroutine
;
4693 static void bdrv_aio_co_cancel_em(BlockDriverAIOCB
*blockacb
)
4695 AioContext
*aio_context
= bdrv_get_aio_context(blockacb
->bs
);
4696 BlockDriverAIOCBCoroutine
*acb
=
4697 container_of(blockacb
, BlockDriverAIOCBCoroutine
, common
);
4702 aio_poll(aio_context
, true);
4706 static const AIOCBInfo bdrv_em_co_aiocb_info
= {
4707 .aiocb_size
= sizeof(BlockDriverAIOCBCoroutine
),
4708 .cancel
= bdrv_aio_co_cancel_em
,
4711 static void bdrv_co_em_bh(void *opaque
)
4713 BlockDriverAIOCBCoroutine
*acb
= opaque
;
4715 acb
->common
.cb(acb
->common
.opaque
, acb
->req
.error
);
4721 qemu_bh_delete(acb
->bh
);
4722 qemu_aio_release(acb
);
4725 /* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
4726 static void coroutine_fn
bdrv_co_do_rw(void *opaque
)
4728 BlockDriverAIOCBCoroutine
*acb
= opaque
;
4729 BlockDriverState
*bs
= acb
->common
.bs
;
4731 if (!acb
->is_write
) {
4732 acb
->req
.error
= bdrv_co_do_readv(bs
, acb
->req
.sector
,
4733 acb
->req
.nb_sectors
, acb
->req
.qiov
, acb
->req
.flags
);
4735 acb
->req
.error
= bdrv_co_do_writev(bs
, acb
->req
.sector
,
4736 acb
->req
.nb_sectors
, acb
->req
.qiov
, acb
->req
.flags
);
4739 acb
->bh
= aio_bh_new(bdrv_get_aio_context(bs
), bdrv_co_em_bh
, acb
);
4740 qemu_bh_schedule(acb
->bh
);
4743 static BlockDriverAIOCB
*bdrv_co_aio_rw_vector(BlockDriverState
*bs
,
4747 BdrvRequestFlags flags
,
4748 BlockDriverCompletionFunc
*cb
,
4753 BlockDriverAIOCBCoroutine
*acb
;
4755 acb
= qemu_aio_get(&bdrv_em_co_aiocb_info
, bs
, cb
, opaque
);
4756 acb
->req
.sector
= sector_num
;
4757 acb
->req
.nb_sectors
= nb_sectors
;
4758 acb
->req
.qiov
= qiov
;
4759 acb
->req
.flags
= flags
;
4760 acb
->is_write
= is_write
;
4763 co
= qemu_coroutine_create(bdrv_co_do_rw
);
4764 qemu_coroutine_enter(co
, acb
);
4766 return &acb
->common
;
4769 static void coroutine_fn
bdrv_aio_flush_co_entry(void *opaque
)
4771 BlockDriverAIOCBCoroutine
*acb
= opaque
;
4772 BlockDriverState
*bs
= acb
->common
.bs
;
4774 acb
->req
.error
= bdrv_co_flush(bs
);
4775 acb
->bh
= aio_bh_new(bdrv_get_aio_context(bs
), bdrv_co_em_bh
, acb
);
4776 qemu_bh_schedule(acb
->bh
);
4779 BlockDriverAIOCB
*bdrv_aio_flush(BlockDriverState
*bs
,
4780 BlockDriverCompletionFunc
*cb
, void *opaque
)
4782 trace_bdrv_aio_flush(bs
, opaque
);
4785 BlockDriverAIOCBCoroutine
*acb
;
4787 acb
= qemu_aio_get(&bdrv_em_co_aiocb_info
, bs
, cb
, opaque
);
4790 co
= qemu_coroutine_create(bdrv_aio_flush_co_entry
);
4791 qemu_coroutine_enter(co
, acb
);
4793 return &acb
->common
;
4796 static void coroutine_fn
bdrv_aio_discard_co_entry(void *opaque
)
4798 BlockDriverAIOCBCoroutine
*acb
= opaque
;
4799 BlockDriverState
*bs
= acb
->common
.bs
;
4801 acb
->req
.error
= bdrv_co_discard(bs
, acb
->req
.sector
, acb
->req
.nb_sectors
);
4802 acb
->bh
= aio_bh_new(bdrv_get_aio_context(bs
), bdrv_co_em_bh
, acb
);
4803 qemu_bh_schedule(acb
->bh
);
4806 BlockDriverAIOCB
*bdrv_aio_discard(BlockDriverState
*bs
,
4807 int64_t sector_num
, int nb_sectors
,
4808 BlockDriverCompletionFunc
*cb
, void *opaque
)
4811 BlockDriverAIOCBCoroutine
*acb
;
4813 trace_bdrv_aio_discard(bs
, sector_num
, nb_sectors
, opaque
);
4815 acb
= qemu_aio_get(&bdrv_em_co_aiocb_info
, bs
, cb
, opaque
);
4816 acb
->req
.sector
= sector_num
;
4817 acb
->req
.nb_sectors
= nb_sectors
;
4819 co
= qemu_coroutine_create(bdrv_aio_discard_co_entry
);
4820 qemu_coroutine_enter(co
, acb
);
4822 return &acb
->common
;
4825 void bdrv_init(void)
4827 module_call_init(MODULE_INIT_BLOCK
);
4830 void bdrv_init_with_whitelist(void)
4832 use_bdrv_whitelist
= 1;
4836 void *qemu_aio_get(const AIOCBInfo
*aiocb_info
, BlockDriverState
*bs
,
4837 BlockDriverCompletionFunc
*cb
, void *opaque
)
4839 BlockDriverAIOCB
*acb
;
4841 acb
= g_slice_alloc(aiocb_info
->aiocb_size
);
4842 acb
->aiocb_info
= aiocb_info
;
4845 acb
->opaque
= opaque
;
4849 void qemu_aio_release(void *p
)
4851 BlockDriverAIOCB
*acb
= p
;
4852 g_slice_free1(acb
->aiocb_info
->aiocb_size
, acb
);
4855 /**************************************************************/
4856 /* Coroutine block device emulation */
4858 typedef struct CoroutineIOCompletion
{
4859 Coroutine
*coroutine
;
4861 } CoroutineIOCompletion
;
4863 static void bdrv_co_io_em_complete(void *opaque
, int ret
)
4865 CoroutineIOCompletion
*co
= opaque
;
4868 qemu_coroutine_enter(co
->coroutine
, NULL
);
4871 static int coroutine_fn
bdrv_co_io_em(BlockDriverState
*bs
, int64_t sector_num
,
4872 int nb_sectors
, QEMUIOVector
*iov
,
4875 CoroutineIOCompletion co
= {
4876 .coroutine
= qemu_coroutine_self(),
4878 BlockDriverAIOCB
*acb
;
4881 acb
= bs
->drv
->bdrv_aio_writev(bs
, sector_num
, iov
, nb_sectors
,
4882 bdrv_co_io_em_complete
, &co
);
4884 acb
= bs
->drv
->bdrv_aio_readv(bs
, sector_num
, iov
, nb_sectors
,
4885 bdrv_co_io_em_complete
, &co
);
4888 trace_bdrv_co_io_em(bs
, sector_num
, nb_sectors
, is_write
, acb
);
4892 qemu_coroutine_yield();
4897 static int coroutine_fn
bdrv_co_readv_em(BlockDriverState
*bs
,
4898 int64_t sector_num
, int nb_sectors
,
4901 return bdrv_co_io_em(bs
, sector_num
, nb_sectors
, iov
, false);
4904 static int coroutine_fn
bdrv_co_writev_em(BlockDriverState
*bs
,
4905 int64_t sector_num
, int nb_sectors
,
4908 return bdrv_co_io_em(bs
, sector_num
, nb_sectors
, iov
, true);
4911 static void coroutine_fn
bdrv_flush_co_entry(void *opaque
)
4913 RwCo
*rwco
= opaque
;
4915 rwco
->ret
= bdrv_co_flush(rwco
->bs
);
4918 int coroutine_fn
bdrv_co_flush(BlockDriverState
*bs
)
4922 if (!bs
|| !bdrv_is_inserted(bs
) || bdrv_is_read_only(bs
)) {
4926 /* Write back cached data to the OS even with cache=unsafe */
4927 BLKDBG_EVENT(bs
->file
, BLKDBG_FLUSH_TO_OS
);
4928 if (bs
->drv
->bdrv_co_flush_to_os
) {
4929 ret
= bs
->drv
->bdrv_co_flush_to_os(bs
);
4935 /* But don't actually force it to the disk with cache=unsafe */
4936 if (bs
->open_flags
& BDRV_O_NO_FLUSH
) {
4940 BLKDBG_EVENT(bs
->file
, BLKDBG_FLUSH_TO_DISK
);
4941 if (bs
->drv
->bdrv_co_flush_to_disk
) {
4942 ret
= bs
->drv
->bdrv_co_flush_to_disk(bs
);
4943 } else if (bs
->drv
->bdrv_aio_flush
) {
4944 BlockDriverAIOCB
*acb
;
4945 CoroutineIOCompletion co
= {
4946 .coroutine
= qemu_coroutine_self(),
4949 acb
= bs
->drv
->bdrv_aio_flush(bs
, bdrv_co_io_em_complete
, &co
);
4953 qemu_coroutine_yield();
4958 * Some block drivers always operate in either writethrough or unsafe
4959 * mode and don't support bdrv_flush therefore. Usually qemu doesn't
4960 * know how the server works (because the behaviour is hardcoded or
4961 * depends on server-side configuration), so we can't ensure that
4962 * everything is safe on disk. Returning an error doesn't work because
4963 * that would break guests even if the server operates in writethrough
4966 * Let's hope the user knows what he's doing.
4974 /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
4975 * in the case of cache=unsafe, so there are no useless flushes.
4978 return bdrv_co_flush(bs
->file
);
4981 void bdrv_invalidate_cache(BlockDriverState
*bs
, Error
**errp
)
4983 Error
*local_err
= NULL
;
4990 if (bs
->drv
->bdrv_invalidate_cache
) {
4991 bs
->drv
->bdrv_invalidate_cache(bs
, &local_err
);
4992 } else if (bs
->file
) {
4993 bdrv_invalidate_cache(bs
->file
, &local_err
);
4996 error_propagate(errp
, local_err
);
5000 ret
= refresh_total_sectors(bs
, bs
->total_sectors
);
5002 error_setg_errno(errp
, -ret
, "Could not refresh total sector count");
5007 void bdrv_invalidate_cache_all(Error
**errp
)
5009 BlockDriverState
*bs
;
5010 Error
*local_err
= NULL
;
5012 QTAILQ_FOREACH(bs
, &bdrv_states
, device_list
) {
5013 AioContext
*aio_context
= bdrv_get_aio_context(bs
);
5015 aio_context_acquire(aio_context
);
5016 bdrv_invalidate_cache(bs
, &local_err
);
5017 aio_context_release(aio_context
);
5019 error_propagate(errp
, local_err
);
5025 void bdrv_clear_incoming_migration_all(void)
5027 BlockDriverState
*bs
;
5029 QTAILQ_FOREACH(bs
, &bdrv_states
, device_list
) {
5030 AioContext
*aio_context
= bdrv_get_aio_context(bs
);
5032 aio_context_acquire(aio_context
);
5033 bs
->open_flags
= bs
->open_flags
& ~(BDRV_O_INCOMING
);
5034 aio_context_release(aio_context
);
5038 int bdrv_flush(BlockDriverState
*bs
)
5046 if (qemu_in_coroutine()) {
5047 /* Fast-path if already in coroutine context */
5048 bdrv_flush_co_entry(&rwco
);
5050 AioContext
*aio_context
= bdrv_get_aio_context(bs
);
5052 co
= qemu_coroutine_create(bdrv_flush_co_entry
);
5053 qemu_coroutine_enter(co
, &rwco
);
5054 while (rwco
.ret
== NOT_DONE
) {
5055 aio_poll(aio_context
, true);
5062 typedef struct DiscardCo
{
5063 BlockDriverState
*bs
;
5068 static void coroutine_fn
bdrv_discard_co_entry(void *opaque
)
5070 DiscardCo
*rwco
= opaque
;
5072 rwco
->ret
= bdrv_co_discard(rwco
->bs
, rwco
->sector_num
, rwco
->nb_sectors
);
/* If no limit is specified in the BlockLimits, use a default
 * of 32768 512-byte sectors (16 MiB) per request.
 */
#define MAX_DISCARD_DEFAULT 32768
5080 int coroutine_fn
bdrv_co_discard(BlockDriverState
*bs
, int64_t sector_num
,
5087 } else if (bdrv_check_request(bs
, sector_num
, nb_sectors
)) {
5089 } else if (bs
->read_only
) {
5093 bdrv_reset_dirty(bs
, sector_num
, nb_sectors
);
5095 /* Do nothing if disabled. */
5096 if (!(bs
->open_flags
& BDRV_O_UNMAP
)) {
5100 if (!bs
->drv
->bdrv_co_discard
&& !bs
->drv
->bdrv_aio_discard
) {
5104 max_discard
= bs
->bl
.max_discard
? bs
->bl
.max_discard
: MAX_DISCARD_DEFAULT
;
5105 while (nb_sectors
> 0) {
5107 int num
= nb_sectors
;
5110 if (bs
->bl
.discard_alignment
&&
5111 num
>= bs
->bl
.discard_alignment
&&
5112 sector_num
% bs
->bl
.discard_alignment
) {
5113 if (num
> bs
->bl
.discard_alignment
) {
5114 num
= bs
->bl
.discard_alignment
;
5116 num
-= sector_num
% bs
->bl
.discard_alignment
;
5119 /* limit request size */
5120 if (num
> max_discard
) {
5124 if (bs
->drv
->bdrv_co_discard
) {
5125 ret
= bs
->drv
->bdrv_co_discard(bs
, sector_num
, num
);
5127 BlockDriverAIOCB
*acb
;
5128 CoroutineIOCompletion co
= {
5129 .coroutine
= qemu_coroutine_self(),
5132 acb
= bs
->drv
->bdrv_aio_discard(bs
, sector_num
, nb_sectors
,
5133 bdrv_co_io_em_complete
, &co
);
5137 qemu_coroutine_yield();
5141 if (ret
&& ret
!= -ENOTSUP
) {
5151 int bdrv_discard(BlockDriverState
*bs
, int64_t sector_num
, int nb_sectors
)
5156 .sector_num
= sector_num
,
5157 .nb_sectors
= nb_sectors
,
5161 if (qemu_in_coroutine()) {
5162 /* Fast-path if already in coroutine context */
5163 bdrv_discard_co_entry(&rwco
);
5165 AioContext
*aio_context
= bdrv_get_aio_context(bs
);
5167 co
= qemu_coroutine_create(bdrv_discard_co_entry
);
5168 qemu_coroutine_enter(co
, &rwco
);
5169 while (rwco
.ret
== NOT_DONE
) {
5170 aio_poll(aio_context
, true);
5177 /**************************************************************/
5178 /* removable device support */
5181 * Return TRUE if the media is present
5183 int bdrv_is_inserted(BlockDriverState
*bs
)
5185 BlockDriver
*drv
= bs
->drv
;
5189 if (!drv
->bdrv_is_inserted
)
5191 return drv
->bdrv_is_inserted(bs
);
5195 * Return whether the media changed since the last call to this
5196 * function, or -ENOTSUP if we don't know. Most drivers don't know.
5198 int bdrv_media_changed(BlockDriverState
*bs
)
5200 BlockDriver
*drv
= bs
->drv
;
5202 if (drv
&& drv
->bdrv_media_changed
) {
5203 return drv
->bdrv_media_changed(bs
);
5209 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
5211 void bdrv_eject(BlockDriverState
*bs
, bool eject_flag
)
5213 BlockDriver
*drv
= bs
->drv
;
5215 if (drv
&& drv
->bdrv_eject
) {
5216 drv
->bdrv_eject(bs
, eject_flag
);
5219 if (bs
->device_name
[0] != '\0') {
5220 qapi_event_send_device_tray_moved(bdrv_get_device_name(bs
),
5221 eject_flag
, &error_abort
);
5226 * Lock or unlock the media (if it is locked, the user won't be able
5227 * to eject it manually).
5229 void bdrv_lock_medium(BlockDriverState
*bs
, bool locked
)
5231 BlockDriver
*drv
= bs
->drv
;
5233 trace_bdrv_lock_medium(bs
, locked
);
5235 if (drv
&& drv
->bdrv_lock_medium
) {
5236 drv
->bdrv_lock_medium(bs
, locked
);
5240 /* needed for generic scsi interface */
5242 int bdrv_ioctl(BlockDriverState
*bs
, unsigned long int req
, void *buf
)
5244 BlockDriver
*drv
= bs
->drv
;
5246 if (drv
&& drv
->bdrv_ioctl
)
5247 return drv
->bdrv_ioctl(bs
, req
, buf
);
5251 BlockDriverAIOCB
*bdrv_aio_ioctl(BlockDriverState
*bs
,
5252 unsigned long int req
, void *buf
,
5253 BlockDriverCompletionFunc
*cb
, void *opaque
)
5255 BlockDriver
*drv
= bs
->drv
;
5257 if (drv
&& drv
->bdrv_aio_ioctl
)
5258 return drv
->bdrv_aio_ioctl(bs
, req
, buf
, cb
, opaque
);
5262 void bdrv_set_guest_block_size(BlockDriverState
*bs
, int align
)
5264 bs
->guest_block_size
= align
;
5267 void *qemu_blockalign(BlockDriverState
*bs
, size_t size
)
5269 return qemu_memalign(bdrv_opt_mem_align(bs
), size
);
5273 * Check if all memory in this vector is sector aligned.
5275 bool bdrv_qiov_is_aligned(BlockDriverState
*bs
, QEMUIOVector
*qiov
)
5278 size_t alignment
= bdrv_opt_mem_align(bs
);
5280 for (i
= 0; i
< qiov
->niov
; i
++) {
5281 if ((uintptr_t) qiov
->iov
[i
].iov_base
% alignment
) {
5284 if (qiov
->iov
[i
].iov_len
% alignment
) {
5292 BdrvDirtyBitmap
*bdrv_create_dirty_bitmap(BlockDriverState
*bs
, int granularity
,
5295 int64_t bitmap_size
;
5296 BdrvDirtyBitmap
*bitmap
;
5298 assert((granularity
& (granularity
- 1)) == 0);
5300 granularity
>>= BDRV_SECTOR_BITS
;
5301 assert(granularity
);
5302 bitmap_size
= bdrv_getlength(bs
);
5303 if (bitmap_size
< 0) {
5304 error_setg_errno(errp
, -bitmap_size
, "could not get length of device");
5305 errno
= -bitmap_size
;
5308 bitmap_size
>>= BDRV_SECTOR_BITS
;
5309 bitmap
= g_malloc0(sizeof(BdrvDirtyBitmap
));
5310 bitmap
->bitmap
= hbitmap_alloc(bitmap_size
, ffs(granularity
) - 1);
5311 QLIST_INSERT_HEAD(&bs
->dirty_bitmaps
, bitmap
, list
);
5315 void bdrv_release_dirty_bitmap(BlockDriverState
*bs
, BdrvDirtyBitmap
*bitmap
)
5317 BdrvDirtyBitmap
*bm
, *next
;
5318 QLIST_FOREACH_SAFE(bm
, &bs
->dirty_bitmaps
, list
, next
) {
5320 QLIST_REMOVE(bitmap
, list
);
5321 hbitmap_free(bitmap
->bitmap
);
5328 BlockDirtyInfoList
*bdrv_query_dirty_bitmaps(BlockDriverState
*bs
)
5330 BdrvDirtyBitmap
*bm
;
5331 BlockDirtyInfoList
*list
= NULL
;
5332 BlockDirtyInfoList
**plist
= &list
;
5334 QLIST_FOREACH(bm
, &bs
->dirty_bitmaps
, list
) {
5335 BlockDirtyInfo
*info
= g_malloc0(sizeof(BlockDirtyInfo
));
5336 BlockDirtyInfoList
*entry
= g_malloc0(sizeof(BlockDirtyInfoList
));
5337 info
->count
= bdrv_get_dirty_count(bs
, bm
);
5339 ((int64_t) BDRV_SECTOR_SIZE
<< hbitmap_granularity(bm
->bitmap
));
5340 entry
->value
= info
;
5342 plist
= &entry
->next
;
5348 int bdrv_get_dirty(BlockDriverState
*bs
, BdrvDirtyBitmap
*bitmap
, int64_t sector
)
5351 return hbitmap_get(bitmap
->bitmap
, sector
);
5357 void bdrv_dirty_iter_init(BlockDriverState
*bs
,
5358 BdrvDirtyBitmap
*bitmap
, HBitmapIter
*hbi
)
5360 hbitmap_iter_init(hbi
, bitmap
->bitmap
, 0);
5363 void bdrv_set_dirty(BlockDriverState
*bs
, int64_t cur_sector
,
5366 BdrvDirtyBitmap
*bitmap
;
5367 QLIST_FOREACH(bitmap
, &bs
->dirty_bitmaps
, list
) {
5368 hbitmap_set(bitmap
->bitmap
, cur_sector
, nr_sectors
);
5372 void bdrv_reset_dirty(BlockDriverState
*bs
, int64_t cur_sector
, int nr_sectors
)
5374 BdrvDirtyBitmap
*bitmap
;
5375 QLIST_FOREACH(bitmap
, &bs
->dirty_bitmaps
, list
) {
5376 hbitmap_reset(bitmap
->bitmap
, cur_sector
, nr_sectors
);
5380 int64_t bdrv_get_dirty_count(BlockDriverState
*bs
, BdrvDirtyBitmap
*bitmap
)
5382 return hbitmap_count(bitmap
->bitmap
);
5385 /* Get a reference to bs */
5386 void bdrv_ref(BlockDriverState
*bs
)
5391 /* Release a previously grabbed reference to bs.
5392 * If after releasing, reference count is zero, the BlockDriverState is
5394 void bdrv_unref(BlockDriverState
*bs
)
5396 assert(bs
->refcnt
> 0);
5397 if (--bs
->refcnt
== 0) {
5402 struct BdrvOpBlocker
{
5404 QLIST_ENTRY(BdrvOpBlocker
) list
;
5407 bool bdrv_op_is_blocked(BlockDriverState
*bs
, BlockOpType op
, Error
**errp
)
5409 BdrvOpBlocker
*blocker
;
5410 assert((int) op
>= 0 && op
< BLOCK_OP_TYPE_MAX
);
5411 if (!QLIST_EMPTY(&bs
->op_blockers
[op
])) {
5412 blocker
= QLIST_FIRST(&bs
->op_blockers
[op
]);
5414 error_setg(errp
, "Device '%s' is busy: %s",
5415 bs
->device_name
, error_get_pretty(blocker
->reason
));
5422 void bdrv_op_block(BlockDriverState
*bs
, BlockOpType op
, Error
*reason
)
5424 BdrvOpBlocker
*blocker
;
5425 assert((int) op
>= 0 && op
< BLOCK_OP_TYPE_MAX
);
5427 blocker
= g_malloc0(sizeof(BdrvOpBlocker
));
5428 blocker
->reason
= reason
;
5429 QLIST_INSERT_HEAD(&bs
->op_blockers
[op
], blocker
, list
);
5432 void bdrv_op_unblock(BlockDriverState
*bs
, BlockOpType op
, Error
*reason
)
5434 BdrvOpBlocker
*blocker
, *next
;
5435 assert((int) op
>= 0 && op
< BLOCK_OP_TYPE_MAX
);
5436 QLIST_FOREACH_SAFE(blocker
, &bs
->op_blockers
[op
], list
, next
) {
5437 if (blocker
->reason
== reason
) {
5438 QLIST_REMOVE(blocker
, list
);
5444 void bdrv_op_block_all(BlockDriverState
*bs
, Error
*reason
)
5447 for (i
= 0; i
< BLOCK_OP_TYPE_MAX
; i
++) {
5448 bdrv_op_block(bs
, i
, reason
);
5452 void bdrv_op_unblock_all(BlockDriverState
*bs
, Error
*reason
)
5455 for (i
= 0; i
< BLOCK_OP_TYPE_MAX
; i
++) {
5456 bdrv_op_unblock(bs
, i
, reason
);
5460 bool bdrv_op_blocker_is_empty(BlockDriverState
*bs
)
5464 for (i
= 0; i
< BLOCK_OP_TYPE_MAX
; i
++) {
5465 if (!QLIST_EMPTY(&bs
->op_blockers
[i
])) {
5472 void bdrv_iostatus_enable(BlockDriverState
*bs
)
5474 bs
->iostatus_enabled
= true;
5475 bs
->iostatus
= BLOCK_DEVICE_IO_STATUS_OK
;
5478 /* The I/O status is only enabled if the drive explicitly
5479 * enables it _and_ the VM is configured to stop on errors */
5480 bool bdrv_iostatus_is_enabled(const BlockDriverState
*bs
)
5482 return (bs
->iostatus_enabled
&&
5483 (bs
->on_write_error
== BLOCKDEV_ON_ERROR_ENOSPC
||
5484 bs
->on_write_error
== BLOCKDEV_ON_ERROR_STOP
||
5485 bs
->on_read_error
== BLOCKDEV_ON_ERROR_STOP
));
5488 void bdrv_iostatus_disable(BlockDriverState
*bs
)
5490 bs
->iostatus_enabled
= false;
5493 void bdrv_iostatus_reset(BlockDriverState
*bs
)
5495 if (bdrv_iostatus_is_enabled(bs
)) {
5496 bs
->iostatus
= BLOCK_DEVICE_IO_STATUS_OK
;
5498 block_job_iostatus_reset(bs
->job
);
5503 void bdrv_iostatus_set_err(BlockDriverState
*bs
, int error
)
5505 assert(bdrv_iostatus_is_enabled(bs
));
5506 if (bs
->iostatus
== BLOCK_DEVICE_IO_STATUS_OK
) {
5507 bs
->iostatus
= error
== ENOSPC
? BLOCK_DEVICE_IO_STATUS_NOSPACE
:
5508 BLOCK_DEVICE_IO_STATUS_FAILED
;
5513 bdrv_acct_start(BlockDriverState
*bs
, BlockAcctCookie
*cookie
, int64_t bytes
,
5514 enum BlockAcctType type
)
5516 assert(type
< BDRV_MAX_IOTYPE
);
5518 cookie
->bytes
= bytes
;
5519 cookie
->start_time_ns
= get_clock();
5520 cookie
->type
= type
;
5524 bdrv_acct_done(BlockDriverState
*bs
, BlockAcctCookie
*cookie
)
5526 assert(cookie
->type
< BDRV_MAX_IOTYPE
);
5528 bs
->nr_bytes
[cookie
->type
] += cookie
->bytes
;
5529 bs
->nr_ops
[cookie
->type
]++;
5530 bs
->total_time_ns
[cookie
->type
] += get_clock() - cookie
->start_time_ns
;
5533 void bdrv_img_create(const char *filename
, const char *fmt
,
5534 const char *base_filename
, const char *base_fmt
,
5535 char *options
, uint64_t img_size
, int flags
,
5536 Error
**errp
, bool quiet
)
5538 QemuOptsList
*create_opts
= NULL
;
5539 QemuOpts
*opts
= NULL
;
5540 const char *backing_fmt
, *backing_file
;
5542 BlockDriver
*drv
, *proto_drv
;
5543 BlockDriver
*backing_drv
= NULL
;
5544 Error
*local_err
= NULL
;
5547 /* Find driver and parse its options */
5548 drv
= bdrv_find_format(fmt
);
5550 error_setg(errp
, "Unknown file format '%s'", fmt
);
5554 proto_drv
= bdrv_find_protocol(filename
, true);
5556 error_setg(errp
, "Unknown protocol '%s'", filename
);
5560 create_opts
= qemu_opts_append(create_opts
, drv
->create_opts
);
5561 create_opts
= qemu_opts_append(create_opts
, proto_drv
->create_opts
);
5563 /* Create parameter list with default values */
5564 opts
= qemu_opts_create(create_opts
, NULL
, 0, &error_abort
);
5565 qemu_opt_set_number(opts
, BLOCK_OPT_SIZE
, img_size
);
5567 /* Parse -o options */
5569 if (qemu_opts_do_parse(opts
, options
, NULL
) != 0) {
5570 error_setg(errp
, "Invalid options for file format '%s'", fmt
);
5575 if (base_filename
) {
5576 if (qemu_opt_set(opts
, BLOCK_OPT_BACKING_FILE
, base_filename
)) {
5577 error_setg(errp
, "Backing file not supported for file format '%s'",
5584 if (qemu_opt_set(opts
, BLOCK_OPT_BACKING_FMT
, base_fmt
)) {
5585 error_setg(errp
, "Backing file format not supported for file "
5586 "format '%s'", fmt
);
5591 backing_file
= qemu_opt_get(opts
, BLOCK_OPT_BACKING_FILE
);
5593 if (!strcmp(filename
, backing_file
)) {
5594 error_setg(errp
, "Error: Trying to create an image with the "
5595 "same filename as the backing file");
5600 backing_fmt
= qemu_opt_get(opts
, BLOCK_OPT_BACKING_FMT
);
5602 backing_drv
= bdrv_find_format(backing_fmt
);
5604 error_setg(errp
, "Unknown backing file format '%s'",
5610 // The size for the image must always be specified, with one exception:
5611 // If we are using a backing file, we can obtain the size from there
5612 size
= qemu_opt_get_size(opts
, BLOCK_OPT_SIZE
, 0);
5615 BlockDriverState
*bs
;
5619 /* backing files always opened read-only */
5621 flags
& ~(BDRV_O_RDWR
| BDRV_O_SNAPSHOT
| BDRV_O_NO_BACKING
);
5624 ret
= bdrv_open(&bs
, backing_file
, NULL
, NULL
, back_flags
,
5625 backing_drv
, &local_err
);
5627 error_setg_errno(errp
, -ret
, "Could not open '%s': %s",
5629 error_get_pretty(local_err
));
5630 error_free(local_err
);
5634 bdrv_get_geometry(bs
, &size
);
5637 qemu_opt_set_number(opts
, BLOCK_OPT_SIZE
, size
);
5641 error_setg(errp
, "Image creation needs a size parameter");
5647 printf("Formatting '%s', fmt=%s ", filename
, fmt
);
5648 qemu_opts_print(opts
);
5652 ret
= bdrv_create(drv
, filename
, opts
, &local_err
);
5654 if (ret
== -EFBIG
) {
5655 /* This is generally a better message than whatever the driver would
5656 * deliver (especially because of the cluster_size_hint), since that
5657 * is most probably not much different from "image too large". */
5658 const char *cluster_size_hint
= "";
5659 if (qemu_opt_get_size(opts
, BLOCK_OPT_CLUSTER_SIZE
, 0)) {
5660 cluster_size_hint
= " (try using a larger cluster size)";
5662 error_setg(errp
, "The image size is too large for file format '%s'"
5663 "%s", fmt
, cluster_size_hint
);
5664 error_free(local_err
);
5669 qemu_opts_del(opts
);
5670 qemu_opts_free(create_opts
);
5672 error_propagate(errp
, local_err
);
5676 AioContext
*bdrv_get_aio_context(BlockDriverState
*bs
)
5678 return bs
->aio_context
;
5681 void bdrv_detach_aio_context(BlockDriverState
*bs
)
5687 if (bs
->io_limits_enabled
) {
5688 throttle_detach_aio_context(&bs
->throttle_state
);
5690 if (bs
->drv
->bdrv_detach_aio_context
) {
5691 bs
->drv
->bdrv_detach_aio_context(bs
);
5694 bdrv_detach_aio_context(bs
->file
);
5696 if (bs
->backing_hd
) {
5697 bdrv_detach_aio_context(bs
->backing_hd
);
5700 bs
->aio_context
= NULL
;
5703 void bdrv_attach_aio_context(BlockDriverState
*bs
,
5704 AioContext
*new_context
)
5710 bs
->aio_context
= new_context
;
5712 if (bs
->backing_hd
) {
5713 bdrv_attach_aio_context(bs
->backing_hd
, new_context
);
5716 bdrv_attach_aio_context(bs
->file
, new_context
);
5718 if (bs
->drv
->bdrv_attach_aio_context
) {
5719 bs
->drv
->bdrv_attach_aio_context(bs
, new_context
);
5721 if (bs
->io_limits_enabled
) {
5722 throttle_attach_aio_context(&bs
->throttle_state
, new_context
);
5726 void bdrv_set_aio_context(BlockDriverState
*bs
, AioContext
*new_context
)
5728 bdrv_drain_all(); /* ensure there are no in-flight requests */
5730 bdrv_detach_aio_context(bs
);
5732 /* This function executes in the old AioContext so acquire the new one in
5733 * case it runs in a different thread.
5735 aio_context_acquire(new_context
);
5736 bdrv_attach_aio_context(bs
, new_context
);
5737 aio_context_release(new_context
);
5740 void bdrv_add_before_write_notifier(BlockDriverState
*bs
,
5741 NotifierWithReturn
*notifier
)
5743 notifier_with_return_list_add(&bs
->before_write_notifiers
, notifier
);
5746 int bdrv_amend_options(BlockDriverState
*bs
, QemuOpts
*opts
)
5748 if (!bs
->drv
->bdrv_amend_options
) {
5751 return bs
->drv
->bdrv_amend_options(bs
, opts
);
5754 /* This function will be called by the bdrv_recurse_is_first_non_filter method
5755 * of block filter and by bdrv_is_first_non_filter.
5756 * It is used to test if the given bs is the candidate or recurse more in the
5759 bool bdrv_recurse_is_first_non_filter(BlockDriverState
*bs
,
5760 BlockDriverState
*candidate
)
5762 /* return false if basic checks fails */
5763 if (!bs
|| !bs
->drv
) {
5767 /* the code reached a non block filter driver -> check if the bs is
5768 * the same as the candidate. It's the recursion termination condition.
5770 if (!bs
->drv
->is_filter
) {
5771 return bs
== candidate
;
5773 /* Down this path the driver is a block filter driver */
5775 /* If the block filter recursion method is defined use it to recurse down
5778 if (bs
->drv
->bdrv_recurse_is_first_non_filter
) {
5779 return bs
->drv
->bdrv_recurse_is_first_non_filter(bs
, candidate
);
5782 /* the driver is a block filter but don't allow to recurse -> return false
5787 /* This function checks if the candidate is the first non-filter bs down its
5788 * bs chain. Since we don't have pointers to parents, it explores all bs chains
5789 * from the top. Some filters can choose not to pass down the recursion.
5791 bool bdrv_is_first_non_filter(BlockDriverState
*candidate
)
5793 BlockDriverState
*bs
;
5795 /* walk down the bs forest recursively */
5796 QTAILQ_FOREACH(bs
, &bdrv_states
, device_list
) {
5799 /* try to recurse in this top level bs */
5800 perm
= bdrv_recurse_is_first_non_filter(bs
, candidate
);
5802 /* candidate is the first non filter */