]> git.ipfire.org Git - thirdparty/qemu.git/blame - block.c
block: Rewrite bdrv_next()
[thirdparty/qemu.git] / block.c
CommitLineData
fc01f7e7
FB
1/*
2 * QEMU System Emulator block driver
5fafdf24 3 *
fc01f7e7 4 * Copyright (c) 2003 Fabrice Bellard
5fafdf24 5 *
fc01f7e7
FB
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
d38ea87a 24#include "qemu/osdep.h"
faf07963 25#include "qemu-common.h"
6d519a5f 26#include "trace.h"
737e150e
PB
27#include "block/block_int.h"
28#include "block/blockjob.h"
d49b6836 29#include "qemu/error-report.h"
1de7afc9 30#include "qemu/module.h"
cc7a8ea7 31#include "qapi/qmp/qerror.h"
91a097e7 32#include "qapi/qmp/qbool.h"
7b1b5d19 33#include "qapi/qmp/qjson.h"
bfb197e0 34#include "sysemu/block-backend.h"
9c17d615 35#include "sysemu/sysemu.h"
1de7afc9 36#include "qemu/notify.h"
10817bf0 37#include "qemu/coroutine.h"
c13163fb 38#include "block/qapi.h"
b2023818 39#include "qmp-commands.h"
1de7afc9 40#include "qemu/timer.h"
a5ee7bd4 41#include "qapi-event.h"
db628338 42#include "block/throttle-groups.h"
fc01f7e7 43
71e72a19 44#ifdef CONFIG_BSD
7674e7bf 45#include <sys/ioctl.h>
72cf2d4f 46#include <sys/queue.h>
c5e97233 47#ifndef __DragonFly__
7674e7bf
FB
48#include <sys/disk.h>
49#endif
c5e97233 50#endif
7674e7bf 51
49dc768d
AL
52#ifdef _WIN32
53#include <windows.h>
54#endif
55
1c9805a3
SH
56#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
57
c69a4dd8 58struct BdrvStates bdrv_states = QTAILQ_HEAD_INITIALIZER(bdrv_states);
7ee930d0 59
dc364f4c
BC
60static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
61 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
62
2c1d04e0
HR
63static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states =
64 QTAILQ_HEAD_INITIALIZER(all_bdrv_states);
65
8a22f02a
SH
66static QLIST_HEAD(, BlockDriver) bdrv_drivers =
67 QLIST_HEAD_INITIALIZER(bdrv_drivers);
ea2384d3 68
f3930ed0
KW
69static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
70 const char *reference, QDict *options, int flags,
71 BlockDriverState *parent,
ce343771 72 const BdrvChildRole *child_role, Error **errp);
f3930ed0 73
eb852011
MA
74/* If non-zero, use only whitelisted block drivers */
75static int use_bdrv_whitelist;
76
64dff520
HR
77static void bdrv_close(BlockDriverState *bs);
78
9e0b22f4
SH
79#ifdef _WIN32
/*
 * Return 1 if @filename begins with an ASCII letter immediately followed
 * by a colon (e.g. "c:"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char first = filename[0];
    const int starts_with_letter = (first >= 'a' && first <= 'z') ||
                                   (first >= 'A' && first <= 'Z');

    /* filename[1] is only inspected once the first character matched */
    return starts_with_letter && filename[1] == ':';
}
86
/*
 * Return 1 if @filename is a bare drive specification ("d:") or starts
 * with one of the device prefixes "\\.\" or "//./", 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    return (is_windows_drive_prefix(filename) && filename[2] == '\0') ||
           strstart(filename, "\\\\.\\", NULL) ||
           strstart(filename, "//./", NULL);
}
97#endif
98
339064d5
KW
99size_t bdrv_opt_mem_align(BlockDriverState *bs)
100{
101 if (!bs || !bs->drv) {
459b4e66
DL
102 /* page size or 4k (hdd sector size) should be on the safe side */
103 return MAX(4096, getpagesize());
339064d5
KW
104 }
105
106 return bs->bl.opt_mem_alignment;
107}
108
4196d2f0
DL
109size_t bdrv_min_mem_align(BlockDriverState *bs)
110{
111 if (!bs || !bs->drv) {
459b4e66
DL
112 /* page size or 4k (hdd sector size) should be on the safe side */
113 return MAX(4096, getpagesize());
4196d2f0
DL
114 }
115
116 return bs->bl.min_mem_alignment;
117}
118
/*
 * Check if the path starts with "<protocol>:", i.e. whether a ':' occurs
 * before any path separator.  On Windows, drive specifications are never
 * treated as protocols.  Returns 1 if so, 0 otherwise.
 */
int path_has_protocol(const char *path)
{
#ifdef _WIN32
    static const char delimiters[] = ":/\\";

    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    static const char delimiters[] = ":/";
#endif

    /* Index of the first delimiter, or of the terminating NUL if none */
    return path[strcspn(path, delimiters)] == ':';
}
136
/*
 * Return 1 if @path is absolute, 0 otherwise.  A leading '/' always
 * counts; on Windows a leading '\\' and any drive specification
 * (including names like "\\.\d:") count as well.
 */
int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (*path == '\\') {
        return 1;
    }
#endif
    return *path == '/';
}
149
83f64091
FB
/*
 * Build the path of @filename in @dest (at most @dest_size bytes,
 * always NUL-terminated when @dest_size > 0).
 *
 * If @filename is absolute it is copied as-is.  Otherwise it is appended
 * to the directory part of @base_path, which ends after the last path
 * separator or after the first ':' (so URLs are supported), whichever
 * comes later.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon, *after_sep, *prefix_end;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* End of an optional "<protocol>:" prefix */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* End of the directory component */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* Keep whichever prefix reaches further into base_path */
    prefix_end = after_sep > after_colon ? after_sep : after_colon;

    prefix_len = prefix_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
193
0a82855a
HR
194void bdrv_get_full_backing_filename_from_filename(const char *backed,
195 const char *backing,
9f07429e
HR
196 char *dest, size_t sz,
197 Error **errp)
dc5a1371 198{
9f07429e
HR
199 if (backing[0] == '\0' || path_has_protocol(backing) ||
200 path_is_absolute(backing))
201 {
0a82855a 202 pstrcpy(dest, sz, backing);
9f07429e
HR
203 } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
204 error_setg(errp, "Cannot use relative backing file names for '%s'",
205 backed);
dc5a1371 206 } else {
0a82855a 207 path_combine(dest, sz, backed, backing);
dc5a1371
PB
208 }
209}
210
9f07429e
HR
211void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
212 Error **errp)
0a82855a 213{
9f07429e
HR
214 char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
215
216 bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
217 dest, sz, errp);
0a82855a
HR
218}
219
0eb7217e
SH
220void bdrv_register(BlockDriver *bdrv)
221{
222 bdrv_setup_io_funcs(bdrv);
b2e12bc6 223
8a22f02a 224 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
ea2384d3 225}
b338082b 226
7f06d47e 227BlockDriverState *bdrv_new_root(void)
b338082b 228{
7f06d47e 229 BlockDriverState *bs = bdrv_new();
e4e9986b 230
e4e9986b 231 QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
e4e9986b
MA
232 return bs;
233}
234
235BlockDriverState *bdrv_new(void)
236{
237 BlockDriverState *bs;
238 int i;
239
5839e53b 240 bs = g_new0(BlockDriverState, 1);
e4654d2d 241 QLIST_INIT(&bs->dirty_bitmaps);
fbe40ff7
FZ
242 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
243 QLIST_INIT(&bs->op_blockers[i]);
244 }
d616b224 245 notifier_with_return_list_init(&bs->before_write_notifiers);
cc0681c4
BC
246 qemu_co_queue_init(&bs->throttled_reqs[0]);
247 qemu_co_queue_init(&bs->throttled_reqs[1]);
9fcb0251 248 bs->refcnt = 1;
dcd04228 249 bs->aio_context = qemu_get_aio_context();
d7d512f6 250
2c1d04e0
HR
251 QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list);
252
b338082b
FB
253 return bs;
254}
255
ea2384d3
FB
256BlockDriver *bdrv_find_format(const char *format_name)
257{
258 BlockDriver *drv1;
8a22f02a
SH
259 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
260 if (!strcmp(drv1->format_name, format_name)) {
ea2384d3 261 return drv1;
8a22f02a 262 }
ea2384d3
FB
263 }
264 return NULL;
265}
266
b64ec4e4 267static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
eb852011 268{
b64ec4e4
FZ
269 static const char *whitelist_rw[] = {
270 CONFIG_BDRV_RW_WHITELIST
271 };
272 static const char *whitelist_ro[] = {
273 CONFIG_BDRV_RO_WHITELIST
eb852011
MA
274 };
275 const char **p;
276
b64ec4e4 277 if (!whitelist_rw[0] && !whitelist_ro[0]) {
eb852011 278 return 1; /* no whitelist, anything goes */
b64ec4e4 279 }
eb852011 280
b64ec4e4 281 for (p = whitelist_rw; *p; p++) {
eb852011
MA
282 if (!strcmp(drv->format_name, *p)) {
283 return 1;
284 }
285 }
b64ec4e4
FZ
286 if (read_only) {
287 for (p = whitelist_ro; *p; p++) {
288 if (!strcmp(drv->format_name, *p)) {
289 return 1;
290 }
291 }
292 }
eb852011
MA
293 return 0;
294}
295
5b7e1542
ZYW
296typedef struct CreateCo {
297 BlockDriver *drv;
298 char *filename;
83d0521a 299 QemuOpts *opts;
5b7e1542 300 int ret;
cc84d90f 301 Error *err;
5b7e1542
ZYW
302} CreateCo;
303
304static void coroutine_fn bdrv_create_co_entry(void *opaque)
305{
cc84d90f
HR
306 Error *local_err = NULL;
307 int ret;
308
5b7e1542
ZYW
309 CreateCo *cco = opaque;
310 assert(cco->drv);
311
c282e1fd 312 ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
84d18f06 313 if (local_err) {
cc84d90f
HR
314 error_propagate(&cco->err, local_err);
315 }
316 cco->ret = ret;
5b7e1542
ZYW
317}
318
0e7e1989 319int bdrv_create(BlockDriver *drv, const char* filename,
83d0521a 320 QemuOpts *opts, Error **errp)
ea2384d3 321{
5b7e1542
ZYW
322 int ret;
323
324 Coroutine *co;
325 CreateCo cco = {
326 .drv = drv,
327 .filename = g_strdup(filename),
83d0521a 328 .opts = opts,
5b7e1542 329 .ret = NOT_DONE,
cc84d90f 330 .err = NULL,
5b7e1542
ZYW
331 };
332
c282e1fd 333 if (!drv->bdrv_create) {
cc84d90f 334 error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
80168bff
LC
335 ret = -ENOTSUP;
336 goto out;
5b7e1542
ZYW
337 }
338
339 if (qemu_in_coroutine()) {
340 /* Fast-path if already in coroutine context */
341 bdrv_create_co_entry(&cco);
342 } else {
343 co = qemu_coroutine_create(bdrv_create_co_entry);
344 qemu_coroutine_enter(co, &cco);
345 while (cco.ret == NOT_DONE) {
b47ec2c4 346 aio_poll(qemu_get_aio_context(), true);
5b7e1542
ZYW
347 }
348 }
349
350 ret = cco.ret;
cc84d90f 351 if (ret < 0) {
84d18f06 352 if (cco.err) {
cc84d90f
HR
353 error_propagate(errp, cco.err);
354 } else {
355 error_setg_errno(errp, -ret, "Could not create image");
356 }
357 }
0e7e1989 358
80168bff
LC
359out:
360 g_free(cco.filename);
5b7e1542 361 return ret;
ea2384d3
FB
362}
363
c282e1fd 364int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
84a12e66
CH
365{
366 BlockDriver *drv;
cc84d90f
HR
367 Error *local_err = NULL;
368 int ret;
84a12e66 369
b65a5e12 370 drv = bdrv_find_protocol(filename, true, errp);
84a12e66 371 if (drv == NULL) {
16905d71 372 return -ENOENT;
84a12e66
CH
373 }
374
c282e1fd 375 ret = bdrv_create(drv, filename, opts, &local_err);
84d18f06 376 if (local_err) {
cc84d90f
HR
377 error_propagate(errp, local_err);
378 }
379 return ret;
84a12e66
CH
380}
381
892b7de8
ET
382/**
383 * Try to get @bs's logical and physical block size.
384 * On success, store them in @bsz struct and return 0.
385 * On failure return -errno.
386 * @bs must not be empty.
387 */
388int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
389{
390 BlockDriver *drv = bs->drv;
391
392 if (drv && drv->bdrv_probe_blocksizes) {
393 return drv->bdrv_probe_blocksizes(bs, bsz);
394 }
395
396 return -ENOTSUP;
397}
398
399/**
400 * Try to get @bs's geometry (cyls, heads, sectors).
401 * On success, store them in @geo struct and return 0.
402 * On failure return -errno.
403 * @bs must not be empty.
404 */
405int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
406{
407 BlockDriver *drv = bs->drv;
408
409 if (drv && drv->bdrv_probe_geometry) {
410 return drv->bdrv_probe_geometry(bs, geo);
411 }
412
413 return -ENOTSUP;
414}
415
eba25057
JM
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename: buffer that receives the path of the created file
 * @size:     capacity of @filename in bytes; on Windows it must be at
 *            least MAX_PATH
 *
 * On POSIX hosts the file is created in $TMPDIR, or in /var/tmp when
 * TMPDIR is not set.
 *
 * Return 0 upon success, otherwise a negative errno value (on Windows,
 * the negated GetLastError() code).  -EOVERFLOW is returned when the
 * path does not fit into @filename.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    int saved_errno;
    const char *tmpdir;

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /*
         * Capture close()'s error before cleaning up: a failing unlink()
         * would overwrite errno and the wrong code would be reported.
         */
        saved_errno = errno;
        unlink(filename);
        return -saved_errno;
    }
    return 0;
#endif
}
fc01f7e7 451
84a12e66
CH
452/*
453 * Detect host devices. By convention, /dev/cdrom[N] is always
454 * recognized as a host CDROM.
455 */
456static BlockDriver *find_hdev_driver(const char *filename)
457{
458 int score_max = 0, score;
459 BlockDriver *drv = NULL, *d;
460
461 QLIST_FOREACH(d, &bdrv_drivers, list) {
462 if (d->bdrv_probe_device) {
463 score = d->bdrv_probe_device(filename);
464 if (score > score_max) {
465 score_max = score;
466 drv = d;
467 }
468 }
469 }
470
471 return drv;
472}
473
98289620 474BlockDriver *bdrv_find_protocol(const char *filename,
b65a5e12
HR
475 bool allow_protocol_prefix,
476 Error **errp)
83f64091
FB
477{
478 BlockDriver *drv1;
479 char protocol[128];
1cec71e3 480 int len;
83f64091 481 const char *p;
19cb3738 482
66f82cee
KW
483 /* TODO Drivers without bdrv_file_open must be specified explicitly */
484
39508e7a
CH
485 /*
486 * XXX(hch): we really should not let host device detection
487 * override an explicit protocol specification, but moving this
488 * later breaks access to device names with colons in them.
489 * Thanks to the brain-dead persistent naming schemes on udev-
490 * based Linux systems those actually are quite common.
491 */
492 drv1 = find_hdev_driver(filename);
493 if (drv1) {
494 return drv1;
495 }
496
98289620 497 if (!path_has_protocol(filename) || !allow_protocol_prefix) {
ef810437 498 return &bdrv_file;
84a12e66 499 }
98289620 500
9e0b22f4
SH
501 p = strchr(filename, ':');
502 assert(p != NULL);
1cec71e3
AL
503 len = p - filename;
504 if (len > sizeof(protocol) - 1)
505 len = sizeof(protocol) - 1;
506 memcpy(protocol, filename, len);
507 protocol[len] = '\0';
8a22f02a 508 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
5fafdf24 509 if (drv1->protocol_name &&
8a22f02a 510 !strcmp(drv1->protocol_name, protocol)) {
83f64091 511 return drv1;
8a22f02a 512 }
83f64091 513 }
b65a5e12
HR
514
515 error_setg(errp, "Unknown protocol '%s'", protocol);
83f64091
FB
516 return NULL;
517}
518
c6684249
MA
519/*
520 * Guess image format by probing its contents.
521 * This is not a good idea when your image is raw (CVE-2008-2004), but
522 * we do it anyway for backward compatibility.
523 *
524 * @buf contains the image's first @buf_size bytes.
7cddd372
KW
525 * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
526 * but can be smaller if the image file is smaller)
c6684249
MA
527 * @filename is its filename.
528 *
529 * For all block drivers, call the bdrv_probe() method to get its
530 * probing score.
531 * Return the first block driver with the highest probing score.
532 */
38f3ef57
KW
533BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
534 const char *filename)
c6684249
MA
535{
536 int score_max = 0, score;
537 BlockDriver *drv = NULL, *d;
538
539 QLIST_FOREACH(d, &bdrv_drivers, list) {
540 if (d->bdrv_probe) {
541 score = d->bdrv_probe(buf, buf_size, filename);
542 if (score > score_max) {
543 score_max = score;
544 drv = d;
545 }
546 }
547 }
548
549 return drv;
550}
551
f500a6d3 552static int find_image_format(BlockDriverState *bs, const char *filename,
34b5d2c6 553 BlockDriver **pdrv, Error **errp)
f3a5d3f8 554{
c6684249 555 BlockDriver *drv;
7cddd372 556 uint8_t buf[BLOCK_PROBE_BUF_SIZE];
f500a6d3 557 int ret = 0;
f8ea0b00 558
08a00559 559 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
b192af8a 560 if (bdrv_is_sg(bs) || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
ef810437 561 *pdrv = &bdrv_raw;
c98ac35d 562 return ret;
1a396859 563 }
f8ea0b00 564
83f64091 565 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
83f64091 566 if (ret < 0) {
34b5d2c6
HR
567 error_setg_errno(errp, -ret, "Could not read image for determining its "
568 "format");
c98ac35d
SW
569 *pdrv = NULL;
570 return ret;
83f64091
FB
571 }
572
c6684249 573 drv = bdrv_probe_all(buf, ret, filename);
c98ac35d 574 if (!drv) {
34b5d2c6
HR
575 error_setg(errp, "Could not determine image format: No compatible "
576 "driver found");
c98ac35d
SW
577 ret = -ENOENT;
578 }
579 *pdrv = drv;
580 return ret;
ea2384d3
FB
581}
582
51762288
SH
583/**
584 * Set the current 'total_sectors' value
65a9bb25 585 * Return 0 on success, -errno on error.
51762288
SH
586 */
587static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
588{
589 BlockDriver *drv = bs->drv;
590
396759ad 591 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
b192af8a 592 if (bdrv_is_sg(bs))
396759ad
NB
593 return 0;
594
51762288
SH
595 /* query actual device if possible, otherwise just trust the hint */
596 if (drv->bdrv_getlength) {
597 int64_t length = drv->bdrv_getlength(bs);
598 if (length < 0) {
599 return length;
600 }
7e382003 601 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
51762288
SH
602 }
603
604 bs->total_sectors = hint;
605 return 0;
606}
607
cddff5ba
KW
608/**
609 * Combines a QDict of new block driver @options with any missing options taken
610 * from @old_options, so that leaving out an option defaults to its old value.
611 */
612static void bdrv_join_options(BlockDriverState *bs, QDict *options,
613 QDict *old_options)
614{
615 if (bs->drv && bs->drv->bdrv_join_options) {
616 bs->drv->bdrv_join_options(options, old_options);
617 } else {
618 qdict_join(options, old_options, false);
619 }
620}
621
9e8f1835
PB
622/**
623 * Set open flags for a given discard mode
624 *
625 * Return 0 on success, -1 if the discard mode was invalid.
626 */
627int bdrv_parse_discard_flags(const char *mode, int *flags)
628{
629 *flags &= ~BDRV_O_UNMAP;
630
631 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
632 /* do nothing */
633 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
634 *flags |= BDRV_O_UNMAP;
635 } else {
636 return -1;
637 }
638
639 return 0;
640}
641
c3993cdc
SH
642/**
643 * Set open flags for a given cache mode
644 *
645 * Return 0 on success, -1 if the cache mode was invalid.
646 */
647int bdrv_parse_cache_flags(const char *mode, int *flags)
648{
649 *flags &= ~BDRV_O_CACHE_MASK;
650
651 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
652 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
92196b2f
SH
653 } else if (!strcmp(mode, "directsync")) {
654 *flags |= BDRV_O_NOCACHE;
c3993cdc
SH
655 } else if (!strcmp(mode, "writeback")) {
656 *flags |= BDRV_O_CACHE_WB;
657 } else if (!strcmp(mode, "unsafe")) {
658 *flags |= BDRV_O_CACHE_WB;
659 *flags |= BDRV_O_NO_FLUSH;
660 } else if (!strcmp(mode, "writethrough")) {
661 /* this is the default */
662 } else {
663 return -1;
664 }
665
666 return 0;
667}
668
b1e6fc08 669/*
73176bee
KW
670 * Returns the options and flags that a temporary snapshot should get, based on
671 * the originally requested flags (the originally requested image will have
672 * flags like a backing file)
b1e6fc08 673 */
73176bee
KW
674static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options,
675 int parent_flags, QDict *parent_options)
b1e6fc08 676{
73176bee
KW
677 *child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
678
679 /* For temporary files, unconditional cache=unsafe is fine */
680 qdict_set_default_str(child_options, BDRV_OPT_CACHE_WB, "on");
681 qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off");
682 qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on");
b1e6fc08
KW
683}
684
0b50cc88 685/*
8e2160e2
KW
686 * Returns the options and flags that bs->file should get if a protocol driver
687 * is expected, based on the given options and flags for the parent BDS
0b50cc88 688 */
8e2160e2
KW
689static void bdrv_inherited_options(int *child_flags, QDict *child_options,
690 int parent_flags, QDict *parent_options)
0b50cc88 691{
8e2160e2
KW
692 int flags = parent_flags;
693
0b50cc88
KW
694 /* Enable protocol handling, disable format probing for bs->file */
695 flags |= BDRV_O_PROTOCOL;
696
91a097e7
KW
697 /* If the cache mode isn't explicitly set, inherit direct and no-flush from
698 * the parent. */
699 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
700 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
701
0b50cc88 702 /* Our block drivers take care to send flushes and respect unmap policy,
91a097e7
KW
703 * so we can default to enable both on lower layers regardless of the
704 * corresponding parent options. */
705 qdict_set_default_str(child_options, BDRV_OPT_CACHE_WB, "on");
706 flags |= BDRV_O_UNMAP;
0b50cc88 707
0b50cc88 708 /* Clear flags that only apply to the top layer */
5669b44d 709 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
0b50cc88 710
8e2160e2 711 *child_flags = flags;
0b50cc88
KW
712}
713
f3930ed0 714const BdrvChildRole child_file = {
8e2160e2 715 .inherit_options = bdrv_inherited_options,
f3930ed0
KW
716};
717
718/*
8e2160e2
KW
719 * Returns the options and flags that bs->file should get if the use of formats
720 * (and not only protocols) is permitted for it, based on the given options and
721 * flags for the parent BDS
f3930ed0 722 */
8e2160e2
KW
723static void bdrv_inherited_fmt_options(int *child_flags, QDict *child_options,
724 int parent_flags, QDict *parent_options)
f3930ed0 725{
8e2160e2
KW
726 child_file.inherit_options(child_flags, child_options,
727 parent_flags, parent_options);
728
729 *child_flags &= ~BDRV_O_PROTOCOL;
f3930ed0
KW
730}
731
732const BdrvChildRole child_format = {
8e2160e2 733 .inherit_options = bdrv_inherited_fmt_options,
f3930ed0
KW
734};
735
317fc44e 736/*
8e2160e2
KW
737 * Returns the options and flags that bs->backing should get, based on the
738 * given options and flags for the parent BDS
317fc44e 739 */
8e2160e2
KW
740static void bdrv_backing_options(int *child_flags, QDict *child_options,
741 int parent_flags, QDict *parent_options)
317fc44e 742{
8e2160e2
KW
743 int flags = parent_flags;
744
91a097e7
KW
745 /* The cache mode is inherited unmodified for backing files */
746 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_WB);
747 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
748 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
749
317fc44e
KW
750 /* backing files always opened read-only */
751 flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
752
753 /* snapshot=on is handled on the top layer */
8bfea15d 754 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
317fc44e 755
8e2160e2 756 *child_flags = flags;
317fc44e
KW
757}
758
f3930ed0 759static const BdrvChildRole child_backing = {
8e2160e2 760 .inherit_options = bdrv_backing_options,
f3930ed0
KW
761};
762
7b272452
KW
763static int bdrv_open_flags(BlockDriverState *bs, int flags)
764{
765 int open_flags = flags | BDRV_O_CACHE_WB;
766
767 /*
768 * Clear flags that are internal to the block layer before opening the
769 * image.
770 */
20cca275 771 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
7b272452
KW
772
773 /*
774 * Snapshots should be writable.
775 */
8bfea15d 776 if (flags & BDRV_O_TEMPORARY) {
7b272452
KW
777 open_flags |= BDRV_O_RDWR;
778 }
779
780 return open_flags;
781}
782
91a097e7
KW
783static void update_flags_from_options(int *flags, QemuOpts *opts)
784{
785 *flags &= ~BDRV_O_CACHE_MASK;
786
787 assert(qemu_opt_find(opts, BDRV_OPT_CACHE_WB));
788 if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_WB, false)) {
789 *flags |= BDRV_O_CACHE_WB;
790 }
791
792 assert(qemu_opt_find(opts, BDRV_OPT_CACHE_NO_FLUSH));
793 if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
794 *flags |= BDRV_O_NO_FLUSH;
795 }
796
797 assert(qemu_opt_find(opts, BDRV_OPT_CACHE_DIRECT));
798 if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_DIRECT, false)) {
799 *flags |= BDRV_O_NOCACHE;
800 }
801}
802
803static void update_options_from_flags(QDict *options, int flags)
804{
805 if (!qdict_haskey(options, BDRV_OPT_CACHE_WB)) {
806 qdict_put(options, BDRV_OPT_CACHE_WB,
807 qbool_from_bool(flags & BDRV_O_CACHE_WB));
808 }
809 if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) {
810 qdict_put(options, BDRV_OPT_CACHE_DIRECT,
811 qbool_from_bool(flags & BDRV_O_NOCACHE));
812 }
813 if (!qdict_haskey(options, BDRV_OPT_CACHE_NO_FLUSH)) {
814 qdict_put(options, BDRV_OPT_CACHE_NO_FLUSH,
815 qbool_from_bool(flags & BDRV_O_NO_FLUSH));
816 }
817}
818
636ea370
KW
819static void bdrv_assign_node_name(BlockDriverState *bs,
820 const char *node_name,
821 Error **errp)
6913c0c2 822{
15489c76 823 char *gen_node_name = NULL;
6913c0c2 824
15489c76
JC
825 if (!node_name) {
826 node_name = gen_node_name = id_generate(ID_BLOCK);
827 } else if (!id_wellformed(node_name)) {
828 /*
829 * Check for empty string or invalid characters, but not if it is
830 * generated (generated names use characters not available to the user)
831 */
9aebf3b8 832 error_setg(errp, "Invalid node name");
636ea370 833 return;
6913c0c2
BC
834 }
835
0c5e94ee 836 /* takes care of avoiding namespaces collisions */
7f06d47e 837 if (blk_by_name(node_name)) {
0c5e94ee
BC
838 error_setg(errp, "node-name=%s is conflicting with a device id",
839 node_name);
15489c76 840 goto out;
0c5e94ee
BC
841 }
842
6913c0c2
BC
843 /* takes care of avoiding duplicates node names */
844 if (bdrv_find_node(node_name)) {
845 error_setg(errp, "Duplicate node name");
15489c76 846 goto out;
6913c0c2
BC
847 }
848
849 /* copy node name into the bs and insert it into the graph list */
850 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
851 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
15489c76
JC
852out:
853 g_free(gen_node_name);
6913c0c2
BC
854}
855
18edf289
KW
856static QemuOptsList bdrv_runtime_opts = {
857 .name = "bdrv_common",
858 .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
859 .desc = {
860 {
861 .name = "node-name",
862 .type = QEMU_OPT_STRING,
863 .help = "Node name of the block device node",
864 },
62392ebb
KW
865 {
866 .name = "driver",
867 .type = QEMU_OPT_STRING,
868 .help = "Block driver to use for the node",
869 },
91a097e7
KW
870 {
871 .name = BDRV_OPT_CACHE_WB,
872 .type = QEMU_OPT_BOOL,
873 .help = "Enable writeback mode",
874 },
875 {
876 .name = BDRV_OPT_CACHE_DIRECT,
877 .type = QEMU_OPT_BOOL,
878 .help = "Bypass software writeback cache on the host",
879 },
880 {
881 .name = BDRV_OPT_CACHE_NO_FLUSH,
882 .type = QEMU_OPT_BOOL,
883 .help = "Ignore flush requests",
884 },
18edf289
KW
885 { /* end of list */ }
886 },
887};
888
57915332
KW
889/*
890 * Common part for opening disk images and files
b6ad491a
KW
891 *
892 * Removes all processed options from *options.
57915332 893 */
9a4f4c31 894static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
82dc8b41 895 QDict *options, Error **errp)
57915332
KW
896{
897 int ret, open_flags;
035fccdf 898 const char *filename;
62392ebb 899 const char *driver_name = NULL;
6913c0c2 900 const char *node_name = NULL;
18edf289 901 QemuOpts *opts;
62392ebb 902 BlockDriver *drv;
34b5d2c6 903 Error *local_err = NULL;
57915332 904
6405875c 905 assert(bs->file == NULL);
707ff828 906 assert(options != NULL && bs->options != options);
57915332 907
62392ebb
KW
908 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
909 qemu_opts_absorb_qdict(opts, options, &local_err);
910 if (local_err) {
911 error_propagate(errp, local_err);
912 ret = -EINVAL;
913 goto fail_opts;
914 }
915
916 driver_name = qemu_opt_get(opts, "driver");
917 drv = bdrv_find_format(driver_name);
918 assert(drv != NULL);
919
45673671 920 if (file != NULL) {
9a4f4c31 921 filename = file->bs->filename;
45673671
KW
922 } else {
923 filename = qdict_get_try_str(options, "filename");
924 }
925
765003db
KW
926 if (drv->bdrv_needs_filename && !filename) {
927 error_setg(errp, "The '%s' block driver requires a file name",
928 drv->format_name);
18edf289
KW
929 ret = -EINVAL;
930 goto fail_opts;
6913c0c2 931 }
6913c0c2 932
82dc8b41
KW
933 trace_bdrv_open_common(bs, filename ?: "", bs->open_flags,
934 drv->format_name);
62392ebb 935
18edf289 936 node_name = qemu_opt_get(opts, "node-name");
636ea370 937 bdrv_assign_node_name(bs, node_name, &local_err);
0fb6395c 938 if (local_err) {
636ea370 939 error_propagate(errp, local_err);
18edf289
KW
940 ret = -EINVAL;
941 goto fail_opts;
5d186eb0
KW
942 }
943
c25f53b0 944 bs->request_alignment = 512;
0d51b4de 945 bs->zero_beyond_eof = true;
82dc8b41 946 bs->read_only = !(bs->open_flags & BDRV_O_RDWR);
b64ec4e4
FZ
947
948 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
8f94a6e4
KW
949 error_setg(errp,
950 !bs->read_only && bdrv_is_whitelisted(drv, true)
951 ? "Driver '%s' can only be used for read-only devices"
952 : "Driver '%s' is not whitelisted",
953 drv->format_name);
18edf289
KW
954 ret = -ENOTSUP;
955 goto fail_opts;
b64ec4e4 956 }
57915332 957
53fec9d3 958 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
82dc8b41 959 if (bs->open_flags & BDRV_O_COPY_ON_READ) {
0ebd24e0
KW
960 if (!bs->read_only) {
961 bdrv_enable_copy_on_read(bs);
962 } else {
963 error_setg(errp, "Can't use copy-on-read on read-only device");
18edf289
KW
964 ret = -EINVAL;
965 goto fail_opts;
0ebd24e0 966 }
53fec9d3
SH
967 }
968
c2ad1b0c
KW
969 if (filename != NULL) {
970 pstrcpy(bs->filename, sizeof(bs->filename), filename);
971 } else {
972 bs->filename[0] = '\0';
973 }
91af7014 974 pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
57915332 975
57915332 976 bs->drv = drv;
7267c094 977 bs->opaque = g_malloc0(drv->instance_size);
57915332 978
91a097e7
KW
979 /* Apply cache mode options */
980 update_flags_from_options(&bs->open_flags, opts);
981 bdrv_set_enable_write_cache(bs, bs->open_flags & BDRV_O_CACHE_WB);
e7c63796 982
66f82cee 983 /* Open the image, either directly or using a protocol */
82dc8b41 984 open_flags = bdrv_open_flags(bs, bs->open_flags);
66f82cee 985 if (drv->bdrv_file_open) {
5d186eb0 986 assert(file == NULL);
030be321 987 assert(!drv->bdrv_needs_filename || filename != NULL);
34b5d2c6 988 ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
f500a6d3 989 } else {
2af5ef70 990 if (file == NULL) {
34b5d2c6
HR
991 error_setg(errp, "Can't use '%s' as a block driver for the "
992 "protocol level", drv->format_name);
2af5ef70
KW
993 ret = -EINVAL;
994 goto free_and_fail;
995 }
f500a6d3 996 bs->file = file;
34b5d2c6 997 ret = drv->bdrv_open(bs, options, open_flags, &local_err);
66f82cee
KW
998 }
999
57915332 1000 if (ret < 0) {
84d18f06 1001 if (local_err) {
34b5d2c6 1002 error_propagate(errp, local_err);
2fa9aa59
DH
1003 } else if (bs->filename[0]) {
1004 error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
34b5d2c6
HR
1005 } else {
1006 error_setg_errno(errp, -ret, "Could not open image");
1007 }
57915332
KW
1008 goto free_and_fail;
1009 }
1010
a1f688f4
MA
1011 if (bs->encrypted) {
1012 error_report("Encrypted images are deprecated");
1013 error_printf("Support for them will be removed in a future release.\n"
1014 "You can use 'qemu-img convert' to convert your image"
1015 " to an unencrypted one.\n");
1016 }
1017
51762288
SH
1018 ret = refresh_total_sectors(bs, bs->total_sectors);
1019 if (ret < 0) {
34b5d2c6 1020 error_setg_errno(errp, -ret, "Could not refresh total sector count");
51762288 1021 goto free_and_fail;
57915332 1022 }
51762288 1023
3baca891
KW
1024 bdrv_refresh_limits(bs, &local_err);
1025 if (local_err) {
1026 error_propagate(errp, local_err);
1027 ret = -EINVAL;
1028 goto free_and_fail;
1029 }
1030
c25f53b0 1031 assert(bdrv_opt_mem_align(bs) != 0);
4196d2f0 1032 assert(bdrv_min_mem_align(bs) != 0);
b192af8a 1033 assert((bs->request_alignment != 0) || bdrv_is_sg(bs));
18edf289
KW
1034
1035 qemu_opts_del(opts);
57915332
KW
1036 return 0;
1037
1038free_and_fail:
f500a6d3 1039 bs->file = NULL;
7267c094 1040 g_free(bs->opaque);
57915332
KW
1041 bs->opaque = NULL;
1042 bs->drv = NULL;
18edf289
KW
1043fail_opts:
1044 qemu_opts_del(opts);
57915332
KW
1045 return ret;
1046}
1047
5e5c4f63
KW
1048static QDict *parse_json_filename(const char *filename, Error **errp)
1049{
1050 QObject *options_obj;
1051 QDict *options;
1052 int ret;
1053
1054 ret = strstart(filename, "json:", &filename);
1055 assert(ret);
1056
1057 options_obj = qobject_from_json(filename);
1058 if (!options_obj) {
1059 error_setg(errp, "Could not parse the JSON options");
1060 return NULL;
1061 }
1062
1063 if (qobject_type(options_obj) != QTYPE_QDICT) {
1064 qobject_decref(options_obj);
1065 error_setg(errp, "Invalid JSON object given");
1066 return NULL;
1067 }
1068
1069 options = qobject_to_qdict(options_obj);
1070 qdict_flatten(options);
1071
1072 return options;
1073}
1074
de3b53f0
KW
1075static void parse_json_protocol(QDict *options, const char **pfilename,
1076 Error **errp)
1077{
1078 QDict *json_options;
1079 Error *local_err = NULL;
1080
1081 /* Parse json: pseudo-protocol */
1082 if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) {
1083 return;
1084 }
1085
1086 json_options = parse_json_filename(*pfilename, &local_err);
1087 if (local_err) {
1088 error_propagate(errp, local_err);
1089 return;
1090 }
1091
1092 /* Options given in the filename have lower priority than options
1093 * specified directly */
1094 qdict_join(options, json_options, false);
1095 QDECREF(json_options);
1096 *pfilename = NULL;
1097}
1098
b6ce07aa 1099/*
f54120ff
KW
1100 * Fills in default options for opening images and converts the legacy
1101 * filename/flags pair to option QDict entries.
53a29513
HR
1102 * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
1103 * block driver has been specified explicitly.
b6ce07aa 1104 */
de3b53f0 1105static int bdrv_fill_options(QDict **options, const char *filename,
053e1578 1106 int *flags, Error **errp)
ea2384d3 1107{
c2ad1b0c 1108 const char *drvname;
53a29513 1109 bool protocol = *flags & BDRV_O_PROTOCOL;
e3fa4bfa 1110 bool parse_filename = false;
053e1578 1111 BlockDriver *drv = NULL;
34b5d2c6 1112 Error *local_err = NULL;
83f64091 1113
53a29513 1114 drvname = qdict_get_try_str(*options, "driver");
053e1578
HR
1115 if (drvname) {
1116 drv = bdrv_find_format(drvname);
1117 if (!drv) {
1118 error_setg(errp, "Unknown driver '%s'", drvname);
1119 return -ENOENT;
1120 }
1121 /* If the user has explicitly specified the driver, this choice should
1122 * override the BDRV_O_PROTOCOL flag */
1123 protocol = drv->bdrv_file_open;
53a29513
HR
1124 }
1125
1126 if (protocol) {
1127 *flags |= BDRV_O_PROTOCOL;
1128 } else {
1129 *flags &= ~BDRV_O_PROTOCOL;
1130 }
1131
91a097e7
KW
1132 /* Translate cache options from flags into options */
1133 update_options_from_flags(*options, *flags);
1134
035fccdf 1135 /* Fetch the file name from the options QDict if necessary */
17b005f1 1136 if (protocol && filename) {
f54120ff
KW
1137 if (!qdict_haskey(*options, "filename")) {
1138 qdict_put(*options, "filename", qstring_from_str(filename));
1139 parse_filename = true;
1140 } else {
1141 error_setg(errp, "Can't specify 'file' and 'filename' options at "
1142 "the same time");
1143 return -EINVAL;
1144 }
035fccdf
KW
1145 }
1146
c2ad1b0c 1147 /* Find the right block driver */
f54120ff 1148 filename = qdict_get_try_str(*options, "filename");
f54120ff 1149
053e1578
HR
1150 if (!drvname && protocol) {
1151 if (filename) {
1152 drv = bdrv_find_protocol(filename, parse_filename, errp);
17b005f1 1153 if (!drv) {
053e1578 1154 return -EINVAL;
17b005f1 1155 }
053e1578
HR
1156
1157 drvname = drv->format_name;
1158 qdict_put(*options, "driver", qstring_from_str(drvname));
1159 } else {
1160 error_setg(errp, "Must specify either driver or file");
1161 return -EINVAL;
98289620 1162 }
c2ad1b0c
KW
1163 }
1164
17b005f1 1165 assert(drv || !protocol);
c2ad1b0c 1166
f54120ff 1167 /* Driver-specific filename parsing */
17b005f1 1168 if (drv && drv->bdrv_parse_filename && parse_filename) {
5acd9d81 1169 drv->bdrv_parse_filename(filename, *options, &local_err);
84d18f06 1170 if (local_err) {
34b5d2c6 1171 error_propagate(errp, local_err);
f54120ff 1172 return -EINVAL;
6963a30d 1173 }
cd5d031e
HR
1174
1175 if (!drv->bdrv_needs_filename) {
1176 qdict_del(*options, "filename");
cd5d031e 1177 }
6963a30d
KW
1178 }
1179
f54120ff
KW
1180 return 0;
1181}
1182
b4b059f6
KW
1183static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
1184 BlockDriverState *child_bs,
260fecf1 1185 const char *child_name,
b4b059f6 1186 const BdrvChildRole *child_role)
df581792
KW
1187{
1188 BdrvChild *child = g_new(BdrvChild, 1);
1189 *child = (BdrvChild) {
1190 .bs = child_bs,
260fecf1 1191 .name = g_strdup(child_name),
df581792
KW
1192 .role = child_role,
1193 };
1194
1195 QLIST_INSERT_HEAD(&parent_bs->children, child, next);
d42a8a93 1196 QLIST_INSERT_HEAD(&child_bs->parents, child, next_parent);
b4b059f6
KW
1197
1198 return child;
df581792
KW
1199}
1200
3f09bfbc 1201static void bdrv_detach_child(BdrvChild *child)
33a60407
KW
1202{
1203 QLIST_REMOVE(child, next);
d42a8a93 1204 QLIST_REMOVE(child, next_parent);
260fecf1 1205 g_free(child->name);
33a60407
KW
1206 g_free(child);
1207}
1208
1209void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
1210{
779020cb
KW
1211 BlockDriverState *child_bs;
1212
1213 if (child == NULL) {
1214 return;
1215 }
33a60407
KW
1216
1217 if (child->bs->inherits_from == parent) {
1218 child->bs->inherits_from = NULL;
1219 }
1220
779020cb 1221 child_bs = child->bs;
33a60407
KW
1222 bdrv_detach_child(child);
1223 bdrv_unref(child_bs);
1224}
1225
5db15a57
KW
1226/*
1227 * Sets the backing file link of a BDS. A new reference is created; callers
1228 * which don't need their own reference any more must call bdrv_unref().
1229 */
8d24cce1
FZ
1230void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
1231{
5db15a57
KW
1232 if (backing_hd) {
1233 bdrv_ref(backing_hd);
1234 }
8d24cce1 1235
760e0063 1236 if (bs->backing) {
826b6ca0 1237 assert(bs->backing_blocker);
760e0063 1238 bdrv_op_unblock_all(bs->backing->bs, bs->backing_blocker);
5db15a57 1239 bdrv_unref_child(bs, bs->backing);
826b6ca0
FZ
1240 } else if (backing_hd) {
1241 error_setg(&bs->backing_blocker,
81e5f78a
AG
1242 "node is used as backing hd of '%s'",
1243 bdrv_get_device_or_node_name(bs));
826b6ca0
FZ
1244 }
1245
8d24cce1 1246 if (!backing_hd) {
826b6ca0
FZ
1247 error_free(bs->backing_blocker);
1248 bs->backing_blocker = NULL;
760e0063 1249 bs->backing = NULL;
8d24cce1
FZ
1250 goto out;
1251 }
260fecf1 1252 bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing);
8d24cce1
FZ
1253 bs->open_flags &= ~BDRV_O_NO_BACKING;
1254 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
1255 pstrcpy(bs->backing_format, sizeof(bs->backing_format),
1256 backing_hd->drv ? backing_hd->drv->format_name : "");
826b6ca0 1257
760e0063 1258 bdrv_op_block_all(backing_hd, bs->backing_blocker);
826b6ca0 1259 /* Otherwise we won't be able to commit due to check in bdrv_commit */
760e0063 1260 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
826b6ca0 1261 bs->backing_blocker);
8d24cce1 1262out:
3baca891 1263 bdrv_refresh_limits(bs, NULL);
8d24cce1
FZ
1264}
1265
31ca6d07
KW
1266/*
1267 * Opens the backing file for a BlockDriverState if not yet open
1268 *
d9b7b057
KW
1269 * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
1270 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1271 * itself, all options starting with "${bdref_key}." are considered part of the
1272 * BlockdevRef.
1273 *
1274 * TODO Can this be unified with bdrv_open_image()?
31ca6d07 1275 */
d9b7b057
KW
1276int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
1277 const char *bdref_key, Error **errp)
9156df12 1278{
1ba4b6a5 1279 char *backing_filename = g_malloc0(PATH_MAX);
d9b7b057
KW
1280 char *bdref_key_dot;
1281 const char *reference = NULL;
317fc44e 1282 int ret = 0;
8d24cce1 1283 BlockDriverState *backing_hd;
d9b7b057
KW
1284 QDict *options;
1285 QDict *tmp_parent_options = NULL;
34b5d2c6 1286 Error *local_err = NULL;
9156df12 1287
760e0063 1288 if (bs->backing != NULL) {
1ba4b6a5 1289 goto free_exit;
9156df12
PB
1290 }
1291
31ca6d07 1292 /* NULL means an empty set of options */
d9b7b057
KW
1293 if (parent_options == NULL) {
1294 tmp_parent_options = qdict_new();
1295 parent_options = tmp_parent_options;
31ca6d07
KW
1296 }
1297
9156df12 1298 bs->open_flags &= ~BDRV_O_NO_BACKING;
d9b7b057
KW
1299
1300 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1301 qdict_extract_subqdict(parent_options, &options, bdref_key_dot);
1302 g_free(bdref_key_dot);
1303
1304 reference = qdict_get_try_str(parent_options, bdref_key);
1305 if (reference || qdict_haskey(options, "file.filename")) {
1cb6f506
KW
1306 backing_filename[0] = '\0';
1307 } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
31ca6d07 1308 QDECREF(options);
1ba4b6a5 1309 goto free_exit;
dbecebdd 1310 } else {
9f07429e
HR
1311 bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
1312 &local_err);
1313 if (local_err) {
1314 ret = -EINVAL;
1315 error_propagate(errp, local_err);
1316 QDECREF(options);
1317 goto free_exit;
1318 }
9156df12
PB
1319 }
1320
8ee79e70
KW
1321 if (!bs->drv || !bs->drv->supports_backing) {
1322 ret = -EINVAL;
1323 error_setg(errp, "Driver doesn't support backing files");
1324 QDECREF(options);
1325 goto free_exit;
1326 }
1327
c5f6e493
KW
1328 if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1329 qdict_put(options, "driver", qstring_from_str(bs->backing_format));
9156df12
PB
1330 }
1331
d9b7b057 1332 backing_hd = NULL;
f3930ed0
KW
1333 ret = bdrv_open_inherit(&backing_hd,
1334 *backing_filename ? backing_filename : NULL,
d9b7b057 1335 reference, options, 0, bs, &child_backing,
e43bfd9c 1336 errp);
9156df12 1337 if (ret < 0) {
9156df12 1338 bs->open_flags |= BDRV_O_NO_BACKING;
e43bfd9c 1339 error_prepend(errp, "Could not open backing file: ");
1ba4b6a5 1340 goto free_exit;
9156df12 1341 }
df581792 1342
5db15a57
KW
1343 /* Hook up the backing file link; drop our reference, bs owns the
1344 * backing_hd reference now */
8d24cce1 1345 bdrv_set_backing_hd(bs, backing_hd);
5db15a57 1346 bdrv_unref(backing_hd);
d80ac658 1347
d9b7b057
KW
1348 qdict_del(parent_options, bdref_key);
1349
1ba4b6a5
BC
1350free_exit:
1351 g_free(backing_filename);
d9b7b057 1352 QDECREF(tmp_parent_options);
1ba4b6a5 1353 return ret;
9156df12
PB
1354}
1355
da557aac
HR
1356/*
1357 * Opens a disk image whose options are given as BlockdevRef in another block
1358 * device's options.
1359 *
da557aac 1360 * If allow_none is true, no image will be opened if filename is false and no
b4b059f6 1361 * BlockdevRef is given. NULL will be returned, but errp remains unset.
da557aac
HR
1362 *
1363 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1364 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1365 * itself, all options starting with "${bdref_key}." are considered part of the
1366 * BlockdevRef.
1367 *
1368 * The BlockdevRef will be removed from the options QDict.
1369 */
b4b059f6
KW
1370BdrvChild *bdrv_open_child(const char *filename,
1371 QDict *options, const char *bdref_key,
1372 BlockDriverState* parent,
1373 const BdrvChildRole *child_role,
1374 bool allow_none, Error **errp)
da557aac 1375{
b4b059f6
KW
1376 BdrvChild *c = NULL;
1377 BlockDriverState *bs;
da557aac
HR
1378 QDict *image_options;
1379 int ret;
1380 char *bdref_key_dot;
1381 const char *reference;
1382
df581792 1383 assert(child_role != NULL);
f67503e5 1384
da557aac
HR
1385 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1386 qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1387 g_free(bdref_key_dot);
1388
1389 reference = qdict_get_try_str(options, bdref_key);
1390 if (!filename && !reference && !qdict_size(image_options)) {
b4b059f6 1391 if (!allow_none) {
da557aac
HR
1392 error_setg(errp, "A block device must be specified for \"%s\"",
1393 bdref_key);
da557aac 1394 }
b20e61e0 1395 QDECREF(image_options);
da557aac
HR
1396 goto done;
1397 }
1398
b4b059f6
KW
1399 bs = NULL;
1400 ret = bdrv_open_inherit(&bs, filename, reference, image_options, 0,
ce343771 1401 parent, child_role, errp);
df581792
KW
1402 if (ret < 0) {
1403 goto done;
1404 }
1405
260fecf1 1406 c = bdrv_attach_child(parent, bs, bdref_key, child_role);
da557aac
HR
1407
1408done:
1409 qdict_del(options, bdref_key);
b4b059f6
KW
1410 return c;
1411}
1412
73176bee
KW
1413static int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags,
1414 QDict *snapshot_options, Error **errp)
b998875d
KW
1415{
1416 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1ba4b6a5 1417 char *tmp_filename = g_malloc0(PATH_MAX + 1);
b998875d 1418 int64_t total_size;
83d0521a 1419 QemuOpts *opts = NULL;
b998875d 1420 BlockDriverState *bs_snapshot;
c2e0dbbf 1421 Error *local_err = NULL;
b998875d
KW
1422 int ret;
1423
1424 /* if snapshot, we create a temporary backing file and open it
1425 instead of opening 'filename' directly */
1426
1427 /* Get the required size from the image */
f187743a
KW
1428 total_size = bdrv_getlength(bs);
1429 if (total_size < 0) {
6b8aeca5 1430 ret = total_size;
f187743a 1431 error_setg_errno(errp, -total_size, "Could not get image size");
1ba4b6a5 1432 goto out;
f187743a 1433 }
b998875d
KW
1434
1435 /* Create the temporary image */
1ba4b6a5 1436 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
b998875d
KW
1437 if (ret < 0) {
1438 error_setg_errno(errp, -ret, "Could not get temporary filename");
1ba4b6a5 1439 goto out;
b998875d
KW
1440 }
1441
ef810437 1442 opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
c282e1fd 1443 &error_abort);
39101f25 1444 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
e43bfd9c 1445 ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, errp);
83d0521a 1446 qemu_opts_del(opts);
b998875d 1447 if (ret < 0) {
e43bfd9c
MA
1448 error_prepend(errp, "Could not create temporary overlay '%s': ",
1449 tmp_filename);
1ba4b6a5 1450 goto out;
b998875d
KW
1451 }
1452
73176bee 1453 /* Prepare options QDict for the temporary file */
b998875d
KW
1454 qdict_put(snapshot_options, "file.driver",
1455 qstring_from_str("file"));
1456 qdict_put(snapshot_options, "file.filename",
1457 qstring_from_str(tmp_filename));
e6641719
HR
1458 qdict_put(snapshot_options, "driver",
1459 qstring_from_str("qcow2"));
b998875d 1460
e4e9986b 1461 bs_snapshot = bdrv_new();
b998875d
KW
1462
1463 ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
6ebf9aa2 1464 flags, &local_err);
73176bee 1465 snapshot_options = NULL;
b998875d
KW
1466 if (ret < 0) {
1467 error_propagate(errp, local_err);
1ba4b6a5 1468 goto out;
b998875d
KW
1469 }
1470
1471 bdrv_append(bs_snapshot, bs);
1ba4b6a5
BC
1472
1473out:
73176bee 1474 QDECREF(snapshot_options);
1ba4b6a5 1475 g_free(tmp_filename);
6b8aeca5 1476 return ret;
b998875d
KW
1477}
1478
b6ce07aa
KW
1479/*
1480 * Opens a disk image (raw, qcow2, vmdk, ...)
de9c0cec
KW
1481 *
1482 * options is a QDict of options to pass to the block drivers, or NULL for an
1483 * empty set of options. The reference to the QDict belongs to the block layer
1484 * after the call (even on failure), so if the caller intends to reuse the
1485 * dictionary, it needs to use QINCREF() before calling bdrv_open.
f67503e5
HR
1486 *
1487 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1488 * If it is not NULL, the referenced BDS will be reused.
ddf5636d
HR
1489 *
1490 * The reference parameter may be used to specify an existing block device which
1491 * should be opened. If specified, neither options nor a filename may be given,
1492 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
b6ce07aa 1493 */
f3930ed0
KW
1494static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
1495 const char *reference, QDict *options, int flags,
1496 BlockDriverState *parent,
ce343771 1497 const BdrvChildRole *child_role, Error **errp)
ea2384d3 1498{
b6ce07aa 1499 int ret;
9a4f4c31
KW
1500 BdrvChild *file = NULL;
1501 BlockDriverState *bs;
ce343771 1502 BlockDriver *drv = NULL;
74fe54f2 1503 const char *drvname;
3e8c2e57 1504 const char *backing;
34b5d2c6 1505 Error *local_err = NULL;
73176bee 1506 QDict *snapshot_options = NULL;
b1e6fc08 1507 int snapshot_flags = 0;
712e7874 1508
f67503e5 1509 assert(pbs);
f3930ed0
KW
1510 assert(!child_role || !flags);
1511 assert(!child_role == !parent);
f67503e5 1512
ddf5636d
HR
1513 if (reference) {
1514 bool options_non_empty = options ? qdict_size(options) : false;
1515 QDECREF(options);
1516
1517 if (*pbs) {
1518 error_setg(errp, "Cannot reuse an existing BDS when referencing "
1519 "another block device");
1520 return -EINVAL;
1521 }
1522
1523 if (filename || options_non_empty) {
1524 error_setg(errp, "Cannot reference an existing block device with "
1525 "additional options or a new filename");
1526 return -EINVAL;
1527 }
1528
1529 bs = bdrv_lookup_bs(reference, reference, errp);
1530 if (!bs) {
1531 return -ENODEV;
1532 }
1533 bdrv_ref(bs);
1534 *pbs = bs;
1535 return 0;
1536 }
1537
f67503e5
HR
1538 if (*pbs) {
1539 bs = *pbs;
1540 } else {
e4e9986b 1541 bs = bdrv_new();
f67503e5
HR
1542 }
1543
de9c0cec
KW
1544 /* NULL means an empty set of options */
1545 if (options == NULL) {
1546 options = qdict_new();
1547 }
1548
145f598e 1549 /* json: syntax counts as explicit options, as if in the QDict */
de3b53f0
KW
1550 parse_json_protocol(options, &filename, &local_err);
1551 if (local_err) {
1552 ret = -EINVAL;
1553 goto fail;
1554 }
1555
145f598e
KW
1556 bs->explicit_options = qdict_clone_shallow(options);
1557
f3930ed0 1558 if (child_role) {
bddcec37 1559 bs->inherits_from = parent;
8e2160e2
KW
1560 child_role->inherit_options(&flags, options,
1561 parent->open_flags, parent->options);
f3930ed0
KW
1562 }
1563
de3b53f0 1564 ret = bdrv_fill_options(&options, filename, &flags, &local_err);
462f5bcf
KW
1565 if (local_err) {
1566 goto fail;
1567 }
1568
62392ebb
KW
1569 bs->open_flags = flags;
1570 bs->options = options;
1571 options = qdict_clone_shallow(options);
1572
76c591b0 1573 /* Find the right image format driver */
76c591b0
KW
1574 drvname = qdict_get_try_str(options, "driver");
1575 if (drvname) {
1576 drv = bdrv_find_format(drvname);
76c591b0
KW
1577 if (!drv) {
1578 error_setg(errp, "Unknown driver: '%s'", drvname);
1579 ret = -EINVAL;
1580 goto fail;
1581 }
1582 }
1583
1584 assert(drvname || !(flags & BDRV_O_PROTOCOL));
76c591b0 1585
3e8c2e57
AG
1586 backing = qdict_get_try_str(options, "backing");
1587 if (backing && *backing == '\0') {
1588 flags |= BDRV_O_NO_BACKING;
1589 qdict_del(options, "backing");
1590 }
1591
f500a6d3 1592 /* Open image file without format layer */
f4788adc
KW
1593 if ((flags & BDRV_O_PROTOCOL) == 0) {
1594 if (flags & BDRV_O_RDWR) {
1595 flags |= BDRV_O_ALLOW_RDWR;
1596 }
1597 if (flags & BDRV_O_SNAPSHOT) {
73176bee
KW
1598 snapshot_options = qdict_new();
1599 bdrv_temp_snapshot_options(&snapshot_flags, snapshot_options,
1600 flags, options);
8e2160e2 1601 bdrv_backing_options(&flags, options, flags, options);
f4788adc 1602 }
f500a6d3 1603
f3930ed0 1604 bs->open_flags = flags;
1fdd6933 1605
9a4f4c31
KW
1606 file = bdrv_open_child(filename, options, "file", bs,
1607 &child_file, true, &local_err);
1fdd6933
KW
1608 if (local_err) {
1609 ret = -EINVAL;
f4788adc
KW
1610 goto fail;
1611 }
f500a6d3
KW
1612 }
1613
76c591b0 1614 /* Image format probing */
38f3ef57 1615 bs->probed = !drv;
76c591b0 1616 if (!drv && file) {
9a4f4c31 1617 ret = find_image_format(file->bs, filename, &drv, &local_err);
17b005f1 1618 if (ret < 0) {
8bfea15d 1619 goto fail;
2a05cbe4 1620 }
62392ebb
KW
1621 /*
1622 * This option update would logically belong in bdrv_fill_options(),
1623 * but we first need to open bs->file for the probing to work, while
1624 * opening bs->file already requires the (mostly) final set of options
1625 * so that cache mode etc. can be inherited.
1626 *
1627 * Adding the driver later is somewhat ugly, but it's not an option
1628 * that would ever be inherited, so it's correct. We just need to make
1629 * sure to update both bs->options (which has the full effective
1630 * options for bs) and options (which has file.* already removed).
1631 */
1632 qdict_put(bs->options, "driver", qstring_from_str(drv->format_name));
1633 qdict_put(options, "driver", qstring_from_str(drv->format_name));
76c591b0 1634 } else if (!drv) {
17b005f1
KW
1635 error_setg(errp, "Must specify either driver or file");
1636 ret = -EINVAL;
8bfea15d 1637 goto fail;
ea2384d3 1638 }
b6ce07aa 1639
53a29513
HR
1640 /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
1641 assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
1642 /* file must be NULL if a protocol BDS is about to be created
1643 * (the inverse results in an error message from bdrv_open_common()) */
1644 assert(!(flags & BDRV_O_PROTOCOL) || !file);
1645
b6ce07aa 1646 /* Open the image */
82dc8b41 1647 ret = bdrv_open_common(bs, file, options, &local_err);
b6ce07aa 1648 if (ret < 0) {
8bfea15d 1649 goto fail;
6987307c
CH
1650 }
1651
2a05cbe4 1652 if (file && (bs->file != file)) {
9a4f4c31 1653 bdrv_unref_child(bs, file);
f500a6d3
KW
1654 file = NULL;
1655 }
1656
b6ce07aa 1657 /* If there is a backing file, use it */
9156df12 1658 if ((flags & BDRV_O_NO_BACKING) == 0) {
d9b7b057 1659 ret = bdrv_open_backing_file(bs, options, "backing", &local_err);
b6ce07aa 1660 if (ret < 0) {
b6ad491a 1661 goto close_and_fail;
b6ce07aa 1662 }
b6ce07aa
KW
1663 }
1664
91af7014
HR
1665 bdrv_refresh_filename(bs);
1666
b6ad491a 1667 /* Check if any unknown options were used */
5acd9d81 1668 if (options && (qdict_size(options) != 0)) {
b6ad491a 1669 const QDictEntry *entry = qdict_first(options);
5acd9d81
HR
1670 if (flags & BDRV_O_PROTOCOL) {
1671 error_setg(errp, "Block protocol '%s' doesn't support the option "
1672 "'%s'", drv->format_name, entry->key);
1673 } else {
d0e46a55
HR
1674 error_setg(errp,
1675 "Block format '%s' does not support the option '%s'",
1676 drv->format_name, entry->key);
5acd9d81 1677 }
b6ad491a
KW
1678
1679 ret = -EINVAL;
1680 goto close_and_fail;
1681 }
b6ad491a 1682
b6ce07aa 1683 if (!bdrv_key_required(bs)) {
a7f53e26
MA
1684 if (bs->blk) {
1685 blk_dev_change_media_cb(bs->blk, true);
1686 }
c3adb58f
MA
1687 } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1688 && !runstate_check(RUN_STATE_INMIGRATE)
1689 && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1690 error_setg(errp,
1691 "Guest must be stopped for opening of encrypted image");
1692 ret = -EBUSY;
1693 goto close_and_fail;
b6ce07aa
KW
1694 }
1695
c3adb58f 1696 QDECREF(options);
f67503e5 1697 *pbs = bs;
dd62f1ca
KW
1698
1699 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1700 * temporary snapshot afterwards. */
1701 if (snapshot_flags) {
73176bee
KW
1702 ret = bdrv_append_temp_snapshot(bs, snapshot_flags, snapshot_options,
1703 &local_err);
1704 snapshot_options = NULL;
dd62f1ca
KW
1705 if (local_err) {
1706 goto close_and_fail;
1707 }
1708 }
1709
b6ce07aa
KW
1710 return 0;
1711
8bfea15d 1712fail:
f500a6d3 1713 if (file != NULL) {
9a4f4c31 1714 bdrv_unref_child(bs, file);
f500a6d3 1715 }
73176bee 1716 QDECREF(snapshot_options);
145f598e 1717 QDECREF(bs->explicit_options);
de9c0cec 1718 QDECREF(bs->options);
b6ad491a 1719 QDECREF(options);
de9c0cec 1720 bs->options = NULL;
f67503e5
HR
1721 if (!*pbs) {
1722 /* If *pbs is NULL, a new BDS has been created in this function and
1723 needs to be freed now. Otherwise, it does not need to be closed,
1724 since it has not really been opened yet. */
1725 bdrv_unref(bs);
1726 }
84d18f06 1727 if (local_err) {
34b5d2c6
HR
1728 error_propagate(errp, local_err);
1729 }
b6ad491a 1730 return ret;
de9c0cec 1731
b6ad491a 1732close_and_fail:
f67503e5
HR
1733 /* See fail path, but now the BDS has to be always closed */
1734 if (*pbs) {
1735 bdrv_close(bs);
1736 } else {
1737 bdrv_unref(bs);
1738 }
73176bee 1739 QDECREF(snapshot_options);
b6ad491a 1740 QDECREF(options);
84d18f06 1741 if (local_err) {
34b5d2c6
HR
1742 error_propagate(errp, local_err);
1743 }
b6ce07aa
KW
1744 return ret;
1745}
1746
f3930ed0 1747int bdrv_open(BlockDriverState **pbs, const char *filename,
6ebf9aa2 1748 const char *reference, QDict *options, int flags, Error **errp)
f3930ed0
KW
1749{
1750 return bdrv_open_inherit(pbs, filename, reference, options, flags, NULL,
ce343771 1751 NULL, errp);
f3930ed0
KW
1752}
1753
e971aa12
JC
/* One element of a BlockReopenQueue: the reopen state of a single BDS. */
1754typedef struct BlockReopenQueueEntry {
/* Set by bdrv_reopen_multiple() once bdrv_reopen_prepare() succeeded for
 * this entry; decides whether cleanup has to call bdrv_reopen_abort() */
1755 bool prepared;
/* Target BDS plus the new options, explicit options and flags for it */
1756 BDRVReopenState state;
/* Queue linkage */
1757 QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1758} BlockReopenQueueEntry;
1759
1760/*
1761 * Adds a BlockDriverState to a simple queue for an atomic, transactional
1762 * reopen of multiple devices.
1763 *
1764 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1765 * already performed, or alternatively may be NULL a new BlockReopenQueue will
1766 * be created and initialized. This newly created BlockReopenQueue should be
1767 * passed back in for subsequent calls that are intended to be of the same
1768 * atomic 'set'.
1769 *
1770 * bs is the BlockDriverState to add to the reopen queue.
1771 *
4d2cb092
KW
1772 * options contains the changed options for the associated bs
1773 * (the BlockReopenQueue takes ownership)
1774 *
e971aa12
JC
1775 * flags contains the open flags for the associated bs
1776 *
1777 * returns a pointer to bs_queue, which is either the newly allocated
1778 * bs_queue, or the existing bs_queue being used.
1779 *
1780 */
28518102
KW
1781static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
1782 BlockDriverState *bs,
1783 QDict *options,
1784 int flags,
1785 const BdrvChildRole *role,
1786 QDict *parent_options,
1787 int parent_flags)
e971aa12
JC
1788{
1789 assert(bs != NULL);
1790
1791 BlockReopenQueueEntry *bs_entry;
67251a31 1792 BdrvChild *child;
145f598e 1793 QDict *old_options, *explicit_options;
67251a31 1794
e971aa12
JC
1795 if (bs_queue == NULL) {
1796 bs_queue = g_new0(BlockReopenQueue, 1);
1797 QSIMPLEQ_INIT(bs_queue);
1798 }
1799
4d2cb092
KW
1800 if (!options) {
1801 options = qdict_new();
1802 }
1803
28518102
KW
1804 /*
1805 * Precedence of options:
1806 * 1. Explicitly passed in options (highest)
91a097e7 1807 * 2. Set in flags (only for top level)
145f598e 1808 * 3. Retained from explicitly set options of bs
8e2160e2 1809 * 4. Inherited from parent node
28518102
KW
1810 * 5. Retained from effective options of bs
1811 */
1812
91a097e7
KW
1813 if (!parent_options) {
1814 /*
1815 * Any setting represented by flags is always updated. If the
1816 * corresponding QDict option is set, it takes precedence. Otherwise
1817 * the flag is translated into a QDict option. The old setting of bs is
1818 * not considered.
1819 */
1820 update_options_from_flags(options, flags);
1821 }
1822
145f598e
KW
1823 /* Old explicitly set values (don't overwrite by inherited value) */
1824 old_options = qdict_clone_shallow(bs->explicit_options);
1825 bdrv_join_options(bs, options, old_options);
1826 QDECREF(old_options);
1827
1828 explicit_options = qdict_clone_shallow(options);
1829
28518102
KW
1830 /* Inherit from parent node */
1831 if (parent_options) {
1832 assert(!flags);
8e2160e2 1833 role->inherit_options(&flags, options, parent_flags, parent_options);
28518102
KW
1834 }
1835
1836 /* Old values are used for options that aren't set yet */
4d2cb092 1837 old_options = qdict_clone_shallow(bs->options);
cddff5ba 1838 bdrv_join_options(bs, options, old_options);
4d2cb092
KW
1839 QDECREF(old_options);
1840
f1f25a2e
KW
1841 /* bdrv_open() masks this flag out */
1842 flags &= ~BDRV_O_PROTOCOL;
1843
67251a31 1844 QLIST_FOREACH(child, &bs->children, next) {
4c9dfe5d
KW
1845 QDict *new_child_options;
1846 char *child_key_dot;
67251a31 1847
4c9dfe5d
KW
1848 /* reopen can only change the options of block devices that were
1849 * implicitly created and inherited options. For other (referenced)
1850 * block devices, a syntax like "backing.foo" results in an error. */
67251a31
KW
1851 if (child->bs->inherits_from != bs) {
1852 continue;
1853 }
1854
4c9dfe5d
KW
1855 child_key_dot = g_strdup_printf("%s.", child->name);
1856 qdict_extract_subqdict(options, &new_child_options, child_key_dot);
1857 g_free(child_key_dot);
1858
28518102
KW
1859 bdrv_reopen_queue_child(bs_queue, child->bs, new_child_options, 0,
1860 child->role, options, flags);
e971aa12
JC
1861 }
1862
1863 bs_entry = g_new0(BlockReopenQueueEntry, 1);
1864 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1865
1866 bs_entry->state.bs = bs;
4d2cb092 1867 bs_entry->state.options = options;
145f598e 1868 bs_entry->state.explicit_options = explicit_options;
e971aa12
JC
1869 bs_entry->state.flags = flags;
1870
1871 return bs_queue;
1872}
1873
28518102
KW
1874BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1875 BlockDriverState *bs,
1876 QDict *options, int flags)
1877{
1878 return bdrv_reopen_queue_child(bs_queue, bs, options, flags,
1879 NULL, NULL, 0);
1880}
1881
e971aa12
JC
1882/*
1883 * Reopen multiple BlockDriverStates atomically & transactionally.
1884 *
1885 * The queue passed in (bs_queue) must have been built up previous
1886 * via bdrv_reopen_queue().
1887 *
1888 * Reopens all BDS specified in the queue, with the appropriate
1889 * flags. All devices are prepared for reopen, and failure of any
1890 * device will cause all device changes to be abandonded, and intermediate
1891 * data cleaned up.
1892 *
1893 * If all devices prepare successfully, then the changes are committed
1894 * to all devices.
1895 *
1896 */
1897int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1898{
1899 int ret = -1;
1900 BlockReopenQueueEntry *bs_entry, *next;
1901 Error *local_err = NULL;
1902
1903 assert(bs_queue != NULL);
1904
1905 bdrv_drain_all();
1906
1907 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1908 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1909 error_propagate(errp, local_err);
1910 goto cleanup;
1911 }
1912 bs_entry->prepared = true;
1913 }
1914
1915 /* If we reach this point, we have success and just need to apply the
1916 * changes
1917 */
1918 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1919 bdrv_reopen_commit(&bs_entry->state);
1920 }
1921
1922 ret = 0;
1923
1924cleanup:
1925 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1926 if (ret && bs_entry->prepared) {
1927 bdrv_reopen_abort(&bs_entry->state);
145f598e
KW
1928 } else if (ret) {
1929 QDECREF(bs_entry->state.explicit_options);
e971aa12 1930 }
4d2cb092 1931 QDECREF(bs_entry->state.options);
e971aa12
JC
1932 g_free(bs_entry);
1933 }
1934 g_free(bs_queue);
1935 return ret;
1936}
1937
1938
1939/* Reopen a single BlockDriverState with the specified flags. */
1940int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1941{
1942 int ret = -1;
1943 Error *local_err = NULL;
4d2cb092 1944 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags);
e971aa12
JC
1945
1946 ret = bdrv_reopen_multiple(queue, &local_err);
1947 if (local_err != NULL) {
1948 error_propagate(errp, local_err);
1949 }
1950 return ret;
1951}
1952
1953
1954/*
1955 * Prepares a BlockDriverState for reopen. All changes are staged in the
1956 * 'opaque' field of the BDRVReopenState, which is used and allocated by
1957 * the block driver layer .bdrv_reopen_prepare()
1958 *
1959 * bs is the BlockDriverState to reopen
1960 * flags are the new open flags
1961 * queue is the reopen queue
1962 *
1963 * Returns 0 on success, non-zero on error. On error errp will be set
1964 * as well.
1965 *
1966 * On failure, bdrv_reopen_abort() will be called to clean up any data.
1967 * It is the responsibility of the caller to then call the abort() or
1968 * commit() for any other BDS that have been left in a prepare() state
1969 *
1970 */
1971int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1972 Error **errp)
1973{
1974 int ret = -1;
1975 Error *local_err = NULL;
1976 BlockDriver *drv;
ccf9dc07
KW
1977 QemuOpts *opts;
1978 const char *value;
e971aa12
JC
1979
1980 assert(reopen_state != NULL);
1981 assert(reopen_state->bs->drv != NULL);
1982 drv = reopen_state->bs->drv;
1983
ccf9dc07
KW
1984 /* Process generic block layer options */
1985 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
1986 qemu_opts_absorb_qdict(opts, reopen_state->options, &local_err);
1987 if (local_err) {
1988 error_propagate(errp, local_err);
1989 ret = -EINVAL;
1990 goto error;
1991 }
1992
91a097e7
KW
1993 update_flags_from_options(&reopen_state->flags, opts);
1994
1995 /* If a guest device is attached, it owns WCE */
1996 if (reopen_state->bs->blk && blk_get_attached_dev(reopen_state->bs->blk)) {
1997 bool old_wce = bdrv_enable_write_cache(reopen_state->bs);
1998 bool new_wce = (reopen_state->flags & BDRV_O_CACHE_WB);
1999 if (old_wce != new_wce) {
2000 error_setg(errp, "Cannot change cache.writeback: Device attached");
2001 ret = -EINVAL;
2002 goto error;
2003 }
2004 }
2005
ccf9dc07
KW
2006 /* node-name and driver must be unchanged. Put them back into the QDict, so
2007 * that they are checked at the end of this function. */
2008 value = qemu_opt_get(opts, "node-name");
2009 if (value) {
2010 qdict_put(reopen_state->options, "node-name", qstring_from_str(value));
2011 }
2012
2013 value = qemu_opt_get(opts, "driver");
2014 if (value) {
2015 qdict_put(reopen_state->options, "driver", qstring_from_str(value));
2016 }
2017
e971aa12
JC
2018 /* if we are to stay read-only, do not allow permission change
2019 * to r/w */
2020 if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
2021 reopen_state->flags & BDRV_O_RDWR) {
81e5f78a
AG
2022 error_setg(errp, "Node '%s' is read only",
2023 bdrv_get_device_or_node_name(reopen_state->bs));
e971aa12
JC
2024 goto error;
2025 }
2026
2027
2028 ret = bdrv_flush(reopen_state->bs);
2029 if (ret) {
455b0fde 2030 error_setg_errno(errp, -ret, "Error flushing drive");
e971aa12
JC
2031 goto error;
2032 }
2033
2034 if (drv->bdrv_reopen_prepare) {
2035 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
2036 if (ret) {
2037 if (local_err != NULL) {
2038 error_propagate(errp, local_err);
2039 } else {
d8b6895f
LC
2040 error_setg(errp, "failed while preparing to reopen image '%s'",
2041 reopen_state->bs->filename);
e971aa12
JC
2042 }
2043 goto error;
2044 }
2045 } else {
2046 /* It is currently mandatory to have a bdrv_reopen_prepare()
2047 * handler for each supported drv. */
81e5f78a
AG
2048 error_setg(errp, "Block format '%s' used by node '%s' "
2049 "does not support reopening files", drv->format_name,
2050 bdrv_get_device_or_node_name(reopen_state->bs));
e971aa12
JC
2051 ret = -1;
2052 goto error;
2053 }
2054
4d2cb092
KW
2055 /* Options that are not handled are only okay if they are unchanged
2056 * compared to the old state. It is expected that some options are only
2057 * used for the initial open, but not reopen (e.g. filename) */
2058 if (qdict_size(reopen_state->options)) {
2059 const QDictEntry *entry = qdict_first(reopen_state->options);
2060
2061 do {
2062 QString *new_obj = qobject_to_qstring(entry->value);
2063 const char *new = qstring_get_str(new_obj);
2064 const char *old = qdict_get_try_str(reopen_state->bs->options,
2065 entry->key);
2066
2067 if (!old || strcmp(new, old)) {
2068 error_setg(errp, "Cannot change the option '%s'", entry->key);
2069 ret = -EINVAL;
2070 goto error;
2071 }
2072 } while ((entry = qdict_next(reopen_state->options, entry)));
2073 }
2074
e971aa12
JC
2075 ret = 0;
2076
2077error:
ccf9dc07 2078 qemu_opts_del(opts);
e971aa12
JC
2079 return ret;
2080}
2081
2082/*
2083 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
2084 * makes them final by swapping the staging BlockDriverState contents into
2085 * the active BlockDriverState contents.
2086 */
2087void bdrv_reopen_commit(BDRVReopenState *reopen_state)
2088{
2089 BlockDriver *drv;
2090
2091 assert(reopen_state != NULL);
2092 drv = reopen_state->bs->drv;
2093 assert(drv != NULL);
2094
2095 /* If there are any driver level actions to take */
2096 if (drv->bdrv_reopen_commit) {
2097 drv->bdrv_reopen_commit(reopen_state);
2098 }
2099
2100 /* set BDS specific flags now */
145f598e
KW
2101 QDECREF(reopen_state->bs->explicit_options);
2102
2103 reopen_state->bs->explicit_options = reopen_state->explicit_options;
e971aa12
JC
2104 reopen_state->bs->open_flags = reopen_state->flags;
2105 reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
2106 BDRV_O_CACHE_WB);
2107 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
355ef4ac 2108
3baca891 2109 bdrv_refresh_limits(reopen_state->bs, NULL);
e971aa12
JC
2110}
2111
2112/*
2113 * Abort the reopen, and delete and free the staged changes in
2114 * reopen_state
2115 */
2116void bdrv_reopen_abort(BDRVReopenState *reopen_state)
2117{
2118 BlockDriver *drv;
2119
2120 assert(reopen_state != NULL);
2121 drv = reopen_state->bs->drv;
2122 assert(drv != NULL);
2123
2124 if (drv->bdrv_reopen_abort) {
2125 drv->bdrv_reopen_abort(reopen_state);
2126 }
145f598e
KW
2127
2128 QDECREF(reopen_state->explicit_options);
e971aa12
JC
2129}
2130
2131
64dff520 2132static void bdrv_close(BlockDriverState *bs)
fc01f7e7 2133{
33384421
HR
2134 BdrvAioNotifier *ban, *ban_next;
2135
ca9bd24c 2136 assert(!bs->job);
99b7e775
AG
2137
2138 /* Disable I/O limits and drain all pending throttled requests */
a0d64a61 2139 if (bs->throttle_state) {
99b7e775
AG
2140 bdrv_io_limits_disable(bs);
2141 }
2142
fc27291d 2143 bdrv_drained_begin(bs); /* complete I/O */
58fda173 2144 bdrv_flush(bs);
53ec73e2 2145 bdrv_drain(bs); /* in case flush left pending I/O */
fc27291d 2146
c5acdc9a
HR
2147 bdrv_release_named_dirty_bitmaps(bs);
2148 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
2149
b4d02820
HR
2150 if (bs->blk) {
2151 blk_dev_change_media_cb(bs->blk, false);
2152 }
2153
3cbc002c 2154 if (bs->drv) {
6e93e7c4
KW
2155 BdrvChild *child, *next;
2156
9a7dedbc 2157 bs->drv->bdrv_close(bs);
9a4f4c31 2158 bs->drv = NULL;
9a7dedbc 2159
5db15a57 2160 bdrv_set_backing_hd(bs, NULL);
9a7dedbc 2161
9a4f4c31
KW
2162 if (bs->file != NULL) {
2163 bdrv_unref_child(bs, bs->file);
2164 bs->file = NULL;
2165 }
2166
6e93e7c4 2167 QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
33a60407
KW
2168 /* TODO Remove bdrv_unref() from drivers' close function and use
2169 * bdrv_unref_child() here */
bddcec37
KW
2170 if (child->bs->inherits_from == bs) {
2171 child->bs->inherits_from = NULL;
2172 }
33a60407 2173 bdrv_detach_child(child);
6e93e7c4
KW
2174 }
2175
7267c094 2176 g_free(bs->opaque);
ea2384d3 2177 bs->opaque = NULL;
53fec9d3 2178 bs->copy_on_read = 0;
a275fa42
PB
2179 bs->backing_file[0] = '\0';
2180 bs->backing_format[0] = '\0';
6405875c
PB
2181 bs->total_sectors = 0;
2182 bs->encrypted = 0;
2183 bs->valid_key = 0;
2184 bs->sg = 0;
0d51b4de 2185 bs->zero_beyond_eof = false;
de9c0cec 2186 QDECREF(bs->options);
145f598e 2187 QDECREF(bs->explicit_options);
de9c0cec 2188 bs->options = NULL;
91af7014
HR
2189 QDECREF(bs->full_open_options);
2190 bs->full_open_options = NULL;
b338082b 2191 }
98f90dba 2192
33384421
HR
2193 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
2194 g_free(ban);
2195 }
2196 QLIST_INIT(&bs->aio_notifiers);
fc27291d 2197 bdrv_drained_end(bs);
b338082b
FB
2198}
2199
2bc93fed
MK
2200void bdrv_close_all(void)
2201{
2202 BlockDriverState *bs;
ca9bd24c
HR
2203 AioContext *aio_context;
2204
2205 /* Drop references from requests still in flight, such as canceled block
2206 * jobs whose AIO context has not been polled yet */
2207 bdrv_drain_all();
2bc93fed 2208
ca9bd24c
HR
2209 blk_remove_all_bs();
2210 blockdev_close_all_bdrv_states();
ed78cda3 2211
ca9bd24c
HR
2212 /* Cancel all block jobs */
2213 while (!QTAILQ_EMPTY(&all_bdrv_states)) {
2214 QTAILQ_FOREACH(bs, &all_bdrv_states, bs_list) {
2215 aio_context = bdrv_get_aio_context(bs);
2216
2217 aio_context_acquire(aio_context);
2218 if (bs->job) {
2219 block_job_cancel_sync(bs->job);
2220 aio_context_release(aio_context);
2221 break;
2222 }
2223 aio_context_release(aio_context);
2224 }
2225
2226 /* All the remaining BlockDriverStates are referenced directly or
2227 * indirectly from block jobs, so there needs to be at least one BDS
2228 * directly used by a block job */
2229 assert(bs);
2bc93fed
MK
2230 }
2231}
2232
f8aa905a
JC
2233/* Note that bs->device_list.tqe_prev is initially null,
2234 * and gets set to non-null by QTAILQ_INSERT_TAIL(). Establish
2235 * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
2236 * resetting it to null on remove. */
2237void bdrv_device_remove(BlockDriverState *bs)
2238{
2239 QTAILQ_REMOVE(&bdrv_states, bs, device_list);
2240 bs->device_list.tqe_prev = NULL;
2241}
2242
dc364f4c
BC
2243/* make a BlockDriverState anonymous by removing from bdrv_state and
2244 * graph_bdrv_state list.
d22b2f41
RH
2245 Also, NULL terminate the device_name to prevent double remove */
2246void bdrv_make_anon(BlockDriverState *bs)
2247{
f8aa905a
JC
2248 /* Take care to remove bs from bdrv_states only when it's actually
2249 * in it. */
bfb197e0 2250 if (bs->device_list.tqe_prev) {
f8aa905a 2251 bdrv_device_remove(bs);
d22b2f41 2252 }
dc364f4c
BC
2253 if (bs->node_name[0] != '\0') {
2254 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
2255 }
2256 bs->node_name[0] = '\0';
d22b2f41
RH
2257}
2258
8e419aef 2259/* Fields that need to stay with the top-level BDS */
4ddc07ca
PB
2260static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
2261 BlockDriverState *bs_src)
8802d1fd 2262{
4ddc07ca 2263 /* move some fields that need to stay attached to the device */
8802d1fd
JC
2264
2265 /* dev info */
4ddc07ca 2266 bs_dest->copy_on_read = bs_src->copy_on_read;
8802d1fd 2267
4ddc07ca 2268 bs_dest->enable_write_cache = bs_src->enable_write_cache;
c4a248a1 2269
a9fc4408 2270 /* dirty bitmap */
e4654d2d 2271 bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps;
063dd40e 2272}
a9fc4408 2273
dd62f1ca
KW
2274static void change_parent_backing_link(BlockDriverState *from,
2275 BlockDriverState *to)
2276{
2277 BdrvChild *c, *next;
2278
2279 QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
2280 assert(c->role != &child_backing);
2281 c->bs = to;
2282 QLIST_REMOVE(c, next_parent);
2283 QLIST_INSERT_HEAD(&to->parents, c, next_parent);
2284 bdrv_ref(to);
2285 bdrv_unref(from);
2286 }
2287 if (from->blk) {
2288 blk_set_bs(from->blk, to);
2289 if (!to->device_list.tqe_prev) {
2290 QTAILQ_INSERT_BEFORE(from, to, device_list);
2291 }
f8aa905a 2292 bdrv_device_remove(from);
dd62f1ca
KW
2293 }
2294}
2295
2296static void swap_feature_fields(BlockDriverState *bs_top,
2297 BlockDriverState *bs_new)
2298{
2299 BlockDriverState tmp;
2300
2301 bdrv_move_feature_fields(&tmp, bs_top);
2302 bdrv_move_feature_fields(bs_top, bs_new);
2303 bdrv_move_feature_fields(bs_new, &tmp);
2304
2305 assert(!bs_new->throttle_state);
2306 if (bs_top->throttle_state) {
2307 assert(bs_top->io_limits_enabled);
2308 bdrv_io_limits_enable(bs_new, throttle_group_get_name(bs_top));
2309 bdrv_io_limits_disable(bs_top);
2310 }
2311}
2312
4ddc07ca
PB
2313/*
2314 * Add new bs contents at the top of an image chain while the chain is
2315 * live, while keeping required fields on the top layer.
2316 *
2317 * This will modify the BlockDriverState fields, and swap contents
2318 * between bs_new and bs_top. Both bs_new and bs_top are modified.
2319 *
bfb197e0 2320 * bs_new must not be attached to a BlockBackend.
4ddc07ca
PB
2321 *
2322 * This function does not create any image files.
dd62f1ca
KW
2323 *
2324 * bdrv_append() takes ownership of a bs_new reference and unrefs it because
2325 * that's what the callers commonly need. bs_new will be referenced by the old
2326 * parents of bs_top after bdrv_append() returns. If the caller needs to keep a
2327 * reference of its own, it must call bdrv_ref().
4ddc07ca
PB
2328 */
2329void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
2330{
dd62f1ca
KW
2331 assert(!bdrv_requests_pending(bs_top));
2332 assert(!bdrv_requests_pending(bs_new));
2333
2334 bdrv_ref(bs_top);
2335 change_parent_backing_link(bs_top, bs_new);
2336
2337 /* Some fields always stay on top of the backing file chain */
2338 swap_feature_fields(bs_top, bs_new);
2339
2340 bdrv_set_backing_hd(bs_new, bs_top);
2341 bdrv_unref(bs_top);
4ddc07ca 2342
dd62f1ca
KW
2343 /* bs_new is now referenced by its new parents, we don't need the
2344 * additional reference any more. */
2345 bdrv_unref(bs_new);
8802d1fd
JC
2346}
2347
3f09bfbc
KW
2348void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new)
2349{
2350 assert(!bdrv_requests_pending(old));
2351 assert(!bdrv_requests_pending(new));
2352
2353 bdrv_ref(old);
2354
2355 if (old->blk) {
2356 /* As long as these fields aren't in BlockBackend, but in the top-level
2357 * BlockDriverState, it's not possible for a BDS to have two BBs.
2358 *
2359 * We really want to copy the fields from old to new, but we go for a
2360 * swap instead so that pointers aren't duplicated and cause trouble.
2361 * (Also, bdrv_swap() used to do the same.) */
2362 assert(!new->blk);
2363 swap_feature_fields(old, new);
2364 }
2365 change_parent_backing_link(old, new);
2366
2367 /* Change backing files if a previously independent node is added to the
2368 * chain. For active commit, we replace top by its own (indirect) backing
2369 * file and don't do anything here so we don't build a loop. */
2370 if (new->backing == NULL && !bdrv_chain_contains(backing_bs(old), new)) {
2371 bdrv_set_backing_hd(new, backing_bs(old));
2372 bdrv_set_backing_hd(old, NULL);
2373 }
2374
2375 bdrv_unref(old);
2376}
2377
4f6fd349 2378static void bdrv_delete(BlockDriverState *bs)
b338082b 2379{
3e914655 2380 assert(!bs->job);
3718d8ab 2381 assert(bdrv_op_blocker_is_empty(bs));
4f6fd349 2382 assert(!bs->refcnt);
18846dee 2383
e1b5c52e
SH
2384 bdrv_close(bs);
2385
1b7bdbc1 2386 /* remove from list, if necessary */
d22b2f41 2387 bdrv_make_anon(bs);
34c6f050 2388
2c1d04e0
HR
2389 QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list);
2390
7267c094 2391 g_free(bs);
fc01f7e7
FB
2392}
2393
e97fc193
AL
2394/*
2395 * Run consistency checks on an image
2396 *
e076f338 2397 * Returns 0 if the check could be completed (it doesn't mean that the image is
a1c7273b 2398 * free of errors) or -errno when an internal error occurred. The results of the
e076f338 2399 * check are stored in res.
e97fc193 2400 */
4534ff54 2401int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
e97fc193 2402{
908bcd54
HR
2403 if (bs->drv == NULL) {
2404 return -ENOMEDIUM;
2405 }
e97fc193
AL
2406 if (bs->drv->bdrv_check == NULL) {
2407 return -ENOTSUP;
2408 }
2409
e076f338 2410 memset(res, 0, sizeof(*res));
4534ff54 2411 return bs->drv->bdrv_check(bs, res, fix);
e97fc193
AL
2412}
2413
8a426614
KW
2414#define COMMIT_BUF_SECTORS 2048
2415
33e3963e
FB
2416/* commit COW file into the raw image */
2417int bdrv_commit(BlockDriverState *bs)
2418{
19cb3738 2419 BlockDriver *drv = bs->drv;
72706ea4 2420 int64_t sector, total_sectors, length, backing_length;
8a426614 2421 int n, ro, open_flags;
0bce597d 2422 int ret = 0;
72706ea4 2423 uint8_t *buf = NULL;
33e3963e 2424
19cb3738
FB
2425 if (!drv)
2426 return -ENOMEDIUM;
6bb45158 2427
760e0063 2428 if (!bs->backing) {
4dca4b63 2429 return -ENOTSUP;
33e3963e
FB
2430 }
2431
bb00021d 2432 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
760e0063 2433 bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
2d3735d3
SH
2434 return -EBUSY;
2435 }
2436
760e0063
KW
2437 ro = bs->backing->bs->read_only;
2438 open_flags = bs->backing->bs->open_flags;
4dca4b63
NS
2439
2440 if (ro) {
760e0063 2441 if (bdrv_reopen(bs->backing->bs, open_flags | BDRV_O_RDWR, NULL)) {
0bce597d 2442 return -EACCES;
4dca4b63 2443 }
ea2384d3 2444 }
33e3963e 2445
72706ea4
JC
2446 length = bdrv_getlength(bs);
2447 if (length < 0) {
2448 ret = length;
2449 goto ro_cleanup;
2450 }
2451
760e0063 2452 backing_length = bdrv_getlength(bs->backing->bs);
72706ea4
JC
2453 if (backing_length < 0) {
2454 ret = backing_length;
2455 goto ro_cleanup;
2456 }
2457
2458 /* If our top snapshot is larger than the backing file image,
2459 * grow the backing file image if possible. If not possible,
2460 * we must return an error */
2461 if (length > backing_length) {
760e0063 2462 ret = bdrv_truncate(bs->backing->bs, length);
72706ea4
JC
2463 if (ret < 0) {
2464 goto ro_cleanup;
2465 }
2466 }
2467
2468 total_sectors = length >> BDRV_SECTOR_BITS;
857d4f46
KW
2469
2470 /* qemu_try_blockalign() for bs will choose an alignment that works for
760e0063 2471 * bs->backing->bs as well, so no need to compare the alignment manually. */
857d4f46
KW
2472 buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2473 if (buf == NULL) {
2474 ret = -ENOMEM;
2475 goto ro_cleanup;
2476 }
8a426614
KW
2477
2478 for (sector = 0; sector < total_sectors; sector += n) {
d663640c
PB
2479 ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2480 if (ret < 0) {
2481 goto ro_cleanup;
2482 }
2483 if (ret) {
dabfa6cc
KW
2484 ret = bdrv_read(bs, sector, buf, n);
2485 if (ret < 0) {
8a426614
KW
2486 goto ro_cleanup;
2487 }
2488
760e0063 2489 ret = bdrv_write(bs->backing->bs, sector, buf, n);
dabfa6cc 2490 if (ret < 0) {
8a426614
KW
2491 goto ro_cleanup;
2492 }
ea2384d3 2493 }
33e3963e 2494 }
95389c86 2495
1d44952f
CH
2496 if (drv->bdrv_make_empty) {
2497 ret = drv->bdrv_make_empty(bs);
dabfa6cc
KW
2498 if (ret < 0) {
2499 goto ro_cleanup;
2500 }
1d44952f
CH
2501 bdrv_flush(bs);
2502 }
95389c86 2503
3f5075ae
CH
2504 /*
2505 * Make sure all data we wrote to the backing device is actually
2506 * stable on disk.
2507 */
760e0063
KW
2508 if (bs->backing) {
2509 bdrv_flush(bs->backing->bs);
dabfa6cc 2510 }
4dca4b63 2511
dabfa6cc 2512 ret = 0;
4dca4b63 2513ro_cleanup:
857d4f46 2514 qemu_vfree(buf);
4dca4b63
NS
2515
2516 if (ro) {
0bce597d 2517 /* ignoring error return here */
760e0063 2518 bdrv_reopen(bs->backing->bs, open_flags & ~BDRV_O_RDWR, NULL);
4dca4b63
NS
2519 }
2520
1d44952f 2521 return ret;
33e3963e
FB
2522}
2523
756e6736
KW
2524/*
2525 * Return values:
2526 * 0 - success
2527 * -EINVAL - backing format specified, but no file
2528 * -ENOSPC - can't update the backing file because no space is left in the
2529 * image file header
2530 * -ENOTSUP - format driver doesn't support changing the backing file
2531 */
2532int bdrv_change_backing_file(BlockDriverState *bs,
2533 const char *backing_file, const char *backing_fmt)
2534{
2535 BlockDriver *drv = bs->drv;
469ef350 2536 int ret;
756e6736 2537
5f377794
PB
2538 /* Backing file format doesn't make sense without a backing file */
2539 if (backing_fmt && !backing_file) {
2540 return -EINVAL;
2541 }
2542
756e6736 2543 if (drv->bdrv_change_backing_file != NULL) {
469ef350 2544 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
756e6736 2545 } else {
469ef350 2546 ret = -ENOTSUP;
756e6736 2547 }
469ef350
PB
2548
2549 if (ret == 0) {
2550 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2551 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2552 }
2553 return ret;
756e6736
KW
2554}
2555
6ebdcee2
JC
2556/*
2557 * Finds the image layer in the chain that has 'bs' as its backing file.
2558 *
2559 * active is the current topmost image.
2560 *
2561 * Returns NULL if bs is not found in active's image chain,
2562 * or if active == bs.
4caf0fcd
JC
2563 *
2564 * Returns the bottommost base image if bs == NULL.
6ebdcee2
JC
2565 */
2566BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2567 BlockDriverState *bs)
2568{
760e0063
KW
2569 while (active && bs != backing_bs(active)) {
2570 active = backing_bs(active);
6ebdcee2
JC
2571 }
2572
4caf0fcd
JC
2573 return active;
2574}
6ebdcee2 2575
4caf0fcd
JC
2576/* Given a BDS, searches for the base layer. */
2577BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2578{
2579 return bdrv_find_overlay(bs, NULL);
6ebdcee2
JC
2580}
2581
6ebdcee2
JC
2582/*
2583 * Drops images above 'base' up to and including 'top', and sets the image
2584 * above 'top' to have base as its backing file.
2585 *
2586 * Requires that the overlay to 'top' is opened r/w, so that the backing file
2587 * information in 'bs' can be properly updated.
2588 *
2589 * E.g., this will convert the following chain:
2590 * bottom <- base <- intermediate <- top <- active
2591 *
2592 * to
2593 *
2594 * bottom <- base <- active
2595 *
2596 * It is allowed for bottom==base, in which case it converts:
2597 *
2598 * base <- intermediate <- top <- active
2599 *
2600 * to
2601 *
2602 * base <- active
2603 *
54e26900
JC
2604 * If backing_file_str is non-NULL, it will be used when modifying top's
2605 * overlay image metadata.
2606 *
6ebdcee2
JC
2607 * Error conditions:
2608 * if active == top, that is considered an error
2609 *
2610 */
2611int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
54e26900 2612 BlockDriverState *base, const char *backing_file_str)
6ebdcee2 2613{
6ebdcee2 2614 BlockDriverState *new_top_bs = NULL;
6ebdcee2
JC
2615 int ret = -EIO;
2616
6ebdcee2
JC
2617 if (!top->drv || !base->drv) {
2618 goto exit;
2619 }
2620
2621 new_top_bs = bdrv_find_overlay(active, top);
2622
2623 if (new_top_bs == NULL) {
2624 /* we could not find the image above 'top', this is an error */
2625 goto exit;
2626 }
2627
760e0063 2628 /* special case of new_top_bs->backing->bs already pointing to base - nothing
6ebdcee2 2629 * to do, no intermediate images */
760e0063 2630 if (backing_bs(new_top_bs) == base) {
6ebdcee2
JC
2631 ret = 0;
2632 goto exit;
2633 }
2634
5db15a57
KW
2635 /* Make sure that base is in the backing chain of top */
2636 if (!bdrv_chain_contains(top, base)) {
6ebdcee2
JC
2637 goto exit;
2638 }
2639
2640 /* success - we can delete the intermediate states, and link top->base */
5db15a57 2641 backing_file_str = backing_file_str ? backing_file_str : base->filename;
54e26900 2642 ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
5db15a57 2643 base->drv ? base->drv->format_name : "");
6ebdcee2
JC
2644 if (ret) {
2645 goto exit;
2646 }
5db15a57 2647 bdrv_set_backing_hd(new_top_bs, base);
6ebdcee2 2648
6ebdcee2 2649 ret = 0;
6ebdcee2 2650exit:
6ebdcee2
JC
2651 return ret;
2652}
2653
61007b31
SH
2654/**
2655 * Truncate file to 'offset' bytes (needed only for file protocols)
2656 */
2657int bdrv_truncate(BlockDriverState *bs, int64_t offset)
71d0770c 2658{
61007b31
SH
2659 BlockDriver *drv = bs->drv;
2660 int ret;
2661 if (!drv)
71d0770c 2662 return -ENOMEDIUM;
61007b31
SH
2663 if (!drv->bdrv_truncate)
2664 return -ENOTSUP;
2665 if (bs->read_only)
2666 return -EACCES;
71d0770c 2667
61007b31
SH
2668 ret = drv->bdrv_truncate(bs, offset);
2669 if (ret == 0) {
2670 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
2671 bdrv_dirty_bitmap_truncate(bs);
2672 if (bs->blk) {
2673 blk_dev_resize_cb(bs->blk);
2674 }
c0191e76 2675 }
61007b31 2676 return ret;
71d0770c
AL
2677}
2678
61007b31
SH
2679/**
2680 * Length of a allocated file in bytes. Sparse files are counted by actual
2681 * allocated space. Return < 0 if error or unknown.
2682 */
2683int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
71d0770c 2684{
61007b31
SH
2685 BlockDriver *drv = bs->drv;
2686 if (!drv) {
2687 return -ENOMEDIUM;
8f4754ed 2688 }
61007b31
SH
2689 if (drv->bdrv_get_allocated_file_size) {
2690 return drv->bdrv_get_allocated_file_size(bs);
2691 }
2692 if (bs->file) {
9a4f4c31 2693 return bdrv_get_allocated_file_size(bs->file->bs);
1c9805a3 2694 }
61007b31 2695 return -ENOTSUP;
1c9805a3 2696}
e7a8a783 2697
61007b31
SH
2698/**
2699 * Return number of sectors on success, -errno on error.
1c9805a3 2700 */
61007b31 2701int64_t bdrv_nb_sectors(BlockDriverState *bs)
1c9805a3 2702{
61007b31 2703 BlockDriver *drv = bs->drv;
498e386c 2704
61007b31
SH
2705 if (!drv)
2706 return -ENOMEDIUM;
2572b37a 2707
61007b31
SH
2708 if (drv->has_variable_length) {
2709 int ret = refresh_total_sectors(bs, bs->total_sectors);
2710 if (ret < 0) {
2711 return ret;
1c9805a3
SH
2712 }
2713 }
61007b31 2714 return bs->total_sectors;
1c9805a3 2715}
b338082b 2716
61007b31
SH
2717/**
2718 * Return length in bytes on success, -errno on error.
2719 * The length is always a multiple of BDRV_SECTOR_SIZE.
8d3b1a2d 2720 */
61007b31 2721int64_t bdrv_getlength(BlockDriverState *bs)
8d3b1a2d 2722{
61007b31 2723 int64_t ret = bdrv_nb_sectors(bs);
8d3b1a2d 2724
4a9c9ea0 2725 ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret;
61007b31 2726 return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
fc01f7e7
FB
2727}
2728
61007b31
SH
2729/* return 0 as number of sectors if no device present or error */
2730void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
07d27a44 2731{
61007b31 2732 int64_t nb_sectors = bdrv_nb_sectors(bs);
07d27a44 2733
61007b31 2734 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
07d27a44
MA
2735}
2736
61007b31 2737int bdrv_is_read_only(BlockDriverState *bs)
8d3b1a2d 2738{
61007b31 2739 return bs->read_only;
83f64091 2740}
83f64091 2741
61007b31 2742int bdrv_is_sg(BlockDriverState *bs)
f08145fe 2743{
61007b31 2744 return bs->sg;
f08145fe
KW
2745}
2746
61007b31 2747int bdrv_enable_write_cache(BlockDriverState *bs)
ab185921 2748{
61007b31 2749 return bs->enable_write_cache;
ab185921
SH
2750}
2751
61007b31 2752void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
da1fa91d 2753{
61007b31 2754 bs->enable_write_cache = wce;
ab185921 2755
61007b31
SH
2756 /* so a reopen() will preserve wce */
2757 if (wce) {
2758 bs->open_flags |= BDRV_O_CACHE_WB;
893a8f62 2759 } else {
61007b31 2760 bs->open_flags &= ~BDRV_O_CACHE_WB;
893a8f62 2761 }
da1fa91d
KW
2762}
2763
61007b31 2764int bdrv_is_encrypted(BlockDriverState *bs)
fc3959e4 2765{
760e0063 2766 if (bs->backing && bs->backing->bs->encrypted) {
61007b31 2767 return 1;
760e0063 2768 }
61007b31 2769 return bs->encrypted;
fc3959e4
FZ
2770}
2771
61007b31 2772int bdrv_key_required(BlockDriverState *bs)
fc3959e4 2773{
760e0063 2774 BdrvChild *backing = bs->backing;
61007b31 2775
760e0063 2776 if (backing && backing->bs->encrypted && !backing->bs->valid_key) {
61007b31 2777 return 1;
760e0063 2778 }
61007b31 2779 return (bs->encrypted && !bs->valid_key);
fc3959e4
FZ
2780}
2781
61007b31 2782int bdrv_set_key(BlockDriverState *bs, const char *key)
d0c7f642 2783{
d0c7f642 2784 int ret;
760e0063
KW
2785 if (bs->backing && bs->backing->bs->encrypted) {
2786 ret = bdrv_set_key(bs->backing->bs, key);
61007b31
SH
2787 if (ret < 0)
2788 return ret;
2789 if (!bs->encrypted)
2790 return 0;
2791 }
2792 if (!bs->encrypted) {
2793 return -EINVAL;
2794 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
d0c7f642
KW
2795 return -ENOMEDIUM;
2796 }
61007b31 2797 ret = bs->drv->bdrv_set_key(bs, key);
b9c64947 2798 if (ret < 0) {
61007b31
SH
2799 bs->valid_key = 0;
2800 } else if (!bs->valid_key) {
2801 bs->valid_key = 1;
2802 if (bs->blk) {
2803 /* call the change callback now, we skipped it on open */
2804 blk_dev_change_media_cb(bs->blk, true);
2805 }
1b0288ae 2806 }
61007b31
SH
2807 return ret;
2808}
f08f2dda 2809
c5fbe571 2810/*
61007b31
SH
2811 * Provide an encryption key for @bs.
2812 * If @key is non-null:
2813 * If @bs is not encrypted, fail.
2814 * Else if the key is invalid, fail.
2815 * Else set @bs's key to @key, replacing the existing key, if any.
2816 * If @key is null:
2817 * If @bs is encrypted and still lacks a key, fail.
2818 * Else do nothing.
2819 * On failure, store an error object through @errp if non-null.
c5fbe571 2820 */
61007b31 2821void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
c5fbe571 2822{
61007b31
SH
2823 if (key) {
2824 if (!bdrv_is_encrypted(bs)) {
2825 error_setg(errp, "Node '%s' is not encrypted",
2826 bdrv_get_device_or_node_name(bs));
2827 } else if (bdrv_set_key(bs, key) < 0) {
c6bd8c70 2828 error_setg(errp, QERR_INVALID_PASSWORD);
4d2855a3
MA
2829 }
2830 } else {
2831 if (bdrv_key_required(bs)) {
b1ca6391
MA
2832 error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
2833 "'%s' (%s) is encrypted",
81e5f78a 2834 bdrv_get_device_or_node_name(bs),
4d2855a3
MA
2835 bdrv_get_encrypted_filename(bs));
2836 }
2837 }
2838}
2839
61007b31 2840const char *bdrv_get_format_name(BlockDriverState *bs)
40b4f539 2841{
61007b31 2842 return bs->drv ? bs->drv->format_name : NULL;
40b4f539
KW
2843}
2844
/*
 * qsort() comparator for an array of C strings (const char *).
 *
 * qsort() passes pointers to the array elements, i.e. pointers to the
 * string pointers, so both arguments have to be dereferenced once before
 * the strings can be compared.
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
2849
61007b31
SH
2850void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
2851 void *opaque)
40b4f539 2852{
61007b31
SH
2853 BlockDriver *drv;
2854 int count = 0;
2855 int i;
2856 const char **formats = NULL;
40b4f539 2857
61007b31
SH
2858 QLIST_FOREACH(drv, &bdrv_drivers, list) {
2859 if (drv->format_name) {
2860 bool found = false;
2861 int i = count;
2862 while (formats && i && !found) {
2863 found = !strcmp(formats[--i], drv->format_name);
2864 }
e2a305fb 2865
61007b31
SH
2866 if (!found) {
2867 formats = g_renew(const char *, formats, count + 1);
2868 formats[count++] = drv->format_name;
2869 }
6c5a42ac 2870 }
61007b31 2871 }
6c5a42ac 2872
61007b31 2873 qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
40b4f539 2874
61007b31
SH
2875 for (i = 0; i < count; i++) {
2876 it(opaque, formats[i]);
2877 }
40b4f539 2878
61007b31
SH
2879 g_free(formats);
2880}
40b4f539 2881
61007b31
SH
2882/* This function is to find a node in the bs graph */
2883BlockDriverState *bdrv_find_node(const char *node_name)
2884{
2885 BlockDriverState *bs;
391827eb 2886
61007b31 2887 assert(node_name);
40b4f539 2888
61007b31
SH
2889 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2890 if (!strcmp(node_name, bs->node_name)) {
2891 return bs;
40b4f539
KW
2892 }
2893 }
61007b31 2894 return NULL;
40b4f539
KW
2895}
2896
61007b31
SH
2897/* Put this QMP function here so it can access the static graph_bdrv_states. */
2898BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
40b4f539 2899{
61007b31
SH
2900 BlockDeviceInfoList *list, *entry;
2901 BlockDriverState *bs;
40b4f539 2902
61007b31
SH
2903 list = NULL;
2904 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2905 BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);
2906 if (!info) {
2907 qapi_free_BlockDeviceInfoList(list);
2908 return NULL;
301db7c2 2909 }
61007b31
SH
2910 entry = g_malloc0(sizeof(*entry));
2911 entry->value = info;
2912 entry->next = list;
2913 list = entry;
301db7c2
RH
2914 }
2915
61007b31
SH
2916 return list;
2917}
40b4f539 2918
61007b31
SH
2919BlockDriverState *bdrv_lookup_bs(const char *device,
2920 const char *node_name,
2921 Error **errp)
2922{
2923 BlockBackend *blk;
2924 BlockDriverState *bs;
40b4f539 2925
61007b31
SH
2926 if (device) {
2927 blk = blk_by_name(device);
40b4f539 2928
61007b31 2929 if (blk) {
9f4ed6fb
AG
2930 bs = blk_bs(blk);
2931 if (!bs) {
5433c24f 2932 error_setg(errp, "Device '%s' has no medium", device);
5433c24f
HR
2933 }
2934
9f4ed6fb 2935 return bs;
61007b31
SH
2936 }
2937 }
40b4f539 2938
61007b31
SH
2939 if (node_name) {
2940 bs = bdrv_find_node(node_name);
6d519a5f 2941
61007b31
SH
2942 if (bs) {
2943 return bs;
2944 }
40b4f539
KW
2945 }
2946
61007b31
SH
2947 error_setg(errp, "Cannot find device=%s nor node_name=%s",
2948 device ? device : "",
2949 node_name ? node_name : "");
2950 return NULL;
40b4f539
KW
2951}
2952
61007b31
SH
2953/* If 'base' is in the same chain as 'top', return true. Otherwise,
2954 * return false. If either argument is NULL, return false. */
2955bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
83f64091 2956{
61007b31 2957 while (top && top != base) {
760e0063 2958 top = backing_bs(top);
02c50efe 2959 }
61007b31
SH
2960
2961 return top != NULL;
02c50efe
FZ
2962}
2963
61007b31 2964BlockDriverState *bdrv_next_node(BlockDriverState *bs)
02c50efe 2965{
61007b31
SH
2966 if (!bs) {
2967 return QTAILQ_FIRST(&graph_bdrv_states);
02c50efe 2968 }
61007b31 2969 return QTAILQ_NEXT(bs, node_list);
83f64091
FB
2970}
2971
26260580
HR
2972/* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by
2973 * the monitor or attached to a BlockBackend */
61007b31 2974BlockDriverState *bdrv_next(BlockDriverState *bs)
83f64091 2975{
26260580
HR
2976 if (!bs || bs->blk) {
2977 bs = blk_next_root_bs(bs);
2978 if (bs) {
2979 return bs;
2980 }
857d4f46 2981 }
26260580
HR
2982
2983 /* Ignore all BDSs that are attached to a BlockBackend here; they have been
2984 * handled by the above block already */
2985 do {
2986 bs = bdrv_next_monitor_owned(bs);
2987 } while (bs && bs->blk);
2988 return bs;
83f64091 2989}
beac80cd 2990
/* Return the node_name field of @bs.  @bs must not be NULL. */
const char *bdrv_get_node_name(const BlockDriverState *bs)
{
    return bs->node_name;
}
2995
61007b31
SH
2996/* TODO check what callers really want: bs->node_name or blk_name() */
2997const char *bdrv_get_device_name(const BlockDriverState *bs)
beac80cd 2998{
61007b31 2999 return bs->blk ? blk_name(bs->blk) : "";
f141eafe 3000}
83f64091 3001
61007b31
SH
3002/* This can be used to identify nodes that might not have a device
3003 * name associated. Since node and device names live in the same
3004 * namespace, the result is unambiguous. The exception is if both are
3005 * absent, then this returns an empty (non-null) string. */
3006const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
f141eafe 3007{
61007b31 3008 return bs->blk ? blk_name(bs->blk) : bs->node_name;
beac80cd 3009}
beac80cd 3010
/* Return the open_flags field of @bs.  @bs must not be NULL. */
int bdrv_get_flags(BlockDriverState *bs)
{
    return bs->open_flags;
}
3015
/* Unconditionally return 1; @bs is not examined. */
int bdrv_has_zero_init_1(BlockDriverState *bs)
{
    return 1;
}
3020
61007b31 3021int bdrv_has_zero_init(BlockDriverState *bs)
0b5a2445 3022{
61007b31 3023 assert(bs->drv);
0b5a2445 3024
61007b31
SH
3025 /* If BS is a copy on write image, it is initialized to
3026 the contents of the base image, which may not be zeroes. */
760e0063 3027 if (bs->backing) {
61007b31
SH
3028 return 0;
3029 }
3030 if (bs->drv->bdrv_has_zero_init) {
3031 return bs->drv->bdrv_has_zero_init(bs);
0b5a2445 3032 }
61007b31
SH
3033
3034 /* safe default */
3035 return 0;
68485420
KW
3036}
3037
61007b31 3038bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
b2a61371 3039{
61007b31 3040 BlockDriverInfo bdi;
b2a61371 3041
760e0063 3042 if (bs->backing) {
61007b31
SH
3043 return false;
3044 }
3045
3046 if (bdrv_get_info(bs, &bdi) == 0) {
3047 return bdi.unallocated_blocks_are_zero;
b2a61371
SH
3048 }
3049
61007b31 3050 return false;
b2a61371
SH
3051}
3052
61007b31 3053bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
68485420 3054{
61007b31 3055 BlockDriverInfo bdi;
68485420 3056
760e0063 3057 if (bs->backing || !(bs->open_flags & BDRV_O_UNMAP)) {
61007b31
SH
3058 return false;
3059 }
68485420 3060
61007b31
SH
3061 if (bdrv_get_info(bs, &bdi) == 0) {
3062 return bdi.can_write_zeroes_with_unmap;
3063 }
68485420 3064
61007b31 3065 return false;
68485420
KW
3066}
3067
61007b31 3068const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
b2e12bc6 3069{
760e0063 3070 if (bs->backing && bs->backing->bs->encrypted)
61007b31
SH
3071 return bs->backing_file;
3072 else if (bs->encrypted)
3073 return bs->filename;
3074 else
3075 return NULL;
b2e12bc6
CH
3076}
3077
61007b31
SH
/*
 * Copy bs->backing_file into @filename using pstrcpy(), with
 * @filename_size as the size of the destination buffer.
 */
void bdrv_get_backing_filename(BlockDriverState *bs,
                               char *filename, int filename_size)
{
    pstrcpy(filename, filename_size, bs->backing_file);
}
d318aea9 3083
61007b31
SH
3084int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
3085{
3086 BlockDriver *drv = bs->drv;
3087 if (!drv)
3088 return -ENOMEDIUM;
3089 if (!drv->bdrv_get_info)
3090 return -ENOTSUP;
3091 memset(bdi, 0, sizeof(*bdi));
3092 return drv->bdrv_get_info(bs, bdi);
3093}
016f5cf6 3094
61007b31
SH
3095ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
3096{
3097 BlockDriver *drv = bs->drv;
3098 if (drv && drv->bdrv_get_specific_info) {
3099 return drv->bdrv_get_specific_info(bs);
3100 }
3101 return NULL;
016f5cf6
AG
3102}
3103
a31939e6 3104void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event)
4265d620 3105{
61007b31
SH
3106 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
3107 return;
3108 }
4265d620 3109
61007b31 3110 bs->drv->bdrv_debug_event(bs, event);
4265d620
PB
3111}
3112
61007b31
SH
3113int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
3114 const char *tag)
4265d620 3115{
61007b31 3116 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
9a4f4c31 3117 bs = bs->file ? bs->file->bs : NULL;
61007b31 3118 }
4265d620 3119
61007b31
SH
3120 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
3121 return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
3122 }
4265d620 3123
61007b31 3124 return -ENOTSUP;
4265d620
PB
3125}
3126
61007b31 3127int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
ea2384d3 3128{
61007b31 3129 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
9a4f4c31 3130 bs = bs->file ? bs->file->bs : NULL;
61007b31 3131 }
ce1a14dc 3132
61007b31
SH
3133 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
3134 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
3135 }
3136
3137 return -ENOTSUP;
eb852011
MA
3138}
3139
61007b31 3140int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
ce1a14dc 3141{
61007b31 3142 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
9a4f4c31 3143 bs = bs->file ? bs->file->bs : NULL;
61007b31 3144 }
ce1a14dc 3145
61007b31
SH
3146 if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
3147 return bs->drv->bdrv_debug_resume(bs, tag);
3148 }
ce1a14dc 3149
61007b31 3150 return -ENOTSUP;
f197fe2b
FZ
3151}
3152
61007b31 3153bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
ce1a14dc 3154{
61007b31 3155 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
9a4f4c31 3156 bs = bs->file ? bs->file->bs : NULL;
f197fe2b 3157 }
19cb3738 3158
61007b31
SH
3159 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
3160 return bs->drv->bdrv_debug_is_suspended(bs, tag);
3161 }
f9f05dc5 3162
61007b31
SH
3163 return false;
3164}
f9f05dc5 3165
61007b31 3166int bdrv_is_snapshot(BlockDriverState *bs)
f9f05dc5 3167{
61007b31 3168 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
f9f05dc5
KW
3169}
3170
61007b31
SH
3171/* backing_file can either be relative, or absolute, or a protocol. If it is
3172 * relative, it must be relative to the chain. So, passing in bs->filename
3173 * from a BDS as backing_file should not be done, as that may be relative to
3174 * the CWD rather than the chain. */
3175BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
3176 const char *backing_file)
f9f05dc5 3177{
61007b31
SH
3178 char *filename_full = NULL;
3179 char *backing_file_full = NULL;
3180 char *filename_tmp = NULL;
3181 int is_protocol = 0;
3182 BlockDriverState *curr_bs = NULL;
3183 BlockDriverState *retval = NULL;
f9f05dc5 3184
61007b31
SH
3185 if (!bs || !bs->drv || !backing_file) {
3186 return NULL;
f9f05dc5
KW
3187 }
3188
61007b31
SH
3189 filename_full = g_malloc(PATH_MAX);
3190 backing_file_full = g_malloc(PATH_MAX);
3191 filename_tmp = g_malloc(PATH_MAX);
f9f05dc5 3192
61007b31 3193 is_protocol = path_has_protocol(backing_file);
f9f05dc5 3194
760e0063 3195 for (curr_bs = bs; curr_bs->backing; curr_bs = curr_bs->backing->bs) {
f9f05dc5 3196
61007b31
SH
3197 /* If either of the filename paths is actually a protocol, then
3198 * compare unmodified paths; otherwise make paths relative */
3199 if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
3200 if (strcmp(backing_file, curr_bs->backing_file) == 0) {
760e0063 3201 retval = curr_bs->backing->bs;
61007b31
SH
3202 break;
3203 }
3204 } else {
3205 /* If not an absolute filename path, make it relative to the current
3206 * image's filename path */
3207 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3208 backing_file);
f9f05dc5 3209
61007b31
SH
3210 /* We are going to compare absolute pathnames */
3211 if (!realpath(filename_tmp, filename_full)) {
3212 continue;
3213 }
07f07615 3214
61007b31
SH
3215 /* We need to make sure the backing filename we are comparing against
3216 * is relative to the current image filename (or absolute) */
3217 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3218 curr_bs->backing_file);
07f07615 3219
61007b31
SH
3220 if (!realpath(filename_tmp, backing_file_full)) {
3221 continue;
3222 }
eb489bb1 3223
61007b31 3224 if (strcmp(backing_file_full, filename_full) == 0) {
760e0063 3225 retval = curr_bs->backing->bs;
61007b31
SH
3226 break;
3227 }
3228 }
eb489bb1
KW
3229 }
3230
61007b31
SH
3231 g_free(filename_full);
3232 g_free(backing_file_full);
3233 g_free(filename_tmp);
3234 return retval;
3235}
3236
3237int bdrv_get_backing_file_depth(BlockDriverState *bs)
3238{
3239 if (!bs->drv) {
3240 return 0;
eb489bb1
KW
3241 }
3242
760e0063 3243 if (!bs->backing) {
61007b31 3244 return 0;
ca716364
KW
3245 }
3246
760e0063 3247 return 1 + bdrv_get_backing_file_depth(bs->backing->bs);
61007b31 3248}
07f07615 3249
61007b31
SH
/* Run the module initializers registered under MODULE_INIT_BLOCK. */
void bdrv_init(void)
{
    module_call_init(MODULE_INIT_BLOCK);
}
29cdb251 3254
61007b31
SH
/* Set use_bdrv_whitelist to 1, then perform the same work as bdrv_init(). */
void bdrv_init_with_whitelist(void)
{
    use_bdrv_whitelist = 1;
    bdrv_init();
}
3260
5a8a30db 3261void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
0f15423c 3262{
5a8a30db
KW
3263 Error *local_err = NULL;
3264 int ret;
3265
3456a8d1
KW
3266 if (!bs->drv) {
3267 return;
3268 }
3269
04c01a5c 3270 if (!(bs->open_flags & BDRV_O_INACTIVE)) {
7ea2d269
AK
3271 return;
3272 }
04c01a5c 3273 bs->open_flags &= ~BDRV_O_INACTIVE;
7ea2d269 3274
3456a8d1 3275 if (bs->drv->bdrv_invalidate_cache) {
5a8a30db 3276 bs->drv->bdrv_invalidate_cache(bs, &local_err);
3456a8d1 3277 } else if (bs->file) {
9a4f4c31 3278 bdrv_invalidate_cache(bs->file->bs, &local_err);
5a8a30db
KW
3279 }
3280 if (local_err) {
04c01a5c 3281 bs->open_flags |= BDRV_O_INACTIVE;
5a8a30db
KW
3282 error_propagate(errp, local_err);
3283 return;
0f15423c 3284 }
3456a8d1 3285
5a8a30db
KW
3286 ret = refresh_total_sectors(bs, bs->total_sectors);
3287 if (ret < 0) {
04c01a5c 3288 bs->open_flags |= BDRV_O_INACTIVE;
5a8a30db
KW
3289 error_setg_errno(errp, -ret, "Could not refresh total sector count");
3290 return;
3291 }
0f15423c
AL
3292}
3293
5a8a30db 3294void bdrv_invalidate_cache_all(Error **errp)
0f15423c
AL
3295{
3296 BlockDriverState *bs;
5a8a30db 3297 Error *local_err = NULL;
0f15423c 3298
dc364f4c 3299 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
ed78cda3
SH
3300 AioContext *aio_context = bdrv_get_aio_context(bs);
3301
3302 aio_context_acquire(aio_context);
5a8a30db 3303 bdrv_invalidate_cache(bs, &local_err);
ed78cda3 3304 aio_context_release(aio_context);
5a8a30db
KW
3305 if (local_err) {
3306 error_propagate(errp, local_err);
3307 return;
3308 }
0f15423c
AL
3309 }
3310}
3311
76b1c7fe
KW
3312static int bdrv_inactivate(BlockDriverState *bs)
3313{
3314 int ret;
3315
3316 if (bs->drv->bdrv_inactivate) {
3317 ret = bs->drv->bdrv_inactivate(bs);
3318 if (ret < 0) {
3319 return ret;
3320 }
3321 }
3322
3323 bs->open_flags |= BDRV_O_INACTIVE;
3324 return 0;
3325}
3326
3327int bdrv_inactivate_all(void)
3328{
3329 BlockDriverState *bs;
3330 int ret;
3331
3332 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3333 AioContext *aio_context = bdrv_get_aio_context(bs);
3334
3335 aio_context_acquire(aio_context);
3336 ret = bdrv_inactivate(bs);
3337 aio_context_release(aio_context);
3338 if (ret < 0) {
3339 return ret;
3340 }
3341 }
3342
3343 return 0;
3344}
3345
19cb3738
FB
3346/**************************************************************/
3347/* removable device support */
3348
3349/**
3350 * Return TRUE if the media is present
3351 */
e031f750 3352bool bdrv_is_inserted(BlockDriverState *bs)
19cb3738
FB
3353{
3354 BlockDriver *drv = bs->drv;
28d7a789 3355 BdrvChild *child;
a1aff5bf 3356
e031f750
HR
3357 if (!drv) {
3358 return false;
3359 }
28d7a789
HR
3360 if (drv->bdrv_is_inserted) {
3361 return drv->bdrv_is_inserted(bs);
3362 }
3363 QLIST_FOREACH(child, &bs->children, next) {
3364 if (!bdrv_is_inserted(child->bs)) {
3365 return false;
3366 }
e031f750 3367 }
28d7a789 3368 return true;
19cb3738
FB
3369}
3370
3371/**
8e49ca46
MA
3372 * Return whether the media changed since the last call to this
3373 * function, or -ENOTSUP if we don't know. Most drivers don't know.
19cb3738
FB
3374 */
3375int bdrv_media_changed(BlockDriverState *bs)
3376{
3377 BlockDriver *drv = bs->drv;
19cb3738 3378
8e49ca46
MA
3379 if (drv && drv->bdrv_media_changed) {
3380 return drv->bdrv_media_changed(bs);
3381 }
3382 return -ENOTSUP;
19cb3738
FB
3383}
3384
3385/**
3386 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3387 */
f36f3949 3388void bdrv_eject(BlockDriverState *bs, bool eject_flag)
19cb3738
FB
3389{
3390 BlockDriver *drv = bs->drv;
bfb197e0 3391 const char *device_name;
19cb3738 3392
822e1cd1
MA
3393 if (drv && drv->bdrv_eject) {
3394 drv->bdrv_eject(bs, eject_flag);
19cb3738 3395 }
6f382ed2 3396
bfb197e0
MA
3397 device_name = bdrv_get_device_name(bs);
3398 if (device_name[0] != '\0') {
3399 qapi_event_send_device_tray_moved(device_name,
a5ee7bd4 3400 eject_flag, &error_abort);
6f382ed2 3401 }
19cb3738
FB
3402}
3403
19cb3738
FB
3404/**
3405 * Lock or unlock the media (if it is locked, the user won't be able
3406 * to eject it manually).
3407 */
025e849a 3408void bdrv_lock_medium(BlockDriverState *bs, bool locked)
19cb3738
FB
3409{
3410 BlockDriver *drv = bs->drv;
3411
025e849a 3412 trace_bdrv_lock_medium(bs, locked);
b8c6d095 3413
025e849a
MA
3414 if (drv && drv->bdrv_lock_medium) {
3415 drv->bdrv_lock_medium(bs, locked);
19cb3738
FB
3416 }
3417}
985a03b0 3418
9fcb0251
FZ
/* Get a reference to bs: increments bs->refcnt.  @bs must not be NULL. */
void bdrv_ref(BlockDriverState *bs)
{
    bs->refcnt++;
}
3424
3425/* Release a previously grabbed reference to bs.
3426 * If after releasing, reference count is zero, the BlockDriverState is
3427 * deleted. */
3428void bdrv_unref(BlockDriverState *bs)
3429{
9a4d5ca6
JC
3430 if (!bs) {
3431 return;
3432 }
9fcb0251
FZ
3433 assert(bs->refcnt > 0);
3434 if (--bs->refcnt == 0) {
3435 bdrv_delete(bs);
3436 }
3437}
3438
fbe40ff7
FZ
/* One entry in a per-operation blocker list (bs->op_blockers[op]). */
struct BdrvOpBlocker {
    /* Copied into the caller's error when the operation is refused; also
     * the key bdrv_op_unblock() compares by pointer to find this entry */
    Error *reason;
    QLIST_ENTRY(BdrvOpBlocker) list;
};
3443
3444bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
3445{
3446 BdrvOpBlocker *blocker;
3447 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3448 if (!QLIST_EMPTY(&bs->op_blockers[op])) {
3449 blocker = QLIST_FIRST(&bs->op_blockers[op]);
3450 if (errp) {
e43bfd9c
MA
3451 *errp = error_copy(blocker->reason);
3452 error_prepend(errp, "Node '%s' is busy: ",
3453 bdrv_get_device_or_node_name(bs));
fbe40ff7
FZ
3454 }
3455 return true;
3456 }
3457 return false;
3458}
3459
3460void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
3461{
3462 BdrvOpBlocker *blocker;
3463 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3464
5839e53b 3465 blocker = g_new0(BdrvOpBlocker, 1);
fbe40ff7
FZ
3466 blocker->reason = reason;
3467 QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
3468}
3469
3470void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
3471{
3472 BdrvOpBlocker *blocker, *next;
3473 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3474 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
3475 if (blocker->reason == reason) {
3476 QLIST_REMOVE(blocker, list);
3477 g_free(blocker);
3478 }
3479 }
3480}
3481
3482void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
3483{
3484 int i;
3485 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3486 bdrv_op_block(bs, i, reason);
3487 }
3488}
3489
3490void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
3491{
3492 int i;
3493 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3494 bdrv_op_unblock(bs, i, reason);
3495 }
3496}
3497
3498bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
3499{
3500 int i;
3501
3502 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3503 if (!QLIST_EMPTY(&bs->op_blockers[i])) {
3504 return false;
3505 }
3506 }
3507 return true;
3508}
3509
d92ada22
LC
/*
 * Create a new image file.
 *
 * @filename:      name of the image to create; also used to find the
 *                 protocol driver
 * @fmt:           format driver name, looked up with bdrv_find_format()
 * @base_filename: if non-NULL, stored as the backing-file option
 * @base_fmt:      if non-NULL, stored as the backing-format option
 * @options:       if non-NULL, option string parsed into the creation
 *                 options (the "-o" options)
 * @img_size:      stored as the size option before @options is parsed
 * @flags:         open flags used, after masking, to open the backing file
 *                 when its length is needed
 * @errp:          receives the error on failure
 * @quiet:         when false, a "Formatting ..." line is printed to stdout
 *
 * All failures are reported through @errp; there is no return value.
 */
void bdrv_img_create(const char *filename, const char *fmt,
                     const char *base_filename, const char *base_fmt,
                     char *options, uint64_t img_size, int flags,
                     Error **errp, bool quiet)
{
    QemuOptsList *create_opts = NULL;
    QemuOpts *opts = NULL;
    const char *backing_fmt, *backing_file;
    int64_t size;
    BlockDriver *drv, *proto_drv;
    Error *local_err = NULL;
    int ret = 0;

    /* Find driver and parse its options */
    drv = bdrv_find_format(fmt);
    if (!drv) {
        error_setg(errp, "Unknown file format '%s'", fmt);
        return;
    }

    proto_drv = bdrv_find_protocol(filename, true, errp);
    if (!proto_drv) {
        return;
    }

    if (!drv->create_opts) {
        error_setg(errp, "Format driver '%s' does not support image creation",
                   drv->format_name);
        return;
    }

    if (!proto_drv->create_opts) {
        error_setg(errp, "Protocol driver '%s' does not support image creation",
                   proto_drv->format_name);
        return;
    }

    /* Merge the format and protocol option lists into one */
    create_opts = qemu_opts_append(create_opts, drv->create_opts);
    create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);

    /* Create parameter list with default values */
    opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
    qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);

    /* Parse -o options */
    if (options) {
        qemu_opts_do_parse(opts, options, NULL, &local_err);
        if (local_err) {
            /* The parse error is reported here and replaced by a generic
             * message for the caller */
            error_report_err(local_err);
            local_err = NULL;
            error_setg(errp, "Invalid options for file format '%s'", fmt);
            goto out;
        }
    }

    if (base_filename) {
        qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
        if (local_err) {
            error_setg(errp, "Backing file not supported for file format '%s'",
                       fmt);
            goto out;
        }
    }

    if (base_fmt) {
        qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
        if (local_err) {
            error_setg(errp, "Backing file format not supported for file "
                             "format '%s'", fmt);
            goto out;
        }
    }

    /* Read the backing file back from the options: it may have come from
     * either base_filename or the -o string */
    backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
    if (backing_file) {
        if (!strcmp(filename, backing_file)) {
            error_setg(errp, "Error: Trying to create an image with the "
                             "same filename as the backing file");
            goto out;
        }
    }

    backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);

    /* The size for the image must always be specified, with one exception:
     * If we are using a backing file, we can obtain the size from there */
    size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
    if (size == -1) {
        if (backing_file) {
            BlockDriverState *bs;
            char *full_backing = g_new0(char, PATH_MAX);
            /* NOTE(review): this declaration shadows the outer 'size' */
            int64_t size;
            int back_flags;
            QDict *backing_options = NULL;

            bdrv_get_full_backing_filename_from_filename(filename, backing_file,
                                                         full_backing, PATH_MAX,
                                                         &local_err);
            if (local_err) {
                g_free(full_backing);
                goto out;
            }

            /* backing files always opened read-only */
            back_flags =
                flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

            if (backing_fmt) {
                backing_options = qdict_new();
                qdict_put(backing_options, "driver",
                          qstring_from_str(backing_fmt));
            }

            bs = NULL;
            ret = bdrv_open(&bs, full_backing, NULL, backing_options,
                            back_flags, &local_err);
            g_free(full_backing);
            if (ret < 0) {
                goto out;
            }
            size = bdrv_getlength(bs);
            if (size < 0) {
                error_setg_errno(errp, -size, "Could not get size of '%s'",
                                 backing_file);
                bdrv_unref(bs);
                goto out;
            }

            /* Use the backing file's length as the new image's size */
            qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);

            bdrv_unref(bs);
        } else {
            error_setg(errp, "Image creation needs a size parameter");
            goto out;
        }
    }

    if (!quiet) {
        printf("Formatting '%s', fmt=%s ", filename, fmt);
        qemu_opts_print(opts, " ");
        puts("");
    }

    ret = bdrv_create(drv, filename, opts, &local_err);

    if (ret == -EFBIG) {
        /* This is generally a better message than whatever the driver would
         * deliver (especially because of the cluster_size_hint), since that
         * is most probably not much different from "image too large". */
        const char *cluster_size_hint = "";
        if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
            cluster_size_hint = " (try using a larger cluster size)";
        }
        error_setg(errp, "The image size is too large for file format '%s'"
                   "%s", fmt, cluster_size_hint);
        error_free(local_err);
        local_err = NULL;
    }

out:
    /* Common cleanup; any error still held in local_err is handed to errp */
    qemu_opts_del(opts);
    qemu_opts_free(create_opts);
    if (local_err) {
        error_propagate(errp, local_err);
    }
}
85d126f3
SH
3676
/* Return the aio_context field of @bs.  @bs must not be NULL. */
AioContext *bdrv_get_aio_context(BlockDriverState *bs)
{
    return bs->aio_context;
}
3681
3682void bdrv_detach_aio_context(BlockDriverState *bs)
3683{
33384421
HR
3684 BdrvAioNotifier *baf;
3685
dcd04228
SH
3686 if (!bs->drv) {
3687 return;
3688 }
3689
33384421
HR
3690 QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
3691 baf->detach_aio_context(baf->opaque);
3692 }
3693
a0d64a61 3694 if (bs->throttle_state) {
0e5b0a2d 3695 throttle_timers_detach_aio_context(&bs->throttle_timers);
13af91eb 3696 }
dcd04228
SH
3697 if (bs->drv->bdrv_detach_aio_context) {
3698 bs->drv->bdrv_detach_aio_context(bs);
3699 }
3700 if (bs->file) {
9a4f4c31 3701 bdrv_detach_aio_context(bs->file->bs);
dcd04228 3702 }
760e0063
KW
3703 if (bs->backing) {
3704 bdrv_detach_aio_context(bs->backing->bs);
dcd04228
SH
3705 }
3706
3707 bs->aio_context = NULL;
3708}
3709
3710void bdrv_attach_aio_context(BlockDriverState *bs,
3711 AioContext *new_context)
3712{
33384421
HR
3713 BdrvAioNotifier *ban;
3714
dcd04228
SH
3715 if (!bs->drv) {
3716 return;
3717 }
3718
3719 bs->aio_context = new_context;
3720
760e0063
KW
3721 if (bs->backing) {
3722 bdrv_attach_aio_context(bs->backing->bs, new_context);
dcd04228
SH
3723 }
3724 if (bs->file) {
9a4f4c31 3725 bdrv_attach_aio_context(bs->file->bs, new_context);
dcd04228
SH
3726 }
3727 if (bs->drv->bdrv_attach_aio_context) {
3728 bs->drv->bdrv_attach_aio_context(bs, new_context);
3729 }
a0d64a61 3730 if (bs->throttle_state) {
0e5b0a2d 3731 throttle_timers_attach_aio_context(&bs->throttle_timers, new_context);
13af91eb 3732 }
33384421
HR
3733
3734 QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
3735 ban->attached_aio_context(new_context, ban->opaque);
3736 }
dcd04228
SH
3737}
3738
/*
 * Move @bs to @new_context: drain it, detach it from its current
 * AioContext, then attach it to @new_context while holding @new_context.
 */
void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
{
    bdrv_drain(bs); /* ensure there are no in-flight requests */

    bdrv_detach_aio_context(bs);

    /* This function executes in the old AioContext so acquire the new one in
     * case it runs in a different thread.
     */
    aio_context_acquire(new_context);
    bdrv_attach_aio_context(bs, new_context);
    aio_context_release(new_context);
}
d616b224 3752
33384421
HR
3753void bdrv_add_aio_context_notifier(BlockDriverState *bs,
3754 void (*attached_aio_context)(AioContext *new_context, void *opaque),
3755 void (*detach_aio_context)(void *opaque), void *opaque)
3756{
3757 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
3758 *ban = (BdrvAioNotifier){
3759 .attached_aio_context = attached_aio_context,
3760 .detach_aio_context = detach_aio_context,
3761 .opaque = opaque
3762 };
3763
3764 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
3765}
3766
3767void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
3768 void (*attached_aio_context)(AioContext *,
3769 void *),
3770 void (*detach_aio_context)(void *),
3771 void *opaque)
3772{
3773 BdrvAioNotifier *ban, *ban_next;
3774
3775 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
3776 if (ban->attached_aio_context == attached_aio_context &&
3777 ban->detach_aio_context == detach_aio_context &&
3778 ban->opaque == opaque)
3779 {
3780 QLIST_REMOVE(ban, list);
3781 g_free(ban);
3782
3783 return;
3784 }
3785 }
3786
3787 abort();
3788}
3789
77485434 3790int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
8b13976d 3791 BlockDriverAmendStatusCB *status_cb, void *cb_opaque)
6f176b48 3792{
c282e1fd 3793 if (!bs->drv->bdrv_amend_options) {
6f176b48
HR
3794 return -ENOTSUP;
3795 }
8b13976d 3796 return bs->drv->bdrv_amend_options(bs, opts, status_cb, cb_opaque);
6f176b48 3797}
f6186f49 3798
b5042a36
BC
3799/* This function will be called by the bdrv_recurse_is_first_non_filter method
3800 * of block filter and by bdrv_is_first_non_filter.
3801 * It is used to test if the given bs is the candidate or recurse more in the
3802 * node graph.
212a5a8f 3803 */
b5042a36 3804bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
212a5a8f 3805 BlockDriverState *candidate)
f6186f49 3806{
b5042a36
BC
3807 /* return false if basic checks fails */
3808 if (!bs || !bs->drv) {
212a5a8f 3809 return false;
f6186f49
BC
3810 }
3811
b5042a36
BC
3812 /* the code reached a non block filter driver -> check if the bs is
3813 * the same as the candidate. It's the recursion termination condition.
3814 */
3815 if (!bs->drv->is_filter) {
3816 return bs == candidate;
212a5a8f 3817 }
b5042a36 3818 /* Down this path the driver is a block filter driver */
212a5a8f 3819
b5042a36
BC
3820 /* If the block filter recursion method is defined use it to recurse down
3821 * the node graph.
3822 */
3823 if (bs->drv->bdrv_recurse_is_first_non_filter) {
212a5a8f 3824 return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
f6186f49
BC
3825 }
3826
b5042a36
BC
3827 /* the driver is a block filter but don't allow to recurse -> return false
3828 */
3829 return false;
f6186f49
BC
3830}
3831
212a5a8f
BC
3832/* This function checks if the candidate is the first non filter bs down it's
3833 * bs chain. Since we don't have pointers to parents it explore all bs chains
3834 * from the top. Some filters can choose not to pass down the recursion.
3835 */
3836bool bdrv_is_first_non_filter(BlockDriverState *candidate)
f6186f49 3837{
212a5a8f
BC
3838 BlockDriverState *bs;
3839
3840 /* walk down the bs forest recursively */
3841 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3842 bool perm;
3843
b5042a36 3844 /* try to recurse in this top level bs */
e6dc8a1f 3845 perm = bdrv_recurse_is_first_non_filter(bs, candidate);
212a5a8f
BC
3846
3847 /* candidate is the first non filter */
3848 if (perm) {
3849 return true;
3850 }
3851 }
3852
3853 return false;
f6186f49 3854}
09158f00 3855
e12f3784
WC
3856BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
3857 const char *node_name, Error **errp)
09158f00
BC
3858{
3859 BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
5a7e7a0b
SH
3860 AioContext *aio_context;
3861
09158f00
BC
3862 if (!to_replace_bs) {
3863 error_setg(errp, "Node name '%s' not found", node_name);
3864 return NULL;
3865 }
3866
5a7e7a0b
SH
3867 aio_context = bdrv_get_aio_context(to_replace_bs);
3868 aio_context_acquire(aio_context);
3869
09158f00 3870 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
5a7e7a0b
SH
3871 to_replace_bs = NULL;
3872 goto out;
09158f00
BC
3873 }
3874
3875 /* We don't want arbitrary node of the BDS chain to be replaced only the top
3876 * most non filter in order to prevent data corruption.
3877 * Another benefit is that this tests exclude backing files which are
3878 * blocked by the backing blockers.
3879 */
e12f3784 3880 if (!bdrv_recurse_is_first_non_filter(parent_bs, to_replace_bs)) {
09158f00 3881 error_setg(errp, "Only top most non filter can be replaced");
5a7e7a0b
SH
3882 to_replace_bs = NULL;
3883 goto out;
09158f00
BC
3884 }
3885
5a7e7a0b
SH
3886out:
3887 aio_context_release(aio_context);
09158f00
BC
3888 return to_replace_bs;
3889}
448ad91d 3890
91af7014
HR
3891static bool append_open_options(QDict *d, BlockDriverState *bs)
3892{
3893 const QDictEntry *entry;
9e700c1a 3894 QemuOptDesc *desc;
260fecf1 3895 BdrvChild *child;
91af7014 3896 bool found_any = false;
260fecf1 3897 const char *p;
91af7014
HR
3898
3899 for (entry = qdict_first(bs->options); entry;
3900 entry = qdict_next(bs->options, entry))
3901 {
260fecf1
KW
3902 /* Exclude options for children */
3903 QLIST_FOREACH(child, &bs->children, next) {
3904 if (strstart(qdict_entry_key(entry), child->name, &p)
3905 && (!*p || *p == '.'))
3906 {
3907 break;
3908 }
3909 }
3910 if (child) {
9e700c1a 3911 continue;
91af7014 3912 }
9e700c1a
KW
3913
3914 /* And exclude all non-driver-specific options */
3915 for (desc = bdrv_runtime_opts.desc; desc->name; desc++) {
3916 if (!strcmp(qdict_entry_key(entry), desc->name)) {
3917 break;
3918 }
3919 }
3920 if (desc->name) {
3921 continue;
3922 }
3923
3924 qobject_incref(qdict_entry_value(entry));
3925 qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
3926 found_any = true;
91af7014
HR
3927 }
3928
3929 return found_any;
3930}
3931
3932/* Updates the following BDS fields:
3933 * - exact_filename: A filename which may be used for opening a block device
3934 * which (mostly) equals the given BDS (even without any
3935 * other options; so reading and writing must return the same
3936 * results, but caching etc. may be different)
3937 * - full_open_options: Options which, when given when opening a block device
3938 * (without a filename), result in a BDS (mostly)
3939 * equalling the given one
3940 * - filename: If exact_filename is set, it is copied here. Otherwise,
3941 * full_open_options is converted to a JSON object, prefixed with
3942 * "json:" (for use through the JSON pseudo protocol) and put here.
3943 */
3944void bdrv_refresh_filename(BlockDriverState *bs)
3945{
3946 BlockDriver *drv = bs->drv;
3947 QDict *opts;
3948
3949 if (!drv) {
3950 return;
3951 }
3952
3953 /* This BDS's file name will most probably depend on its file's name, so
3954 * refresh that first */
3955 if (bs->file) {
9a4f4c31 3956 bdrv_refresh_filename(bs->file->bs);
91af7014
HR
3957 }
3958
3959 if (drv->bdrv_refresh_filename) {
3960 /* Obsolete information is of no use here, so drop the old file name
3961 * information before refreshing it */
3962 bs->exact_filename[0] = '\0';
3963 if (bs->full_open_options) {
3964 QDECREF(bs->full_open_options);
3965 bs->full_open_options = NULL;
3966 }
3967
4cdd01d3
KW
3968 opts = qdict_new();
3969 append_open_options(opts, bs);
3970 drv->bdrv_refresh_filename(bs, opts);
3971 QDECREF(opts);
91af7014
HR
3972 } else if (bs->file) {
3973 /* Try to reconstruct valid information from the underlying file */
3974 bool has_open_options;
3975
3976 bs->exact_filename[0] = '\0';
3977 if (bs->full_open_options) {
3978 QDECREF(bs->full_open_options);
3979 bs->full_open_options = NULL;
3980 }
3981
3982 opts = qdict_new();
3983 has_open_options = append_open_options(opts, bs);
3984
3985 /* If no specific options have been given for this BDS, the filename of
3986 * the underlying file should suffice for this one as well */
9a4f4c31
KW
3987 if (bs->file->bs->exact_filename[0] && !has_open_options) {
3988 strcpy(bs->exact_filename, bs->file->bs->exact_filename);
91af7014
HR
3989 }
3990 /* Reconstructing the full options QDict is simple for most format block
3991 * drivers, as long as the full options are known for the underlying
3992 * file BDS. The full options QDict of that file BDS should somehow
3993 * contain a representation of the filename, therefore the following
3994 * suffices without querying the (exact_)filename of this BDS. */
9a4f4c31 3995 if (bs->file->bs->full_open_options) {
91af7014
HR
3996 qdict_put_obj(opts, "driver",
3997 QOBJECT(qstring_from_str(drv->format_name)));
9a4f4c31
KW
3998 QINCREF(bs->file->bs->full_open_options);
3999 qdict_put_obj(opts, "file",
4000 QOBJECT(bs->file->bs->full_open_options));
91af7014
HR
4001
4002 bs->full_open_options = opts;
4003 } else {
4004 QDECREF(opts);
4005 }
4006 } else if (!bs->full_open_options && qdict_size(bs->options)) {
4007 /* There is no underlying file BDS (at least referenced by BDS.file),
4008 * so the full options QDict should be equal to the options given
4009 * specifically for this block device when it was opened (plus the
4010 * driver specification).
4011 * Because those options don't change, there is no need to update
4012 * full_open_options when it's already set. */
4013
4014 opts = qdict_new();
4015 append_open_options(opts, bs);
4016 qdict_put_obj(opts, "driver",
4017 QOBJECT(qstring_from_str(drv->format_name)));
4018
4019 if (bs->exact_filename[0]) {
4020 /* This may not work for all block protocol drivers (some may
4021 * require this filename to be parsed), but we have to find some
4022 * default solution here, so just include it. If some block driver
4023 * does not support pure options without any filename at all or
4024 * needs some special format of the options QDict, it needs to
4025 * implement the driver-specific bdrv_refresh_filename() function.
4026 */
4027 qdict_put_obj(opts, "filename",
4028 QOBJECT(qstring_from_str(bs->exact_filename)));
4029 }
4030
4031 bs->full_open_options = opts;
4032 }
4033
4034 if (bs->exact_filename[0]) {
4035 pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
4036 } else if (bs->full_open_options) {
4037 QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
4038 snprintf(bs->filename, sizeof(bs->filename), "json:%s",
4039 qstring_get_str(json));
4040 QDECREF(json);
4041 }
4042}