]> git.ipfire.org Git - thirdparty/qemu.git/blame - block.c
block: add generic full disk encryption driver
[thirdparty/qemu.git] / block.c
CommitLineData
fc01f7e7
FB
1/*
2 * QEMU System Emulator block driver
5fafdf24 3 *
fc01f7e7 4 * Copyright (c) 2003 Fabrice Bellard
5fafdf24 5 *
fc01f7e7
FB
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
d38ea87a 24#include "qemu/osdep.h"
6d519a5f 25#include "trace.h"
737e150e
PB
26#include "block/block_int.h"
27#include "block/blockjob.h"
d49b6836 28#include "qemu/error-report.h"
1de7afc9 29#include "qemu/module.h"
cc7a8ea7 30#include "qapi/qmp/qerror.h"
91a097e7 31#include "qapi/qmp/qbool.h"
7b1b5d19 32#include "qapi/qmp/qjson.h"
bfb197e0 33#include "sysemu/block-backend.h"
9c17d615 34#include "sysemu/sysemu.h"
1de7afc9 35#include "qemu/notify.h"
10817bf0 36#include "qemu/coroutine.h"
c13163fb 37#include "block/qapi.h"
b2023818 38#include "qmp-commands.h"
1de7afc9 39#include "qemu/timer.h"
a5ee7bd4 40#include "qapi-event.h"
db628338 41#include "block/throttle-groups.h"
f348b6d1
VB
42#include "qemu/cutils.h"
43#include "qemu/id.h"
fc01f7e7 44
71e72a19 45#ifdef CONFIG_BSD
7674e7bf 46#include <sys/ioctl.h>
72cf2d4f 47#include <sys/queue.h>
c5e97233 48#ifndef __DragonFly__
7674e7bf
FB
49#include <sys/disk.h>
50#endif
c5e97233 51#endif
7674e7bf 52
49dc768d
AL
53#ifdef _WIN32
54#include <windows.h>
55#endif
56
1c9805a3
SH
57#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
58
dc364f4c
BC
59static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
60 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
61
2c1d04e0
HR
62static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states =
63 QTAILQ_HEAD_INITIALIZER(all_bdrv_states);
64
8a22f02a
SH
65static QLIST_HEAD(, BlockDriver) bdrv_drivers =
66 QLIST_HEAD_INITIALIZER(bdrv_drivers);
ea2384d3 67
f3930ed0
KW
68static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
69 const char *reference, QDict *options, int flags,
70 BlockDriverState *parent,
ce343771 71 const BdrvChildRole *child_role, Error **errp);
f3930ed0 72
eb852011
MA
73/* If non-zero, use only whitelisted block drivers */
74static int use_bdrv_whitelist;
75
64dff520
HR
76static void bdrv_close(BlockDriverState *bs);
77
9e0b22f4
SH
78#ifdef _WIN32
/*
 * Return 1 if @filename starts with an ASCII letter immediately followed
 * by ':' (for example "c:" or "D:/dir"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    int is_lower = (letter >= 'a' && letter <= 'z');
    int is_upper = (letter >= 'A' && letter <= 'Z');

    if (!is_lower && !is_upper) {
        /* Do not look at filename[1]: @filename may be the empty string */
        return 0;
    }

    return filename[1] == ':';
}
85
/*
 * Return 1 if @filename names a Windows drive: either exactly
 * "<letter>:" with nothing after it, or a name starting with the
 * device prefix "\\.\" (also accepted in its "//./" spelling).
 * Return 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    int bare_drive = is_windows_drive_prefix(filename) &&
                     filename[2] == '\0';

    if (bare_drive) {
        return 1;
    }

    if (strstart(filename, "\\\\.\\", NULL)) {
        return 1;
    }
    if (strstart(filename, "//./", NULL)) {
        return 1;
    }

    return 0;
}
96#endif
97
339064d5
KW
98size_t bdrv_opt_mem_align(BlockDriverState *bs)
99{
100 if (!bs || !bs->drv) {
459b4e66
DL
101 /* page size or 4k (hdd sector size) should be on the safe side */
102 return MAX(4096, getpagesize());
339064d5
KW
103 }
104
105 return bs->bl.opt_mem_alignment;
106}
107
4196d2f0
DL
108size_t bdrv_min_mem_align(BlockDriverState *bs)
109{
110 if (!bs || !bs->drv) {
459b4e66
DL
111 /* page size or 4k (hdd sector size) should be on the safe side */
112 return MAX(4096, getpagesize());
4196d2f0
DL
113 }
114
115 return bs->bl.min_mem_alignment;
116}
117
/*
 * Check if the path starts with "<protocol>:".
 *
 * The path has a protocol when the first ':' comes before any path
 * separator.  On Windows, drive names ("c:", "c:\dir", "\\.\x") are never
 * treated as protocols, and '\' counts as a separator as well.
 *
 * Returns 1 if a protocol prefix is present, 0 otherwise.
 */
int path_has_protocol(const char *path)
{
#ifdef _WIN32
    const char *stop_set = ":/\\";

    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    const char *stop_set = ":/";
#endif

    /* Index of the first ':' or separator, or of the terminating NUL */
    return path[strcspn(path, stop_set)] == ':';
}
135
/*
 * Return 1 if @path is absolute, 0 otherwise.
 *
 * A path is absolute when it starts with '/'.  On Windows a leading '\'
 * and any drive name or drive prefix also count as absolute.
 */
int path_is_absolute(const char *path)
{
    const char first = path[0];

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (first == '\\') {
        return 1;
    }
#endif

    return first == '/';
}
148
83f64091
FB
/*
 * Build in @dest (at most @dest_size bytes, always NUL-terminated when
 * @dest_size > 0) the path of @filename interpreted relative to
 * @base_path.
 *
 * If @filename is absolute it is copied unchanged.  Otherwise the leading
 * part of @base_path, up to and including its last path separator or its
 * first ':' (whichever ends later), is kept and @filename is appended to
 * it.  URLs are supported through the ':' handling.
 *
 * Nothing is written when @dest_size <= 0.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon;
    const char *after_sep;
    const char *dir_end;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* Position just past the first ':' (or start of string if none) */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* Position just past the last path separator (or start if none) */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');

        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* Keep whichever prefix reaches further into base_path */
    dir_end = (after_sep > after_colon) ? after_sep : after_colon;

    prefix_len = dir_end - base_path;
    if (prefix_len > dest_size - 1) {
        /* Truncate so that the terminating NUL still fits */
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
192
0a82855a
HR
193void bdrv_get_full_backing_filename_from_filename(const char *backed,
194 const char *backing,
9f07429e
HR
195 char *dest, size_t sz,
196 Error **errp)
dc5a1371 197{
9f07429e
HR
198 if (backing[0] == '\0' || path_has_protocol(backing) ||
199 path_is_absolute(backing))
200 {
0a82855a 201 pstrcpy(dest, sz, backing);
9f07429e
HR
202 } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
203 error_setg(errp, "Cannot use relative backing file names for '%s'",
204 backed);
dc5a1371 205 } else {
0a82855a 206 path_combine(dest, sz, backed, backing);
dc5a1371
PB
207 }
208}
209
9f07429e
HR
210void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
211 Error **errp)
0a82855a 212{
9f07429e
HR
213 char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
214
215 bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
216 dest, sz, errp);
0a82855a
HR
217}
218
0eb7217e
SH
219void bdrv_register(BlockDriver *bdrv)
220{
221 bdrv_setup_io_funcs(bdrv);
b2e12bc6 222
8a22f02a 223 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
ea2384d3 224}
b338082b 225
7f06d47e 226BlockDriverState *bdrv_new_root(void)
b338082b 227{
9aaf28c6 228 return bdrv_new();
e4e9986b
MA
229}
230
231BlockDriverState *bdrv_new(void)
232{
233 BlockDriverState *bs;
234 int i;
235
5839e53b 236 bs = g_new0(BlockDriverState, 1);
e4654d2d 237 QLIST_INIT(&bs->dirty_bitmaps);
fbe40ff7
FZ
238 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
239 QLIST_INIT(&bs->op_blockers[i]);
240 }
d616b224 241 notifier_with_return_list_init(&bs->before_write_notifiers);
cc0681c4
BC
242 qemu_co_queue_init(&bs->throttled_reqs[0]);
243 qemu_co_queue_init(&bs->throttled_reqs[1]);
9fcb0251 244 bs->refcnt = 1;
dcd04228 245 bs->aio_context = qemu_get_aio_context();
d7d512f6 246
2c1d04e0
HR
247 QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list);
248
b338082b
FB
249 return bs;
250}
251
ea2384d3
FB
252BlockDriver *bdrv_find_format(const char *format_name)
253{
254 BlockDriver *drv1;
8a22f02a
SH
255 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
256 if (!strcmp(drv1->format_name, format_name)) {
ea2384d3 257 return drv1;
8a22f02a 258 }
ea2384d3
FB
259 }
260 return NULL;
261}
262
b64ec4e4 263static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
eb852011 264{
b64ec4e4
FZ
265 static const char *whitelist_rw[] = {
266 CONFIG_BDRV_RW_WHITELIST
267 };
268 static const char *whitelist_ro[] = {
269 CONFIG_BDRV_RO_WHITELIST
eb852011
MA
270 };
271 const char **p;
272
b64ec4e4 273 if (!whitelist_rw[0] && !whitelist_ro[0]) {
eb852011 274 return 1; /* no whitelist, anything goes */
b64ec4e4 275 }
eb852011 276
b64ec4e4 277 for (p = whitelist_rw; *p; p++) {
eb852011
MA
278 if (!strcmp(drv->format_name, *p)) {
279 return 1;
280 }
281 }
b64ec4e4
FZ
282 if (read_only) {
283 for (p = whitelist_ro; *p; p++) {
284 if (!strcmp(drv->format_name, *p)) {
285 return 1;
286 }
287 }
288 }
eb852011
MA
289 return 0;
290}
291
5b7e1542
ZYW
292typedef struct CreateCo {
293 BlockDriver *drv;
294 char *filename;
83d0521a 295 QemuOpts *opts;
5b7e1542 296 int ret;
cc84d90f 297 Error *err;
5b7e1542
ZYW
298} CreateCo;
299
300static void coroutine_fn bdrv_create_co_entry(void *opaque)
301{
cc84d90f
HR
302 Error *local_err = NULL;
303 int ret;
304
5b7e1542
ZYW
305 CreateCo *cco = opaque;
306 assert(cco->drv);
307
c282e1fd 308 ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
84d18f06 309 if (local_err) {
cc84d90f
HR
310 error_propagate(&cco->err, local_err);
311 }
312 cco->ret = ret;
5b7e1542
ZYW
313}
314
0e7e1989 315int bdrv_create(BlockDriver *drv, const char* filename,
83d0521a 316 QemuOpts *opts, Error **errp)
ea2384d3 317{
5b7e1542
ZYW
318 int ret;
319
320 Coroutine *co;
321 CreateCo cco = {
322 .drv = drv,
323 .filename = g_strdup(filename),
83d0521a 324 .opts = opts,
5b7e1542 325 .ret = NOT_DONE,
cc84d90f 326 .err = NULL,
5b7e1542
ZYW
327 };
328
c282e1fd 329 if (!drv->bdrv_create) {
cc84d90f 330 error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
80168bff
LC
331 ret = -ENOTSUP;
332 goto out;
5b7e1542
ZYW
333 }
334
335 if (qemu_in_coroutine()) {
336 /* Fast-path if already in coroutine context */
337 bdrv_create_co_entry(&cco);
338 } else {
339 co = qemu_coroutine_create(bdrv_create_co_entry);
340 qemu_coroutine_enter(co, &cco);
341 while (cco.ret == NOT_DONE) {
b47ec2c4 342 aio_poll(qemu_get_aio_context(), true);
5b7e1542
ZYW
343 }
344 }
345
346 ret = cco.ret;
cc84d90f 347 if (ret < 0) {
84d18f06 348 if (cco.err) {
cc84d90f
HR
349 error_propagate(errp, cco.err);
350 } else {
351 error_setg_errno(errp, -ret, "Could not create image");
352 }
353 }
0e7e1989 354
80168bff
LC
355out:
356 g_free(cco.filename);
5b7e1542 357 return ret;
ea2384d3
FB
358}
359
c282e1fd 360int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
84a12e66
CH
361{
362 BlockDriver *drv;
cc84d90f
HR
363 Error *local_err = NULL;
364 int ret;
84a12e66 365
b65a5e12 366 drv = bdrv_find_protocol(filename, true, errp);
84a12e66 367 if (drv == NULL) {
16905d71 368 return -ENOENT;
84a12e66
CH
369 }
370
c282e1fd 371 ret = bdrv_create(drv, filename, opts, &local_err);
84d18f06 372 if (local_err) {
cc84d90f
HR
373 error_propagate(errp, local_err);
374 }
375 return ret;
84a12e66
CH
376}
377
892b7de8
ET
378/**
379 * Try to get @bs's logical and physical block size.
380 * On success, store them in @bsz struct and return 0.
381 * On failure return -errno.
382 * @bs must not be empty.
383 */
384int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
385{
386 BlockDriver *drv = bs->drv;
387
388 if (drv && drv->bdrv_probe_blocksizes) {
389 return drv->bdrv_probe_blocksizes(bs, bsz);
390 }
391
392 return -ENOTSUP;
393}
394
395/**
396 * Try to get @bs's geometry (cyls, heads, sectors).
397 * On success, store them in @geo struct and return 0.
398 * On failure return -errno.
399 * @bs must not be empty.
400 */
401int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
402{
403 BlockDriver *drv = bs->drv;
404
405 if (drv && drv->bdrv_probe_geometry) {
406 return drv->bdrv_probe_geometry(bs, geo);
407 }
408
409 return -ENOTSUP;
410}
411
eba25057
JM
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename receives the name of the new file; @size is the size of that
 * buffer in bytes.  On POSIX hosts the file is created in $TMPDIR, or in
 * /var/tmp when TMPDIR is not set.
 *
 * Return 0 upon success, otherwise a negative errno value (-EOVERFLOW if
 * the name does not fit into @filename).
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    const char *tmpdir;
    int len;
    int fd;

    /* A non-positive size must never reach snprintf() as a huge size_t */
    if (size <= 0) {
        return -EOVERFLOW;
    }

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }

    len = snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    if (len < 0 || len >= size) {
        return -EOVERFLOW;
    }

    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /* unlink() may overwrite errno; report the close() failure */
        int saved_errno = errno;

        unlink(filename);
        return -saved_errno;
    }
    return 0;
#endif
}
fc01f7e7 447
84a12e66
CH
448/*
449 * Detect host devices. By convention, /dev/cdrom[N] is always
450 * recognized as a host CDROM.
451 */
452static BlockDriver *find_hdev_driver(const char *filename)
453{
454 int score_max = 0, score;
455 BlockDriver *drv = NULL, *d;
456
457 QLIST_FOREACH(d, &bdrv_drivers, list) {
458 if (d->bdrv_probe_device) {
459 score = d->bdrv_probe_device(filename);
460 if (score > score_max) {
461 score_max = score;
462 drv = d;
463 }
464 }
465 }
466
467 return drv;
468}
469
98289620 470BlockDriver *bdrv_find_protocol(const char *filename,
b65a5e12
HR
471 bool allow_protocol_prefix,
472 Error **errp)
83f64091
FB
473{
474 BlockDriver *drv1;
475 char protocol[128];
1cec71e3 476 int len;
83f64091 477 const char *p;
19cb3738 478
66f82cee
KW
479 /* TODO Drivers without bdrv_file_open must be specified explicitly */
480
39508e7a
CH
481 /*
482 * XXX(hch): we really should not let host device detection
483 * override an explicit protocol specification, but moving this
484 * later breaks access to device names with colons in them.
485 * Thanks to the brain-dead persistent naming schemes on udev-
486 * based Linux systems those actually are quite common.
487 */
488 drv1 = find_hdev_driver(filename);
489 if (drv1) {
490 return drv1;
491 }
492
98289620 493 if (!path_has_protocol(filename) || !allow_protocol_prefix) {
ef810437 494 return &bdrv_file;
84a12e66 495 }
98289620 496
9e0b22f4
SH
497 p = strchr(filename, ':');
498 assert(p != NULL);
1cec71e3
AL
499 len = p - filename;
500 if (len > sizeof(protocol) - 1)
501 len = sizeof(protocol) - 1;
502 memcpy(protocol, filename, len);
503 protocol[len] = '\0';
8a22f02a 504 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
5fafdf24 505 if (drv1->protocol_name &&
8a22f02a 506 !strcmp(drv1->protocol_name, protocol)) {
83f64091 507 return drv1;
8a22f02a 508 }
83f64091 509 }
b65a5e12
HR
510
511 error_setg(errp, "Unknown protocol '%s'", protocol);
83f64091
FB
512 return NULL;
513}
514
c6684249
MA
515/*
516 * Guess image format by probing its contents.
517 * This is not a good idea when your image is raw (CVE-2008-2004), but
518 * we do it anyway for backward compatibility.
519 *
520 * @buf contains the image's first @buf_size bytes.
7cddd372
KW
521 * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
522 * but can be smaller if the image file is smaller)
c6684249
MA
523 * @filename is its filename.
524 *
525 * For all block drivers, call the bdrv_probe() method to get its
526 * probing score.
527 * Return the first block driver with the highest probing score.
528 */
38f3ef57
KW
529BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
530 const char *filename)
c6684249
MA
531{
532 int score_max = 0, score;
533 BlockDriver *drv = NULL, *d;
534
535 QLIST_FOREACH(d, &bdrv_drivers, list) {
536 if (d->bdrv_probe) {
537 score = d->bdrv_probe(buf, buf_size, filename);
538 if (score > score_max) {
539 score_max = score;
540 drv = d;
541 }
542 }
543 }
544
545 return drv;
546}
547
f500a6d3 548static int find_image_format(BlockDriverState *bs, const char *filename,
34b5d2c6 549 BlockDriver **pdrv, Error **errp)
f3a5d3f8 550{
c6684249 551 BlockDriver *drv;
7cddd372 552 uint8_t buf[BLOCK_PROBE_BUF_SIZE];
f500a6d3 553 int ret = 0;
f8ea0b00 554
08a00559 555 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
b192af8a 556 if (bdrv_is_sg(bs) || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
ef810437 557 *pdrv = &bdrv_raw;
c98ac35d 558 return ret;
1a396859 559 }
f8ea0b00 560
83f64091 561 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
83f64091 562 if (ret < 0) {
34b5d2c6
HR
563 error_setg_errno(errp, -ret, "Could not read image for determining its "
564 "format");
c98ac35d
SW
565 *pdrv = NULL;
566 return ret;
83f64091
FB
567 }
568
c6684249 569 drv = bdrv_probe_all(buf, ret, filename);
c98ac35d 570 if (!drv) {
34b5d2c6
HR
571 error_setg(errp, "Could not determine image format: No compatible "
572 "driver found");
c98ac35d
SW
573 ret = -ENOENT;
574 }
575 *pdrv = drv;
576 return ret;
ea2384d3
FB
577}
578
51762288
SH
579/**
580 * Set the current 'total_sectors' value
65a9bb25 581 * Return 0 on success, -errno on error.
51762288
SH
582 */
583static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
584{
585 BlockDriver *drv = bs->drv;
586
396759ad 587 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
b192af8a 588 if (bdrv_is_sg(bs))
396759ad
NB
589 return 0;
590
51762288
SH
591 /* query actual device if possible, otherwise just trust the hint */
592 if (drv->bdrv_getlength) {
593 int64_t length = drv->bdrv_getlength(bs);
594 if (length < 0) {
595 return length;
596 }
7e382003 597 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
51762288
SH
598 }
599
600 bs->total_sectors = hint;
601 return 0;
602}
603
cddff5ba
KW
604/**
605 * Combines a QDict of new block driver @options with any missing options taken
606 * from @old_options, so that leaving out an option defaults to its old value.
607 */
608static void bdrv_join_options(BlockDriverState *bs, QDict *options,
609 QDict *old_options)
610{
611 if (bs->drv && bs->drv->bdrv_join_options) {
612 bs->drv->bdrv_join_options(options, old_options);
613 } else {
614 qdict_join(options, old_options, false);
615 }
616}
617
9e8f1835
PB
618/**
619 * Set open flags for a given discard mode
620 *
621 * Return 0 on success, -1 if the discard mode was invalid.
622 */
623int bdrv_parse_discard_flags(const char *mode, int *flags)
624{
625 *flags &= ~BDRV_O_UNMAP;
626
627 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
628 /* do nothing */
629 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
630 *flags |= BDRV_O_UNMAP;
631 } else {
632 return -1;
633 }
634
635 return 0;
636}
637
c3993cdc
SH
638/**
639 * Set open flags for a given cache mode
640 *
641 * Return 0 on success, -1 if the cache mode was invalid.
642 */
643int bdrv_parse_cache_flags(const char *mode, int *flags)
644{
645 *flags &= ~BDRV_O_CACHE_MASK;
646
647 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
648 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
92196b2f
SH
649 } else if (!strcmp(mode, "directsync")) {
650 *flags |= BDRV_O_NOCACHE;
c3993cdc
SH
651 } else if (!strcmp(mode, "writeback")) {
652 *flags |= BDRV_O_CACHE_WB;
653 } else if (!strcmp(mode, "unsafe")) {
654 *flags |= BDRV_O_CACHE_WB;
655 *flags |= BDRV_O_NO_FLUSH;
656 } else if (!strcmp(mode, "writethrough")) {
657 /* this is the default */
658 } else {
659 return -1;
660 }
661
662 return 0;
663}
664
b1e6fc08 665/*
73176bee
KW
666 * Returns the options and flags that a temporary snapshot should get, based on
667 * the originally requested flags (the originally requested image will have
668 * flags like a backing file)
b1e6fc08 669 */
73176bee
KW
670static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options,
671 int parent_flags, QDict *parent_options)
b1e6fc08 672{
73176bee
KW
673 *child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
674
675 /* For temporary files, unconditional cache=unsafe is fine */
676 qdict_set_default_str(child_options, BDRV_OPT_CACHE_WB, "on");
677 qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off");
678 qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on");
b1e6fc08
KW
679}
680
0b50cc88 681/*
8e2160e2
KW
682 * Returns the options and flags that bs->file should get if a protocol driver
683 * is expected, based on the given options and flags for the parent BDS
0b50cc88 684 */
8e2160e2
KW
685static void bdrv_inherited_options(int *child_flags, QDict *child_options,
686 int parent_flags, QDict *parent_options)
0b50cc88 687{
8e2160e2
KW
688 int flags = parent_flags;
689
0b50cc88
KW
690 /* Enable protocol handling, disable format probing for bs->file */
691 flags |= BDRV_O_PROTOCOL;
692
91a097e7
KW
693 /* If the cache mode isn't explicitly set, inherit direct and no-flush from
694 * the parent. */
695 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
696 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
697
0b50cc88 698 /* Our block drivers take care to send flushes and respect unmap policy,
91a097e7
KW
699 * so we can default to enable both on lower layers regardless of the
700 * corresponding parent options. */
701 qdict_set_default_str(child_options, BDRV_OPT_CACHE_WB, "on");
702 flags |= BDRV_O_UNMAP;
0b50cc88 703
0b50cc88 704 /* Clear flags that only apply to the top layer */
abb06c5a
DB
705 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ |
706 BDRV_O_NO_IO);
0b50cc88 707
8e2160e2 708 *child_flags = flags;
0b50cc88
KW
709}
710
f3930ed0 711const BdrvChildRole child_file = {
8e2160e2 712 .inherit_options = bdrv_inherited_options,
f3930ed0
KW
713};
714
715/*
8e2160e2
KW
716 * Returns the options and flags that bs->file should get if the use of formats
717 * (and not only protocols) is permitted for it, based on the given options and
718 * flags for the parent BDS
f3930ed0 719 */
8e2160e2
KW
720static void bdrv_inherited_fmt_options(int *child_flags, QDict *child_options,
721 int parent_flags, QDict *parent_options)
f3930ed0 722{
8e2160e2
KW
723 child_file.inherit_options(child_flags, child_options,
724 parent_flags, parent_options);
725
abb06c5a 726 *child_flags &= ~(BDRV_O_PROTOCOL | BDRV_O_NO_IO);
f3930ed0
KW
727}
728
729const BdrvChildRole child_format = {
8e2160e2 730 .inherit_options = bdrv_inherited_fmt_options,
f3930ed0
KW
731};
732
317fc44e 733/*
8e2160e2
KW
734 * Returns the options and flags that bs->backing should get, based on the
735 * given options and flags for the parent BDS
317fc44e 736 */
8e2160e2
KW
737static void bdrv_backing_options(int *child_flags, QDict *child_options,
738 int parent_flags, QDict *parent_options)
317fc44e 739{
8e2160e2
KW
740 int flags = parent_flags;
741
b8816a43
KW
742 /* The cache mode is inherited unmodified for backing files; except WCE,
743 * which is only applied on the top level (BlockBackend) */
744 qdict_set_default_str(child_options, BDRV_OPT_CACHE_WB, "on");
91a097e7
KW
745 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
746 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
747
317fc44e
KW
748 /* backing files always opened read-only */
749 flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
750
751 /* snapshot=on is handled on the top layer */
8bfea15d 752 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
317fc44e 753
8e2160e2 754 *child_flags = flags;
317fc44e
KW
755}
756
f3930ed0 757static const BdrvChildRole child_backing = {
8e2160e2 758 .inherit_options = bdrv_backing_options,
f3930ed0
KW
759};
760
7b272452
KW
761static int bdrv_open_flags(BlockDriverState *bs, int flags)
762{
763 int open_flags = flags | BDRV_O_CACHE_WB;
764
765 /*
766 * Clear flags that are internal to the block layer before opening the
767 * image.
768 */
20cca275 769 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
7b272452
KW
770
771 /*
772 * Snapshots should be writable.
773 */
8bfea15d 774 if (flags & BDRV_O_TEMPORARY) {
7b272452
KW
775 open_flags |= BDRV_O_RDWR;
776 }
777
778 return open_flags;
779}
780
91a097e7
KW
781static void update_flags_from_options(int *flags, QemuOpts *opts)
782{
783 *flags &= ~BDRV_O_CACHE_MASK;
784
785 assert(qemu_opt_find(opts, BDRV_OPT_CACHE_WB));
786 if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_WB, false)) {
787 *flags |= BDRV_O_CACHE_WB;
788 }
789
790 assert(qemu_opt_find(opts, BDRV_OPT_CACHE_NO_FLUSH));
791 if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
792 *flags |= BDRV_O_NO_FLUSH;
793 }
794
795 assert(qemu_opt_find(opts, BDRV_OPT_CACHE_DIRECT));
796 if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_DIRECT, false)) {
797 *flags |= BDRV_O_NOCACHE;
798 }
799}
800
801static void update_options_from_flags(QDict *options, int flags)
802{
803 if (!qdict_haskey(options, BDRV_OPT_CACHE_WB)) {
804 qdict_put(options, BDRV_OPT_CACHE_WB,
805 qbool_from_bool(flags & BDRV_O_CACHE_WB));
806 }
807 if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) {
808 qdict_put(options, BDRV_OPT_CACHE_DIRECT,
809 qbool_from_bool(flags & BDRV_O_NOCACHE));
810 }
811 if (!qdict_haskey(options, BDRV_OPT_CACHE_NO_FLUSH)) {
812 qdict_put(options, BDRV_OPT_CACHE_NO_FLUSH,
813 qbool_from_bool(flags & BDRV_O_NO_FLUSH));
814 }
815}
816
636ea370
KW
817static void bdrv_assign_node_name(BlockDriverState *bs,
818 const char *node_name,
819 Error **errp)
6913c0c2 820{
15489c76 821 char *gen_node_name = NULL;
6913c0c2 822
15489c76
JC
823 if (!node_name) {
824 node_name = gen_node_name = id_generate(ID_BLOCK);
825 } else if (!id_wellformed(node_name)) {
826 /*
827 * Check for empty string or invalid characters, but not if it is
828 * generated (generated names use characters not available to the user)
829 */
9aebf3b8 830 error_setg(errp, "Invalid node name");
636ea370 831 return;
6913c0c2
BC
832 }
833
0c5e94ee 834 /* takes care of avoiding namespaces collisions */
7f06d47e 835 if (blk_by_name(node_name)) {
0c5e94ee
BC
836 error_setg(errp, "node-name=%s is conflicting with a device id",
837 node_name);
15489c76 838 goto out;
0c5e94ee
BC
839 }
840
6913c0c2
BC
841 /* takes care of avoiding duplicates node names */
842 if (bdrv_find_node(node_name)) {
843 error_setg(errp, "Duplicate node name");
15489c76 844 goto out;
6913c0c2
BC
845 }
846
847 /* copy node name into the bs and insert it into the graph list */
848 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
849 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
15489c76
JC
850out:
851 g_free(gen_node_name);
6913c0c2
BC
852}
853
18edf289
KW
854static QemuOptsList bdrv_runtime_opts = {
855 .name = "bdrv_common",
856 .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
857 .desc = {
858 {
859 .name = "node-name",
860 .type = QEMU_OPT_STRING,
861 .help = "Node name of the block device node",
862 },
62392ebb
KW
863 {
864 .name = "driver",
865 .type = QEMU_OPT_STRING,
866 .help = "Block driver to use for the node",
867 },
91a097e7
KW
868 {
869 .name = BDRV_OPT_CACHE_WB,
870 .type = QEMU_OPT_BOOL,
871 .help = "Enable writeback mode",
872 },
873 {
874 .name = BDRV_OPT_CACHE_DIRECT,
875 .type = QEMU_OPT_BOOL,
876 .help = "Bypass software writeback cache on the host",
877 },
878 {
879 .name = BDRV_OPT_CACHE_NO_FLUSH,
880 .type = QEMU_OPT_BOOL,
881 .help = "Ignore flush requests",
882 },
18edf289
KW
883 { /* end of list */ }
884 },
885};
886
57915332
KW
887/*
888 * Common part for opening disk images and files
b6ad491a
KW
889 *
890 * Removes all processed options from *options.
57915332 891 */
9a4f4c31 892static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
82dc8b41 893 QDict *options, Error **errp)
57915332
KW
894{
895 int ret, open_flags;
035fccdf 896 const char *filename;
62392ebb 897 const char *driver_name = NULL;
6913c0c2 898 const char *node_name = NULL;
18edf289 899 QemuOpts *opts;
62392ebb 900 BlockDriver *drv;
34b5d2c6 901 Error *local_err = NULL;
57915332 902
6405875c 903 assert(bs->file == NULL);
707ff828 904 assert(options != NULL && bs->options != options);
57915332 905
62392ebb
KW
906 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
907 qemu_opts_absorb_qdict(opts, options, &local_err);
908 if (local_err) {
909 error_propagate(errp, local_err);
910 ret = -EINVAL;
911 goto fail_opts;
912 }
913
914 driver_name = qemu_opt_get(opts, "driver");
915 drv = bdrv_find_format(driver_name);
916 assert(drv != NULL);
917
45673671 918 if (file != NULL) {
9a4f4c31 919 filename = file->bs->filename;
45673671
KW
920 } else {
921 filename = qdict_get_try_str(options, "filename");
922 }
923
765003db
KW
924 if (drv->bdrv_needs_filename && !filename) {
925 error_setg(errp, "The '%s' block driver requires a file name",
926 drv->format_name);
18edf289
KW
927 ret = -EINVAL;
928 goto fail_opts;
6913c0c2 929 }
6913c0c2 930
82dc8b41
KW
931 trace_bdrv_open_common(bs, filename ?: "", bs->open_flags,
932 drv->format_name);
62392ebb 933
18edf289 934 node_name = qemu_opt_get(opts, "node-name");
636ea370 935 bdrv_assign_node_name(bs, node_name, &local_err);
0fb6395c 936 if (local_err) {
636ea370 937 error_propagate(errp, local_err);
18edf289
KW
938 ret = -EINVAL;
939 goto fail_opts;
5d186eb0
KW
940 }
941
c25f53b0 942 bs->request_alignment = 512;
0d51b4de 943 bs->zero_beyond_eof = true;
82dc8b41 944 bs->read_only = !(bs->open_flags & BDRV_O_RDWR);
b64ec4e4
FZ
945
946 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
8f94a6e4
KW
947 error_setg(errp,
948 !bs->read_only && bdrv_is_whitelisted(drv, true)
949 ? "Driver '%s' can only be used for read-only devices"
950 : "Driver '%s' is not whitelisted",
951 drv->format_name);
18edf289
KW
952 ret = -ENOTSUP;
953 goto fail_opts;
b64ec4e4 954 }
57915332 955
53fec9d3 956 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
82dc8b41 957 if (bs->open_flags & BDRV_O_COPY_ON_READ) {
0ebd24e0
KW
958 if (!bs->read_only) {
959 bdrv_enable_copy_on_read(bs);
960 } else {
961 error_setg(errp, "Can't use copy-on-read on read-only device");
18edf289
KW
962 ret = -EINVAL;
963 goto fail_opts;
0ebd24e0 964 }
53fec9d3
SH
965 }
966
c2ad1b0c
KW
967 if (filename != NULL) {
968 pstrcpy(bs->filename, sizeof(bs->filename), filename);
969 } else {
970 bs->filename[0] = '\0';
971 }
91af7014 972 pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
57915332 973
57915332 974 bs->drv = drv;
7267c094 975 bs->opaque = g_malloc0(drv->instance_size);
57915332 976
91a097e7
KW
977 /* Apply cache mode options */
978 update_flags_from_options(&bs->open_flags, opts);
73ac451f
KW
979
980 if (!bs->blk && (bs->open_flags & BDRV_O_CACHE_WB) == 0) {
981 error_setg(errp, "Can't set writethrough mode except for the root");
982 ret = -EINVAL;
983 goto free_and_fail;
984 }
985
91a097e7 986 bdrv_set_enable_write_cache(bs, bs->open_flags & BDRV_O_CACHE_WB);
e7c63796 987
66f82cee 988 /* Open the image, either directly or using a protocol */
82dc8b41 989 open_flags = bdrv_open_flags(bs, bs->open_flags);
66f82cee 990 if (drv->bdrv_file_open) {
5d186eb0 991 assert(file == NULL);
030be321 992 assert(!drv->bdrv_needs_filename || filename != NULL);
34b5d2c6 993 ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
f500a6d3 994 } else {
2af5ef70 995 if (file == NULL) {
34b5d2c6
HR
996 error_setg(errp, "Can't use '%s' as a block driver for the "
997 "protocol level", drv->format_name);
2af5ef70
KW
998 ret = -EINVAL;
999 goto free_and_fail;
1000 }
f500a6d3 1001 bs->file = file;
34b5d2c6 1002 ret = drv->bdrv_open(bs, options, open_flags, &local_err);
66f82cee
KW
1003 }
1004
57915332 1005 if (ret < 0) {
84d18f06 1006 if (local_err) {
34b5d2c6 1007 error_propagate(errp, local_err);
2fa9aa59
DH
1008 } else if (bs->filename[0]) {
1009 error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
34b5d2c6
HR
1010 } else {
1011 error_setg_errno(errp, -ret, "Could not open image");
1012 }
57915332
KW
1013 goto free_and_fail;
1014 }
1015
a1f688f4
MA
1016 if (bs->encrypted) {
1017 error_report("Encrypted images are deprecated");
1018 error_printf("Support for them will be removed in a future release.\n"
1019 "You can use 'qemu-img convert' to convert your image"
1020 " to an unencrypted one.\n");
1021 }
1022
51762288
SH
1023 ret = refresh_total_sectors(bs, bs->total_sectors);
1024 if (ret < 0) {
34b5d2c6 1025 error_setg_errno(errp, -ret, "Could not refresh total sector count");
51762288 1026 goto free_and_fail;
57915332 1027 }
51762288 1028
3baca891
KW
1029 bdrv_refresh_limits(bs, &local_err);
1030 if (local_err) {
1031 error_propagate(errp, local_err);
1032 ret = -EINVAL;
1033 goto free_and_fail;
1034 }
1035
c25f53b0 1036 assert(bdrv_opt_mem_align(bs) != 0);
4196d2f0 1037 assert(bdrv_min_mem_align(bs) != 0);
b192af8a 1038 assert((bs->request_alignment != 0) || bdrv_is_sg(bs));
18edf289
KW
1039
1040 qemu_opts_del(opts);
57915332
KW
1041 return 0;
1042
1043free_and_fail:
f500a6d3 1044 bs->file = NULL;
7267c094 1045 g_free(bs->opaque);
57915332
KW
1046 bs->opaque = NULL;
1047 bs->drv = NULL;
18edf289
KW
1048fail_opts:
1049 qemu_opts_del(opts);
57915332
KW
1050 return ret;
1051}
1052
5e5c4f63
KW
1053static QDict *parse_json_filename(const char *filename, Error **errp)
1054{
1055 QObject *options_obj;
1056 QDict *options;
1057 int ret;
1058
1059 ret = strstart(filename, "json:", &filename);
1060 assert(ret);
1061
1062 options_obj = qobject_from_json(filename);
1063 if (!options_obj) {
1064 error_setg(errp, "Could not parse the JSON options");
1065 return NULL;
1066 }
1067
1068 if (qobject_type(options_obj) != QTYPE_QDICT) {
1069 qobject_decref(options_obj);
1070 error_setg(errp, "Invalid JSON object given");
1071 return NULL;
1072 }
1073
1074 options = qobject_to_qdict(options_obj);
1075 qdict_flatten(options);
1076
1077 return options;
1078}
1079
de3b53f0
KW
1080static void parse_json_protocol(QDict *options, const char **pfilename,
1081 Error **errp)
1082{
1083 QDict *json_options;
1084 Error *local_err = NULL;
1085
1086 /* Parse json: pseudo-protocol */
1087 if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) {
1088 return;
1089 }
1090
1091 json_options = parse_json_filename(*pfilename, &local_err);
1092 if (local_err) {
1093 error_propagate(errp, local_err);
1094 return;
1095 }
1096
1097 /* Options given in the filename have lower priority than options
1098 * specified directly */
1099 qdict_join(options, json_options, false);
1100 QDECREF(json_options);
1101 *pfilename = NULL;
1102}
1103
b6ce07aa 1104/*
f54120ff
KW
1105 * Fills in default options for opening images and converts the legacy
1106 * filename/flags pair to option QDict entries.
53a29513
HR
1107 * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
1108 * block driver has been specified explicitly.
b6ce07aa 1109 */
de3b53f0 1110static int bdrv_fill_options(QDict **options, const char *filename,
053e1578 1111 int *flags, Error **errp)
ea2384d3 1112{
c2ad1b0c 1113 const char *drvname;
53a29513 1114 bool protocol = *flags & BDRV_O_PROTOCOL;
e3fa4bfa 1115 bool parse_filename = false;
053e1578 1116 BlockDriver *drv = NULL;
34b5d2c6 1117 Error *local_err = NULL;
83f64091 1118
53a29513 1119 drvname = qdict_get_try_str(*options, "driver");
053e1578
HR
1120 if (drvname) {
1121 drv = bdrv_find_format(drvname);
1122 if (!drv) {
1123 error_setg(errp, "Unknown driver '%s'", drvname);
1124 return -ENOENT;
1125 }
1126 /* If the user has explicitly specified the driver, this choice should
1127 * override the BDRV_O_PROTOCOL flag */
1128 protocol = drv->bdrv_file_open;
53a29513
HR
1129 }
1130
1131 if (protocol) {
1132 *flags |= BDRV_O_PROTOCOL;
1133 } else {
1134 *flags &= ~BDRV_O_PROTOCOL;
1135 }
1136
91a097e7
KW
1137 /* Translate cache options from flags into options */
1138 update_options_from_flags(*options, *flags);
1139
035fccdf 1140 /* Fetch the file name from the options QDict if necessary */
17b005f1 1141 if (protocol && filename) {
f54120ff
KW
1142 if (!qdict_haskey(*options, "filename")) {
1143 qdict_put(*options, "filename", qstring_from_str(filename));
1144 parse_filename = true;
1145 } else {
1146 error_setg(errp, "Can't specify 'file' and 'filename' options at "
1147 "the same time");
1148 return -EINVAL;
1149 }
035fccdf
KW
1150 }
1151
c2ad1b0c 1152 /* Find the right block driver */
f54120ff 1153 filename = qdict_get_try_str(*options, "filename");
f54120ff 1154
053e1578
HR
1155 if (!drvname && protocol) {
1156 if (filename) {
1157 drv = bdrv_find_protocol(filename, parse_filename, errp);
17b005f1 1158 if (!drv) {
053e1578 1159 return -EINVAL;
17b005f1 1160 }
053e1578
HR
1161
1162 drvname = drv->format_name;
1163 qdict_put(*options, "driver", qstring_from_str(drvname));
1164 } else {
1165 error_setg(errp, "Must specify either driver or file");
1166 return -EINVAL;
98289620 1167 }
c2ad1b0c
KW
1168 }
1169
17b005f1 1170 assert(drv || !protocol);
c2ad1b0c 1171
f54120ff 1172 /* Driver-specific filename parsing */
17b005f1 1173 if (drv && drv->bdrv_parse_filename && parse_filename) {
5acd9d81 1174 drv->bdrv_parse_filename(filename, *options, &local_err);
84d18f06 1175 if (local_err) {
34b5d2c6 1176 error_propagate(errp, local_err);
f54120ff 1177 return -EINVAL;
6963a30d 1178 }
cd5d031e
HR
1179
1180 if (!drv->bdrv_needs_filename) {
1181 qdict_del(*options, "filename");
cd5d031e 1182 }
6963a30d
KW
1183 }
1184
f54120ff
KW
1185 return 0;
1186}
1187
f21d96d0
KW
1188BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
1189 const char *child_name,
1190 const BdrvChildRole *child_role)
df581792
KW
1191{
1192 BdrvChild *child = g_new(BdrvChild, 1);
1193 *child = (BdrvChild) {
1194 .bs = child_bs,
260fecf1 1195 .name = g_strdup(child_name),
df581792
KW
1196 .role = child_role,
1197 };
1198
d42a8a93 1199 QLIST_INSERT_HEAD(&child_bs->parents, child, next_parent);
b4b059f6
KW
1200
1201 return child;
df581792
KW
1202}
1203
f21d96d0
KW
1204static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
1205 BlockDriverState *child_bs,
1206 const char *child_name,
1207 const BdrvChildRole *child_role)
1208{
1209 BdrvChild *child = bdrv_root_attach_child(child_bs, child_name, child_role);
1210 QLIST_INSERT_HEAD(&parent_bs->children, child, next);
1211 return child;
1212}
1213
3f09bfbc 1214static void bdrv_detach_child(BdrvChild *child)
33a60407 1215{
f21d96d0
KW
1216 if (child->next.le_prev) {
1217 QLIST_REMOVE(child, next);
1218 child->next.le_prev = NULL;
1219 }
d42a8a93 1220 QLIST_REMOVE(child, next_parent);
260fecf1 1221 g_free(child->name);
33a60407
KW
1222 g_free(child);
1223}
1224
f21d96d0 1225void bdrv_root_unref_child(BdrvChild *child)
33a60407 1226{
779020cb
KW
1227 BlockDriverState *child_bs;
1228
f21d96d0
KW
1229 child_bs = child->bs;
1230 bdrv_detach_child(child);
1231 bdrv_unref(child_bs);
1232}
1233
1234void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
1235{
779020cb
KW
1236 if (child == NULL) {
1237 return;
1238 }
33a60407
KW
1239
1240 if (child->bs->inherits_from == parent) {
1241 child->bs->inherits_from = NULL;
1242 }
1243
f21d96d0 1244 bdrv_root_unref_child(child);
33a60407
KW
1245}
1246
5db15a57
KW
1247/*
1248 * Sets the backing file link of a BDS. A new reference is created; callers
1249 * which don't need their own reference any more must call bdrv_unref().
1250 */
8d24cce1
FZ
1251void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
1252{
5db15a57
KW
1253 if (backing_hd) {
1254 bdrv_ref(backing_hd);
1255 }
8d24cce1 1256
760e0063 1257 if (bs->backing) {
826b6ca0 1258 assert(bs->backing_blocker);
760e0063 1259 bdrv_op_unblock_all(bs->backing->bs, bs->backing_blocker);
5db15a57 1260 bdrv_unref_child(bs, bs->backing);
826b6ca0
FZ
1261 } else if (backing_hd) {
1262 error_setg(&bs->backing_blocker,
81e5f78a
AG
1263 "node is used as backing hd of '%s'",
1264 bdrv_get_device_or_node_name(bs));
826b6ca0
FZ
1265 }
1266
8d24cce1 1267 if (!backing_hd) {
826b6ca0
FZ
1268 error_free(bs->backing_blocker);
1269 bs->backing_blocker = NULL;
760e0063 1270 bs->backing = NULL;
8d24cce1
FZ
1271 goto out;
1272 }
260fecf1 1273 bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing);
8d24cce1
FZ
1274 bs->open_flags &= ~BDRV_O_NO_BACKING;
1275 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
1276 pstrcpy(bs->backing_format, sizeof(bs->backing_format),
1277 backing_hd->drv ? backing_hd->drv->format_name : "");
826b6ca0 1278
760e0063 1279 bdrv_op_block_all(backing_hd, bs->backing_blocker);
826b6ca0 1280 /* Otherwise we won't be able to commit due to check in bdrv_commit */
760e0063 1281 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
826b6ca0 1282 bs->backing_blocker);
8d24cce1 1283out:
3baca891 1284 bdrv_refresh_limits(bs, NULL);
8d24cce1
FZ
1285}
1286
31ca6d07
KW
1287/*
1288 * Opens the backing file for a BlockDriverState if not yet open
1289 *
d9b7b057
KW
1290 * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
1291 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1292 * itself, all options starting with "${bdref_key}." are considered part of the
1293 * BlockdevRef.
1294 *
1295 * TODO Can this be unified with bdrv_open_image()?
31ca6d07 1296 */
d9b7b057
KW
1297int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
1298 const char *bdref_key, Error **errp)
9156df12 1299{
1ba4b6a5 1300 char *backing_filename = g_malloc0(PATH_MAX);
d9b7b057
KW
1301 char *bdref_key_dot;
1302 const char *reference = NULL;
317fc44e 1303 int ret = 0;
8d24cce1 1304 BlockDriverState *backing_hd;
d9b7b057
KW
1305 QDict *options;
1306 QDict *tmp_parent_options = NULL;
34b5d2c6 1307 Error *local_err = NULL;
9156df12 1308
760e0063 1309 if (bs->backing != NULL) {
1ba4b6a5 1310 goto free_exit;
9156df12
PB
1311 }
1312
31ca6d07 1313 /* NULL means an empty set of options */
d9b7b057
KW
1314 if (parent_options == NULL) {
1315 tmp_parent_options = qdict_new();
1316 parent_options = tmp_parent_options;
31ca6d07
KW
1317 }
1318
9156df12 1319 bs->open_flags &= ~BDRV_O_NO_BACKING;
d9b7b057
KW
1320
1321 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1322 qdict_extract_subqdict(parent_options, &options, bdref_key_dot);
1323 g_free(bdref_key_dot);
1324
1325 reference = qdict_get_try_str(parent_options, bdref_key);
1326 if (reference || qdict_haskey(options, "file.filename")) {
1cb6f506
KW
1327 backing_filename[0] = '\0';
1328 } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
31ca6d07 1329 QDECREF(options);
1ba4b6a5 1330 goto free_exit;
dbecebdd 1331 } else {
9f07429e
HR
1332 bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
1333 &local_err);
1334 if (local_err) {
1335 ret = -EINVAL;
1336 error_propagate(errp, local_err);
1337 QDECREF(options);
1338 goto free_exit;
1339 }
9156df12
PB
1340 }
1341
8ee79e70
KW
1342 if (!bs->drv || !bs->drv->supports_backing) {
1343 ret = -EINVAL;
1344 error_setg(errp, "Driver doesn't support backing files");
1345 QDECREF(options);
1346 goto free_exit;
1347 }
1348
c5f6e493
KW
1349 if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1350 qdict_put(options, "driver", qstring_from_str(bs->backing_format));
9156df12
PB
1351 }
1352
d9b7b057 1353 backing_hd = NULL;
f3930ed0
KW
1354 ret = bdrv_open_inherit(&backing_hd,
1355 *backing_filename ? backing_filename : NULL,
d9b7b057 1356 reference, options, 0, bs, &child_backing,
e43bfd9c 1357 errp);
9156df12 1358 if (ret < 0) {
9156df12 1359 bs->open_flags |= BDRV_O_NO_BACKING;
e43bfd9c 1360 error_prepend(errp, "Could not open backing file: ");
1ba4b6a5 1361 goto free_exit;
9156df12 1362 }
df581792 1363
5db15a57
KW
1364 /* Hook up the backing file link; drop our reference, bs owns the
1365 * backing_hd reference now */
8d24cce1 1366 bdrv_set_backing_hd(bs, backing_hd);
5db15a57 1367 bdrv_unref(backing_hd);
d80ac658 1368
d9b7b057
KW
1369 qdict_del(parent_options, bdref_key);
1370
1ba4b6a5
BC
1371free_exit:
1372 g_free(backing_filename);
d9b7b057 1373 QDECREF(tmp_parent_options);
1ba4b6a5 1374 return ret;
9156df12
PB
1375}
1376
da557aac
HR
1377/*
1378 * Opens a disk image whose options are given as BlockdevRef in another block
1379 * device's options.
1380 *
da557aac 1381 * If allow_none is true, no image will be opened if filename is false and no
b4b059f6 1382 * BlockdevRef is given. NULL will be returned, but errp remains unset.
da557aac
HR
1383 *
1384 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1385 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1386 * itself, all options starting with "${bdref_key}." are considered part of the
1387 * BlockdevRef.
1388 *
1389 * The BlockdevRef will be removed from the options QDict.
1390 */
b4b059f6
KW
1391BdrvChild *bdrv_open_child(const char *filename,
1392 QDict *options, const char *bdref_key,
1393 BlockDriverState* parent,
1394 const BdrvChildRole *child_role,
1395 bool allow_none, Error **errp)
da557aac 1396{
b4b059f6
KW
1397 BdrvChild *c = NULL;
1398 BlockDriverState *bs;
da557aac
HR
1399 QDict *image_options;
1400 int ret;
1401 char *bdref_key_dot;
1402 const char *reference;
1403
df581792 1404 assert(child_role != NULL);
f67503e5 1405
da557aac
HR
1406 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1407 qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1408 g_free(bdref_key_dot);
1409
1410 reference = qdict_get_try_str(options, bdref_key);
1411 if (!filename && !reference && !qdict_size(image_options)) {
b4b059f6 1412 if (!allow_none) {
da557aac
HR
1413 error_setg(errp, "A block device must be specified for \"%s\"",
1414 bdref_key);
da557aac 1415 }
b20e61e0 1416 QDECREF(image_options);
da557aac
HR
1417 goto done;
1418 }
1419
b4b059f6
KW
1420 bs = NULL;
1421 ret = bdrv_open_inherit(&bs, filename, reference, image_options, 0,
ce343771 1422 parent, child_role, errp);
df581792
KW
1423 if (ret < 0) {
1424 goto done;
1425 }
1426
260fecf1 1427 c = bdrv_attach_child(parent, bs, bdref_key, child_role);
da557aac
HR
1428
1429done:
1430 qdict_del(options, bdref_key);
b4b059f6
KW
1431 return c;
1432}
1433
73176bee
KW
1434static int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags,
1435 QDict *snapshot_options, Error **errp)
b998875d
KW
1436{
1437 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1ba4b6a5 1438 char *tmp_filename = g_malloc0(PATH_MAX + 1);
b998875d 1439 int64_t total_size;
83d0521a 1440 QemuOpts *opts = NULL;
b998875d 1441 BlockDriverState *bs_snapshot;
c2e0dbbf 1442 Error *local_err = NULL;
b998875d
KW
1443 int ret;
1444
1445 /* if snapshot, we create a temporary backing file and open it
1446 instead of opening 'filename' directly */
1447
1448 /* Get the required size from the image */
f187743a
KW
1449 total_size = bdrv_getlength(bs);
1450 if (total_size < 0) {
6b8aeca5 1451 ret = total_size;
f187743a 1452 error_setg_errno(errp, -total_size, "Could not get image size");
1ba4b6a5 1453 goto out;
f187743a 1454 }
b998875d
KW
1455
1456 /* Create the temporary image */
1ba4b6a5 1457 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
b998875d
KW
1458 if (ret < 0) {
1459 error_setg_errno(errp, -ret, "Could not get temporary filename");
1ba4b6a5 1460 goto out;
b998875d
KW
1461 }
1462
ef810437 1463 opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
c282e1fd 1464 &error_abort);
39101f25 1465 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
e43bfd9c 1466 ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, errp);
83d0521a 1467 qemu_opts_del(opts);
b998875d 1468 if (ret < 0) {
e43bfd9c
MA
1469 error_prepend(errp, "Could not create temporary overlay '%s': ",
1470 tmp_filename);
1ba4b6a5 1471 goto out;
b998875d
KW
1472 }
1473
73176bee 1474 /* Prepare options QDict for the temporary file */
b998875d
KW
1475 qdict_put(snapshot_options, "file.driver",
1476 qstring_from_str("file"));
1477 qdict_put(snapshot_options, "file.filename",
1478 qstring_from_str(tmp_filename));
e6641719
HR
1479 qdict_put(snapshot_options, "driver",
1480 qstring_from_str("qcow2"));
b998875d 1481
e4e9986b 1482 bs_snapshot = bdrv_new();
b998875d
KW
1483
1484 ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
6ebf9aa2 1485 flags, &local_err);
73176bee 1486 snapshot_options = NULL;
b998875d
KW
1487 if (ret < 0) {
1488 error_propagate(errp, local_err);
1ba4b6a5 1489 goto out;
b998875d
KW
1490 }
1491
1492 bdrv_append(bs_snapshot, bs);
1ba4b6a5
BC
1493
1494out:
73176bee 1495 QDECREF(snapshot_options);
1ba4b6a5 1496 g_free(tmp_filename);
6b8aeca5 1497 return ret;
b998875d
KW
1498}
1499
b6ce07aa
KW
1500/*
1501 * Opens a disk image (raw, qcow2, vmdk, ...)
de9c0cec
KW
1502 *
1503 * options is a QDict of options to pass to the block drivers, or NULL for an
1504 * empty set of options. The reference to the QDict belongs to the block layer
1505 * after the call (even on failure), so if the caller intends to reuse the
1506 * dictionary, it needs to use QINCREF() before calling bdrv_open.
f67503e5
HR
1507 *
1508 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1509 * If it is not NULL, the referenced BDS will be reused.
ddf5636d
HR
1510 *
1511 * The reference parameter may be used to specify an existing block device which
1512 * should be opened. If specified, neither options nor a filename may be given,
1513 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
b6ce07aa 1514 */
f3930ed0
KW
1515static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
1516 const char *reference, QDict *options, int flags,
1517 BlockDriverState *parent,
ce343771 1518 const BdrvChildRole *child_role, Error **errp)
ea2384d3 1519{
b6ce07aa 1520 int ret;
9a4f4c31
KW
1521 BdrvChild *file = NULL;
1522 BlockDriverState *bs;
ce343771 1523 BlockDriver *drv = NULL;
74fe54f2 1524 const char *drvname;
3e8c2e57 1525 const char *backing;
34b5d2c6 1526 Error *local_err = NULL;
73176bee 1527 QDict *snapshot_options = NULL;
b1e6fc08 1528 int snapshot_flags = 0;
712e7874 1529
f67503e5 1530 assert(pbs);
f3930ed0
KW
1531 assert(!child_role || !flags);
1532 assert(!child_role == !parent);
f67503e5 1533
ddf5636d
HR
1534 if (reference) {
1535 bool options_non_empty = options ? qdict_size(options) : false;
1536 QDECREF(options);
1537
1538 if (*pbs) {
1539 error_setg(errp, "Cannot reuse an existing BDS when referencing "
1540 "another block device");
1541 return -EINVAL;
1542 }
1543
1544 if (filename || options_non_empty) {
1545 error_setg(errp, "Cannot reference an existing block device with "
1546 "additional options or a new filename");
1547 return -EINVAL;
1548 }
1549
1550 bs = bdrv_lookup_bs(reference, reference, errp);
1551 if (!bs) {
1552 return -ENODEV;
1553 }
1554 bdrv_ref(bs);
1555 *pbs = bs;
1556 return 0;
1557 }
1558
f67503e5
HR
1559 if (*pbs) {
1560 bs = *pbs;
1561 } else {
e4e9986b 1562 bs = bdrv_new();
f67503e5
HR
1563 }
1564
de9c0cec
KW
1565 /* NULL means an empty set of options */
1566 if (options == NULL) {
1567 options = qdict_new();
1568 }
1569
145f598e 1570 /* json: syntax counts as explicit options, as if in the QDict */
de3b53f0
KW
1571 parse_json_protocol(options, &filename, &local_err);
1572 if (local_err) {
1573 ret = -EINVAL;
1574 goto fail;
1575 }
1576
145f598e
KW
1577 bs->explicit_options = qdict_clone_shallow(options);
1578
f3930ed0 1579 if (child_role) {
bddcec37 1580 bs->inherits_from = parent;
8e2160e2
KW
1581 child_role->inherit_options(&flags, options,
1582 parent->open_flags, parent->options);
f3930ed0
KW
1583 }
1584
de3b53f0 1585 ret = bdrv_fill_options(&options, filename, &flags, &local_err);
462f5bcf
KW
1586 if (local_err) {
1587 goto fail;
1588 }
1589
62392ebb
KW
1590 bs->open_flags = flags;
1591 bs->options = options;
1592 options = qdict_clone_shallow(options);
1593
76c591b0 1594 /* Find the right image format driver */
76c591b0
KW
1595 drvname = qdict_get_try_str(options, "driver");
1596 if (drvname) {
1597 drv = bdrv_find_format(drvname);
76c591b0
KW
1598 if (!drv) {
1599 error_setg(errp, "Unknown driver: '%s'", drvname);
1600 ret = -EINVAL;
1601 goto fail;
1602 }
1603 }
1604
1605 assert(drvname || !(flags & BDRV_O_PROTOCOL));
76c591b0 1606
3e8c2e57
AG
1607 backing = qdict_get_try_str(options, "backing");
1608 if (backing && *backing == '\0') {
1609 flags |= BDRV_O_NO_BACKING;
1610 qdict_del(options, "backing");
1611 }
1612
f500a6d3 1613 /* Open image file without format layer */
f4788adc
KW
1614 if ((flags & BDRV_O_PROTOCOL) == 0) {
1615 if (flags & BDRV_O_RDWR) {
1616 flags |= BDRV_O_ALLOW_RDWR;
1617 }
1618 if (flags & BDRV_O_SNAPSHOT) {
73176bee
KW
1619 snapshot_options = qdict_new();
1620 bdrv_temp_snapshot_options(&snapshot_flags, snapshot_options,
1621 flags, options);
8e2160e2 1622 bdrv_backing_options(&flags, options, flags, options);
f4788adc 1623 }
f500a6d3 1624
f3930ed0 1625 bs->open_flags = flags;
1fdd6933 1626
9a4f4c31
KW
1627 file = bdrv_open_child(filename, options, "file", bs,
1628 &child_file, true, &local_err);
1fdd6933
KW
1629 if (local_err) {
1630 ret = -EINVAL;
f4788adc
KW
1631 goto fail;
1632 }
f500a6d3
KW
1633 }
1634
76c591b0 1635 /* Image format probing */
38f3ef57 1636 bs->probed = !drv;
76c591b0 1637 if (!drv && file) {
9a4f4c31 1638 ret = find_image_format(file->bs, filename, &drv, &local_err);
17b005f1 1639 if (ret < 0) {
8bfea15d 1640 goto fail;
2a05cbe4 1641 }
62392ebb
KW
1642 /*
1643 * This option update would logically belong in bdrv_fill_options(),
1644 * but we first need to open bs->file for the probing to work, while
1645 * opening bs->file already requires the (mostly) final set of options
1646 * so that cache mode etc. can be inherited.
1647 *
1648 * Adding the driver later is somewhat ugly, but it's not an option
1649 * that would ever be inherited, so it's correct. We just need to make
1650 * sure to update both bs->options (which has the full effective
1651 * options for bs) and options (which has file.* already removed).
1652 */
1653 qdict_put(bs->options, "driver", qstring_from_str(drv->format_name));
1654 qdict_put(options, "driver", qstring_from_str(drv->format_name));
76c591b0 1655 } else if (!drv) {
17b005f1
KW
1656 error_setg(errp, "Must specify either driver or file");
1657 ret = -EINVAL;
8bfea15d 1658 goto fail;
ea2384d3 1659 }
b6ce07aa 1660
53a29513
HR
1661 /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
1662 assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
1663 /* file must be NULL if a protocol BDS is about to be created
1664 * (the inverse results in an error message from bdrv_open_common()) */
1665 assert(!(flags & BDRV_O_PROTOCOL) || !file);
1666
b6ce07aa 1667 /* Open the image */
82dc8b41 1668 ret = bdrv_open_common(bs, file, options, &local_err);
b6ce07aa 1669 if (ret < 0) {
8bfea15d 1670 goto fail;
6987307c
CH
1671 }
1672
2a05cbe4 1673 if (file && (bs->file != file)) {
9a4f4c31 1674 bdrv_unref_child(bs, file);
f500a6d3
KW
1675 file = NULL;
1676 }
1677
b6ce07aa 1678 /* If there is a backing file, use it */
9156df12 1679 if ((flags & BDRV_O_NO_BACKING) == 0) {
d9b7b057 1680 ret = bdrv_open_backing_file(bs, options, "backing", &local_err);
b6ce07aa 1681 if (ret < 0) {
b6ad491a 1682 goto close_and_fail;
b6ce07aa 1683 }
b6ce07aa
KW
1684 }
1685
91af7014
HR
1686 bdrv_refresh_filename(bs);
1687
b6ad491a 1688 /* Check if any unknown options were used */
5acd9d81 1689 if (options && (qdict_size(options) != 0)) {
b6ad491a 1690 const QDictEntry *entry = qdict_first(options);
5acd9d81
HR
1691 if (flags & BDRV_O_PROTOCOL) {
1692 error_setg(errp, "Block protocol '%s' doesn't support the option "
1693 "'%s'", drv->format_name, entry->key);
1694 } else {
d0e46a55
HR
1695 error_setg(errp,
1696 "Block format '%s' does not support the option '%s'",
1697 drv->format_name, entry->key);
5acd9d81 1698 }
b6ad491a
KW
1699
1700 ret = -EINVAL;
1701 goto close_and_fail;
1702 }
b6ad491a 1703
b6ce07aa 1704 if (!bdrv_key_required(bs)) {
a7f53e26
MA
1705 if (bs->blk) {
1706 blk_dev_change_media_cb(bs->blk, true);
1707 }
c3adb58f
MA
1708 } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1709 && !runstate_check(RUN_STATE_INMIGRATE)
1710 && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1711 error_setg(errp,
1712 "Guest must be stopped for opening of encrypted image");
1713 ret = -EBUSY;
1714 goto close_and_fail;
b6ce07aa
KW
1715 }
1716
c3adb58f 1717 QDECREF(options);
f67503e5 1718 *pbs = bs;
dd62f1ca
KW
1719
1720 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1721 * temporary snapshot afterwards. */
1722 if (snapshot_flags) {
73176bee
KW
1723 ret = bdrv_append_temp_snapshot(bs, snapshot_flags, snapshot_options,
1724 &local_err);
1725 snapshot_options = NULL;
dd62f1ca
KW
1726 if (local_err) {
1727 goto close_and_fail;
1728 }
1729 }
1730
b6ce07aa
KW
1731 return 0;
1732
8bfea15d 1733fail:
f500a6d3 1734 if (file != NULL) {
9a4f4c31 1735 bdrv_unref_child(bs, file);
f500a6d3 1736 }
73176bee 1737 QDECREF(snapshot_options);
145f598e 1738 QDECREF(bs->explicit_options);
de9c0cec 1739 QDECREF(bs->options);
b6ad491a 1740 QDECREF(options);
de9c0cec 1741 bs->options = NULL;
f67503e5
HR
1742 if (!*pbs) {
1743 /* If *pbs is NULL, a new BDS has been created in this function and
1744 needs to be freed now. Otherwise, it does not need to be closed,
1745 since it has not really been opened yet. */
1746 bdrv_unref(bs);
1747 }
84d18f06 1748 if (local_err) {
34b5d2c6
HR
1749 error_propagate(errp, local_err);
1750 }
b6ad491a 1751 return ret;
de9c0cec 1752
b6ad491a 1753close_and_fail:
f67503e5
HR
1754 /* See fail path, but now the BDS has to be always closed */
1755 if (*pbs) {
1756 bdrv_close(bs);
1757 } else {
1758 bdrv_unref(bs);
1759 }
73176bee 1760 QDECREF(snapshot_options);
b6ad491a 1761 QDECREF(options);
84d18f06 1762 if (local_err) {
34b5d2c6
HR
1763 error_propagate(errp, local_err);
1764 }
b6ce07aa
KW
1765 return ret;
1766}
1767
f3930ed0 1768int bdrv_open(BlockDriverState **pbs, const char *filename,
6ebf9aa2 1769 const char *reference, QDict *options, int flags, Error **errp)
f3930ed0
KW
1770{
1771 return bdrv_open_inherit(pbs, filename, reference, options, flags, NULL,
ce343771 1772 NULL, errp);
f3930ed0
KW
1773}
1774
e971aa12
JC
1775typedef struct BlockReopenQueueEntry {
1776 bool prepared;
1777 BDRVReopenState state;
1778 QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1779} BlockReopenQueueEntry;
1780
1781/*
1782 * Adds a BlockDriverState to a simple queue for an atomic, transactional
1783 * reopen of multiple devices.
1784 *
1785 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1786 * already performed, or alternatively may be NULL a new BlockReopenQueue will
1787 * be created and initialized. This newly created BlockReopenQueue should be
1788 * passed back in for subsequent calls that are intended to be of the same
1789 * atomic 'set'.
1790 *
1791 * bs is the BlockDriverState to add to the reopen queue.
1792 *
4d2cb092
KW
1793 * options contains the changed options for the associated bs
1794 * (the BlockReopenQueue takes ownership)
1795 *
e971aa12
JC
1796 * flags contains the open flags for the associated bs
1797 *
1798 * returns a pointer to bs_queue, which is either the newly allocated
1799 * bs_queue, or the existing bs_queue being used.
1800 *
1801 */
28518102
KW
1802static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
1803 BlockDriverState *bs,
1804 QDict *options,
1805 int flags,
1806 const BdrvChildRole *role,
1807 QDict *parent_options,
1808 int parent_flags)
e971aa12
JC
1809{
1810 assert(bs != NULL);
1811
1812 BlockReopenQueueEntry *bs_entry;
67251a31 1813 BdrvChild *child;
145f598e 1814 QDict *old_options, *explicit_options;
67251a31 1815
e971aa12
JC
1816 if (bs_queue == NULL) {
1817 bs_queue = g_new0(BlockReopenQueue, 1);
1818 QSIMPLEQ_INIT(bs_queue);
1819 }
1820
4d2cb092
KW
1821 if (!options) {
1822 options = qdict_new();
1823 }
1824
28518102
KW
1825 /*
1826 * Precedence of options:
1827 * 1. Explicitly passed in options (highest)
91a097e7 1828 * 2. Set in flags (only for top level)
145f598e 1829 * 3. Retained from explicitly set options of bs
8e2160e2 1830 * 4. Inherited from parent node
28518102
KW
1831 * 5. Retained from effective options of bs
1832 */
1833
91a097e7
KW
1834 if (!parent_options) {
1835 /*
1836 * Any setting represented by flags is always updated. If the
1837 * corresponding QDict option is set, it takes precedence. Otherwise
1838 * the flag is translated into a QDict option. The old setting of bs is
1839 * not considered.
1840 */
1841 update_options_from_flags(options, flags);
1842 }
1843
145f598e
KW
1844 /* Old explicitly set values (don't overwrite by inherited value) */
1845 old_options = qdict_clone_shallow(bs->explicit_options);
1846 bdrv_join_options(bs, options, old_options);
1847 QDECREF(old_options);
1848
1849 explicit_options = qdict_clone_shallow(options);
1850
28518102
KW
1851 /* Inherit from parent node */
1852 if (parent_options) {
1853 assert(!flags);
8e2160e2 1854 role->inherit_options(&flags, options, parent_flags, parent_options);
28518102
KW
1855 }
1856
1857 /* Old values are used for options that aren't set yet */
4d2cb092 1858 old_options = qdict_clone_shallow(bs->options);
cddff5ba 1859 bdrv_join_options(bs, options, old_options);
4d2cb092
KW
1860 QDECREF(old_options);
1861
f1f25a2e
KW
1862 /* bdrv_open() masks this flag out */
1863 flags &= ~BDRV_O_PROTOCOL;
1864
67251a31 1865 QLIST_FOREACH(child, &bs->children, next) {
4c9dfe5d
KW
1866 QDict *new_child_options;
1867 char *child_key_dot;
67251a31 1868
4c9dfe5d
KW
1869 /* reopen can only change the options of block devices that were
1870 * implicitly created and inherited options. For other (referenced)
1871 * block devices, a syntax like "backing.foo" results in an error. */
67251a31
KW
1872 if (child->bs->inherits_from != bs) {
1873 continue;
1874 }
1875
4c9dfe5d
KW
1876 child_key_dot = g_strdup_printf("%s.", child->name);
1877 qdict_extract_subqdict(options, &new_child_options, child_key_dot);
1878 g_free(child_key_dot);
1879
28518102
KW
1880 bdrv_reopen_queue_child(bs_queue, child->bs, new_child_options, 0,
1881 child->role, options, flags);
e971aa12
JC
1882 }
1883
1884 bs_entry = g_new0(BlockReopenQueueEntry, 1);
1885 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1886
1887 bs_entry->state.bs = bs;
4d2cb092 1888 bs_entry->state.options = options;
145f598e 1889 bs_entry->state.explicit_options = explicit_options;
e971aa12
JC
1890 bs_entry->state.flags = flags;
1891
1892 return bs_queue;
1893}
1894
28518102
KW
1895BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1896 BlockDriverState *bs,
1897 QDict *options, int flags)
1898{
1899 return bdrv_reopen_queue_child(bs_queue, bs, options, flags,
1900 NULL, NULL, 0);
1901}
1902
e971aa12
JC
1903/*
1904 * Reopen multiple BlockDriverStates atomically & transactionally.
1905 *
1906 * The queue passed in (bs_queue) must have been built up previous
1907 * via bdrv_reopen_queue().
1908 *
1909 * Reopens all BDS specified in the queue, with the appropriate
1910 * flags. All devices are prepared for reopen, and failure of any
1911 * device will cause all device changes to be abandonded, and intermediate
1912 * data cleaned up.
1913 *
1914 * If all devices prepare successfully, then the changes are committed
1915 * to all devices.
1916 *
1917 */
1918int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1919{
1920 int ret = -1;
1921 BlockReopenQueueEntry *bs_entry, *next;
1922 Error *local_err = NULL;
1923
1924 assert(bs_queue != NULL);
1925
1926 bdrv_drain_all();
1927
1928 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1929 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1930 error_propagate(errp, local_err);
1931 goto cleanup;
1932 }
1933 bs_entry->prepared = true;
1934 }
1935
1936 /* If we reach this point, we have success and just need to apply the
1937 * changes
1938 */
1939 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1940 bdrv_reopen_commit(&bs_entry->state);
1941 }
1942
1943 ret = 0;
1944
1945cleanup:
1946 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1947 if (ret && bs_entry->prepared) {
1948 bdrv_reopen_abort(&bs_entry->state);
145f598e
KW
1949 } else if (ret) {
1950 QDECREF(bs_entry->state.explicit_options);
e971aa12 1951 }
4d2cb092 1952 QDECREF(bs_entry->state.options);
e971aa12
JC
1953 g_free(bs_entry);
1954 }
1955 g_free(bs_queue);
1956 return ret;
1957}
1958
1959
1960/* Reopen a single BlockDriverState with the specified flags. */
1961int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1962{
1963 int ret = -1;
1964 Error *local_err = NULL;
4d2cb092 1965 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags);
e971aa12
JC
1966
1967 ret = bdrv_reopen_multiple(queue, &local_err);
1968 if (local_err != NULL) {
1969 error_propagate(errp, local_err);
1970 }
1971 return ret;
1972}
1973
1974
1975/*
1976 * Prepares a BlockDriverState for reopen. All changes are staged in the
1977 * 'opaque' field of the BDRVReopenState, which is used and allocated by
1978 * the block driver layer .bdrv_reopen_prepare()
1979 *
1980 * bs is the BlockDriverState to reopen
1981 * flags are the new open flags
1982 * queue is the reopen queue
1983 *
1984 * Returns 0 on success, non-zero on error. On error errp will be set
1985 * as well.
1986 *
1987 * On failure, bdrv_reopen_abort() will be called to clean up any data.
1988 * It is the responsibility of the caller to then call the abort() or
1989 * commit() for any other BDS that have been left in a prepare() state
1990 *
1991 */
1992int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1993 Error **errp)
1994{
1995 int ret = -1;
1996 Error *local_err = NULL;
1997 BlockDriver *drv;
ccf9dc07
KW
1998 QemuOpts *opts;
1999 const char *value;
e971aa12
JC
2000
2001 assert(reopen_state != NULL);
2002 assert(reopen_state->bs->drv != NULL);
2003 drv = reopen_state->bs->drv;
2004
ccf9dc07
KW
2005 /* Process generic block layer options */
2006 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
2007 qemu_opts_absorb_qdict(opts, reopen_state->options, &local_err);
2008 if (local_err) {
2009 error_propagate(errp, local_err);
2010 ret = -EINVAL;
2011 goto error;
2012 }
2013
91a097e7
KW
2014 update_flags_from_options(&reopen_state->flags, opts);
2015
2016 /* If a guest device is attached, it owns WCE */
2017 if (reopen_state->bs->blk && blk_get_attached_dev(reopen_state->bs->blk)) {
2018 bool old_wce = bdrv_enable_write_cache(reopen_state->bs);
2019 bool new_wce = (reopen_state->flags & BDRV_O_CACHE_WB);
2020 if (old_wce != new_wce) {
2021 error_setg(errp, "Cannot change cache.writeback: Device attached");
2022 ret = -EINVAL;
2023 goto error;
2024 }
2025 }
2026
ccf9dc07
KW
2027 /* node-name and driver must be unchanged. Put them back into the QDict, so
2028 * that they are checked at the end of this function. */
2029 value = qemu_opt_get(opts, "node-name");
2030 if (value) {
2031 qdict_put(reopen_state->options, "node-name", qstring_from_str(value));
2032 }
2033
2034 value = qemu_opt_get(opts, "driver");
2035 if (value) {
2036 qdict_put(reopen_state->options, "driver", qstring_from_str(value));
2037 }
2038
e971aa12
JC
2039 /* if we are to stay read-only, do not allow permission change
2040 * to r/w */
2041 if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
2042 reopen_state->flags & BDRV_O_RDWR) {
81e5f78a
AG
2043 error_setg(errp, "Node '%s' is read only",
2044 bdrv_get_device_or_node_name(reopen_state->bs));
e971aa12
JC
2045 goto error;
2046 }
2047
2048
2049 ret = bdrv_flush(reopen_state->bs);
2050 if (ret) {
455b0fde 2051 error_setg_errno(errp, -ret, "Error flushing drive");
e971aa12
JC
2052 goto error;
2053 }
2054
2055 if (drv->bdrv_reopen_prepare) {
2056 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
2057 if (ret) {
2058 if (local_err != NULL) {
2059 error_propagate(errp, local_err);
2060 } else {
d8b6895f
LC
2061 error_setg(errp, "failed while preparing to reopen image '%s'",
2062 reopen_state->bs->filename);
e971aa12
JC
2063 }
2064 goto error;
2065 }
2066 } else {
2067 /* It is currently mandatory to have a bdrv_reopen_prepare()
2068 * handler for each supported drv. */
81e5f78a
AG
2069 error_setg(errp, "Block format '%s' used by node '%s' "
2070 "does not support reopening files", drv->format_name,
2071 bdrv_get_device_or_node_name(reopen_state->bs));
e971aa12
JC
2072 ret = -1;
2073 goto error;
2074 }
2075
4d2cb092
KW
2076 /* Options that are not handled are only okay if they are unchanged
2077 * compared to the old state. It is expected that some options are only
2078 * used for the initial open, but not reopen (e.g. filename) */
2079 if (qdict_size(reopen_state->options)) {
2080 const QDictEntry *entry = qdict_first(reopen_state->options);
2081
2082 do {
2083 QString *new_obj = qobject_to_qstring(entry->value);
2084 const char *new = qstring_get_str(new_obj);
2085 const char *old = qdict_get_try_str(reopen_state->bs->options,
2086 entry->key);
2087
2088 if (!old || strcmp(new, old)) {
2089 error_setg(errp, "Cannot change the option '%s'", entry->key);
2090 ret = -EINVAL;
2091 goto error;
2092 }
2093 } while ((entry = qdict_next(reopen_state->options, entry)));
2094 }
2095
e971aa12
JC
2096 ret = 0;
2097
2098error:
ccf9dc07 2099 qemu_opts_del(opts);
e971aa12
JC
2100 return ret;
2101}
2102
2103/*
2104 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
2105 * makes them final by swapping the staging BlockDriverState contents into
2106 * the active BlockDriverState contents.
2107 */
2108void bdrv_reopen_commit(BDRVReopenState *reopen_state)
2109{
2110 BlockDriver *drv;
2111
2112 assert(reopen_state != NULL);
2113 drv = reopen_state->bs->drv;
2114 assert(drv != NULL);
2115
2116 /* If there are any driver level actions to take */
2117 if (drv->bdrv_reopen_commit) {
2118 drv->bdrv_reopen_commit(reopen_state);
2119 }
2120
2121 /* set BDS specific flags now */
145f598e
KW
2122 QDECREF(reopen_state->bs->explicit_options);
2123
2124 reopen_state->bs->explicit_options = reopen_state->explicit_options;
e971aa12
JC
2125 reopen_state->bs->open_flags = reopen_state->flags;
2126 reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
2127 BDRV_O_CACHE_WB);
2128 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
355ef4ac 2129
3baca891 2130 bdrv_refresh_limits(reopen_state->bs, NULL);
e971aa12
JC
2131}
2132
2133/*
2134 * Abort the reopen, and delete and free the staged changes in
2135 * reopen_state
2136 */
2137void bdrv_reopen_abort(BDRVReopenState *reopen_state)
2138{
2139 BlockDriver *drv;
2140
2141 assert(reopen_state != NULL);
2142 drv = reopen_state->bs->drv;
2143 assert(drv != NULL);
2144
2145 if (drv->bdrv_reopen_abort) {
2146 drv->bdrv_reopen_abort(reopen_state);
2147 }
145f598e
KW
2148
2149 QDECREF(reopen_state->explicit_options);
e971aa12
JC
2150}
2151
2152
64dff520 2153static void bdrv_close(BlockDriverState *bs)
fc01f7e7 2154{
33384421
HR
2155 BdrvAioNotifier *ban, *ban_next;
2156
ca9bd24c 2157 assert(!bs->job);
99b7e775
AG
2158
2159 /* Disable I/O limits and drain all pending throttled requests */
a0d64a61 2160 if (bs->throttle_state) {
99b7e775
AG
2161 bdrv_io_limits_disable(bs);
2162 }
2163
fc27291d 2164 bdrv_drained_begin(bs); /* complete I/O */
58fda173 2165 bdrv_flush(bs);
53ec73e2 2166 bdrv_drain(bs); /* in case flush left pending I/O */
fc27291d 2167
c5acdc9a
HR
2168 bdrv_release_named_dirty_bitmaps(bs);
2169 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
2170
b4d02820
HR
2171 if (bs->blk) {
2172 blk_dev_change_media_cb(bs->blk, false);
2173 }
2174
3cbc002c 2175 if (bs->drv) {
6e93e7c4
KW
2176 BdrvChild *child, *next;
2177
9a7dedbc 2178 bs->drv->bdrv_close(bs);
9a4f4c31 2179 bs->drv = NULL;
9a7dedbc 2180
5db15a57 2181 bdrv_set_backing_hd(bs, NULL);
9a7dedbc 2182
9a4f4c31
KW
2183 if (bs->file != NULL) {
2184 bdrv_unref_child(bs, bs->file);
2185 bs->file = NULL;
2186 }
2187
6e93e7c4 2188 QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
33a60407
KW
2189 /* TODO Remove bdrv_unref() from drivers' close function and use
2190 * bdrv_unref_child() here */
bddcec37
KW
2191 if (child->bs->inherits_from == bs) {
2192 child->bs->inherits_from = NULL;
2193 }
33a60407 2194 bdrv_detach_child(child);
6e93e7c4
KW
2195 }
2196
7267c094 2197 g_free(bs->opaque);
ea2384d3 2198 bs->opaque = NULL;
53fec9d3 2199 bs->copy_on_read = 0;
a275fa42
PB
2200 bs->backing_file[0] = '\0';
2201 bs->backing_format[0] = '\0';
6405875c
PB
2202 bs->total_sectors = 0;
2203 bs->encrypted = 0;
2204 bs->valid_key = 0;
2205 bs->sg = 0;
0d51b4de 2206 bs->zero_beyond_eof = false;
de9c0cec 2207 QDECREF(bs->options);
145f598e 2208 QDECREF(bs->explicit_options);
de9c0cec 2209 bs->options = NULL;
91af7014
HR
2210 QDECREF(bs->full_open_options);
2211 bs->full_open_options = NULL;
b338082b 2212 }
98f90dba 2213
33384421
HR
2214 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
2215 g_free(ban);
2216 }
2217 QLIST_INIT(&bs->aio_notifiers);
fc27291d 2218 bdrv_drained_end(bs);
b338082b
FB
2219}
2220
2bc93fed
MK
2221void bdrv_close_all(void)
2222{
2223 BlockDriverState *bs;
ca9bd24c
HR
2224 AioContext *aio_context;
2225
2226 /* Drop references from requests still in flight, such as canceled block
2227 * jobs whose AIO context has not been polled yet */
2228 bdrv_drain_all();
2bc93fed 2229
ca9bd24c
HR
2230 blk_remove_all_bs();
2231 blockdev_close_all_bdrv_states();
ed78cda3 2232
ca9bd24c
HR
2233 /* Cancel all block jobs */
2234 while (!QTAILQ_EMPTY(&all_bdrv_states)) {
2235 QTAILQ_FOREACH(bs, &all_bdrv_states, bs_list) {
2236 aio_context = bdrv_get_aio_context(bs);
2237
2238 aio_context_acquire(aio_context);
2239 if (bs->job) {
2240 block_job_cancel_sync(bs->job);
2241 aio_context_release(aio_context);
2242 break;
2243 }
2244 aio_context_release(aio_context);
2245 }
2246
2247 /* All the remaining BlockDriverStates are referenced directly or
2248 * indirectly from block jobs, so there needs to be at least one BDS
2249 * directly used by a block job */
2250 assert(bs);
2bc93fed
MK
2251 }
2252}
2253
8e419aef 2254/* Fields that need to stay with the top-level BDS */
4ddc07ca
PB
2255static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
2256 BlockDriverState *bs_src)
8802d1fd 2257{
4ddc07ca 2258 /* move some fields that need to stay attached to the device */
8802d1fd
JC
2259
2260 /* dev info */
4ddc07ca 2261 bs_dest->enable_write_cache = bs_src->enable_write_cache;
063dd40e 2262}
a9fc4408 2263
dd62f1ca
KW
2264static void change_parent_backing_link(BlockDriverState *from,
2265 BlockDriverState *to)
2266{
2267 BdrvChild *c, *next;
2268
f21d96d0
KW
2269 if (from->blk) {
2270 /* FIXME We bypass blk_set_bs(), so we need to make these updates
2271 * manually. The root problem is not in this change function, but the
2272 * existence of BlockDriverState.blk. */
2273 to->blk = from->blk;
2274 from->blk = NULL;
2275 }
2276
dd62f1ca
KW
2277 QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
2278 assert(c->role != &child_backing);
2279 c->bs = to;
2280 QLIST_REMOVE(c, next_parent);
2281 QLIST_INSERT_HEAD(&to->parents, c, next_parent);
2282 bdrv_ref(to);
2283 bdrv_unref(from);
2284 }
dd62f1ca
KW
2285}
2286
2287static void swap_feature_fields(BlockDriverState *bs_top,
2288 BlockDriverState *bs_new)
2289{
2290 BlockDriverState tmp;
2291
2292 bdrv_move_feature_fields(&tmp, bs_top);
2293 bdrv_move_feature_fields(bs_top, bs_new);
2294 bdrv_move_feature_fields(bs_new, &tmp);
2295
2296 assert(!bs_new->throttle_state);
2297 if (bs_top->throttle_state) {
2298 assert(bs_top->io_limits_enabled);
2299 bdrv_io_limits_enable(bs_new, throttle_group_get_name(bs_top));
2300 bdrv_io_limits_disable(bs_top);
2301 }
2302}
2303
4ddc07ca
PB
2304/*
2305 * Add new bs contents at the top of an image chain while the chain is
2306 * live, while keeping required fields on the top layer.
2307 *
2308 * This will modify the BlockDriverState fields, and swap contents
2309 * between bs_new and bs_top. Both bs_new and bs_top are modified.
2310 *
bfb197e0 2311 * bs_new must not be attached to a BlockBackend.
4ddc07ca
PB
2312 *
2313 * This function does not create any image files.
dd62f1ca
KW
2314 *
2315 * bdrv_append() takes ownership of a bs_new reference and unrefs it because
2316 * that's what the callers commonly need. bs_new will be referenced by the old
2317 * parents of bs_top after bdrv_append() returns. If the caller needs to keep a
2318 * reference of its own, it must call bdrv_ref().
4ddc07ca
PB
2319 */
2320void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
2321{
dd62f1ca
KW
2322 assert(!bdrv_requests_pending(bs_top));
2323 assert(!bdrv_requests_pending(bs_new));
2324
2325 bdrv_ref(bs_top);
2326 change_parent_backing_link(bs_top, bs_new);
2327
2328 /* Some fields always stay on top of the backing file chain */
2329 swap_feature_fields(bs_top, bs_new);
2330
2331 bdrv_set_backing_hd(bs_new, bs_top);
2332 bdrv_unref(bs_top);
4ddc07ca 2333
dd62f1ca
KW
2334 /* bs_new is now referenced by its new parents, we don't need the
2335 * additional reference any more. */
2336 bdrv_unref(bs_new);
8802d1fd
JC
2337}
2338
3f09bfbc
KW
2339void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new)
2340{
2341 assert(!bdrv_requests_pending(old));
2342 assert(!bdrv_requests_pending(new));
2343
2344 bdrv_ref(old);
2345
2346 if (old->blk) {
2347 /* As long as these fields aren't in BlockBackend, but in the top-level
2348 * BlockDriverState, it's not possible for a BDS to have two BBs.
2349 *
2350 * We really want to copy the fields from old to new, but we go for a
2351 * swap instead so that pointers aren't duplicated and cause trouble.
2352 * (Also, bdrv_swap() used to do the same.) */
2353 assert(!new->blk);
2354 swap_feature_fields(old, new);
2355 }
2356 change_parent_backing_link(old, new);
2357
2358 /* Change backing files if a previously independent node is added to the
2359 * chain. For active commit, we replace top by its own (indirect) backing
2360 * file and don't do anything here so we don't build a loop. */
2361 if (new->backing == NULL && !bdrv_chain_contains(backing_bs(old), new)) {
2362 bdrv_set_backing_hd(new, backing_bs(old));
2363 bdrv_set_backing_hd(old, NULL);
2364 }
2365
2366 bdrv_unref(old);
2367}
2368
4f6fd349 2369static void bdrv_delete(BlockDriverState *bs)
b338082b 2370{
3e914655 2371 assert(!bs->job);
3718d8ab 2372 assert(bdrv_op_blocker_is_empty(bs));
4f6fd349 2373 assert(!bs->refcnt);
18846dee 2374
e1b5c52e
SH
2375 bdrv_close(bs);
2376
1b7bdbc1 2377 /* remove from list, if necessary */
63eaaae0
KW
2378 if (bs->node_name[0] != '\0') {
2379 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
2380 }
2c1d04e0
HR
2381 QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list);
2382
7267c094 2383 g_free(bs);
fc01f7e7
FB
2384}
2385
e97fc193
AL
2386/*
2387 * Run consistency checks on an image
2388 *
e076f338 2389 * Returns 0 if the check could be completed (it doesn't mean that the image is
a1c7273b 2390 * free of errors) or -errno when an internal error occurred. The results of the
e076f338 2391 * check are stored in res.
e97fc193 2392 */
4534ff54 2393int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
e97fc193 2394{
908bcd54
HR
2395 if (bs->drv == NULL) {
2396 return -ENOMEDIUM;
2397 }
e97fc193
AL
2398 if (bs->drv->bdrv_check == NULL) {
2399 return -ENOTSUP;
2400 }
2401
e076f338 2402 memset(res, 0, sizeof(*res));
4534ff54 2403 return bs->drv->bdrv_check(bs, res, fix);
e97fc193
AL
2404}
2405
8a426614
KW
2406#define COMMIT_BUF_SECTORS 2048
2407
33e3963e
FB
2408/* commit COW file into the raw image */
2409int bdrv_commit(BlockDriverState *bs)
2410{
19cb3738 2411 BlockDriver *drv = bs->drv;
72706ea4 2412 int64_t sector, total_sectors, length, backing_length;
8a426614 2413 int n, ro, open_flags;
0bce597d 2414 int ret = 0;
72706ea4 2415 uint8_t *buf = NULL;
33e3963e 2416
19cb3738
FB
2417 if (!drv)
2418 return -ENOMEDIUM;
6bb45158 2419
760e0063 2420 if (!bs->backing) {
4dca4b63 2421 return -ENOTSUP;
33e3963e
FB
2422 }
2423
bb00021d 2424 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
760e0063 2425 bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
2d3735d3
SH
2426 return -EBUSY;
2427 }
2428
760e0063
KW
2429 ro = bs->backing->bs->read_only;
2430 open_flags = bs->backing->bs->open_flags;
4dca4b63
NS
2431
2432 if (ro) {
760e0063 2433 if (bdrv_reopen(bs->backing->bs, open_flags | BDRV_O_RDWR, NULL)) {
0bce597d 2434 return -EACCES;
4dca4b63 2435 }
ea2384d3 2436 }
33e3963e 2437
72706ea4
JC
2438 length = bdrv_getlength(bs);
2439 if (length < 0) {
2440 ret = length;
2441 goto ro_cleanup;
2442 }
2443
760e0063 2444 backing_length = bdrv_getlength(bs->backing->bs);
72706ea4
JC
2445 if (backing_length < 0) {
2446 ret = backing_length;
2447 goto ro_cleanup;
2448 }
2449
2450 /* If our top snapshot is larger than the backing file image,
2451 * grow the backing file image if possible. If not possible,
2452 * we must return an error */
2453 if (length > backing_length) {
760e0063 2454 ret = bdrv_truncate(bs->backing->bs, length);
72706ea4
JC
2455 if (ret < 0) {
2456 goto ro_cleanup;
2457 }
2458 }
2459
2460 total_sectors = length >> BDRV_SECTOR_BITS;
857d4f46
KW
2461
2462 /* qemu_try_blockalign() for bs will choose an alignment that works for
760e0063 2463 * bs->backing->bs as well, so no need to compare the alignment manually. */
857d4f46
KW
2464 buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2465 if (buf == NULL) {
2466 ret = -ENOMEM;
2467 goto ro_cleanup;
2468 }
8a426614
KW
2469
2470 for (sector = 0; sector < total_sectors; sector += n) {
d663640c
PB
2471 ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2472 if (ret < 0) {
2473 goto ro_cleanup;
2474 }
2475 if (ret) {
dabfa6cc
KW
2476 ret = bdrv_read(bs, sector, buf, n);
2477 if (ret < 0) {
8a426614
KW
2478 goto ro_cleanup;
2479 }
2480
760e0063 2481 ret = bdrv_write(bs->backing->bs, sector, buf, n);
dabfa6cc 2482 if (ret < 0) {
8a426614
KW
2483 goto ro_cleanup;
2484 }
ea2384d3 2485 }
33e3963e 2486 }
95389c86 2487
1d44952f
CH
2488 if (drv->bdrv_make_empty) {
2489 ret = drv->bdrv_make_empty(bs);
dabfa6cc
KW
2490 if (ret < 0) {
2491 goto ro_cleanup;
2492 }
1d44952f
CH
2493 bdrv_flush(bs);
2494 }
95389c86 2495
3f5075ae
CH
2496 /*
2497 * Make sure all data we wrote to the backing device is actually
2498 * stable on disk.
2499 */
760e0063
KW
2500 if (bs->backing) {
2501 bdrv_flush(bs->backing->bs);
dabfa6cc 2502 }
4dca4b63 2503
dabfa6cc 2504 ret = 0;
4dca4b63 2505ro_cleanup:
857d4f46 2506 qemu_vfree(buf);
4dca4b63
NS
2507
2508 if (ro) {
0bce597d 2509 /* ignoring error return here */
760e0063 2510 bdrv_reopen(bs->backing->bs, open_flags & ~BDRV_O_RDWR, NULL);
4dca4b63
NS
2511 }
2512
1d44952f 2513 return ret;
33e3963e
FB
2514}
2515
756e6736
KW
2516/*
2517 * Return values:
2518 * 0 - success
2519 * -EINVAL - backing format specified, but no file
2520 * -ENOSPC - can't update the backing file because no space is left in the
2521 * image file header
2522 * -ENOTSUP - format driver doesn't support changing the backing file
2523 */
2524int bdrv_change_backing_file(BlockDriverState *bs,
2525 const char *backing_file, const char *backing_fmt)
2526{
2527 BlockDriver *drv = bs->drv;
469ef350 2528 int ret;
756e6736 2529
5f377794
PB
2530 /* Backing file format doesn't make sense without a backing file */
2531 if (backing_fmt && !backing_file) {
2532 return -EINVAL;
2533 }
2534
756e6736 2535 if (drv->bdrv_change_backing_file != NULL) {
469ef350 2536 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
756e6736 2537 } else {
469ef350 2538 ret = -ENOTSUP;
756e6736 2539 }
469ef350
PB
2540
2541 if (ret == 0) {
2542 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2543 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2544 }
2545 return ret;
756e6736
KW
2546}
2547
6ebdcee2
JC
2548/*
2549 * Finds the image layer in the chain that has 'bs' as its backing file.
2550 *
2551 * active is the current topmost image.
2552 *
2553 * Returns NULL if bs is not found in active's image chain,
2554 * or if active == bs.
4caf0fcd
JC
2555 *
2556 * Returns the bottommost base image if bs == NULL.
6ebdcee2
JC
2557 */
2558BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2559 BlockDriverState *bs)
2560{
760e0063
KW
2561 while (active && bs != backing_bs(active)) {
2562 active = backing_bs(active);
6ebdcee2
JC
2563 }
2564
4caf0fcd
JC
2565 return active;
2566}
6ebdcee2 2567
4caf0fcd
JC
2568/* Given a BDS, searches for the base layer. */
2569BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2570{
2571 return bdrv_find_overlay(bs, NULL);
6ebdcee2
JC
2572}
2573
6ebdcee2
JC
2574/*
2575 * Drops images above 'base' up to and including 'top', and sets the image
2576 * above 'top' to have base as its backing file.
2577 *
2578 * Requires that the overlay to 'top' is opened r/w, so that the backing file
2579 * information in 'bs' can be properly updated.
2580 *
2581 * E.g., this will convert the following chain:
2582 * bottom <- base <- intermediate <- top <- active
2583 *
2584 * to
2585 *
2586 * bottom <- base <- active
2587 *
2588 * It is allowed for bottom==base, in which case it converts:
2589 *
2590 * base <- intermediate <- top <- active
2591 *
2592 * to
2593 *
2594 * base <- active
2595 *
54e26900
JC
2596 * If backing_file_str is non-NULL, it will be used when modifying top's
2597 * overlay image metadata.
2598 *
6ebdcee2
JC
2599 * Error conditions:
2600 * if active == top, that is considered an error
2601 *
2602 */
2603int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
54e26900 2604 BlockDriverState *base, const char *backing_file_str)
6ebdcee2 2605{
6ebdcee2 2606 BlockDriverState *new_top_bs = NULL;
6ebdcee2
JC
2607 int ret = -EIO;
2608
6ebdcee2
JC
2609 if (!top->drv || !base->drv) {
2610 goto exit;
2611 }
2612
2613 new_top_bs = bdrv_find_overlay(active, top);
2614
2615 if (new_top_bs == NULL) {
2616 /* we could not find the image above 'top', this is an error */
2617 goto exit;
2618 }
2619
760e0063 2620 /* special case of new_top_bs->backing->bs already pointing to base - nothing
6ebdcee2 2621 * to do, no intermediate images */
760e0063 2622 if (backing_bs(new_top_bs) == base) {
6ebdcee2
JC
2623 ret = 0;
2624 goto exit;
2625 }
2626
5db15a57
KW
2627 /* Make sure that base is in the backing chain of top */
2628 if (!bdrv_chain_contains(top, base)) {
6ebdcee2
JC
2629 goto exit;
2630 }
2631
2632 /* success - we can delete the intermediate states, and link top->base */
5db15a57 2633 backing_file_str = backing_file_str ? backing_file_str : base->filename;
54e26900 2634 ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
5db15a57 2635 base->drv ? base->drv->format_name : "");
6ebdcee2
JC
2636 if (ret) {
2637 goto exit;
2638 }
5db15a57 2639 bdrv_set_backing_hd(new_top_bs, base);
6ebdcee2 2640
6ebdcee2 2641 ret = 0;
6ebdcee2 2642exit:
6ebdcee2
JC
2643 return ret;
2644}
2645
61007b31
SH
2646/**
2647 * Truncate file to 'offset' bytes (needed only for file protocols)
2648 */
2649int bdrv_truncate(BlockDriverState *bs, int64_t offset)
71d0770c 2650{
61007b31
SH
2651 BlockDriver *drv = bs->drv;
2652 int ret;
2653 if (!drv)
71d0770c 2654 return -ENOMEDIUM;
61007b31
SH
2655 if (!drv->bdrv_truncate)
2656 return -ENOTSUP;
2657 if (bs->read_only)
2658 return -EACCES;
71d0770c 2659
61007b31
SH
2660 ret = drv->bdrv_truncate(bs, offset);
2661 if (ret == 0) {
2662 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
2663 bdrv_dirty_bitmap_truncate(bs);
2664 if (bs->blk) {
2665 blk_dev_resize_cb(bs->blk);
2666 }
c0191e76 2667 }
61007b31 2668 return ret;
71d0770c
AL
2669}
2670
61007b31
SH
2671/**
2672 * Length of a allocated file in bytes. Sparse files are counted by actual
2673 * allocated space. Return < 0 if error or unknown.
2674 */
2675int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
71d0770c 2676{
61007b31
SH
2677 BlockDriver *drv = bs->drv;
2678 if (!drv) {
2679 return -ENOMEDIUM;
8f4754ed 2680 }
61007b31
SH
2681 if (drv->bdrv_get_allocated_file_size) {
2682 return drv->bdrv_get_allocated_file_size(bs);
2683 }
2684 if (bs->file) {
9a4f4c31 2685 return bdrv_get_allocated_file_size(bs->file->bs);
1c9805a3 2686 }
61007b31 2687 return -ENOTSUP;
1c9805a3 2688}
e7a8a783 2689
61007b31
SH
2690/**
2691 * Return number of sectors on success, -errno on error.
1c9805a3 2692 */
61007b31 2693int64_t bdrv_nb_sectors(BlockDriverState *bs)
1c9805a3 2694{
61007b31 2695 BlockDriver *drv = bs->drv;
498e386c 2696
61007b31
SH
2697 if (!drv)
2698 return -ENOMEDIUM;
2572b37a 2699
61007b31
SH
2700 if (drv->has_variable_length) {
2701 int ret = refresh_total_sectors(bs, bs->total_sectors);
2702 if (ret < 0) {
2703 return ret;
1c9805a3
SH
2704 }
2705 }
61007b31 2706 return bs->total_sectors;
1c9805a3 2707}
b338082b 2708
61007b31
SH
2709/**
2710 * Return length in bytes on success, -errno on error.
2711 * The length is always a multiple of BDRV_SECTOR_SIZE.
8d3b1a2d 2712 */
61007b31 2713int64_t bdrv_getlength(BlockDriverState *bs)
8d3b1a2d 2714{
61007b31 2715 int64_t ret = bdrv_nb_sectors(bs);
8d3b1a2d 2716
4a9c9ea0 2717 ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret;
61007b31 2718 return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
fc01f7e7
FB
2719}
2720
61007b31
SH
2721/* return 0 as number of sectors if no device present or error */
2722void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
07d27a44 2723{
61007b31 2724 int64_t nb_sectors = bdrv_nb_sectors(bs);
07d27a44 2725
61007b31 2726 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
07d27a44
MA
2727}
2728
61007b31 2729int bdrv_is_read_only(BlockDriverState *bs)
8d3b1a2d 2730{
61007b31 2731 return bs->read_only;
83f64091 2732}
83f64091 2733
61007b31 2734int bdrv_is_sg(BlockDriverState *bs)
f08145fe 2735{
61007b31 2736 return bs->sg;
f08145fe
KW
2737}
2738
61007b31 2739int bdrv_enable_write_cache(BlockDriverState *bs)
ab185921 2740{
61007b31 2741 return bs->enable_write_cache;
ab185921
SH
2742}
2743
61007b31 2744void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
da1fa91d 2745{
61007b31 2746 bs->enable_write_cache = wce;
ab185921 2747
61007b31
SH
2748 /* so a reopen() will preserve wce */
2749 if (wce) {
2750 bs->open_flags |= BDRV_O_CACHE_WB;
893a8f62 2751 } else {
61007b31 2752 bs->open_flags &= ~BDRV_O_CACHE_WB;
893a8f62 2753 }
da1fa91d
KW
2754}
2755
61007b31 2756int bdrv_is_encrypted(BlockDriverState *bs)
fc3959e4 2757{
760e0063 2758 if (bs->backing && bs->backing->bs->encrypted) {
61007b31 2759 return 1;
760e0063 2760 }
61007b31 2761 return bs->encrypted;
fc3959e4
FZ
2762}
2763
61007b31 2764int bdrv_key_required(BlockDriverState *bs)
fc3959e4 2765{
760e0063 2766 BdrvChild *backing = bs->backing;
61007b31 2767
760e0063 2768 if (backing && backing->bs->encrypted && !backing->bs->valid_key) {
61007b31 2769 return 1;
760e0063 2770 }
61007b31 2771 return (bs->encrypted && !bs->valid_key);
fc3959e4
FZ
2772}
2773
61007b31 2774int bdrv_set_key(BlockDriverState *bs, const char *key)
d0c7f642 2775{
d0c7f642 2776 int ret;
760e0063
KW
2777 if (bs->backing && bs->backing->bs->encrypted) {
2778 ret = bdrv_set_key(bs->backing->bs, key);
61007b31
SH
2779 if (ret < 0)
2780 return ret;
2781 if (!bs->encrypted)
2782 return 0;
2783 }
2784 if (!bs->encrypted) {
2785 return -EINVAL;
2786 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
d0c7f642
KW
2787 return -ENOMEDIUM;
2788 }
61007b31 2789 ret = bs->drv->bdrv_set_key(bs, key);
b9c64947 2790 if (ret < 0) {
61007b31
SH
2791 bs->valid_key = 0;
2792 } else if (!bs->valid_key) {
2793 bs->valid_key = 1;
2794 if (bs->blk) {
2795 /* call the change callback now, we skipped it on open */
2796 blk_dev_change_media_cb(bs->blk, true);
2797 }
1b0288ae 2798 }
61007b31
SH
2799 return ret;
2800}
f08f2dda 2801
c5fbe571 2802/*
61007b31
SH
2803 * Provide an encryption key for @bs.
2804 * If @key is non-null:
2805 * If @bs is not encrypted, fail.
2806 * Else if the key is invalid, fail.
2807 * Else set @bs's key to @key, replacing the existing key, if any.
2808 * If @key is null:
2809 * If @bs is encrypted and still lacks a key, fail.
2810 * Else do nothing.
2811 * On failure, store an error object through @errp if non-null.
c5fbe571 2812 */
61007b31 2813void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
c5fbe571 2814{
61007b31
SH
2815 if (key) {
2816 if (!bdrv_is_encrypted(bs)) {
2817 error_setg(errp, "Node '%s' is not encrypted",
2818 bdrv_get_device_or_node_name(bs));
2819 } else if (bdrv_set_key(bs, key) < 0) {
c6bd8c70 2820 error_setg(errp, QERR_INVALID_PASSWORD);
4d2855a3
MA
2821 }
2822 } else {
2823 if (bdrv_key_required(bs)) {
b1ca6391
MA
2824 error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
2825 "'%s' (%s) is encrypted",
81e5f78a 2826 bdrv_get_device_or_node_name(bs),
4d2855a3
MA
2827 bdrv_get_encrypted_filename(bs));
2828 }
2829 }
2830}
2831
61007b31 2832const char *bdrv_get_format_name(BlockDriverState *bs)
40b4f539 2833{
61007b31 2834 return bs->drv ? bs->drv->format_name : NULL;
40b4f539
KW
2835}
2836
/*
 * qsort() comparator for an array of C string pointers.
 *
 * qsort() passes pointers to the array elements, i.e. pointers to the
 * 'const char *' slots, not the strings themselves. Each argument therefore
 * has to be dereferenced once before it can be handed to strcmp().
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
2841
61007b31
SH
2842void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
2843 void *opaque)
40b4f539 2844{
61007b31
SH
2845 BlockDriver *drv;
2846 int count = 0;
2847 int i;
2848 const char **formats = NULL;
40b4f539 2849
61007b31
SH
2850 QLIST_FOREACH(drv, &bdrv_drivers, list) {
2851 if (drv->format_name) {
2852 bool found = false;
2853 int i = count;
2854 while (formats && i && !found) {
2855 found = !strcmp(formats[--i], drv->format_name);
2856 }
e2a305fb 2857
61007b31
SH
2858 if (!found) {
2859 formats = g_renew(const char *, formats, count + 1);
2860 formats[count++] = drv->format_name;
2861 }
6c5a42ac 2862 }
61007b31 2863 }
6c5a42ac 2864
61007b31 2865 qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
40b4f539 2866
61007b31
SH
2867 for (i = 0; i < count; i++) {
2868 it(opaque, formats[i]);
2869 }
40b4f539 2870
61007b31
SH
2871 g_free(formats);
2872}
40b4f539 2873
61007b31
SH
2874/* This function is to find a node in the bs graph */
2875BlockDriverState *bdrv_find_node(const char *node_name)
2876{
2877 BlockDriverState *bs;
391827eb 2878
61007b31 2879 assert(node_name);
40b4f539 2880
61007b31
SH
2881 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2882 if (!strcmp(node_name, bs->node_name)) {
2883 return bs;
40b4f539
KW
2884 }
2885 }
61007b31 2886 return NULL;
40b4f539
KW
2887}
2888
61007b31
SH
2889/* Put this QMP function here so it can access the static graph_bdrv_states. */
2890BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
40b4f539 2891{
61007b31
SH
2892 BlockDeviceInfoList *list, *entry;
2893 BlockDriverState *bs;
40b4f539 2894
61007b31
SH
2895 list = NULL;
2896 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2897 BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);
2898 if (!info) {
2899 qapi_free_BlockDeviceInfoList(list);
2900 return NULL;
301db7c2 2901 }
61007b31
SH
2902 entry = g_malloc0(sizeof(*entry));
2903 entry->value = info;
2904 entry->next = list;
2905 list = entry;
301db7c2
RH
2906 }
2907
61007b31
SH
2908 return list;
2909}
40b4f539 2910
61007b31
SH
2911BlockDriverState *bdrv_lookup_bs(const char *device,
2912 const char *node_name,
2913 Error **errp)
2914{
2915 BlockBackend *blk;
2916 BlockDriverState *bs;
40b4f539 2917
61007b31
SH
2918 if (device) {
2919 blk = blk_by_name(device);
40b4f539 2920
61007b31 2921 if (blk) {
9f4ed6fb
AG
2922 bs = blk_bs(blk);
2923 if (!bs) {
5433c24f 2924 error_setg(errp, "Device '%s' has no medium", device);
5433c24f
HR
2925 }
2926
9f4ed6fb 2927 return bs;
61007b31
SH
2928 }
2929 }
40b4f539 2930
61007b31
SH
2931 if (node_name) {
2932 bs = bdrv_find_node(node_name);
6d519a5f 2933
61007b31
SH
2934 if (bs) {
2935 return bs;
2936 }
40b4f539
KW
2937 }
2938
61007b31
SH
2939 error_setg(errp, "Cannot find device=%s nor node_name=%s",
2940 device ? device : "",
2941 node_name ? node_name : "");
2942 return NULL;
40b4f539
KW
2943}
2944
61007b31
SH
2945/* If 'base' is in the same chain as 'top', return true. Otherwise,
2946 * return false. If either argument is NULL, return false. */
2947bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
83f64091 2948{
61007b31 2949 while (top && top != base) {
760e0063 2950 top = backing_bs(top);
02c50efe 2951 }
61007b31
SH
2952
2953 return top != NULL;
02c50efe
FZ
2954}
2955
61007b31 2956BlockDriverState *bdrv_next_node(BlockDriverState *bs)
02c50efe 2957{
61007b31
SH
2958 if (!bs) {
2959 return QTAILQ_FIRST(&graph_bdrv_states);
02c50efe 2960 }
61007b31 2961 return QTAILQ_NEXT(bs, node_list);
83f64091
FB
2962}
2963
26260580
HR
2964/* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by
2965 * the monitor or attached to a BlockBackend */
61007b31 2966BlockDriverState *bdrv_next(BlockDriverState *bs)
83f64091 2967{
26260580
HR
2968 if (!bs || bs->blk) {
2969 bs = blk_next_root_bs(bs);
2970 if (bs) {
2971 return bs;
2972 }
857d4f46 2973 }
26260580
HR
2974
2975 /* Ignore all BDSs that are attached to a BlockBackend here; they have been
2976 * handled by the above block already */
2977 do {
2978 bs = bdrv_next_monitor_owned(bs);
2979 } while (bs && bs->blk);
2980 return bs;
83f64091 2981}
beac80cd 2982
61007b31 2983const char *bdrv_get_node_name(const BlockDriverState *bs)
83f64091 2984{
61007b31 2985 return bs->node_name;
beac80cd
FB
2986}
2987
61007b31
SH
2988/* TODO check what callers really want: bs->node_name or blk_name() */
2989const char *bdrv_get_device_name(const BlockDriverState *bs)
beac80cd 2990{
61007b31 2991 return bs->blk ? blk_name(bs->blk) : "";
f141eafe 2992}
83f64091 2993
61007b31
SH
2994/* This can be used to identify nodes that might not have a device
2995 * name associated. Since node and device names live in the same
2996 * namespace, the result is unambiguous. The exception is if both are
2997 * absent, then this returns an empty (non-null) string. */
2998const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
f141eafe 2999{
61007b31 3000 return bs->blk ? blk_name(bs->blk) : bs->node_name;
beac80cd 3001}
beac80cd 3002
61007b31 3003int bdrv_get_flags(BlockDriverState *bs)
0b5a2445 3004{
61007b31 3005 return bs->open_flags;
0b5a2445
PB
3006}
3007
61007b31 3008int bdrv_has_zero_init_1(BlockDriverState *bs)
68485420 3009{
61007b31 3010 return 1;
0b5a2445
PB
3011}
3012
61007b31 3013int bdrv_has_zero_init(BlockDriverState *bs)
0b5a2445 3014{
61007b31 3015 assert(bs->drv);
0b5a2445 3016
61007b31
SH
3017 /* If BS is a copy on write image, it is initialized to
3018 the contents of the base image, which may not be zeroes. */
760e0063 3019 if (bs->backing) {
61007b31
SH
3020 return 0;
3021 }
3022 if (bs->drv->bdrv_has_zero_init) {
3023 return bs->drv->bdrv_has_zero_init(bs);
0b5a2445 3024 }
61007b31
SH
3025
3026 /* safe default */
3027 return 0;
68485420
KW
3028}
3029
61007b31 3030bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
b2a61371 3031{
61007b31 3032 BlockDriverInfo bdi;
b2a61371 3033
760e0063 3034 if (bs->backing) {
61007b31
SH
3035 return false;
3036 }
3037
3038 if (bdrv_get_info(bs, &bdi) == 0) {
3039 return bdi.unallocated_blocks_are_zero;
b2a61371
SH
3040 }
3041
61007b31 3042 return false;
b2a61371
SH
3043}
3044
61007b31 3045bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
68485420 3046{
61007b31 3047 BlockDriverInfo bdi;
68485420 3048
760e0063 3049 if (bs->backing || !(bs->open_flags & BDRV_O_UNMAP)) {
61007b31
SH
3050 return false;
3051 }
68485420 3052
61007b31
SH
3053 if (bdrv_get_info(bs, &bdi) == 0) {
3054 return bdi.can_write_zeroes_with_unmap;
3055 }
68485420 3056
61007b31 3057 return false;
68485420
KW
3058}
3059
61007b31 3060const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
b2e12bc6 3061{
760e0063 3062 if (bs->backing && bs->backing->bs->encrypted)
61007b31
SH
3063 return bs->backing_file;
3064 else if (bs->encrypted)
3065 return bs->filename;
3066 else
3067 return NULL;
b2e12bc6
CH
3068}
3069
61007b31
SH
3070void bdrv_get_backing_filename(BlockDriverState *bs,
3071 char *filename, int filename_size)
016f5cf6 3072{
61007b31
SH
3073 pstrcpy(filename, filename_size, bs->backing_file);
3074}
d318aea9 3075
61007b31
SH
3076int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
3077{
3078 BlockDriver *drv = bs->drv;
3079 if (!drv)
3080 return -ENOMEDIUM;
3081 if (!drv->bdrv_get_info)
3082 return -ENOTSUP;
3083 memset(bdi, 0, sizeof(*bdi));
3084 return drv->bdrv_get_info(bs, bdi);
3085}
016f5cf6 3086
61007b31
SH
3087ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
3088{
3089 BlockDriver *drv = bs->drv;
3090 if (drv && drv->bdrv_get_specific_info) {
3091 return drv->bdrv_get_specific_info(bs);
3092 }
3093 return NULL;
016f5cf6
AG
3094}
3095
a31939e6 3096void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event)
4265d620 3097{
61007b31
SH
3098 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
3099 return;
3100 }
4265d620 3101
61007b31 3102 bs->drv->bdrv_debug_event(bs, event);
4265d620
PB
3103}
3104
61007b31
SH
3105int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
3106 const char *tag)
4265d620 3107{
61007b31 3108 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
9a4f4c31 3109 bs = bs->file ? bs->file->bs : NULL;
61007b31 3110 }
4265d620 3111
61007b31
SH
3112 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
3113 return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
3114 }
4265d620 3115
61007b31 3116 return -ENOTSUP;
4265d620
PB
3117}
3118
61007b31 3119int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
ea2384d3 3120{
61007b31 3121 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
9a4f4c31 3122 bs = bs->file ? bs->file->bs : NULL;
61007b31 3123 }
ce1a14dc 3124
61007b31
SH
3125 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
3126 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
3127 }
3128
3129 return -ENOTSUP;
eb852011
MA
3130}
3131
61007b31 3132int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
ce1a14dc 3133{
61007b31 3134 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
9a4f4c31 3135 bs = bs->file ? bs->file->bs : NULL;
61007b31 3136 }
ce1a14dc 3137
61007b31
SH
3138 if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
3139 return bs->drv->bdrv_debug_resume(bs, tag);
3140 }
ce1a14dc 3141
61007b31 3142 return -ENOTSUP;
f197fe2b
FZ
3143}
3144
61007b31 3145bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
ce1a14dc 3146{
61007b31 3147 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
9a4f4c31 3148 bs = bs->file ? bs->file->bs : NULL;
f197fe2b 3149 }
19cb3738 3150
61007b31
SH
3151 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
3152 return bs->drv->bdrv_debug_is_suspended(bs, tag);
3153 }
f9f05dc5 3154
61007b31
SH
3155 return false;
3156}
f9f05dc5 3157
61007b31 3158int bdrv_is_snapshot(BlockDriverState *bs)
f9f05dc5 3159{
61007b31 3160 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
f9f05dc5
KW
3161}
3162
61007b31
SH
3163/* backing_file can either be relative, or absolute, or a protocol. If it is
3164 * relative, it must be relative to the chain. So, passing in bs->filename
3165 * from a BDS as backing_file should not be done, as that may be relative to
3166 * the CWD rather than the chain. */
3167BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
3168 const char *backing_file)
f9f05dc5 3169{
61007b31
SH
3170 char *filename_full = NULL;
3171 char *backing_file_full = NULL;
3172 char *filename_tmp = NULL;
3173 int is_protocol = 0;
3174 BlockDriverState *curr_bs = NULL;
3175 BlockDriverState *retval = NULL;
f9f05dc5 3176
61007b31
SH
3177 if (!bs || !bs->drv || !backing_file) {
3178 return NULL;
f9f05dc5
KW
3179 }
3180
61007b31
SH
3181 filename_full = g_malloc(PATH_MAX);
3182 backing_file_full = g_malloc(PATH_MAX);
3183 filename_tmp = g_malloc(PATH_MAX);
f9f05dc5 3184
61007b31 3185 is_protocol = path_has_protocol(backing_file);
f9f05dc5 3186
760e0063 3187 for (curr_bs = bs; curr_bs->backing; curr_bs = curr_bs->backing->bs) {
f9f05dc5 3188
61007b31
SH
3189 /* If either of the filename paths is actually a protocol, then
3190 * compare unmodified paths; otherwise make paths relative */
3191 if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
3192 if (strcmp(backing_file, curr_bs->backing_file) == 0) {
760e0063 3193 retval = curr_bs->backing->bs;
61007b31
SH
3194 break;
3195 }
3196 } else {
3197 /* If not an absolute filename path, make it relative to the current
3198 * image's filename path */
3199 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3200 backing_file);
f9f05dc5 3201
61007b31
SH
3202 /* We are going to compare absolute pathnames */
3203 if (!realpath(filename_tmp, filename_full)) {
3204 continue;
3205 }
07f07615 3206
61007b31
SH
3207 /* We need to make sure the backing filename we are comparing against
3208 * is relative to the current image filename (or absolute) */
3209 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3210 curr_bs->backing_file);
07f07615 3211
61007b31
SH
3212 if (!realpath(filename_tmp, backing_file_full)) {
3213 continue;
3214 }
eb489bb1 3215
61007b31 3216 if (strcmp(backing_file_full, filename_full) == 0) {
760e0063 3217 retval = curr_bs->backing->bs;
61007b31
SH
3218 break;
3219 }
3220 }
eb489bb1
KW
3221 }
3222
61007b31
SH
3223 g_free(filename_full);
3224 g_free(backing_file_full);
3225 g_free(filename_tmp);
3226 return retval;
3227}
3228
3229int bdrv_get_backing_file_depth(BlockDriverState *bs)
3230{
3231 if (!bs->drv) {
3232 return 0;
eb489bb1
KW
3233 }
3234
760e0063 3235 if (!bs->backing) {
61007b31 3236 return 0;
ca716364
KW
3237 }
3238
760e0063 3239 return 1 + bdrv_get_backing_file_depth(bs->backing->bs);
61007b31 3240}
07f07615 3241
61007b31
SH
3242void bdrv_init(void)
3243{
3244 module_call_init(MODULE_INIT_BLOCK);
3245}
29cdb251 3246
61007b31
SH
3247void bdrv_init_with_whitelist(void)
3248{
3249 use_bdrv_whitelist = 1;
3250 bdrv_init();
07f07615
PB
3251}
3252
5a8a30db 3253void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
0f15423c 3254{
5a8a30db
KW
3255 Error *local_err = NULL;
3256 int ret;
3257
3456a8d1
KW
3258 if (!bs->drv) {
3259 return;
3260 }
3261
04c01a5c 3262 if (!(bs->open_flags & BDRV_O_INACTIVE)) {
7ea2d269
AK
3263 return;
3264 }
04c01a5c 3265 bs->open_flags &= ~BDRV_O_INACTIVE;
7ea2d269 3266
3456a8d1 3267 if (bs->drv->bdrv_invalidate_cache) {
5a8a30db 3268 bs->drv->bdrv_invalidate_cache(bs, &local_err);
3456a8d1 3269 } else if (bs->file) {
9a4f4c31 3270 bdrv_invalidate_cache(bs->file->bs, &local_err);
5a8a30db
KW
3271 }
3272 if (local_err) {
04c01a5c 3273 bs->open_flags |= BDRV_O_INACTIVE;
5a8a30db
KW
3274 error_propagate(errp, local_err);
3275 return;
0f15423c 3276 }
3456a8d1 3277
5a8a30db
KW
3278 ret = refresh_total_sectors(bs, bs->total_sectors);
3279 if (ret < 0) {
04c01a5c 3280 bs->open_flags |= BDRV_O_INACTIVE;
5a8a30db
KW
3281 error_setg_errno(errp, -ret, "Could not refresh total sector count");
3282 return;
3283 }
0f15423c
AL
3284}
3285
5a8a30db 3286void bdrv_invalidate_cache_all(Error **errp)
0f15423c 3287{
79720af6 3288 BlockDriverState *bs = NULL;
5a8a30db 3289 Error *local_err = NULL;
0f15423c 3290
79720af6 3291 while ((bs = bdrv_next(bs)) != NULL) {
ed78cda3
SH
3292 AioContext *aio_context = bdrv_get_aio_context(bs);
3293
3294 aio_context_acquire(aio_context);
5a8a30db 3295 bdrv_invalidate_cache(bs, &local_err);
ed78cda3 3296 aio_context_release(aio_context);
5a8a30db
KW
3297 if (local_err) {
3298 error_propagate(errp, local_err);
3299 return;
3300 }
0f15423c
AL
3301 }
3302}
3303
76b1c7fe
KW
3304static int bdrv_inactivate(BlockDriverState *bs)
3305{
3306 int ret;
3307
3308 if (bs->drv->bdrv_inactivate) {
3309 ret = bs->drv->bdrv_inactivate(bs);
3310 if (ret < 0) {
3311 return ret;
3312 }
3313 }
3314
3315 bs->open_flags |= BDRV_O_INACTIVE;
3316 return 0;
3317}
3318
3319int bdrv_inactivate_all(void)
3320{
79720af6 3321 BlockDriverState *bs = NULL;
76b1c7fe
KW
3322 int ret;
3323
79720af6 3324 while ((bs = bdrv_next(bs)) != NULL) {
76b1c7fe
KW
3325 AioContext *aio_context = bdrv_get_aio_context(bs);
3326
3327 aio_context_acquire(aio_context);
3328 ret = bdrv_inactivate(bs);
3329 aio_context_release(aio_context);
3330 if (ret < 0) {
3331 return ret;
3332 }
3333 }
3334
3335 return 0;
3336}
3337
19cb3738
FB
3338/**************************************************************/
3339/* removable device support */
3340
3341/**
3342 * Return TRUE if the media is present
3343 */
e031f750 3344bool bdrv_is_inserted(BlockDriverState *bs)
19cb3738
FB
3345{
3346 BlockDriver *drv = bs->drv;
28d7a789 3347 BdrvChild *child;
a1aff5bf 3348
e031f750
HR
3349 if (!drv) {
3350 return false;
3351 }
28d7a789
HR
3352 if (drv->bdrv_is_inserted) {
3353 return drv->bdrv_is_inserted(bs);
3354 }
3355 QLIST_FOREACH(child, &bs->children, next) {
3356 if (!bdrv_is_inserted(child->bs)) {
3357 return false;
3358 }
e031f750 3359 }
28d7a789 3360 return true;
19cb3738
FB
3361}
3362
3363/**
8e49ca46
MA
3364 * Return whether the media changed since the last call to this
3365 * function, or -ENOTSUP if we don't know. Most drivers don't know.
19cb3738
FB
3366 */
3367int bdrv_media_changed(BlockDriverState *bs)
3368{
3369 BlockDriver *drv = bs->drv;
19cb3738 3370
8e49ca46
MA
3371 if (drv && drv->bdrv_media_changed) {
3372 return drv->bdrv_media_changed(bs);
3373 }
3374 return -ENOTSUP;
19cb3738
FB
3375}
3376
3377/**
3378 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3379 */
f36f3949 3380void bdrv_eject(BlockDriverState *bs, bool eject_flag)
19cb3738
FB
3381{
3382 BlockDriver *drv = bs->drv;
bfb197e0 3383 const char *device_name;
19cb3738 3384
822e1cd1
MA
3385 if (drv && drv->bdrv_eject) {
3386 drv->bdrv_eject(bs, eject_flag);
19cb3738 3387 }
6f382ed2 3388
bfb197e0
MA
3389 device_name = bdrv_get_device_name(bs);
3390 if (device_name[0] != '\0') {
3391 qapi_event_send_device_tray_moved(device_name,
a5ee7bd4 3392 eject_flag, &error_abort);
6f382ed2 3393 }
19cb3738
FB
3394}
3395
19cb3738
FB
3396/**
3397 * Lock or unlock the media (if it is locked, the user won't be able
3398 * to eject it manually).
3399 */
025e849a 3400void bdrv_lock_medium(BlockDriverState *bs, bool locked)
19cb3738
FB
3401{
3402 BlockDriver *drv = bs->drv;
3403
025e849a 3404 trace_bdrv_lock_medium(bs, locked);
b8c6d095 3405
025e849a
MA
3406 if (drv && drv->bdrv_lock_medium) {
3407 drv->bdrv_lock_medium(bs, locked);
19cb3738
FB
3408 }
3409}
985a03b0 3410
9fcb0251
FZ
3411/* Get a reference to bs */
3412void bdrv_ref(BlockDriverState *bs)
3413{
3414 bs->refcnt++;
3415}
3416
3417/* Release a previously grabbed reference to bs.
3418 * If after releasing, reference count is zero, the BlockDriverState is
3419 * deleted. */
3420void bdrv_unref(BlockDriverState *bs)
3421{
9a4d5ca6
JC
3422 if (!bs) {
3423 return;
3424 }
9fcb0251
FZ
3425 assert(bs->refcnt > 0);
3426 if (--bs->refcnt == 0) {
3427 bdrv_delete(bs);
3428 }
3429}
3430
fbe40ff7
FZ
3431struct BdrvOpBlocker {
3432 Error *reason;
3433 QLIST_ENTRY(BdrvOpBlocker) list;
3434};
3435
3436bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
3437{
3438 BdrvOpBlocker *blocker;
3439 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3440 if (!QLIST_EMPTY(&bs->op_blockers[op])) {
3441 blocker = QLIST_FIRST(&bs->op_blockers[op]);
3442 if (errp) {
e43bfd9c
MA
3443 *errp = error_copy(blocker->reason);
3444 error_prepend(errp, "Node '%s' is busy: ",
3445 bdrv_get_device_or_node_name(bs));
fbe40ff7
FZ
3446 }
3447 return true;
3448 }
3449 return false;
3450}
3451
3452void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
3453{
3454 BdrvOpBlocker *blocker;
3455 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3456
5839e53b 3457 blocker = g_new0(BdrvOpBlocker, 1);
fbe40ff7
FZ
3458 blocker->reason = reason;
3459 QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
3460}
3461
3462void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
3463{
3464 BdrvOpBlocker *blocker, *next;
3465 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3466 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
3467 if (blocker->reason == reason) {
3468 QLIST_REMOVE(blocker, list);
3469 g_free(blocker);
3470 }
3471 }
3472}
3473
3474void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
3475{
3476 int i;
3477 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3478 bdrv_op_block(bs, i, reason);
3479 }
3480}
3481
3482void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
3483{
3484 int i;
3485 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3486 bdrv_op_unblock(bs, i, reason);
3487 }
3488}
3489
3490bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
3491{
3492 int i;
3493
3494 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3495 if (!QLIST_EMPTY(&bs->op_blockers[i])) {
3496 return false;
3497 }
3498 }
3499 return true;
3500}
3501
d92ada22
LC
3502void bdrv_img_create(const char *filename, const char *fmt,
3503 const char *base_filename, const char *base_fmt,
f382d43a
MR
3504 char *options, uint64_t img_size, int flags,
3505 Error **errp, bool quiet)
f88e1a42 3506{
83d0521a
CL
3507 QemuOptsList *create_opts = NULL;
3508 QemuOpts *opts = NULL;
3509 const char *backing_fmt, *backing_file;
3510 int64_t size;
f88e1a42 3511 BlockDriver *drv, *proto_drv;
cc84d90f 3512 Error *local_err = NULL;
f88e1a42
JS
3513 int ret = 0;
3514
3515 /* Find driver and parse its options */
3516 drv = bdrv_find_format(fmt);
3517 if (!drv) {
71c79813 3518 error_setg(errp, "Unknown file format '%s'", fmt);
d92ada22 3519 return;
f88e1a42
JS
3520 }
3521
b65a5e12 3522 proto_drv = bdrv_find_protocol(filename, true, errp);
f88e1a42 3523 if (!proto_drv) {
d92ada22 3524 return;
f88e1a42
JS
3525 }
3526
c6149724
HR
3527 if (!drv->create_opts) {
3528 error_setg(errp, "Format driver '%s' does not support image creation",
3529 drv->format_name);
3530 return;
3531 }
3532
3533 if (!proto_drv->create_opts) {
3534 error_setg(errp, "Protocol driver '%s' does not support image creation",
3535 proto_drv->format_name);
3536 return;
3537 }
3538
c282e1fd
CL
3539 create_opts = qemu_opts_append(create_opts, drv->create_opts);
3540 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
f88e1a42
JS
3541
3542 /* Create parameter list with default values */
83d0521a 3543 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
39101f25 3544 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
f88e1a42
JS
3545
3546 /* Parse -o options */
3547 if (options) {
dc523cd3
MA
3548 qemu_opts_do_parse(opts, options, NULL, &local_err);
3549 if (local_err) {
3550 error_report_err(local_err);
3551 local_err = NULL;
83d0521a 3552 error_setg(errp, "Invalid options for file format '%s'", fmt);
f88e1a42
JS
3553 goto out;
3554 }
3555 }
3556
3557 if (base_filename) {
f43e47db 3558 qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
6be4194b 3559 if (local_err) {
71c79813
LC
3560 error_setg(errp, "Backing file not supported for file format '%s'",
3561 fmt);
f88e1a42
JS
3562 goto out;
3563 }
3564 }
3565
3566 if (base_fmt) {
f43e47db 3567 qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
6be4194b 3568 if (local_err) {
71c79813
LC
3569 error_setg(errp, "Backing file format not supported for file "
3570 "format '%s'", fmt);
f88e1a42
JS
3571 goto out;
3572 }
3573 }
3574
83d0521a
CL
3575 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
3576 if (backing_file) {
3577 if (!strcmp(filename, backing_file)) {
71c79813
LC
3578 error_setg(errp, "Error: Trying to create an image with the "
3579 "same filename as the backing file");
792da93a
JS
3580 goto out;
3581 }
3582 }
3583
83d0521a 3584 backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
f88e1a42
JS
3585
3586 // The size for the image must always be specified, with one exception:
3587 // If we are using a backing file, we can obtain the size from there
83d0521a
CL
3588 size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
3589 if (size == -1) {
3590 if (backing_file) {
66f6b814 3591 BlockDriverState *bs;
29168018 3592 char *full_backing = g_new0(char, PATH_MAX);
52bf1e72 3593 int64_t size;
63090dac 3594 int back_flags;
e6641719 3595 QDict *backing_options = NULL;
63090dac 3596
29168018
HR
3597 bdrv_get_full_backing_filename_from_filename(filename, backing_file,
3598 full_backing, PATH_MAX,
3599 &local_err);
3600 if (local_err) {
3601 g_free(full_backing);
3602 goto out;
3603 }
3604
63090dac
PB
3605 /* backing files always opened read-only */
3606 back_flags =
3607 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
f88e1a42 3608
e6641719
HR
3609 if (backing_fmt) {
3610 backing_options = qdict_new();
3611 qdict_put(backing_options, "driver",
3612 qstring_from_str(backing_fmt));
3613 }
3614
f67503e5 3615 bs = NULL;
e6641719 3616 ret = bdrv_open(&bs, full_backing, NULL, backing_options,
6ebf9aa2 3617 back_flags, &local_err);
29168018 3618 g_free(full_backing);
f88e1a42 3619 if (ret < 0) {
f88e1a42
JS
3620 goto out;
3621 }
52bf1e72
MA
3622 size = bdrv_getlength(bs);
3623 if (size < 0) {
3624 error_setg_errno(errp, -size, "Could not get size of '%s'",
3625 backing_file);
3626 bdrv_unref(bs);
3627 goto out;
3628 }
f88e1a42 3629
39101f25 3630 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
66f6b814
HR
3631
3632 bdrv_unref(bs);
f88e1a42 3633 } else {
71c79813 3634 error_setg(errp, "Image creation needs a size parameter");
f88e1a42
JS
3635 goto out;
3636 }
3637 }
3638
f382d43a 3639 if (!quiet) {
fe646693 3640 printf("Formatting '%s', fmt=%s ", filename, fmt);
43c5d8f8 3641 qemu_opts_print(opts, " ");
f382d43a
MR
3642 puts("");
3643 }
83d0521a 3644
c282e1fd 3645 ret = bdrv_create(drv, filename, opts, &local_err);
83d0521a 3646
cc84d90f
HR
3647 if (ret == -EFBIG) {
3648 /* This is generally a better message than whatever the driver would
3649 * deliver (especially because of the cluster_size_hint), since that
3650 * is most probably not much different from "image too large". */
3651 const char *cluster_size_hint = "";
83d0521a 3652 if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
cc84d90f 3653 cluster_size_hint = " (try using a larger cluster size)";
f88e1a42 3654 }
cc84d90f
HR
3655 error_setg(errp, "The image size is too large for file format '%s'"
3656 "%s", fmt, cluster_size_hint);
3657 error_free(local_err);
3658 local_err = NULL;
f88e1a42
JS
3659 }
3660
3661out:
83d0521a
CL
3662 qemu_opts_del(opts);
3663 qemu_opts_free(create_opts);
84d18f06 3664 if (local_err) {
cc84d90f
HR
3665 error_propagate(errp, local_err);
3666 }
f88e1a42 3667}
85d126f3
SH
3668
3669AioContext *bdrv_get_aio_context(BlockDriverState *bs)
3670{
dcd04228
SH
3671 return bs->aio_context;
3672}
3673
3674void bdrv_detach_aio_context(BlockDriverState *bs)
3675{
33384421
HR
3676 BdrvAioNotifier *baf;
3677
dcd04228
SH
3678 if (!bs->drv) {
3679 return;
3680 }
3681
33384421
HR
3682 QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
3683 baf->detach_aio_context(baf->opaque);
3684 }
3685
a0d64a61 3686 if (bs->throttle_state) {
0e5b0a2d 3687 throttle_timers_detach_aio_context(&bs->throttle_timers);
13af91eb 3688 }
dcd04228
SH
3689 if (bs->drv->bdrv_detach_aio_context) {
3690 bs->drv->bdrv_detach_aio_context(bs);
3691 }
3692 if (bs->file) {
9a4f4c31 3693 bdrv_detach_aio_context(bs->file->bs);
dcd04228 3694 }
760e0063
KW
3695 if (bs->backing) {
3696 bdrv_detach_aio_context(bs->backing->bs);
dcd04228
SH
3697 }
3698
3699 bs->aio_context = NULL;
3700}
3701
3702void bdrv_attach_aio_context(BlockDriverState *bs,
3703 AioContext *new_context)
3704{
33384421
HR
3705 BdrvAioNotifier *ban;
3706
dcd04228
SH
3707 if (!bs->drv) {
3708 return;
3709 }
3710
3711 bs->aio_context = new_context;
3712
760e0063
KW
3713 if (bs->backing) {
3714 bdrv_attach_aio_context(bs->backing->bs, new_context);
dcd04228
SH
3715 }
3716 if (bs->file) {
9a4f4c31 3717 bdrv_attach_aio_context(bs->file->bs, new_context);
dcd04228
SH
3718 }
3719 if (bs->drv->bdrv_attach_aio_context) {
3720 bs->drv->bdrv_attach_aio_context(bs, new_context);
3721 }
a0d64a61 3722 if (bs->throttle_state) {
0e5b0a2d 3723 throttle_timers_attach_aio_context(&bs->throttle_timers, new_context);
13af91eb 3724 }
33384421
HR
3725
3726 QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
3727 ban->attached_aio_context(new_context, ban->opaque);
3728 }
dcd04228
SH
3729}
3730
3731void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
3732{
53ec73e2 3733 bdrv_drain(bs); /* ensure there are no in-flight requests */
dcd04228
SH
3734
3735 bdrv_detach_aio_context(bs);
3736
3737 /* This function executes in the old AioContext so acquire the new one in
3738 * case it runs in a different thread.
3739 */
3740 aio_context_acquire(new_context);
3741 bdrv_attach_aio_context(bs, new_context);
3742 aio_context_release(new_context);
85d126f3 3743}
d616b224 3744
33384421
HR
3745void bdrv_add_aio_context_notifier(BlockDriverState *bs,
3746 void (*attached_aio_context)(AioContext *new_context, void *opaque),
3747 void (*detach_aio_context)(void *opaque), void *opaque)
3748{
3749 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
3750 *ban = (BdrvAioNotifier){
3751 .attached_aio_context = attached_aio_context,
3752 .detach_aio_context = detach_aio_context,
3753 .opaque = opaque
3754 };
3755
3756 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
3757}
3758
3759void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
3760 void (*attached_aio_context)(AioContext *,
3761 void *),
3762 void (*detach_aio_context)(void *),
3763 void *opaque)
3764{
3765 BdrvAioNotifier *ban, *ban_next;
3766
3767 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
3768 if (ban->attached_aio_context == attached_aio_context &&
3769 ban->detach_aio_context == detach_aio_context &&
3770 ban->opaque == opaque)
3771 {
3772 QLIST_REMOVE(ban, list);
3773 g_free(ban);
3774
3775 return;
3776 }
3777 }
3778
3779 abort();
3780}
3781
77485434 3782int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
8b13976d 3783 BlockDriverAmendStatusCB *status_cb, void *cb_opaque)
6f176b48 3784{
c282e1fd 3785 if (!bs->drv->bdrv_amend_options) {
6f176b48
HR
3786 return -ENOTSUP;
3787 }
8b13976d 3788 return bs->drv->bdrv_amend_options(bs, opts, status_cb, cb_opaque);
6f176b48 3789}
f6186f49 3790
b5042a36
BC
3791/* This function will be called by the bdrv_recurse_is_first_non_filter method
3792 * of block filter and by bdrv_is_first_non_filter.
3793 * It is used to test if the given bs is the candidate or recurse more in the
3794 * node graph.
212a5a8f 3795 */
b5042a36 3796bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
212a5a8f 3797 BlockDriverState *candidate)
f6186f49 3798{
b5042a36
BC
3799 /* return false if basic checks fails */
3800 if (!bs || !bs->drv) {
212a5a8f 3801 return false;
f6186f49
BC
3802 }
3803
b5042a36
BC
3804 /* the code reached a non block filter driver -> check if the bs is
3805 * the same as the candidate. It's the recursion termination condition.
3806 */
3807 if (!bs->drv->is_filter) {
3808 return bs == candidate;
212a5a8f 3809 }
b5042a36 3810 /* Down this path the driver is a block filter driver */
212a5a8f 3811
b5042a36
BC
3812 /* If the block filter recursion method is defined use it to recurse down
3813 * the node graph.
3814 */
3815 if (bs->drv->bdrv_recurse_is_first_non_filter) {
212a5a8f 3816 return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
f6186f49
BC
3817 }
3818
b5042a36
BC
3819 /* the driver is a block filter but don't allow to recurse -> return false
3820 */
3821 return false;
f6186f49
BC
3822}
3823
212a5a8f
BC
3824/* This function checks if the candidate is the first non filter bs down it's
3825 * bs chain. Since we don't have pointers to parents it explore all bs chains
3826 * from the top. Some filters can choose not to pass down the recursion.
3827 */
3828bool bdrv_is_first_non_filter(BlockDriverState *candidate)
f6186f49 3829{
79720af6 3830 BlockDriverState *bs = NULL;
212a5a8f
BC
3831
3832 /* walk down the bs forest recursively */
79720af6 3833 while ((bs = bdrv_next(bs)) != NULL) {
212a5a8f
BC
3834 bool perm;
3835
b5042a36 3836 /* try to recurse in this top level bs */
e6dc8a1f 3837 perm = bdrv_recurse_is_first_non_filter(bs, candidate);
212a5a8f
BC
3838
3839 /* candidate is the first non filter */
3840 if (perm) {
3841 return true;
3842 }
3843 }
3844
3845 return false;
f6186f49 3846}
09158f00 3847
e12f3784
WC
3848BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
3849 const char *node_name, Error **errp)
09158f00
BC
3850{
3851 BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
5a7e7a0b
SH
3852 AioContext *aio_context;
3853
09158f00
BC
3854 if (!to_replace_bs) {
3855 error_setg(errp, "Node name '%s' not found", node_name);
3856 return NULL;
3857 }
3858
5a7e7a0b
SH
3859 aio_context = bdrv_get_aio_context(to_replace_bs);
3860 aio_context_acquire(aio_context);
3861
09158f00 3862 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
5a7e7a0b
SH
3863 to_replace_bs = NULL;
3864 goto out;
09158f00
BC
3865 }
3866
3867 /* We don't want arbitrary node of the BDS chain to be replaced only the top
3868 * most non filter in order to prevent data corruption.
3869 * Another benefit is that this tests exclude backing files which are
3870 * blocked by the backing blockers.
3871 */
e12f3784 3872 if (!bdrv_recurse_is_first_non_filter(parent_bs, to_replace_bs)) {
09158f00 3873 error_setg(errp, "Only top most non filter can be replaced");
5a7e7a0b
SH
3874 to_replace_bs = NULL;
3875 goto out;
09158f00
BC
3876 }
3877
5a7e7a0b
SH
3878out:
3879 aio_context_release(aio_context);
09158f00
BC
3880 return to_replace_bs;
3881}
448ad91d 3882
91af7014
HR
3883static bool append_open_options(QDict *d, BlockDriverState *bs)
3884{
3885 const QDictEntry *entry;
9e700c1a 3886 QemuOptDesc *desc;
260fecf1 3887 BdrvChild *child;
91af7014 3888 bool found_any = false;
260fecf1 3889 const char *p;
91af7014
HR
3890
3891 for (entry = qdict_first(bs->options); entry;
3892 entry = qdict_next(bs->options, entry))
3893 {
260fecf1
KW
3894 /* Exclude options for children */
3895 QLIST_FOREACH(child, &bs->children, next) {
3896 if (strstart(qdict_entry_key(entry), child->name, &p)
3897 && (!*p || *p == '.'))
3898 {
3899 break;
3900 }
3901 }
3902 if (child) {
9e700c1a 3903 continue;
91af7014 3904 }
9e700c1a
KW
3905
3906 /* And exclude all non-driver-specific options */
3907 for (desc = bdrv_runtime_opts.desc; desc->name; desc++) {
3908 if (!strcmp(qdict_entry_key(entry), desc->name)) {
3909 break;
3910 }
3911 }
3912 if (desc->name) {
3913 continue;
3914 }
3915
3916 qobject_incref(qdict_entry_value(entry));
3917 qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
3918 found_any = true;
91af7014
HR
3919 }
3920
3921 return found_any;
3922}
3923
3924/* Updates the following BDS fields:
3925 * - exact_filename: A filename which may be used for opening a block device
3926 * which (mostly) equals the given BDS (even without any
3927 * other options; so reading and writing must return the same
3928 * results, but caching etc. may be different)
3929 * - full_open_options: Options which, when given when opening a block device
3930 * (without a filename), result in a BDS (mostly)
3931 * equalling the given one
3932 * - filename: If exact_filename is set, it is copied here. Otherwise,
3933 * full_open_options is converted to a JSON object, prefixed with
3934 * "json:" (for use through the JSON pseudo protocol) and put here.
3935 */
3936void bdrv_refresh_filename(BlockDriverState *bs)
3937{
3938 BlockDriver *drv = bs->drv;
3939 QDict *opts;
3940
3941 if (!drv) {
3942 return;
3943 }
3944
3945 /* This BDS's file name will most probably depend on its file's name, so
3946 * refresh that first */
3947 if (bs->file) {
9a4f4c31 3948 bdrv_refresh_filename(bs->file->bs);
91af7014
HR
3949 }
3950
3951 if (drv->bdrv_refresh_filename) {
3952 /* Obsolete information is of no use here, so drop the old file name
3953 * information before refreshing it */
3954 bs->exact_filename[0] = '\0';
3955 if (bs->full_open_options) {
3956 QDECREF(bs->full_open_options);
3957 bs->full_open_options = NULL;
3958 }
3959
4cdd01d3
KW
3960 opts = qdict_new();
3961 append_open_options(opts, bs);
3962 drv->bdrv_refresh_filename(bs, opts);
3963 QDECREF(opts);
91af7014
HR
3964 } else if (bs->file) {
3965 /* Try to reconstruct valid information from the underlying file */
3966 bool has_open_options;
3967
3968 bs->exact_filename[0] = '\0';
3969 if (bs->full_open_options) {
3970 QDECREF(bs->full_open_options);
3971 bs->full_open_options = NULL;
3972 }
3973
3974 opts = qdict_new();
3975 has_open_options = append_open_options(opts, bs);
3976
3977 /* If no specific options have been given for this BDS, the filename of
3978 * the underlying file should suffice for this one as well */
9a4f4c31
KW
3979 if (bs->file->bs->exact_filename[0] && !has_open_options) {
3980 strcpy(bs->exact_filename, bs->file->bs->exact_filename);
91af7014
HR
3981 }
3982 /* Reconstructing the full options QDict is simple for most format block
3983 * drivers, as long as the full options are known for the underlying
3984 * file BDS. The full options QDict of that file BDS should somehow
3985 * contain a representation of the filename, therefore the following
3986 * suffices without querying the (exact_)filename of this BDS. */
9a4f4c31 3987 if (bs->file->bs->full_open_options) {
91af7014
HR
3988 qdict_put_obj(opts, "driver",
3989 QOBJECT(qstring_from_str(drv->format_name)));
9a4f4c31
KW
3990 QINCREF(bs->file->bs->full_open_options);
3991 qdict_put_obj(opts, "file",
3992 QOBJECT(bs->file->bs->full_open_options));
91af7014
HR
3993
3994 bs->full_open_options = opts;
3995 } else {
3996 QDECREF(opts);
3997 }
3998 } else if (!bs->full_open_options && qdict_size(bs->options)) {
3999 /* There is no underlying file BDS (at least referenced by BDS.file),
4000 * so the full options QDict should be equal to the options given
4001 * specifically for this block device when it was opened (plus the
4002 * driver specification).
4003 * Because those options don't change, there is no need to update
4004 * full_open_options when it's already set. */
4005
4006 opts = qdict_new();
4007 append_open_options(opts, bs);
4008 qdict_put_obj(opts, "driver",
4009 QOBJECT(qstring_from_str(drv->format_name)));
4010
4011 if (bs->exact_filename[0]) {
4012 /* This may not work for all block protocol drivers (some may
4013 * require this filename to be parsed), but we have to find some
4014 * default solution here, so just include it. If some block driver
4015 * does not support pure options without any filename at all or
4016 * needs some special format of the options QDict, it needs to
4017 * implement the driver-specific bdrv_refresh_filename() function.
4018 */
4019 qdict_put_obj(opts, "filename",
4020 QOBJECT(qstring_from_str(bs->exact_filename)));
4021 }
4022
4023 bs->full_open_options = opts;
4024 }
4025
4026 if (bs->exact_filename[0]) {
4027 pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
4028 } else if (bs->full_open_options) {
4029 QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
4030 snprintf(bs->filename, sizeof(bs->filename), "json:%s",
4031 qstring_get_str(json));
4032 QDECREF(json);
4033 }
4034}