]> git.ipfire.org Git - thirdparty/qemu.git/blame - block.c
block: Inactivate all children
[thirdparty/qemu.git] / block.c
CommitLineData
fc01f7e7
FB
/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
d38ea87a 24#include "qemu/osdep.h"
6d519a5f 25#include "trace.h"
737e150e
PB
26#include "block/block_int.h"
27#include "block/blockjob.h"
d49b6836 28#include "qemu/error-report.h"
1de7afc9 29#include "qemu/module.h"
cc7a8ea7 30#include "qapi/qmp/qerror.h"
91a097e7 31#include "qapi/qmp/qbool.h"
7b1b5d19 32#include "qapi/qmp/qjson.h"
bfb197e0 33#include "sysemu/block-backend.h"
9c17d615 34#include "sysemu/sysemu.h"
1de7afc9 35#include "qemu/notify.h"
10817bf0 36#include "qemu/coroutine.h"
c13163fb 37#include "block/qapi.h"
b2023818 38#include "qmp-commands.h"
1de7afc9 39#include "qemu/timer.h"
a5ee7bd4 40#include "qapi-event.h"
db628338 41#include "block/throttle-groups.h"
f348b6d1
VB
42#include "qemu/cutils.h"
43#include "qemu/id.h"
fc01f7e7 44
71e72a19 45#ifdef CONFIG_BSD
7674e7bf 46#include <sys/ioctl.h>
72cf2d4f 47#include <sys/queue.h>
c5e97233 48#ifndef __DragonFly__
7674e7bf
FB
49#include <sys/disk.h>
50#endif
c5e97233 51#endif
7674e7bf 52
49dc768d
AL
53#ifdef _WIN32
54#include <windows.h>
55#endif
56
1c9805a3
SH
57#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
58
dc364f4c
BC
59static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
60 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
61
2c1d04e0
HR
62static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states =
63 QTAILQ_HEAD_INITIALIZER(all_bdrv_states);
64
8a22f02a
SH
65static QLIST_HEAD(, BlockDriver) bdrv_drivers =
66 QLIST_HEAD_INITIALIZER(bdrv_drivers);
ea2384d3 67
f3930ed0
KW
68static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
69 const char *reference, QDict *options, int flags,
70 BlockDriverState *parent,
ce343771 71 const BdrvChildRole *child_role, Error **errp);
f3930ed0 72
eb852011
MA
73/* If non-zero, use only whitelisted block drivers */
74static int use_bdrv_whitelist;
75
64dff520
HR
76static void bdrv_close(BlockDriverState *bs);
77
9e0b22f4
SH
#ifdef _WIN32
/* Return non-zero if @filename starts with an ASCII letter followed by ':' */
static int is_windows_drive_prefix(const char *filename)
{
    return (((filename[0] >= 'a' && filename[0] <= 'z') ||
             (filename[0] >= 'A' && filename[0] <= 'Z')) &&
            filename[1] == ':');
}

/*
 * Return 1 if @filename is exactly a drive specification ("d:") or starts
 * with the "\\.\" or "//./" prefix, 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) &&
        filename[2] == '\0') {
        return 1;
    }
    if (strstart(filename, "\\\\.\\", NULL) ||
        strstart(filename, "//./", NULL)) {
        return 1;
    }
    return 0;
}
#endif
97
339064d5
KW
98size_t bdrv_opt_mem_align(BlockDriverState *bs)
99{
100 if (!bs || !bs->drv) {
459b4e66
DL
101 /* page size or 4k (hdd sector size) should be on the safe side */
102 return MAX(4096, getpagesize());
339064d5
KW
103 }
104
105 return bs->bl.opt_mem_alignment;
106}
107
4196d2f0
DL
108size_t bdrv_min_mem_align(BlockDriverState *bs)
109{
110 if (!bs || !bs->drv) {
459b4e66
DL
111 /* page size or 4k (hdd sector size) should be on the safe side */
112 return MAX(4096, getpagesize());
4196d2f0
DL
113 }
114
115 return bs->bl.min_mem_alignment;
116}
117
/*
 * Check if the path starts with "<protocol>:".
 *
 * The first ':' only counts if it appears before any path separator.
 * On Windows, drive specifications are never treated as a protocol.
 * Returns 1 if a protocol prefix is present, 0 otherwise.
 */
int path_has_protocol(const char *path)
{
    const char *p;

#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
    p = path + strcspn(path, ":/\\");
#else
    p = path + strcspn(path, ":/");
#endif

    return *p == ':';
}
135
/*
 * Return non-zero if @path is absolute.
 *
 * On Windows, drive specifications and paths starting with '/' or '\\' are
 * absolute; elsewhere only paths starting with '/' are.
 */
int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    return (*path == '/' || *path == '\\');
#else
    return (*path == '/');
#endif
}
148
83f64091
FB
/*
 * If filename is absolute, just copy it to dest. Otherwise, build a
 * path to it by considering it is relative to base_path. URLs are
 * supported.
 *
 * The directory part of @base_path is everything up to and including the
 * later of its first ':' and its last path separator. The result is
 * truncated to fit in @dest_size bytes including the terminating NUL.
 * Nothing is written if @dest_size <= 0.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
    } else {
        /* p: just past the protocol prefix, if any */
        p = strchr(base_path, ':');
        if (p) {
            p++;
        } else {
            p = base_path;
        }
        /* p1: just past the last path separator, if any */
        p1 = strrchr(base_path, '/');
#ifdef _WIN32
        {
            const char *p2;
            p2 = strrchr(base_path, '\\');
            if (!p1 || p2 > p1) {
                p1 = p2;
            }
        }
#endif
        if (p1) {
            p1++;
        } else {
            p1 = base_path;
        }
        if (p1 > p) {
            p = p1;
        }
        len = p - base_path;
        if (len > dest_size - 1) {
            len = dest_size - 1;
        }
        memcpy(dest, base_path, len);
        dest[len] = '\0';
        pstrcat(dest, dest_size, filename);
    }
}
192
0a82855a
HR
193void bdrv_get_full_backing_filename_from_filename(const char *backed,
194 const char *backing,
9f07429e
HR
195 char *dest, size_t sz,
196 Error **errp)
dc5a1371 197{
9f07429e
HR
198 if (backing[0] == '\0' || path_has_protocol(backing) ||
199 path_is_absolute(backing))
200 {
0a82855a 201 pstrcpy(dest, sz, backing);
9f07429e
HR
202 } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
203 error_setg(errp, "Cannot use relative backing file names for '%s'",
204 backed);
dc5a1371 205 } else {
0a82855a 206 path_combine(dest, sz, backed, backing);
dc5a1371
PB
207 }
208}
209
9f07429e
HR
210void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
211 Error **errp)
0a82855a 212{
9f07429e
HR
213 char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
214
215 bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
216 dest, sz, errp);
0a82855a
HR
217}
218
0eb7217e
SH
219void bdrv_register(BlockDriver *bdrv)
220{
8a22f02a 221 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
ea2384d3 222}
b338082b 223
7f06d47e 224BlockDriverState *bdrv_new_root(void)
b338082b 225{
9aaf28c6 226 return bdrv_new();
e4e9986b
MA
227}
228
229BlockDriverState *bdrv_new(void)
230{
231 BlockDriverState *bs;
232 int i;
233
5839e53b 234 bs = g_new0(BlockDriverState, 1);
e4654d2d 235 QLIST_INIT(&bs->dirty_bitmaps);
fbe40ff7
FZ
236 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
237 QLIST_INIT(&bs->op_blockers[i]);
238 }
d616b224 239 notifier_with_return_list_init(&bs->before_write_notifiers);
cc0681c4
BC
240 qemu_co_queue_init(&bs->throttled_reqs[0]);
241 qemu_co_queue_init(&bs->throttled_reqs[1]);
9fcb0251 242 bs->refcnt = 1;
dcd04228 243 bs->aio_context = qemu_get_aio_context();
d7d512f6 244
2c1d04e0
HR
245 QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list);
246
b338082b
FB
247 return bs;
248}
249
ea2384d3
FB
250BlockDriver *bdrv_find_format(const char *format_name)
251{
252 BlockDriver *drv1;
8a22f02a
SH
253 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
254 if (!strcmp(drv1->format_name, format_name)) {
ea2384d3 255 return drv1;
8a22f02a 256 }
ea2384d3
FB
257 }
258 return NULL;
259}
260
b64ec4e4 261static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
eb852011 262{
b64ec4e4
FZ
263 static const char *whitelist_rw[] = {
264 CONFIG_BDRV_RW_WHITELIST
265 };
266 static const char *whitelist_ro[] = {
267 CONFIG_BDRV_RO_WHITELIST
eb852011
MA
268 };
269 const char **p;
270
b64ec4e4 271 if (!whitelist_rw[0] && !whitelist_ro[0]) {
eb852011 272 return 1; /* no whitelist, anything goes */
b64ec4e4 273 }
eb852011 274
b64ec4e4 275 for (p = whitelist_rw; *p; p++) {
eb852011
MA
276 if (!strcmp(drv->format_name, *p)) {
277 return 1;
278 }
279 }
b64ec4e4
FZ
280 if (read_only) {
281 for (p = whitelist_ro; *p; p++) {
282 if (!strcmp(drv->format_name, *p)) {
283 return 1;
284 }
285 }
286 }
eb852011
MA
287 return 0;
288}
289
e6ff69bf
DB
290bool bdrv_uses_whitelist(void)
291{
292 return use_bdrv_whitelist;
293}
294
5b7e1542
ZYW
295typedef struct CreateCo {
296 BlockDriver *drv;
297 char *filename;
83d0521a 298 QemuOpts *opts;
5b7e1542 299 int ret;
cc84d90f 300 Error *err;
5b7e1542
ZYW
301} CreateCo;
302
303static void coroutine_fn bdrv_create_co_entry(void *opaque)
304{
cc84d90f
HR
305 Error *local_err = NULL;
306 int ret;
307
5b7e1542
ZYW
308 CreateCo *cco = opaque;
309 assert(cco->drv);
310
c282e1fd 311 ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
84d18f06 312 if (local_err) {
cc84d90f
HR
313 error_propagate(&cco->err, local_err);
314 }
315 cco->ret = ret;
5b7e1542
ZYW
316}
317
0e7e1989 318int bdrv_create(BlockDriver *drv, const char* filename,
83d0521a 319 QemuOpts *opts, Error **errp)
ea2384d3 320{
5b7e1542
ZYW
321 int ret;
322
323 Coroutine *co;
324 CreateCo cco = {
325 .drv = drv,
326 .filename = g_strdup(filename),
83d0521a 327 .opts = opts,
5b7e1542 328 .ret = NOT_DONE,
cc84d90f 329 .err = NULL,
5b7e1542
ZYW
330 };
331
c282e1fd 332 if (!drv->bdrv_create) {
cc84d90f 333 error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
80168bff
LC
334 ret = -ENOTSUP;
335 goto out;
5b7e1542
ZYW
336 }
337
338 if (qemu_in_coroutine()) {
339 /* Fast-path if already in coroutine context */
340 bdrv_create_co_entry(&cco);
341 } else {
342 co = qemu_coroutine_create(bdrv_create_co_entry);
343 qemu_coroutine_enter(co, &cco);
344 while (cco.ret == NOT_DONE) {
b47ec2c4 345 aio_poll(qemu_get_aio_context(), true);
5b7e1542
ZYW
346 }
347 }
348
349 ret = cco.ret;
cc84d90f 350 if (ret < 0) {
84d18f06 351 if (cco.err) {
cc84d90f
HR
352 error_propagate(errp, cco.err);
353 } else {
354 error_setg_errno(errp, -ret, "Could not create image");
355 }
356 }
0e7e1989 357
80168bff
LC
358out:
359 g_free(cco.filename);
5b7e1542 360 return ret;
ea2384d3
FB
361}
362
c282e1fd 363int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
84a12e66
CH
364{
365 BlockDriver *drv;
cc84d90f
HR
366 Error *local_err = NULL;
367 int ret;
84a12e66 368
b65a5e12 369 drv = bdrv_find_protocol(filename, true, errp);
84a12e66 370 if (drv == NULL) {
16905d71 371 return -ENOENT;
84a12e66
CH
372 }
373
c282e1fd 374 ret = bdrv_create(drv, filename, opts, &local_err);
84d18f06 375 if (local_err) {
cc84d90f
HR
376 error_propagate(errp, local_err);
377 }
378 return ret;
84a12e66
CH
379}
380
892b7de8
ET
381/**
382 * Try to get @bs's logical and physical block size.
383 * On success, store them in @bsz struct and return 0.
384 * On failure return -errno.
385 * @bs must not be empty.
386 */
387int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
388{
389 BlockDriver *drv = bs->drv;
390
391 if (drv && drv->bdrv_probe_blocksizes) {
392 return drv->bdrv_probe_blocksizes(bs, bsz);
393 }
394
395 return -ENOTSUP;
396}
397
398/**
399 * Try to get @bs's geometry (cyls, heads, sectors).
400 * On success, store them in @geo struct and return 0.
401 * On failure return -errno.
402 * @bs must not be empty.
403 */
404int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
405{
406 BlockDriver *drv = bs->drv;
407
408 if (drv && drv->bdrv_probe_geometry) {
409 return drv->bdrv_probe_geometry(bs, geo);
410 }
411
412 return -ENOTSUP;
413}
414
eba25057
JM
/*
 * Create a uniquely-named empty temporary file.
 *
 * The file name is written to @filename, which has room for @size bytes.
 * On POSIX hosts the file is created in $TMPDIR, or /var/tmp if TMPDIR is
 * not set; -EOVERFLOW is returned if the name does not fit in @size bytes.
 *
 * Return 0 upon success, otherwise a negative errno value.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /* Save errno first: unlink() may overwrite the close() error */
        int close_errno = errno;

        unlink(filename);
        return -close_errno;
    }
    return 0;
#endif
}
fc01f7e7 450
84a12e66
CH
451/*
452 * Detect host devices. By convention, /dev/cdrom[N] is always
453 * recognized as a host CDROM.
454 */
455static BlockDriver *find_hdev_driver(const char *filename)
456{
457 int score_max = 0, score;
458 BlockDriver *drv = NULL, *d;
459
460 QLIST_FOREACH(d, &bdrv_drivers, list) {
461 if (d->bdrv_probe_device) {
462 score = d->bdrv_probe_device(filename);
463 if (score > score_max) {
464 score_max = score;
465 drv = d;
466 }
467 }
468 }
469
470 return drv;
471}
472
98289620 473BlockDriver *bdrv_find_protocol(const char *filename,
b65a5e12
HR
474 bool allow_protocol_prefix,
475 Error **errp)
83f64091
FB
476{
477 BlockDriver *drv1;
478 char protocol[128];
1cec71e3 479 int len;
83f64091 480 const char *p;
19cb3738 481
66f82cee
KW
482 /* TODO Drivers without bdrv_file_open must be specified explicitly */
483
39508e7a
CH
484 /*
485 * XXX(hch): we really should not let host device detection
486 * override an explicit protocol specification, but moving this
487 * later breaks access to device names with colons in them.
488 * Thanks to the brain-dead persistent naming schemes on udev-
489 * based Linux systems those actually are quite common.
490 */
491 drv1 = find_hdev_driver(filename);
492 if (drv1) {
493 return drv1;
494 }
495
98289620 496 if (!path_has_protocol(filename) || !allow_protocol_prefix) {
ef810437 497 return &bdrv_file;
84a12e66 498 }
98289620 499
9e0b22f4
SH
500 p = strchr(filename, ':');
501 assert(p != NULL);
1cec71e3
AL
502 len = p - filename;
503 if (len > sizeof(protocol) - 1)
504 len = sizeof(protocol) - 1;
505 memcpy(protocol, filename, len);
506 protocol[len] = '\0';
8a22f02a 507 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
5fafdf24 508 if (drv1->protocol_name &&
8a22f02a 509 !strcmp(drv1->protocol_name, protocol)) {
83f64091 510 return drv1;
8a22f02a 511 }
83f64091 512 }
b65a5e12
HR
513
514 error_setg(errp, "Unknown protocol '%s'", protocol);
83f64091
FB
515 return NULL;
516}
517
c6684249
MA
518/*
519 * Guess image format by probing its contents.
520 * This is not a good idea when your image is raw (CVE-2008-2004), but
521 * we do it anyway for backward compatibility.
522 *
523 * @buf contains the image's first @buf_size bytes.
7cddd372
KW
524 * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
525 * but can be smaller if the image file is smaller)
c6684249
MA
526 * @filename is its filename.
527 *
528 * For all block drivers, call the bdrv_probe() method to get its
529 * probing score.
530 * Return the first block driver with the highest probing score.
531 */
38f3ef57
KW
532BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
533 const char *filename)
c6684249
MA
534{
535 int score_max = 0, score;
536 BlockDriver *drv = NULL, *d;
537
538 QLIST_FOREACH(d, &bdrv_drivers, list) {
539 if (d->bdrv_probe) {
540 score = d->bdrv_probe(buf, buf_size, filename);
541 if (score > score_max) {
542 score_max = score;
543 drv = d;
544 }
545 }
546 }
547
548 return drv;
549}
550
f500a6d3 551static int find_image_format(BlockDriverState *bs, const char *filename,
34b5d2c6 552 BlockDriver **pdrv, Error **errp)
f3a5d3f8 553{
c6684249 554 BlockDriver *drv;
7cddd372 555 uint8_t buf[BLOCK_PROBE_BUF_SIZE];
f500a6d3 556 int ret = 0;
f8ea0b00 557
08a00559 558 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
b192af8a 559 if (bdrv_is_sg(bs) || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
ef810437 560 *pdrv = &bdrv_raw;
c98ac35d 561 return ret;
1a396859 562 }
f8ea0b00 563
83f64091 564 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
83f64091 565 if (ret < 0) {
34b5d2c6
HR
566 error_setg_errno(errp, -ret, "Could not read image for determining its "
567 "format");
c98ac35d
SW
568 *pdrv = NULL;
569 return ret;
83f64091
FB
570 }
571
c6684249 572 drv = bdrv_probe_all(buf, ret, filename);
c98ac35d 573 if (!drv) {
34b5d2c6
HR
574 error_setg(errp, "Could not determine image format: No compatible "
575 "driver found");
c98ac35d
SW
576 ret = -ENOENT;
577 }
578 *pdrv = drv;
579 return ret;
ea2384d3
FB
580}
581
51762288
SH
582/**
583 * Set the current 'total_sectors' value
65a9bb25 584 * Return 0 on success, -errno on error.
51762288
SH
585 */
586static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
587{
588 BlockDriver *drv = bs->drv;
589
396759ad 590 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
b192af8a 591 if (bdrv_is_sg(bs))
396759ad
NB
592 return 0;
593
51762288
SH
594 /* query actual device if possible, otherwise just trust the hint */
595 if (drv->bdrv_getlength) {
596 int64_t length = drv->bdrv_getlength(bs);
597 if (length < 0) {
598 return length;
599 }
7e382003 600 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
51762288
SH
601 }
602
603 bs->total_sectors = hint;
604 return 0;
605}
606
cddff5ba
KW
607/**
608 * Combines a QDict of new block driver @options with any missing options taken
609 * from @old_options, so that leaving out an option defaults to its old value.
610 */
611static void bdrv_join_options(BlockDriverState *bs, QDict *options,
612 QDict *old_options)
613{
614 if (bs->drv && bs->drv->bdrv_join_options) {
615 bs->drv->bdrv_join_options(options, old_options);
616 } else {
617 qdict_join(options, old_options, false);
618 }
619}
620
9e8f1835
PB
621/**
622 * Set open flags for a given discard mode
623 *
624 * Return 0 on success, -1 if the discard mode was invalid.
625 */
626int bdrv_parse_discard_flags(const char *mode, int *flags)
627{
628 *flags &= ~BDRV_O_UNMAP;
629
630 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
631 /* do nothing */
632 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
633 *flags |= BDRV_O_UNMAP;
634 } else {
635 return -1;
636 }
637
638 return 0;
639}
640
c3993cdc
SH
641/**
642 * Set open flags for a given cache mode
643 *
644 * Return 0 on success, -1 if the cache mode was invalid.
645 */
53e8ae01 646int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough)
c3993cdc
SH
647{
648 *flags &= ~BDRV_O_CACHE_MASK;
649
650 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
53e8ae01
KW
651 *writethrough = false;
652 *flags |= BDRV_O_NOCACHE;
92196b2f 653 } else if (!strcmp(mode, "directsync")) {
53e8ae01 654 *writethrough = true;
92196b2f 655 *flags |= BDRV_O_NOCACHE;
c3993cdc 656 } else if (!strcmp(mode, "writeback")) {
53e8ae01 657 *writethrough = false;
c3993cdc 658 } else if (!strcmp(mode, "unsafe")) {
53e8ae01 659 *writethrough = false;
c3993cdc
SH
660 *flags |= BDRV_O_NO_FLUSH;
661 } else if (!strcmp(mode, "writethrough")) {
53e8ae01 662 *writethrough = true;
c3993cdc
SH
663 } else {
664 return -1;
665 }
666
667 return 0;
668}
669
b1e6fc08 670/*
73176bee
KW
671 * Returns the options and flags that a temporary snapshot should get, based on
672 * the originally requested flags (the originally requested image will have
673 * flags like a backing file)
b1e6fc08 674 */
73176bee
KW
675static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options,
676 int parent_flags, QDict *parent_options)
b1e6fc08 677{
73176bee
KW
678 *child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
679
680 /* For temporary files, unconditional cache=unsafe is fine */
73176bee
KW
681 qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off");
682 qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on");
b1e6fc08
KW
683}
684
0b50cc88 685/*
8e2160e2
KW
686 * Returns the options and flags that bs->file should get if a protocol driver
687 * is expected, based on the given options and flags for the parent BDS
0b50cc88 688 */
8e2160e2
KW
689static void bdrv_inherited_options(int *child_flags, QDict *child_options,
690 int parent_flags, QDict *parent_options)
0b50cc88 691{
8e2160e2
KW
692 int flags = parent_flags;
693
0b50cc88
KW
694 /* Enable protocol handling, disable format probing for bs->file */
695 flags |= BDRV_O_PROTOCOL;
696
91a097e7
KW
697 /* If the cache mode isn't explicitly set, inherit direct and no-flush from
698 * the parent. */
699 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
700 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
701
0b50cc88 702 /* Our block drivers take care to send flushes and respect unmap policy,
91a097e7
KW
703 * so we can default to enable both on lower layers regardless of the
704 * corresponding parent options. */
91a097e7 705 flags |= BDRV_O_UNMAP;
0b50cc88 706
0b50cc88 707 /* Clear flags that only apply to the top layer */
abb06c5a
DB
708 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ |
709 BDRV_O_NO_IO);
0b50cc88 710
8e2160e2 711 *child_flags = flags;
0b50cc88
KW
712}
713
f3930ed0 714const BdrvChildRole child_file = {
8e2160e2 715 .inherit_options = bdrv_inherited_options,
f3930ed0
KW
716};
717
718/*
8e2160e2
KW
719 * Returns the options and flags that bs->file should get if the use of formats
720 * (and not only protocols) is permitted for it, based on the given options and
721 * flags for the parent BDS
f3930ed0 722 */
8e2160e2
KW
723static void bdrv_inherited_fmt_options(int *child_flags, QDict *child_options,
724 int parent_flags, QDict *parent_options)
f3930ed0 725{
8e2160e2
KW
726 child_file.inherit_options(child_flags, child_options,
727 parent_flags, parent_options);
728
abb06c5a 729 *child_flags &= ~(BDRV_O_PROTOCOL | BDRV_O_NO_IO);
f3930ed0
KW
730}
731
732const BdrvChildRole child_format = {
8e2160e2 733 .inherit_options = bdrv_inherited_fmt_options,
f3930ed0
KW
734};
735
317fc44e 736/*
8e2160e2
KW
737 * Returns the options and flags that bs->backing should get, based on the
738 * given options and flags for the parent BDS
317fc44e 739 */
8e2160e2
KW
740static void bdrv_backing_options(int *child_flags, QDict *child_options,
741 int parent_flags, QDict *parent_options)
317fc44e 742{
8e2160e2
KW
743 int flags = parent_flags;
744
b8816a43
KW
745 /* The cache mode is inherited unmodified for backing files; except WCE,
746 * which is only applied on the top level (BlockBackend) */
91a097e7
KW
747 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
748 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
749
317fc44e
KW
750 /* backing files always opened read-only */
751 flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
752
753 /* snapshot=on is handled on the top layer */
8bfea15d 754 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
317fc44e 755
8e2160e2 756 *child_flags = flags;
317fc44e
KW
757}
758
f3930ed0 759static const BdrvChildRole child_backing = {
8e2160e2 760 .inherit_options = bdrv_backing_options,
f3930ed0
KW
761};
762
7b272452
KW
763static int bdrv_open_flags(BlockDriverState *bs, int flags)
764{
61de4c68 765 int open_flags = flags;
7b272452
KW
766
767 /*
768 * Clear flags that are internal to the block layer before opening the
769 * image.
770 */
20cca275 771 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
7b272452
KW
772
773 /*
774 * Snapshots should be writable.
775 */
8bfea15d 776 if (flags & BDRV_O_TEMPORARY) {
7b272452
KW
777 open_flags |= BDRV_O_RDWR;
778 }
779
780 return open_flags;
781}
782
91a097e7
KW
783static void update_flags_from_options(int *flags, QemuOpts *opts)
784{
785 *flags &= ~BDRV_O_CACHE_MASK;
786
91a097e7
KW
787 assert(qemu_opt_find(opts, BDRV_OPT_CACHE_NO_FLUSH));
788 if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
789 *flags |= BDRV_O_NO_FLUSH;
790 }
791
792 assert(qemu_opt_find(opts, BDRV_OPT_CACHE_DIRECT));
793 if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_DIRECT, false)) {
794 *flags |= BDRV_O_NOCACHE;
795 }
796}
797
798static void update_options_from_flags(QDict *options, int flags)
799{
91a097e7
KW
800 if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) {
801 qdict_put(options, BDRV_OPT_CACHE_DIRECT,
802 qbool_from_bool(flags & BDRV_O_NOCACHE));
803 }
804 if (!qdict_haskey(options, BDRV_OPT_CACHE_NO_FLUSH)) {
805 qdict_put(options, BDRV_OPT_CACHE_NO_FLUSH,
806 qbool_from_bool(flags & BDRV_O_NO_FLUSH));
807 }
808}
809
636ea370
KW
810static void bdrv_assign_node_name(BlockDriverState *bs,
811 const char *node_name,
812 Error **errp)
6913c0c2 813{
15489c76 814 char *gen_node_name = NULL;
6913c0c2 815
15489c76
JC
816 if (!node_name) {
817 node_name = gen_node_name = id_generate(ID_BLOCK);
818 } else if (!id_wellformed(node_name)) {
819 /*
820 * Check for empty string or invalid characters, but not if it is
821 * generated (generated names use characters not available to the user)
822 */
9aebf3b8 823 error_setg(errp, "Invalid node name");
636ea370 824 return;
6913c0c2
BC
825 }
826
0c5e94ee 827 /* takes care of avoiding namespaces collisions */
7f06d47e 828 if (blk_by_name(node_name)) {
0c5e94ee
BC
829 error_setg(errp, "node-name=%s is conflicting with a device id",
830 node_name);
15489c76 831 goto out;
0c5e94ee
BC
832 }
833
6913c0c2
BC
834 /* takes care of avoiding duplicates node names */
835 if (bdrv_find_node(node_name)) {
836 error_setg(errp, "Duplicate node name");
15489c76 837 goto out;
6913c0c2
BC
838 }
839
840 /* copy node name into the bs and insert it into the graph list */
841 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
842 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
15489c76
JC
843out:
844 g_free(gen_node_name);
6913c0c2
BC
845}
846
18edf289
KW
847static QemuOptsList bdrv_runtime_opts = {
848 .name = "bdrv_common",
849 .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
850 .desc = {
851 {
852 .name = "node-name",
853 .type = QEMU_OPT_STRING,
854 .help = "Node name of the block device node",
855 },
62392ebb
KW
856 {
857 .name = "driver",
858 .type = QEMU_OPT_STRING,
859 .help = "Block driver to use for the node",
860 },
91a097e7
KW
861 {
862 .name = BDRV_OPT_CACHE_DIRECT,
863 .type = QEMU_OPT_BOOL,
864 .help = "Bypass software writeback cache on the host",
865 },
866 {
867 .name = BDRV_OPT_CACHE_NO_FLUSH,
868 .type = QEMU_OPT_BOOL,
869 .help = "Ignore flush requests",
870 },
18edf289
KW
871 { /* end of list */ }
872 },
873};
874
57915332
KW
875/*
876 * Common part for opening disk images and files
b6ad491a
KW
877 *
878 * Removes all processed options from *options.
57915332 879 */
9a4f4c31 880static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
82dc8b41 881 QDict *options, Error **errp)
57915332
KW
882{
883 int ret, open_flags;
035fccdf 884 const char *filename;
62392ebb 885 const char *driver_name = NULL;
6913c0c2 886 const char *node_name = NULL;
18edf289 887 QemuOpts *opts;
62392ebb 888 BlockDriver *drv;
34b5d2c6 889 Error *local_err = NULL;
57915332 890
6405875c 891 assert(bs->file == NULL);
707ff828 892 assert(options != NULL && bs->options != options);
57915332 893
62392ebb
KW
894 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
895 qemu_opts_absorb_qdict(opts, options, &local_err);
896 if (local_err) {
897 error_propagate(errp, local_err);
898 ret = -EINVAL;
899 goto fail_opts;
900 }
901
902 driver_name = qemu_opt_get(opts, "driver");
903 drv = bdrv_find_format(driver_name);
904 assert(drv != NULL);
905
45673671 906 if (file != NULL) {
9a4f4c31 907 filename = file->bs->filename;
45673671
KW
908 } else {
909 filename = qdict_get_try_str(options, "filename");
910 }
911
765003db
KW
912 if (drv->bdrv_needs_filename && !filename) {
913 error_setg(errp, "The '%s' block driver requires a file name",
914 drv->format_name);
18edf289
KW
915 ret = -EINVAL;
916 goto fail_opts;
6913c0c2 917 }
6913c0c2 918
82dc8b41
KW
919 trace_bdrv_open_common(bs, filename ?: "", bs->open_flags,
920 drv->format_name);
62392ebb 921
18edf289 922 node_name = qemu_opt_get(opts, "node-name");
636ea370 923 bdrv_assign_node_name(bs, node_name, &local_err);
0fb6395c 924 if (local_err) {
636ea370 925 error_propagate(errp, local_err);
18edf289
KW
926 ret = -EINVAL;
927 goto fail_opts;
5d186eb0
KW
928 }
929
c25f53b0 930 bs->request_alignment = 512;
0d51b4de 931 bs->zero_beyond_eof = true;
82dc8b41 932 bs->read_only = !(bs->open_flags & BDRV_O_RDWR);
b64ec4e4
FZ
933
934 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
8f94a6e4
KW
935 error_setg(errp,
936 !bs->read_only && bdrv_is_whitelisted(drv, true)
937 ? "Driver '%s' can only be used for read-only devices"
938 : "Driver '%s' is not whitelisted",
939 drv->format_name);
18edf289
KW
940 ret = -ENOTSUP;
941 goto fail_opts;
b64ec4e4 942 }
57915332 943
53fec9d3 944 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
82dc8b41 945 if (bs->open_flags & BDRV_O_COPY_ON_READ) {
0ebd24e0
KW
946 if (!bs->read_only) {
947 bdrv_enable_copy_on_read(bs);
948 } else {
949 error_setg(errp, "Can't use copy-on-read on read-only device");
18edf289
KW
950 ret = -EINVAL;
951 goto fail_opts;
0ebd24e0 952 }
53fec9d3
SH
953 }
954
c2ad1b0c
KW
955 if (filename != NULL) {
956 pstrcpy(bs->filename, sizeof(bs->filename), filename);
957 } else {
958 bs->filename[0] = '\0';
959 }
91af7014 960 pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
57915332 961
57915332 962 bs->drv = drv;
7267c094 963 bs->opaque = g_malloc0(drv->instance_size);
57915332 964
91a097e7
KW
965 /* Apply cache mode options */
966 update_flags_from_options(&bs->open_flags, opts);
73ac451f 967
66f82cee 968 /* Open the image, either directly or using a protocol */
82dc8b41 969 open_flags = bdrv_open_flags(bs, bs->open_flags);
66f82cee 970 if (drv->bdrv_file_open) {
5d186eb0 971 assert(file == NULL);
030be321 972 assert(!drv->bdrv_needs_filename || filename != NULL);
34b5d2c6 973 ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
f500a6d3 974 } else {
2af5ef70 975 if (file == NULL) {
34b5d2c6
HR
976 error_setg(errp, "Can't use '%s' as a block driver for the "
977 "protocol level", drv->format_name);
2af5ef70
KW
978 ret = -EINVAL;
979 goto free_and_fail;
980 }
f500a6d3 981 bs->file = file;
34b5d2c6 982 ret = drv->bdrv_open(bs, options, open_flags, &local_err);
66f82cee
KW
983 }
984
57915332 985 if (ret < 0) {
84d18f06 986 if (local_err) {
34b5d2c6 987 error_propagate(errp, local_err);
2fa9aa59
DH
988 } else if (bs->filename[0]) {
989 error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
34b5d2c6
HR
990 } else {
991 error_setg_errno(errp, -ret, "Could not open image");
992 }
57915332
KW
993 goto free_and_fail;
994 }
995
51762288
SH
996 ret = refresh_total_sectors(bs, bs->total_sectors);
997 if (ret < 0) {
34b5d2c6 998 error_setg_errno(errp, -ret, "Could not refresh total sector count");
51762288 999 goto free_and_fail;
57915332 1000 }
51762288 1001
3baca891
KW
1002 bdrv_refresh_limits(bs, &local_err);
1003 if (local_err) {
1004 error_propagate(errp, local_err);
1005 ret = -EINVAL;
1006 goto free_and_fail;
1007 }
1008
c25f53b0 1009 assert(bdrv_opt_mem_align(bs) != 0);
4196d2f0 1010 assert(bdrv_min_mem_align(bs) != 0);
b192af8a 1011 assert((bs->request_alignment != 0) || bdrv_is_sg(bs));
18edf289
KW
1012
1013 qemu_opts_del(opts);
57915332
KW
1014 return 0;
1015
1016free_and_fail:
f500a6d3 1017 bs->file = NULL;
7267c094 1018 g_free(bs->opaque);
57915332
KW
1019 bs->opaque = NULL;
1020 bs->drv = NULL;
18edf289
KW
1021fail_opts:
1022 qemu_opts_del(opts);
57915332
KW
1023 return ret;
1024}
1025
5e5c4f63
KW
1026static QDict *parse_json_filename(const char *filename, Error **errp)
1027{
1028 QObject *options_obj;
1029 QDict *options;
1030 int ret;
1031
1032 ret = strstart(filename, "json:", &filename);
1033 assert(ret);
1034
1035 options_obj = qobject_from_json(filename);
1036 if (!options_obj) {
1037 error_setg(errp, "Could not parse the JSON options");
1038 return NULL;
1039 }
1040
1041 if (qobject_type(options_obj) != QTYPE_QDICT) {
1042 qobject_decref(options_obj);
1043 error_setg(errp, "Invalid JSON object given");
1044 return NULL;
1045 }
1046
1047 options = qobject_to_qdict(options_obj);
1048 qdict_flatten(options);
1049
1050 return options;
1051}
1052
de3b53f0
KW
1053static void parse_json_protocol(QDict *options, const char **pfilename,
1054 Error **errp)
1055{
1056 QDict *json_options;
1057 Error *local_err = NULL;
1058
1059 /* Parse json: pseudo-protocol */
1060 if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) {
1061 return;
1062 }
1063
1064 json_options = parse_json_filename(*pfilename, &local_err);
1065 if (local_err) {
1066 error_propagate(errp, local_err);
1067 return;
1068 }
1069
1070 /* Options given in the filename have lower priority than options
1071 * specified directly */
1072 qdict_join(options, json_options, false);
1073 QDECREF(json_options);
1074 *pfilename = NULL;
1075}
1076
b6ce07aa 1077/*
f54120ff
KW
1078 * Fills in default options for opening images and converts the legacy
1079 * filename/flags pair to option QDict entries.
53a29513
HR
1080 * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
1081 * block driver has been specified explicitly.
b6ce07aa 1082 */
de3b53f0 1083static int bdrv_fill_options(QDict **options, const char *filename,
053e1578 1084 int *flags, Error **errp)
ea2384d3 1085{
c2ad1b0c 1086 const char *drvname;
53a29513 1087 bool protocol = *flags & BDRV_O_PROTOCOL;
e3fa4bfa 1088 bool parse_filename = false;
053e1578 1089 BlockDriver *drv = NULL;
34b5d2c6 1090 Error *local_err = NULL;
83f64091 1091
53a29513 1092 drvname = qdict_get_try_str(*options, "driver");
053e1578
HR
1093 if (drvname) {
1094 drv = bdrv_find_format(drvname);
1095 if (!drv) {
1096 error_setg(errp, "Unknown driver '%s'", drvname);
1097 return -ENOENT;
1098 }
1099 /* If the user has explicitly specified the driver, this choice should
1100 * override the BDRV_O_PROTOCOL flag */
1101 protocol = drv->bdrv_file_open;
53a29513
HR
1102 }
1103
1104 if (protocol) {
1105 *flags |= BDRV_O_PROTOCOL;
1106 } else {
1107 *flags &= ~BDRV_O_PROTOCOL;
1108 }
1109
91a097e7
KW
1110 /* Translate cache options from flags into options */
1111 update_options_from_flags(*options, *flags);
1112
035fccdf 1113 /* Fetch the file name from the options QDict if necessary */
17b005f1 1114 if (protocol && filename) {
f54120ff
KW
1115 if (!qdict_haskey(*options, "filename")) {
1116 qdict_put(*options, "filename", qstring_from_str(filename));
1117 parse_filename = true;
1118 } else {
1119 error_setg(errp, "Can't specify 'file' and 'filename' options at "
1120 "the same time");
1121 return -EINVAL;
1122 }
035fccdf
KW
1123 }
1124
c2ad1b0c 1125 /* Find the right block driver */
f54120ff 1126 filename = qdict_get_try_str(*options, "filename");
f54120ff 1127
053e1578
HR
1128 if (!drvname && protocol) {
1129 if (filename) {
1130 drv = bdrv_find_protocol(filename, parse_filename, errp);
17b005f1 1131 if (!drv) {
053e1578 1132 return -EINVAL;
17b005f1 1133 }
053e1578
HR
1134
1135 drvname = drv->format_name;
1136 qdict_put(*options, "driver", qstring_from_str(drvname));
1137 } else {
1138 error_setg(errp, "Must specify either driver or file");
1139 return -EINVAL;
98289620 1140 }
c2ad1b0c
KW
1141 }
1142
17b005f1 1143 assert(drv || !protocol);
c2ad1b0c 1144
f54120ff 1145 /* Driver-specific filename parsing */
17b005f1 1146 if (drv && drv->bdrv_parse_filename && parse_filename) {
5acd9d81 1147 drv->bdrv_parse_filename(filename, *options, &local_err);
84d18f06 1148 if (local_err) {
34b5d2c6 1149 error_propagate(errp, local_err);
f54120ff 1150 return -EINVAL;
6963a30d 1151 }
cd5d031e
HR
1152
1153 if (!drv->bdrv_needs_filename) {
1154 qdict_del(*options, "filename");
cd5d031e 1155 }
6963a30d
KW
1156 }
1157
f54120ff
KW
1158 return 0;
1159}
1160
f21d96d0
KW
1161BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
1162 const char *child_name,
1163 const BdrvChildRole *child_role)
df581792
KW
1164{
1165 BdrvChild *child = g_new(BdrvChild, 1);
1166 *child = (BdrvChild) {
1167 .bs = child_bs,
260fecf1 1168 .name = g_strdup(child_name),
df581792
KW
1169 .role = child_role,
1170 };
1171
d42a8a93 1172 QLIST_INSERT_HEAD(&child_bs->parents, child, next_parent);
b4b059f6
KW
1173
1174 return child;
df581792
KW
1175}
1176
f21d96d0
KW
1177static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
1178 BlockDriverState *child_bs,
1179 const char *child_name,
1180 const BdrvChildRole *child_role)
1181{
1182 BdrvChild *child = bdrv_root_attach_child(child_bs, child_name, child_role);
1183 QLIST_INSERT_HEAD(&parent_bs->children, child, next);
1184 return child;
1185}
1186
3f09bfbc 1187static void bdrv_detach_child(BdrvChild *child)
33a60407 1188{
f21d96d0
KW
1189 if (child->next.le_prev) {
1190 QLIST_REMOVE(child, next);
1191 child->next.le_prev = NULL;
1192 }
d42a8a93 1193 QLIST_REMOVE(child, next_parent);
260fecf1 1194 g_free(child->name);
33a60407
KW
1195 g_free(child);
1196}
1197
f21d96d0 1198void bdrv_root_unref_child(BdrvChild *child)
33a60407 1199{
779020cb
KW
1200 BlockDriverState *child_bs;
1201
f21d96d0
KW
1202 child_bs = child->bs;
1203 bdrv_detach_child(child);
1204 bdrv_unref(child_bs);
1205}
1206
1207void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
1208{
779020cb
KW
1209 if (child == NULL) {
1210 return;
1211 }
33a60407
KW
1212
1213 if (child->bs->inherits_from == parent) {
1214 child->bs->inherits_from = NULL;
1215 }
1216
f21d96d0 1217 bdrv_root_unref_child(child);
33a60407
KW
1218}
1219
5db15a57
KW
1220/*
1221 * Sets the backing file link of a BDS. A new reference is created; callers
1222 * which don't need their own reference any more must call bdrv_unref().
1223 */
8d24cce1
FZ
1224void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
1225{
5db15a57
KW
1226 if (backing_hd) {
1227 bdrv_ref(backing_hd);
1228 }
8d24cce1 1229
760e0063 1230 if (bs->backing) {
826b6ca0 1231 assert(bs->backing_blocker);
760e0063 1232 bdrv_op_unblock_all(bs->backing->bs, bs->backing_blocker);
5db15a57 1233 bdrv_unref_child(bs, bs->backing);
826b6ca0
FZ
1234 } else if (backing_hd) {
1235 error_setg(&bs->backing_blocker,
81e5f78a
AG
1236 "node is used as backing hd of '%s'",
1237 bdrv_get_device_or_node_name(bs));
826b6ca0
FZ
1238 }
1239
8d24cce1 1240 if (!backing_hd) {
826b6ca0
FZ
1241 error_free(bs->backing_blocker);
1242 bs->backing_blocker = NULL;
760e0063 1243 bs->backing = NULL;
8d24cce1
FZ
1244 goto out;
1245 }
260fecf1 1246 bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing);
8d24cce1
FZ
1247 bs->open_flags &= ~BDRV_O_NO_BACKING;
1248 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
1249 pstrcpy(bs->backing_format, sizeof(bs->backing_format),
1250 backing_hd->drv ? backing_hd->drv->format_name : "");
826b6ca0 1251
760e0063 1252 bdrv_op_block_all(backing_hd, bs->backing_blocker);
826b6ca0 1253 /* Otherwise we won't be able to commit due to check in bdrv_commit */
760e0063 1254 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
826b6ca0 1255 bs->backing_blocker);
8d24cce1 1256out:
3baca891 1257 bdrv_refresh_limits(bs, NULL);
8d24cce1
FZ
1258}
1259
31ca6d07
KW
1260/*
1261 * Opens the backing file for a BlockDriverState if not yet open
1262 *
d9b7b057
KW
1263 * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
1264 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1265 * itself, all options starting with "${bdref_key}." are considered part of the
1266 * BlockdevRef.
1267 *
1268 * TODO Can this be unified with bdrv_open_image()?
31ca6d07 1269 */
d9b7b057
KW
1270int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
1271 const char *bdref_key, Error **errp)
9156df12 1272{
1ba4b6a5 1273 char *backing_filename = g_malloc0(PATH_MAX);
d9b7b057
KW
1274 char *bdref_key_dot;
1275 const char *reference = NULL;
317fc44e 1276 int ret = 0;
8d24cce1 1277 BlockDriverState *backing_hd;
d9b7b057
KW
1278 QDict *options;
1279 QDict *tmp_parent_options = NULL;
34b5d2c6 1280 Error *local_err = NULL;
9156df12 1281
760e0063 1282 if (bs->backing != NULL) {
1ba4b6a5 1283 goto free_exit;
9156df12
PB
1284 }
1285
31ca6d07 1286 /* NULL means an empty set of options */
d9b7b057
KW
1287 if (parent_options == NULL) {
1288 tmp_parent_options = qdict_new();
1289 parent_options = tmp_parent_options;
31ca6d07
KW
1290 }
1291
9156df12 1292 bs->open_flags &= ~BDRV_O_NO_BACKING;
d9b7b057
KW
1293
1294 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1295 qdict_extract_subqdict(parent_options, &options, bdref_key_dot);
1296 g_free(bdref_key_dot);
1297
1298 reference = qdict_get_try_str(parent_options, bdref_key);
1299 if (reference || qdict_haskey(options, "file.filename")) {
1cb6f506
KW
1300 backing_filename[0] = '\0';
1301 } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
31ca6d07 1302 QDECREF(options);
1ba4b6a5 1303 goto free_exit;
dbecebdd 1304 } else {
9f07429e
HR
1305 bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
1306 &local_err);
1307 if (local_err) {
1308 ret = -EINVAL;
1309 error_propagate(errp, local_err);
1310 QDECREF(options);
1311 goto free_exit;
1312 }
9156df12
PB
1313 }
1314
8ee79e70
KW
1315 if (!bs->drv || !bs->drv->supports_backing) {
1316 ret = -EINVAL;
1317 error_setg(errp, "Driver doesn't support backing files");
1318 QDECREF(options);
1319 goto free_exit;
1320 }
1321
c5f6e493
KW
1322 if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1323 qdict_put(options, "driver", qstring_from_str(bs->backing_format));
9156df12
PB
1324 }
1325
d9b7b057 1326 backing_hd = NULL;
f3930ed0
KW
1327 ret = bdrv_open_inherit(&backing_hd,
1328 *backing_filename ? backing_filename : NULL,
d9b7b057 1329 reference, options, 0, bs, &child_backing,
e43bfd9c 1330 errp);
9156df12 1331 if (ret < 0) {
9156df12 1332 bs->open_flags |= BDRV_O_NO_BACKING;
e43bfd9c 1333 error_prepend(errp, "Could not open backing file: ");
1ba4b6a5 1334 goto free_exit;
9156df12 1335 }
df581792 1336
5db15a57
KW
1337 /* Hook up the backing file link; drop our reference, bs owns the
1338 * backing_hd reference now */
8d24cce1 1339 bdrv_set_backing_hd(bs, backing_hd);
5db15a57 1340 bdrv_unref(backing_hd);
d80ac658 1341
d9b7b057
KW
1342 qdict_del(parent_options, bdref_key);
1343
1ba4b6a5
BC
1344free_exit:
1345 g_free(backing_filename);
d9b7b057 1346 QDECREF(tmp_parent_options);
1ba4b6a5 1347 return ret;
9156df12
PB
1348}
1349
da557aac
HR
1350/*
1351 * Opens a disk image whose options are given as BlockdevRef in another block
1352 * device's options.
1353 *
da557aac 1354 * If allow_none is true, no image will be opened if filename is false and no
b4b059f6 1355 * BlockdevRef is given. NULL will be returned, but errp remains unset.
da557aac
HR
1356 *
1357 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1358 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1359 * itself, all options starting with "${bdref_key}." are considered part of the
1360 * BlockdevRef.
1361 *
1362 * The BlockdevRef will be removed from the options QDict.
1363 */
b4b059f6
KW
1364BdrvChild *bdrv_open_child(const char *filename,
1365 QDict *options, const char *bdref_key,
1366 BlockDriverState* parent,
1367 const BdrvChildRole *child_role,
1368 bool allow_none, Error **errp)
da557aac 1369{
b4b059f6
KW
1370 BdrvChild *c = NULL;
1371 BlockDriverState *bs;
da557aac
HR
1372 QDict *image_options;
1373 int ret;
1374 char *bdref_key_dot;
1375 const char *reference;
1376
df581792 1377 assert(child_role != NULL);
f67503e5 1378
da557aac
HR
1379 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1380 qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1381 g_free(bdref_key_dot);
1382
1383 reference = qdict_get_try_str(options, bdref_key);
1384 if (!filename && !reference && !qdict_size(image_options)) {
b4b059f6 1385 if (!allow_none) {
da557aac
HR
1386 error_setg(errp, "A block device must be specified for \"%s\"",
1387 bdref_key);
da557aac 1388 }
b20e61e0 1389 QDECREF(image_options);
da557aac
HR
1390 goto done;
1391 }
1392
b4b059f6
KW
1393 bs = NULL;
1394 ret = bdrv_open_inherit(&bs, filename, reference, image_options, 0,
ce343771 1395 parent, child_role, errp);
df581792
KW
1396 if (ret < 0) {
1397 goto done;
1398 }
1399
260fecf1 1400 c = bdrv_attach_child(parent, bs, bdref_key, child_role);
da557aac
HR
1401
1402done:
1403 qdict_del(options, bdref_key);
b4b059f6
KW
1404 return c;
1405}
1406
73176bee
KW
1407static int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags,
1408 QDict *snapshot_options, Error **errp)
b998875d
KW
1409{
1410 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1ba4b6a5 1411 char *tmp_filename = g_malloc0(PATH_MAX + 1);
b998875d 1412 int64_t total_size;
83d0521a 1413 QemuOpts *opts = NULL;
b998875d 1414 BlockDriverState *bs_snapshot;
c2e0dbbf 1415 Error *local_err = NULL;
b998875d
KW
1416 int ret;
1417
1418 /* if snapshot, we create a temporary backing file and open it
1419 instead of opening 'filename' directly */
1420
1421 /* Get the required size from the image */
f187743a
KW
1422 total_size = bdrv_getlength(bs);
1423 if (total_size < 0) {
6b8aeca5 1424 ret = total_size;
f187743a 1425 error_setg_errno(errp, -total_size, "Could not get image size");
1ba4b6a5 1426 goto out;
f187743a 1427 }
b998875d
KW
1428
1429 /* Create the temporary image */
1ba4b6a5 1430 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
b998875d
KW
1431 if (ret < 0) {
1432 error_setg_errno(errp, -ret, "Could not get temporary filename");
1ba4b6a5 1433 goto out;
b998875d
KW
1434 }
1435
ef810437 1436 opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
c282e1fd 1437 &error_abort);
39101f25 1438 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
e43bfd9c 1439 ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, errp);
83d0521a 1440 qemu_opts_del(opts);
b998875d 1441 if (ret < 0) {
e43bfd9c
MA
1442 error_prepend(errp, "Could not create temporary overlay '%s': ",
1443 tmp_filename);
1ba4b6a5 1444 goto out;
b998875d
KW
1445 }
1446
73176bee 1447 /* Prepare options QDict for the temporary file */
b998875d
KW
1448 qdict_put(snapshot_options, "file.driver",
1449 qstring_from_str("file"));
1450 qdict_put(snapshot_options, "file.filename",
1451 qstring_from_str(tmp_filename));
e6641719
HR
1452 qdict_put(snapshot_options, "driver",
1453 qstring_from_str("qcow2"));
b998875d 1454
e4e9986b 1455 bs_snapshot = bdrv_new();
b998875d
KW
1456
1457 ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
6ebf9aa2 1458 flags, &local_err);
73176bee 1459 snapshot_options = NULL;
b998875d
KW
1460 if (ret < 0) {
1461 error_propagate(errp, local_err);
1ba4b6a5 1462 goto out;
b998875d
KW
1463 }
1464
1465 bdrv_append(bs_snapshot, bs);
1ba4b6a5
BC
1466
1467out:
73176bee 1468 QDECREF(snapshot_options);
1ba4b6a5 1469 g_free(tmp_filename);
6b8aeca5 1470 return ret;
b998875d
KW
1471}
1472
b6ce07aa
KW
1473/*
1474 * Opens a disk image (raw, qcow2, vmdk, ...)
de9c0cec
KW
1475 *
1476 * options is a QDict of options to pass to the block drivers, or NULL for an
1477 * empty set of options. The reference to the QDict belongs to the block layer
1478 * after the call (even on failure), so if the caller intends to reuse the
1479 * dictionary, it needs to use QINCREF() before calling bdrv_open.
f67503e5
HR
1480 *
1481 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1482 * If it is not NULL, the referenced BDS will be reused.
ddf5636d
HR
1483 *
1484 * The reference parameter may be used to specify an existing block device which
1485 * should be opened. If specified, neither options nor a filename may be given,
1486 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
b6ce07aa 1487 */
f3930ed0
KW
1488static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
1489 const char *reference, QDict *options, int flags,
1490 BlockDriverState *parent,
ce343771 1491 const BdrvChildRole *child_role, Error **errp)
ea2384d3 1492{
b6ce07aa 1493 int ret;
9a4f4c31
KW
1494 BdrvChild *file = NULL;
1495 BlockDriverState *bs;
ce343771 1496 BlockDriver *drv = NULL;
74fe54f2 1497 const char *drvname;
3e8c2e57 1498 const char *backing;
34b5d2c6 1499 Error *local_err = NULL;
73176bee 1500 QDict *snapshot_options = NULL;
b1e6fc08 1501 int snapshot_flags = 0;
712e7874 1502
f67503e5 1503 assert(pbs);
f3930ed0
KW
1504 assert(!child_role || !flags);
1505 assert(!child_role == !parent);
f67503e5 1506
ddf5636d
HR
1507 if (reference) {
1508 bool options_non_empty = options ? qdict_size(options) : false;
1509 QDECREF(options);
1510
1511 if (*pbs) {
1512 error_setg(errp, "Cannot reuse an existing BDS when referencing "
1513 "another block device");
1514 return -EINVAL;
1515 }
1516
1517 if (filename || options_non_empty) {
1518 error_setg(errp, "Cannot reference an existing block device with "
1519 "additional options or a new filename");
1520 return -EINVAL;
1521 }
1522
1523 bs = bdrv_lookup_bs(reference, reference, errp);
1524 if (!bs) {
1525 return -ENODEV;
1526 }
76b22320
KW
1527
1528 if (bs->throttle_state) {
1529 error_setg(errp, "Cannot reference an existing block device for "
1530 "which I/O throttling is enabled");
1531 return -EINVAL;
1532 }
1533
ddf5636d
HR
1534 bdrv_ref(bs);
1535 *pbs = bs;
1536 return 0;
1537 }
1538
f67503e5
HR
1539 if (*pbs) {
1540 bs = *pbs;
1541 } else {
e4e9986b 1542 bs = bdrv_new();
f67503e5
HR
1543 }
1544
de9c0cec
KW
1545 /* NULL means an empty set of options */
1546 if (options == NULL) {
1547 options = qdict_new();
1548 }
1549
145f598e 1550 /* json: syntax counts as explicit options, as if in the QDict */
de3b53f0
KW
1551 parse_json_protocol(options, &filename, &local_err);
1552 if (local_err) {
1553 ret = -EINVAL;
1554 goto fail;
1555 }
1556
145f598e
KW
1557 bs->explicit_options = qdict_clone_shallow(options);
1558
f3930ed0 1559 if (child_role) {
bddcec37 1560 bs->inherits_from = parent;
8e2160e2
KW
1561 child_role->inherit_options(&flags, options,
1562 parent->open_flags, parent->options);
f3930ed0
KW
1563 }
1564
de3b53f0 1565 ret = bdrv_fill_options(&options, filename, &flags, &local_err);
462f5bcf
KW
1566 if (local_err) {
1567 goto fail;
1568 }
1569
62392ebb
KW
1570 bs->open_flags = flags;
1571 bs->options = options;
1572 options = qdict_clone_shallow(options);
1573
76c591b0 1574 /* Find the right image format driver */
76c591b0
KW
1575 drvname = qdict_get_try_str(options, "driver");
1576 if (drvname) {
1577 drv = bdrv_find_format(drvname);
76c591b0
KW
1578 if (!drv) {
1579 error_setg(errp, "Unknown driver: '%s'", drvname);
1580 ret = -EINVAL;
1581 goto fail;
1582 }
1583 }
1584
1585 assert(drvname || !(flags & BDRV_O_PROTOCOL));
76c591b0 1586
3e8c2e57
AG
1587 backing = qdict_get_try_str(options, "backing");
1588 if (backing && *backing == '\0') {
1589 flags |= BDRV_O_NO_BACKING;
1590 qdict_del(options, "backing");
1591 }
1592
f500a6d3 1593 /* Open image file without format layer */
f4788adc
KW
1594 if ((flags & BDRV_O_PROTOCOL) == 0) {
1595 if (flags & BDRV_O_RDWR) {
1596 flags |= BDRV_O_ALLOW_RDWR;
1597 }
1598 if (flags & BDRV_O_SNAPSHOT) {
73176bee
KW
1599 snapshot_options = qdict_new();
1600 bdrv_temp_snapshot_options(&snapshot_flags, snapshot_options,
1601 flags, options);
8e2160e2 1602 bdrv_backing_options(&flags, options, flags, options);
f4788adc 1603 }
f500a6d3 1604
f3930ed0 1605 bs->open_flags = flags;
1fdd6933 1606
9a4f4c31
KW
1607 file = bdrv_open_child(filename, options, "file", bs,
1608 &child_file, true, &local_err);
1fdd6933
KW
1609 if (local_err) {
1610 ret = -EINVAL;
f4788adc
KW
1611 goto fail;
1612 }
f500a6d3
KW
1613 }
1614
76c591b0 1615 /* Image format probing */
38f3ef57 1616 bs->probed = !drv;
76c591b0 1617 if (!drv && file) {
9a4f4c31 1618 ret = find_image_format(file->bs, filename, &drv, &local_err);
17b005f1 1619 if (ret < 0) {
8bfea15d 1620 goto fail;
2a05cbe4 1621 }
62392ebb
KW
1622 /*
1623 * This option update would logically belong in bdrv_fill_options(),
1624 * but we first need to open bs->file for the probing to work, while
1625 * opening bs->file already requires the (mostly) final set of options
1626 * so that cache mode etc. can be inherited.
1627 *
1628 * Adding the driver later is somewhat ugly, but it's not an option
1629 * that would ever be inherited, so it's correct. We just need to make
1630 * sure to update both bs->options (which has the full effective
1631 * options for bs) and options (which has file.* already removed).
1632 */
1633 qdict_put(bs->options, "driver", qstring_from_str(drv->format_name));
1634 qdict_put(options, "driver", qstring_from_str(drv->format_name));
76c591b0 1635 } else if (!drv) {
17b005f1
KW
1636 error_setg(errp, "Must specify either driver or file");
1637 ret = -EINVAL;
8bfea15d 1638 goto fail;
ea2384d3 1639 }
b6ce07aa 1640
53a29513
HR
1641 /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
1642 assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
1643 /* file must be NULL if a protocol BDS is about to be created
1644 * (the inverse results in an error message from bdrv_open_common()) */
1645 assert(!(flags & BDRV_O_PROTOCOL) || !file);
1646
b6ce07aa 1647 /* Open the image */
82dc8b41 1648 ret = bdrv_open_common(bs, file, options, &local_err);
b6ce07aa 1649 if (ret < 0) {
8bfea15d 1650 goto fail;
6987307c
CH
1651 }
1652
2a05cbe4 1653 if (file && (bs->file != file)) {
9a4f4c31 1654 bdrv_unref_child(bs, file);
f500a6d3
KW
1655 file = NULL;
1656 }
1657
b6ce07aa 1658 /* If there is a backing file, use it */
9156df12 1659 if ((flags & BDRV_O_NO_BACKING) == 0) {
d9b7b057 1660 ret = bdrv_open_backing_file(bs, options, "backing", &local_err);
b6ce07aa 1661 if (ret < 0) {
b6ad491a 1662 goto close_and_fail;
b6ce07aa 1663 }
b6ce07aa
KW
1664 }
1665
91af7014
HR
1666 bdrv_refresh_filename(bs);
1667
b6ad491a 1668 /* Check if any unknown options were used */
5acd9d81 1669 if (options && (qdict_size(options) != 0)) {
b6ad491a 1670 const QDictEntry *entry = qdict_first(options);
5acd9d81
HR
1671 if (flags & BDRV_O_PROTOCOL) {
1672 error_setg(errp, "Block protocol '%s' doesn't support the option "
1673 "'%s'", drv->format_name, entry->key);
1674 } else {
d0e46a55
HR
1675 error_setg(errp,
1676 "Block format '%s' does not support the option '%s'",
1677 drv->format_name, entry->key);
5acd9d81 1678 }
b6ad491a
KW
1679
1680 ret = -EINVAL;
1681 goto close_and_fail;
1682 }
b6ad491a 1683
b6ce07aa 1684 if (!bdrv_key_required(bs)) {
a7f53e26
MA
1685 if (bs->blk) {
1686 blk_dev_change_media_cb(bs->blk, true);
1687 }
c3adb58f
MA
1688 } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1689 && !runstate_check(RUN_STATE_INMIGRATE)
1690 && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1691 error_setg(errp,
1692 "Guest must be stopped for opening of encrypted image");
1693 ret = -EBUSY;
1694 goto close_and_fail;
b6ce07aa
KW
1695 }
1696
c3adb58f 1697 QDECREF(options);
f67503e5 1698 *pbs = bs;
dd62f1ca
KW
1699
1700 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1701 * temporary snapshot afterwards. */
1702 if (snapshot_flags) {
73176bee
KW
1703 ret = bdrv_append_temp_snapshot(bs, snapshot_flags, snapshot_options,
1704 &local_err);
1705 snapshot_options = NULL;
dd62f1ca
KW
1706 if (local_err) {
1707 goto close_and_fail;
1708 }
1709 }
1710
b6ce07aa
KW
1711 return 0;
1712
8bfea15d 1713fail:
f500a6d3 1714 if (file != NULL) {
9a4f4c31 1715 bdrv_unref_child(bs, file);
f500a6d3 1716 }
73176bee 1717 QDECREF(snapshot_options);
145f598e 1718 QDECREF(bs->explicit_options);
de9c0cec 1719 QDECREF(bs->options);
b6ad491a 1720 QDECREF(options);
de9c0cec 1721 bs->options = NULL;
f67503e5
HR
1722 if (!*pbs) {
1723 /* If *pbs is NULL, a new BDS has been created in this function and
1724 needs to be freed now. Otherwise, it does not need to be closed,
1725 since it has not really been opened yet. */
1726 bdrv_unref(bs);
1727 }
84d18f06 1728 if (local_err) {
34b5d2c6
HR
1729 error_propagate(errp, local_err);
1730 }
b6ad491a 1731 return ret;
de9c0cec 1732
b6ad491a 1733close_and_fail:
f67503e5
HR
1734 /* See fail path, but now the BDS has to be always closed */
1735 if (*pbs) {
1736 bdrv_close(bs);
1737 } else {
1738 bdrv_unref(bs);
1739 }
73176bee 1740 QDECREF(snapshot_options);
b6ad491a 1741 QDECREF(options);
84d18f06 1742 if (local_err) {
34b5d2c6
HR
1743 error_propagate(errp, local_err);
1744 }
b6ce07aa
KW
1745 return ret;
1746}
1747
f3930ed0 1748int bdrv_open(BlockDriverState **pbs, const char *filename,
6ebf9aa2 1749 const char *reference, QDict *options, int flags, Error **errp)
f3930ed0
KW
1750{
1751 return bdrv_open_inherit(pbs, filename, reference, options, flags, NULL,
ce343771 1752 NULL, errp);
f3930ed0
KW
1753}
1754
e971aa12
JC
1755typedef struct BlockReopenQueueEntry {
1756 bool prepared;
1757 BDRVReopenState state;
1758 QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1759} BlockReopenQueueEntry;
1760
1761/*
1762 * Adds a BlockDriverState to a simple queue for an atomic, transactional
1763 * reopen of multiple devices.
1764 *
1765 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1766 * already performed, or alternatively may be NULL a new BlockReopenQueue will
1767 * be created and initialized. This newly created BlockReopenQueue should be
1768 * passed back in for subsequent calls that are intended to be of the same
1769 * atomic 'set'.
1770 *
1771 * bs is the BlockDriverState to add to the reopen queue.
1772 *
4d2cb092
KW
1773 * options contains the changed options for the associated bs
1774 * (the BlockReopenQueue takes ownership)
1775 *
e971aa12
JC
1776 * flags contains the open flags for the associated bs
1777 *
1778 * returns a pointer to bs_queue, which is either the newly allocated
1779 * bs_queue, or the existing bs_queue being used.
1780 *
1781 */
28518102
KW
1782static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
1783 BlockDriverState *bs,
1784 QDict *options,
1785 int flags,
1786 const BdrvChildRole *role,
1787 QDict *parent_options,
1788 int parent_flags)
e971aa12
JC
1789{
1790 assert(bs != NULL);
1791
1792 BlockReopenQueueEntry *bs_entry;
67251a31 1793 BdrvChild *child;
145f598e 1794 QDict *old_options, *explicit_options;
67251a31 1795
e971aa12
JC
1796 if (bs_queue == NULL) {
1797 bs_queue = g_new0(BlockReopenQueue, 1);
1798 QSIMPLEQ_INIT(bs_queue);
1799 }
1800
4d2cb092
KW
1801 if (!options) {
1802 options = qdict_new();
1803 }
1804
28518102
KW
1805 /*
1806 * Precedence of options:
1807 * 1. Explicitly passed in options (highest)
91a097e7 1808 * 2. Set in flags (only for top level)
145f598e 1809 * 3. Retained from explicitly set options of bs
8e2160e2 1810 * 4. Inherited from parent node
28518102
KW
1811 * 5. Retained from effective options of bs
1812 */
1813
91a097e7
KW
1814 if (!parent_options) {
1815 /*
1816 * Any setting represented by flags is always updated. If the
1817 * corresponding QDict option is set, it takes precedence. Otherwise
1818 * the flag is translated into a QDict option. The old setting of bs is
1819 * not considered.
1820 */
1821 update_options_from_flags(options, flags);
1822 }
1823
145f598e
KW
1824 /* Old explicitly set values (don't overwrite by inherited value) */
1825 old_options = qdict_clone_shallow(bs->explicit_options);
1826 bdrv_join_options(bs, options, old_options);
1827 QDECREF(old_options);
1828
1829 explicit_options = qdict_clone_shallow(options);
1830
28518102
KW
1831 /* Inherit from parent node */
1832 if (parent_options) {
1833 assert(!flags);
8e2160e2 1834 role->inherit_options(&flags, options, parent_flags, parent_options);
28518102
KW
1835 }
1836
1837 /* Old values are used for options that aren't set yet */
4d2cb092 1838 old_options = qdict_clone_shallow(bs->options);
cddff5ba 1839 bdrv_join_options(bs, options, old_options);
4d2cb092
KW
1840 QDECREF(old_options);
1841
f1f25a2e
KW
1842 /* bdrv_open() masks this flag out */
1843 flags &= ~BDRV_O_PROTOCOL;
1844
67251a31 1845 QLIST_FOREACH(child, &bs->children, next) {
4c9dfe5d
KW
1846 QDict *new_child_options;
1847 char *child_key_dot;
67251a31 1848
4c9dfe5d
KW
1849 /* reopen can only change the options of block devices that were
1850 * implicitly created and inherited options. For other (referenced)
1851 * block devices, a syntax like "backing.foo" results in an error. */
67251a31
KW
1852 if (child->bs->inherits_from != bs) {
1853 continue;
1854 }
1855
4c9dfe5d
KW
1856 child_key_dot = g_strdup_printf("%s.", child->name);
1857 qdict_extract_subqdict(options, &new_child_options, child_key_dot);
1858 g_free(child_key_dot);
1859
28518102
KW
1860 bdrv_reopen_queue_child(bs_queue, child->bs, new_child_options, 0,
1861 child->role, options, flags);
e971aa12
JC
1862 }
1863
1864 bs_entry = g_new0(BlockReopenQueueEntry, 1);
1865 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1866
1867 bs_entry->state.bs = bs;
4d2cb092 1868 bs_entry->state.options = options;
145f598e 1869 bs_entry->state.explicit_options = explicit_options;
e971aa12
JC
1870 bs_entry->state.flags = flags;
1871
1872 return bs_queue;
1873}
1874
28518102
KW
1875BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1876 BlockDriverState *bs,
1877 QDict *options, int flags)
1878{
1879 return bdrv_reopen_queue_child(bs_queue, bs, options, flags,
1880 NULL, NULL, 0);
1881}
1882
e971aa12
JC
1883/*
1884 * Reopen multiple BlockDriverStates atomically & transactionally.
1885 *
1886 * The queue passed in (bs_queue) must have been built up previous
1887 * via bdrv_reopen_queue().
1888 *
1889 * Reopens all BDS specified in the queue, with the appropriate
1890 * flags. All devices are prepared for reopen, and failure of any
1891 * device will cause all device changes to be abandonded, and intermediate
1892 * data cleaned up.
1893 *
1894 * If all devices prepare successfully, then the changes are committed
1895 * to all devices.
1896 *
1897 */
1898int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1899{
1900 int ret = -1;
1901 BlockReopenQueueEntry *bs_entry, *next;
1902 Error *local_err = NULL;
1903
1904 assert(bs_queue != NULL);
1905
1906 bdrv_drain_all();
1907
1908 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1909 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1910 error_propagate(errp, local_err);
1911 goto cleanup;
1912 }
1913 bs_entry->prepared = true;
1914 }
1915
1916 /* If we reach this point, we have success and just need to apply the
1917 * changes
1918 */
1919 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1920 bdrv_reopen_commit(&bs_entry->state);
1921 }
1922
1923 ret = 0;
1924
1925cleanup:
1926 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1927 if (ret && bs_entry->prepared) {
1928 bdrv_reopen_abort(&bs_entry->state);
145f598e
KW
1929 } else if (ret) {
1930 QDECREF(bs_entry->state.explicit_options);
e971aa12 1931 }
4d2cb092 1932 QDECREF(bs_entry->state.options);
e971aa12
JC
1933 g_free(bs_entry);
1934 }
1935 g_free(bs_queue);
1936 return ret;
1937}
1938
1939
1940/* Reopen a single BlockDriverState with the specified flags. */
1941int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1942{
1943 int ret = -1;
1944 Error *local_err = NULL;
4d2cb092 1945 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags);
e971aa12
JC
1946
1947 ret = bdrv_reopen_multiple(queue, &local_err);
1948 if (local_err != NULL) {
1949 error_propagate(errp, local_err);
1950 }
1951 return ret;
1952}
1953
1954
1955/*
1956 * Prepares a BlockDriverState for reopen. All changes are staged in the
1957 * 'opaque' field of the BDRVReopenState, which is used and allocated by
1958 * the block driver layer .bdrv_reopen_prepare()
1959 *
1960 * bs is the BlockDriverState to reopen
1961 * flags are the new open flags
1962 * queue is the reopen queue
1963 *
1964 * Returns 0 on success, non-zero on error. On error errp will be set
1965 * as well.
1966 *
1967 * On failure, bdrv_reopen_abort() will be called to clean up any data.
1968 * It is the responsibility of the caller to then call the abort() or
1969 * commit() for any other BDS that have been left in a prepare() state
1970 *
1971 */
1972int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1973 Error **errp)
1974{
1975 int ret = -1;
1976 Error *local_err = NULL;
1977 BlockDriver *drv;
ccf9dc07
KW
1978 QemuOpts *opts;
1979 const char *value;
e971aa12
JC
1980
1981 assert(reopen_state != NULL);
1982 assert(reopen_state->bs->drv != NULL);
1983 drv = reopen_state->bs->drv;
1984
ccf9dc07
KW
1985 /* Process generic block layer options */
1986 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
1987 qemu_opts_absorb_qdict(opts, reopen_state->options, &local_err);
1988 if (local_err) {
1989 error_propagate(errp, local_err);
1990 ret = -EINVAL;
1991 goto error;
1992 }
1993
91a097e7
KW
1994 update_flags_from_options(&reopen_state->flags, opts);
1995
ccf9dc07
KW
1996 /* node-name and driver must be unchanged. Put them back into the QDict, so
1997 * that they are checked at the end of this function. */
1998 value = qemu_opt_get(opts, "node-name");
1999 if (value) {
2000 qdict_put(reopen_state->options, "node-name", qstring_from_str(value));
2001 }
2002
2003 value = qemu_opt_get(opts, "driver");
2004 if (value) {
2005 qdict_put(reopen_state->options, "driver", qstring_from_str(value));
2006 }
2007
e971aa12
JC
2008 /* if we are to stay read-only, do not allow permission change
2009 * to r/w */
2010 if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
2011 reopen_state->flags & BDRV_O_RDWR) {
81e5f78a
AG
2012 error_setg(errp, "Node '%s' is read only",
2013 bdrv_get_device_or_node_name(reopen_state->bs));
e971aa12
JC
2014 goto error;
2015 }
2016
2017
2018 ret = bdrv_flush(reopen_state->bs);
2019 if (ret) {
455b0fde 2020 error_setg_errno(errp, -ret, "Error flushing drive");
e971aa12
JC
2021 goto error;
2022 }
2023
2024 if (drv->bdrv_reopen_prepare) {
2025 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
2026 if (ret) {
2027 if (local_err != NULL) {
2028 error_propagate(errp, local_err);
2029 } else {
d8b6895f
LC
2030 error_setg(errp, "failed while preparing to reopen image '%s'",
2031 reopen_state->bs->filename);
e971aa12
JC
2032 }
2033 goto error;
2034 }
2035 } else {
2036 /* It is currently mandatory to have a bdrv_reopen_prepare()
2037 * handler for each supported drv. */
81e5f78a
AG
2038 error_setg(errp, "Block format '%s' used by node '%s' "
2039 "does not support reopening files", drv->format_name,
2040 bdrv_get_device_or_node_name(reopen_state->bs));
e971aa12
JC
2041 ret = -1;
2042 goto error;
2043 }
2044
4d2cb092
KW
2045 /* Options that are not handled are only okay if they are unchanged
2046 * compared to the old state. It is expected that some options are only
2047 * used for the initial open, but not reopen (e.g. filename) */
2048 if (qdict_size(reopen_state->options)) {
2049 const QDictEntry *entry = qdict_first(reopen_state->options);
2050
2051 do {
2052 QString *new_obj = qobject_to_qstring(entry->value);
2053 const char *new = qstring_get_str(new_obj);
2054 const char *old = qdict_get_try_str(reopen_state->bs->options,
2055 entry->key);
2056
2057 if (!old || strcmp(new, old)) {
2058 error_setg(errp, "Cannot change the option '%s'", entry->key);
2059 ret = -EINVAL;
2060 goto error;
2061 }
2062 } while ((entry = qdict_next(reopen_state->options, entry)));
2063 }
2064
e971aa12
JC
2065 ret = 0;
2066
2067error:
ccf9dc07 2068 qemu_opts_del(opts);
e971aa12
JC
2069 return ret;
2070}
2071
2072/*
2073 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
2074 * makes them final by swapping the staging BlockDriverState contents into
2075 * the active BlockDriverState contents.
2076 */
2077void bdrv_reopen_commit(BDRVReopenState *reopen_state)
2078{
2079 BlockDriver *drv;
2080
2081 assert(reopen_state != NULL);
2082 drv = reopen_state->bs->drv;
2083 assert(drv != NULL);
2084
2085 /* If there are any driver level actions to take */
2086 if (drv->bdrv_reopen_commit) {
2087 drv->bdrv_reopen_commit(reopen_state);
2088 }
2089
2090 /* set BDS specific flags now */
145f598e
KW
2091 QDECREF(reopen_state->bs->explicit_options);
2092
2093 reopen_state->bs->explicit_options = reopen_state->explicit_options;
e971aa12 2094 reopen_state->bs->open_flags = reopen_state->flags;
e971aa12 2095 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
355ef4ac 2096
3baca891 2097 bdrv_refresh_limits(reopen_state->bs, NULL);
e971aa12
JC
2098}
2099
2100/*
2101 * Abort the reopen, and delete and free the staged changes in
2102 * reopen_state
2103 */
2104void bdrv_reopen_abort(BDRVReopenState *reopen_state)
2105{
2106 BlockDriver *drv;
2107
2108 assert(reopen_state != NULL);
2109 drv = reopen_state->bs->drv;
2110 assert(drv != NULL);
2111
2112 if (drv->bdrv_reopen_abort) {
2113 drv->bdrv_reopen_abort(reopen_state);
2114 }
145f598e
KW
2115
2116 QDECREF(reopen_state->explicit_options);
e971aa12
JC
2117}
2118
2119
64dff520 2120static void bdrv_close(BlockDriverState *bs)
fc01f7e7 2121{
33384421
HR
2122 BdrvAioNotifier *ban, *ban_next;
2123
ca9bd24c 2124 assert(!bs->job);
99b7e775
AG
2125
2126 /* Disable I/O limits and drain all pending throttled requests */
a0d64a61 2127 if (bs->throttle_state) {
99b7e775
AG
2128 bdrv_io_limits_disable(bs);
2129 }
2130
fc27291d 2131 bdrv_drained_begin(bs); /* complete I/O */
58fda173 2132 bdrv_flush(bs);
53ec73e2 2133 bdrv_drain(bs); /* in case flush left pending I/O */
fc27291d 2134
c5acdc9a
HR
2135 bdrv_release_named_dirty_bitmaps(bs);
2136 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
2137
b4d02820
HR
2138 if (bs->blk) {
2139 blk_dev_change_media_cb(bs->blk, false);
2140 }
2141
3cbc002c 2142 if (bs->drv) {
6e93e7c4
KW
2143 BdrvChild *child, *next;
2144
9a7dedbc 2145 bs->drv->bdrv_close(bs);
9a4f4c31 2146 bs->drv = NULL;
9a7dedbc 2147
5db15a57 2148 bdrv_set_backing_hd(bs, NULL);
9a7dedbc 2149
9a4f4c31
KW
2150 if (bs->file != NULL) {
2151 bdrv_unref_child(bs, bs->file);
2152 bs->file = NULL;
2153 }
2154
6e93e7c4 2155 QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
33a60407
KW
2156 /* TODO Remove bdrv_unref() from drivers' close function and use
2157 * bdrv_unref_child() here */
bddcec37
KW
2158 if (child->bs->inherits_from == bs) {
2159 child->bs->inherits_from = NULL;
2160 }
33a60407 2161 bdrv_detach_child(child);
6e93e7c4
KW
2162 }
2163
7267c094 2164 g_free(bs->opaque);
ea2384d3 2165 bs->opaque = NULL;
53fec9d3 2166 bs->copy_on_read = 0;
a275fa42
PB
2167 bs->backing_file[0] = '\0';
2168 bs->backing_format[0] = '\0';
6405875c
PB
2169 bs->total_sectors = 0;
2170 bs->encrypted = 0;
2171 bs->valid_key = 0;
2172 bs->sg = 0;
0d51b4de 2173 bs->zero_beyond_eof = false;
de9c0cec 2174 QDECREF(bs->options);
145f598e 2175 QDECREF(bs->explicit_options);
de9c0cec 2176 bs->options = NULL;
91af7014
HR
2177 QDECREF(bs->full_open_options);
2178 bs->full_open_options = NULL;
b338082b 2179 }
98f90dba 2180
33384421
HR
2181 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
2182 g_free(ban);
2183 }
2184 QLIST_INIT(&bs->aio_notifiers);
fc27291d 2185 bdrv_drained_end(bs);
b338082b
FB
2186}
2187
2bc93fed
MK
2188void bdrv_close_all(void)
2189{
2190 BlockDriverState *bs;
ca9bd24c
HR
2191 AioContext *aio_context;
2192
2193 /* Drop references from requests still in flight, such as canceled block
2194 * jobs whose AIO context has not been polled yet */
2195 bdrv_drain_all();
2bc93fed 2196
ca9bd24c
HR
2197 blk_remove_all_bs();
2198 blockdev_close_all_bdrv_states();
ed78cda3 2199
ca9bd24c
HR
2200 /* Cancel all block jobs */
2201 while (!QTAILQ_EMPTY(&all_bdrv_states)) {
2202 QTAILQ_FOREACH(bs, &all_bdrv_states, bs_list) {
2203 aio_context = bdrv_get_aio_context(bs);
2204
2205 aio_context_acquire(aio_context);
2206 if (bs->job) {
2207 block_job_cancel_sync(bs->job);
2208 aio_context_release(aio_context);
2209 break;
2210 }
2211 aio_context_release(aio_context);
2212 }
2213
2214 /* All the remaining BlockDriverStates are referenced directly or
2215 * indirectly from block jobs, so there needs to be at least one BDS
2216 * directly used by a block job */
2217 assert(bs);
2bc93fed
MK
2218 }
2219}
2220
8e419aef 2221/* Fields that need to stay with the top-level BDS */
4ddc07ca
PB
2222static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
2223 BlockDriverState *bs_src)
8802d1fd 2224{
4ddc07ca 2225 /* move some fields that need to stay attached to the device */
063dd40e 2226}
a9fc4408 2227
dd62f1ca
KW
2228static void change_parent_backing_link(BlockDriverState *from,
2229 BlockDriverState *to)
2230{
2231 BdrvChild *c, *next;
2232
f21d96d0
KW
2233 if (from->blk) {
2234 /* FIXME We bypass blk_set_bs(), so we need to make these updates
2235 * manually. The root problem is not in this change function, but the
2236 * existence of BlockDriverState.blk. */
2237 to->blk = from->blk;
2238 from->blk = NULL;
2239 }
2240
dd62f1ca
KW
2241 QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
2242 assert(c->role != &child_backing);
2243 c->bs = to;
2244 QLIST_REMOVE(c, next_parent);
2245 QLIST_INSERT_HEAD(&to->parents, c, next_parent);
2246 bdrv_ref(to);
2247 bdrv_unref(from);
2248 }
dd62f1ca
KW
2249}
2250
2251static void swap_feature_fields(BlockDriverState *bs_top,
2252 BlockDriverState *bs_new)
2253{
2254 BlockDriverState tmp;
2255
2256 bdrv_move_feature_fields(&tmp, bs_top);
2257 bdrv_move_feature_fields(bs_top, bs_new);
2258 bdrv_move_feature_fields(bs_new, &tmp);
2259
2260 assert(!bs_new->throttle_state);
2261 if (bs_top->throttle_state) {
dd62f1ca
KW
2262 bdrv_io_limits_enable(bs_new, throttle_group_get_name(bs_top));
2263 bdrv_io_limits_disable(bs_top);
2264 }
2265}
2266
4ddc07ca
PB
2267/*
2268 * Add new bs contents at the top of an image chain while the chain is
2269 * live, while keeping required fields on the top layer.
2270 *
2271 * This will modify the BlockDriverState fields, and swap contents
2272 * between bs_new and bs_top. Both bs_new and bs_top are modified.
2273 *
bfb197e0 2274 * bs_new must not be attached to a BlockBackend.
4ddc07ca
PB
2275 *
2276 * This function does not create any image files.
dd62f1ca
KW
2277 *
2278 * bdrv_append() takes ownership of a bs_new reference and unrefs it because
2279 * that's what the callers commonly need. bs_new will be referenced by the old
2280 * parents of bs_top after bdrv_append() returns. If the caller needs to keep a
2281 * reference of its own, it must call bdrv_ref().
4ddc07ca
PB
2282 */
2283void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
2284{
dd62f1ca
KW
2285 assert(!bdrv_requests_pending(bs_top));
2286 assert(!bdrv_requests_pending(bs_new));
2287
2288 bdrv_ref(bs_top);
2289 change_parent_backing_link(bs_top, bs_new);
2290
2291 /* Some fields always stay on top of the backing file chain */
2292 swap_feature_fields(bs_top, bs_new);
2293
2294 bdrv_set_backing_hd(bs_new, bs_top);
2295 bdrv_unref(bs_top);
4ddc07ca 2296
dd62f1ca
KW
2297 /* bs_new is now referenced by its new parents, we don't need the
2298 * additional reference any more. */
2299 bdrv_unref(bs_new);
8802d1fd
JC
2300}
2301
3f09bfbc
KW
2302void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new)
2303{
2304 assert(!bdrv_requests_pending(old));
2305 assert(!bdrv_requests_pending(new));
2306
2307 bdrv_ref(old);
2308
2309 if (old->blk) {
2310 /* As long as these fields aren't in BlockBackend, but in the top-level
2311 * BlockDriverState, it's not possible for a BDS to have two BBs.
2312 *
2313 * We really want to copy the fields from old to new, but we go for a
2314 * swap instead so that pointers aren't duplicated and cause trouble.
2315 * (Also, bdrv_swap() used to do the same.) */
2316 assert(!new->blk);
2317 swap_feature_fields(old, new);
2318 }
2319 change_parent_backing_link(old, new);
2320
2321 /* Change backing files if a previously independent node is added to the
2322 * chain. For active commit, we replace top by its own (indirect) backing
2323 * file and don't do anything here so we don't build a loop. */
2324 if (new->backing == NULL && !bdrv_chain_contains(backing_bs(old), new)) {
2325 bdrv_set_backing_hd(new, backing_bs(old));
2326 bdrv_set_backing_hd(old, NULL);
2327 }
2328
2329 bdrv_unref(old);
2330}
2331
4f6fd349 2332static void bdrv_delete(BlockDriverState *bs)
b338082b 2333{
3e914655 2334 assert(!bs->job);
3718d8ab 2335 assert(bdrv_op_blocker_is_empty(bs));
4f6fd349 2336 assert(!bs->refcnt);
18846dee 2337
e1b5c52e
SH
2338 bdrv_close(bs);
2339
1b7bdbc1 2340 /* remove from list, if necessary */
63eaaae0
KW
2341 if (bs->node_name[0] != '\0') {
2342 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
2343 }
2c1d04e0
HR
2344 QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list);
2345
7267c094 2346 g_free(bs);
fc01f7e7
FB
2347}
2348
e97fc193
AL
2349/*
2350 * Run consistency checks on an image
2351 *
e076f338 2352 * Returns 0 if the check could be completed (it doesn't mean that the image is
a1c7273b 2353 * free of errors) or -errno when an internal error occurred. The results of the
e076f338 2354 * check are stored in res.
e97fc193 2355 */
4534ff54 2356int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
e97fc193 2357{
908bcd54
HR
2358 if (bs->drv == NULL) {
2359 return -ENOMEDIUM;
2360 }
e97fc193
AL
2361 if (bs->drv->bdrv_check == NULL) {
2362 return -ENOTSUP;
2363 }
2364
e076f338 2365 memset(res, 0, sizeof(*res));
4534ff54 2366 return bs->drv->bdrv_check(bs, res, fix);
e97fc193
AL
2367}
2368
8a426614
KW
2369#define COMMIT_BUF_SECTORS 2048
2370
33e3963e
FB
2371/* commit COW file into the raw image */
2372int bdrv_commit(BlockDriverState *bs)
2373{
19cb3738 2374 BlockDriver *drv = bs->drv;
72706ea4 2375 int64_t sector, total_sectors, length, backing_length;
8a426614 2376 int n, ro, open_flags;
0bce597d 2377 int ret = 0;
72706ea4 2378 uint8_t *buf = NULL;
33e3963e 2379
19cb3738
FB
2380 if (!drv)
2381 return -ENOMEDIUM;
6bb45158 2382
760e0063 2383 if (!bs->backing) {
4dca4b63 2384 return -ENOTSUP;
33e3963e
FB
2385 }
2386
bb00021d 2387 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
760e0063 2388 bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
2d3735d3
SH
2389 return -EBUSY;
2390 }
2391
760e0063
KW
2392 ro = bs->backing->bs->read_only;
2393 open_flags = bs->backing->bs->open_flags;
4dca4b63
NS
2394
2395 if (ro) {
760e0063 2396 if (bdrv_reopen(bs->backing->bs, open_flags | BDRV_O_RDWR, NULL)) {
0bce597d 2397 return -EACCES;
4dca4b63 2398 }
ea2384d3 2399 }
33e3963e 2400
72706ea4
JC
2401 length = bdrv_getlength(bs);
2402 if (length < 0) {
2403 ret = length;
2404 goto ro_cleanup;
2405 }
2406
760e0063 2407 backing_length = bdrv_getlength(bs->backing->bs);
72706ea4
JC
2408 if (backing_length < 0) {
2409 ret = backing_length;
2410 goto ro_cleanup;
2411 }
2412
2413 /* If our top snapshot is larger than the backing file image,
2414 * grow the backing file image if possible. If not possible,
2415 * we must return an error */
2416 if (length > backing_length) {
760e0063 2417 ret = bdrv_truncate(bs->backing->bs, length);
72706ea4
JC
2418 if (ret < 0) {
2419 goto ro_cleanup;
2420 }
2421 }
2422
2423 total_sectors = length >> BDRV_SECTOR_BITS;
857d4f46
KW
2424
2425 /* qemu_try_blockalign() for bs will choose an alignment that works for
760e0063 2426 * bs->backing->bs as well, so no need to compare the alignment manually. */
857d4f46
KW
2427 buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2428 if (buf == NULL) {
2429 ret = -ENOMEM;
2430 goto ro_cleanup;
2431 }
8a426614
KW
2432
2433 for (sector = 0; sector < total_sectors; sector += n) {
d663640c
PB
2434 ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2435 if (ret < 0) {
2436 goto ro_cleanup;
2437 }
2438 if (ret) {
dabfa6cc
KW
2439 ret = bdrv_read(bs, sector, buf, n);
2440 if (ret < 0) {
8a426614
KW
2441 goto ro_cleanup;
2442 }
2443
760e0063 2444 ret = bdrv_write(bs->backing->bs, sector, buf, n);
dabfa6cc 2445 if (ret < 0) {
8a426614
KW
2446 goto ro_cleanup;
2447 }
ea2384d3 2448 }
33e3963e 2449 }
95389c86 2450
1d44952f
CH
2451 if (drv->bdrv_make_empty) {
2452 ret = drv->bdrv_make_empty(bs);
dabfa6cc
KW
2453 if (ret < 0) {
2454 goto ro_cleanup;
2455 }
1d44952f
CH
2456 bdrv_flush(bs);
2457 }
95389c86 2458
3f5075ae
CH
2459 /*
2460 * Make sure all data we wrote to the backing device is actually
2461 * stable on disk.
2462 */
760e0063
KW
2463 if (bs->backing) {
2464 bdrv_flush(bs->backing->bs);
dabfa6cc 2465 }
4dca4b63 2466
dabfa6cc 2467 ret = 0;
4dca4b63 2468ro_cleanup:
857d4f46 2469 qemu_vfree(buf);
4dca4b63
NS
2470
2471 if (ro) {
0bce597d 2472 /* ignoring error return here */
760e0063 2473 bdrv_reopen(bs->backing->bs, open_flags & ~BDRV_O_RDWR, NULL);
4dca4b63
NS
2474 }
2475
1d44952f 2476 return ret;
33e3963e
FB
2477}
2478
756e6736
KW
2479/*
2480 * Return values:
2481 * 0 - success
2482 * -EINVAL - backing format specified, but no file
2483 * -ENOSPC - can't update the backing file because no space is left in the
2484 * image file header
2485 * -ENOTSUP - format driver doesn't support changing the backing file
2486 */
2487int bdrv_change_backing_file(BlockDriverState *bs,
2488 const char *backing_file, const char *backing_fmt)
2489{
2490 BlockDriver *drv = bs->drv;
469ef350 2491 int ret;
756e6736 2492
5f377794
PB
2493 /* Backing file format doesn't make sense without a backing file */
2494 if (backing_fmt && !backing_file) {
2495 return -EINVAL;
2496 }
2497
756e6736 2498 if (drv->bdrv_change_backing_file != NULL) {
469ef350 2499 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
756e6736 2500 } else {
469ef350 2501 ret = -ENOTSUP;
756e6736 2502 }
469ef350
PB
2503
2504 if (ret == 0) {
2505 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2506 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2507 }
2508 return ret;
756e6736
KW
2509}
2510
6ebdcee2
JC
2511/*
2512 * Finds the image layer in the chain that has 'bs' as its backing file.
2513 *
2514 * active is the current topmost image.
2515 *
2516 * Returns NULL if bs is not found in active's image chain,
2517 * or if active == bs.
4caf0fcd
JC
2518 *
2519 * Returns the bottommost base image if bs == NULL.
6ebdcee2
JC
2520 */
2521BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2522 BlockDriverState *bs)
2523{
760e0063
KW
2524 while (active && bs != backing_bs(active)) {
2525 active = backing_bs(active);
6ebdcee2
JC
2526 }
2527
4caf0fcd
JC
2528 return active;
2529}
6ebdcee2 2530
4caf0fcd
JC
2531/* Given a BDS, searches for the base layer. */
2532BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2533{
2534 return bdrv_find_overlay(bs, NULL);
6ebdcee2
JC
2535}
2536
6ebdcee2
JC
2537/*
2538 * Drops images above 'base' up to and including 'top', and sets the image
2539 * above 'top' to have base as its backing file.
2540 *
2541 * Requires that the overlay to 'top' is opened r/w, so that the backing file
2542 * information in 'bs' can be properly updated.
2543 *
2544 * E.g., this will convert the following chain:
2545 * bottom <- base <- intermediate <- top <- active
2546 *
2547 * to
2548 *
2549 * bottom <- base <- active
2550 *
2551 * It is allowed for bottom==base, in which case it converts:
2552 *
2553 * base <- intermediate <- top <- active
2554 *
2555 * to
2556 *
2557 * base <- active
2558 *
54e26900
JC
2559 * If backing_file_str is non-NULL, it will be used when modifying top's
2560 * overlay image metadata.
2561 *
6ebdcee2
JC
2562 * Error conditions:
2563 * if active == top, that is considered an error
2564 *
2565 */
2566int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
54e26900 2567 BlockDriverState *base, const char *backing_file_str)
6ebdcee2 2568{
6ebdcee2 2569 BlockDriverState *new_top_bs = NULL;
6ebdcee2
JC
2570 int ret = -EIO;
2571
6ebdcee2
JC
2572 if (!top->drv || !base->drv) {
2573 goto exit;
2574 }
2575
2576 new_top_bs = bdrv_find_overlay(active, top);
2577
2578 if (new_top_bs == NULL) {
2579 /* we could not find the image above 'top', this is an error */
2580 goto exit;
2581 }
2582
760e0063 2583 /* special case of new_top_bs->backing->bs already pointing to base - nothing
6ebdcee2 2584 * to do, no intermediate images */
760e0063 2585 if (backing_bs(new_top_bs) == base) {
6ebdcee2
JC
2586 ret = 0;
2587 goto exit;
2588 }
2589
5db15a57
KW
2590 /* Make sure that base is in the backing chain of top */
2591 if (!bdrv_chain_contains(top, base)) {
6ebdcee2
JC
2592 goto exit;
2593 }
2594
2595 /* success - we can delete the intermediate states, and link top->base */
5db15a57 2596 backing_file_str = backing_file_str ? backing_file_str : base->filename;
54e26900 2597 ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
5db15a57 2598 base->drv ? base->drv->format_name : "");
6ebdcee2
JC
2599 if (ret) {
2600 goto exit;
2601 }
5db15a57 2602 bdrv_set_backing_hd(new_top_bs, base);
6ebdcee2 2603
6ebdcee2 2604 ret = 0;
6ebdcee2 2605exit:
6ebdcee2
JC
2606 return ret;
2607}
2608
61007b31
SH
2609/**
2610 * Truncate file to 'offset' bytes (needed only for file protocols)
2611 */
2612int bdrv_truncate(BlockDriverState *bs, int64_t offset)
71d0770c 2613{
61007b31
SH
2614 BlockDriver *drv = bs->drv;
2615 int ret;
2616 if (!drv)
71d0770c 2617 return -ENOMEDIUM;
61007b31
SH
2618 if (!drv->bdrv_truncate)
2619 return -ENOTSUP;
2620 if (bs->read_only)
2621 return -EACCES;
71d0770c 2622
61007b31
SH
2623 ret = drv->bdrv_truncate(bs, offset);
2624 if (ret == 0) {
2625 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
2626 bdrv_dirty_bitmap_truncate(bs);
2627 if (bs->blk) {
2628 blk_dev_resize_cb(bs->blk);
2629 }
c0191e76 2630 }
61007b31 2631 return ret;
71d0770c
AL
2632}
2633
61007b31
SH
2634/**
2635 * Length of an allocated file in bytes. Sparse files are counted by actual
2636 * allocated space. Return < 0 if error or unknown.
2637 */
2638int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
71d0770c 2639{
61007b31
SH
2640 BlockDriver *drv = bs->drv;
2641 if (!drv) {
2642 return -ENOMEDIUM;
8f4754ed 2643 }
61007b31
SH
2644 if (drv->bdrv_get_allocated_file_size) {
2645 return drv->bdrv_get_allocated_file_size(bs);
2646 }
2647 if (bs->file) {
9a4f4c31 2648 return bdrv_get_allocated_file_size(bs->file->bs);
1c9805a3 2649 }
61007b31 2650 return -ENOTSUP;
1c9805a3 2651}
e7a8a783 2652
61007b31
SH
2653/**
2654 * Return number of sectors on success, -errno on error.
1c9805a3 2655 */
61007b31 2656int64_t bdrv_nb_sectors(BlockDriverState *bs)
1c9805a3 2657{
61007b31 2658 BlockDriver *drv = bs->drv;
498e386c 2659
61007b31
SH
2660 if (!drv)
2661 return -ENOMEDIUM;
2572b37a 2662
61007b31
SH
2663 if (drv->has_variable_length) {
2664 int ret = refresh_total_sectors(bs, bs->total_sectors);
2665 if (ret < 0) {
2666 return ret;
1c9805a3
SH
2667 }
2668 }
61007b31 2669 return bs->total_sectors;
1c9805a3 2670}
b338082b 2671
61007b31
SH
2672/**
2673 * Return length in bytes on success, -errno on error.
2674 * The length is always a multiple of BDRV_SECTOR_SIZE.
8d3b1a2d 2675 */
61007b31 2676int64_t bdrv_getlength(BlockDriverState *bs)
8d3b1a2d 2677{
61007b31 2678 int64_t ret = bdrv_nb_sectors(bs);
8d3b1a2d 2679
4a9c9ea0 2680 ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret;
61007b31 2681 return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
fc01f7e7
FB
2682}
2683
61007b31
SH
2684/* return 0 as number of sectors if no device present or error */
2685void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
07d27a44 2686{
61007b31 2687 int64_t nb_sectors = bdrv_nb_sectors(bs);
07d27a44 2688
61007b31 2689 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
07d27a44
MA
2690}
2691
61007b31 2692int bdrv_is_read_only(BlockDriverState *bs)
8d3b1a2d 2693{
61007b31 2694 return bs->read_only;
83f64091 2695}
83f64091 2696
61007b31 2697int bdrv_is_sg(BlockDriverState *bs)
f08145fe 2698{
61007b31 2699 return bs->sg;
f08145fe
KW
2700}
2701
61007b31 2702int bdrv_is_encrypted(BlockDriverState *bs)
fc3959e4 2703{
760e0063 2704 if (bs->backing && bs->backing->bs->encrypted) {
61007b31 2705 return 1;
760e0063 2706 }
61007b31 2707 return bs->encrypted;
fc3959e4
FZ
2708}
2709
61007b31 2710int bdrv_key_required(BlockDriverState *bs)
fc3959e4 2711{
760e0063 2712 BdrvChild *backing = bs->backing;
61007b31 2713
760e0063 2714 if (backing && backing->bs->encrypted && !backing->bs->valid_key) {
61007b31 2715 return 1;
760e0063 2716 }
61007b31 2717 return (bs->encrypted && !bs->valid_key);
fc3959e4
FZ
2718}
2719
61007b31 2720int bdrv_set_key(BlockDriverState *bs, const char *key)
d0c7f642 2721{
d0c7f642 2722 int ret;
760e0063
KW
2723 if (bs->backing && bs->backing->bs->encrypted) {
2724 ret = bdrv_set_key(bs->backing->bs, key);
61007b31
SH
2725 if (ret < 0)
2726 return ret;
2727 if (!bs->encrypted)
2728 return 0;
2729 }
2730 if (!bs->encrypted) {
2731 return -EINVAL;
2732 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
d0c7f642
KW
2733 return -ENOMEDIUM;
2734 }
61007b31 2735 ret = bs->drv->bdrv_set_key(bs, key);
b9c64947 2736 if (ret < 0) {
61007b31
SH
2737 bs->valid_key = 0;
2738 } else if (!bs->valid_key) {
2739 bs->valid_key = 1;
2740 if (bs->blk) {
2741 /* call the change callback now, we skipped it on open */
2742 blk_dev_change_media_cb(bs->blk, true);
2743 }
1b0288ae 2744 }
61007b31
SH
2745 return ret;
2746}
f08f2dda 2747
c5fbe571 2748/*
61007b31
SH
2749 * Provide an encryption key for @bs.
2750 * If @key is non-null:
2751 * If @bs is not encrypted, fail.
2752 * Else if the key is invalid, fail.
2753 * Else set @bs's key to @key, replacing the existing key, if any.
2754 * If @key is null:
2755 * If @bs is encrypted and still lacks a key, fail.
2756 * Else do nothing.
2757 * On failure, store an error object through @errp if non-null.
c5fbe571 2758 */
61007b31 2759void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
c5fbe571 2760{
61007b31
SH
2761 if (key) {
2762 if (!bdrv_is_encrypted(bs)) {
2763 error_setg(errp, "Node '%s' is not encrypted",
2764 bdrv_get_device_or_node_name(bs));
2765 } else if (bdrv_set_key(bs, key) < 0) {
c6bd8c70 2766 error_setg(errp, QERR_INVALID_PASSWORD);
4d2855a3
MA
2767 }
2768 } else {
2769 if (bdrv_key_required(bs)) {
b1ca6391
MA
2770 error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
2771 "'%s' (%s) is encrypted",
81e5f78a 2772 bdrv_get_device_or_node_name(bs),
4d2855a3
MA
2773 bdrv_get_encrypted_filename(bs));
2774 }
2775 }
2776}
2777
61007b31 2778const char *bdrv_get_format_name(BlockDriverState *bs)
40b4f539 2779{
61007b31 2780 return bs->drv ? bs->drv->format_name : NULL;
40b4f539
KW
2781}
2782
/*
 * qsort() comparator for an array of C strings ('const char *' elements).
 *
 * qsort() passes pointers to the array elements, so both arguments really
 * point at a 'const char *' and have to be dereferenced once before the
 * strings themselves can be compared with strcmp().
 *
 * Returns <0, 0 or >0 as strcmp() does for the two strings.
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
61007b31
SH
2788void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
2789 void *opaque)
40b4f539 2790{
61007b31
SH
2791 BlockDriver *drv;
2792 int count = 0;
2793 int i;
2794 const char **formats = NULL;
40b4f539 2795
61007b31
SH
2796 QLIST_FOREACH(drv, &bdrv_drivers, list) {
2797 if (drv->format_name) {
2798 bool found = false;
2799 int i = count;
2800 while (formats && i && !found) {
2801 found = !strcmp(formats[--i], drv->format_name);
2802 }
e2a305fb 2803
61007b31
SH
2804 if (!found) {
2805 formats = g_renew(const char *, formats, count + 1);
2806 formats[count++] = drv->format_name;
2807 }
6c5a42ac 2808 }
61007b31 2809 }
6c5a42ac 2810
61007b31 2811 qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
40b4f539 2812
61007b31
SH
2813 for (i = 0; i < count; i++) {
2814 it(opaque, formats[i]);
2815 }
40b4f539 2816
61007b31
SH
2817 g_free(formats);
2818}
40b4f539 2819
61007b31
SH
2820/* This function is to find a node in the bs graph */
2821BlockDriverState *bdrv_find_node(const char *node_name)
2822{
2823 BlockDriverState *bs;
391827eb 2824
61007b31 2825 assert(node_name);
40b4f539 2826
61007b31
SH
2827 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2828 if (!strcmp(node_name, bs->node_name)) {
2829 return bs;
40b4f539
KW
2830 }
2831 }
61007b31 2832 return NULL;
40b4f539
KW
2833}
2834
61007b31
SH
2835/* Put this QMP function here so it can access the static graph_bdrv_states. */
2836BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
40b4f539 2837{
61007b31
SH
2838 BlockDeviceInfoList *list, *entry;
2839 BlockDriverState *bs;
40b4f539 2840
61007b31
SH
2841 list = NULL;
2842 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
c83f9fba 2843 BlockDeviceInfo *info = bdrv_block_device_info(NULL, bs, errp);
61007b31
SH
2844 if (!info) {
2845 qapi_free_BlockDeviceInfoList(list);
2846 return NULL;
301db7c2 2847 }
61007b31
SH
2848 entry = g_malloc0(sizeof(*entry));
2849 entry->value = info;
2850 entry->next = list;
2851 list = entry;
301db7c2
RH
2852 }
2853
61007b31
SH
2854 return list;
2855}
40b4f539 2856
61007b31
SH
2857BlockDriverState *bdrv_lookup_bs(const char *device,
2858 const char *node_name,
2859 Error **errp)
2860{
2861 BlockBackend *blk;
2862 BlockDriverState *bs;
40b4f539 2863
61007b31
SH
2864 if (device) {
2865 blk = blk_by_name(device);
40b4f539 2866
61007b31 2867 if (blk) {
9f4ed6fb
AG
2868 bs = blk_bs(blk);
2869 if (!bs) {
5433c24f 2870 error_setg(errp, "Device '%s' has no medium", device);
5433c24f
HR
2871 }
2872
9f4ed6fb 2873 return bs;
61007b31
SH
2874 }
2875 }
40b4f539 2876
61007b31
SH
2877 if (node_name) {
2878 bs = bdrv_find_node(node_name);
6d519a5f 2879
61007b31
SH
2880 if (bs) {
2881 return bs;
2882 }
40b4f539
KW
2883 }
2884
61007b31
SH
2885 error_setg(errp, "Cannot find device=%s nor node_name=%s",
2886 device ? device : "",
2887 node_name ? node_name : "");
2888 return NULL;
40b4f539
KW
2889}
2890
61007b31
SH
2891/* If 'base' is in the same chain as 'top', return true. Otherwise,
2892 * return false. If either argument is NULL, return false. */
2893bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
83f64091 2894{
61007b31 2895 while (top && top != base) {
760e0063 2896 top = backing_bs(top);
02c50efe 2897 }
61007b31
SH
2898
2899 return top != NULL;
02c50efe
FZ
2900}
2901
61007b31 2902BlockDriverState *bdrv_next_node(BlockDriverState *bs)
02c50efe 2903{
61007b31
SH
2904 if (!bs) {
2905 return QTAILQ_FIRST(&graph_bdrv_states);
02c50efe 2906 }
61007b31 2907 return QTAILQ_NEXT(bs, node_list);
83f64091
FB
2908}
2909
26260580
HR
2910/* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by
2911 * the monitor or attached to a BlockBackend */
61007b31 2912BlockDriverState *bdrv_next(BlockDriverState *bs)
83f64091 2913{
26260580
HR
2914 if (!bs || bs->blk) {
2915 bs = blk_next_root_bs(bs);
2916 if (bs) {
2917 return bs;
2918 }
857d4f46 2919 }
26260580
HR
2920
2921 /* Ignore all BDSs that are attached to a BlockBackend here; they have been
2922 * handled by the above block already */
2923 do {
2924 bs = bdrv_next_monitor_owned(bs);
2925 } while (bs && bs->blk);
2926 return bs;
83f64091 2927}
beac80cd 2928
61007b31 2929const char *bdrv_get_node_name(const BlockDriverState *bs)
83f64091 2930{
61007b31 2931 return bs->node_name;
beac80cd
FB
2932}
2933
61007b31
SH
2934/* TODO check what callers really want: bs->node_name or blk_name() */
2935const char *bdrv_get_device_name(const BlockDriverState *bs)
beac80cd 2936{
61007b31 2937 return bs->blk ? blk_name(bs->blk) : "";
f141eafe 2938}
83f64091 2939
61007b31
SH
2940/* This can be used to identify nodes that might not have a device
2941 * name associated. Since node and device names live in the same
2942 * namespace, the result is unambiguous. The exception is if both are
2943 * absent, then this returns an empty (non-null) string. */
2944const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
f141eafe 2945{
61007b31 2946 return bs->blk ? blk_name(bs->blk) : bs->node_name;
beac80cd 2947}
beac80cd 2948
61007b31 2949int bdrv_get_flags(BlockDriverState *bs)
0b5a2445 2950{
61007b31 2951 return bs->open_flags;
0b5a2445
PB
2952}
2953
61007b31 2954int bdrv_has_zero_init_1(BlockDriverState *bs)
68485420 2955{
61007b31 2956 return 1;
0b5a2445
PB
2957}
2958
61007b31 2959int bdrv_has_zero_init(BlockDriverState *bs)
0b5a2445 2960{
61007b31 2961 assert(bs->drv);
0b5a2445 2962
61007b31
SH
2963 /* If BS is a copy on write image, it is initialized to
2964 the contents of the base image, which may not be zeroes. */
760e0063 2965 if (bs->backing) {
61007b31
SH
2966 return 0;
2967 }
2968 if (bs->drv->bdrv_has_zero_init) {
2969 return bs->drv->bdrv_has_zero_init(bs);
0b5a2445 2970 }
61007b31
SH
2971
2972 /* safe default */
2973 return 0;
68485420
KW
2974}
2975
61007b31 2976bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
b2a61371 2977{
61007b31 2978 BlockDriverInfo bdi;
b2a61371 2979
760e0063 2980 if (bs->backing) {
61007b31
SH
2981 return false;
2982 }
2983
2984 if (bdrv_get_info(bs, &bdi) == 0) {
2985 return bdi.unallocated_blocks_are_zero;
b2a61371
SH
2986 }
2987
61007b31 2988 return false;
b2a61371
SH
2989}
2990
61007b31 2991bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
68485420 2992{
61007b31 2993 BlockDriverInfo bdi;
68485420 2994
760e0063 2995 if (bs->backing || !(bs->open_flags & BDRV_O_UNMAP)) {
61007b31
SH
2996 return false;
2997 }
68485420 2998
61007b31
SH
2999 if (bdrv_get_info(bs, &bdi) == 0) {
3000 return bdi.can_write_zeroes_with_unmap;
3001 }
68485420 3002
61007b31 3003 return false;
68485420
KW
3004}
3005
61007b31 3006const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
b2e12bc6 3007{
760e0063 3008 if (bs->backing && bs->backing->bs->encrypted)
61007b31
SH
3009 return bs->backing_file;
3010 else if (bs->encrypted)
3011 return bs->filename;
3012 else
3013 return NULL;
b2e12bc6
CH
3014}
3015
61007b31
SH
3016void bdrv_get_backing_filename(BlockDriverState *bs,
3017 char *filename, int filename_size)
016f5cf6 3018{
61007b31
SH
3019 pstrcpy(filename, filename_size, bs->backing_file);
3020}
d318aea9 3021
61007b31
SH
3022int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
3023{
3024 BlockDriver *drv = bs->drv;
3025 if (!drv)
3026 return -ENOMEDIUM;
3027 if (!drv->bdrv_get_info)
3028 return -ENOTSUP;
3029 memset(bdi, 0, sizeof(*bdi));
3030 return drv->bdrv_get_info(bs, bdi);
3031}
016f5cf6 3032
61007b31
SH
3033ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
3034{
3035 BlockDriver *drv = bs->drv;
3036 if (drv && drv->bdrv_get_specific_info) {
3037 return drv->bdrv_get_specific_info(bs);
3038 }
3039 return NULL;
016f5cf6
AG
3040}
3041
a31939e6 3042void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event)
4265d620 3043{
61007b31
SH
3044 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
3045 return;
3046 }
4265d620 3047
61007b31 3048 bs->drv->bdrv_debug_event(bs, event);
4265d620
PB
3049}
3050
61007b31
SH
3051int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
3052 const char *tag)
4265d620 3053{
61007b31 3054 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
9a4f4c31 3055 bs = bs->file ? bs->file->bs : NULL;
61007b31 3056 }
4265d620 3057
61007b31
SH
3058 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
3059 return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
3060 }
4265d620 3061
61007b31 3062 return -ENOTSUP;
4265d620
PB
3063}
3064
61007b31 3065int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
ea2384d3 3066{
61007b31 3067 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
9a4f4c31 3068 bs = bs->file ? bs->file->bs : NULL;
61007b31 3069 }
ce1a14dc 3070
61007b31
SH
3071 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
3072 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
3073 }
3074
3075 return -ENOTSUP;
eb852011
MA
3076}
3077
61007b31 3078int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
ce1a14dc 3079{
61007b31 3080 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
9a4f4c31 3081 bs = bs->file ? bs->file->bs : NULL;
61007b31 3082 }
ce1a14dc 3083
61007b31
SH
3084 if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
3085 return bs->drv->bdrv_debug_resume(bs, tag);
3086 }
ce1a14dc 3087
61007b31 3088 return -ENOTSUP;
f197fe2b
FZ
3089}
3090
61007b31 3091bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
ce1a14dc 3092{
61007b31 3093 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
9a4f4c31 3094 bs = bs->file ? bs->file->bs : NULL;
f197fe2b 3095 }
19cb3738 3096
61007b31
SH
3097 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
3098 return bs->drv->bdrv_debug_is_suspended(bs, tag);
3099 }
f9f05dc5 3100
61007b31
SH
3101 return false;
3102}
f9f05dc5 3103
61007b31 3104int bdrv_is_snapshot(BlockDriverState *bs)
f9f05dc5 3105{
61007b31 3106 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
f9f05dc5
KW
3107}
3108
61007b31
SH
3109/* backing_file can either be relative, or absolute, or a protocol. If it is
3110 * relative, it must be relative to the chain. So, passing in bs->filename
3111 * from a BDS as backing_file should not be done, as that may be relative to
3112 * the CWD rather than the chain. */
3113BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
3114 const char *backing_file)
f9f05dc5 3115{
61007b31
SH
3116 char *filename_full = NULL;
3117 char *backing_file_full = NULL;
3118 char *filename_tmp = NULL;
3119 int is_protocol = 0;
3120 BlockDriverState *curr_bs = NULL;
3121 BlockDriverState *retval = NULL;
f9f05dc5 3122
61007b31
SH
3123 if (!bs || !bs->drv || !backing_file) {
3124 return NULL;
f9f05dc5
KW
3125 }
3126
61007b31
SH
3127 filename_full = g_malloc(PATH_MAX);
3128 backing_file_full = g_malloc(PATH_MAX);
3129 filename_tmp = g_malloc(PATH_MAX);
f9f05dc5 3130
61007b31 3131 is_protocol = path_has_protocol(backing_file);
f9f05dc5 3132
760e0063 3133 for (curr_bs = bs; curr_bs->backing; curr_bs = curr_bs->backing->bs) {
f9f05dc5 3134
61007b31
SH
3135 /* If either of the filename paths is actually a protocol, then
3136 * compare unmodified paths; otherwise make paths relative */
3137 if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
3138 if (strcmp(backing_file, curr_bs->backing_file) == 0) {
760e0063 3139 retval = curr_bs->backing->bs;
61007b31
SH
3140 break;
3141 }
3142 } else {
3143 /* If not an absolute filename path, make it relative to the current
3144 * image's filename path */
3145 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3146 backing_file);
f9f05dc5 3147
61007b31
SH
3148 /* We are going to compare absolute pathnames */
3149 if (!realpath(filename_tmp, filename_full)) {
3150 continue;
3151 }
07f07615 3152
61007b31
SH
3153 /* We need to make sure the backing filename we are comparing against
3154 * is relative to the current image filename (or absolute) */
3155 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3156 curr_bs->backing_file);
07f07615 3157
61007b31
SH
3158 if (!realpath(filename_tmp, backing_file_full)) {
3159 continue;
3160 }
eb489bb1 3161
61007b31 3162 if (strcmp(backing_file_full, filename_full) == 0) {
760e0063 3163 retval = curr_bs->backing->bs;
61007b31
SH
3164 break;
3165 }
3166 }
eb489bb1
KW
3167 }
3168
61007b31
SH
3169 g_free(filename_full);
3170 g_free(backing_file_full);
3171 g_free(filename_tmp);
3172 return retval;
3173}
3174
3175int bdrv_get_backing_file_depth(BlockDriverState *bs)
3176{
3177 if (!bs->drv) {
3178 return 0;
eb489bb1
KW
3179 }
3180
760e0063 3181 if (!bs->backing) {
61007b31 3182 return 0;
ca716364
KW
3183 }
3184
760e0063 3185 return 1 + bdrv_get_backing_file_depth(bs->backing->bs);
61007b31 3186}
07f07615 3187
61007b31
SH
3188void bdrv_init(void)
3189{
3190 module_call_init(MODULE_INIT_BLOCK);
3191}
29cdb251 3192
61007b31
SH
3193void bdrv_init_with_whitelist(void)
3194{
3195 use_bdrv_whitelist = 1;
3196 bdrv_init();
07f07615
PB
3197}
3198
5a8a30db 3199void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
0f15423c 3200{
0d1c5c91 3201 BdrvChild *child;
5a8a30db
KW
3202 Error *local_err = NULL;
3203 int ret;
3204
3456a8d1
KW
3205 if (!bs->drv) {
3206 return;
3207 }
3208
04c01a5c 3209 if (!(bs->open_flags & BDRV_O_INACTIVE)) {
7ea2d269
AK
3210 return;
3211 }
04c01a5c 3212 bs->open_flags &= ~BDRV_O_INACTIVE;
7ea2d269 3213
3456a8d1 3214 if (bs->drv->bdrv_invalidate_cache) {
5a8a30db 3215 bs->drv->bdrv_invalidate_cache(bs, &local_err);
0d1c5c91
FZ
3216 if (local_err) {
3217 bs->open_flags |= BDRV_O_INACTIVE;
3218 error_propagate(errp, local_err);
3219 return;
3220 }
5a8a30db 3221 }
0d1c5c91
FZ
3222
3223 QLIST_FOREACH(child, &bs->children, next) {
3224 bdrv_invalidate_cache(child->bs, &local_err);
3225 if (local_err) {
3226 bs->open_flags |= BDRV_O_INACTIVE;
3227 error_propagate(errp, local_err);
3228 return;
3229 }
0f15423c 3230 }
3456a8d1 3231
5a8a30db
KW
3232 ret = refresh_total_sectors(bs, bs->total_sectors);
3233 if (ret < 0) {
04c01a5c 3234 bs->open_flags |= BDRV_O_INACTIVE;
5a8a30db
KW
3235 error_setg_errno(errp, -ret, "Could not refresh total sector count");
3236 return;
3237 }
0f15423c
AL
3238}
3239
5a8a30db 3240void bdrv_invalidate_cache_all(Error **errp)
0f15423c 3241{
79720af6 3242 BlockDriverState *bs = NULL;
5a8a30db 3243 Error *local_err = NULL;
0f15423c 3244
79720af6 3245 while ((bs = bdrv_next(bs)) != NULL) {
ed78cda3
SH
3246 AioContext *aio_context = bdrv_get_aio_context(bs);
3247
3248 aio_context_acquire(aio_context);
5a8a30db 3249 bdrv_invalidate_cache(bs, &local_err);
ed78cda3 3250 aio_context_release(aio_context);
5a8a30db
KW
3251 if (local_err) {
3252 error_propagate(errp, local_err);
3253 return;
3254 }
0f15423c
AL
3255 }
3256}
3257
aad0b7a0
FZ
3258static int bdrv_inactivate_recurse(BlockDriverState *bs,
3259 bool setting_flag)
76b1c7fe 3260{
aad0b7a0 3261 BdrvChild *child;
76b1c7fe
KW
3262 int ret;
3263
aad0b7a0 3264 if (!setting_flag && bs->drv->bdrv_inactivate) {
76b1c7fe
KW
3265 ret = bs->drv->bdrv_inactivate(bs);
3266 if (ret < 0) {
3267 return ret;
3268 }
3269 }
3270
aad0b7a0
FZ
3271 QLIST_FOREACH(child, &bs->children, next) {
3272 ret = bdrv_inactivate_recurse(child->bs, setting_flag);
3273 if (ret < 0) {
3274 return ret;
3275 }
3276 }
3277
3278 if (setting_flag) {
3279 bs->open_flags |= BDRV_O_INACTIVE;
3280 }
76b1c7fe
KW
3281 return 0;
3282}
3283
3284int bdrv_inactivate_all(void)
3285{
79720af6 3286 BlockDriverState *bs = NULL;
aad0b7a0
FZ
3287 int ret = 0;
3288 int pass;
76b1c7fe 3289
79720af6 3290 while ((bs = bdrv_next(bs)) != NULL) {
aad0b7a0
FZ
3291 aio_context_acquire(bdrv_get_aio_context(bs));
3292 }
76b1c7fe 3293
aad0b7a0
FZ
3294 /* We do two passes of inactivation. The first pass calls to drivers'
3295 * .bdrv_inactivate callbacks recursively so all cache is flushed to disk;
3296 * the second pass sets the BDRV_O_INACTIVE flag so that no further write
3297 * is allowed. */
3298 for (pass = 0; pass < 2; pass++) {
3299 bs = NULL;
3300 while ((bs = bdrv_next(bs)) != NULL) {
3301 ret = bdrv_inactivate_recurse(bs, pass);
3302 if (ret < 0) {
3303 goto out;
3304 }
76b1c7fe
KW
3305 }
3306 }
3307
aad0b7a0
FZ
3308out:
3309 bs = NULL;
3310 while ((bs = bdrv_next(bs)) != NULL) {
3311 aio_context_release(bdrv_get_aio_context(bs));
3312 }
3313
3314 return ret;
76b1c7fe
KW
3315}
3316
19cb3738
FB
3317/**************************************************************/
3318/* removable device support */
3319
3320/**
3321 * Return TRUE if the media is present
3322 */
e031f750 3323bool bdrv_is_inserted(BlockDriverState *bs)
19cb3738
FB
3324{
3325 BlockDriver *drv = bs->drv;
28d7a789 3326 BdrvChild *child;
a1aff5bf 3327
e031f750
HR
3328 if (!drv) {
3329 return false;
3330 }
28d7a789
HR
3331 if (drv->bdrv_is_inserted) {
3332 return drv->bdrv_is_inserted(bs);
3333 }
3334 QLIST_FOREACH(child, &bs->children, next) {
3335 if (!bdrv_is_inserted(child->bs)) {
3336 return false;
3337 }
e031f750 3338 }
28d7a789 3339 return true;
19cb3738
FB
3340}
3341
3342/**
8e49ca46
MA
3343 * Return whether the media changed since the last call to this
3344 * function, or -ENOTSUP if we don't know. Most drivers don't know.
19cb3738
FB
3345 */
3346int bdrv_media_changed(BlockDriverState *bs)
3347{
3348 BlockDriver *drv = bs->drv;
19cb3738 3349
8e49ca46
MA
3350 if (drv && drv->bdrv_media_changed) {
3351 return drv->bdrv_media_changed(bs);
3352 }
3353 return -ENOTSUP;
19cb3738
FB
3354}
3355
3356/**
3357 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3358 */
f36f3949 3359void bdrv_eject(BlockDriverState *bs, bool eject_flag)
19cb3738
FB
3360{
3361 BlockDriver *drv = bs->drv;
bfb197e0 3362 const char *device_name;
19cb3738 3363
822e1cd1
MA
3364 if (drv && drv->bdrv_eject) {
3365 drv->bdrv_eject(bs, eject_flag);
19cb3738 3366 }
6f382ed2 3367
bfb197e0
MA
3368 device_name = bdrv_get_device_name(bs);
3369 if (device_name[0] != '\0') {
3370 qapi_event_send_device_tray_moved(device_name,
a5ee7bd4 3371 eject_flag, &error_abort);
6f382ed2 3372 }
19cb3738
FB
3373}
3374
19cb3738
FB
3375/**
3376 * Lock or unlock the media (if it is locked, the user won't be able
3377 * to eject it manually).
3378 */
025e849a 3379void bdrv_lock_medium(BlockDriverState *bs, bool locked)
19cb3738
FB
3380{
3381 BlockDriver *drv = bs->drv;
3382
025e849a 3383 trace_bdrv_lock_medium(bs, locked);
b8c6d095 3384
025e849a
MA
3385 if (drv && drv->bdrv_lock_medium) {
3386 drv->bdrv_lock_medium(bs, locked);
19cb3738
FB
3387 }
3388}
985a03b0 3389
9fcb0251
FZ
3390/* Get a reference to bs */
3391void bdrv_ref(BlockDriverState *bs)
3392{
3393 bs->refcnt++;
3394}
3395
3396/* Release a previously grabbed reference to bs.
3397 * If after releasing, reference count is zero, the BlockDriverState is
3398 * deleted. */
3399void bdrv_unref(BlockDriverState *bs)
3400{
9a4d5ca6
JC
3401 if (!bs) {
3402 return;
3403 }
9fcb0251
FZ
3404 assert(bs->refcnt > 0);
3405 if (--bs->refcnt == 0) {
3406 bdrv_delete(bs);
3407 }
3408}
3409
fbe40ff7
FZ
3410struct BdrvOpBlocker {
3411 Error *reason;
3412 QLIST_ENTRY(BdrvOpBlocker) list;
3413};
3414
3415bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
3416{
3417 BdrvOpBlocker *blocker;
3418 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3419 if (!QLIST_EMPTY(&bs->op_blockers[op])) {
3420 blocker = QLIST_FIRST(&bs->op_blockers[op]);
3421 if (errp) {
e43bfd9c
MA
3422 *errp = error_copy(blocker->reason);
3423 error_prepend(errp, "Node '%s' is busy: ",
3424 bdrv_get_device_or_node_name(bs));
fbe40ff7
FZ
3425 }
3426 return true;
3427 }
3428 return false;
3429}
3430
3431void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
3432{
3433 BdrvOpBlocker *blocker;
3434 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3435
5839e53b 3436 blocker = g_new0(BdrvOpBlocker, 1);
fbe40ff7
FZ
3437 blocker->reason = reason;
3438 QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
3439}
3440
3441void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
3442{
3443 BdrvOpBlocker *blocker, *next;
3444 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3445 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
3446 if (blocker->reason == reason) {
3447 QLIST_REMOVE(blocker, list);
3448 g_free(blocker);
3449 }
3450 }
3451}
3452
3453void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
3454{
3455 int i;
3456 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3457 bdrv_op_block(bs, i, reason);
3458 }
3459}
3460
3461void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
3462{
3463 int i;
3464 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3465 bdrv_op_unblock(bs, i, reason);
3466 }
3467}
3468
3469bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
3470{
3471 int i;
3472
3473 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3474 if (!QLIST_EMPTY(&bs->op_blockers[i])) {
3475 return false;
3476 }
3477 }
3478 return true;
3479}
3480
d92ada22
LC
3481void bdrv_img_create(const char *filename, const char *fmt,
3482 const char *base_filename, const char *base_fmt,
f382d43a
MR
3483 char *options, uint64_t img_size, int flags,
3484 Error **errp, bool quiet)
f88e1a42 3485{
83d0521a
CL
3486 QemuOptsList *create_opts = NULL;
3487 QemuOpts *opts = NULL;
3488 const char *backing_fmt, *backing_file;
3489 int64_t size;
f88e1a42 3490 BlockDriver *drv, *proto_drv;
cc84d90f 3491 Error *local_err = NULL;
f88e1a42
JS
3492 int ret = 0;
3493
3494 /* Find driver and parse its options */
3495 drv = bdrv_find_format(fmt);
3496 if (!drv) {
71c79813 3497 error_setg(errp, "Unknown file format '%s'", fmt);
d92ada22 3498 return;
f88e1a42
JS
3499 }
3500
b65a5e12 3501 proto_drv = bdrv_find_protocol(filename, true, errp);
f88e1a42 3502 if (!proto_drv) {
d92ada22 3503 return;
f88e1a42
JS
3504 }
3505
c6149724
HR
3506 if (!drv->create_opts) {
3507 error_setg(errp, "Format driver '%s' does not support image creation",
3508 drv->format_name);
3509 return;
3510 }
3511
3512 if (!proto_drv->create_opts) {
3513 error_setg(errp, "Protocol driver '%s' does not support image creation",
3514 proto_drv->format_name);
3515 return;
3516 }
3517
c282e1fd
CL
3518 create_opts = qemu_opts_append(create_opts, drv->create_opts);
3519 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
f88e1a42
JS
3520
3521 /* Create parameter list with default values */
83d0521a 3522 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
39101f25 3523 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
f88e1a42
JS
3524
3525 /* Parse -o options */
3526 if (options) {
dc523cd3
MA
3527 qemu_opts_do_parse(opts, options, NULL, &local_err);
3528 if (local_err) {
3529 error_report_err(local_err);
3530 local_err = NULL;
83d0521a 3531 error_setg(errp, "Invalid options for file format '%s'", fmt);
f88e1a42
JS
3532 goto out;
3533 }
3534 }
3535
3536 if (base_filename) {
f43e47db 3537 qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
6be4194b 3538 if (local_err) {
71c79813
LC
3539 error_setg(errp, "Backing file not supported for file format '%s'",
3540 fmt);
f88e1a42
JS
3541 goto out;
3542 }
3543 }
3544
3545 if (base_fmt) {
f43e47db 3546 qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
6be4194b 3547 if (local_err) {
71c79813
LC
3548 error_setg(errp, "Backing file format not supported for file "
3549 "format '%s'", fmt);
f88e1a42
JS
3550 goto out;
3551 }
3552 }
3553
83d0521a
CL
3554 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
3555 if (backing_file) {
3556 if (!strcmp(filename, backing_file)) {
71c79813
LC
3557 error_setg(errp, "Error: Trying to create an image with the "
3558 "same filename as the backing file");
792da93a
JS
3559 goto out;
3560 }
3561 }
3562
83d0521a 3563 backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
f88e1a42
JS
3564
3565 // The size for the image must always be specified, with one exception:
3566 // If we are using a backing file, we can obtain the size from there
83d0521a
CL
3567 size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
3568 if (size == -1) {
3569 if (backing_file) {
66f6b814 3570 BlockDriverState *bs;
29168018 3571 char *full_backing = g_new0(char, PATH_MAX);
52bf1e72 3572 int64_t size;
63090dac 3573 int back_flags;
e6641719 3574 QDict *backing_options = NULL;
63090dac 3575
29168018
HR
3576 bdrv_get_full_backing_filename_from_filename(filename, backing_file,
3577 full_backing, PATH_MAX,
3578 &local_err);
3579 if (local_err) {
3580 g_free(full_backing);
3581 goto out;
3582 }
3583
63090dac 3584 /* backing files always opened read-only */
61de4c68 3585 back_flags = flags;
bfd18d1e 3586 back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
f88e1a42 3587
e6641719
HR
3588 if (backing_fmt) {
3589 backing_options = qdict_new();
3590 qdict_put(backing_options, "driver",
3591 qstring_from_str(backing_fmt));
3592 }
3593
f67503e5 3594 bs = NULL;
e6641719 3595 ret = bdrv_open(&bs, full_backing, NULL, backing_options,
6ebf9aa2 3596 back_flags, &local_err);
29168018 3597 g_free(full_backing);
f88e1a42 3598 if (ret < 0) {
f88e1a42
JS
3599 goto out;
3600 }
52bf1e72
MA
3601 size = bdrv_getlength(bs);
3602 if (size < 0) {
3603 error_setg_errno(errp, -size, "Could not get size of '%s'",
3604 backing_file);
3605 bdrv_unref(bs);
3606 goto out;
3607 }
f88e1a42 3608
39101f25 3609 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
66f6b814
HR
3610
3611 bdrv_unref(bs);
f88e1a42 3612 } else {
71c79813 3613 error_setg(errp, "Image creation needs a size parameter");
f88e1a42
JS
3614 goto out;
3615 }
3616 }
3617
f382d43a 3618 if (!quiet) {
fe646693 3619 printf("Formatting '%s', fmt=%s ", filename, fmt);
43c5d8f8 3620 qemu_opts_print(opts, " ");
f382d43a
MR
3621 puts("");
3622 }
83d0521a 3623
c282e1fd 3624 ret = bdrv_create(drv, filename, opts, &local_err);
83d0521a 3625
cc84d90f
HR
3626 if (ret == -EFBIG) {
3627 /* This is generally a better message than whatever the driver would
3628 * deliver (especially because of the cluster_size_hint), since that
3629 * is most probably not much different from "image too large". */
3630 const char *cluster_size_hint = "";
83d0521a 3631 if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
cc84d90f 3632 cluster_size_hint = " (try using a larger cluster size)";
f88e1a42 3633 }
cc84d90f
HR
3634 error_setg(errp, "The image size is too large for file format '%s'"
3635 "%s", fmt, cluster_size_hint);
3636 error_free(local_err);
3637 local_err = NULL;
f88e1a42
JS
3638 }
3639
3640out:
83d0521a
CL
3641 qemu_opts_del(opts);
3642 qemu_opts_free(create_opts);
84d18f06 3643 if (local_err) {
cc84d90f
HR
3644 error_propagate(errp, local_err);
3645 }
f88e1a42 3646}
85d126f3
SH
3647
3648AioContext *bdrv_get_aio_context(BlockDriverState *bs)
3649{
dcd04228
SH
3650 return bs->aio_context;
3651}
3652
3653void bdrv_detach_aio_context(BlockDriverState *bs)
3654{
33384421
HR
3655 BdrvAioNotifier *baf;
3656
dcd04228
SH
3657 if (!bs->drv) {
3658 return;
3659 }
3660
33384421
HR
3661 QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
3662 baf->detach_aio_context(baf->opaque);
3663 }
3664
a0d64a61 3665 if (bs->throttle_state) {
0e5b0a2d 3666 throttle_timers_detach_aio_context(&bs->throttle_timers);
13af91eb 3667 }
dcd04228
SH
3668 if (bs->drv->bdrv_detach_aio_context) {
3669 bs->drv->bdrv_detach_aio_context(bs);
3670 }
3671 if (bs->file) {
9a4f4c31 3672 bdrv_detach_aio_context(bs->file->bs);
dcd04228 3673 }
760e0063
KW
3674 if (bs->backing) {
3675 bdrv_detach_aio_context(bs->backing->bs);
dcd04228
SH
3676 }
3677
3678 bs->aio_context = NULL;
3679}
3680
3681void bdrv_attach_aio_context(BlockDriverState *bs,
3682 AioContext *new_context)
3683{
33384421
HR
3684 BdrvAioNotifier *ban;
3685
dcd04228
SH
3686 if (!bs->drv) {
3687 return;
3688 }
3689
3690 bs->aio_context = new_context;
3691
760e0063
KW
3692 if (bs->backing) {
3693 bdrv_attach_aio_context(bs->backing->bs, new_context);
dcd04228
SH
3694 }
3695 if (bs->file) {
9a4f4c31 3696 bdrv_attach_aio_context(bs->file->bs, new_context);
dcd04228
SH
3697 }
3698 if (bs->drv->bdrv_attach_aio_context) {
3699 bs->drv->bdrv_attach_aio_context(bs, new_context);
3700 }
a0d64a61 3701 if (bs->throttle_state) {
0e5b0a2d 3702 throttle_timers_attach_aio_context(&bs->throttle_timers, new_context);
13af91eb 3703 }
33384421
HR
3704
3705 QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
3706 ban->attached_aio_context(new_context, ban->opaque);
3707 }
dcd04228
SH
3708}
3709
3710void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
3711{
53ec73e2 3712 bdrv_drain(bs); /* ensure there are no in-flight requests */
dcd04228
SH
3713
3714 bdrv_detach_aio_context(bs);
3715
3716 /* This function executes in the old AioContext so acquire the new one in
3717 * case it runs in a different thread.
3718 */
3719 aio_context_acquire(new_context);
3720 bdrv_attach_aio_context(bs, new_context);
3721 aio_context_release(new_context);
85d126f3 3722}
d616b224 3723
33384421
HR
3724void bdrv_add_aio_context_notifier(BlockDriverState *bs,
3725 void (*attached_aio_context)(AioContext *new_context, void *opaque),
3726 void (*detach_aio_context)(void *opaque), void *opaque)
3727{
3728 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
3729 *ban = (BdrvAioNotifier){
3730 .attached_aio_context = attached_aio_context,
3731 .detach_aio_context = detach_aio_context,
3732 .opaque = opaque
3733 };
3734
3735 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
3736}
3737
3738void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
3739 void (*attached_aio_context)(AioContext *,
3740 void *),
3741 void (*detach_aio_context)(void *),
3742 void *opaque)
3743{
3744 BdrvAioNotifier *ban, *ban_next;
3745
3746 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
3747 if (ban->attached_aio_context == attached_aio_context &&
3748 ban->detach_aio_context == detach_aio_context &&
3749 ban->opaque == opaque)
3750 {
3751 QLIST_REMOVE(ban, list);
3752 g_free(ban);
3753
3754 return;
3755 }
3756 }
3757
3758 abort();
3759}
3760
77485434 3761int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
8b13976d 3762 BlockDriverAmendStatusCB *status_cb, void *cb_opaque)
6f176b48 3763{
c282e1fd 3764 if (!bs->drv->bdrv_amend_options) {
6f176b48
HR
3765 return -ENOTSUP;
3766 }
8b13976d 3767 return bs->drv->bdrv_amend_options(bs, opts, status_cb, cb_opaque);
6f176b48 3768}
f6186f49 3769
b5042a36
BC
3770/* This function will be called by the bdrv_recurse_is_first_non_filter method
3771 * of block filter and by bdrv_is_first_non_filter.
3772 * It is used to test if the given bs is the candidate or recurse more in the
3773 * node graph.
212a5a8f 3774 */
b5042a36 3775bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
212a5a8f 3776 BlockDriverState *candidate)
f6186f49 3777{
b5042a36
BC
3778 /* return false if basic checks fails */
3779 if (!bs || !bs->drv) {
212a5a8f 3780 return false;
f6186f49
BC
3781 }
3782
b5042a36
BC
3783 /* the code reached a non block filter driver -> check if the bs is
3784 * the same as the candidate. It's the recursion termination condition.
3785 */
3786 if (!bs->drv->is_filter) {
3787 return bs == candidate;
212a5a8f 3788 }
b5042a36 3789 /* Down this path the driver is a block filter driver */
212a5a8f 3790
b5042a36
BC
3791 /* If the block filter recursion method is defined use it to recurse down
3792 * the node graph.
3793 */
3794 if (bs->drv->bdrv_recurse_is_first_non_filter) {
212a5a8f 3795 return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
f6186f49
BC
3796 }
3797
b5042a36
BC
3798 /* the driver is a block filter but don't allow to recurse -> return false
3799 */
3800 return false;
f6186f49
BC
3801}
3802
212a5a8f
BC
3803/* This function checks if the candidate is the first non filter bs down it's
3804 * bs chain. Since we don't have pointers to parents it explore all bs chains
3805 * from the top. Some filters can choose not to pass down the recursion.
3806 */
3807bool bdrv_is_first_non_filter(BlockDriverState *candidate)
f6186f49 3808{
79720af6 3809 BlockDriverState *bs = NULL;
212a5a8f
BC
3810
3811 /* walk down the bs forest recursively */
79720af6 3812 while ((bs = bdrv_next(bs)) != NULL) {
212a5a8f
BC
3813 bool perm;
3814
b5042a36 3815 /* try to recurse in this top level bs */
e6dc8a1f 3816 perm = bdrv_recurse_is_first_non_filter(bs, candidate);
212a5a8f
BC
3817
3818 /* candidate is the first non filter */
3819 if (perm) {
3820 return true;
3821 }
3822 }
3823
3824 return false;
f6186f49 3825}
09158f00 3826
e12f3784
WC
3827BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
3828 const char *node_name, Error **errp)
09158f00
BC
3829{
3830 BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
5a7e7a0b
SH
3831 AioContext *aio_context;
3832
09158f00
BC
3833 if (!to_replace_bs) {
3834 error_setg(errp, "Node name '%s' not found", node_name);
3835 return NULL;
3836 }
3837
5a7e7a0b
SH
3838 aio_context = bdrv_get_aio_context(to_replace_bs);
3839 aio_context_acquire(aio_context);
3840
09158f00 3841 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
5a7e7a0b
SH
3842 to_replace_bs = NULL;
3843 goto out;
09158f00
BC
3844 }
3845
3846 /* We don't want arbitrary node of the BDS chain to be replaced only the top
3847 * most non filter in order to prevent data corruption.
3848 * Another benefit is that this tests exclude backing files which are
3849 * blocked by the backing blockers.
3850 */
e12f3784 3851 if (!bdrv_recurse_is_first_non_filter(parent_bs, to_replace_bs)) {
09158f00 3852 error_setg(errp, "Only top most non filter can be replaced");
5a7e7a0b
SH
3853 to_replace_bs = NULL;
3854 goto out;
09158f00
BC
3855 }
3856
5a7e7a0b
SH
3857out:
3858 aio_context_release(aio_context);
09158f00
BC
3859 return to_replace_bs;
3860}
448ad91d 3861
91af7014
HR
3862static bool append_open_options(QDict *d, BlockDriverState *bs)
3863{
3864 const QDictEntry *entry;
9e700c1a 3865 QemuOptDesc *desc;
260fecf1 3866 BdrvChild *child;
91af7014 3867 bool found_any = false;
260fecf1 3868 const char *p;
91af7014
HR
3869
3870 for (entry = qdict_first(bs->options); entry;
3871 entry = qdict_next(bs->options, entry))
3872 {
260fecf1
KW
3873 /* Exclude options for children */
3874 QLIST_FOREACH(child, &bs->children, next) {
3875 if (strstart(qdict_entry_key(entry), child->name, &p)
3876 && (!*p || *p == '.'))
3877 {
3878 break;
3879 }
3880 }
3881 if (child) {
9e700c1a 3882 continue;
91af7014 3883 }
9e700c1a
KW
3884
3885 /* And exclude all non-driver-specific options */
3886 for (desc = bdrv_runtime_opts.desc; desc->name; desc++) {
3887 if (!strcmp(qdict_entry_key(entry), desc->name)) {
3888 break;
3889 }
3890 }
3891 if (desc->name) {
3892 continue;
3893 }
3894
3895 qobject_incref(qdict_entry_value(entry));
3896 qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
3897 found_any = true;
91af7014
HR
3898 }
3899
3900 return found_any;
3901}
3902
3903/* Updates the following BDS fields:
3904 * - exact_filename: A filename which may be used for opening a block device
3905 * which (mostly) equals the given BDS (even without any
3906 * other options; so reading and writing must return the same
3907 * results, but caching etc. may be different)
3908 * - full_open_options: Options which, when given when opening a block device
3909 * (without a filename), result in a BDS (mostly)
3910 * equalling the given one
3911 * - filename: If exact_filename is set, it is copied here. Otherwise,
3912 * full_open_options is converted to a JSON object, prefixed with
3913 * "json:" (for use through the JSON pseudo protocol) and put here.
3914 */
3915void bdrv_refresh_filename(BlockDriverState *bs)
3916{
3917 BlockDriver *drv = bs->drv;
3918 QDict *opts;
3919
3920 if (!drv) {
3921 return;
3922 }
3923
3924 /* This BDS's file name will most probably depend on its file's name, so
3925 * refresh that first */
3926 if (bs->file) {
9a4f4c31 3927 bdrv_refresh_filename(bs->file->bs);
91af7014
HR
3928 }
3929
3930 if (drv->bdrv_refresh_filename) {
3931 /* Obsolete information is of no use here, so drop the old file name
3932 * information before refreshing it */
3933 bs->exact_filename[0] = '\0';
3934 if (bs->full_open_options) {
3935 QDECREF(bs->full_open_options);
3936 bs->full_open_options = NULL;
3937 }
3938
4cdd01d3
KW
3939 opts = qdict_new();
3940 append_open_options(opts, bs);
3941 drv->bdrv_refresh_filename(bs, opts);
3942 QDECREF(opts);
91af7014
HR
3943 } else if (bs->file) {
3944 /* Try to reconstruct valid information from the underlying file */
3945 bool has_open_options;
3946
3947 bs->exact_filename[0] = '\0';
3948 if (bs->full_open_options) {
3949 QDECREF(bs->full_open_options);
3950 bs->full_open_options = NULL;
3951 }
3952
3953 opts = qdict_new();
3954 has_open_options = append_open_options(opts, bs);
3955
3956 /* If no specific options have been given for this BDS, the filename of
3957 * the underlying file should suffice for this one as well */
9a4f4c31
KW
3958 if (bs->file->bs->exact_filename[0] && !has_open_options) {
3959 strcpy(bs->exact_filename, bs->file->bs->exact_filename);
91af7014
HR
3960 }
3961 /* Reconstructing the full options QDict is simple for most format block
3962 * drivers, as long as the full options are known for the underlying
3963 * file BDS. The full options QDict of that file BDS should somehow
3964 * contain a representation of the filename, therefore the following
3965 * suffices without querying the (exact_)filename of this BDS. */
9a4f4c31 3966 if (bs->file->bs->full_open_options) {
91af7014
HR
3967 qdict_put_obj(opts, "driver",
3968 QOBJECT(qstring_from_str(drv->format_name)));
9a4f4c31
KW
3969 QINCREF(bs->file->bs->full_open_options);
3970 qdict_put_obj(opts, "file",
3971 QOBJECT(bs->file->bs->full_open_options));
91af7014
HR
3972
3973 bs->full_open_options = opts;
3974 } else {
3975 QDECREF(opts);
3976 }
3977 } else if (!bs->full_open_options && qdict_size(bs->options)) {
3978 /* There is no underlying file BDS (at least referenced by BDS.file),
3979 * so the full options QDict should be equal to the options given
3980 * specifically for this block device when it was opened (plus the
3981 * driver specification).
3982 * Because those options don't change, there is no need to update
3983 * full_open_options when it's already set. */
3984
3985 opts = qdict_new();
3986 append_open_options(opts, bs);
3987 qdict_put_obj(opts, "driver",
3988 QOBJECT(qstring_from_str(drv->format_name)));
3989
3990 if (bs->exact_filename[0]) {
3991 /* This may not work for all block protocol drivers (some may
3992 * require this filename to be parsed), but we have to find some
3993 * default solution here, so just include it. If some block driver
3994 * does not support pure options without any filename at all or
3995 * needs some special format of the options QDict, it needs to
3996 * implement the driver-specific bdrv_refresh_filename() function.
3997 */
3998 qdict_put_obj(opts, "filename",
3999 QOBJECT(qstring_from_str(bs->exact_filename)));
4000 }
4001
4002 bs->full_open_options = opts;
4003 }
4004
4005 if (bs->exact_filename[0]) {
4006 pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
4007 } else if (bs->full_open_options) {
4008 QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
4009 snprintf(bs->filename, sizeof(bs->filename), "json:%s",
4010 qstring_get_str(json));
4011 QDECREF(json);
4012 }
4013}