]> git.ipfire.org Git - thirdparty/qemu.git/blame - block.c
block: protect path_has_protocol from filenames with colons
[thirdparty/qemu.git] / block.c
CommitLineData
fc01f7e7
FB
1/*
2 * QEMU System Emulator block driver
5fafdf24 3 *
fc01f7e7 4 * Copyright (c) 2003 Fabrice Bellard
5fafdf24 5 *
fc01f7e7
FB
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
3990d09a 24#include "config-host.h"
faf07963 25#include "qemu-common.h"
6d519a5f 26#include "trace.h"
376253ec 27#include "monitor.h"
ea2384d3 28#include "block_int.h"
5efa9d5a 29#include "module.h"
f795e743 30#include "qjson.h"
68485420 31#include "qemu-coroutine.h"
b2023818 32#include "qmp-commands.h"
0563e191 33#include "qemu-timer.h"
fc01f7e7 34
71e72a19 35#ifdef CONFIG_BSD
7674e7bf
FB
36#include <sys/types.h>
37#include <sys/stat.h>
38#include <sys/ioctl.h>
72cf2d4f 39#include <sys/queue.h>
c5e97233 40#ifndef __DragonFly__
7674e7bf
FB
41#include <sys/disk.h>
42#endif
c5e97233 43#endif
7674e7bf 44
49dc768d
AL
45#ifdef _WIN32
46#include <windows.h>
47#endif
48
1c9805a3
SH
49#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
50
470c0504
SH
51typedef enum {
52 BDRV_REQ_COPY_ON_READ = 0x1,
f08f2dda 53 BDRV_REQ_ZERO_WRITE = 0x2,
470c0504
SH
54} BdrvRequestFlags;
55
7d4b4ba5 56static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
f141eafe
AL
57static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
58 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
c87c0672 59 BlockDriverCompletionFunc *cb, void *opaque);
f141eafe
AL
60static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
61 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 62 BlockDriverCompletionFunc *cb, void *opaque);
f9f05dc5
KW
63static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
64 int64_t sector_num, int nb_sectors,
65 QEMUIOVector *iov);
66static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
67 int64_t sector_num, int nb_sectors,
68 QEMUIOVector *iov);
c5fbe571 69static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
470c0504
SH
70 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
71 BdrvRequestFlags flags);
1c9805a3 72static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
f08f2dda
SH
73 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
74 BdrvRequestFlags flags);
b2a61371
SH
75static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
76 int64_t sector_num,
77 QEMUIOVector *qiov,
78 int nb_sectors,
79 BlockDriverCompletionFunc *cb,
80 void *opaque,
8c5873d6 81 bool is_write);
b2a61371 82static void coroutine_fn bdrv_co_do_rw(void *opaque);
621f0589
KW
83static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
84 int64_t sector_num, int nb_sectors);
ec530c81 85
98f90dba
ZYW
86static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
87 bool is_write, double elapsed_time, uint64_t *wait);
88static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
89 double elapsed_time, uint64_t *wait);
90static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
91 bool is_write, int64_t *wait);
92
1b7bdbc1
SH
93static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
94 QTAILQ_HEAD_INITIALIZER(bdrv_states);
7ee930d0 95
8a22f02a
SH
96static QLIST_HEAD(, BlockDriver) bdrv_drivers =
97 QLIST_HEAD_INITIALIZER(bdrv_drivers);
ea2384d3 98
f9092b10
MA
99/* The device to use for VM snapshots */
100static BlockDriverState *bs_snapshots;
101
eb852011
MA
102/* If non-zero, use only whitelisted block drivers */
103static int use_bdrv_whitelist;
104
9e0b22f4
SH
105#ifdef _WIN32
/* Return non-zero if filename starts with a DOS drive-letter prefix,
 * e.g. "c:" or "C:foo". */
static int is_windows_drive_prefix(const char *filename)
{
    char c = filename[0];
    int is_letter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');

    /* Check the letter first so filename[1] is never read for "" */
    return is_letter && filename[1] == ':';
}
112
/* Return non-zero if filename names a whole Windows drive or device:
 * a bare drive letter ("d:") or a device namespace path
 * ("\\.\PhysicalDrive0", "//./d:"). */
int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }
    if (strncmp(filename, "\\\\.\\", 4) == 0 ||
        strncmp(filename, "//./", 4) == 0) {
        return 1;
    }
    return 0;
}
123#endif
124
0563e191 125/* throttling disk I/O limits */
98f90dba
ZYW
/* Turn off I/O throttling for bs: release every queued throttled request,
 * tear down the throttle timer and reset the accounting slice. */
void bdrv_io_limits_disable(BlockDriverState *bs)
{
    bs->io_limits_enabled = false;

    /* Wake all requests parked on the throttle queue before the timer that
     * would otherwise release them is destroyed. */
    while (qemu_co_queue_next(&bs->throttled_reqs));

    if (bs->block_timer) {
        qemu_del_timer(bs->block_timer);
        qemu_free_timer(bs->block_timer);
        bs->block_timer = NULL;
    }

    bs->slice_start = 0;
    bs->slice_end = 0;
    bs->slice_time = 0;
    memset(&bs->io_base, 0, sizeof(bs->io_base));
}
143
0563e191
ZYW
/* Throttle timer callback: release the next queued throttled request. */
static void bdrv_block_timer(void *opaque)
{
    BlockDriverState *bs = opaque;

    qemu_co_queue_next(&bs->throttled_reqs);
}
150
/* Turn on I/O throttling for bs: set up the request queue, the release
 * timer and the first accounting slice. */
void bdrv_io_limits_enable(BlockDriverState *bs)
{
    qemu_co_queue_init(&bs->throttled_reqs);
    bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
    bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
    bs->slice_start = qemu_get_clock_ns(vm_clock);
    bs->slice_end = bs->slice_start + bs->slice_time;
    memset(&bs->io_base, 0, sizeof(bs->io_base));
    bs->io_limits_enabled = true;
}
161
/* Return true if any bytes-per-second or iops limit is configured on bs. */
bool bdrv_io_limits_enabled(BlockDriverState *bs)
{
    BlockIOLimit *io_limits = &bs->io_limits;
    return io_limits->bps[BLOCK_IO_LIMIT_READ]
         || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
         || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
         || io_limits->iops[BLOCK_IO_LIMIT_READ]
         || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
         || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
}
172
98f90dba
ZYW
/* Block the calling coroutine until this request fits within the configured
 * I/O limits, preserving FIFO ordering with other throttled requests. */
static void bdrv_io_limits_intercept(BlockDriverState *bs,
                                     bool is_write, int nb_sectors)
{
    int64_t wait_time = -1;

    /* Queue behind any requests that are already waiting. */
    if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
        qemu_co_queue_wait(&bs->throttled_reqs);
    }

    /* In fact, we hope to keep each request's timing, in FIFO mode. The next
     * throttled requests will not be dequeued until the current request is
     * allowed to be serviced. So if the current request still exceeds the
     * limits, it will be inserted to the head. All requests followed it will
     * be still in throttled_reqs queue.
     */

    while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
        qemu_mod_timer(bs->block_timer,
                       wait_time + qemu_get_clock_ns(vm_clock));
        qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
    }

    /* Let the next waiter re-check its own budget. */
    qemu_co_queue_next(&bs->throttled_reqs);
}
197
9e0b22f4
SH
/* Return non-zero if the path begins with a "<protocol>:" prefix.
 * A colon only counts when it appears before the first directory
 * separator, so "/dir/a:b" is a plain filename, not a protocol. */
static int path_has_protocol(const char *path)
{
    const char *sep;

#ifdef _WIN32
    /* Drive specifications such as "d:" or "d:\foo" are not protocols. */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
    sep = path + strcspn(path, "/\\");
#else
    sep = path + strcspn(path, "/");
#endif

    /* Look for a colon strictly before the first separator (or NUL). */
    return memchr(path, ':', sep - path) != NULL;
}
215
83f64091 216int path_is_absolute(const char *path)
3b0d4f61 217{
21664424
FB
218#ifdef _WIN32
219 /* specific case for names like: "\\.\d:" */
f53f4da9 220 if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
21664424 221 return 1;
f53f4da9
PB
222 }
223 return (*path == '/' || *path == '\\');
3b9f94e1 224#else
f53f4da9 225 return (*path == '/');
3b9f94e1 226#endif
3b0d4f61
FB
227}
228
83f64091
FB
/* if filename is absolute, just copy it to dest. Otherwise, build a
   path to it by considering it is relative to base_path. URL are
   supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *prefix_end, *last_sep;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* Skip over a "<protocol>:" prefix of base_path, if any. */
    prefix_end = strchr(base_path, ':');
    prefix_end = prefix_end ? prefix_end + 1 : base_path;

    /* Locate the last directory separator in base_path. */
    last_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *bslash = strrchr(base_path, '\\');
        if (!last_sep || bslash > last_sep) {
            last_sep = bslash;
        }
    }
#endif
    last_sep = last_sep ? last_sep + 1 : base_path;
    if (last_sep > prefix_end) {
        prefix_end = last_sep;
    }

    /* Copy base_path's directory part, then append filename. */
    prefix_len = prefix_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
272
5efa9d5a 273void bdrv_register(BlockDriver *bdrv)
ea2384d3 274{
8c5873d6
SH
275 /* Block drivers without coroutine functions need emulation */
276 if (!bdrv->bdrv_co_readv) {
f9f05dc5
KW
277 bdrv->bdrv_co_readv = bdrv_co_readv_em;
278 bdrv->bdrv_co_writev = bdrv_co_writev_em;
279
f8c35c1d
SH
280 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
281 * the block driver lacks aio we need to emulate that too.
282 */
f9f05dc5
KW
283 if (!bdrv->bdrv_aio_readv) {
284 /* add AIO emulation layer */
285 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
286 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
f9f05dc5 287 }
83f64091 288 }
b2e12bc6 289
8a22f02a 290 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
ea2384d3 291}
b338082b
FB
292
/* create a new block device (by default it is empty)
 *
 * Anonymous states (empty device_name) are not entered into the global
 * bdrv_states list.  The caller owns the returned state and releases it
 * with bdrv_delete(). */
BlockDriverState *bdrv_new(const char *device_name)
{
    BlockDriverState *bs;

    bs = g_malloc0(sizeof(BlockDriverState));
    pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
    if (device_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
    }
    bdrv_iostatus_disable(bs);
    return bs;
}
306
ea2384d3
FB
/* Look up a registered block driver by format name ("qcow2", "raw", ...);
 * return NULL if none matches. */
BlockDriver *bdrv_find_format(const char *format_name)
{
    BlockDriver *drv1;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (!strcmp(drv1->format_name, format_name)) {
            return drv1;
        }
    }
    return NULL;
}
317
eb852011
MA
/* Return 1 if drv may be used under the compile-time driver whitelist
 * (an empty whitelist permits every driver). */
static int bdrv_is_whitelisted(BlockDriver *drv)
{
    static const char *whitelist[] = {
        CONFIG_BDRV_WHITELIST
    };
    const char **p;

    if (!whitelist[0])
        return 1;               /* no whitelist, anything goes */

    for (p = whitelist; *p; p++) {
        if (!strcmp(drv->format_name, *p)) {
            return 1;
        }
    }
    return 0;
}
335
336BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
337{
338 BlockDriver *drv = bdrv_find_format(format_name);
339 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
340}
341
5b7e1542
ZYW
/* State shared between bdrv_create() and its coroutine entry point. */
typedef struct CreateCo {
    BlockDriver *drv;           /* driver whose bdrv_create is invoked */
    char *filename;             /* owned copy, freed by bdrv_create() */
    QEMUOptionParameter *options;
    int ret;                    /* NOT_DONE until the coroutine finishes */
} CreateCo;

/* Coroutine entry point: perform the actual image creation. */
static void coroutine_fn bdrv_create_co_entry(void *opaque)
{
    CreateCo *cco = opaque;
    assert(cco->drv);

    cco->ret = cco->drv->bdrv_create(cco->filename, cco->options);
}
356
0e7e1989
KW
357int bdrv_create(BlockDriver *drv, const char* filename,
358 QEMUOptionParameter *options)
ea2384d3 359{
5b7e1542
ZYW
360 int ret;
361
362 Coroutine *co;
363 CreateCo cco = {
364 .drv = drv,
365 .filename = g_strdup(filename),
366 .options = options,
367 .ret = NOT_DONE,
368 };
369
370 if (!drv->bdrv_create) {
ea2384d3 371 return -ENOTSUP;
5b7e1542
ZYW
372 }
373
374 if (qemu_in_coroutine()) {
375 /* Fast-path if already in coroutine context */
376 bdrv_create_co_entry(&cco);
377 } else {
378 co = qemu_coroutine_create(bdrv_create_co_entry);
379 qemu_coroutine_enter(co, &cco);
380 while (cco.ret == NOT_DONE) {
381 qemu_aio_wait();
382 }
383 }
384
385 ret = cco.ret;
386 g_free(cco.filename);
0e7e1989 387
5b7e1542 388 return ret;
ea2384d3
FB
389}
390
84a12e66
CH
/* Create an image using the protocol driver implied by filename
 * (e.g. "file", or one selected by a "<protocol>:" prefix).
 * Returns -ENOENT when no protocol driver matches. */
int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
{
    BlockDriver *drv;

    drv = bdrv_find_protocol(filename);
    if (drv == NULL) {
        return -ENOENT;
    }

    return bdrv_create(drv, filename, options);
}
402
d5249393 403#ifdef _WIN32
95389c86 404void get_tmp_filename(char *filename, int size)
d5249393 405{
3b9f94e1 406 char temp_dir[MAX_PATH];
3b46e624 407
3b9f94e1
FB
408 GetTempPath(MAX_PATH, temp_dir);
409 GetTempFileName(temp_dir, "qem", 0, filename);
d5249393
FB
410}
411#else
95389c86 412void get_tmp_filename(char *filename, int size)
fc01f7e7 413{
67b915a5 414 int fd;
7ccfb2eb 415 const char *tmpdir;
d5249393 416 /* XXX: race condition possible */
0badc1ee
AJ
417 tmpdir = getenv("TMPDIR");
418 if (!tmpdir)
419 tmpdir = "/tmp";
420 snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
ea2384d3
FB
421 fd = mkstemp(filename);
422 close(fd);
423}
d5249393 424#endif
fc01f7e7 425
84a12e66
CH
/*
 * Detect host devices. By convention, /dev/cdrom[N] is always
 * recognized as a host CDROM.
 *
 * Returns the driver whose bdrv_probe_device scores highest for
 * filename, or NULL when no driver claims it.
 */
static BlockDriver *find_hdev_driver(const char *filename)
{
    int score_max = 0, score;
    BlockDriver *drv = NULL, *d;

    QLIST_FOREACH(d, &bdrv_drivers, list) {
        if (d->bdrv_probe_device) {
            score = d->bdrv_probe_device(filename);
            if (score > score_max) {
                score_max = score;
                drv = d;
            }
        }
    }

    return drv;
}
447
b50cbabc 448BlockDriver *bdrv_find_protocol(const char *filename)
83f64091
FB
449{
450 BlockDriver *drv1;
451 char protocol[128];
1cec71e3 452 int len;
83f64091 453 const char *p;
19cb3738 454
66f82cee
KW
455 /* TODO Drivers without bdrv_file_open must be specified explicitly */
456
39508e7a
CH
457 /*
458 * XXX(hch): we really should not let host device detection
459 * override an explicit protocol specification, but moving this
460 * later breaks access to device names with colons in them.
461 * Thanks to the brain-dead persistent naming schemes on udev-
462 * based Linux systems those actually are quite common.
463 */
464 drv1 = find_hdev_driver(filename);
465 if (drv1) {
466 return drv1;
467 }
468
9e0b22f4 469 if (!path_has_protocol(filename)) {
39508e7a 470 return bdrv_find_format("file");
84a12e66 471 }
9e0b22f4
SH
472 p = strchr(filename, ':');
473 assert(p != NULL);
1cec71e3
AL
474 len = p - filename;
475 if (len > sizeof(protocol) - 1)
476 len = sizeof(protocol) - 1;
477 memcpy(protocol, filename, len);
478 protocol[len] = '\0';
8a22f02a 479 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
5fafdf24 480 if (drv1->protocol_name &&
8a22f02a 481 !strcmp(drv1->protocol_name, protocol)) {
83f64091 482 return drv1;
8a22f02a 483 }
83f64091
FB
484 }
485 return NULL;
486}
487
c98ac35d 488static int find_image_format(const char *filename, BlockDriver **pdrv)
f3a5d3f8
CH
489{
490 int ret, score, score_max;
491 BlockDriver *drv1, *drv;
492 uint8_t buf[2048];
493 BlockDriverState *bs;
494
f5edb014 495 ret = bdrv_file_open(&bs, filename, 0);
c98ac35d
SW
496 if (ret < 0) {
497 *pdrv = NULL;
498 return ret;
499 }
f8ea0b00 500
08a00559
KW
501 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
502 if (bs->sg || !bdrv_is_inserted(bs)) {
1a396859 503 bdrv_delete(bs);
c98ac35d
SW
504 drv = bdrv_find_format("raw");
505 if (!drv) {
506 ret = -ENOENT;
507 }
508 *pdrv = drv;
509 return ret;
1a396859 510 }
f8ea0b00 511
83f64091
FB
512 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
513 bdrv_delete(bs);
514 if (ret < 0) {
c98ac35d
SW
515 *pdrv = NULL;
516 return ret;
83f64091
FB
517 }
518
ea2384d3 519 score_max = 0;
84a12e66 520 drv = NULL;
8a22f02a 521 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
83f64091
FB
522 if (drv1->bdrv_probe) {
523 score = drv1->bdrv_probe(buf, ret, filename);
524 if (score > score_max) {
525 score_max = score;
526 drv = drv1;
527 }
0849bf08 528 }
fc01f7e7 529 }
c98ac35d
SW
530 if (!drv) {
531 ret = -ENOENT;
532 }
533 *pdrv = drv;
534 return ret;
ea2384d3
FB
535}
536
51762288
SH
/**
 * Set the current 'total_sectors' value
 *
 * Queries the driver when possible; otherwise trusts @hint.
 * Returns 0 on success or a negative error from bdrv_getlength.
 */
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
{
    BlockDriver *drv = bs->drv;

    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
    if (bs->sg)
        return 0;

    /* query actual device if possible, otherwise just trust the hint */
    if (drv->bdrv_getlength) {
        int64_t length = drv->bdrv_getlength(bs);
        if (length < 0) {
            return length;
        }
        hint = length >> BDRV_SECTOR_BITS;
    }

    bs->total_sectors = hint;
    return 0;
}
560
c3993cdc
SH
/**
 * Set open flags for a given cache mode
 *
 * Recognized modes: "off"/"none", "directsync", "writeback", "unsafe",
 * "writethrough" (the default).  Only the BDRV_O_CACHE_MASK bits of
 * *flags are modified.
 *
 * Return 0 on success, -1 if the cache mode was invalid.
 */
int bdrv_parse_cache_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_CACHE_MASK;

    if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
        *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "directsync")) {
        *flags |= BDRV_O_NOCACHE;
    } else if (!strcmp(mode, "writeback")) {
        *flags |= BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "unsafe")) {
        *flags |= BDRV_O_CACHE_WB;
        *flags |= BDRV_O_NO_FLUSH;
    } else if (!strcmp(mode, "writethrough")) {
        /* this is the default */
    } else {
        return -1;
    }

    return 0;
}
587
53fec9d3
SH
/**
 * The copy-on-read flag is actually a reference count so multiple users may
 * use the feature without worrying about clobbering its previous state.
 * Copy-on-read stays enabled until all users have called to disable it.
 */
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
    bs->copy_on_read++;
}

void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
    /* Each disable must be paired with a prior enable. */
    assert(bs->copy_on_read > 0);
    bs->copy_on_read--;
}
603
57915332
KW
/*
 * Common part for opening disk images and files
 *
 * Resets the per-open state of @bs, then opens @filename with @drv --
 * either directly (protocol drivers) or through a nested protocol
 * BlockDriverState in bs->file (format drivers).  On failure all
 * partially-initialized state is torn down and a negative errno is
 * returned.
 */
static int bdrv_open_common(BlockDriverState *bs, const char *filename,
    int flags, BlockDriver *drv)
{
    int ret, open_flags;

    assert(drv != NULL);

    trace_bdrv_open_common(bs, filename, flags, drv->format_name);

    /* Reset state left over from any previous open. */
    bs->file = NULL;
    bs->total_sectors = 0;
    bs->encrypted = 0;
    bs->valid_key = 0;
    bs->sg = 0;
    bs->open_flags = flags;
    bs->growable = 0;
    bs->buffer_alignment = 512;

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if ((flags & BDRV_O_RDWR) && (flags & BDRV_O_COPY_ON_READ)) {
        bdrv_enable_copy_on_read(bs);
    }

    pstrcpy(bs->filename, sizeof(bs->filename), filename);
    bs->backing_file[0] = '\0';

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
        return -ENOTSUP;
    }

    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /*
     * Clear flags that are internal to the block layer before opening the
     * image.
     */
    open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

    /*
     * Snapshots should be writable.
     */
    if (bs->is_temporary) {
        open_flags |= BDRV_O_RDWR;
    }

    bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        ret = drv->bdrv_file_open(bs, filename, open_flags);
    } else {
        ret = bdrv_file_open(&bs->file, filename, open_flags);
        if (ret >= 0) {
            ret = drv->bdrv_open(bs, open_flags);
        }
    }

    if (ret < 0) {
        goto free_and_fail;
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        goto free_and_fail;
    }

#ifndef _WIN32
    /* Temporary images are unlinked immediately; the open fd keeps them
     * alive until close. */
    if (bs->is_temporary) {
        unlink(filename);
    }
#endif
    return 0;

free_and_fail:
    if (bs->file) {
        bdrv_delete(bs->file);
        bs->file = NULL;
    }
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}
693
b6ce07aa
KW
/*
 * Opens a file using a protocol (file, host_device, nbd, ...)
 *
 * On success stores a new anonymous BlockDriverState in *pbs and
 * returns 0; on failure returns a negative errno and leaves *pbs
 * untouched.
 */
int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
{
    BlockDriverState *bs;
    BlockDriver *drv;
    int ret;

    drv = bdrv_find_protocol(filename);
    if (!drv) {
        return -ENOENT;
    }

    bs = bdrv_new("");
    ret = bdrv_open_common(bs, filename, flags, drv);
    if (ret < 0) {
        bdrv_delete(bs);
        return ret;
    }

    /* Protocol-level files may be resized by writes past EOF. */
    bs->growable = 1;
    *pbs = bs;
    return 0;
}
718
b6ce07aa
KW
/*
 * Opens a disk image (raw, qcow2, vmdk, ...)
 *
 * With BDRV_O_SNAPSHOT a temporary qcow2 overlay backed by @filename is
 * created and opened instead of the image itself.  If @drv is NULL the
 * format is probed.  A backing file named by the image is opened
 * read-only unless BDRV_O_NO_BACKING is set.  Returns 0 on success or a
 * negative errno.
 */
int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
              BlockDriver *drv)
{
    int ret;
    char tmp_filename[PATH_MAX];

    if (flags & BDRV_O_SNAPSHOT) {
        BlockDriverState *bs1;
        int64_t total_size;
        int is_protocol = 0;
        BlockDriver *bdrv_qcow2;
        QEMUOptionParameter *options;
        char backing_filename[PATH_MAX];

        /* if snapshot, we create a temporary backing file and open it
           instead of opening 'filename' directly */

        /* if there is a backing file, use it */
        bs1 = bdrv_new("");
        ret = bdrv_open(bs1, filename, 0, drv);
        if (ret < 0) {
            bdrv_delete(bs1);
            return ret;
        }
        total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;

        if (bs1->drv && bs1->drv->protocol_name)
            is_protocol = 1;

        bdrv_delete(bs1);

        get_tmp_filename(tmp_filename, sizeof(tmp_filename));

        /* Real path is meaningless for protocols */
        if (is_protocol)
            snprintf(backing_filename, sizeof(backing_filename),
                     "%s", filename);
        else if (!realpath(filename, backing_filename))
            return -errno;

        bdrv_qcow2 = bdrv_find_format("qcow2");
        options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);

        set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
        set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
        if (drv) {
            set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
                drv->format_name);
        }

        ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
        free_option_parameters(options);
        if (ret < 0) {
            return ret;
        }

        /* From here on, open the temporary overlay instead. */
        filename = tmp_filename;
        drv = bdrv_qcow2;
        bs->is_temporary = 1;
    }

    /* Find the right image format driver */
    if (!drv) {
        ret = find_image_format(filename, &drv);
    }

    if (!drv) {
        goto unlink_and_fail;
    }

    /* Open the image */
    ret = bdrv_open_common(bs, filename, flags, drv);
    if (ret < 0) {
        goto unlink_and_fail;
    }

    /* If there is a backing file, use it */
    if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
        char backing_filename[PATH_MAX];
        int back_flags;
        BlockDriver *back_drv = NULL;

        bs->backing_hd = bdrv_new("");

        if (path_has_protocol(bs->backing_file)) {
            pstrcpy(backing_filename, sizeof(backing_filename),
                    bs->backing_file);
        } else {
            path_combine(backing_filename, sizeof(backing_filename),
                         filename, bs->backing_file);
        }

        if (bs->backing_format[0] != '\0') {
            back_drv = bdrv_find_format(bs->backing_format);
        }

        /* backing files always opened read-only */
        back_flags =
            flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

        ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
        if (ret < 0) {
            bdrv_close(bs);
            return ret;
        }
        if (bs->is_temporary) {
            bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
        } else {
            /* base image inherits from "parent" */
            bs->backing_hd->keep_read_only = bs->keep_read_only;
        }
    }

    if (!bdrv_key_required(bs)) {
        bdrv_dev_change_media_cb(bs, true);
    }

    /* throttling disk I/O limits */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_enable(bs);
    }

    return 0;

unlink_and_fail:
    if (bs->is_temporary) {
        unlink(filename);
    }
    return ret;
}
852
fc01f7e7
FB
/* Close bs: flush, cancel any running block job, drain in-flight I/O,
 * release the backing chain and driver state, and disable throttling.
 * The BlockDriverState itself stays allocated and can be reopened. */
void bdrv_close(BlockDriverState *bs)
{
    bdrv_flush(bs);
    if (bs->drv) {
        if (bs->job) {
            block_job_cancel_sync(bs->job);
        }
        bdrv_drain_all();

        if (bs == bs_snapshots) {
            bs_snapshots = NULL;
        }
        if (bs->backing_hd) {
            bdrv_delete(bs->backing_hd);
            bs->backing_hd = NULL;
        }
        bs->drv->bdrv_close(bs);
        g_free(bs->opaque);
#ifdef _WIN32
        /* On Windows the temporary file could not be unlinked while open. */
        if (bs->is_temporary) {
            unlink(bs->filename);
        }
#endif
        bs->opaque = NULL;
        bs->drv = NULL;
        bs->copy_on_read = 0;
        bs->backing_file[0] = '\0';
        bs->backing_format[0] = '\0';

        if (bs->file != NULL) {
            bdrv_delete(bs->file);
            bs->file = NULL;
        }

        bdrv_dev_change_media_cb(bs, false);
    }

    /*throttling disk I/O limits*/
    if (bs->io_limits_enabled) {
        bdrv_io_limits_disable(bs);
    }
}
895
2bc93fed
MK
/* Close every named BlockDriverState on the global list. */
void bdrv_close_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bdrv_close(bs);
    }
}
904
922453bc
SH
/*
 * Wait for pending requests to complete across all BlockDriverStates
 *
 * This function does not flush data to disk, use bdrv_flush_all() for that
 * after calling this function.
 *
 * Note that completion of an asynchronous I/O operation can trigger any
 * number of other I/O operations on other devices---for example a coroutine
 * can be arbitrarily complex and a constant flow of I/O can come until the
 * coroutine is complete. Because of this, it is not possible to have a
 * function to drain a single device's I/O queue.
 */
void bdrv_drain_all(void)
{
    BlockDriverState *bs;
    bool busy;

    do {
        busy = qemu_aio_wait();

        /* FIXME: We do not have timer support here, so this is effectively
         * a busy wait.
         */
        QTAILQ_FOREACH(bs, &bdrv_states, list) {
            if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
                qemu_co_queue_restart_all(&bs->throttled_reqs);
                busy = true;
            }
        }
    } while (busy);

    /* If requests are still pending there is a bug somewhere */
    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        assert(QLIST_EMPTY(&bs->tracked_requests));
        assert(qemu_co_queue_empty(&bs->throttled_reqs));
    }
}
942
d22b2f41
RH
/* make a BlockDriverState anonymous by removing from bdrv_state list.
   Also, NULL terminate the device_name to prevent double remove */
void bdrv_make_anon(BlockDriverState *bs)
{
    if (bs->device_name[0] != '\0') {
        QTAILQ_REMOVE(&bdrv_states, bs, list);
    }
    bs->device_name[0] = '\0';
}
952
e023b2e2
PB
/* Let the driver re-establish internal back-pointers after bs has been
 * moved or its contents swapped (see bdrv_append()). */
static void bdrv_rebind(BlockDriverState *bs)
{
    if (bs->drv && bs->drv->bdrv_rebind) {
        bs->drv->bdrv_rebind(bs);
    }
}
959
8802d1fd
JC
/*
 * Add new bs contents at the top of an image chain while the chain is
 * live, while keeping required fields on the top layer.
 *
 * This will modify the BlockDriverState fields, and swap contents
 * between bs_new and bs_top. Both bs_new and bs_top are modified.
 *
 * bs_new is required to be anonymous.
 *
 * This function does not create any image files.
 *
 * NOTE: the statement order below is load-bearing -- 'tmp' is built from
 * bs_new plus the fields that must stay on top, then the two states are
 * swapped wholesale, then the demoted state is scrubbed.
 */
void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
{
    BlockDriverState tmp;

    /* bs_new must be anonymous */
    assert(bs_new->device_name[0] == '\0');

    tmp = *bs_new;

    /* there are some fields that need to stay on the top layer: */
    tmp.open_flags = bs_top->open_flags;

    /* dev info */
    tmp.dev_ops = bs_top->dev_ops;
    tmp.dev_opaque = bs_top->dev_opaque;
    tmp.dev = bs_top->dev;
    tmp.buffer_alignment = bs_top->buffer_alignment;
    tmp.copy_on_read = bs_top->copy_on_read;

    /* i/o timing parameters */
    tmp.slice_time = bs_top->slice_time;
    tmp.slice_start = bs_top->slice_start;
    tmp.slice_end = bs_top->slice_end;
    tmp.io_limits = bs_top->io_limits;
    tmp.io_base = bs_top->io_base;
    tmp.throttled_reqs = bs_top->throttled_reqs;
    tmp.block_timer = bs_top->block_timer;
    tmp.io_limits_enabled = bs_top->io_limits_enabled;

    /* geometry */
    tmp.cyls = bs_top->cyls;
    tmp.heads = bs_top->heads;
    tmp.secs = bs_top->secs;
    tmp.translation = bs_top->translation;

    /* r/w error */
    tmp.on_read_error = bs_top->on_read_error;
    tmp.on_write_error = bs_top->on_write_error;

    /* i/o status */
    tmp.iostatus_enabled = bs_top->iostatus_enabled;
    tmp.iostatus = bs_top->iostatus;

    /* keep the same entry in bdrv_states */
    pstrcpy(tmp.device_name, sizeof(tmp.device_name), bs_top->device_name);
    tmp.list = bs_top->list;

    /* The contents of 'tmp' will become bs_top, as we are
     * swapping bs_new and bs_top contents. */
    tmp.backing_hd = bs_new;
    pstrcpy(tmp.backing_file, sizeof(tmp.backing_file), bs_top->filename);
    bdrv_get_format(bs_top, tmp.backing_format, sizeof(tmp.backing_format));

    /* swap contents of the fixed new bs and the current top */
    *bs_new = *bs_top;
    *bs_top = tmp;

    /* device_name[] was carried over from the old bs_top. bs_new
     * shouldn't be in bdrv_states, so we need to make device_name[]
     * reflect the anonymity of bs_new
     */
    bs_new->device_name[0] = '\0';

    /* clear the copied fields in the new backing file */
    bdrv_detach_dev(bs_new, bs_new->dev);

    qemu_co_queue_init(&bs_new->throttled_reqs);
    memset(&bs_new->io_base, 0, sizeof(bs_new->io_base));
    memset(&bs_new->io_limits, 0, sizeof(bs_new->io_limits));
    bdrv_iostatus_disable(bs_new);

    /* we don't use bdrv_io_limits_disable() for this, because we don't want
     * to affect or delete the block_timer, as it has been moved to bs_top */
    bs_new->io_limits_enabled = false;
    bs_new->block_timer = NULL;
    bs_new->slice_time = 0;
    bs_new->slice_start = 0;
    bs_new->slice_end = 0;

    bdrv_rebind(bs_new);
    bdrv_rebind(bs_top);
}
1053
void bdrv_delete(BlockDriverState *bs)
{
    /* Free @bs. The caller must already have detached the device model,
     * cancelled any block job, and released any in-use reference. */
    assert(!bs->dev);
    assert(!bs->job);
    assert(!bs->in_use);

    /* remove from list, if necessary */
    bdrv_make_anon(bs);

    bdrv_close(bs);

    /* the cached snapshot BDS must never point at freed memory */
    assert(bs != bs_snapshots);
    g_free(bs);
}
1068
fa879d62
MA
1069int bdrv_attach_dev(BlockDriverState *bs, void *dev)
1070/* TODO change to DeviceState *dev when all users are qdevified */
18846dee 1071{
fa879d62 1072 if (bs->dev) {
18846dee
MA
1073 return -EBUSY;
1074 }
fa879d62 1075 bs->dev = dev;
28a7282a 1076 bdrv_iostatus_reset(bs);
18846dee
MA
1077 return 0;
1078}
1079
fa879d62
MA
1080/* TODO qdevified devices don't use this, remove when devices are qdevified */
1081void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
18846dee 1082{
fa879d62
MA
1083 if (bdrv_attach_dev(bs, dev) < 0) {
1084 abort();
1085 }
1086}
1087
void bdrv_detach_dev(BlockDriverState *bs, void *dev)
/* TODO change to DeviceState *dev when all users are qdevified */
{
    /* Detach device model @dev from @bs and reset the per-device state
     * that was configured on its behalf. */
    assert(bs->dev == dev);
    bs->dev = NULL;
    bs->dev_ops = NULL;
    bs->dev_opaque = NULL;
    bs->buffer_alignment = 512;  /* restore the default alignment */
}
1097
/* TODO change to return DeviceState * when all users are qdevified */
void *bdrv_get_attached_dev(BlockDriverState *bs)
{
    /* Return the device model attached with bdrv_attach_dev(), or NULL. */
    return bs->dev;
}
1103
void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
                      void *opaque)
{
    /* Register device-model callbacks @ops, invoked with @opaque. */
    bs->dev_ops = ops;
    bs->dev_opaque = opaque;
    /* A device with removable media invalidates the cached snapshot BDS. */
    if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
        bs_snapshots = NULL;
    }
}
1113
/* Emit the QMP BLOCK_IO_ERROR monitor event for @bdrv. */
void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
                               BlockQMPEventAction action, int is_read)
{
    QObject *data;
    const char *action_str;

    /* Map the action enum onto its wire-protocol string. */
    switch (action) {
    case BDRV_ACTION_REPORT:
        action_str = "report";
        break;
    case BDRV_ACTION_IGNORE:
        action_str = "ignore";
        break;
    case BDRV_ACTION_STOP:
        action_str = "stop";
        break;
    default:
        abort();
    }

    data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
                              bdrv->device_name,
                              action_str,
                              is_read ? "read" : "write");
    monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);

    qobject_decref(data);
}
1142
/* Emit the QMP DEVICE_TRAY_MOVED monitor event for @bs. */
static void bdrv_emit_qmp_eject_event(BlockDriverState *bs, bool ejected)
{
    QObject *data;

    data = qobject_from_jsonf("{ 'device': %s, 'tray-open': %i }",
                              bdrv_get_device_name(bs), ejected);
    monitor_protocol_event(QEVENT_DEVICE_TRAY_MOVED, data);

    qobject_decref(data);
}
1153
/* Notify the device model of a media change (@load true = insert) and
 * emit the matching tray events.  On insert both an open and a close
 * event are emitted if the tray was closed before. */
static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
{
    if (bs->dev_ops && bs->dev_ops->change_media_cb) {
        bool tray_was_closed = !bdrv_dev_is_tray_open(bs);
        bs->dev_ops->change_media_cb(bs->dev_opaque, load);
        if (tray_was_closed) {
            /* tray open */
            bdrv_emit_qmp_eject_event(bs, true);
        }
        if (load) {
            /* tray close */
            bdrv_emit_qmp_eject_event(bs, false);
        }
    }
}
1169
2c6942fa
MA
1170bool bdrv_dev_has_removable_media(BlockDriverState *bs)
1171{
1172 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
1173}
1174
025ccaa7
PB
1175void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
1176{
1177 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
1178 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
1179 }
1180}
1181
e4def80b
MA
1182bool bdrv_dev_is_tray_open(BlockDriverState *bs)
1183{
1184 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
1185 return bs->dev_ops->is_tray_open(bs->dev_opaque);
1186 }
1187 return false;
1188}
1189
145feb17
MA
1190static void bdrv_dev_resize_cb(BlockDriverState *bs)
1191{
1192 if (bs->dev_ops && bs->dev_ops->resize_cb) {
1193 bs->dev_ops->resize_cb(bs->dev_opaque);
0e49de52
MA
1194 }
1195}
1196
f107639a
MA
1197bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
1198{
1199 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
1200 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
1201 }
1202 return false;
1203}
1204
e97fc193
AL
1205/*
1206 * Run consistency checks on an image
1207 *
e076f338 1208 * Returns 0 if the check could be completed (it doesn't mean that the image is
a1c7273b 1209 * free of errors) or -errno when an internal error occurred. The results of the
e076f338 1210 * check are stored in res.
e97fc193 1211 */
e076f338 1212int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
e97fc193
AL
1213{
1214 if (bs->drv->bdrv_check == NULL) {
1215 return -ENOTSUP;
1216 }
1217
e076f338 1218 memset(res, 0, sizeof(*res));
9ac228e0 1219 return bs->drv->bdrv_check(bs, res);
e97fc193
AL
1220}
1221
8a426614
KW
1222#define COMMIT_BUF_SECTORS 2048
1223
33e3963e
FB
1224/* commit COW file into the raw image */
1225int bdrv_commit(BlockDriverState *bs)
1226{
19cb3738 1227 BlockDriver *drv = bs->drv;
ee181196 1228 BlockDriver *backing_drv;
8a426614
KW
1229 int64_t sector, total_sectors;
1230 int n, ro, open_flags;
4dca4b63 1231 int ret = 0, rw_ret = 0;
8a426614 1232 uint8_t *buf;
4dca4b63
NS
1233 char filename[1024];
1234 BlockDriverState *bs_rw, *bs_ro;
33e3963e 1235
19cb3738
FB
1236 if (!drv)
1237 return -ENOMEDIUM;
4dca4b63
NS
1238
1239 if (!bs->backing_hd) {
1240 return -ENOTSUP;
33e3963e
FB
1241 }
1242
4dca4b63
NS
1243 if (bs->backing_hd->keep_read_only) {
1244 return -EACCES;
1245 }
ee181196 1246
2d3735d3
SH
1247 if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) {
1248 return -EBUSY;
1249 }
1250
ee181196 1251 backing_drv = bs->backing_hd->drv;
4dca4b63
NS
1252 ro = bs->backing_hd->read_only;
1253 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
1254 open_flags = bs->backing_hd->open_flags;
1255
1256 if (ro) {
1257 /* re-open as RW */
1258 bdrv_delete(bs->backing_hd);
1259 bs->backing_hd = NULL;
1260 bs_rw = bdrv_new("");
ee181196
KW
1261 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
1262 backing_drv);
4dca4b63
NS
1263 if (rw_ret < 0) {
1264 bdrv_delete(bs_rw);
1265 /* try to re-open read-only */
1266 bs_ro = bdrv_new("");
ee181196
KW
1267 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1268 backing_drv);
4dca4b63
NS
1269 if (ret < 0) {
1270 bdrv_delete(bs_ro);
1271 /* drive not functional anymore */
1272 bs->drv = NULL;
1273 return ret;
1274 }
1275 bs->backing_hd = bs_ro;
1276 return rw_ret;
1277 }
1278 bs->backing_hd = bs_rw;
ea2384d3 1279 }
33e3963e 1280
6ea44308 1281 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
7267c094 1282 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
8a426614
KW
1283
1284 for (sector = 0; sector < total_sectors; sector += n) {
05c4af54 1285 if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
8a426614
KW
1286
1287 if (bdrv_read(bs, sector, buf, n) != 0) {
1288 ret = -EIO;
1289 goto ro_cleanup;
1290 }
1291
1292 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
1293 ret = -EIO;
1294 goto ro_cleanup;
1295 }
ea2384d3 1296 }
33e3963e 1297 }
95389c86 1298
1d44952f
CH
1299 if (drv->bdrv_make_empty) {
1300 ret = drv->bdrv_make_empty(bs);
1301 bdrv_flush(bs);
1302 }
95389c86 1303
3f5075ae
CH
1304 /*
1305 * Make sure all data we wrote to the backing device is actually
1306 * stable on disk.
1307 */
1308 if (bs->backing_hd)
1309 bdrv_flush(bs->backing_hd);
4dca4b63
NS
1310
1311ro_cleanup:
7267c094 1312 g_free(buf);
4dca4b63
NS
1313
1314 if (ro) {
1315 /* re-open as RO */
1316 bdrv_delete(bs->backing_hd);
1317 bs->backing_hd = NULL;
1318 bs_ro = bdrv_new("");
ee181196
KW
1319 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1320 backing_drv);
4dca4b63
NS
1321 if (ret < 0) {
1322 bdrv_delete(bs_ro);
1323 /* drive not functional anymore */
1324 bs->drv = NULL;
1325 return ret;
1326 }
1327 bs->backing_hd = bs_ro;
1328 bs->backing_hd->keep_read_only = 0;
1329 }
1330
1d44952f 1331 return ret;
33e3963e
FB
1332}
1333
e8877497 1334int bdrv_commit_all(void)
6ab4b5ab
MA
1335{
1336 BlockDriverState *bs;
1337
1338 QTAILQ_FOREACH(bs, &bdrv_states, list) {
e8877497
SH
1339 int ret = bdrv_commit(bs);
1340 if (ret < 0) {
1341 return ret;
1342 }
6ab4b5ab 1343 }
e8877497 1344 return 0;
6ab4b5ab
MA
1345}
1346
/* An in-flight request, tracked so overlapping requests can be
 * serialized (see wait_for_overlapping_requests()). */
struct BdrvTrackedRequest {
    BlockDriverState *bs;
    int64_t sector_num;   /* first sector of the request */
    int nb_sectors;       /* request length in sectors */
    bool is_write;
    QLIST_ENTRY(BdrvTrackedRequest) list;
    Coroutine *co; /* owner, used for deadlock detection */
    CoQueue wait_queue; /* coroutines blocked on this request */
};
1356
1357/**
1358 * Remove an active request from the tracked requests list
1359 *
1360 * This function should be called when a tracked request is completing.
1361 */
1362static void tracked_request_end(BdrvTrackedRequest *req)
1363{
1364 QLIST_REMOVE(req, list);
f4658285 1365 qemu_co_queue_restart_all(&req->wait_queue);
dbffbdcf
SH
1366}
1367
1368/**
1369 * Add an active request to the tracked requests list
1370 */
1371static void tracked_request_begin(BdrvTrackedRequest *req,
1372 BlockDriverState *bs,
1373 int64_t sector_num,
1374 int nb_sectors, bool is_write)
1375{
1376 *req = (BdrvTrackedRequest){
1377 .bs = bs,
1378 .sector_num = sector_num,
1379 .nb_sectors = nb_sectors,
1380 .is_write = is_write,
5f8b6491 1381 .co = qemu_coroutine_self(),
dbffbdcf
SH
1382 };
1383
f4658285
SH
1384 qemu_co_queue_init(&req->wait_queue);
1385
dbffbdcf
SH
1386 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
1387}
1388
d83947ac
SH
1389/**
1390 * Round a region to cluster boundaries
1391 */
1392static void round_to_clusters(BlockDriverState *bs,
1393 int64_t sector_num, int nb_sectors,
1394 int64_t *cluster_sector_num,
1395 int *cluster_nb_sectors)
1396{
1397 BlockDriverInfo bdi;
1398
1399 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
1400 *cluster_sector_num = sector_num;
1401 *cluster_nb_sectors = nb_sectors;
1402 } else {
1403 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
1404 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
1405 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
1406 nb_sectors, c);
1407 }
1408}
1409
f4658285
SH
1410static bool tracked_request_overlaps(BdrvTrackedRequest *req,
1411 int64_t sector_num, int nb_sectors) {
d83947ac
SH
1412 /* aaaa bbbb */
1413 if (sector_num >= req->sector_num + req->nb_sectors) {
1414 return false;
1415 }
1416 /* bbbb aaaa */
1417 if (req->sector_num >= sector_num + nb_sectors) {
1418 return false;
1419 }
1420 return true;
f4658285
SH
1421}
1422
/* Block the calling coroutine until no other tracked request overlaps
 * the cluster-aligned region containing [sector_num, +nb_sectors). */
static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors)
{
    BdrvTrackedRequest *req;
    int64_t cluster_sector_num;
    int cluster_nb_sectors;
    bool retry;

    /* If we touch the same cluster it counts as an overlap. This guarantees
     * that allocating writes will be serialized and not race with each other
     * for the same cluster. For example, in copy-on-read it ensures that the
     * CoR read and write operations are atomic and guest writes cannot
     * interleave between them.
     */
    round_to_clusters(bs, sector_num, nb_sectors,
                      &cluster_sector_num, &cluster_nb_sectors);

    /* Re-scan from the start after every wakeup: the list may have
     * changed while we were waiting. */
    do {
        retry = false;
        QLIST_FOREACH(req, &bs->tracked_requests, list) {
            if (tracked_request_overlaps(req, cluster_sector_num,
                                         cluster_nb_sectors)) {
                /* Hitting this means there was a reentrant request, for
                 * example, a block driver issuing nested requests. This must
                 * never happen since it means deadlock.
                 */
                assert(qemu_coroutine_self() != req->co);

                qemu_co_queue_wait(&req->wait_queue);
                retry = true;
                break;
            }
        }
    } while (retry);
}
1458
/*
 * Return values:
 * 0        - success
 * -EINVAL  - backing format specified, but no file
 * -ENOSPC  - can't update the backing file because no space is left in the
 *            image file header
 * -ENOTSUP - format driver doesn't support changing the backing file
 */
int bdrv_change_backing_file(BlockDriverState *bs,
    const char *backing_file, const char *backing_fmt)
{
    BlockDriver *drv = bs->drv;
    int ret;

    /* Backing file format doesn't make sense without a backing file */
    if (backing_fmt && !backing_file) {
        return -EINVAL;
    }

    if (drv->bdrv_change_backing_file != NULL) {
        ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
    } else {
        ret = -ENOTSUP;
    }

    /* Only mirror the new values into the BDS if the image was updated.
     * "x ?: y" is the GNU elvis operator: store "" for a NULL argument. */
    if (ret == 0) {
        pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
        pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
    }
    return ret;
}
1490
71d0770c
AL
1491static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1492 size_t size)
1493{
1494 int64_t len;
1495
1496 if (!bdrv_is_inserted(bs))
1497 return -ENOMEDIUM;
1498
1499 if (bs->growable)
1500 return 0;
1501
1502 len = bdrv_getlength(bs);
1503
fbb7b4e0
KW
1504 if (offset < 0)
1505 return -EIO;
1506
1507 if ((offset > len) || (len - offset < size))
71d0770c
AL
1508 return -EIO;
1509
1510 return 0;
1511}
1512
1513static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1514 int nb_sectors)
1515{
eb5a3165
JS
1516 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1517 nb_sectors * BDRV_SECTOR_SIZE);
71d0770c
AL
1518}
1519
/* Parameter/result bundle handed to bdrv_rw_co_entry() by bdrv_rw_co(). */
typedef struct RwCo {
    BlockDriverState *bs;
    int64_t sector_num;
    int nb_sectors;
    QEMUIOVector *qiov;
    bool is_write;
    int ret;              /* stays NOT_DONE until the coroutine finishes */
} RwCo;
1528
/* Coroutine entry point for the synchronous read/write emulation in
 * bdrv_rw_co(); the result is stored in rwco->ret. */
static void coroutine_fn bdrv_rw_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    if (!rwco->is_write) {
        rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
                                     rwco->nb_sectors, rwco->qiov, 0);
    } else {
        rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
                                      rwco->nb_sectors, rwco->qiov, 0);
    }
}
e7a8a783 1541
/*
 * Process a synchronous request using coroutines
 */
static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
                      int nb_sectors, bool is_write)
{
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base = (void *)buf,
        .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
    };
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .qiov = &qiov,
        .is_write = is_write,
        .ret = NOT_DONE,
    };

    qemu_iovec_init_external(&qiov, &iov, 1);

    /**
     * In sync call context, when the vcpu is blocked, this throttling timer
     * will not fire; so the I/O throttling function has to be disabled here
     * if it has been enabled.
     */
    if (bs->io_limits_enabled) {
        fprintf(stderr, "Disabling I/O throttling on '%s' due "
                        "to synchronous I/O.\n", bdrv_get_device_name(bs));
        bdrv_io_limits_disable(bs);
    }

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_rw_co_entry(&rwco);
    } else {
        co = qemu_coroutine_create(bdrv_rw_co_entry);
        qemu_coroutine_enter(co, &rwco);
        /* Pump AIO completions until the coroutine stores its result. */
        while (rwco.ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }
    return rwco.ret;
}
b338082b 1588
/* return < 0 if error. See bdrv_write() for the return codes */
int bdrv_read(BlockDriverState *bs, int64_t sector_num,
              uint8_t *buf, int nb_sectors)
{
    /* Synchronous read, emulated on top of the coroutine path. */
    return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
}
1595
#define BITS_PER_LONG  (sizeof(unsigned long) * 8)

/* Set (@dirty != 0) or clear the dirty-bitmap bits covering
 * [sector_num, sector_num + nb_sectors), keeping bs->dirty_count (the
 * number of dirty chunks) in sync.  One bit covers one
 * BDRV_SECTORS_PER_DIRTY_CHUNK-sized chunk. */
static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
                             int nb_sectors, int dirty)
{
    int64_t start, end;
    unsigned long val, idx, bit;

    start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
    end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;

    for (; start <= end; start++) {
        idx = start / BITS_PER_LONG;
        bit = start % BITS_PER_LONG;
        val = bs->dirty_bitmap[idx];
        if (dirty) {
            /* only count 0 -> 1 transitions */
            if (!(val & (1UL << bit))) {
                bs->dirty_count++;
                val |= 1UL << bit;
            }
        } else {
            /* only count 1 -> 0 transitions */
            if (val & (1UL << bit)) {
                bs->dirty_count--;
                val &= ~(1UL << bit);
            }
        }
        bs->dirty_bitmap[idx] = val;
    }
}
1625
/* Return < 0 if error. Important errors are:
  -EIO         generic I/O error (may happen for all errors)
  -ENOMEDIUM   No media inserted.
  -EINVAL      Invalid sector number or nb_sectors
  -EACCES      Trying to write a read-only device
*/
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
               const uint8_t *buf, int nb_sectors)
{
    /* Synchronous write, emulated on top of the coroutine path. */
    return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
}
1637
/* Byte-granularity read: read @count1 bytes at byte @offset into @buf,
 * emulated on top of sector-based bdrv_read() with a bounce buffer for
 * the unaligned head and tail.  Returns count1 or a negative errno. */
int bdrv_pread(BlockDriverState *bs, int64_t offset,
               void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;
    /* first read to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count)
        len = count;
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
        count -= len;
        if (count == 0)
            return count1;
        sector_num++;
        buf += len;
    }

    /* read the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
            return ret;
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        buf += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(buf, tmp_buf, count);
    }
    return count1;
}
1682
/* Byte-granularity write: write @count1 bytes from @buf at byte @offset,
 * emulated on top of sector-based bdrv_read()/bdrv_write().  Partial head
 * and tail sectors use a read-modify-write cycle through tmp_buf.
 * Returns count1 or a negative errno. */
int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
                const void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;
    /* first write to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count)
        len = count;
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        count -= len;
        if (count == 0)
            return count1;
        sector_num++;
        buf += len;
    }

    /* write the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
            return ret;
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        buf += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(tmp_buf, buf, count);
        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
    }
    return count1;
}
83f64091 1731
/*
 * Writes to the file and ensures that no writes are reordered across this
 * request (acts as a barrier)
 *
 * Returns 0 on success, -errno in error cases.
 */
int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
    const void *buf, int count)
{
    int ret;

    ret = bdrv_pwrite(bs, offset, buf, count);
    if (ret < 0) {
        return ret;
    }

    /* No flush needed for cache modes that use O_DSYNC */
    if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
        bdrv_flush(bs);
    }

    return 0;
}
1755
/* Copy-on-read worker: read a whole cluster through a bounce buffer and
 * write it back into the image so subsequent reads are local. */
static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    /* Perform I/O through a temporary buffer so that users who scribble over
     * their read buffer while the operation is in progress do not end up
     * modifying the image file.  This is critical for zero-copy guest I/O
     * where anything might happen inside guest memory.
     */
    void *bounce_buffer;

    BlockDriver *drv = bs->drv;
    struct iovec iov;
    QEMUIOVector bounce_qiov;
    int64_t cluster_sector_num;
    int cluster_nb_sectors;
    size_t skip_bytes;
    int ret;

    /* Cover entire cluster so no additional backing file I/O is required when
     * allocating cluster in the image file.
     */
    round_to_clusters(bs, sector_num, nb_sectors,
                      &cluster_sector_num, &cluster_nb_sectors);

    trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
                                   cluster_sector_num, cluster_nb_sectors);

    iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
    iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
    qemu_iovec_init_external(&bounce_qiov, &iov, 1);

    ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
                             &bounce_qiov);
    if (ret < 0) {
        goto err;
    }

    /* Prefer the driver's efficient write-zeroes path for all-zero data. */
    if (drv->bdrv_co_write_zeroes &&
        buffer_is_zero(bounce_buffer, iov.iov_len)) {
        ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
                                      cluster_nb_sectors);
    } else {
        ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
                                  &bounce_qiov);
    }

    if (ret < 0) {
        /* It might be okay to ignore write errors for guest requests.  If this
         * is a deliberate copy-on-read then we don't want to ignore the error.
         * Simply report it in all cases.
         */
        goto err;
    }

    /* Hand the caller only the slice of the cluster it actually asked for. */
    skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
    qemu_iovec_from_buffer(qiov, bounce_buffer + skip_bytes,
                           nb_sectors * BDRV_SECTOR_SIZE);

err:
    qemu_vfree(bounce_buffer);
    return ret;
}
1818
/*
 * Handle a read request in coroutine context
 */
static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;
    int ret;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    }

    /* throttling disk read I/O */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, false, nb_sectors);
    }

    if (bs->copy_on_read) {
        flags |= BDRV_REQ_COPY_ON_READ;
    }
    /* Count CoR readers so writes know they must serialize (below). */
    if (flags & BDRV_REQ_COPY_ON_READ) {
        bs->copy_on_read_in_flight++;
    }

    if (bs->copy_on_read_in_flight) {
        wait_for_overlapping_requests(bs, sector_num, nb_sectors);
    }

    tracked_request_begin(&req, bs, sector_num, nb_sectors, false);

    if (flags & BDRV_REQ_COPY_ON_READ) {
        int pnum;

        ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
        if (ret < 0) {
            goto out;
        }

        /* Any unallocated part goes through the copy-on-read slow path. */
        if (!ret || pnum != nb_sectors) {
            ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
            goto out;
        }
    }

    ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);

out:
    tracked_request_end(&req);

    if (flags & BDRV_REQ_COPY_ON_READ) {
        bs->copy_on_read_in_flight--;
    }

    return ret;
}
1880
/* Public coroutine read entry point (no special request flags). */
int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_readv(bs, sector_num, nb_sectors);

    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
}
1888
/* Coroutine read that additionally forces copy-on-read behaviour. */
int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);

    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
                            BDRV_REQ_COPY_ON_READ);
}
1897
/* Write zeroes to [sector_num, +nb_sectors), preferring the driver's
 * dedicated write-zeroes callback and falling back to writing a
 * zero-filled bounce buffer. */
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors)
{
    BlockDriver *drv = bs->drv;
    QEMUIOVector qiov;
    struct iovec iov;
    int ret;

    /* TODO Emulate only part of misaligned requests instead of letting block
     * drivers return -ENOTSUP and emulate everything */

    /* First try the efficient write zeroes operation */
    if (drv->bdrv_co_write_zeroes) {
        ret = drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
        if (ret != -ENOTSUP) {
            return ret;
        }
    }

    /* Fall back to bounce buffer if write zeroes is unsupported */
    iov.iov_len  = nb_sectors * BDRV_SECTOR_SIZE;
    iov.iov_base = qemu_blockalign(bs, iov.iov_len);
    memset(iov.iov_base, 0, iov.iov_len);
    qemu_iovec_init_external(&qiov, &iov, 1);

    ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);

    qemu_vfree(iov.iov_base);
    return ret;
}
1928
/*
 * Handle a write request in coroutine context
 */
static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;
    int ret;

    if (!bs->drv) {
        return -ENOMEDIUM;
    }
    if (bs->read_only) {
        return -EACCES;
    }
    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    }

    /* throttling disk write I/O */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, true, nb_sectors);
    }

    /* Serialize against in-flight copy-on-read requests (see
     * bdrv_co_do_readv()). */
    if (bs->copy_on_read_in_flight) {
        wait_for_overlapping_requests(bs, sector_num, nb_sectors);
    }

    tracked_request_begin(&req, bs, sector_num, nb_sectors, true);

    if (flags & BDRV_REQ_ZERO_WRITE) {
        ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors);
    } else {
        ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
    }

    /* Dirty tracking and the high-water mark are updated even on error. */
    if (bs->dirty_bitmap) {
        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
    }

    if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
        bs->wr_highest_sector = sector_num + nb_sectors - 1;
    }

    tracked_request_end(&req);

    return ret;
}
1979
/* Public coroutine write entry point (no special request flags). */
int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_writev(bs, sector_num, nb_sectors);

    return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
}
1987
/* Public coroutine write-zeroes entry point; no data qiov is needed. */
int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
                                      int64_t sector_num, int nb_sectors)
{
    trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors);

    return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
                             BDRV_REQ_ZERO_WRITE);
}
1996
/**
 * Truncate file to 'offset' bytes (needed only for file protocols)
 *
 * Returns 0 on success or a negative errno; on success the cached sector
 * count is refreshed and the attached device is notified of the resize.
 */
int bdrv_truncate(BlockDriverState *bs, int64_t offset)
{
    BlockDriver *drv = bs->drv;
    int ret;
    if (!drv)
        return -ENOMEDIUM;
    if (!drv->bdrv_truncate)
        return -ENOTSUP;
    if (bs->read_only)
        return -EACCES;
    if (bdrv_in_use(bs))
        return -EBUSY;
    ret = drv->bdrv_truncate(bs, offset);
    if (ret == 0) {
        ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
        bdrv_dev_resize_cb(bs);
    }
    return ret;
}
2019
/**
 * Length of a allocated file in bytes. Sparse files are counted by actual
 * allocated space. Return < 0 if error or unknown.
 */
int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    if (!drv) {
        return -ENOMEDIUM;
    }
    if (drv->bdrv_get_allocated_file_size) {
        return drv->bdrv_get_allocated_file_size(bs);
    }
    /* Fall back to the underlying protocol layer, if there is one. */
    if (bs->file) {
        return bdrv_get_allocated_file_size(bs->file);
    }
    return -ENOTSUP;
}
2038
/**
 * Length of a file in bytes. Return < 0 if error or unknown.
 */
int64_t bdrv_getlength(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;

    /* Growable or removable-media devices may change size, so query the
     * driver; otherwise trust the cached total_sectors. */
    if (bs->growable || bdrv_dev_has_removable_media(bs)) {
        if (drv->bdrv_getlength) {
            return drv->bdrv_getlength(bs);
        }
    }
    return bs->total_sectors * BDRV_SECTOR_SIZE;
}
2055
19cb3738 2056/* return 0 as number of sectors if no device present or error */
96b8f136 2057void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
fc01f7e7 2058{
19cb3738
FB
2059 int64_t length;
2060 length = bdrv_getlength(bs);
2061 if (length < 0)
2062 length = 0;
2063 else
6ea44308 2064 length = length >> BDRV_SECTOR_BITS;
19cb3738 2065 *nb_sectors_ptr = length;
fc01f7e7 2066}
cf98951b 2067
/* On-disk MSDOS (MBR) partition table entry; multi-byte fields are
 * little-endian (see le32_to_cpu() in guess_disk_lchs()). */
struct partition {
        uint8_t boot_ind;           /* 0x80 - active */
        uint8_t head;               /* starting head */
        uint8_t sector;             /* starting sector */
        uint8_t cyl;                /* starting cylinder */
        uint8_t sys_ind;            /* What partition type */
        uint8_t end_head;           /* end head */
        uint8_t end_sector;         /* end sector */
        uint8_t end_cyl;            /* end cylinder */
        uint32_t start_sect;        /* starting sector counting from 0 */
        uint32_t nr_sects;          /* nr of sectors in partition */
} QEMU_PACKED;
f3d54fc4
AL
2080
2081/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
2082static int guess_disk_lchs(BlockDriverState *bs,
2083 int *pcylinders, int *pheads, int *psectors)
2084{
eb5a3165 2085 uint8_t buf[BDRV_SECTOR_SIZE];
f3d54fc4
AL
2086 int ret, i, heads, sectors, cylinders;
2087 struct partition *p;
2088 uint32_t nr_sects;
a38131b6 2089 uint64_t nb_sectors;
498e386c 2090 bool enabled;
f3d54fc4
AL
2091
2092 bdrv_get_geometry(bs, &nb_sectors);
2093
498e386c
ZYW
2094 /**
2095 * The function will be invoked during startup not only in sync I/O mode,
2096 * but also in async I/O mode. So the I/O throttling function has to
2097 * be disabled temporarily here, not permanently.
2098 */
2099 enabled = bs->io_limits_enabled;
2100 bs->io_limits_enabled = false;
f3d54fc4 2101 ret = bdrv_read(bs, 0, buf, 1);
498e386c 2102 bs->io_limits_enabled = enabled;
f3d54fc4
AL
2103 if (ret < 0)
2104 return -1;
2105 /* test msdos magic */
2106 if (buf[510] != 0x55 || buf[511] != 0xaa)
2107 return -1;
2108 for(i = 0; i < 4; i++) {
2109 p = ((struct partition *)(buf + 0x1be)) + i;
2110 nr_sects = le32_to_cpu(p->nr_sects);
2111 if (nr_sects && p->end_head) {
2112 /* We make the assumption that the partition terminates on
2113 a cylinder boundary */
2114 heads = p->end_head + 1;
2115 sectors = p->end_sector & 63;
2116 if (sectors == 0)
2117 continue;
2118 cylinders = nb_sectors / (heads * sectors);
2119 if (cylinders < 1 || cylinders > 16383)
2120 continue;
2121 *pheads = heads;
2122 *psectors = sectors;
2123 *pcylinders = cylinders;
2124#if 0
2125 printf("guessed geometry: LCHS=%d %d %d\n",
2126 cylinders, heads, sectors);
2127#endif
2128 return 0;
2129 }
2130 }
2131 return -1;
2132}
2133
2134void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
2135{
2136 int translation, lba_detected = 0;
2137 int cylinders, heads, secs;
a38131b6 2138 uint64_t nb_sectors;
f3d54fc4
AL
2139
2140 /* if a geometry hint is available, use it */
2141 bdrv_get_geometry(bs, &nb_sectors);
2142 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
2143 translation = bdrv_get_translation_hint(bs);
2144 if (cylinders != 0) {
2145 *pcyls = cylinders;
2146 *pheads = heads;
2147 *psecs = secs;
2148 } else {
2149 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
2150 if (heads > 16) {
2151 /* if heads > 16, it means that a BIOS LBA
2152 translation was active, so the default
2153 hardware geometry is OK */
2154 lba_detected = 1;
2155 goto default_geometry;
2156 } else {
2157 *pcyls = cylinders;
2158 *pheads = heads;
2159 *psecs = secs;
2160 /* disable any translation to be in sync with
2161 the logical geometry */
2162 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
2163 bdrv_set_translation_hint(bs,
2164 BIOS_ATA_TRANSLATION_NONE);
2165 }
2166 }
2167 } else {
2168 default_geometry:
2169 /* if no geometry, use a standard physical disk geometry */
2170 cylinders = nb_sectors / (16 * 63);
2171
2172 if (cylinders > 16383)
2173 cylinders = 16383;
2174 else if (cylinders < 2)
2175 cylinders = 2;
2176 *pcyls = cylinders;
2177 *pheads = 16;
2178 *psecs = 63;
2179 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
2180 if ((*pcyls * *pheads) <= 131072) {
2181 bdrv_set_translation_hint(bs,
2182 BIOS_ATA_TRANSLATION_LARGE);
2183 } else {
2184 bdrv_set_translation_hint(bs,
2185 BIOS_ATA_TRANSLATION_LBA);
2186 }
2187 }
2188 }
2189 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
2190 }
2191}
2192
5fafdf24 2193void bdrv_set_geometry_hint(BlockDriverState *bs,
b338082b
FB
2194 int cyls, int heads, int secs)
2195{
2196 bs->cyls = cyls;
2197 bs->heads = heads;
2198 bs->secs = secs;
2199}
2200
46d4767d
FB
2201void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
2202{
2203 bs->translation = translation;
2204}
2205
5fafdf24 2206void bdrv_get_geometry_hint(BlockDriverState *bs,
b338082b
FB
2207 int *pcyls, int *pheads, int *psecs)
2208{
2209 *pcyls = bs->cyls;
2210 *pheads = bs->heads;
2211 *psecs = bs->secs;
2212}
2213
0563e191
ZYW
2214/* throttling disk io limits */
2215void bdrv_set_io_limits(BlockDriverState *bs,
2216 BlockIOLimit *io_limits)
2217{
2218 bs->io_limits = *io_limits;
2219 bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
2220}
2221
5bbdbb46
BS
2222/* Recognize floppy formats */
2223typedef struct FDFormat {
2224 FDriveType drive;
2225 uint8_t last_sect;
2226 uint8_t max_track;
2227 uint8_t max_head;
f8d3d128 2228 FDriveRate rate;
5bbdbb46
BS
2229} FDFormat;
2230
2231static const FDFormat fd_formats[] = {
2232 /* First entry is default format */
2233 /* 1.44 MB 3"1/2 floppy disks */
f8d3d128
HP
2234 { FDRIVE_DRV_144, 18, 80, 1, FDRIVE_RATE_500K, },
2235 { FDRIVE_DRV_144, 20, 80, 1, FDRIVE_RATE_500K, },
2236 { FDRIVE_DRV_144, 21, 80, 1, FDRIVE_RATE_500K, },
2237 { FDRIVE_DRV_144, 21, 82, 1, FDRIVE_RATE_500K, },
2238 { FDRIVE_DRV_144, 21, 83, 1, FDRIVE_RATE_500K, },
2239 { FDRIVE_DRV_144, 22, 80, 1, FDRIVE_RATE_500K, },
2240 { FDRIVE_DRV_144, 23, 80, 1, FDRIVE_RATE_500K, },
2241 { FDRIVE_DRV_144, 24, 80, 1, FDRIVE_RATE_500K, },
5bbdbb46 2242 /* 2.88 MB 3"1/2 floppy disks */
f8d3d128
HP
2243 { FDRIVE_DRV_288, 36, 80, 1, FDRIVE_RATE_1M, },
2244 { FDRIVE_DRV_288, 39, 80, 1, FDRIVE_RATE_1M, },
2245 { FDRIVE_DRV_288, 40, 80, 1, FDRIVE_RATE_1M, },
2246 { FDRIVE_DRV_288, 44, 80, 1, FDRIVE_RATE_1M, },
2247 { FDRIVE_DRV_288, 48, 80, 1, FDRIVE_RATE_1M, },
5bbdbb46 2248 /* 720 kB 3"1/2 floppy disks */
f8d3d128
HP
2249 { FDRIVE_DRV_144, 9, 80, 1, FDRIVE_RATE_250K, },
2250 { FDRIVE_DRV_144, 10, 80, 1, FDRIVE_RATE_250K, },
2251 { FDRIVE_DRV_144, 10, 82, 1, FDRIVE_RATE_250K, },
2252 { FDRIVE_DRV_144, 10, 83, 1, FDRIVE_RATE_250K, },
2253 { FDRIVE_DRV_144, 13, 80, 1, FDRIVE_RATE_250K, },
2254 { FDRIVE_DRV_144, 14, 80, 1, FDRIVE_RATE_250K, },
5bbdbb46 2255 /* 1.2 MB 5"1/4 floppy disks */
f8d3d128
HP
2256 { FDRIVE_DRV_120, 15, 80, 1, FDRIVE_RATE_500K, },
2257 { FDRIVE_DRV_120, 18, 80, 1, FDRIVE_RATE_500K, },
2258 { FDRIVE_DRV_120, 18, 82, 1, FDRIVE_RATE_500K, },
2259 { FDRIVE_DRV_120, 18, 83, 1, FDRIVE_RATE_500K, },
2260 { FDRIVE_DRV_120, 20, 80, 1, FDRIVE_RATE_500K, },
5bbdbb46 2261 /* 720 kB 5"1/4 floppy disks */
f8d3d128
HP
2262 { FDRIVE_DRV_120, 9, 80, 1, FDRIVE_RATE_250K, },
2263 { FDRIVE_DRV_120, 11, 80, 1, FDRIVE_RATE_250K, },
5bbdbb46 2264 /* 360 kB 5"1/4 floppy disks */
f8d3d128
HP
2265 { FDRIVE_DRV_120, 9, 40, 1, FDRIVE_RATE_300K, },
2266 { FDRIVE_DRV_120, 9, 40, 0, FDRIVE_RATE_300K, },
2267 { FDRIVE_DRV_120, 10, 41, 1, FDRIVE_RATE_300K, },
2268 { FDRIVE_DRV_120, 10, 42, 1, FDRIVE_RATE_300K, },
5bbdbb46 2269 /* 320 kB 5"1/4 floppy disks */
f8d3d128
HP
2270 { FDRIVE_DRV_120, 8, 40, 1, FDRIVE_RATE_250K, },
2271 { FDRIVE_DRV_120, 8, 40, 0, FDRIVE_RATE_250K, },
5bbdbb46 2272 /* 360 kB must match 5"1/4 better than 3"1/2... */
f8d3d128 2273 { FDRIVE_DRV_144, 9, 80, 0, FDRIVE_RATE_250K, },
5bbdbb46 2274 /* end */
f8d3d128 2275 { FDRIVE_DRV_NONE, -1, -1, 0, 0, },
5bbdbb46
BS
2276};
2277
2278void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
2279 int *max_track, int *last_sect,
f8d3d128
HP
2280 FDriveType drive_in, FDriveType *drive,
2281 FDriveRate *rate)
5bbdbb46
BS
2282{
2283 const FDFormat *parse;
2284 uint64_t nb_sectors, size;
2285 int i, first_match, match;
2286
2287 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
2288 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
2289 /* User defined disk */
f8d3d128 2290 *rate = FDRIVE_RATE_500K;
5bbdbb46
BS
2291 } else {
2292 bdrv_get_geometry(bs, &nb_sectors);
2293 match = -1;
2294 first_match = -1;
2295 for (i = 0; ; i++) {
2296 parse = &fd_formats[i];
2297 if (parse->drive == FDRIVE_DRV_NONE) {
2298 break;
2299 }
2300 if (drive_in == parse->drive ||
2301 drive_in == FDRIVE_DRV_NONE) {
2302 size = (parse->max_head + 1) * parse->max_track *
2303 parse->last_sect;
2304 if (nb_sectors == size) {
2305 match = i;
2306 break;
2307 }
2308 if (first_match == -1) {
2309 first_match = i;
2310 }
2311 }
2312 }
2313 if (match == -1) {
2314 if (first_match == -1) {
2315 match = 1;
2316 } else {
2317 match = first_match;
2318 }
2319 parse = &fd_formats[match];
2320 }
2321 *nb_heads = parse->max_head + 1;
2322 *max_track = parse->max_track;
2323 *last_sect = parse->last_sect;
2324 *drive = parse->drive;
f8d3d128 2325 *rate = parse->rate;
5bbdbb46
BS
2326 }
2327}
2328
46d4767d
FB
2329int bdrv_get_translation_hint(BlockDriverState *bs)
2330{
2331 return bs->translation;
2332}
2333
abd7f68d
MA
2334void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
2335 BlockErrorAction on_write_error)
2336{
2337 bs->on_read_error = on_read_error;
2338 bs->on_write_error = on_write_error;
2339}
2340
2341BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
2342{
2343 return is_read ? bs->on_read_error : bs->on_write_error;
2344}
2345
b338082b
FB
2346int bdrv_is_read_only(BlockDriverState *bs)
2347{
2348 return bs->read_only;
2349}
2350
985a03b0
TS
2351int bdrv_is_sg(BlockDriverState *bs)
2352{
2353 return bs->sg;
2354}
2355
e900a7b7
CH
2356int bdrv_enable_write_cache(BlockDriverState *bs)
2357{
2358 return bs->enable_write_cache;
2359}
2360
ea2384d3
FB
2361int bdrv_is_encrypted(BlockDriverState *bs)
2362{
2363 if (bs->backing_hd && bs->backing_hd->encrypted)
2364 return 1;
2365 return bs->encrypted;
2366}
2367
c0f4ce77
AL
2368int bdrv_key_required(BlockDriverState *bs)
2369{
2370 BlockDriverState *backing_hd = bs->backing_hd;
2371
2372 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
2373 return 1;
2374 return (bs->encrypted && !bs->valid_key);
2375}
2376
ea2384d3
FB
2377int bdrv_set_key(BlockDriverState *bs, const char *key)
2378{
2379 int ret;
2380 if (bs->backing_hd && bs->backing_hd->encrypted) {
2381 ret = bdrv_set_key(bs->backing_hd, key);
2382 if (ret < 0)
2383 return ret;
2384 if (!bs->encrypted)
2385 return 0;
2386 }
fd04a2ae
SH
2387 if (!bs->encrypted) {
2388 return -EINVAL;
2389 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2390 return -ENOMEDIUM;
2391 }
c0f4ce77 2392 ret = bs->drv->bdrv_set_key(bs, key);
bb5fc20f
AL
2393 if (ret < 0) {
2394 bs->valid_key = 0;
2395 } else if (!bs->valid_key) {
2396 bs->valid_key = 1;
2397 /* call the change callback now, we skipped it on open */
7d4b4ba5 2398 bdrv_dev_change_media_cb(bs, true);
bb5fc20f 2399 }
c0f4ce77 2400 return ret;
ea2384d3
FB
2401}
2402
2403void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
2404{
19cb3738 2405 if (!bs->drv) {
ea2384d3
FB
2406 buf[0] = '\0';
2407 } else {
2408 pstrcpy(buf, buf_size, bs->drv->format_name);
2409 }
2410}
2411
5fafdf24 2412void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
ea2384d3
FB
2413 void *opaque)
2414{
2415 BlockDriver *drv;
2416
8a22f02a 2417 QLIST_FOREACH(drv, &bdrv_drivers, list) {
ea2384d3
FB
2418 it(opaque, drv->format_name);
2419 }
2420}
2421
b338082b
FB
2422BlockDriverState *bdrv_find(const char *name)
2423{
2424 BlockDriverState *bs;
2425
1b7bdbc1
SH
2426 QTAILQ_FOREACH(bs, &bdrv_states, list) {
2427 if (!strcmp(name, bs->device_name)) {
b338082b 2428 return bs;
1b7bdbc1 2429 }
b338082b
FB
2430 }
2431 return NULL;
2432}
2433
2f399b0a
MA
2434BlockDriverState *bdrv_next(BlockDriverState *bs)
2435{
2436 if (!bs) {
2437 return QTAILQ_FIRST(&bdrv_states);
2438 }
2439 return QTAILQ_NEXT(bs, list);
2440}
2441
51de9760 2442void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
81d0912d
FB
2443{
2444 BlockDriverState *bs;
2445
1b7bdbc1 2446 QTAILQ_FOREACH(bs, &bdrv_states, list) {
51de9760 2447 it(opaque, bs);
81d0912d
FB
2448 }
2449}
2450
ea2384d3
FB
2451const char *bdrv_get_device_name(BlockDriverState *bs)
2452{
2453 return bs->device_name;
2454}
2455
c6ca28d6
AL
2456void bdrv_flush_all(void)
2457{
2458 BlockDriverState *bs;
2459
1b7bdbc1 2460 QTAILQ_FOREACH(bs, &bdrv_states, list) {
29cdb251 2461 bdrv_flush(bs);
1b7bdbc1 2462 }
c6ca28d6
AL
2463}
2464
f2feebbd
KW
2465int bdrv_has_zero_init(BlockDriverState *bs)
2466{
2467 assert(bs->drv);
2468
336c1c12
KW
2469 if (bs->drv->bdrv_has_zero_init) {
2470 return bs->drv->bdrv_has_zero_init(bs);
f2feebbd
KW
2471 }
2472
2473 return 1;
2474}
2475
376ae3f1
SH
2476typedef struct BdrvCoIsAllocatedData {
2477 BlockDriverState *bs;
2478 int64_t sector_num;
2479 int nb_sectors;
2480 int *pnum;
2481 int ret;
2482 bool done;
2483} BdrvCoIsAllocatedData;
2484
f58c7b35
TS
2485/*
2486 * Returns true iff the specified sector is present in the disk image. Drivers
2487 * not implementing the functionality are assumed to not support backing files,
2488 * hence all their sectors are reported as allocated.
2489 *
bd9533e3
SH
2490 * If 'sector_num' is beyond the end of the disk image the return value is 0
2491 * and 'pnum' is set to 0.
2492 *
f58c7b35
TS
2493 * 'pnum' is set to the number of sectors (including and immediately following
2494 * the specified sector) that are known to be in the same
2495 * allocated/unallocated state.
2496 *
bd9533e3
SH
2497 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
2498 * beyond the end of the disk image it will be clamped.
f58c7b35 2499 */
060f51c9
SH
2500int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
2501 int nb_sectors, int *pnum)
f58c7b35 2502{
bd9533e3
SH
2503 int64_t n;
2504
2505 if (sector_num >= bs->total_sectors) {
2506 *pnum = 0;
2507 return 0;
2508 }
2509
2510 n = bs->total_sectors - sector_num;
2511 if (n < nb_sectors) {
2512 nb_sectors = n;
2513 }
2514
6aebab14 2515 if (!bs->drv->bdrv_co_is_allocated) {
bd9533e3 2516 *pnum = nb_sectors;
f58c7b35
TS
2517 return 1;
2518 }
6aebab14 2519
060f51c9
SH
2520 return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
2521}
2522
2523/* Coroutine wrapper for bdrv_is_allocated() */
2524static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
2525{
2526 BdrvCoIsAllocatedData *data = opaque;
2527 BlockDriverState *bs = data->bs;
2528
2529 data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
2530 data->pnum);
2531 data->done = true;
2532}
2533
2534/*
2535 * Synchronous wrapper around bdrv_co_is_allocated().
2536 *
2537 * See bdrv_co_is_allocated() for details.
2538 */
2539int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
2540 int *pnum)
2541{
6aebab14
SH
2542 Coroutine *co;
2543 BdrvCoIsAllocatedData data = {
2544 .bs = bs,
2545 .sector_num = sector_num,
2546 .nb_sectors = nb_sectors,
2547 .pnum = pnum,
2548 .done = false,
2549 };
2550
2551 co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
2552 qemu_coroutine_enter(co, &data);
2553 while (!data.done) {
2554 qemu_aio_wait();
2555 }
2556 return data.ret;
f58c7b35
TS
2557}
2558
b2023818 2559BlockInfoList *qmp_query_block(Error **errp)
b338082b 2560{
b2023818 2561 BlockInfoList *head = NULL, *cur_item = NULL;
b338082b
FB
2562 BlockDriverState *bs;
2563
1b7bdbc1 2564 QTAILQ_FOREACH(bs, &bdrv_states, list) {
b2023818 2565 BlockInfoList *info = g_malloc0(sizeof(*info));
d15e5465 2566
b2023818
LC
2567 info->value = g_malloc0(sizeof(*info->value));
2568 info->value->device = g_strdup(bs->device_name);
2569 info->value->type = g_strdup("unknown");
2570 info->value->locked = bdrv_dev_is_medium_locked(bs);
2571 info->value->removable = bdrv_dev_has_removable_media(bs);
d15e5465 2572
e4def80b 2573 if (bdrv_dev_has_removable_media(bs)) {
b2023818
LC
2574 info->value->has_tray_open = true;
2575 info->value->tray_open = bdrv_dev_is_tray_open(bs);
e4def80b 2576 }
f04ef601
LC
2577
2578 if (bdrv_iostatus_is_enabled(bs)) {
b2023818
LC
2579 info->value->has_io_status = true;
2580 info->value->io_status = bs->iostatus;
f04ef601
LC
2581 }
2582
19cb3738 2583 if (bs->drv) {
b2023818
LC
2584 info->value->has_inserted = true;
2585 info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
2586 info->value->inserted->file = g_strdup(bs->filename);
2587 info->value->inserted->ro = bs->read_only;
2588 info->value->inserted->drv = g_strdup(bs->drv->format_name);
2589 info->value->inserted->encrypted = bs->encrypted;
2590 if (bs->backing_file[0]) {
2591 info->value->inserted->has_backing_file = true;
2592 info->value->inserted->backing_file = g_strdup(bs->backing_file);
376253ec 2593 }
727f005e
ZYW
2594
2595 if (bs->io_limits_enabled) {
2596 info->value->inserted->bps =
2597 bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
2598 info->value->inserted->bps_rd =
2599 bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
2600 info->value->inserted->bps_wr =
2601 bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
2602 info->value->inserted->iops =
2603 bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
2604 info->value->inserted->iops_rd =
2605 bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
2606 info->value->inserted->iops_wr =
2607 bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
2608 }
b2023818 2609 }
d15e5465 2610
b2023818
LC
2611 /* XXX: waiting for the qapi to support GSList */
2612 if (!cur_item) {
2613 head = cur_item = info;
2614 } else {
2615 cur_item->next = info;
2616 cur_item = info;
b338082b 2617 }
b338082b 2618 }
d15e5465 2619
b2023818 2620 return head;
b338082b 2621}
a36e69dd 2622
f11f57e4
LC
2623/* Consider exposing this as a full fledged QMP command */
2624static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
2625{
2626 BlockStats *s;
2627
2628 s = g_malloc0(sizeof(*s));
2629
2630 if (bs->device_name[0]) {
2631 s->has_device = true;
2632 s->device = g_strdup(bs->device_name);
294cc35f
KW
2633 }
2634
f11f57e4
LC
2635 s->stats = g_malloc0(sizeof(*s->stats));
2636 s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
2637 s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
2638 s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
2639 s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
2640 s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
2641 s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
2642 s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
2643 s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
2644 s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];
2645
294cc35f 2646 if (bs->file) {
f11f57e4
LC
2647 s->has_parent = true;
2648 s->parent = qmp_query_blockstat(bs->file, NULL);
294cc35f
KW
2649 }
2650
f11f57e4 2651 return s;
294cc35f
KW
2652}
2653
f11f57e4 2654BlockStatsList *qmp_query_blockstats(Error **errp)
218a536a 2655{
f11f57e4 2656 BlockStatsList *head = NULL, *cur_item = NULL;
a36e69dd
TS
2657 BlockDriverState *bs;
2658
1b7bdbc1 2659 QTAILQ_FOREACH(bs, &bdrv_states, list) {
f11f57e4
LC
2660 BlockStatsList *info = g_malloc0(sizeof(*info));
2661 info->value = qmp_query_blockstat(bs, NULL);
2662
2663 /* XXX: waiting for the qapi to support GSList */
2664 if (!cur_item) {
2665 head = cur_item = info;
2666 } else {
2667 cur_item->next = info;
2668 cur_item = info;
2669 }
a36e69dd 2670 }
218a536a 2671
f11f57e4 2672 return head;
a36e69dd 2673}
ea2384d3 2674
045df330
AL
2675const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2676{
2677 if (bs->backing_hd && bs->backing_hd->encrypted)
2678 return bs->backing_file;
2679 else if (bs->encrypted)
2680 return bs->filename;
2681 else
2682 return NULL;
2683}
2684
5fafdf24 2685void bdrv_get_backing_filename(BlockDriverState *bs,
83f64091
FB
2686 char *filename, int filename_size)
2687{
3574c608 2688 pstrcpy(filename, filename_size, bs->backing_file);
83f64091
FB
2689}
2690
5fafdf24 2691int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
faea38e7
FB
2692 const uint8_t *buf, int nb_sectors)
2693{
2694 BlockDriver *drv = bs->drv;
2695 if (!drv)
19cb3738 2696 return -ENOMEDIUM;
faea38e7
FB
2697 if (!drv->bdrv_write_compressed)
2698 return -ENOTSUP;
fbb7b4e0
KW
2699 if (bdrv_check_request(bs, sector_num, nb_sectors))
2700 return -EIO;
a55eb92c 2701
c6d22830 2702 if (bs->dirty_bitmap) {
7cd1e32a
LS
2703 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2704 }
a55eb92c 2705
faea38e7
FB
2706 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2707}
3b46e624 2708
faea38e7
FB
2709int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2710{
2711 BlockDriver *drv = bs->drv;
2712 if (!drv)
19cb3738 2713 return -ENOMEDIUM;
faea38e7
FB
2714 if (!drv->bdrv_get_info)
2715 return -ENOTSUP;
2716 memset(bdi, 0, sizeof(*bdi));
2717 return drv->bdrv_get_info(bs, bdi);
2718}
2719
45566e9c
CH
2720int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2721 int64_t pos, int size)
178e08a5
AL
2722{
2723 BlockDriver *drv = bs->drv;
2724 if (!drv)
2725 return -ENOMEDIUM;
7cdb1f6d
MK
2726 if (drv->bdrv_save_vmstate)
2727 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2728 if (bs->file)
2729 return bdrv_save_vmstate(bs->file, buf, pos, size);
2730 return -ENOTSUP;
178e08a5
AL
2731}
2732
45566e9c
CH
2733int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2734 int64_t pos, int size)
178e08a5
AL
2735{
2736 BlockDriver *drv = bs->drv;
2737 if (!drv)
2738 return -ENOMEDIUM;
7cdb1f6d
MK
2739 if (drv->bdrv_load_vmstate)
2740 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2741 if (bs->file)
2742 return bdrv_load_vmstate(bs->file, buf, pos, size);
2743 return -ENOTSUP;
178e08a5
AL
2744}
2745
8b9b0cc2
KW
2746void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2747{
2748 BlockDriver *drv = bs->drv;
2749
2750 if (!drv || !drv->bdrv_debug_event) {
2751 return;
2752 }
2753
2754 return drv->bdrv_debug_event(bs, event);
2755
2756}
2757
faea38e7
FB
2758/**************************************************************/
2759/* handling of snapshots */
2760
feeee5ac
MDCF
2761int bdrv_can_snapshot(BlockDriverState *bs)
2762{
2763 BlockDriver *drv = bs->drv;
07b70bfb 2764 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
feeee5ac
MDCF
2765 return 0;
2766 }
2767
2768 if (!drv->bdrv_snapshot_create) {
2769 if (bs->file != NULL) {
2770 return bdrv_can_snapshot(bs->file);
2771 }
2772 return 0;
2773 }
2774
2775 return 1;
2776}
2777
199630b6
BS
2778int bdrv_is_snapshot(BlockDriverState *bs)
2779{
2780 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2781}
2782
f9092b10
MA
2783BlockDriverState *bdrv_snapshots(void)
2784{
2785 BlockDriverState *bs;
2786
3ac906f7 2787 if (bs_snapshots) {
f9092b10 2788 return bs_snapshots;
3ac906f7 2789 }
f9092b10
MA
2790
2791 bs = NULL;
2792 while ((bs = bdrv_next(bs))) {
2793 if (bdrv_can_snapshot(bs)) {
3ac906f7
MA
2794 bs_snapshots = bs;
2795 return bs;
f9092b10
MA
2796 }
2797 }
2798 return NULL;
f9092b10
MA
2799}
2800
5fafdf24 2801int bdrv_snapshot_create(BlockDriverState *bs,
faea38e7
FB
2802 QEMUSnapshotInfo *sn_info)
2803{
2804 BlockDriver *drv = bs->drv;
2805 if (!drv)
19cb3738 2806 return -ENOMEDIUM;
7cdb1f6d
MK
2807 if (drv->bdrv_snapshot_create)
2808 return drv->bdrv_snapshot_create(bs, sn_info);
2809 if (bs->file)
2810 return bdrv_snapshot_create(bs->file, sn_info);
2811 return -ENOTSUP;
faea38e7
FB
2812}
2813
5fafdf24 2814int bdrv_snapshot_goto(BlockDriverState *bs,
faea38e7
FB
2815 const char *snapshot_id)
2816{
2817 BlockDriver *drv = bs->drv;
7cdb1f6d
MK
2818 int ret, open_ret;
2819
faea38e7 2820 if (!drv)
19cb3738 2821 return -ENOMEDIUM;
7cdb1f6d
MK
2822 if (drv->bdrv_snapshot_goto)
2823 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2824
2825 if (bs->file) {
2826 drv->bdrv_close(bs);
2827 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2828 open_ret = drv->bdrv_open(bs, bs->open_flags);
2829 if (open_ret < 0) {
2830 bdrv_delete(bs->file);
2831 bs->drv = NULL;
2832 return open_ret;
2833 }
2834 return ret;
2835 }
2836
2837 return -ENOTSUP;
faea38e7
FB
2838}
2839
2840int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2841{
2842 BlockDriver *drv = bs->drv;
2843 if (!drv)
19cb3738 2844 return -ENOMEDIUM;
7cdb1f6d
MK
2845 if (drv->bdrv_snapshot_delete)
2846 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2847 if (bs->file)
2848 return bdrv_snapshot_delete(bs->file, snapshot_id);
2849 return -ENOTSUP;
faea38e7
FB
2850}
2851
5fafdf24 2852int bdrv_snapshot_list(BlockDriverState *bs,
faea38e7
FB
2853 QEMUSnapshotInfo **psn_info)
2854{
2855 BlockDriver *drv = bs->drv;
2856 if (!drv)
19cb3738 2857 return -ENOMEDIUM;
7cdb1f6d
MK
2858 if (drv->bdrv_snapshot_list)
2859 return drv->bdrv_snapshot_list(bs, psn_info);
2860 if (bs->file)
2861 return bdrv_snapshot_list(bs->file, psn_info);
2862 return -ENOTSUP;
faea38e7
FB
2863}
2864
51ef6727 2865int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2866 const char *snapshot_name)
2867{
2868 BlockDriver *drv = bs->drv;
2869 if (!drv) {
2870 return -ENOMEDIUM;
2871 }
2872 if (!bs->read_only) {
2873 return -EINVAL;
2874 }
2875 if (drv->bdrv_snapshot_load_tmp) {
2876 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2877 }
2878 return -ENOTSUP;
2879}
2880
e8a6bb9c
MT
2881BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
2882 const char *backing_file)
2883{
2884 if (!bs->drv) {
2885 return NULL;
2886 }
2887
2888 if (bs->backing_hd) {
2889 if (strcmp(bs->backing_file, backing_file) == 0) {
2890 return bs->backing_hd;
2891 } else {
2892 return bdrv_find_backing_image(bs->backing_hd, backing_file);
2893 }
2894 }
2895
2896 return NULL;
2897}
2898
#define NB_SUFFIXES 4

/* Format 'size' into buf as a human-readable string ("999", "1.0K",
 * "10M", ...) and return buf. */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t base = 1024;
    int i;

    if (size <= 999) {
        /* Small values are printed verbatim, without a suffix. */
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (i = 0; i < NB_SUFFIXES; i++) {
        if (size < (10 * base)) {
            /* Below 10 units: keep one fractional digit. */
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / base, suffixes[i]);
            break;
        } else if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
            /* Otherwise round to the nearest whole unit. */
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     ((size + (base >> 1)) / base), suffixes[i]);
            break;
        }
        base = base * 1024;
    }
    return buf;
}
2928
2929char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2930{
2931 char buf1[128], date_buf[128], clock_buf[128];
3b9f94e1
FB
2932#ifdef _WIN32
2933 struct tm *ptm;
2934#else
faea38e7 2935 struct tm tm;
3b9f94e1 2936#endif
faea38e7
FB
2937 time_t ti;
2938 int64_t secs;
2939
2940 if (!sn) {
5fafdf24
TS
2941 snprintf(buf, buf_size,
2942 "%-10s%-20s%7s%20s%15s",
faea38e7
FB
2943 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2944 } else {
2945 ti = sn->date_sec;
3b9f94e1
FB
2946#ifdef _WIN32
2947 ptm = localtime(&ti);
2948 strftime(date_buf, sizeof(date_buf),
2949 "%Y-%m-%d %H:%M:%S", ptm);
2950#else
faea38e7
FB
2951 localtime_r(&ti, &tm);
2952 strftime(date_buf, sizeof(date_buf),
2953 "%Y-%m-%d %H:%M:%S", &tm);
3b9f94e1 2954#endif
faea38e7
FB
2955 secs = sn->vm_clock_nsec / 1000000000;
2956 snprintf(clock_buf, sizeof(clock_buf),
2957 "%02d:%02d:%02d.%03d",
2958 (int)(secs / 3600),
2959 (int)((secs / 60) % 60),
5fafdf24 2960 (int)(secs % 60),
faea38e7
FB
2961 (int)((sn->vm_clock_nsec / 1000000) % 1000));
2962 snprintf(buf, buf_size,
5fafdf24 2963 "%-10s%-20s%7s%20s%15s",
faea38e7
FB
2964 sn->id_str, sn->name,
2965 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2966 date_buf,
2967 clock_buf);
2968 }
2969 return buf;
2970}
2971
ea2384d3 2972/**************************************************************/
83f64091 2973/* async I/Os */
ea2384d3 2974
3b69e4b9 2975BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
f141eafe 2976 QEMUIOVector *qiov, int nb_sectors,
3b69e4b9 2977 BlockDriverCompletionFunc *cb, void *opaque)
83f64091 2978{
bbf0a440
SH
2979 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2980
b2a61371 2981 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
8c5873d6 2982 cb, opaque, false);
ea2384d3
FB
2983}
2984
f141eafe
AL
2985BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2986 QEMUIOVector *qiov, int nb_sectors,
2987 BlockDriverCompletionFunc *cb, void *opaque)
ea2384d3 2988{
bbf0a440
SH
2989 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2990
1a6e115b 2991 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
8c5873d6 2992 cb, opaque, true);
83f64091
FB
2993}
2994
40b4f539
KW
2995
2996typedef struct MultiwriteCB {
2997 int error;
2998 int num_requests;
2999 int num_callbacks;
3000 struct {
3001 BlockDriverCompletionFunc *cb;
3002 void *opaque;
3003 QEMUIOVector *free_qiov;
40b4f539
KW
3004 } callbacks[];
3005} MultiwriteCB;
3006
3007static void multiwrite_user_cb(MultiwriteCB *mcb)
3008{
3009 int i;
3010
3011 for (i = 0; i < mcb->num_callbacks; i++) {
3012 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
1e1ea48d
SH
3013 if (mcb->callbacks[i].free_qiov) {
3014 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
3015 }
7267c094 3016 g_free(mcb->callbacks[i].free_qiov);
40b4f539
KW
3017 }
3018}
3019
3020static void multiwrite_cb(void *opaque, int ret)
3021{
3022 MultiwriteCB *mcb = opaque;
3023
6d519a5f
SH
3024 trace_multiwrite_cb(mcb, ret);
3025
cb6d3ca0 3026 if (ret < 0 && !mcb->error) {
40b4f539 3027 mcb->error = ret;
40b4f539
KW
3028 }
3029
3030 mcb->num_requests--;
3031 if (mcb->num_requests == 0) {
de189a1b 3032 multiwrite_user_cb(mcb);
7267c094 3033 g_free(mcb);
40b4f539
KW
3034 }
3035}
3036
3037static int multiwrite_req_compare(const void *a, const void *b)
3038{
77be4366
CH
3039 const BlockRequest *req1 = a, *req2 = b;
3040
3041 /*
3042 * Note that we can't simply subtract req2->sector from req1->sector
3043 * here as that could overflow the return value.
3044 */
3045 if (req1->sector > req2->sector) {
3046 return 1;
3047 } else if (req1->sector < req2->sector) {
3048 return -1;
3049 } else {
3050 return 0;
3051 }
40b4f539
KW
3052}
3053
3054/*
3055 * Takes a bunch of requests and tries to merge them. Returns the number of
3056 * requests that remain after merging.
3057 */
3058static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
3059 int num_reqs, MultiwriteCB *mcb)
3060{
3061 int i, outidx;
3062
3063 // Sort requests by start sector
3064 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
3065
3066 // Check if adjacent requests touch the same clusters. If so, combine them,
3067 // filling up gaps with zero sectors.
3068 outidx = 0;
3069 for (i = 1; i < num_reqs; i++) {
3070 int merge = 0;
3071 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
3072
b6a127a1 3073 // Handle exactly sequential writes and overlapping writes.
40b4f539
KW
3074 if (reqs[i].sector <= oldreq_last) {
3075 merge = 1;
3076 }
3077
e2a305fb
CH
3078 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
3079 merge = 0;
3080 }
3081
40b4f539
KW
3082 if (merge) {
3083 size_t size;
7267c094 3084 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
40b4f539
KW
3085 qemu_iovec_init(qiov,
3086 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
3087
3088 // Add the first request to the merged one. If the requests are
3089 // overlapping, drop the last sectors of the first request.
3090 size = (reqs[i].sector - reqs[outidx].sector) << 9;
3091 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
3092
b6a127a1
PB
3093 // We should need to add any zeros between the two requests
3094 assert (reqs[i].sector <= oldreq_last);
40b4f539
KW
3095
3096 // Add the second request
3097 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
3098
cbf1dff2 3099 reqs[outidx].nb_sectors = qiov->size >> 9;
40b4f539
KW
3100 reqs[outidx].qiov = qiov;
3101
3102 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
3103 } else {
3104 outidx++;
3105 reqs[outidx].sector = reqs[i].sector;
3106 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
3107 reqs[outidx].qiov = reqs[i].qiov;
3108 }
3109 }
3110
3111 return outidx + 1;
3112}
3113
3114/*
3115 * Submit multiple AIO write requests at once.
3116 *
3117 * On success, the function returns 0 and all requests in the reqs array have
3118 * been submitted. In error case this function returns -1, and any of the
3119 * requests may or may not be submitted yet. In particular, this means that the
3120 * callback will be called for some of the requests, for others it won't. The
3121 * caller must check the error field of the BlockRequest to wait for the right
3122 * callbacks (if error != 0, no callback will be called).
3123 *
3124 * The implementation may modify the contents of the reqs array, e.g. to merge
3125 * requests. However, the fields opaque and error are left unmodified as they
3126 * are used to signal failure for a single request to the caller.
3127 */
3128int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
3129{
40b4f539
KW
3130 MultiwriteCB *mcb;
3131 int i;
3132
301db7c2
RH
3133 /* don't submit writes if we don't have a medium */
3134 if (bs->drv == NULL) {
3135 for (i = 0; i < num_reqs; i++) {
3136 reqs[i].error = -ENOMEDIUM;
3137 }
3138 return -1;
3139 }
3140
40b4f539
KW
3141 if (num_reqs == 0) {
3142 return 0;
3143 }
3144
3145 // Create MultiwriteCB structure
7267c094 3146 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
40b4f539
KW
3147 mcb->num_requests = 0;
3148 mcb->num_callbacks = num_reqs;
3149
3150 for (i = 0; i < num_reqs; i++) {
3151 mcb->callbacks[i].cb = reqs[i].cb;
3152 mcb->callbacks[i].opaque = reqs[i].opaque;
3153 }
3154
3155 // Check for mergable requests
3156 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
3157
6d519a5f
SH
3158 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
3159
df9309fb
PB
3160 /* Run the aio requests. */
3161 mcb->num_requests = num_reqs;
40b4f539 3162 for (i = 0; i < num_reqs; i++) {
ad54ae80 3163 bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
40b4f539 3164 reqs[i].nb_sectors, multiwrite_cb, mcb);
40b4f539
KW
3165 }
3166
3167 return 0;
40b4f539
KW
3168}
3169
83f64091 3170void bdrv_aio_cancel(BlockDriverAIOCB *acb)
83f64091 3171{
6bbff9a0 3172 acb->pool->cancel(acb);
83f64091
FB
3173}
3174
98f90dba
ZYW
3175/* block I/O throttling */
3176static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
3177 bool is_write, double elapsed_time, uint64_t *wait)
3178{
3179 uint64_t bps_limit = 0;
3180 double bytes_limit, bytes_base, bytes_res;
3181 double slice_time, wait_time;
3182
3183 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
3184 bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
3185 } else if (bs->io_limits.bps[is_write]) {
3186 bps_limit = bs->io_limits.bps[is_write];
3187 } else {
3188 if (wait) {
3189 *wait = 0;
3190 }
3191
3192 return false;
3193 }
3194
3195 slice_time = bs->slice_end - bs->slice_start;
3196 slice_time /= (NANOSECONDS_PER_SECOND);
3197 bytes_limit = bps_limit * slice_time;
3198 bytes_base = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
3199 if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
3200 bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
3201 }
3202
3203 /* bytes_base: the bytes of data which have been read/written; and
3204 * it is obtained from the history statistic info.
3205 * bytes_res: the remaining bytes of data which need to be read/written.
3206 * (bytes_base + bytes_res) / bps_limit: used to calcuate
3207 * the total time for completing reading/writting all data.
3208 */
3209 bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
3210
3211 if (bytes_base + bytes_res <= bytes_limit) {
3212 if (wait) {
3213 *wait = 0;
3214 }
3215
3216 return false;
3217 }
3218
3219 /* Calc approx time to dispatch */
3220 wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;
3221
3222 /* When the I/O rate at runtime exceeds the limits,
3223 * bs->slice_end need to be extended in order that the current statistic
3224 * info can be kept until the timer fire, so it is increased and tuned
3225 * based on the result of experiment.
3226 */
3227 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
3228 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
3229 if (wait) {
3230 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
3231 }
3232
3233 return true;
3234}
3235
3236static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
3237 double elapsed_time, uint64_t *wait)
3238{
3239 uint64_t iops_limit = 0;
3240 double ios_limit, ios_base;
3241 double slice_time, wait_time;
3242
3243 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
3244 iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
3245 } else if (bs->io_limits.iops[is_write]) {
3246 iops_limit = bs->io_limits.iops[is_write];
3247 } else {
3248 if (wait) {
3249 *wait = 0;
3250 }
3251
3252 return false;
3253 }
3254
3255 slice_time = bs->slice_end - bs->slice_start;
3256 slice_time /= (NANOSECONDS_PER_SECOND);
3257 ios_limit = iops_limit * slice_time;
3258 ios_base = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
3259 if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
3260 ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
3261 }
3262
3263 if (ios_base + 1 <= ios_limit) {
3264 if (wait) {
3265 *wait = 0;
3266 }
3267
3268 return false;
3269 }
3270
3271 /* Calc approx time to dispatch */
3272 wait_time = (ios_base + 1) / iops_limit;
3273 if (wait_time > elapsed_time) {
3274 wait_time = wait_time - elapsed_time;
3275 } else {
3276 wait_time = 0;
3277 }
3278
3279 bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
3280 bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
3281 if (wait) {
3282 *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
3283 }
3284
3285 return true;
3286}
3287
3288static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
3289 bool is_write, int64_t *wait)
3290{
3291 int64_t now, max_wait;
3292 uint64_t bps_wait = 0, iops_wait = 0;
3293 double elapsed_time;
3294 int bps_ret, iops_ret;
3295
3296 now = qemu_get_clock_ns(vm_clock);
3297 if ((bs->slice_start < now)
3298 && (bs->slice_end > now)) {
3299 bs->slice_end = now + bs->slice_time;
3300 } else {
3301 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
3302 bs->slice_start = now;
3303 bs->slice_end = now + bs->slice_time;
3304
3305 bs->io_base.bytes[is_write] = bs->nr_bytes[is_write];
3306 bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];
3307
3308 bs->io_base.ios[is_write] = bs->nr_ops[is_write];
3309 bs->io_base.ios[!is_write] = bs->nr_ops[!is_write];
3310 }
3311
3312 elapsed_time = now - bs->slice_start;
3313 elapsed_time /= (NANOSECONDS_PER_SECOND);
3314
3315 bps_ret = bdrv_exceed_bps_limits(bs, nb_sectors,
3316 is_write, elapsed_time, &bps_wait);
3317 iops_ret = bdrv_exceed_iops_limits(bs, is_write,
3318 elapsed_time, &iops_wait);
3319 if (bps_ret || iops_ret) {
3320 max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
3321 if (wait) {
3322 *wait = max_wait;
3323 }
3324
3325 now = qemu_get_clock_ns(vm_clock);
3326 if (bs->slice_end < now + max_wait) {
3327 bs->slice_end = now + max_wait;
3328 }
3329
3330 return true;
3331 }
3332
3333 if (wait) {
3334 *wait = 0;
3335 }
3336
3337 return false;
3338}
ce1a14dc 3339
83f64091
FB
3340/**************************************************************/
3341/* async block device emulation */
3342
c16b5a2c
CH
3343typedef struct BlockDriverAIOCBSync {
3344 BlockDriverAIOCB common;
3345 QEMUBH *bh;
3346 int ret;
3347 /* vector translation state */
3348 QEMUIOVector *qiov;
3349 uint8_t *bounce;
3350 int is_write;
3351} BlockDriverAIOCBSync;
3352
3353static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
3354{
b666d239
KW
3355 BlockDriverAIOCBSync *acb =
3356 container_of(blockacb, BlockDriverAIOCBSync, common);
6a7ad299 3357 qemu_bh_delete(acb->bh);
36afc451 3358 acb->bh = NULL;
c16b5a2c
CH
3359 qemu_aio_release(acb);
3360}
3361
3362static AIOPool bdrv_em_aio_pool = {
3363 .aiocb_size = sizeof(BlockDriverAIOCBSync),
3364 .cancel = bdrv_aio_cancel_em,
3365};
3366
ce1a14dc 3367static void bdrv_aio_bh_cb(void *opaque)
83f64091 3368{
ce1a14dc 3369 BlockDriverAIOCBSync *acb = opaque;
f141eafe 3370
f141eafe
AL
3371 if (!acb->is_write)
3372 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
ceb42de8 3373 qemu_vfree(acb->bounce);
ce1a14dc 3374 acb->common.cb(acb->common.opaque, acb->ret);
6a7ad299 3375 qemu_bh_delete(acb->bh);
36afc451 3376 acb->bh = NULL;
ce1a14dc 3377 qemu_aio_release(acb);
83f64091 3378}
beac80cd 3379
f141eafe
AL
3380static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
3381 int64_t sector_num,
3382 QEMUIOVector *qiov,
3383 int nb_sectors,
3384 BlockDriverCompletionFunc *cb,
3385 void *opaque,
3386 int is_write)
3387
83f64091 3388{
ce1a14dc 3389 BlockDriverAIOCBSync *acb;
ce1a14dc 3390
c16b5a2c 3391 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
f141eafe
AL
3392 acb->is_write = is_write;
3393 acb->qiov = qiov;
e268ca52 3394 acb->bounce = qemu_blockalign(bs, qiov->size);
3f3aace8 3395 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
f141eafe
AL
3396
3397 if (is_write) {
3398 qemu_iovec_to_buffer(acb->qiov, acb->bounce);
1ed20acf 3399 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
f141eafe 3400 } else {
1ed20acf 3401 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
f141eafe
AL
3402 }
3403
ce1a14dc 3404 qemu_bh_schedule(acb->bh);
f141eafe 3405
ce1a14dc 3406 return &acb->common;
beac80cd
FB
3407}
3408
f141eafe
AL
3409static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
3410 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 3411 BlockDriverCompletionFunc *cb, void *opaque)
beac80cd 3412{
f141eafe
AL
3413 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
3414}
83f64091 3415
f141eafe
AL
3416static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
3417 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
3418 BlockDriverCompletionFunc *cb, void *opaque)
3419{
3420 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
beac80cd 3421}
beac80cd 3422
68485420
KW
3423
3424typedef struct BlockDriverAIOCBCoroutine {
3425 BlockDriverAIOCB common;
3426 BlockRequest req;
3427 bool is_write;
3428 QEMUBH* bh;
3429} BlockDriverAIOCBCoroutine;
3430
3431static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
3432{
3433 qemu_aio_flush();
3434}
3435
3436static AIOPool bdrv_em_co_aio_pool = {
3437 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
3438 .cancel = bdrv_aio_co_cancel_em,
3439};
3440
35246a68 3441static void bdrv_co_em_bh(void *opaque)
68485420
KW
3442{
3443 BlockDriverAIOCBCoroutine *acb = opaque;
3444
3445 acb->common.cb(acb->common.opaque, acb->req.error);
3446 qemu_bh_delete(acb->bh);
3447 qemu_aio_release(acb);
3448}
3449
b2a61371
SH
3450/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
3451static void coroutine_fn bdrv_co_do_rw(void *opaque)
3452{
3453 BlockDriverAIOCBCoroutine *acb = opaque;
3454 BlockDriverState *bs = acb->common.bs;
3455
3456 if (!acb->is_write) {
3457 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
470c0504 3458 acb->req.nb_sectors, acb->req.qiov, 0);
b2a61371
SH
3459 } else {
3460 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
f08f2dda 3461 acb->req.nb_sectors, acb->req.qiov, 0);
b2a61371
SH
3462 }
3463
35246a68 3464 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
b2a61371
SH
3465 qemu_bh_schedule(acb->bh);
3466}
3467
68485420
KW
3468static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
3469 int64_t sector_num,
3470 QEMUIOVector *qiov,
3471 int nb_sectors,
3472 BlockDriverCompletionFunc *cb,
3473 void *opaque,
8c5873d6 3474 bool is_write)
68485420
KW
3475{
3476 Coroutine *co;
3477 BlockDriverAIOCBCoroutine *acb;
3478
3479 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3480 acb->req.sector = sector_num;
3481 acb->req.nb_sectors = nb_sectors;
3482 acb->req.qiov = qiov;
3483 acb->is_write = is_write;
3484
8c5873d6 3485 co = qemu_coroutine_create(bdrv_co_do_rw);
68485420
KW
3486 qemu_coroutine_enter(co, acb);
3487
3488 return &acb->common;
3489}
3490
07f07615 3491static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
b2e12bc6 3492{
07f07615
PB
3493 BlockDriverAIOCBCoroutine *acb = opaque;
3494 BlockDriverState *bs = acb->common.bs;
b2e12bc6 3495
07f07615
PB
3496 acb->req.error = bdrv_co_flush(bs);
3497 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
b2e12bc6 3498 qemu_bh_schedule(acb->bh);
b2e12bc6
CH
3499}
3500
07f07615 3501BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
016f5cf6
AG
3502 BlockDriverCompletionFunc *cb, void *opaque)
3503{
07f07615 3504 trace_bdrv_aio_flush(bs, opaque);
016f5cf6 3505
07f07615
PB
3506 Coroutine *co;
3507 BlockDriverAIOCBCoroutine *acb;
016f5cf6 3508
07f07615
PB
3509 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3510 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
3511 qemu_coroutine_enter(co, acb);
016f5cf6 3512
016f5cf6
AG
3513 return &acb->common;
3514}
3515
4265d620
PB
3516static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
3517{
3518 BlockDriverAIOCBCoroutine *acb = opaque;
3519 BlockDriverState *bs = acb->common.bs;
3520
3521 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
3522 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
3523 qemu_bh_schedule(acb->bh);
3524}
3525
3526BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
3527 int64_t sector_num, int nb_sectors,
3528 BlockDriverCompletionFunc *cb, void *opaque)
3529{
3530 Coroutine *co;
3531 BlockDriverAIOCBCoroutine *acb;
3532
3533 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
3534
3535 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3536 acb->req.sector = sector_num;
3537 acb->req.nb_sectors = nb_sectors;
3538 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
3539 qemu_coroutine_enter(co, acb);
3540
3541 return &acb->common;
3542}
3543
ea2384d3
FB
3544void bdrv_init(void)
3545{
5efa9d5a 3546 module_call_init(MODULE_INIT_BLOCK);
ea2384d3 3547}
ce1a14dc 3548
eb852011
MA
3549void bdrv_init_with_whitelist(void)
3550{
3551 use_bdrv_whitelist = 1;
3552 bdrv_init();
3553}
3554
c16b5a2c
CH
3555void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
3556 BlockDriverCompletionFunc *cb, void *opaque)
ce1a14dc 3557{
ce1a14dc
PB
3558 BlockDriverAIOCB *acb;
3559
6bbff9a0
AL
3560 if (pool->free_aiocb) {
3561 acb = pool->free_aiocb;
3562 pool->free_aiocb = acb->next;
ce1a14dc 3563 } else {
7267c094 3564 acb = g_malloc0(pool->aiocb_size);
6bbff9a0 3565 acb->pool = pool;
ce1a14dc
PB
3566 }
3567 acb->bs = bs;
3568 acb->cb = cb;
3569 acb->opaque = opaque;
3570 return acb;
3571}
3572
3573void qemu_aio_release(void *p)
3574{
6bbff9a0
AL
3575 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
3576 AIOPool *pool = acb->pool;
3577 acb->next = pool->free_aiocb;
3578 pool->free_aiocb = acb;
ce1a14dc 3579}
19cb3738 3580
f9f05dc5
KW
3581/**************************************************************/
3582/* Coroutine block device emulation */
3583
3584typedef struct CoroutineIOCompletion {
3585 Coroutine *coroutine;
3586 int ret;
3587} CoroutineIOCompletion;
3588
3589static void bdrv_co_io_em_complete(void *opaque, int ret)
3590{
3591 CoroutineIOCompletion *co = opaque;
3592
3593 co->ret = ret;
3594 qemu_coroutine_enter(co->coroutine, NULL);
3595}
3596
3597static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
3598 int nb_sectors, QEMUIOVector *iov,
3599 bool is_write)
3600{
3601 CoroutineIOCompletion co = {
3602 .coroutine = qemu_coroutine_self(),
3603 };
3604 BlockDriverAIOCB *acb;
3605
3606 if (is_write) {
a652d160
SH
3607 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
3608 bdrv_co_io_em_complete, &co);
f9f05dc5 3609 } else {
a652d160
SH
3610 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
3611 bdrv_co_io_em_complete, &co);
f9f05dc5
KW
3612 }
3613
59370aaa 3614 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
f9f05dc5
KW
3615 if (!acb) {
3616 return -EIO;
3617 }
3618 qemu_coroutine_yield();
3619
3620 return co.ret;
3621}
3622
3623static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
3624 int64_t sector_num, int nb_sectors,
3625 QEMUIOVector *iov)
3626{
3627 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
3628}
3629
3630static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
3631 int64_t sector_num, int nb_sectors,
3632 QEMUIOVector *iov)
3633{
3634 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
3635}
3636
07f07615 3637static void coroutine_fn bdrv_flush_co_entry(void *opaque)
e7a8a783 3638{
07f07615
PB
3639 RwCo *rwco = opaque;
3640
3641 rwco->ret = bdrv_co_flush(rwco->bs);
3642}
3643
3644int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
3645{
eb489bb1
KW
3646 int ret;
3647
29cdb251 3648 if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
07f07615 3649 return 0;
eb489bb1
KW
3650 }
3651
ca716364 3652 /* Write back cached data to the OS even with cache=unsafe */
eb489bb1
KW
3653 if (bs->drv->bdrv_co_flush_to_os) {
3654 ret = bs->drv->bdrv_co_flush_to_os(bs);
3655 if (ret < 0) {
3656 return ret;
3657 }
3658 }
3659
ca716364
KW
3660 /* But don't actually force it to the disk with cache=unsafe */
3661 if (bs->open_flags & BDRV_O_NO_FLUSH) {
3662 return 0;
3663 }
3664
eb489bb1 3665 if (bs->drv->bdrv_co_flush_to_disk) {
29cdb251 3666 ret = bs->drv->bdrv_co_flush_to_disk(bs);
07f07615
PB
3667 } else if (bs->drv->bdrv_aio_flush) {
3668 BlockDriverAIOCB *acb;
3669 CoroutineIOCompletion co = {
3670 .coroutine = qemu_coroutine_self(),
3671 };
3672
3673 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
3674 if (acb == NULL) {
29cdb251 3675 ret = -EIO;
07f07615
PB
3676 } else {
3677 qemu_coroutine_yield();
29cdb251 3678 ret = co.ret;
07f07615 3679 }
07f07615
PB
3680 } else {
3681 /*
3682 * Some block drivers always operate in either writethrough or unsafe
3683 * mode and don't support bdrv_flush therefore. Usually qemu doesn't
3684 * know how the server works (because the behaviour is hardcoded or
3685 * depends on server-side configuration), so we can't ensure that
3686 * everything is safe on disk. Returning an error doesn't work because
3687 * that would break guests even if the server operates in writethrough
3688 * mode.
3689 *
3690 * Let's hope the user knows what he's doing.
3691 */
29cdb251 3692 ret = 0;
07f07615 3693 }
29cdb251
PB
3694 if (ret < 0) {
3695 return ret;
3696 }
3697
3698 /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
3699 * in the case of cache=unsafe, so there are no useless flushes.
3700 */
3701 return bdrv_co_flush(bs->file);
07f07615
PB
3702}
3703
0f15423c
AL
3704void bdrv_invalidate_cache(BlockDriverState *bs)
3705{
3706 if (bs->drv && bs->drv->bdrv_invalidate_cache) {
3707 bs->drv->bdrv_invalidate_cache(bs);
3708 }
3709}
3710
3711void bdrv_invalidate_cache_all(void)
3712{
3713 BlockDriverState *bs;
3714
3715 QTAILQ_FOREACH(bs, &bdrv_states, list) {
3716 bdrv_invalidate_cache(bs);
3717 }
3718}
3719
07789269
BC
3720void bdrv_clear_incoming_migration_all(void)
3721{
3722 BlockDriverState *bs;
3723
3724 QTAILQ_FOREACH(bs, &bdrv_states, list) {
3725 bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING);
3726 }
3727}
3728
07f07615
PB
3729int bdrv_flush(BlockDriverState *bs)
3730{
3731 Coroutine *co;
3732 RwCo rwco = {
3733 .bs = bs,
3734 .ret = NOT_DONE,
e7a8a783 3735 };
e7a8a783 3736
07f07615
PB
3737 if (qemu_in_coroutine()) {
3738 /* Fast-path if already in coroutine context */
3739 bdrv_flush_co_entry(&rwco);
3740 } else {
3741 co = qemu_coroutine_create(bdrv_flush_co_entry);
3742 qemu_coroutine_enter(co, &rwco);
3743 while (rwco.ret == NOT_DONE) {
3744 qemu_aio_wait();
3745 }
e7a8a783 3746 }
07f07615
PB
3747
3748 return rwco.ret;
e7a8a783
KW
3749}
3750
4265d620
PB
3751static void coroutine_fn bdrv_discard_co_entry(void *opaque)
3752{
3753 RwCo *rwco = opaque;
3754
3755 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
3756}
3757
3758int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
3759 int nb_sectors)
3760{
3761 if (!bs->drv) {
3762 return -ENOMEDIUM;
3763 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
3764 return -EIO;
3765 } else if (bs->read_only) {
3766 return -EROFS;
3767 } else if (bs->drv->bdrv_co_discard) {
3768 return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
3769 } else if (bs->drv->bdrv_aio_discard) {
3770 BlockDriverAIOCB *acb;
3771 CoroutineIOCompletion co = {
3772 .coroutine = qemu_coroutine_self(),
3773 };
3774
3775 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
3776 bdrv_co_io_em_complete, &co);
3777 if (acb == NULL) {
3778 return -EIO;
3779 } else {
3780 qemu_coroutine_yield();
3781 return co.ret;
3782 }
4265d620
PB
3783 } else {
3784 return 0;
3785 }
3786}
3787
3788int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
3789{
3790 Coroutine *co;
3791 RwCo rwco = {
3792 .bs = bs,
3793 .sector_num = sector_num,
3794 .nb_sectors = nb_sectors,
3795 .ret = NOT_DONE,
3796 };
3797
3798 if (qemu_in_coroutine()) {
3799 /* Fast-path if already in coroutine context */
3800 bdrv_discard_co_entry(&rwco);
3801 } else {
3802 co = qemu_coroutine_create(bdrv_discard_co_entry);
3803 qemu_coroutine_enter(co, &rwco);
3804 while (rwco.ret == NOT_DONE) {
3805 qemu_aio_wait();
3806 }
3807 }
3808
3809 return rwco.ret;
3810}
3811
19cb3738
FB
3812/**************************************************************/
3813/* removable device support */
3814
3815/**
3816 * Return TRUE if the media is present
3817 */
3818int bdrv_is_inserted(BlockDriverState *bs)
3819{
3820 BlockDriver *drv = bs->drv;
a1aff5bf 3821
19cb3738
FB
3822 if (!drv)
3823 return 0;
3824 if (!drv->bdrv_is_inserted)
a1aff5bf
MA
3825 return 1;
3826 return drv->bdrv_is_inserted(bs);
19cb3738
FB
3827}
3828
3829/**
8e49ca46
MA
3830 * Return whether the media changed since the last call to this
3831 * function, or -ENOTSUP if we don't know. Most drivers don't know.
19cb3738
FB
3832 */
3833int bdrv_media_changed(BlockDriverState *bs)
3834{
3835 BlockDriver *drv = bs->drv;
19cb3738 3836
8e49ca46
MA
3837 if (drv && drv->bdrv_media_changed) {
3838 return drv->bdrv_media_changed(bs);
3839 }
3840 return -ENOTSUP;
19cb3738
FB
3841}
3842
3843/**
3844 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3845 */
f36f3949 3846void bdrv_eject(BlockDriverState *bs, bool eject_flag)
19cb3738
FB
3847{
3848 BlockDriver *drv = bs->drv;
19cb3738 3849
822e1cd1
MA
3850 if (drv && drv->bdrv_eject) {
3851 drv->bdrv_eject(bs, eject_flag);
19cb3738 3852 }
6f382ed2
LC
3853
3854 if (bs->device_name[0] != '\0') {
3855 bdrv_emit_qmp_eject_event(bs, eject_flag);
3856 }
19cb3738
FB
3857}
3858
19cb3738
FB
3859/**
3860 * Lock or unlock the media (if it is locked, the user won't be able
3861 * to eject it manually).
3862 */
025e849a 3863void bdrv_lock_medium(BlockDriverState *bs, bool locked)
19cb3738
FB
3864{
3865 BlockDriver *drv = bs->drv;
3866
025e849a 3867 trace_bdrv_lock_medium(bs, locked);
b8c6d095 3868
025e849a
MA
3869 if (drv && drv->bdrv_lock_medium) {
3870 drv->bdrv_lock_medium(bs, locked);
19cb3738
FB
3871 }
3872}
985a03b0
TS
3873
3874/* needed for generic scsi interface */
3875
3876int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3877{
3878 BlockDriver *drv = bs->drv;
3879
3880 if (drv && drv->bdrv_ioctl)
3881 return drv->bdrv_ioctl(bs, req, buf);
3882 return -ENOTSUP;
3883}
7d780669 3884
221f715d
AL
3885BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3886 unsigned long int req, void *buf,
3887 BlockDriverCompletionFunc *cb, void *opaque)
7d780669 3888{
221f715d 3889 BlockDriver *drv = bs->drv;
7d780669 3890
221f715d
AL
3891 if (drv && drv->bdrv_aio_ioctl)
3892 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3893 return NULL;
7d780669 3894}
e268ca52 3895
7b6f9300
MA
3896void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
3897{
3898 bs->buffer_alignment = align;
3899}
7cd1e32a 3900
e268ca52
AL
3901void *qemu_blockalign(BlockDriverState *bs, size_t size)
3902{
3903 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3904}
7cd1e32a
LS
3905
3906void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3907{
3908 int64_t bitmap_size;
a55eb92c 3909
aaa0eb75 3910 bs->dirty_count = 0;
a55eb92c 3911 if (enable) {
c6d22830
JK
3912 if (!bs->dirty_bitmap) {
3913 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
71df14fc
PB
3914 BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG - 1;
3915 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG;
a55eb92c 3916
71df14fc 3917 bs->dirty_bitmap = g_new0(unsigned long, bitmap_size);
a55eb92c 3918 }
7cd1e32a 3919 } else {
c6d22830 3920 if (bs->dirty_bitmap) {
7267c094 3921 g_free(bs->dirty_bitmap);
c6d22830 3922 bs->dirty_bitmap = NULL;
a55eb92c 3923 }
7cd1e32a
LS
3924 }
3925}
3926
3927int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3928{
6ea44308 3929 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
a55eb92c 3930
c6d22830
JK
3931 if (bs->dirty_bitmap &&
3932 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
6d59fec1
MT
3933 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3934 (1UL << (chunk % (sizeof(unsigned long) * 8))));
7cd1e32a
LS
3935 } else {
3936 return 0;
3937 }
3938}
3939
a55eb92c
JK
3940void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3941 int nr_sectors)
7cd1e32a
LS
3942{
3943 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
3944}
aaa0eb75
LS
3945
3946int64_t bdrv_get_dirty_count(BlockDriverState *bs)
3947{
3948 return bs->dirty_count;
3949}
f88e1a42 3950
db593f25
MT
3951void bdrv_set_in_use(BlockDriverState *bs, int in_use)
3952{
3953 assert(bs->in_use != in_use);
3954 bs->in_use = in_use;
3955}
3956
3957int bdrv_in_use(BlockDriverState *bs)
3958{
3959 return bs->in_use;
3960}
3961
28a7282a
LC
3962void bdrv_iostatus_enable(BlockDriverState *bs)
3963{
d6bf279e 3964 bs->iostatus_enabled = true;
58e21ef5 3965 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
28a7282a
LC
3966}
3967
3968/* The I/O status is only enabled if the drive explicitly
3969 * enables it _and_ the VM is configured to stop on errors */
3970bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3971{
d6bf279e 3972 return (bs->iostatus_enabled &&
28a7282a
LC
3973 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3974 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3975 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3976}
3977
3978void bdrv_iostatus_disable(BlockDriverState *bs)
3979{
d6bf279e 3980 bs->iostatus_enabled = false;
28a7282a
LC
3981}
3982
3983void bdrv_iostatus_reset(BlockDriverState *bs)
3984{
3985 if (bdrv_iostatus_is_enabled(bs)) {
58e21ef5 3986 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
28a7282a
LC
3987 }
3988}
3989
3990/* XXX: Today this is set by device models because it makes the implementation
3991 quite simple. However, the block layer knows about the error, so it's
3992 possible to implement this without device models being involved */
3993void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3994{
58e21ef5
LC
3995 if (bdrv_iostatus_is_enabled(bs) &&
3996 bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
28a7282a 3997 assert(error >= 0);
58e21ef5
LC
3998 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3999 BLOCK_DEVICE_IO_STATUS_FAILED;
28a7282a
LC
4000 }
4001}
4002
a597e79c
CH
4003void
4004bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
4005 enum BlockAcctType type)
4006{
4007 assert(type < BDRV_MAX_IOTYPE);
4008
4009 cookie->bytes = bytes;
c488c7f6 4010 cookie->start_time_ns = get_clock();
a597e79c
CH
4011 cookie->type = type;
4012}
4013
4014void
4015bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
4016{
4017 assert(cookie->type < BDRV_MAX_IOTYPE);
4018
4019 bs->nr_bytes[cookie->type] += cookie->bytes;
4020 bs->nr_ops[cookie->type]++;
c488c7f6 4021 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
a597e79c
CH
4022}
4023
f88e1a42
JS
4024int bdrv_img_create(const char *filename, const char *fmt,
4025 const char *base_filename, const char *base_fmt,
4026 char *options, uint64_t img_size, int flags)
4027{
4028 QEMUOptionParameter *param = NULL, *create_options = NULL;
d220894e 4029 QEMUOptionParameter *backing_fmt, *backing_file, *size;
f88e1a42
JS
4030 BlockDriverState *bs = NULL;
4031 BlockDriver *drv, *proto_drv;
96df67d1 4032 BlockDriver *backing_drv = NULL;
f88e1a42
JS
4033 int ret = 0;
4034
4035 /* Find driver and parse its options */
4036 drv = bdrv_find_format(fmt);
4037 if (!drv) {
4038 error_report("Unknown file format '%s'", fmt);
4f70f249 4039 ret = -EINVAL;
f88e1a42
JS
4040 goto out;
4041 }
4042
4043 proto_drv = bdrv_find_protocol(filename);
4044 if (!proto_drv) {
4045 error_report("Unknown protocol '%s'", filename);
4f70f249 4046 ret = -EINVAL;
f88e1a42
JS
4047 goto out;
4048 }
4049
4050 create_options = append_option_parameters(create_options,
4051 drv->create_options);
4052 create_options = append_option_parameters(create_options,
4053 proto_drv->create_options);
4054
4055 /* Create parameter list with default values */
4056 param = parse_option_parameters("", create_options, param);
4057
4058 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
4059
4060 /* Parse -o options */
4061 if (options) {
4062 param = parse_option_parameters(options, create_options, param);
4063 if (param == NULL) {
4064 error_report("Invalid options for file format '%s'.", fmt);
4f70f249 4065 ret = -EINVAL;
f88e1a42
JS
4066 goto out;
4067 }
4068 }
4069
4070 if (base_filename) {
4071 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
4072 base_filename)) {
4073 error_report("Backing file not supported for file format '%s'",
4074 fmt);
4f70f249 4075 ret = -EINVAL;
f88e1a42
JS
4076 goto out;
4077 }
4078 }
4079
4080 if (base_fmt) {
4081 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
4082 error_report("Backing file format not supported for file "
4083 "format '%s'", fmt);
4f70f249 4084 ret = -EINVAL;
f88e1a42
JS
4085 goto out;
4086 }
4087 }
4088
792da93a
JS
4089 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
4090 if (backing_file && backing_file->value.s) {
4091 if (!strcmp(filename, backing_file->value.s)) {
4092 error_report("Error: Trying to create an image with the "
4093 "same filename as the backing file");
4f70f249 4094 ret = -EINVAL;
792da93a
JS
4095 goto out;
4096 }
4097 }
4098
f88e1a42
JS
4099 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
4100 if (backing_fmt && backing_fmt->value.s) {
96df67d1
SH
4101 backing_drv = bdrv_find_format(backing_fmt->value.s);
4102 if (!backing_drv) {
f88e1a42
JS
4103 error_report("Unknown backing file format '%s'",
4104 backing_fmt->value.s);
4f70f249 4105 ret = -EINVAL;
f88e1a42
JS
4106 goto out;
4107 }
4108 }
4109
4110 // The size for the image must always be specified, with one exception:
4111 // If we are using a backing file, we can obtain the size from there
d220894e
KW
4112 size = get_option_parameter(param, BLOCK_OPT_SIZE);
4113 if (size && size->value.n == -1) {
f88e1a42
JS
4114 if (backing_file && backing_file->value.s) {
4115 uint64_t size;
f88e1a42 4116 char buf[32];
63090dac
PB
4117 int back_flags;
4118
4119 /* backing files always opened read-only */
4120 back_flags =
4121 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
f88e1a42 4122
f88e1a42
JS
4123 bs = bdrv_new("");
4124
63090dac 4125 ret = bdrv_open(bs, backing_file->value.s, back_flags, backing_drv);
f88e1a42 4126 if (ret < 0) {
96df67d1 4127 error_report("Could not open '%s'", backing_file->value.s);
f88e1a42
JS
4128 goto out;
4129 }
4130 bdrv_get_geometry(bs, &size);
4131 size *= 512;
4132
4133 snprintf(buf, sizeof(buf), "%" PRId64, size);
4134 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
4135 } else {
4136 error_report("Image creation needs a size parameter");
4f70f249 4137 ret = -EINVAL;
f88e1a42
JS
4138 goto out;
4139 }
4140 }
4141
4142 printf("Formatting '%s', fmt=%s ", filename, fmt);
4143 print_option_parameters(param);
4144 puts("");
4145
4146 ret = bdrv_create(drv, filename, param);
4147
4148 if (ret < 0) {
4149 if (ret == -ENOTSUP) {
4150 error_report("Formatting or formatting option not supported for "
4151 "file format '%s'", fmt);
4152 } else if (ret == -EFBIG) {
4153 error_report("The image size is too large for file format '%s'",
4154 fmt);
4155 } else {
4156 error_report("%s: error while creating %s: %s", filename, fmt,
4157 strerror(-ret));
4158 }
4159 }
4160
4161out:
4162 free_option_parameters(create_options);
4163 free_option_parameters(param);
4164
4165 if (bs) {
4166 bdrv_delete(bs);
4167 }
4f70f249
JS
4168
4169 return ret;
f88e1a42 4170}
eeec61f2
SH
4171
4172void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
c83c66c3
SH
4173 int64_t speed, BlockDriverCompletionFunc *cb,
4174 void *opaque, Error **errp)
eeec61f2
SH
4175{
4176 BlockJob *job;
4177
4178 if (bs->job || bdrv_in_use(bs)) {
fd7f8c65 4179 error_set(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
eeec61f2
SH
4180 return NULL;
4181 }
4182 bdrv_set_in_use(bs, 1);
4183
4184 job = g_malloc0(job_type->instance_size);
4185 job->job_type = job_type;
4186 job->bs = bs;
4187 job->cb = cb;
4188 job->opaque = opaque;
4513eafe 4189 job->busy = true;
eeec61f2 4190 bs->job = job;
c83c66c3
SH
4191
4192 /* Only set speed when necessary to avoid NotSupported error */
4193 if (speed != 0) {
4194 Error *local_err = NULL;
4195
4196 block_job_set_speed(job, speed, &local_err);
4197 if (error_is_set(&local_err)) {
4198 bs->job = NULL;
4199 g_free(job);
4200 bdrv_set_in_use(bs, 0);
4201 error_propagate(errp, local_err);
4202 return NULL;
4203 }
4204 }
eeec61f2
SH
4205 return job;
4206}
4207
/**
 * Finish a block job: report @ret to the creator's callback, then tear
 * down the job and release the device.
 *
 * NOTE: the callback runs while bs->job is still set and before the job
 * struct is freed — the order of these statements is load-bearing.
 */
void block_job_complete(BlockJob *job, int ret)
{
    BlockDriverState *bs = job->bs;

    assert(bs->job == job);
    job->cb(job->opaque, ret);
    bs->job = NULL;
    g_free(job);
    bdrv_set_in_use(bs, 0);
}
4218
882ec7ce 4219void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
eeec61f2 4220{
9e6636c7 4221 Error *local_err = NULL;
9f25eccc 4222
eeec61f2 4223 if (!job->job_type->set_speed) {
9e6636c7
SH
4224 error_set(errp, QERR_NOT_SUPPORTED);
4225 return;
eeec61f2 4226 }
882ec7ce 4227 job->job_type->set_speed(job, speed, &local_err);
9e6636c7
SH
4228 if (error_is_set(&local_err)) {
4229 error_propagate(errp, local_err);
4230 return;
9f25eccc 4231 }
9e6636c7 4232
882ec7ce 4233 job->speed = speed;
eeec61f2
SH
4234}
4235
4236void block_job_cancel(BlockJob *job)
4237{
4238 job->cancelled = true;
fa4478d5
PB
4239 if (job->co && !job->busy) {
4240 qemu_coroutine_enter(job->co, NULL);
4241 }
eeec61f2
SH
4242}
4243
/* Return true once block_job_cancel() has been called on @job. */
bool block_job_is_cancelled(BlockJob *job)
{
    return job->cancelled;
}
3e914655 4248
fa4478d5
PB
/* Closure used by block_job_cancel_sync() to interpose on the job's
 * completion callback and record how the job finished. */
struct BlockCancelData {
    BlockJob *job;
    BlockDriverCompletionFunc *cb;  /* original completion callback */
    void *opaque;                   /* original callback argument */
    bool cancelled;                 /* job's cancelled flag at completion */
    int ret;                        /* completion code; -EINPROGRESS until done */
};
4256
4257static void block_job_cancel_cb(void *opaque, int ret)
3e914655 4258{
fa4478d5
PB
4259 struct BlockCancelData *data = opaque;
4260
4261 data->cancelled = block_job_is_cancelled(data->job);
4262 data->ret = ret;
4263 data->cb(data->opaque, ret);
4264}
4265
4266int block_job_cancel_sync(BlockJob *job)
4267{
4268 struct BlockCancelData data;
3e914655
PB
4269 BlockDriverState *bs = job->bs;
4270
4271 assert(bs->job == job);
fa4478d5
PB
4272
4273 /* Set up our own callback to store the result and chain to
4274 * the original callback.
4275 */
4276 data.job = job;
4277 data.cb = job->cb;
4278 data.opaque = job->opaque;
4279 data.ret = -EINPROGRESS;
4280 job->cb = block_job_cancel_cb;
4281 job->opaque = &data;
3e914655 4282 block_job_cancel(job);
fa4478d5 4283 while (data.ret == -EINPROGRESS) {
3e914655
PB
4284 qemu_aio_wait();
4285 }
fa4478d5 4286 return (data.cancelled && data.ret == 0) ? -ECANCELED : data.ret;
3e914655 4287}
4513eafe
PB
4288
4289void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns)
4290{
4291 /* Check cancellation *before* setting busy = false, too! */
4292 if (!block_job_is_cancelled(job)) {
4293 job->busy = false;
4294 co_sleep_ns(clock, ns);
4295 job->busy = true;
4296 }
4297}