]> git.ipfire.org Git - thirdparty/qemu.git/blame - block.c
block: fail live snapshot if disk has no medium
[thirdparty/qemu.git] / block.c
CommitLineData
fc01f7e7
FB
1/*
2 * QEMU System Emulator block driver
5fafdf24 3 *
fc01f7e7 4 * Copyright (c) 2003 Fabrice Bellard
5fafdf24 5 *
fc01f7e7
FB
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
3990d09a 24#include "config-host.h"
faf07963 25#include "qemu-common.h"
6d519a5f 26#include "trace.h"
376253ec 27#include "monitor.h"
ea2384d3 28#include "block_int.h"
5efa9d5a 29#include "module.h"
f795e743 30#include "qjson.h"
68485420 31#include "qemu-coroutine.h"
b2023818 32#include "qmp-commands.h"
0563e191 33#include "qemu-timer.h"
fc01f7e7 34
71e72a19 35#ifdef CONFIG_BSD
7674e7bf
FB
36#include <sys/types.h>
37#include <sys/stat.h>
38#include <sys/ioctl.h>
72cf2d4f 39#include <sys/queue.h>
c5e97233 40#ifndef __DragonFly__
7674e7bf
FB
41#include <sys/disk.h>
42#endif
c5e97233 43#endif
7674e7bf 44
49dc768d
AL
45#ifdef _WIN32
46#include <windows.h>
47#endif
48
1c9805a3
SH
49#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
50
470c0504
SH
51typedef enum {
52 BDRV_REQ_COPY_ON_READ = 0x1,
f08f2dda 53 BDRV_REQ_ZERO_WRITE = 0x2,
470c0504
SH
54} BdrvRequestFlags;
55
7d4b4ba5 56static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
f141eafe
AL
57static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
58 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
c87c0672 59 BlockDriverCompletionFunc *cb, void *opaque);
f141eafe
AL
60static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
61 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 62 BlockDriverCompletionFunc *cb, void *opaque);
f9f05dc5
KW
63static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
64 int64_t sector_num, int nb_sectors,
65 QEMUIOVector *iov);
66static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
67 int64_t sector_num, int nb_sectors,
68 QEMUIOVector *iov);
c5fbe571 69static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
470c0504
SH
70 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
71 BdrvRequestFlags flags);
1c9805a3 72static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
f08f2dda
SH
73 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
74 BdrvRequestFlags flags);
b2a61371
SH
75static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
76 int64_t sector_num,
77 QEMUIOVector *qiov,
78 int nb_sectors,
79 BlockDriverCompletionFunc *cb,
80 void *opaque,
8c5873d6 81 bool is_write);
b2a61371 82static void coroutine_fn bdrv_co_do_rw(void *opaque);
621f0589
KW
83static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
84 int64_t sector_num, int nb_sectors);
ec530c81 85
98f90dba
ZYW
86static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
87 bool is_write, double elapsed_time, uint64_t *wait);
88static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
89 double elapsed_time, uint64_t *wait);
90static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
91 bool is_write, int64_t *wait);
92
1b7bdbc1
SH
93static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
94 QTAILQ_HEAD_INITIALIZER(bdrv_states);
7ee930d0 95
8a22f02a
SH
96static QLIST_HEAD(, BlockDriver) bdrv_drivers =
97 QLIST_HEAD_INITIALIZER(bdrv_drivers);
ea2384d3 98
f9092b10
MA
99/* The device to use for VM snapshots */
100static BlockDriverState *bs_snapshots;
101
eb852011
MA
102/* If non-zero, use only whitelisted block drivers */
103static int use_bdrv_whitelist;
104
#ifdef _WIN32
/* Return non-zero if filename starts with a DOS drive specifier ("c:"). */
static int is_windows_drive_prefix(const char *filename)
{
    char c = filename[0];

    return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) &&
           filename[1] == ':';
}

/* Return 1 for bare drives ("c:") and Win32 device paths ("\\.\x", "//./x"). */
int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }
    if (strstart(filename, "\\\\.\\", NULL) ||
        strstart(filename, "//./", NULL)) {
        return 1;
    }
    return 0;
}
#endif
124
0563e191 125/* throttling disk I/O limits */
98f90dba
ZYW
126void bdrv_io_limits_disable(BlockDriverState *bs)
127{
128 bs->io_limits_enabled = false;
129
130 while (qemu_co_queue_next(&bs->throttled_reqs));
131
132 if (bs->block_timer) {
133 qemu_del_timer(bs->block_timer);
134 qemu_free_timer(bs->block_timer);
135 bs->block_timer = NULL;
136 }
137
138 bs->slice_start = 0;
139 bs->slice_end = 0;
140 bs->slice_time = 0;
141 memset(&bs->io_base, 0, sizeof(bs->io_base));
142}
143
0563e191
ZYW
144static void bdrv_block_timer(void *opaque)
145{
146 BlockDriverState *bs = opaque;
147
148 qemu_co_queue_next(&bs->throttled_reqs);
149}
150
151void bdrv_io_limits_enable(BlockDriverState *bs)
152{
153 qemu_co_queue_init(&bs->throttled_reqs);
154 bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
155 bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
156 bs->slice_start = qemu_get_clock_ns(vm_clock);
157 bs->slice_end = bs->slice_start + bs->slice_time;
158 memset(&bs->io_base, 0, sizeof(bs->io_base));
159 bs->io_limits_enabled = true;
160}
161
162bool bdrv_io_limits_enabled(BlockDriverState *bs)
163{
164 BlockIOLimit *io_limits = &bs->io_limits;
165 return io_limits->bps[BLOCK_IO_LIMIT_READ]
166 || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
167 || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
168 || io_limits->iops[BLOCK_IO_LIMIT_READ]
169 || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
170 || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
171}
172
/*
 * Coroutine context: block the calling request until the configured I/O
 * limits allow it to run.  Requests are serviced strictly in FIFO order.
 */
static void bdrv_io_limits_intercept(BlockDriverState *bs,
                                     bool is_write, int nb_sectors)
{
    int64_t wait_time = -1;

    /* Queue behind any request that is already throttled, to keep order. */
    if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
        qemu_co_queue_wait(&bs->throttled_reqs);
    }

    /* In fact, we hope to keep each request's timing, in FIFO mode. The next
     * throttled requests will not be dequeued until the current request is
     * allowed to be serviced. So if the current request still exceeds the
     * limits, it will be inserted to the head. All requests followed it will
     * be still in throttled_reqs queue.
     */

    while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
        /* Re-check after the wait computed by bdrv_exceed_io_limits(). */
        qemu_mod_timer(bs->block_timer,
                       wait_time + qemu_get_clock_ns(vm_clock));
        qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
    }

    /* Kick the next queued request now that this one may proceed. */
    qemu_co_queue_next(&bs->throttled_reqs);
}
197
/* check if the path starts with "<protocol>:".  On Windows, drive
 * specifiers such as "c:" are explicitly not treated as protocols. */
static int path_has_protocol(const char *path)
{
#ifdef _WIN32
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#endif
    /* NOTE: any colon anywhere in the path counts, even "/a:b". */
    return strchr(path, ':') != NULL;
}
210
/* Return 1 if @path is absolute, optionally after a "protocol:" prefix. */
int path_is_absolute(const char *path)
{
    const char *after_proto;
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\') {
        return 1;
    }
#endif
    /* Skip a "protocol:" prefix if present, then test the first char. */
    after_proto = strchr(path, ':');
    after_proto = after_proto ? after_proto + 1 : path;
#ifdef _WIN32
    return (*after_proto == '/' || *after_proto == '\\');
#else
    return (*after_proto == '/');
#endif
}
230
/* if filename is absolute, just copy it to dest. Otherwise, build a
   path to it by considering it is relative to base_path. URL are
   supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *sep;
    int len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* Skip a "protocol:" prefix in base_path, if any. */
    p = strchr(base_path, ':');
    p = p ? p + 1 : base_path;

    /* Find the character just past the last path separator. */
    sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *bsep = strrchr(base_path, '\\');
        if (!sep || bsep > sep) {
            sep = bsep;
        }
    }
#endif
    sep = sep ? sep + 1 : base_path;
    if (sep > p) {
        p = sep;
    }

    /* Copy the directory part of base_path, then append filename. */
    len = p - base_path;
    if (len > dest_size - 1) {
        len = dest_size - 1;
    }
    memcpy(dest, base_path, len);
    dest[len] = '\0';
    pstrcat(dest, dest_size, filename);
}
274
/* Register @bdrv with the block layer, filling in emulation entry points
 * for drivers that implement only part of the I/O interface. */
void bdrv_register(BlockDriver *bdrv)
{
    /* Block drivers without coroutine functions need emulation */
    if (!bdrv->bdrv_co_readv) {
        bdrv->bdrv_co_readv = bdrv_co_readv_em;
        bdrv->bdrv_co_writev = bdrv_co_writev_em;

        /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
         * the block driver lacks aio we need to emulate that too.
         */
        if (!bdrv->bdrv_aio_readv) {
            /* add AIO emulation layer */
            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
        }
    }

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
b338082b
FB
294
295/* create a new block device (by default it is empty) */
296BlockDriverState *bdrv_new(const char *device_name)
297{
1b7bdbc1 298 BlockDriverState *bs;
b338082b 299
7267c094 300 bs = g_malloc0(sizeof(BlockDriverState));
b338082b 301 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
ea2384d3 302 if (device_name[0] != '\0') {
1b7bdbc1 303 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
ea2384d3 304 }
28a7282a 305 bdrv_iostatus_disable(bs);
b338082b
FB
306 return bs;
307}
308
ea2384d3
FB
309BlockDriver *bdrv_find_format(const char *format_name)
310{
311 BlockDriver *drv1;
8a22f02a
SH
312 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
313 if (!strcmp(drv1->format_name, format_name)) {
ea2384d3 314 return drv1;
8a22f02a 315 }
ea2384d3
FB
316 }
317 return NULL;
318}
319
eb852011
MA
320static int bdrv_is_whitelisted(BlockDriver *drv)
321{
322 static const char *whitelist[] = {
323 CONFIG_BDRV_WHITELIST
324 };
325 const char **p;
326
327 if (!whitelist[0])
328 return 1; /* no whitelist, anything goes */
329
330 for (p = whitelist; *p; p++) {
331 if (!strcmp(drv->format_name, *p)) {
332 return 1;
333 }
334 }
335 return 0;
336}
337
338BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
339{
340 BlockDriver *drv = bdrv_find_format(format_name);
341 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
342}
343
/* State shared between bdrv_create() and its coroutine entry point. */
typedef struct CreateCo {
    BlockDriver *drv;
    char *filename;               /* heap copy, owned/freed by bdrv_create() */
    QEMUOptionParameter *options;
    int ret;                      /* NOT_DONE until the coroutine completes */
} CreateCo;

/* Coroutine entry point: run the driver's image-creation callback. */
static void coroutine_fn bdrv_create_co_entry(void *opaque)
{
    CreateCo *cco = opaque;
    assert(cco->drv);

    cco->ret = cco->drv->bdrv_create(cco->filename, cco->options);
}
358
0e7e1989
KW
359int bdrv_create(BlockDriver *drv, const char* filename,
360 QEMUOptionParameter *options)
ea2384d3 361{
5b7e1542
ZYW
362 int ret;
363
364 Coroutine *co;
365 CreateCo cco = {
366 .drv = drv,
367 .filename = g_strdup(filename),
368 .options = options,
369 .ret = NOT_DONE,
370 };
371
372 if (!drv->bdrv_create) {
ea2384d3 373 return -ENOTSUP;
5b7e1542
ZYW
374 }
375
376 if (qemu_in_coroutine()) {
377 /* Fast-path if already in coroutine context */
378 bdrv_create_co_entry(&cco);
379 } else {
380 co = qemu_coroutine_create(bdrv_create_co_entry);
381 qemu_coroutine_enter(co, &cco);
382 while (cco.ret == NOT_DONE) {
383 qemu_aio_wait();
384 }
385 }
386
387 ret = cco.ret;
388 g_free(cco.filename);
0e7e1989 389
5b7e1542 390 return ret;
ea2384d3
FB
391}
392
84a12e66
CH
393int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
394{
395 BlockDriver *drv;
396
b50cbabc 397 drv = bdrv_find_protocol(filename);
84a12e66 398 if (drv == NULL) {
16905d71 399 return -ENOENT;
84a12e66
CH
400 }
401
402 return bdrv_create(drv, filename, options);
403}
404
#ifdef _WIN32
/* Fill @filename (buffer of @size bytes) with a fresh temp file name. */
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];

    GetTempPath(MAX_PATH, temp_dir);
    GetTempFileName(temp_dir, "qem", 0, filename);
}
#else
/* Fill @filename with the name of a newly created temporary file under
 * $TMPDIR (default /tmp).  mkstemp() both generates the name and creates
 * the file atomically, so there is no name-reuse race. */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    /* Bug fix: the original unconditionally called close(fd), which was
     * close(-1) when mkstemp() failed (e.g. unwritable TMPDIR).  On
     * failure the unexpanded template is left in @filename and the
     * caller's subsequent open will report the error. */
    if (fd >= 0) {
        close(fd);
    }
}
#endif
fc01f7e7 427
84a12e66
CH
428/*
429 * Detect host devices. By convention, /dev/cdrom[N] is always
430 * recognized as a host CDROM.
431 */
432static BlockDriver *find_hdev_driver(const char *filename)
433{
434 int score_max = 0, score;
435 BlockDriver *drv = NULL, *d;
436
437 QLIST_FOREACH(d, &bdrv_drivers, list) {
438 if (d->bdrv_probe_device) {
439 score = d->bdrv_probe_device(filename);
440 if (score > score_max) {
441 score_max = score;
442 drv = d;
443 }
444 }
445 }
446
447 return drv;
448}
449
b50cbabc 450BlockDriver *bdrv_find_protocol(const char *filename)
83f64091
FB
451{
452 BlockDriver *drv1;
453 char protocol[128];
1cec71e3 454 int len;
83f64091 455 const char *p;
19cb3738 456
66f82cee
KW
457 /* TODO Drivers without bdrv_file_open must be specified explicitly */
458
39508e7a
CH
459 /*
460 * XXX(hch): we really should not let host device detection
461 * override an explicit protocol specification, but moving this
462 * later breaks access to device names with colons in them.
463 * Thanks to the brain-dead persistent naming schemes on udev-
464 * based Linux systems those actually are quite common.
465 */
466 drv1 = find_hdev_driver(filename);
467 if (drv1) {
468 return drv1;
469 }
470
9e0b22f4 471 if (!path_has_protocol(filename)) {
39508e7a 472 return bdrv_find_format("file");
84a12e66 473 }
9e0b22f4
SH
474 p = strchr(filename, ':');
475 assert(p != NULL);
1cec71e3
AL
476 len = p - filename;
477 if (len > sizeof(protocol) - 1)
478 len = sizeof(protocol) - 1;
479 memcpy(protocol, filename, len);
480 protocol[len] = '\0';
8a22f02a 481 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
5fafdf24 482 if (drv1->protocol_name &&
8a22f02a 483 !strcmp(drv1->protocol_name, protocol)) {
83f64091 484 return drv1;
8a22f02a 485 }
83f64091
FB
486 }
487 return NULL;
488}
489
/*
 * Probe the image format of @filename.  On success, *pdrv holds the
 * detected driver; on failure *pdrv is NULL and a negative errno is
 * returned.  scsi-generic devices and empty drives are forced to "raw".
 */
static int find_image_format(const char *filename, BlockDriver **pdrv)
{
    int ret, score, score_max;
    BlockDriver *drv1, *drv;
    uint8_t buf[2048];
    BlockDriverState *bs;

    ret = bdrv_file_open(&bs, filename, 0);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
    if (bs->sg || !bdrv_is_inserted(bs)) {
        bdrv_delete(bs);
        drv = bdrv_find_format("raw");
        if (!drv) {
            ret = -ENOENT;
        }
        *pdrv = drv;
        return ret;
    }

    /* Read the image header, then release the probe handle. */
    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
    bdrv_delete(bs);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    /* Let every registered driver score the header; highest score wins.
     * Note: ret is the number of header bytes actually read. */
    score_max = 0;
    drv = NULL;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->bdrv_probe) {
            score = drv1->bdrv_probe(buf, ret, filename);
            if (score > score_max) {
                score_max = score;
                drv = drv1;
            }
        }
    }
    if (!drv) {
        ret = -ENOENT;
    }
    *pdrv = drv;
    return ret;
}
538
51762288
SH
539/**
540 * Set the current 'total_sectors' value
541 */
542static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
543{
544 BlockDriver *drv = bs->drv;
545
396759ad
NB
546 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
547 if (bs->sg)
548 return 0;
549
51762288
SH
550 /* query actual device if possible, otherwise just trust the hint */
551 if (drv->bdrv_getlength) {
552 int64_t length = drv->bdrv_getlength(bs);
553 if (length < 0) {
554 return length;
555 }
556 hint = length >> BDRV_SECTOR_BITS;
557 }
558
559 bs->total_sectors = hint;
560 return 0;
561}
562
c3993cdc
SH
563/**
564 * Set open flags for a given cache mode
565 *
566 * Return 0 on success, -1 if the cache mode was invalid.
567 */
568int bdrv_parse_cache_flags(const char *mode, int *flags)
569{
570 *flags &= ~BDRV_O_CACHE_MASK;
571
572 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
573 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
92196b2f
SH
574 } else if (!strcmp(mode, "directsync")) {
575 *flags |= BDRV_O_NOCACHE;
c3993cdc
SH
576 } else if (!strcmp(mode, "writeback")) {
577 *flags |= BDRV_O_CACHE_WB;
578 } else if (!strcmp(mode, "unsafe")) {
579 *flags |= BDRV_O_CACHE_WB;
580 *flags |= BDRV_O_NO_FLUSH;
581 } else if (!strcmp(mode, "writethrough")) {
582 /* this is the default */
583 } else {
584 return -1;
585 }
586
587 return 0;
588}
589
/**
 * The copy-on-read flag is actually a reference count so multiple users may
 * use the feature without worrying about clobbering its previous state.
 * Copy-on-read stays enabled until all users have called to disable it.
 */
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
    bs->copy_on_read++;
}

/* Drop one copy-on-read reference; must pair with a previous enable. */
void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
    assert(bs->copy_on_read > 0);
    bs->copy_on_read--;
}
605
/*
 * Common part for opening disk images and files
 *
 * Resets per-open state on @bs, then opens @filename with @drv — either
 * directly (protocol drivers with bdrv_file_open) or through a nested
 * protocol BlockDriverState in bs->file.  Returns 0 or a negative errno;
 * on failure all partially-initialised state is torn down.
 */
static int bdrv_open_common(BlockDriverState *bs, const char *filename,
                            int flags, BlockDriver *drv)
{
    int ret, open_flags;

    assert(drv != NULL);

    trace_bdrv_open_common(bs, filename, flags, drv->format_name);

    /* Reset fields that a previous open may have left behind. */
    bs->file = NULL;
    bs->total_sectors = 0;
    bs->encrypted = 0;
    bs->valid_key = 0;
    bs->sg = 0;
    bs->open_flags = flags;
    bs->growable = 0;
    bs->buffer_alignment = 512;

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if ((flags & BDRV_O_RDWR) && (flags & BDRV_O_COPY_ON_READ)) {
        bdrv_enable_copy_on_read(bs);
    }

    pstrcpy(bs->filename, sizeof(bs->filename), filename);
    bs->backing_file[0] = '\0';

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
        return -ENOTSUP;
    }

    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /*
     * Clear flags that are internal to the block layer before opening the
     * image.
     */
    open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

    /*
     * Snapshots should be writable.
     */
    if (bs->is_temporary) {
        open_flags |= BDRV_O_RDWR;
    }

    bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        ret = drv->bdrv_file_open(bs, filename, open_flags);
    } else {
        /* Format drivers sit on top of a protocol-level bs->file. */
        ret = bdrv_file_open(&bs->file, filename, open_flags);
        if (ret >= 0) {
            ret = drv->bdrv_open(bs, open_flags);
        }
    }

    if (ret < 0) {
        goto free_and_fail;
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        goto free_and_fail;
    }

#ifndef _WIN32
    /* Temporary images are unlinked now; the open fd keeps them alive. */
    if (bs->is_temporary) {
        unlink(filename);
    }
#endif
    return 0;

free_and_fail:
    if (bs->file) {
        bdrv_delete(bs->file);
        bs->file = NULL;
    }
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}
695
b6ce07aa
KW
696/*
697 * Opens a file using a protocol (file, host_device, nbd, ...)
698 */
83f64091 699int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
ea2384d3 700{
83f64091 701 BlockDriverState *bs;
6db95603 702 BlockDriver *drv;
83f64091
FB
703 int ret;
704
b50cbabc 705 drv = bdrv_find_protocol(filename);
6db95603
CH
706 if (!drv) {
707 return -ENOENT;
708 }
709
83f64091 710 bs = bdrv_new("");
b6ce07aa 711 ret = bdrv_open_common(bs, filename, flags, drv);
83f64091
FB
712 if (ret < 0) {
713 bdrv_delete(bs);
714 return ret;
3b0d4f61 715 }
71d0770c 716 bs->growable = 1;
83f64091
FB
717 *pbs = bs;
718 return 0;
719}
720
/*
 * Opens a disk image (raw, qcow2, vmdk, ...)
 *
 * With BDRV_O_SNAPSHOT, a temporary qcow2 overlay backed by @filename is
 * created and opened instead, so writes never reach the original image.
 * If @drv is NULL, the format is probed.  Returns 0 or a negative errno.
 */
int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
              BlockDriver *drv)
{
    int ret;
    char tmp_filename[PATH_MAX];

    if (flags & BDRV_O_SNAPSHOT) {
        BlockDriverState *bs1;
        int64_t total_size;
        int is_protocol = 0;
        BlockDriver *bdrv_qcow2;
        QEMUOptionParameter *options;
        char backing_filename[PATH_MAX];

        /* if snapshot, we create a temporary backing file and open it
           instead of opening 'filename' directly */

        /* Open once just to learn the size and protocol-ness of the image. */
        bs1 = bdrv_new("");
        ret = bdrv_open(bs1, filename, 0, drv);
        if (ret < 0) {
            bdrv_delete(bs1);
            return ret;
        }
        total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;

        if (bs1->drv && bs1->drv->protocol_name)
            is_protocol = 1;

        bdrv_delete(bs1);

        get_tmp_filename(tmp_filename, sizeof(tmp_filename));

        /* Real path is meaningless for protocols */
        if (is_protocol)
            snprintf(backing_filename, sizeof(backing_filename),
                     "%s", filename);
        else if (!realpath(filename, backing_filename))
            return -errno;

        /* Create the temporary overlay as qcow2 on top of @filename. */
        bdrv_qcow2 = bdrv_find_format("qcow2");
        options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);

        set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
        set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
        if (drv) {
            set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
                                 drv->format_name);
        }

        ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
        free_option_parameters(options);
        if (ret < 0) {
            return ret;
        }

        /* From here on we open the overlay, not the original image. */
        filename = tmp_filename;
        drv = bdrv_qcow2;
        bs->is_temporary = 1;
    }

    /* Find the right image format driver */
    if (!drv) {
        ret = find_image_format(filename, &drv);
    }

    if (!drv) {
        goto unlink_and_fail;
    }

    /* Open the image */
    ret = bdrv_open_common(bs, filename, flags, drv);
    if (ret < 0) {
        goto unlink_and_fail;
    }

    /* If there is a backing file, use it */
    if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
        char backing_filename[PATH_MAX];
        int back_flags;
        BlockDriver *back_drv = NULL;

        bs->backing_hd = bdrv_new("");

        /* Protocol-style backing names are used verbatim; plain names are
         * resolved relative to the image. */
        if (path_has_protocol(bs->backing_file)) {
            pstrcpy(backing_filename, sizeof(backing_filename),
                    bs->backing_file);
        } else {
            path_combine(backing_filename, sizeof(backing_filename),
                         filename, bs->backing_file);
        }

        if (bs->backing_format[0] != '\0') {
            back_drv = bdrv_find_format(bs->backing_format);
        }

        /* backing files always opened read-only */
        back_flags =
            flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

        ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
        if (ret < 0) {
            bdrv_close(bs);
            return ret;
        }
        if (bs->is_temporary) {
            bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
        } else {
            /* base image inherits from "parent" */
            bs->backing_hd->keep_read_only = bs->keep_read_only;
        }
    }

    /* Encrypted images wait for the key before announcing the medium. */
    if (!bdrv_key_required(bs)) {
        bdrv_dev_change_media_cb(bs, true);
    }

    /* throttling disk I/O limits */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_enable(bs);
    }

    return 0;

unlink_and_fail:
    if (bs->is_temporary) {
        unlink(filename);
    }
    return ret;
}
854
/* Flush and close @bs, cancelling any block job and tearing down the
 * backing chain.  The BlockDriverState itself stays allocated and may be
 * reopened; bdrv_delete() frees it. */
void bdrv_close(BlockDriverState *bs)
{
    bdrv_flush(bs);
    if (bs->drv) {
        if (bs->job) {
            block_job_cancel_sync(bs->job);
        }
        /* No requests may be in flight while the driver is closed. */
        bdrv_drain_all();

        if (bs == bs_snapshots) {
            bs_snapshots = NULL;
        }
        if (bs->backing_hd) {
            bdrv_delete(bs->backing_hd);
            bs->backing_hd = NULL;
        }
        bs->drv->bdrv_close(bs);
        g_free(bs->opaque);
#ifdef _WIN32
        /* On POSIX, temporary images were unlinked right after opening. */
        if (bs->is_temporary) {
            unlink(bs->filename);
        }
#endif
        bs->opaque = NULL;
        bs->drv = NULL;
        bs->copy_on_read = 0;

        if (bs->file != NULL) {
            bdrv_close(bs->file);
        }

        bdrv_dev_change_media_cb(bs, false);
    }

    /*throttling disk I/O limits*/
    if (bs->io_limits_enabled) {
        bdrv_io_limits_disable(bs);
    }
}
894
2bc93fed
MK
895void bdrv_close_all(void)
896{
897 BlockDriverState *bs;
898
899 QTAILQ_FOREACH(bs, &bdrv_states, list) {
900 bdrv_close(bs);
901 }
902}
903
/*
 * Wait for pending requests to complete across all BlockDriverStates
 *
 * This function does not flush data to disk, use bdrv_flush_all() for that
 * after calling this function.
 *
 * Note that completion of an asynchronous I/O operation can trigger any
 * number of other I/O operations on other devices---for example a coroutine
 * can be arbitrarily complex and a constant flow of I/O can come until the
 * coroutine is complete. Because of this, it is not possible to have a
 * function to drain a single device's I/O queue.
 */
void bdrv_drain_all(void)
{
    BlockDriverState *bs;
    bool busy;

    do {
        busy = qemu_aio_wait();

        /* FIXME: We do not have timer support here, so this is effectively
         * a busy wait.
         */
        QTAILQ_FOREACH(bs, &bdrv_states, list) {
            /* Throttled requests never complete on their own while we are
             * not running timers, so restart them explicitly. */
            if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
                qemu_co_queue_restart_all(&bs->throttled_reqs);
                busy = true;
            }
        }
    } while (busy);

    /* If requests are still pending there is a bug somewhere */
    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        assert(QLIST_EMPTY(&bs->tracked_requests));
        assert(qemu_co_queue_empty(&bs->throttled_reqs));
    }
}
941
d22b2f41
RH
942/* make a BlockDriverState anonymous by removing from bdrv_state list.
943 Also, NULL terminate the device_name to prevent double remove */
944void bdrv_make_anon(BlockDriverState *bs)
945{
946 if (bs->device_name[0] != '\0') {
947 QTAILQ_REMOVE(&bdrv_states, bs, list);
948 }
949 bs->device_name[0] = '\0';
950}
951
/*
 * Add new bs contents at the top of an image chain while the chain is
 * live, while keeping required fields on the top layer.
 *
 * This will modify the BlockDriverState fields, and swap contents
 * between bs_new and bs_top. Both bs_new and bs_top are modified.
 *
 * bs_new is required to be anonymous.
 *
 * This function does not create any image files.
 */
void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
{
    BlockDriverState tmp;

    /* bs_new must be anonymous */
    assert(bs_new->device_name[0] == '\0');

    /* Build the state that will become the new top in 'tmp', starting from
     * bs_new's contents and pulling over the fields that must stay with
     * the device (the top of the chain). */
    tmp = *bs_new;

    /* there are some fields that need to stay on the top layer: */

    /* dev info */
    tmp.dev_ops = bs_top->dev_ops;
    tmp.dev_opaque = bs_top->dev_opaque;
    tmp.dev = bs_top->dev;
    tmp.buffer_alignment = bs_top->buffer_alignment;
    tmp.copy_on_read = bs_top->copy_on_read;

    /* i/o timing parameters */
    tmp.slice_time = bs_top->slice_time;
    tmp.slice_start = bs_top->slice_start;
    tmp.slice_end = bs_top->slice_end;
    tmp.io_limits = bs_top->io_limits;
    tmp.io_base = bs_top->io_base;
    tmp.throttled_reqs = bs_top->throttled_reqs;
    tmp.block_timer = bs_top->block_timer;
    tmp.io_limits_enabled = bs_top->io_limits_enabled;

    /* geometry */
    tmp.cyls = bs_top->cyls;
    tmp.heads = bs_top->heads;
    tmp.secs = bs_top->secs;
    tmp.translation = bs_top->translation;

    /* r/w error */
    tmp.on_read_error = bs_top->on_read_error;
    tmp.on_write_error = bs_top->on_write_error;

    /* i/o status */
    tmp.iostatus_enabled = bs_top->iostatus_enabled;
    tmp.iostatus = bs_top->iostatus;

    /* keep the same entry in bdrv_states */
    pstrcpy(tmp.device_name, sizeof(tmp.device_name), bs_top->device_name);
    tmp.list = bs_top->list;

    /* The contents of 'tmp' will become bs_top, as we are
     * swapping bs_new and bs_top contents. */
    tmp.backing_hd = bs_new;
    pstrcpy(tmp.backing_file, sizeof(tmp.backing_file), bs_top->filename);
    bdrv_get_format(bs_top, tmp.backing_format, sizeof(tmp.backing_format));

    /* swap contents of the fixed new bs and the current top */
    *bs_new = *bs_top;
    *bs_top = tmp;

    /* device_name[] was carried over from the old bs_top. bs_new
     * shouldn't be in bdrv_states, so we need to make device_name[]
     * reflect the anonymity of bs_new
     */
    bs_new->device_name[0] = '\0';

    /* clear the copied fields in the new backing file */
    bdrv_detach_dev(bs_new, bs_new->dev);

    qemu_co_queue_init(&bs_new->throttled_reqs);
    memset(&bs_new->io_base, 0, sizeof(bs_new->io_base));
    memset(&bs_new->io_limits, 0, sizeof(bs_new->io_limits));
    bdrv_iostatus_disable(bs_new);

    /* we don't use bdrv_io_limits_disable() for this, because we don't want
     * to affect or delete the block_timer, as it has been moved to bs_top */
    bs_new->io_limits_enabled = false;
    bs_new->block_timer = NULL;
    bs_new->slice_time = 0;
    bs_new->slice_start = 0;
    bs_new->slice_end = 0;
}
1041
/* Close and free @bs.  Must not be attached to a device, running a job,
 * or marked in-use. */
void bdrv_delete(BlockDriverState *bs)
{
    assert(!bs->dev);
    assert(!bs->job);
    assert(!bs->in_use);

    /* remove from list, if necessary */
    bdrv_make_anon(bs);

    bdrv_close(bs);
    /* Free the nested protocol layer, if any. */
    if (bs->file != NULL) {
        bdrv_delete(bs->file);
    }

    assert(bs != bs_snapshots);
    g_free(bs);
}
1059
fa879d62
MA
1060int bdrv_attach_dev(BlockDriverState *bs, void *dev)
1061/* TODO change to DeviceState *dev when all users are qdevified */
18846dee 1062{
fa879d62 1063 if (bs->dev) {
18846dee
MA
1064 return -EBUSY;
1065 }
fa879d62 1066 bs->dev = dev;
28a7282a 1067 bdrv_iostatus_reset(bs);
18846dee
MA
1068 return 0;
1069}
1070
fa879d62
MA
1071/* TODO qdevified devices don't use this, remove when devices are qdevified */
1072void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
18846dee 1073{
fa879d62
MA
1074 if (bdrv_attach_dev(bs, dev) < 0) {
1075 abort();
1076 }
1077}
1078
/* Detach @dev from @bs and reset the per-device state configured at
 * attach time. */
void bdrv_detach_dev(BlockDriverState *bs, void *dev)
/* TODO change to DeviceState *dev when all users are qdevified */
{
    assert(bs->dev == dev);
    bs->dev = NULL;
    bs->dev_ops = NULL;
    bs->dev_opaque = NULL;
    /* restore the conservative default alignment for guest buffers */
    bs->buffer_alignment = 512;
}
1088
fa879d62
MA
/* TODO change to return DeviceState * when all users are qdevified */
/* Return the device attached to @bs, or NULL if none is attached. */
void *bdrv_get_attached_dev(BlockDriverState *bs)
{
    return bs->dev;
}
1094
0e49de52
MA
/* Register device callbacks for @bs.  @ops may be NULL to clear them;
 * @opaque is passed back to each callback. */
void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
                      void *opaque)
{
    bs->dev_ops = ops;
    bs->dev_opaque = opaque;
    /* a BDS with removable media cannot remain the cached default
     * snapshot target */
    if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
        bs_snapshots = NULL;
    }
}
1104
329c0a48
LC
1105void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
1106 BlockQMPEventAction action, int is_read)
1107{
1108 QObject *data;
1109 const char *action_str;
1110
1111 switch (action) {
1112 case BDRV_ACTION_REPORT:
1113 action_str = "report";
1114 break;
1115 case BDRV_ACTION_IGNORE:
1116 action_str = "ignore";
1117 break;
1118 case BDRV_ACTION_STOP:
1119 action_str = "stop";
1120 break;
1121 default:
1122 abort();
1123 }
1124
1125 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1126 bdrv->device_name,
1127 action_str,
1128 is_read ? "read" : "write");
1129 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1130
1131 qobject_decref(data);
1132}
1133
6f382ed2
LC
/* Emit the QMP DEVICE_TRAY_MOVED event for @bs; @ejected reports whether
 * the tray is now open (true) or closed (false). */
static void bdrv_emit_qmp_eject_event(BlockDriverState *bs, bool ejected)
{
    QObject *data;

    data = qobject_from_jsonf("{ 'device': %s, 'tray-open': %i }",
                              bdrv_get_device_name(bs), ejected);
    monitor_protocol_event(QEVENT_DEVICE_TRAY_MOVED, data);

    qobject_decref(data);
}
1144
7d4b4ba5 1145static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
0e49de52 1146{
145feb17 1147 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
6f382ed2 1148 bool tray_was_closed = !bdrv_dev_is_tray_open(bs);
7d4b4ba5 1149 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
6f382ed2
LC
1150 if (tray_was_closed) {
1151 /* tray open */
1152 bdrv_emit_qmp_eject_event(bs, true);
1153 }
1154 if (load) {
1155 /* tray close */
1156 bdrv_emit_qmp_eject_event(bs, false);
1157 }
145feb17
MA
1158 }
1159}
1160
2c6942fa
MA
1161bool bdrv_dev_has_removable_media(BlockDriverState *bs)
1162{
1163 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
1164}
1165
025ccaa7
PB
/* Forward an eject request to the attached device, if it handles one.
 * @force requests ejection even if the guest has locked the tray. */
void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
{
    if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
        bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
    }
}
1172
e4def80b
MA
/* Return whether the attached device reports its tray as open.
 * Devices without a tray concept report false. */
bool bdrv_dev_is_tray_open(BlockDriverState *bs)
{
    if (bs->dev_ops && bs->dev_ops->is_tray_open) {
        return bs->dev_ops->is_tray_open(bs->dev_opaque);
    }
    return false;
}
1180
145feb17
MA
/* Notify the attached device that the size of @bs has changed. */
static void bdrv_dev_resize_cb(BlockDriverState *bs)
{
    if (bs->dev_ops && bs->dev_ops->resize_cb) {
        bs->dev_ops->resize_cb(bs->dev_opaque);
    }
}
1187
f107639a
MA
1188bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
1189{
1190 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
1191 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
1192 }
1193 return false;
1194}
1195
e97fc193
AL
1196/*
1197 * Run consistency checks on an image
1198 *
e076f338 1199 * Returns 0 if the check could be completed (it doesn't mean that the image is
a1c7273b 1200 * free of errors) or -errno when an internal error occurred. The results of the
e076f338 1201 * check are stored in res.
e97fc193 1202 */
e076f338 1203int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
e97fc193
AL
1204{
1205 if (bs->drv->bdrv_check == NULL) {
1206 return -ENOTSUP;
1207 }
1208
e076f338 1209 memset(res, 0, sizeof(*res));
9ac228e0 1210 return bs->drv->bdrv_check(bs, res);
e97fc193
AL
1211}
1212
8a426614
KW
1213#define COMMIT_BUF_SECTORS 2048
1214
33e3963e
FB
1215/* commit COW file into the raw image */
1216int bdrv_commit(BlockDriverState *bs)
1217{
19cb3738 1218 BlockDriver *drv = bs->drv;
ee181196 1219 BlockDriver *backing_drv;
8a426614
KW
1220 int64_t sector, total_sectors;
1221 int n, ro, open_flags;
4dca4b63 1222 int ret = 0, rw_ret = 0;
8a426614 1223 uint8_t *buf;
4dca4b63
NS
1224 char filename[1024];
1225 BlockDriverState *bs_rw, *bs_ro;
33e3963e 1226
19cb3738
FB
1227 if (!drv)
1228 return -ENOMEDIUM;
4dca4b63
NS
1229
1230 if (!bs->backing_hd) {
1231 return -ENOTSUP;
33e3963e
FB
1232 }
1233
4dca4b63
NS
1234 if (bs->backing_hd->keep_read_only) {
1235 return -EACCES;
1236 }
ee181196 1237
2d3735d3
SH
1238 if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) {
1239 return -EBUSY;
1240 }
1241
ee181196 1242 backing_drv = bs->backing_hd->drv;
4dca4b63
NS
1243 ro = bs->backing_hd->read_only;
1244 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
1245 open_flags = bs->backing_hd->open_flags;
1246
1247 if (ro) {
1248 /* re-open as RW */
1249 bdrv_delete(bs->backing_hd);
1250 bs->backing_hd = NULL;
1251 bs_rw = bdrv_new("");
ee181196
KW
1252 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
1253 backing_drv);
4dca4b63
NS
1254 if (rw_ret < 0) {
1255 bdrv_delete(bs_rw);
1256 /* try to re-open read-only */
1257 bs_ro = bdrv_new("");
ee181196
KW
1258 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1259 backing_drv);
4dca4b63
NS
1260 if (ret < 0) {
1261 bdrv_delete(bs_ro);
1262 /* drive not functional anymore */
1263 bs->drv = NULL;
1264 return ret;
1265 }
1266 bs->backing_hd = bs_ro;
1267 return rw_ret;
1268 }
1269 bs->backing_hd = bs_rw;
ea2384d3 1270 }
33e3963e 1271
6ea44308 1272 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
7267c094 1273 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
8a426614
KW
1274
1275 for (sector = 0; sector < total_sectors; sector += n) {
05c4af54 1276 if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
8a426614
KW
1277
1278 if (bdrv_read(bs, sector, buf, n) != 0) {
1279 ret = -EIO;
1280 goto ro_cleanup;
1281 }
1282
1283 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
1284 ret = -EIO;
1285 goto ro_cleanup;
1286 }
ea2384d3 1287 }
33e3963e 1288 }
95389c86 1289
1d44952f
CH
1290 if (drv->bdrv_make_empty) {
1291 ret = drv->bdrv_make_empty(bs);
1292 bdrv_flush(bs);
1293 }
95389c86 1294
3f5075ae
CH
1295 /*
1296 * Make sure all data we wrote to the backing device is actually
1297 * stable on disk.
1298 */
1299 if (bs->backing_hd)
1300 bdrv_flush(bs->backing_hd);
4dca4b63
NS
1301
1302ro_cleanup:
7267c094 1303 g_free(buf);
4dca4b63
NS
1304
1305 if (ro) {
1306 /* re-open as RO */
1307 bdrv_delete(bs->backing_hd);
1308 bs->backing_hd = NULL;
1309 bs_ro = bdrv_new("");
ee181196
KW
1310 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
1311 backing_drv);
4dca4b63
NS
1312 if (ret < 0) {
1313 bdrv_delete(bs_ro);
1314 /* drive not functional anymore */
1315 bs->drv = NULL;
1316 return ret;
1317 }
1318 bs->backing_hd = bs_ro;
1319 bs->backing_hd->keep_read_only = 0;
1320 }
1321
1d44952f 1322 return ret;
33e3963e
FB
1323}
1324
e8877497 1325int bdrv_commit_all(void)
6ab4b5ab
MA
1326{
1327 BlockDriverState *bs;
1328
1329 QTAILQ_FOREACH(bs, &bdrv_states, list) {
e8877497
SH
1330 int ret = bdrv_commit(bs);
1331 if (ret < 0) {
1332 return ret;
1333 }
6ab4b5ab 1334 }
e8877497 1335 return 0;
6ab4b5ab
MA
1336}
1337
dbffbdcf
SH
/* An in-flight read or write request, linked into bs->tracked_requests so
 * that overlapping requests can be detected and serialized. */
struct BdrvTrackedRequest {
    BlockDriverState *bs;
    int64_t sector_num;     /* first sector of the request */
    int nb_sectors;         /* length of the request in sectors */
    bool is_write;
    QLIST_ENTRY(BdrvTrackedRequest) list;
    Coroutine *co; /* owner, used for deadlock detection */
    CoQueue wait_queue; /* coroutines blocked on this request */
};
1347
1348/**
1349 * Remove an active request from the tracked requests list
1350 *
1351 * This function should be called when a tracked request is completing.
1352 */
1353static void tracked_request_end(BdrvTrackedRequest *req)
1354{
1355 QLIST_REMOVE(req, list);
f4658285 1356 qemu_co_queue_restart_all(&req->wait_queue);
dbffbdcf
SH
1357}
1358
1359/**
1360 * Add an active request to the tracked requests list
1361 */
1362static void tracked_request_begin(BdrvTrackedRequest *req,
1363 BlockDriverState *bs,
1364 int64_t sector_num,
1365 int nb_sectors, bool is_write)
1366{
1367 *req = (BdrvTrackedRequest){
1368 .bs = bs,
1369 .sector_num = sector_num,
1370 .nb_sectors = nb_sectors,
1371 .is_write = is_write,
5f8b6491 1372 .co = qemu_coroutine_self(),
dbffbdcf
SH
1373 };
1374
f4658285
SH
1375 qemu_co_queue_init(&req->wait_queue);
1376
dbffbdcf
SH
1377 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
1378}
1379
d83947ac
SH
1380/**
1381 * Round a region to cluster boundaries
1382 */
1383static void round_to_clusters(BlockDriverState *bs,
1384 int64_t sector_num, int nb_sectors,
1385 int64_t *cluster_sector_num,
1386 int *cluster_nb_sectors)
1387{
1388 BlockDriverInfo bdi;
1389
1390 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
1391 *cluster_sector_num = sector_num;
1392 *cluster_nb_sectors = nb_sectors;
1393 } else {
1394 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
1395 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
1396 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
1397 nb_sectors, c);
1398 }
1399}
1400
f4658285
SH
1401static bool tracked_request_overlaps(BdrvTrackedRequest *req,
1402 int64_t sector_num, int nb_sectors) {
d83947ac
SH
1403 /* aaaa bbbb */
1404 if (sector_num >= req->sector_num + req->nb_sectors) {
1405 return false;
1406 }
1407 /* bbbb aaaa */
1408 if (req->sector_num >= sector_num + nb_sectors) {
1409 return false;
1410 }
1411 return true;
f4658285
SH
1412}
1413
/* Block the calling coroutine until no tracked request overlaps the
 * cluster-expanded region [@sector_num, @sector_num + @nb_sectors). */
static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors)
{
    BdrvTrackedRequest *req;
    int64_t cluster_sector_num;
    int cluster_nb_sectors;
    bool retry;

    /* If we touch the same cluster it counts as an overlap.  This guarantees
     * that allocating writes will be serialized and not race with each other
     * for the same cluster.  For example, in copy-on-read it ensures that the
     * CoR read and write operations are atomic and guest writes cannot
     * interleave between them.
     */
    round_to_clusters(bs, sector_num, nb_sectors,
                      &cluster_sector_num, &cluster_nb_sectors);

    do {
        retry = false;
        QLIST_FOREACH(req, &bs->tracked_requests, list) {
            if (tracked_request_overlaps(req, cluster_sector_num,
                                         cluster_nb_sectors)) {
                /* Hitting this means there was a reentrant request, for
                 * example, a block driver issuing nested requests.  This must
                 * never happen since it means deadlock.
                 */
                assert(qemu_coroutine_self() != req->co);

                qemu_co_queue_wait(&req->wait_queue);
                /* the list may have changed while we slept; rescan */
                retry = true;
                break;
            }
        }
    } while (retry);
}
1449
756e6736
KW
1450/*
1451 * Return values:
1452 * 0 - success
1453 * -EINVAL - backing format specified, but no file
1454 * -ENOSPC - can't update the backing file because no space is left in the
1455 * image file header
1456 * -ENOTSUP - format driver doesn't support changing the backing file
1457 */
1458int bdrv_change_backing_file(BlockDriverState *bs,
1459 const char *backing_file, const char *backing_fmt)
1460{
1461 BlockDriver *drv = bs->drv;
1462
1463 if (drv->bdrv_change_backing_file != NULL) {
1464 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
1465 } else {
1466 return -ENOTSUP;
1467 }
1468}
1469
71d0770c
AL
1470static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1471 size_t size)
1472{
1473 int64_t len;
1474
1475 if (!bdrv_is_inserted(bs))
1476 return -ENOMEDIUM;
1477
1478 if (bs->growable)
1479 return 0;
1480
1481 len = bdrv_getlength(bs);
1482
fbb7b4e0
KW
1483 if (offset < 0)
1484 return -EIO;
1485
1486 if ((offset > len) || (len - offset < size))
71d0770c
AL
1487 return -EIO;
1488
1489 return 0;
1490}
1491
1492static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1493 int nb_sectors)
1494{
eb5a3165
JS
1495 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1496 nb_sectors * BDRV_SECTOR_SIZE);
71d0770c
AL
1497}
1498
1c9805a3
SH
/* Parameter/result bundle used to run a synchronous request inside a
 * coroutine (see bdrv_rw_co()). */
typedef struct RwCo {
    BlockDriverState *bs;
    int64_t sector_num;
    int nb_sectors;
    QEMUIOVector *qiov;
    bool is_write;
    int ret;            /* completion status; NOT_DONE while in flight */
} RwCo;

/* Coroutine entry point: perform the read or write described by the
 * RwCo passed as @opaque and store the result in rwco->ret. */
static void coroutine_fn bdrv_rw_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    if (!rwco->is_write) {
        rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
                                     rwco->nb_sectors, rwco->qiov, 0);
    } else {
        rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
                                      rwco->nb_sectors, rwco->qiov, 0);
    }
}
e7a8a783 1520
1c9805a3
SH
1521/*
1522 * Process a synchronous request using coroutines
1523 */
1524static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
1525 int nb_sectors, bool is_write)
1526{
1527 QEMUIOVector qiov;
1528 struct iovec iov = {
1529 .iov_base = (void *)buf,
1530 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1531 };
1532 Coroutine *co;
1533 RwCo rwco = {
1534 .bs = bs,
1535 .sector_num = sector_num,
1536 .nb_sectors = nb_sectors,
1537 .qiov = &qiov,
1538 .is_write = is_write,
1539 .ret = NOT_DONE,
1540 };
e7a8a783 1541
1c9805a3 1542 qemu_iovec_init_external(&qiov, &iov, 1);
e7a8a783 1543
498e386c
ZYW
1544 /**
1545 * In sync call context, when the vcpu is blocked, this throttling timer
1546 * will not fire; so the I/O throttling function has to be disabled here
1547 * if it has been enabled.
1548 */
1549 if (bs->io_limits_enabled) {
1550 fprintf(stderr, "Disabling I/O throttling on '%s' due "
1551 "to synchronous I/O.\n", bdrv_get_device_name(bs));
1552 bdrv_io_limits_disable(bs);
1553 }
1554
1c9805a3
SH
1555 if (qemu_in_coroutine()) {
1556 /* Fast-path if already in coroutine context */
1557 bdrv_rw_co_entry(&rwco);
1558 } else {
1559 co = qemu_coroutine_create(bdrv_rw_co_entry);
1560 qemu_coroutine_enter(co, &rwco);
1561 while (rwco.ret == NOT_DONE) {
1562 qemu_aio_wait();
1563 }
1564 }
1565 return rwco.ret;
1566}
b338082b 1567
1c9805a3
SH
/* return < 0 if error. See bdrv_write() for the return codes */
/* Synchronously read @nb_sectors starting at @sector_num into @buf. */
int bdrv_read(BlockDriverState *bs, int64_t sector_num,
              uint8_t *buf, int nb_sectors)
{
    return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
}
1574
7cd1e32a 1575static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
a55eb92c 1576 int nb_sectors, int dirty)
7cd1e32a
LS
1577{
1578 int64_t start, end;
c6d22830 1579 unsigned long val, idx, bit;
a55eb92c 1580
6ea44308 1581 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
c6d22830 1582 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
a55eb92c
JK
1583
1584 for (; start <= end; start++) {
c6d22830
JK
1585 idx = start / (sizeof(unsigned long) * 8);
1586 bit = start % (sizeof(unsigned long) * 8);
1587 val = bs->dirty_bitmap[idx];
1588 if (dirty) {
6d59fec1 1589 if (!(val & (1UL << bit))) {
aaa0eb75 1590 bs->dirty_count++;
6d59fec1 1591 val |= 1UL << bit;
aaa0eb75 1592 }
c6d22830 1593 } else {
6d59fec1 1594 if (val & (1UL << bit)) {
aaa0eb75 1595 bs->dirty_count--;
6d59fec1 1596 val &= ~(1UL << bit);
aaa0eb75 1597 }
c6d22830
JK
1598 }
1599 bs->dirty_bitmap[idx] = val;
7cd1e32a
LS
1600 }
1601}
1602
5fafdf24 1603/* Return < 0 if error. Important errors are:
19cb3738
FB
1604 -EIO generic I/O error (may happen for all errors)
1605 -ENOMEDIUM No media inserted.
1606 -EINVAL Invalid sector number or nb_sectors
1607 -EACCES Trying to write a read-only device
1608*/
5fafdf24 1609int bdrv_write(BlockDriverState *bs, int64_t sector_num,
fc01f7e7
FB
1610 const uint8_t *buf, int nb_sectors)
1611{
1c9805a3 1612 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
83f64091
FB
1613}
1614
eda578e5
AL
/* Read @count1 bytes at byte offset @offset into @buf, emulating
 * byte-granularity access on top of sector-based bdrv_read().
 * Returns @count1 on success or a negative errno.
 * Proceeds in three phases: unaligned head, whole sectors, unaligned
 * tail. */
int bdrv_pread(BlockDriverState *bs, int64_t offset,
               void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;
    /* first read to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count)
        len = count;
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        /* read the partial first sector through a bounce buffer */
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
        count -= len;
        if (count == 0)
            return count1;
        sector_num++;
        buf += len;
    }

    /* read the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
            return ret;
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        buf += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(buf, tmp_buf, count);
    }
    return count1;
}
1659
eda578e5
AL
/* Write @count1 bytes from @buf at byte offset @offset, emulating
 * byte-granularity access on top of sector-based bdrv_write().
 * Unaligned head and tail sectors use a read-modify-write cycle.
 * Returns @count1 on success or a negative errno. */
int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
                const void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;
    /* first write to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count)
        len = count;
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        /* read-modify-write for the partial first sector */
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        count -= len;
        if (count == 0)
            return count1;
        sector_num++;
        buf += len;
    }

    /* write the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
            return ret;
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        buf += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        /* read-modify-write for the partial last sector */
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(tmp_buf, buf, count);
        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
    }
    return count1;
}
83f64091 1708
f08145fe
KW
1709/*
1710 * Writes to the file and ensures that no writes are reordered across this
1711 * request (acts as a barrier)
1712 *
1713 * Returns 0 on success, -errno in error cases.
1714 */
1715int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1716 const void *buf, int count)
1717{
1718 int ret;
1719
1720 ret = bdrv_pwrite(bs, offset, buf, count);
1721 if (ret < 0) {
1722 return ret;
1723 }
1724
92196b2f
SH
1725 /* No flush needed for cache modes that use O_DSYNC */
1726 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
f08145fe
KW
1727 bdrv_flush(bs);
1728 }
1729
1730 return 0;
1731}
1732
470c0504 1733static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
ab185921
SH
1734 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1735{
1736 /* Perform I/O through a temporary buffer so that users who scribble over
1737 * their read buffer while the operation is in progress do not end up
1738 * modifying the image file. This is critical for zero-copy guest I/O
1739 * where anything might happen inside guest memory.
1740 */
1741 void *bounce_buffer;
1742
79c053bd 1743 BlockDriver *drv = bs->drv;
ab185921
SH
1744 struct iovec iov;
1745 QEMUIOVector bounce_qiov;
1746 int64_t cluster_sector_num;
1747 int cluster_nb_sectors;
1748 size_t skip_bytes;
1749 int ret;
1750
1751 /* Cover entire cluster so no additional backing file I/O is required when
1752 * allocating cluster in the image file.
1753 */
1754 round_to_clusters(bs, sector_num, nb_sectors,
1755 &cluster_sector_num, &cluster_nb_sectors);
1756
470c0504
SH
1757 trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
1758 cluster_sector_num, cluster_nb_sectors);
ab185921
SH
1759
1760 iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
1761 iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
1762 qemu_iovec_init_external(&bounce_qiov, &iov, 1);
1763
79c053bd
SH
1764 ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
1765 &bounce_qiov);
ab185921
SH
1766 if (ret < 0) {
1767 goto err;
1768 }
1769
79c053bd
SH
1770 if (drv->bdrv_co_write_zeroes &&
1771 buffer_is_zero(bounce_buffer, iov.iov_len)) {
621f0589
KW
1772 ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
1773 cluster_nb_sectors);
79c053bd
SH
1774 } else {
1775 ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
ab185921 1776 &bounce_qiov);
79c053bd
SH
1777 }
1778
ab185921
SH
1779 if (ret < 0) {
1780 /* It might be okay to ignore write errors for guest requests. If this
1781 * is a deliberate copy-on-read then we don't want to ignore the error.
1782 * Simply report it in all cases.
1783 */
1784 goto err;
1785 }
1786
1787 skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
1788 qemu_iovec_from_buffer(qiov, bounce_buffer + skip_bytes,
1789 nb_sectors * BDRV_SECTOR_SIZE);
1790
1791err:
1792 qemu_vfree(bounce_buffer);
1793 return ret;
1794}
1795
c5fbe571
SH
1796/*
1797 * Handle a read request in coroutine context
1798 */
1799static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
470c0504
SH
1800 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
1801 BdrvRequestFlags flags)
da1fa91d
KW
1802{
1803 BlockDriver *drv = bs->drv;
dbffbdcf
SH
1804 BdrvTrackedRequest req;
1805 int ret;
da1fa91d 1806
da1fa91d
KW
1807 if (!drv) {
1808 return -ENOMEDIUM;
1809 }
1810 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1811 return -EIO;
1812 }
1813
98f90dba
ZYW
1814 /* throttling disk read I/O */
1815 if (bs->io_limits_enabled) {
1816 bdrv_io_limits_intercept(bs, false, nb_sectors);
1817 }
1818
f4658285 1819 if (bs->copy_on_read) {
470c0504
SH
1820 flags |= BDRV_REQ_COPY_ON_READ;
1821 }
1822 if (flags & BDRV_REQ_COPY_ON_READ) {
1823 bs->copy_on_read_in_flight++;
1824 }
1825
1826 if (bs->copy_on_read_in_flight) {
f4658285
SH
1827 wait_for_overlapping_requests(bs, sector_num, nb_sectors);
1828 }
1829
dbffbdcf 1830 tracked_request_begin(&req, bs, sector_num, nb_sectors, false);
ab185921 1831
470c0504 1832 if (flags & BDRV_REQ_COPY_ON_READ) {
ab185921
SH
1833 int pnum;
1834
1835 ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
1836 if (ret < 0) {
1837 goto out;
1838 }
1839
1840 if (!ret || pnum != nb_sectors) {
470c0504 1841 ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
ab185921
SH
1842 goto out;
1843 }
1844 }
1845
dbffbdcf 1846 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
ab185921
SH
1847
1848out:
dbffbdcf 1849 tracked_request_end(&req);
470c0504
SH
1850
1851 if (flags & BDRV_REQ_COPY_ON_READ) {
1852 bs->copy_on_read_in_flight--;
1853 }
1854
dbffbdcf 1855 return ret;
da1fa91d
KW
1856}
1857
c5fbe571 1858int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
da1fa91d
KW
1859 int nb_sectors, QEMUIOVector *qiov)
1860{
c5fbe571 1861 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
da1fa91d 1862
470c0504
SH
1863 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
1864}
1865
/* Coroutine read that forces copy-on-read for the covered range. */
int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);

    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
                            BDRV_REQ_COPY_ON_READ);
}
1874
f08f2dda
SH
/* Write zeroes to [@sector_num, @sector_num + @nb_sectors), preferring
 * the driver's efficient write-zeroes operation and falling back to a
 * zero-filled bounce-buffer write. */
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors)
{
    BlockDriver *drv = bs->drv;
    QEMUIOVector qiov;
    struct iovec iov;
    int ret;

    /* TODO Emulate only part of misaligned requests instead of letting block
     * drivers return -ENOTSUP and emulate everything */

    /* First try the efficient write zeroes operation */
    if (drv->bdrv_co_write_zeroes) {
        ret = drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
        /* -ENOTSUP means the driver cannot handle this request shape;
         * fall through to the emulation below */
        if (ret != -ENOTSUP) {
            return ret;
        }
    }

    /* Fall back to bounce buffer if write zeroes is unsupported */
    iov.iov_len  = nb_sectors * BDRV_SECTOR_SIZE;
    iov.iov_base = qemu_blockalign(bs, iov.iov_len);
    memset(iov.iov_base, 0, iov.iov_len);
    qemu_iovec_init_external(&qiov, &iov, 1);

    ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);

    qemu_vfree(iov.iov_base);
    return ret;
}
1905
c5fbe571
SH
1906/*
1907 * Handle a write request in coroutine context
1908 */
1909static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
f08f2dda
SH
1910 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
1911 BdrvRequestFlags flags)
c5fbe571
SH
1912{
1913 BlockDriver *drv = bs->drv;
dbffbdcf 1914 BdrvTrackedRequest req;
6b7cb247 1915 int ret;
da1fa91d
KW
1916
1917 if (!bs->drv) {
1918 return -ENOMEDIUM;
1919 }
1920 if (bs->read_only) {
1921 return -EACCES;
1922 }
1923 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1924 return -EIO;
1925 }
1926
98f90dba
ZYW
1927 /* throttling disk write I/O */
1928 if (bs->io_limits_enabled) {
1929 bdrv_io_limits_intercept(bs, true, nb_sectors);
1930 }
1931
470c0504 1932 if (bs->copy_on_read_in_flight) {
f4658285
SH
1933 wait_for_overlapping_requests(bs, sector_num, nb_sectors);
1934 }
1935
dbffbdcf
SH
1936 tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
1937
f08f2dda
SH
1938 if (flags & BDRV_REQ_ZERO_WRITE) {
1939 ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors);
1940 } else {
1941 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1942 }
6b7cb247 1943
da1fa91d
KW
1944 if (bs->dirty_bitmap) {
1945 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1946 }
1947
1948 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1949 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1950 }
1951
dbffbdcf
SH
1952 tracked_request_end(&req);
1953
6b7cb247 1954 return ret;
da1fa91d
KW
1955}
1956
c5fbe571
SH
/* Public coroutine write entry point: trace, then delegate with no
 * special flags. */
int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_writev(bs, sector_num, nb_sectors);

    return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
}
1964
/* Coroutine write of zeroes: no data iovec is needed, the zero-write
 * flag selects the zero path in bdrv_co_do_writev(). */
int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors)
{
    trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors);

    return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
                             BDRV_REQ_ZERO_WRITE);
}
1973
83f64091
FB
1974/**
1975 * Truncate file to 'offset' bytes (needed only for file protocols)
1976 */
1977int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1978{
1979 BlockDriver *drv = bs->drv;
51762288 1980 int ret;
83f64091 1981 if (!drv)
19cb3738 1982 return -ENOMEDIUM;
83f64091
FB
1983 if (!drv->bdrv_truncate)
1984 return -ENOTSUP;
59f2689d
NS
1985 if (bs->read_only)
1986 return -EACCES;
8591675f
MT
1987 if (bdrv_in_use(bs))
1988 return -EBUSY;
51762288
SH
1989 ret = drv->bdrv_truncate(bs, offset);
1990 if (ret == 0) {
1991 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
145feb17 1992 bdrv_dev_resize_cb(bs);
51762288
SH
1993 }
1994 return ret;
83f64091
FB
1995}
1996
4a1d5e1f
FZ
1997/**
1998 * Length of a allocated file in bytes. Sparse files are counted by actual
1999 * allocated space. Return < 0 if error or unknown.
2000 */
2001int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2002{
2003 BlockDriver *drv = bs->drv;
2004 if (!drv) {
2005 return -ENOMEDIUM;
2006 }
2007 if (drv->bdrv_get_allocated_file_size) {
2008 return drv->bdrv_get_allocated_file_size(bs);
2009 }
2010 if (bs->file) {
2011 return bdrv_get_allocated_file_size(bs->file);
2012 }
2013 return -ENOTSUP;
2014}
2015
83f64091
FB
2016/**
2017 * Length of a file in bytes. Return < 0 if error or unknown.
2018 */
2019int64_t bdrv_getlength(BlockDriverState *bs)
2020{
2021 BlockDriver *drv = bs->drv;
2022 if (!drv)
19cb3738 2023 return -ENOMEDIUM;
51762288 2024
2c6942fa 2025 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
46a4e4e6
SH
2026 if (drv->bdrv_getlength) {
2027 return drv->bdrv_getlength(bs);
2028 }
83f64091 2029 }
46a4e4e6 2030 return bs->total_sectors * BDRV_SECTOR_SIZE;
fc01f7e7
FB
2031}
2032
19cb3738 2033/* return 0 as number of sectors if no device present or error */
96b8f136 2034void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
fc01f7e7 2035{
19cb3738
FB
2036 int64_t length;
2037 length = bdrv_getlength(bs);
2038 if (length < 0)
2039 length = 0;
2040 else
6ea44308 2041 length = length >> BDRV_SECTOR_BITS;
19cb3738 2042 *nb_sectors_ptr = length;
fc01f7e7 2043}
cf98951b 2044
f3d54fc4
AL
/* On-disk MBR (MSDOS) partition table entry; all multi-byte fields are
 * little-endian and the struct must not be padded. */
struct partition {
        uint8_t boot_ind;           /* 0x80 - active */
        uint8_t head;               /* starting head */
        uint8_t sector;             /* starting sector */
        uint8_t cyl;                /* starting cylinder */
        uint8_t sys_ind;            /* What partition type */
        uint8_t end_head;           /* end head */
        uint8_t end_sector;         /* end sector */
        uint8_t end_cyl;            /* end cylinder */
        uint32_t start_sect;        /* starting sector counting from 0 */
        uint32_t nr_sects;          /* nr of sectors in partition */
} QEMU_PACKED;
f3d54fc4
AL
2057
2058/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
2059static int guess_disk_lchs(BlockDriverState *bs,
2060 int *pcylinders, int *pheads, int *psectors)
2061{
eb5a3165 2062 uint8_t buf[BDRV_SECTOR_SIZE];
f3d54fc4
AL
2063 int ret, i, heads, sectors, cylinders;
2064 struct partition *p;
2065 uint32_t nr_sects;
a38131b6 2066 uint64_t nb_sectors;
498e386c 2067 bool enabled;
f3d54fc4
AL
2068
2069 bdrv_get_geometry(bs, &nb_sectors);
2070
498e386c
ZYW
2071 /**
2072 * The function will be invoked during startup not only in sync I/O mode,
2073 * but also in async I/O mode. So the I/O throttling function has to
2074 * be disabled temporarily here, not permanently.
2075 */
2076 enabled = bs->io_limits_enabled;
2077 bs->io_limits_enabled = false;
f3d54fc4 2078 ret = bdrv_read(bs, 0, buf, 1);
498e386c 2079 bs->io_limits_enabled = enabled;
f3d54fc4
AL
2080 if (ret < 0)
2081 return -1;
2082 /* test msdos magic */
2083 if (buf[510] != 0x55 || buf[511] != 0xaa)
2084 return -1;
2085 for(i = 0; i < 4; i++) {
2086 p = ((struct partition *)(buf + 0x1be)) + i;
2087 nr_sects = le32_to_cpu(p->nr_sects);
2088 if (nr_sects && p->end_head) {
2089 /* We make the assumption that the partition terminates on
2090 a cylinder boundary */
2091 heads = p->end_head + 1;
2092 sectors = p->end_sector & 63;
2093 if (sectors == 0)
2094 continue;
2095 cylinders = nb_sectors / (heads * sectors);
2096 if (cylinders < 1 || cylinders > 16383)
2097 continue;
2098 *pheads = heads;
2099 *psectors = sectors;
2100 *pcylinders = cylinders;
2101#if 0
2102 printf("guessed geometry: LCHS=%d %d %d\n",
2103 cylinders, heads, sectors);
2104#endif
2105 return 0;
2106 }
2107 }
2108 return -1;
2109}
2110
2111void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
2112{
2113 int translation, lba_detected = 0;
2114 int cylinders, heads, secs;
a38131b6 2115 uint64_t nb_sectors;
f3d54fc4
AL
2116
2117 /* if a geometry hint is available, use it */
2118 bdrv_get_geometry(bs, &nb_sectors);
2119 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
2120 translation = bdrv_get_translation_hint(bs);
2121 if (cylinders != 0) {
2122 *pcyls = cylinders;
2123 *pheads = heads;
2124 *psecs = secs;
2125 } else {
2126 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
2127 if (heads > 16) {
2128 /* if heads > 16, it means that a BIOS LBA
2129 translation was active, so the default
2130 hardware geometry is OK */
2131 lba_detected = 1;
2132 goto default_geometry;
2133 } else {
2134 *pcyls = cylinders;
2135 *pheads = heads;
2136 *psecs = secs;
2137 /* disable any translation to be in sync with
2138 the logical geometry */
2139 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
2140 bdrv_set_translation_hint(bs,
2141 BIOS_ATA_TRANSLATION_NONE);
2142 }
2143 }
2144 } else {
2145 default_geometry:
2146 /* if no geometry, use a standard physical disk geometry */
2147 cylinders = nb_sectors / (16 * 63);
2148
2149 if (cylinders > 16383)
2150 cylinders = 16383;
2151 else if (cylinders < 2)
2152 cylinders = 2;
2153 *pcyls = cylinders;
2154 *pheads = 16;
2155 *psecs = 63;
2156 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
2157 if ((*pcyls * *pheads) <= 131072) {
2158 bdrv_set_translation_hint(bs,
2159 BIOS_ATA_TRANSLATION_LARGE);
2160 } else {
2161 bdrv_set_translation_hint(bs,
2162 BIOS_ATA_TRANSLATION_LBA);
2163 }
2164 }
2165 }
2166 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
2167 }
2168}
2169
5fafdf24 2170void bdrv_set_geometry_hint(BlockDriverState *bs,
b338082b
FB
2171 int cyls, int heads, int secs)
2172{
2173 bs->cyls = cyls;
2174 bs->heads = heads;
2175 bs->secs = secs;
2176}
2177
46d4767d
FB
2178void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
2179{
2180 bs->translation = translation;
2181}
2182
5fafdf24 2183void bdrv_get_geometry_hint(BlockDriverState *bs,
b338082b
FB
2184 int *pcyls, int *pheads, int *psecs)
2185{
2186 *pcyls = bs->cyls;
2187 *pheads = bs->heads;
2188 *psecs = bs->secs;
2189}
2190
0563e191
ZYW
2191/* throttling disk io limits */
2192void bdrv_set_io_limits(BlockDriverState *bs,
2193 BlockIOLimit *io_limits)
2194{
2195 bs->io_limits = *io_limits;
2196 bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
2197}
2198
5bbdbb46
BS
2199/* Recognize floppy formats */
2200typedef struct FDFormat {
2201 FDriveType drive;
2202 uint8_t last_sect;
2203 uint8_t max_track;
2204 uint8_t max_head;
f8d3d128 2205 FDriveRate rate;
5bbdbb46
BS
2206} FDFormat;
2207
2208static const FDFormat fd_formats[] = {
2209 /* First entry is default format */
2210 /* 1.44 MB 3"1/2 floppy disks */
f8d3d128
HP
2211 { FDRIVE_DRV_144, 18, 80, 1, FDRIVE_RATE_500K, },
2212 { FDRIVE_DRV_144, 20, 80, 1, FDRIVE_RATE_500K, },
2213 { FDRIVE_DRV_144, 21, 80, 1, FDRIVE_RATE_500K, },
2214 { FDRIVE_DRV_144, 21, 82, 1, FDRIVE_RATE_500K, },
2215 { FDRIVE_DRV_144, 21, 83, 1, FDRIVE_RATE_500K, },
2216 { FDRIVE_DRV_144, 22, 80, 1, FDRIVE_RATE_500K, },
2217 { FDRIVE_DRV_144, 23, 80, 1, FDRIVE_RATE_500K, },
2218 { FDRIVE_DRV_144, 24, 80, 1, FDRIVE_RATE_500K, },
5bbdbb46 2219 /* 2.88 MB 3"1/2 floppy disks */
f8d3d128
HP
2220 { FDRIVE_DRV_288, 36, 80, 1, FDRIVE_RATE_1M, },
2221 { FDRIVE_DRV_288, 39, 80, 1, FDRIVE_RATE_1M, },
2222 { FDRIVE_DRV_288, 40, 80, 1, FDRIVE_RATE_1M, },
2223 { FDRIVE_DRV_288, 44, 80, 1, FDRIVE_RATE_1M, },
2224 { FDRIVE_DRV_288, 48, 80, 1, FDRIVE_RATE_1M, },
5bbdbb46 2225 /* 720 kB 3"1/2 floppy disks */
f8d3d128
HP
2226 { FDRIVE_DRV_144, 9, 80, 1, FDRIVE_RATE_250K, },
2227 { FDRIVE_DRV_144, 10, 80, 1, FDRIVE_RATE_250K, },
2228 { FDRIVE_DRV_144, 10, 82, 1, FDRIVE_RATE_250K, },
2229 { FDRIVE_DRV_144, 10, 83, 1, FDRIVE_RATE_250K, },
2230 { FDRIVE_DRV_144, 13, 80, 1, FDRIVE_RATE_250K, },
2231 { FDRIVE_DRV_144, 14, 80, 1, FDRIVE_RATE_250K, },
5bbdbb46 2232 /* 1.2 MB 5"1/4 floppy disks */
f8d3d128
HP
2233 { FDRIVE_DRV_120, 15, 80, 1, FDRIVE_RATE_500K, },
2234 { FDRIVE_DRV_120, 18, 80, 1, FDRIVE_RATE_500K, },
2235 { FDRIVE_DRV_120, 18, 82, 1, FDRIVE_RATE_500K, },
2236 { FDRIVE_DRV_120, 18, 83, 1, FDRIVE_RATE_500K, },
2237 { FDRIVE_DRV_120, 20, 80, 1, FDRIVE_RATE_500K, },
5bbdbb46 2238 /* 720 kB 5"1/4 floppy disks */
f8d3d128
HP
2239 { FDRIVE_DRV_120, 9, 80, 1, FDRIVE_RATE_250K, },
2240 { FDRIVE_DRV_120, 11, 80, 1, FDRIVE_RATE_250K, },
5bbdbb46 2241 /* 360 kB 5"1/4 floppy disks */
f8d3d128
HP
2242 { FDRIVE_DRV_120, 9, 40, 1, FDRIVE_RATE_300K, },
2243 { FDRIVE_DRV_120, 9, 40, 0, FDRIVE_RATE_300K, },
2244 { FDRIVE_DRV_120, 10, 41, 1, FDRIVE_RATE_300K, },
2245 { FDRIVE_DRV_120, 10, 42, 1, FDRIVE_RATE_300K, },
5bbdbb46 2246 /* 320 kB 5"1/4 floppy disks */
f8d3d128
HP
2247 { FDRIVE_DRV_120, 8, 40, 1, FDRIVE_RATE_250K, },
2248 { FDRIVE_DRV_120, 8, 40, 0, FDRIVE_RATE_250K, },
5bbdbb46 2249 /* 360 kB must match 5"1/4 better than 3"1/2... */
f8d3d128 2250 { FDRIVE_DRV_144, 9, 80, 0, FDRIVE_RATE_250K, },
5bbdbb46 2251 /* end */
f8d3d128 2252 { FDRIVE_DRV_NONE, -1, -1, 0, 0, },
5bbdbb46
BS
2253};
2254
2255void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
2256 int *max_track, int *last_sect,
f8d3d128
HP
2257 FDriveType drive_in, FDriveType *drive,
2258 FDriveRate *rate)
5bbdbb46
BS
2259{
2260 const FDFormat *parse;
2261 uint64_t nb_sectors, size;
2262 int i, first_match, match;
2263
2264 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
2265 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
2266 /* User defined disk */
f8d3d128 2267 *rate = FDRIVE_RATE_500K;
5bbdbb46
BS
2268 } else {
2269 bdrv_get_geometry(bs, &nb_sectors);
2270 match = -1;
2271 first_match = -1;
2272 for (i = 0; ; i++) {
2273 parse = &fd_formats[i];
2274 if (parse->drive == FDRIVE_DRV_NONE) {
2275 break;
2276 }
2277 if (drive_in == parse->drive ||
2278 drive_in == FDRIVE_DRV_NONE) {
2279 size = (parse->max_head + 1) * parse->max_track *
2280 parse->last_sect;
2281 if (nb_sectors == size) {
2282 match = i;
2283 break;
2284 }
2285 if (first_match == -1) {
2286 first_match = i;
2287 }
2288 }
2289 }
2290 if (match == -1) {
2291 if (first_match == -1) {
2292 match = 1;
2293 } else {
2294 match = first_match;
2295 }
2296 parse = &fd_formats[match];
2297 }
2298 *nb_heads = parse->max_head + 1;
2299 *max_track = parse->max_track;
2300 *last_sect = parse->last_sect;
2301 *drive = parse->drive;
f8d3d128 2302 *rate = parse->rate;
5bbdbb46
BS
2303 }
2304}
2305
46d4767d
FB
2306int bdrv_get_translation_hint(BlockDriverState *bs)
2307{
2308 return bs->translation;
2309}
2310
abd7f68d
MA
2311void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
2312 BlockErrorAction on_write_error)
2313{
2314 bs->on_read_error = on_read_error;
2315 bs->on_write_error = on_write_error;
2316}
2317
2318BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
2319{
2320 return is_read ? bs->on_read_error : bs->on_write_error;
2321}
2322
b338082b
FB
2323int bdrv_is_read_only(BlockDriverState *bs)
2324{
2325 return bs->read_only;
2326}
2327
985a03b0
TS
2328int bdrv_is_sg(BlockDriverState *bs)
2329{
2330 return bs->sg;
2331}
2332
e900a7b7
CH
2333int bdrv_enable_write_cache(BlockDriverState *bs)
2334{
2335 return bs->enable_write_cache;
2336}
2337
ea2384d3
FB
2338int bdrv_is_encrypted(BlockDriverState *bs)
2339{
2340 if (bs->backing_hd && bs->backing_hd->encrypted)
2341 return 1;
2342 return bs->encrypted;
2343}
2344
c0f4ce77
AL
2345int bdrv_key_required(BlockDriverState *bs)
2346{
2347 BlockDriverState *backing_hd = bs->backing_hd;
2348
2349 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
2350 return 1;
2351 return (bs->encrypted && !bs->valid_key);
2352}
2353
ea2384d3
FB
2354int bdrv_set_key(BlockDriverState *bs, const char *key)
2355{
2356 int ret;
2357 if (bs->backing_hd && bs->backing_hd->encrypted) {
2358 ret = bdrv_set_key(bs->backing_hd, key);
2359 if (ret < 0)
2360 return ret;
2361 if (!bs->encrypted)
2362 return 0;
2363 }
fd04a2ae
SH
2364 if (!bs->encrypted) {
2365 return -EINVAL;
2366 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2367 return -ENOMEDIUM;
2368 }
c0f4ce77 2369 ret = bs->drv->bdrv_set_key(bs, key);
bb5fc20f
AL
2370 if (ret < 0) {
2371 bs->valid_key = 0;
2372 } else if (!bs->valid_key) {
2373 bs->valid_key = 1;
2374 /* call the change callback now, we skipped it on open */
7d4b4ba5 2375 bdrv_dev_change_media_cb(bs, true);
bb5fc20f 2376 }
c0f4ce77 2377 return ret;
ea2384d3
FB
2378}
2379
2380void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
2381{
19cb3738 2382 if (!bs->drv) {
ea2384d3
FB
2383 buf[0] = '\0';
2384 } else {
2385 pstrcpy(buf, buf_size, bs->drv->format_name);
2386 }
2387}
2388
5fafdf24 2389void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
ea2384d3
FB
2390 void *opaque)
2391{
2392 BlockDriver *drv;
2393
8a22f02a 2394 QLIST_FOREACH(drv, &bdrv_drivers, list) {
ea2384d3
FB
2395 it(opaque, drv->format_name);
2396 }
2397}
2398
b338082b
FB
2399BlockDriverState *bdrv_find(const char *name)
2400{
2401 BlockDriverState *bs;
2402
1b7bdbc1
SH
2403 QTAILQ_FOREACH(bs, &bdrv_states, list) {
2404 if (!strcmp(name, bs->device_name)) {
b338082b 2405 return bs;
1b7bdbc1 2406 }
b338082b
FB
2407 }
2408 return NULL;
2409}
2410
2f399b0a
MA
2411BlockDriverState *bdrv_next(BlockDriverState *bs)
2412{
2413 if (!bs) {
2414 return QTAILQ_FIRST(&bdrv_states);
2415 }
2416 return QTAILQ_NEXT(bs, list);
2417}
2418
51de9760 2419void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
81d0912d
FB
2420{
2421 BlockDriverState *bs;
2422
1b7bdbc1 2423 QTAILQ_FOREACH(bs, &bdrv_states, list) {
51de9760 2424 it(opaque, bs);
81d0912d
FB
2425 }
2426}
2427
ea2384d3
FB
2428const char *bdrv_get_device_name(BlockDriverState *bs)
2429{
2430 return bs->device_name;
2431}
2432
c6ca28d6
AL
2433void bdrv_flush_all(void)
2434{
2435 BlockDriverState *bs;
2436
1b7bdbc1 2437 QTAILQ_FOREACH(bs, &bdrv_states, list) {
29cdb251 2438 bdrv_flush(bs);
1b7bdbc1 2439 }
c6ca28d6
AL
2440}
2441
f2feebbd
KW
2442int bdrv_has_zero_init(BlockDriverState *bs)
2443{
2444 assert(bs->drv);
2445
336c1c12
KW
2446 if (bs->drv->bdrv_has_zero_init) {
2447 return bs->drv->bdrv_has_zero_init(bs);
f2feebbd
KW
2448 }
2449
2450 return 1;
2451}
2452
376ae3f1
SH
2453typedef struct BdrvCoIsAllocatedData {
2454 BlockDriverState *bs;
2455 int64_t sector_num;
2456 int nb_sectors;
2457 int *pnum;
2458 int ret;
2459 bool done;
2460} BdrvCoIsAllocatedData;
2461
f58c7b35
TS
2462/*
2463 * Returns true iff the specified sector is present in the disk image. Drivers
2464 * not implementing the functionality are assumed to not support backing files,
2465 * hence all their sectors are reported as allocated.
2466 *
bd9533e3
SH
2467 * If 'sector_num' is beyond the end of the disk image the return value is 0
2468 * and 'pnum' is set to 0.
2469 *
f58c7b35
TS
2470 * 'pnum' is set to the number of sectors (including and immediately following
2471 * the specified sector) that are known to be in the same
2472 * allocated/unallocated state.
2473 *
bd9533e3
SH
2474 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
2475 * beyond the end of the disk image it will be clamped.
f58c7b35 2476 */
060f51c9
SH
2477int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
2478 int nb_sectors, int *pnum)
f58c7b35 2479{
bd9533e3
SH
2480 int64_t n;
2481
2482 if (sector_num >= bs->total_sectors) {
2483 *pnum = 0;
2484 return 0;
2485 }
2486
2487 n = bs->total_sectors - sector_num;
2488 if (n < nb_sectors) {
2489 nb_sectors = n;
2490 }
2491
6aebab14 2492 if (!bs->drv->bdrv_co_is_allocated) {
bd9533e3 2493 *pnum = nb_sectors;
f58c7b35
TS
2494 return 1;
2495 }
6aebab14 2496
060f51c9
SH
2497 return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
2498}
2499
2500/* Coroutine wrapper for bdrv_is_allocated() */
2501static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
2502{
2503 BdrvCoIsAllocatedData *data = opaque;
2504 BlockDriverState *bs = data->bs;
2505
2506 data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
2507 data->pnum);
2508 data->done = true;
2509}
2510
2511/*
2512 * Synchronous wrapper around bdrv_co_is_allocated().
2513 *
2514 * See bdrv_co_is_allocated() for details.
2515 */
2516int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
2517 int *pnum)
2518{
6aebab14
SH
2519 Coroutine *co;
2520 BdrvCoIsAllocatedData data = {
2521 .bs = bs,
2522 .sector_num = sector_num,
2523 .nb_sectors = nb_sectors,
2524 .pnum = pnum,
2525 .done = false,
2526 };
2527
2528 co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
2529 qemu_coroutine_enter(co, &data);
2530 while (!data.done) {
2531 qemu_aio_wait();
2532 }
2533 return data.ret;
f58c7b35
TS
2534}
2535
b2023818 2536BlockInfoList *qmp_query_block(Error **errp)
b338082b 2537{
b2023818 2538 BlockInfoList *head = NULL, *cur_item = NULL;
b338082b
FB
2539 BlockDriverState *bs;
2540
1b7bdbc1 2541 QTAILQ_FOREACH(bs, &bdrv_states, list) {
b2023818 2542 BlockInfoList *info = g_malloc0(sizeof(*info));
d15e5465 2543
b2023818
LC
2544 info->value = g_malloc0(sizeof(*info->value));
2545 info->value->device = g_strdup(bs->device_name);
2546 info->value->type = g_strdup("unknown");
2547 info->value->locked = bdrv_dev_is_medium_locked(bs);
2548 info->value->removable = bdrv_dev_has_removable_media(bs);
d15e5465 2549
e4def80b 2550 if (bdrv_dev_has_removable_media(bs)) {
b2023818
LC
2551 info->value->has_tray_open = true;
2552 info->value->tray_open = bdrv_dev_is_tray_open(bs);
e4def80b 2553 }
f04ef601
LC
2554
2555 if (bdrv_iostatus_is_enabled(bs)) {
b2023818
LC
2556 info->value->has_io_status = true;
2557 info->value->io_status = bs->iostatus;
f04ef601
LC
2558 }
2559
19cb3738 2560 if (bs->drv) {
b2023818
LC
2561 info->value->has_inserted = true;
2562 info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
2563 info->value->inserted->file = g_strdup(bs->filename);
2564 info->value->inserted->ro = bs->read_only;
2565 info->value->inserted->drv = g_strdup(bs->drv->format_name);
2566 info->value->inserted->encrypted = bs->encrypted;
2567 if (bs->backing_file[0]) {
2568 info->value->inserted->has_backing_file = true;
2569 info->value->inserted->backing_file = g_strdup(bs->backing_file);
376253ec 2570 }
727f005e
ZYW
2571
2572 if (bs->io_limits_enabled) {
2573 info->value->inserted->bps =
2574 bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
2575 info->value->inserted->bps_rd =
2576 bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
2577 info->value->inserted->bps_wr =
2578 bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
2579 info->value->inserted->iops =
2580 bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
2581 info->value->inserted->iops_rd =
2582 bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
2583 info->value->inserted->iops_wr =
2584 bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
2585 }
b2023818 2586 }
d15e5465 2587
b2023818
LC
2588 /* XXX: waiting for the qapi to support GSList */
2589 if (!cur_item) {
2590 head = cur_item = info;
2591 } else {
2592 cur_item->next = info;
2593 cur_item = info;
b338082b 2594 }
b338082b 2595 }
d15e5465 2596
b2023818 2597 return head;
b338082b 2598}
a36e69dd 2599
f11f57e4
LC
2600/* Consider exposing this as a full fledged QMP command */
2601static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
2602{
2603 BlockStats *s;
2604
2605 s = g_malloc0(sizeof(*s));
2606
2607 if (bs->device_name[0]) {
2608 s->has_device = true;
2609 s->device = g_strdup(bs->device_name);
294cc35f
KW
2610 }
2611
f11f57e4
LC
2612 s->stats = g_malloc0(sizeof(*s->stats));
2613 s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
2614 s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
2615 s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
2616 s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
2617 s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
2618 s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
2619 s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
2620 s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
2621 s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];
2622
294cc35f 2623 if (bs->file) {
f11f57e4
LC
2624 s->has_parent = true;
2625 s->parent = qmp_query_blockstat(bs->file, NULL);
294cc35f
KW
2626 }
2627
f11f57e4 2628 return s;
294cc35f
KW
2629}
2630
f11f57e4 2631BlockStatsList *qmp_query_blockstats(Error **errp)
218a536a 2632{
f11f57e4 2633 BlockStatsList *head = NULL, *cur_item = NULL;
a36e69dd
TS
2634 BlockDriverState *bs;
2635
1b7bdbc1 2636 QTAILQ_FOREACH(bs, &bdrv_states, list) {
f11f57e4
LC
2637 BlockStatsList *info = g_malloc0(sizeof(*info));
2638 info->value = qmp_query_blockstat(bs, NULL);
2639
2640 /* XXX: waiting for the qapi to support GSList */
2641 if (!cur_item) {
2642 head = cur_item = info;
2643 } else {
2644 cur_item->next = info;
2645 cur_item = info;
2646 }
a36e69dd 2647 }
218a536a 2648
f11f57e4 2649 return head;
a36e69dd 2650}
ea2384d3 2651
045df330
AL
2652const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2653{
2654 if (bs->backing_hd && bs->backing_hd->encrypted)
2655 return bs->backing_file;
2656 else if (bs->encrypted)
2657 return bs->filename;
2658 else
2659 return NULL;
2660}
2661
5fafdf24 2662void bdrv_get_backing_filename(BlockDriverState *bs,
83f64091
FB
2663 char *filename, int filename_size)
2664{
3574c608 2665 pstrcpy(filename, filename_size, bs->backing_file);
83f64091
FB
2666}
2667
5fafdf24 2668int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
faea38e7
FB
2669 const uint8_t *buf, int nb_sectors)
2670{
2671 BlockDriver *drv = bs->drv;
2672 if (!drv)
19cb3738 2673 return -ENOMEDIUM;
faea38e7
FB
2674 if (!drv->bdrv_write_compressed)
2675 return -ENOTSUP;
fbb7b4e0
KW
2676 if (bdrv_check_request(bs, sector_num, nb_sectors))
2677 return -EIO;
a55eb92c 2678
c6d22830 2679 if (bs->dirty_bitmap) {
7cd1e32a
LS
2680 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2681 }
a55eb92c 2682
faea38e7
FB
2683 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2684}
3b46e624 2685
faea38e7
FB
2686int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2687{
2688 BlockDriver *drv = bs->drv;
2689 if (!drv)
19cb3738 2690 return -ENOMEDIUM;
faea38e7
FB
2691 if (!drv->bdrv_get_info)
2692 return -ENOTSUP;
2693 memset(bdi, 0, sizeof(*bdi));
2694 return drv->bdrv_get_info(bs, bdi);
2695}
2696
45566e9c
CH
2697int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2698 int64_t pos, int size)
178e08a5
AL
2699{
2700 BlockDriver *drv = bs->drv;
2701 if (!drv)
2702 return -ENOMEDIUM;
7cdb1f6d
MK
2703 if (drv->bdrv_save_vmstate)
2704 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2705 if (bs->file)
2706 return bdrv_save_vmstate(bs->file, buf, pos, size);
2707 return -ENOTSUP;
178e08a5
AL
2708}
2709
45566e9c
CH
2710int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2711 int64_t pos, int size)
178e08a5
AL
2712{
2713 BlockDriver *drv = bs->drv;
2714 if (!drv)
2715 return -ENOMEDIUM;
7cdb1f6d
MK
2716 if (drv->bdrv_load_vmstate)
2717 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2718 if (bs->file)
2719 return bdrv_load_vmstate(bs->file, buf, pos, size);
2720 return -ENOTSUP;
178e08a5
AL
2721}
2722
8b9b0cc2
KW
2723void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2724{
2725 BlockDriver *drv = bs->drv;
2726
2727 if (!drv || !drv->bdrv_debug_event) {
2728 return;
2729 }
2730
2731 return drv->bdrv_debug_event(bs, event);
2732
2733}
2734
faea38e7
FB
2735/**************************************************************/
2736/* handling of snapshots */
2737
feeee5ac
MDCF
2738int bdrv_can_snapshot(BlockDriverState *bs)
2739{
2740 BlockDriver *drv = bs->drv;
07b70bfb 2741 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
feeee5ac
MDCF
2742 return 0;
2743 }
2744
2745 if (!drv->bdrv_snapshot_create) {
2746 if (bs->file != NULL) {
2747 return bdrv_can_snapshot(bs->file);
2748 }
2749 return 0;
2750 }
2751
2752 return 1;
2753}
2754
199630b6
BS
2755int bdrv_is_snapshot(BlockDriverState *bs)
2756{
2757 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2758}
2759
f9092b10
MA
2760BlockDriverState *bdrv_snapshots(void)
2761{
2762 BlockDriverState *bs;
2763
3ac906f7 2764 if (bs_snapshots) {
f9092b10 2765 return bs_snapshots;
3ac906f7 2766 }
f9092b10
MA
2767
2768 bs = NULL;
2769 while ((bs = bdrv_next(bs))) {
2770 if (bdrv_can_snapshot(bs)) {
3ac906f7
MA
2771 bs_snapshots = bs;
2772 return bs;
f9092b10
MA
2773 }
2774 }
2775 return NULL;
f9092b10
MA
2776}
2777
5fafdf24 2778int bdrv_snapshot_create(BlockDriverState *bs,
faea38e7
FB
2779 QEMUSnapshotInfo *sn_info)
2780{
2781 BlockDriver *drv = bs->drv;
2782 if (!drv)
19cb3738 2783 return -ENOMEDIUM;
7cdb1f6d
MK
2784 if (drv->bdrv_snapshot_create)
2785 return drv->bdrv_snapshot_create(bs, sn_info);
2786 if (bs->file)
2787 return bdrv_snapshot_create(bs->file, sn_info);
2788 return -ENOTSUP;
faea38e7
FB
2789}
2790
5fafdf24 2791int bdrv_snapshot_goto(BlockDriverState *bs,
faea38e7
FB
2792 const char *snapshot_id)
2793{
2794 BlockDriver *drv = bs->drv;
7cdb1f6d
MK
2795 int ret, open_ret;
2796
faea38e7 2797 if (!drv)
19cb3738 2798 return -ENOMEDIUM;
7cdb1f6d
MK
2799 if (drv->bdrv_snapshot_goto)
2800 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2801
2802 if (bs->file) {
2803 drv->bdrv_close(bs);
2804 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2805 open_ret = drv->bdrv_open(bs, bs->open_flags);
2806 if (open_ret < 0) {
2807 bdrv_delete(bs->file);
2808 bs->drv = NULL;
2809 return open_ret;
2810 }
2811 return ret;
2812 }
2813
2814 return -ENOTSUP;
faea38e7
FB
2815}
2816
2817int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2818{
2819 BlockDriver *drv = bs->drv;
2820 if (!drv)
19cb3738 2821 return -ENOMEDIUM;
7cdb1f6d
MK
2822 if (drv->bdrv_snapshot_delete)
2823 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2824 if (bs->file)
2825 return bdrv_snapshot_delete(bs->file, snapshot_id);
2826 return -ENOTSUP;
faea38e7
FB
2827}
2828
5fafdf24 2829int bdrv_snapshot_list(BlockDriverState *bs,
faea38e7
FB
2830 QEMUSnapshotInfo **psn_info)
2831{
2832 BlockDriver *drv = bs->drv;
2833 if (!drv)
19cb3738 2834 return -ENOMEDIUM;
7cdb1f6d
MK
2835 if (drv->bdrv_snapshot_list)
2836 return drv->bdrv_snapshot_list(bs, psn_info);
2837 if (bs->file)
2838 return bdrv_snapshot_list(bs->file, psn_info);
2839 return -ENOTSUP;
faea38e7
FB
2840}
2841
51ef6727 2842int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2843 const char *snapshot_name)
2844{
2845 BlockDriver *drv = bs->drv;
2846 if (!drv) {
2847 return -ENOMEDIUM;
2848 }
2849 if (!bs->read_only) {
2850 return -EINVAL;
2851 }
2852 if (drv->bdrv_snapshot_load_tmp) {
2853 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2854 }
2855 return -ENOTSUP;
2856}
2857
e8a6bb9c
MT
2858BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
2859 const char *backing_file)
2860{
2861 if (!bs->drv) {
2862 return NULL;
2863 }
2864
2865 if (bs->backing_hd) {
2866 if (strcmp(bs->backing_file, backing_file) == 0) {
2867 return bs->backing_hd;
2868 } else {
2869 return bdrv_find_backing_image(bs->backing_hd, backing_file);
2870 }
2871 }
2872
2873 return NULL;
2874}
2875
faea38e7
FB
2876#define NB_SUFFIXES 4
2877
2878char *get_human_readable_size(char *buf, int buf_size, int64_t size)
2879{
2880 static const char suffixes[NB_SUFFIXES] = "KMGT";
2881 int64_t base;
2882 int i;
2883
2884 if (size <= 999) {
2885 snprintf(buf, buf_size, "%" PRId64, size);
2886 } else {
2887 base = 1024;
2888 for(i = 0; i < NB_SUFFIXES; i++) {
2889 if (size < (10 * base)) {
5fafdf24 2890 snprintf(buf, buf_size, "%0.1f%c",
faea38e7
FB
2891 (double)size / base,
2892 suffixes[i]);
2893 break;
2894 } else if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
5fafdf24 2895 snprintf(buf, buf_size, "%" PRId64 "%c",
faea38e7
FB
2896 ((size + (base >> 1)) / base),
2897 suffixes[i]);
2898 break;
2899 }
2900 base = base * 1024;
2901 }
2902 }
2903 return buf;
2904}
2905
2906char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2907{
2908 char buf1[128], date_buf[128], clock_buf[128];
3b9f94e1
FB
2909#ifdef _WIN32
2910 struct tm *ptm;
2911#else
faea38e7 2912 struct tm tm;
3b9f94e1 2913#endif
faea38e7
FB
2914 time_t ti;
2915 int64_t secs;
2916
2917 if (!sn) {
5fafdf24
TS
2918 snprintf(buf, buf_size,
2919 "%-10s%-20s%7s%20s%15s",
faea38e7
FB
2920 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2921 } else {
2922 ti = sn->date_sec;
3b9f94e1
FB
2923#ifdef _WIN32
2924 ptm = localtime(&ti);
2925 strftime(date_buf, sizeof(date_buf),
2926 "%Y-%m-%d %H:%M:%S", ptm);
2927#else
faea38e7
FB
2928 localtime_r(&ti, &tm);
2929 strftime(date_buf, sizeof(date_buf),
2930 "%Y-%m-%d %H:%M:%S", &tm);
3b9f94e1 2931#endif
faea38e7
FB
2932 secs = sn->vm_clock_nsec / 1000000000;
2933 snprintf(clock_buf, sizeof(clock_buf),
2934 "%02d:%02d:%02d.%03d",
2935 (int)(secs / 3600),
2936 (int)((secs / 60) % 60),
5fafdf24 2937 (int)(secs % 60),
faea38e7
FB
2938 (int)((sn->vm_clock_nsec / 1000000) % 1000));
2939 snprintf(buf, buf_size,
5fafdf24 2940 "%-10s%-20s%7s%20s%15s",
faea38e7
FB
2941 sn->id_str, sn->name,
2942 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2943 date_buf,
2944 clock_buf);
2945 }
2946 return buf;
2947}
2948
ea2384d3 2949/**************************************************************/
83f64091 2950/* async I/Os */
ea2384d3 2951
3b69e4b9 2952BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
f141eafe 2953 QEMUIOVector *qiov, int nb_sectors,
3b69e4b9 2954 BlockDriverCompletionFunc *cb, void *opaque)
83f64091 2955{
bbf0a440
SH
2956 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2957
b2a61371 2958 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
8c5873d6 2959 cb, opaque, false);
ea2384d3
FB
2960}
2961
f141eafe
AL
2962BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2963 QEMUIOVector *qiov, int nb_sectors,
2964 BlockDriverCompletionFunc *cb, void *opaque)
ea2384d3 2965{
bbf0a440
SH
2966 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2967
1a6e115b 2968 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
8c5873d6 2969 cb, opaque, true);
83f64091
FB
2970}
2971
40b4f539
KW
2972
2973typedef struct MultiwriteCB {
2974 int error;
2975 int num_requests;
2976 int num_callbacks;
2977 struct {
2978 BlockDriverCompletionFunc *cb;
2979 void *opaque;
2980 QEMUIOVector *free_qiov;
40b4f539
KW
2981 } callbacks[];
2982} MultiwriteCB;
2983
2984static void multiwrite_user_cb(MultiwriteCB *mcb)
2985{
2986 int i;
2987
2988 for (i = 0; i < mcb->num_callbacks; i++) {
2989 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
1e1ea48d
SH
2990 if (mcb->callbacks[i].free_qiov) {
2991 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2992 }
7267c094 2993 g_free(mcb->callbacks[i].free_qiov);
40b4f539
KW
2994 }
2995}
2996
2997static void multiwrite_cb(void *opaque, int ret)
2998{
2999 MultiwriteCB *mcb = opaque;
3000
6d519a5f
SH
3001 trace_multiwrite_cb(mcb, ret);
3002
cb6d3ca0 3003 if (ret < 0 && !mcb->error) {
40b4f539 3004 mcb->error = ret;
40b4f539
KW
3005 }
3006
3007 mcb->num_requests--;
3008 if (mcb->num_requests == 0) {
de189a1b 3009 multiwrite_user_cb(mcb);
7267c094 3010 g_free(mcb);
40b4f539
KW
3011 }
3012}
3013
3014static int multiwrite_req_compare(const void *a, const void *b)
3015{
77be4366
CH
3016 const BlockRequest *req1 = a, *req2 = b;
3017
3018 /*
3019 * Note that we can't simply subtract req2->sector from req1->sector
3020 * here as that could overflow the return value.
3021 */
3022 if (req1->sector > req2->sector) {
3023 return 1;
3024 } else if (req1->sector < req2->sector) {
3025 return -1;
3026 } else {
3027 return 0;
3028 }
40b4f539
KW
3029}
3030
3031/*
3032 * Takes a bunch of requests and tries to merge them. Returns the number of
3033 * requests that remain after merging.
3034 */
3035static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
3036 int num_reqs, MultiwriteCB *mcb)
3037{
3038 int i, outidx;
3039
3040 // Sort requests by start sector
3041 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
3042
3043 // Check if adjacent requests touch the same clusters. If so, combine them,
3044 // filling up gaps with zero sectors.
3045 outidx = 0;
3046 for (i = 1; i < num_reqs; i++) {
3047 int merge = 0;
3048 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
3049
b6a127a1 3050 // Handle exactly sequential writes and overlapping writes.
40b4f539
KW
3051 if (reqs[i].sector <= oldreq_last) {
3052 merge = 1;
3053 }
3054
e2a305fb
CH
3055 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
3056 merge = 0;
3057 }
3058
40b4f539
KW
3059 if (merge) {
3060 size_t size;
7267c094 3061 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
40b4f539
KW
3062 qemu_iovec_init(qiov,
3063 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
3064
3065 // Add the first request to the merged one. If the requests are
3066 // overlapping, drop the last sectors of the first request.
3067 size = (reqs[i].sector - reqs[outidx].sector) << 9;
3068 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
3069
b6a127a1
PB
3070 // We should need to add any zeros between the two requests
3071 assert (reqs[i].sector <= oldreq_last);
40b4f539
KW
3072
3073 // Add the second request
3074 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
3075
cbf1dff2 3076 reqs[outidx].nb_sectors = qiov->size >> 9;
40b4f539
KW
3077 reqs[outidx].qiov = qiov;
3078
3079 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
3080 } else {
3081 outidx++;
3082 reqs[outidx].sector = reqs[i].sector;
3083 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
3084 reqs[outidx].qiov = reqs[i].qiov;
3085 }
3086 }
3087
3088 return outidx + 1;
3089}
3090
3091/*
3092 * Submit multiple AIO write requests at once.
3093 *
3094 * On success, the function returns 0 and all requests in the reqs array have
3095 * been submitted. In error case this function returns -1, and any of the
3096 * requests may or may not be submitted yet. In particular, this means that the
3097 * callback will be called for some of the requests, for others it won't. The
3098 * caller must check the error field of the BlockRequest to wait for the right
3099 * callbacks (if error != 0, no callback will be called).
3100 *
3101 * The implementation may modify the contents of the reqs array, e.g. to merge
3102 * requests. However, the fields opaque and error are left unmodified as they
3103 * are used to signal failure for a single request to the caller.
3104 */
3105int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
3106{
40b4f539
KW
3107 MultiwriteCB *mcb;
3108 int i;
3109
301db7c2
RH
3110 /* don't submit writes if we don't have a medium */
3111 if (bs->drv == NULL) {
3112 for (i = 0; i < num_reqs; i++) {
3113 reqs[i].error = -ENOMEDIUM;
3114 }
3115 return -1;
3116 }
3117
40b4f539
KW
3118 if (num_reqs == 0) {
3119 return 0;
3120 }
3121
3122 // Create MultiwriteCB structure
7267c094 3123 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
40b4f539
KW
3124 mcb->num_requests = 0;
3125 mcb->num_callbacks = num_reqs;
3126
3127 for (i = 0; i < num_reqs; i++) {
3128 mcb->callbacks[i].cb = reqs[i].cb;
3129 mcb->callbacks[i].opaque = reqs[i].opaque;
3130 }
3131
3132 // Check for mergable requests
3133 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
3134
6d519a5f
SH
3135 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
3136
df9309fb
PB
3137 /* Run the aio requests. */
3138 mcb->num_requests = num_reqs;
40b4f539 3139 for (i = 0; i < num_reqs; i++) {
ad54ae80 3140 bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
40b4f539 3141 reqs[i].nb_sectors, multiwrite_cb, mcb);
40b4f539
KW
3142 }
3143
3144 return 0;
40b4f539
KW
3145}
3146
83f64091 3147void bdrv_aio_cancel(BlockDriverAIOCB *acb)
83f64091 3148{
6bbff9a0 3149 acb->pool->cancel(acb);
83f64091
FB
3150}
3151
/* block I/O throttling */

/*
 * Decide whether a request of nb_sectors would exceed the bytes-per-second
 * limit within the current time slice.
 *
 * Returns true (and the suggested wait in *wait) if the request must be
 * delayed; false (and *wait = 0) if it may proceed.  Side effect: on the
 * throttled path, bs->slice_time / bs->slice_end are extended.
 */
static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
                 bool is_write, double elapsed_time, uint64_t *wait)
{
    uint64_t bps_limit = 0;
    double   bytes_limit, bytes_base, bytes_res;
    double   slice_time, wait_time;

    /* A total limit takes precedence over the per-direction limit. */
    if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
        bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
    } else if (bs->io_limits.bps[is_write]) {
        bps_limit = bs->io_limits.bps[is_write];
    } else {
        /* No byte limit configured: never throttle. */
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    /* Bytes allowed in the elapsed portion of the slice. */
    slice_time = bs->slice_end - bs->slice_start;
    slice_time /= (NANOSECONDS_PER_SECOND);
    bytes_limit = bps_limit * slice_time;
    bytes_base = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
    if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
        /* Total limit counts both directions. */
        bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
    }

    /* bytes_base: the bytes of data which have been read/written; and
     * it is obtained from the history statistic info.
     * bytes_res: the remaining bytes of data which need to be read/written.
     * (bytes_base + bytes_res) / bps_limit: used to calcuate
     * the total time for completing reading/writting all data.
     */
    bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;

    if (bytes_base + bytes_res <= bytes_limit) {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    /* Calc approx time to dispatch (seconds) */
    wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;

    /* When the I/O rate at runtime exceeds the limits,
     * bs->slice_end need to be extended in order that the current statistic
     * info can be kept until the timer fire, so it is increased and tuned
     * based on the result of experiment.
     * NOTE(review): the scale factor BLOCK_IO_SLICE_TIME * 10 converting the
     * seconds-valued wait_time looks empirical — confirm units against the
     * throttling timer before changing it.
     */
    bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
    bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
    if (wait) {
        *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
    }

    return true;
}
3212
/*
 * Decide whether one more I/O operation would exceed the iops limit within
 * the current time slice; mirrors bdrv_exceed_bps_limits but counts
 * operations instead of bytes.
 *
 * Returns true (and the suggested wait in *wait) to delay, false to proceed.
 * Side effect: extends bs->slice_time / bs->slice_end on the throttled path.
 */
static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
                 double elapsed_time, uint64_t *wait)
{
    uint64_t iops_limit = 0;
    double   ios_limit, ios_base;
    double   slice_time, wait_time;

    /* A total limit takes precedence over the per-direction limit. */
    if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
        iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
    } else if (bs->io_limits.iops[is_write]) {
        iops_limit = bs->io_limits.iops[is_write];
    } else {
        /* No iops limit configured: never throttle. */
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    /* Operations allowed in the elapsed portion of the slice. */
    slice_time = bs->slice_end - bs->slice_start;
    slice_time /= (NANOSECONDS_PER_SECOND);
    ios_limit  = iops_limit * slice_time;
    ios_base   = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
    if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
        /* Total limit counts both directions. */
        ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
    }

    /* "+ 1" accounts for the operation being submitted now. */
    if (ios_base + 1 <= ios_limit) {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    /* Calc approx time to dispatch (seconds) */
    wait_time = (ios_base + 1) / iops_limit;
    if (wait_time > elapsed_time) {
        wait_time = wait_time - elapsed_time;
    } else {
        wait_time = 0;
    }

    /* NOTE(review): same empirical BLOCK_IO_SLICE_TIME * 10 scaling as in
     * bdrv_exceed_bps_limits — confirm units before changing. */
    bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
    bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
    if (wait) {
        *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
    }

    return true;
}
3264
/*
 * Combined throttling check: starts/refreshes the accounting slice, then
 * consults both the bps and iops limiters.
 *
 * Returns true if the request must be delayed; *wait receives the larger of
 * the two suggested delays.  Side effects: resets or extends the slice and
 * snapshots the I/O counters into bs->io_base when a new slice starts.
 */
static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
                                  bool is_write, int64_t *wait)
{
    int64_t  now, max_wait;
    uint64_t bps_wait = 0, iops_wait = 0;
    double   elapsed_time;
    int      bps_ret, iops_ret;

    now = qemu_get_clock_ns(vm_clock);
    if ((bs->slice_start < now)
        && (bs->slice_end > now)) {
        /* Still inside the current slice: just slide its end forward. */
        bs->slice_end = now + bs->slice_time;
    } else {
        /* Start a new slice and baseline the counters. */
        bs->slice_time  =  5 * BLOCK_IO_SLICE_TIME;
        bs->slice_start = now;
        bs->slice_end   = now + bs->slice_time;

        bs->io_base.bytes[is_write]  = bs->nr_bytes[is_write];
        bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];

        bs->io_base.ios[is_write]    = bs->nr_ops[is_write];
        bs->io_base.ios[!is_write]   = bs->nr_ops[!is_write];
    }

    elapsed_time  = now - bs->slice_start;
    elapsed_time  /= (NANOSECONDS_PER_SECOND);

    bps_ret  = bdrv_exceed_bps_limits(bs, nb_sectors,
                                      is_write, elapsed_time, &bps_wait);
    iops_ret = bdrv_exceed_iops_limits(bs, is_write,
                                       elapsed_time, &iops_wait);
    if (bps_ret || iops_ret) {
        /* Throttled: report the stricter (longer) of the two waits. */
        max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
        if (wait) {
            *wait = max_wait;
        }

        /* Keep the slice alive at least until the wait expires. */
        now = qemu_get_clock_ns(vm_clock);
        if (bs->slice_end < now + max_wait) {
            bs->slice_end = now + max_wait;
        }

        return true;
    }

    if (wait) {
        *wait = 0;
    }

    return false;
}
ce1a14dc 3316
83f64091
FB
/**************************************************************/
/* async block device emulation */

/* AIOCB used to emulate async I/O on top of a driver's synchronous
 * bdrv_read/bdrv_write: the request runs synchronously into a bounce
 * buffer and completion is deferred to a bottom half. */
typedef struct BlockDriverAIOCBSync {
    BlockDriverAIOCB common;
    QEMUBH *bh;            /* completion bottom half; NULL once deleted */
    int ret;               /* result of the synchronous read/write */
    /* vector translation state */
    QEMUIOVector *qiov;    /* caller's iovec */
    uint8_t *bounce;       /* linear bounce buffer (qemu_blockalign'd) */
    int is_write;          /* nonzero for writes */
} BlockDriverAIOCBSync;
3329
3330static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
3331{
b666d239
KW
3332 BlockDriverAIOCBSync *acb =
3333 container_of(blockacb, BlockDriverAIOCBSync, common);
6a7ad299 3334 qemu_bh_delete(acb->bh);
36afc451 3335 acb->bh = NULL;
c16b5a2c
CH
3336 qemu_aio_release(acb);
3337}
3338
/* Pool for the synchronous-emulation AIOCBs above. */
static AIOPool bdrv_em_aio_pool = {
    .aiocb_size = sizeof(BlockDriverAIOCBSync),
    .cancel     = bdrv_aio_cancel_em,
};
3343
ce1a14dc 3344static void bdrv_aio_bh_cb(void *opaque)
83f64091 3345{
ce1a14dc 3346 BlockDriverAIOCBSync *acb = opaque;
f141eafe 3347
f141eafe
AL
3348 if (!acb->is_write)
3349 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
ceb42de8 3350 qemu_vfree(acb->bounce);
ce1a14dc 3351 acb->common.cb(acb->common.opaque, acb->ret);
6a7ad299 3352 qemu_bh_delete(acb->bh);
36afc451 3353 acb->bh = NULL;
ce1a14dc 3354 qemu_aio_release(acb);
83f64091 3355}
beac80cd 3356
f141eafe
AL
3357static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
3358 int64_t sector_num,
3359 QEMUIOVector *qiov,
3360 int nb_sectors,
3361 BlockDriverCompletionFunc *cb,
3362 void *opaque,
3363 int is_write)
3364
83f64091 3365{
ce1a14dc 3366 BlockDriverAIOCBSync *acb;
ce1a14dc 3367
c16b5a2c 3368 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
f141eafe
AL
3369 acb->is_write = is_write;
3370 acb->qiov = qiov;
e268ca52 3371 acb->bounce = qemu_blockalign(bs, qiov->size);
3f3aace8 3372 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
f141eafe
AL
3373
3374 if (is_write) {
3375 qemu_iovec_to_buffer(acb->qiov, acb->bounce);
1ed20acf 3376 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
f141eafe 3377 } else {
1ed20acf 3378 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
f141eafe
AL
3379 }
3380
ce1a14dc 3381 qemu_bh_schedule(acb->bh);
f141eafe 3382
ce1a14dc 3383 return &acb->common;
beac80cd
FB
3384}
3385
f141eafe
AL
3386static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
3387 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 3388 BlockDriverCompletionFunc *cb, void *opaque)
beac80cd 3389{
f141eafe
AL
3390 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
3391}
83f64091 3392
f141eafe
AL
3393static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
3394 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
3395 BlockDriverCompletionFunc *cb, void *opaque)
3396{
3397 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
beac80cd 3398}
beac80cd 3399
/* AIOCB that bridges the AIO interface onto the coroutine request path:
 * a coroutine performs the I/O and a bottom half delivers the callback. */
typedef struct BlockDriverAIOCBCoroutine {
    BlockDriverAIOCB common;
    BlockRequest req;   /* sector/nb_sectors/qiov plus the error result */
    bool is_write;      /* request direction */
    QEMUBH* bh;         /* completion bottom half */
} BlockDriverAIOCBCoroutine;
3407
/* "Cancel" a coroutine-backed request by draining all outstanding AIO;
 * there is no per-request cancellation, so this waits for completion. */
static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
{
    qemu_aio_flush();
}
3412
/* Pool for the coroutine-bridging AIOCBs above. */
static AIOPool bdrv_em_co_aio_pool = {
    .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
    .cancel     = bdrv_aio_co_cancel_em,
};
3417
35246a68 3418static void bdrv_co_em_bh(void *opaque)
68485420
KW
3419{
3420 BlockDriverAIOCBCoroutine *acb = opaque;
3421
3422 acb->common.cb(acb->common.opaque, acb->req.error);
3423 qemu_bh_delete(acb->bh);
3424 qemu_aio_release(acb);
3425}
3426
b2a61371
SH
3427/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
3428static void coroutine_fn bdrv_co_do_rw(void *opaque)
3429{
3430 BlockDriverAIOCBCoroutine *acb = opaque;
3431 BlockDriverState *bs = acb->common.bs;
3432
3433 if (!acb->is_write) {
3434 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
470c0504 3435 acb->req.nb_sectors, acb->req.qiov, 0);
b2a61371
SH
3436 } else {
3437 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
f08f2dda 3438 acb->req.nb_sectors, acb->req.qiov, 0);
b2a61371
SH
3439 }
3440
35246a68 3441 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
b2a61371
SH
3442 qemu_bh_schedule(acb->bh);
3443}
3444
68485420
KW
3445static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
3446 int64_t sector_num,
3447 QEMUIOVector *qiov,
3448 int nb_sectors,
3449 BlockDriverCompletionFunc *cb,
3450 void *opaque,
8c5873d6 3451 bool is_write)
68485420
KW
3452{
3453 Coroutine *co;
3454 BlockDriverAIOCBCoroutine *acb;
3455
3456 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3457 acb->req.sector = sector_num;
3458 acb->req.nb_sectors = nb_sectors;
3459 acb->req.qiov = qiov;
3460 acb->is_write = is_write;
3461
8c5873d6 3462 co = qemu_coroutine_create(bdrv_co_do_rw);
68485420
KW
3463 qemu_coroutine_enter(co, acb);
3464
3465 return &acb->common;
3466}
3467
07f07615 3468static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
b2e12bc6 3469{
07f07615
PB
3470 BlockDriverAIOCBCoroutine *acb = opaque;
3471 BlockDriverState *bs = acb->common.bs;
b2e12bc6 3472
07f07615
PB
3473 acb->req.error = bdrv_co_flush(bs);
3474 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
b2e12bc6 3475 qemu_bh_schedule(acb->bh);
b2e12bc6
CH
3476}
3477
07f07615 3478BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
016f5cf6
AG
3479 BlockDriverCompletionFunc *cb, void *opaque)
3480{
07f07615 3481 trace_bdrv_aio_flush(bs, opaque);
016f5cf6 3482
07f07615
PB
3483 Coroutine *co;
3484 BlockDriverAIOCBCoroutine *acb;
016f5cf6 3485
07f07615
PB
3486 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3487 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
3488 qemu_coroutine_enter(co, acb);
016f5cf6 3489
016f5cf6
AG
3490 return &acb->common;
3491}
3492
4265d620
PB
3493static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
3494{
3495 BlockDriverAIOCBCoroutine *acb = opaque;
3496 BlockDriverState *bs = acb->common.bs;
3497
3498 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
3499 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
3500 qemu_bh_schedule(acb->bh);
3501}
3502
3503BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
3504 int64_t sector_num, int nb_sectors,
3505 BlockDriverCompletionFunc *cb, void *opaque)
3506{
3507 Coroutine *co;
3508 BlockDriverAIOCBCoroutine *acb;
3509
3510 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
3511
3512 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
3513 acb->req.sector = sector_num;
3514 acb->req.nb_sectors = nb_sectors;
3515 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
3516 qemu_coroutine_enter(co, acb);
3517
3518 return &acb->common;
3519}
3520
/* Initialize the block layer: run all registered block driver modules. */
void bdrv_init(void)
{
    module_call_init(MODULE_INIT_BLOCK);
}
ce1a14dc 3525
/* Like bdrv_init(), but restrict format probing/opening to the
 * compiled-in driver whitelist. */
void bdrv_init_with_whitelist(void)
{
    use_bdrv_whitelist = 1;
    bdrv_init();
}
3531
c16b5a2c
CH
3532void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
3533 BlockDriverCompletionFunc *cb, void *opaque)
ce1a14dc 3534{
ce1a14dc
PB
3535 BlockDriverAIOCB *acb;
3536
6bbff9a0
AL
3537 if (pool->free_aiocb) {
3538 acb = pool->free_aiocb;
3539 pool->free_aiocb = acb->next;
ce1a14dc 3540 } else {
7267c094 3541 acb = g_malloc0(pool->aiocb_size);
6bbff9a0 3542 acb->pool = pool;
ce1a14dc
PB
3543 }
3544 acb->bs = bs;
3545 acb->cb = cb;
3546 acb->opaque = opaque;
3547 return acb;
3548}
3549
3550void qemu_aio_release(void *p)
3551{
6bbff9a0
AL
3552 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
3553 AIOPool *pool = acb->pool;
3554 acb->next = pool->free_aiocb;
3555 pool->free_aiocb = acb;
ce1a14dc 3556}
19cb3738 3557
/**************************************************************/
/* Coroutine block device emulation */

/* Rendezvous between an AIO completion callback and the coroutine that
 * issued the request. */
typedef struct CoroutineIOCompletion {
    Coroutine *coroutine;  /* coroutine to re-enter on completion */
    int ret;               /* request result */
} CoroutineIOCompletion;
3565
3566static void bdrv_co_io_em_complete(void *opaque, int ret)
3567{
3568 CoroutineIOCompletion *co = opaque;
3569
3570 co->ret = ret;
3571 qemu_coroutine_enter(co->coroutine, NULL);
3572}
3573
3574static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
3575 int nb_sectors, QEMUIOVector *iov,
3576 bool is_write)
3577{
3578 CoroutineIOCompletion co = {
3579 .coroutine = qemu_coroutine_self(),
3580 };
3581 BlockDriverAIOCB *acb;
3582
3583 if (is_write) {
a652d160
SH
3584 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
3585 bdrv_co_io_em_complete, &co);
f9f05dc5 3586 } else {
a652d160
SH
3587 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
3588 bdrv_co_io_em_complete, &co);
f9f05dc5
KW
3589 }
3590
59370aaa 3591 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
f9f05dc5
KW
3592 if (!acb) {
3593 return -EIO;
3594 }
3595 qemu_coroutine_yield();
3596
3597 return co.ret;
3598}
3599
3600static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
3601 int64_t sector_num, int nb_sectors,
3602 QEMUIOVector *iov)
3603{
3604 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
3605}
3606
3607static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
3608 int64_t sector_num, int nb_sectors,
3609 QEMUIOVector *iov)
3610{
3611 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
3612}
3613
07f07615 3614static void coroutine_fn bdrv_flush_co_entry(void *opaque)
e7a8a783 3615{
07f07615
PB
3616 RwCo *rwco = opaque;
3617
3618 rwco->ret = bdrv_co_flush(rwco->bs);
3619}
3620
/*
 * Flush bs in coroutine context.
 *
 * Order matters: first flush the format layer's caches to the OS (even with
 * cache=unsafe), then — unless BDRV_O_NO_FLUSH — force the data to stable
 * storage via the best available driver hook, and finally recurse into the
 * protocol layer (bs->file).  Returns 0 on success or a negative errno.
 */
int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
    int ret;

    /* Nothing to do for an absent, empty, or read-only device. */
    if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
        return 0;
    }

    /* Write back cached data to the OS even with cache=unsafe */
    if (bs->drv->bdrv_co_flush_to_os) {
        ret = bs->drv->bdrv_co_flush_to_os(bs);
        if (ret < 0) {
            return ret;
        }
    }

    /* But don't actually force it to the disk with cache=unsafe */
    if (bs->open_flags & BDRV_O_NO_FLUSH) {
        return 0;
    }

    if (bs->drv->bdrv_co_flush_to_disk) {
        ret = bs->drv->bdrv_co_flush_to_disk(bs);
    } else if (bs->drv->bdrv_aio_flush) {
        /* Bridge the AIO-style flush hook: submit and yield until the
         * completion callback wakes this coroutine. */
        BlockDriverAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            ret = -EIO;
        } else {
            qemu_coroutine_yield();
            ret = co.ret;
        }
    } else {
        /*
         * Some block drivers always operate in either writethrough or unsafe
         * mode and don't support bdrv_flush therefore. Usually qemu doesn't
         * know how the server works (because the behaviour is hardcoded or
         * depends on server-side configuration), so we can't ensure that
         * everything is safe on disk. Returning an error doesn't work because
         * that would break guests even if the server operates in writethrough
         * mode.
         *
         * Let's hope the user knows what he's doing.
         */
        ret = 0;
    }
    if (ret < 0) {
        return ret;
    }

    /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
     * in the case of cache=unsafe, so there are no useless flushes.
     */
    return bdrv_co_flush(bs->file);
}
3680
0f15423c
AL
3681void bdrv_invalidate_cache(BlockDriverState *bs)
3682{
3683 if (bs->drv && bs->drv->bdrv_invalidate_cache) {
3684 bs->drv->bdrv_invalidate_cache(bs);
3685 }
3686}
3687
3688void bdrv_invalidate_cache_all(void)
3689{
3690 BlockDriverState *bs;
3691
3692 QTAILQ_FOREACH(bs, &bdrv_states, list) {
3693 bdrv_invalidate_cache(bs);
3694 }
3695}
3696
07789269
BC
3697void bdrv_clear_incoming_migration_all(void)
3698{
3699 BlockDriverState *bs;
3700
3701 QTAILQ_FOREACH(bs, &bdrv_states, list) {
3702 bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING);
3703 }
3704}
3705
07f07615
PB
3706int bdrv_flush(BlockDriverState *bs)
3707{
3708 Coroutine *co;
3709 RwCo rwco = {
3710 .bs = bs,
3711 .ret = NOT_DONE,
e7a8a783 3712 };
e7a8a783 3713
07f07615
PB
3714 if (qemu_in_coroutine()) {
3715 /* Fast-path if already in coroutine context */
3716 bdrv_flush_co_entry(&rwco);
3717 } else {
3718 co = qemu_coroutine_create(bdrv_flush_co_entry);
3719 qemu_coroutine_enter(co, &rwco);
3720 while (rwco.ret == NOT_DONE) {
3721 qemu_aio_wait();
3722 }
e7a8a783 3723 }
07f07615
PB
3724
3725 return rwco.ret;
e7a8a783
KW
3726}
3727
4265d620
PB
3728static void coroutine_fn bdrv_discard_co_entry(void *opaque)
3729{
3730 RwCo *rwco = opaque;
3731
3732 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
3733}
3734
3735int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
3736 int nb_sectors)
3737{
3738 if (!bs->drv) {
3739 return -ENOMEDIUM;
3740 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
3741 return -EIO;
3742 } else if (bs->read_only) {
3743 return -EROFS;
3744 } else if (bs->drv->bdrv_co_discard) {
3745 return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
3746 } else if (bs->drv->bdrv_aio_discard) {
3747 BlockDriverAIOCB *acb;
3748 CoroutineIOCompletion co = {
3749 .coroutine = qemu_coroutine_self(),
3750 };
3751
3752 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
3753 bdrv_co_io_em_complete, &co);
3754 if (acb == NULL) {
3755 return -EIO;
3756 } else {
3757 qemu_coroutine_yield();
3758 return co.ret;
3759 }
4265d620
PB
3760 } else {
3761 return 0;
3762 }
3763}
3764
3765int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
3766{
3767 Coroutine *co;
3768 RwCo rwco = {
3769 .bs = bs,
3770 .sector_num = sector_num,
3771 .nb_sectors = nb_sectors,
3772 .ret = NOT_DONE,
3773 };
3774
3775 if (qemu_in_coroutine()) {
3776 /* Fast-path if already in coroutine context */
3777 bdrv_discard_co_entry(&rwco);
3778 } else {
3779 co = qemu_coroutine_create(bdrv_discard_co_entry);
3780 qemu_coroutine_enter(co, &rwco);
3781 while (rwco.ret == NOT_DONE) {
3782 qemu_aio_wait();
3783 }
3784 }
3785
3786 return rwco.ret;
3787}
3788
19cb3738
FB
3789/**************************************************************/
3790/* removable device support */
3791
3792/**
3793 * Return TRUE if the media is present
3794 */
3795int bdrv_is_inserted(BlockDriverState *bs)
3796{
3797 BlockDriver *drv = bs->drv;
a1aff5bf 3798
19cb3738
FB
3799 if (!drv)
3800 return 0;
3801 if (!drv->bdrv_is_inserted)
a1aff5bf
MA
3802 return 1;
3803 return drv->bdrv_is_inserted(bs);
19cb3738
FB
3804}
3805
3806/**
8e49ca46
MA
3807 * Return whether the media changed since the last call to this
3808 * function, or -ENOTSUP if we don't know. Most drivers don't know.
19cb3738
FB
3809 */
3810int bdrv_media_changed(BlockDriverState *bs)
3811{
3812 BlockDriver *drv = bs->drv;
19cb3738 3813
8e49ca46
MA
3814 if (drv && drv->bdrv_media_changed) {
3815 return drv->bdrv_media_changed(bs);
3816 }
3817 return -ENOTSUP;
19cb3738
FB
3818}
3819
3820/**
3821 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3822 */
f36f3949 3823void bdrv_eject(BlockDriverState *bs, bool eject_flag)
19cb3738
FB
3824{
3825 BlockDriver *drv = bs->drv;
19cb3738 3826
822e1cd1
MA
3827 if (drv && drv->bdrv_eject) {
3828 drv->bdrv_eject(bs, eject_flag);
19cb3738 3829 }
6f382ed2
LC
3830
3831 if (bs->device_name[0] != '\0') {
3832 bdrv_emit_qmp_eject_event(bs, eject_flag);
3833 }
19cb3738
FB
3834}
3835
19cb3738
FB
3836/**
3837 * Lock or unlock the media (if it is locked, the user won't be able
3838 * to eject it manually).
3839 */
025e849a 3840void bdrv_lock_medium(BlockDriverState *bs, bool locked)
19cb3738
FB
3841{
3842 BlockDriver *drv = bs->drv;
3843
025e849a 3844 trace_bdrv_lock_medium(bs, locked);
b8c6d095 3845
025e849a
MA
3846 if (drv && drv->bdrv_lock_medium) {
3847 drv->bdrv_lock_medium(bs, locked);
19cb3738
FB
3848 }
3849}
985a03b0
TS
3850
3851/* needed for generic scsi interface */
3852
3853int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3854{
3855 BlockDriver *drv = bs->drv;
3856
3857 if (drv && drv->bdrv_ioctl)
3858 return drv->bdrv_ioctl(bs, req, buf);
3859 return -ENOTSUP;
3860}
7d780669 3861
221f715d
AL
3862BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3863 unsigned long int req, void *buf,
3864 BlockDriverCompletionFunc *cb, void *opaque)
7d780669 3865{
221f715d 3866 BlockDriver *drv = bs->drv;
7d780669 3867
221f715d
AL
3868 if (drv && drv->bdrv_aio_ioctl)
3869 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3870 return NULL;
7d780669 3871}
e268ca52 3872
/* Record the buffer alignment (in bytes) required by the backing storage;
 * consumed by qemu_blockalign(). */
void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
{
    bs->buffer_alignment = align;
}
7cd1e32a 3877
e268ca52
AL
3878void *qemu_blockalign(BlockDriverState *bs, size_t size)
3879{
3880 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3881}
/* Enable or disable dirty-sector tracking.  Enabling allocates (once) a
 * zeroed bitmap with one bit per BDRV_SECTORS_PER_DIRTY_CHUNK sectors;
 * disabling frees it.  The dirty count is reset either way. */
void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
{
    int64_t bitmap_size;

    bs->dirty_count = 0;
    if (enable) {
        if (!bs->dirty_bitmap) {
            /* Round the device length up to a whole number of chunks,
             * then to whole bytes of bitmap. */
            bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
                    BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
            bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;

            bs->dirty_bitmap = g_malloc0(bitmap_size);
        }
    } else {
        if (bs->dirty_bitmap) {
            g_free(bs->dirty_bitmap);
            bs->dirty_bitmap = NULL;
        }
    }
}
3903
/* Return nonzero if the chunk containing 'sector' is marked dirty.
 * Sectors beyond the device length, or with tracking disabled, read as
 * clean. */
int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
{
    int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;

    if (bs->dirty_bitmap &&
        (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
        /* One bit per chunk, packed into unsigned longs. */
        return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
            (1UL << (chunk % (sizeof(unsigned long) * 8))));
    } else {
        return 0;
    }
}
3916
/* Clear the dirty bits covering [cur_sector, cur_sector + nr_sectors). */
void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
                      int nr_sectors)
{
    set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
}
/* Return the number of dirty chunks currently tracked for bs. */
int64_t bdrv_get_dirty_count(BlockDriverState *bs)
{
    return bs->dirty_count;
}
f88e1a42 3927
/* Mark bs as in use (or not) by a background job/operation.  Asserts that
 * the flag actually toggles, catching double acquire/release. */
void bdrv_set_in_use(BlockDriverState *bs, int in_use)
{
    assert(bs->in_use != in_use);
    bs->in_use = in_use;
}
3933
/* Return nonzero if bs is currently claimed by a background operation. */
int bdrv_in_use(BlockDriverState *bs)
{
    return bs->in_use;
}
3938
/* Enable I/O status reporting for bs and reset it to OK. */
void bdrv_iostatus_enable(BlockDriverState *bs)
{
    bs->iostatus_enabled = true;
    bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
}
3944
3945/* The I/O status is only enabled if the drive explicitly
3946 * enables it _and_ the VM is configured to stop on errors */
3947bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3948{
d6bf279e 3949 return (bs->iostatus_enabled &&
28a7282a
LC
3950 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3951 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3952 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3953}
3954
/* Disable I/O status reporting for bs. */
void bdrv_iostatus_disable(BlockDriverState *bs)
{
    bs->iostatus_enabled = false;
}
3959
/* Reset the I/O status to OK, but only while reporting is enabled. */
void bdrv_iostatus_reset(BlockDriverState *bs)
{
    if (bdrv_iostatus_is_enabled(bs)) {
        bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
    }
}
3966
3967/* XXX: Today this is set by device models because it makes the implementation
3968 quite simple. However, the block layer knows about the error, so it's
3969 possible to implement this without device models being involved */
3970void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3971{
58e21ef5
LC
3972 if (bdrv_iostatus_is_enabled(bs) &&
3973 bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
28a7282a 3974 assert(error >= 0);
58e21ef5
LC
3975 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3976 BLOCK_DEVICE_IO_STATUS_FAILED;
28a7282a
LC
3977 }
3978}
3979
/* Begin accounting an I/O operation: record its size, start timestamp and
 * type in the caller-provided cookie (paired with bdrv_acct_done). */
void
bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
        enum BlockAcctType type)
{
    assert(type < BDRV_MAX_IOTYPE);

    cookie->bytes = bytes;
    cookie->start_time_ns = get_clock();
    cookie->type = type;
}
3990
/* Finish accounting an I/O operation: fold the cookie's byte count, op
 * count and elapsed time into the per-type statistics on bs. */
void
bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
{
    assert(cookie->type < BDRV_MAX_IOTYPE);

    bs->nr_bytes[cookie->type] += cookie->bytes;
    bs->nr_ops[cookie->type]++;
    bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
}
4000
f88e1a42
JS
4001int bdrv_img_create(const char *filename, const char *fmt,
4002 const char *base_filename, const char *base_fmt,
4003 char *options, uint64_t img_size, int flags)
4004{
4005 QEMUOptionParameter *param = NULL, *create_options = NULL;
d220894e 4006 QEMUOptionParameter *backing_fmt, *backing_file, *size;
f88e1a42
JS
4007 BlockDriverState *bs = NULL;
4008 BlockDriver *drv, *proto_drv;
96df67d1 4009 BlockDriver *backing_drv = NULL;
f88e1a42
JS
4010 int ret = 0;
4011
4012 /* Find driver and parse its options */
4013 drv = bdrv_find_format(fmt);
4014 if (!drv) {
4015 error_report("Unknown file format '%s'", fmt);
4f70f249 4016 ret = -EINVAL;
f88e1a42
JS
4017 goto out;
4018 }
4019
4020 proto_drv = bdrv_find_protocol(filename);
4021 if (!proto_drv) {
4022 error_report("Unknown protocol '%s'", filename);
4f70f249 4023 ret = -EINVAL;
f88e1a42
JS
4024 goto out;
4025 }
4026
4027 create_options = append_option_parameters(create_options,
4028 drv->create_options);
4029 create_options = append_option_parameters(create_options,
4030 proto_drv->create_options);
4031
4032 /* Create parameter list with default values */
4033 param = parse_option_parameters("", create_options, param);
4034
4035 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
4036
4037 /* Parse -o options */
4038 if (options) {
4039 param = parse_option_parameters(options, create_options, param);
4040 if (param == NULL) {
4041 error_report("Invalid options for file format '%s'.", fmt);
4f70f249 4042 ret = -EINVAL;
f88e1a42
JS
4043 goto out;
4044 }
4045 }
4046
4047 if (base_filename) {
4048 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
4049 base_filename)) {
4050 error_report("Backing file not supported for file format '%s'",
4051 fmt);
4f70f249 4052 ret = -EINVAL;
f88e1a42
JS
4053 goto out;
4054 }
4055 }
4056
4057 if (base_fmt) {
4058 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
4059 error_report("Backing file format not supported for file "
4060 "format '%s'", fmt);
4f70f249 4061 ret = -EINVAL;
f88e1a42
JS
4062 goto out;
4063 }
4064 }
4065
792da93a
JS
4066 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
4067 if (backing_file && backing_file->value.s) {
4068 if (!strcmp(filename, backing_file->value.s)) {
4069 error_report("Error: Trying to create an image with the "
4070 "same filename as the backing file");
4f70f249 4071 ret = -EINVAL;
792da93a
JS
4072 goto out;
4073 }
4074 }
4075
f88e1a42
JS
4076 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
4077 if (backing_fmt && backing_fmt->value.s) {
96df67d1
SH
4078 backing_drv = bdrv_find_format(backing_fmt->value.s);
4079 if (!backing_drv) {
f88e1a42
JS
4080 error_report("Unknown backing file format '%s'",
4081 backing_fmt->value.s);
4f70f249 4082 ret = -EINVAL;
f88e1a42
JS
4083 goto out;
4084 }
4085 }
4086
4087 // The size for the image must always be specified, with one exception:
4088 // If we are using a backing file, we can obtain the size from there
d220894e
KW
4089 size = get_option_parameter(param, BLOCK_OPT_SIZE);
4090 if (size && size->value.n == -1) {
f88e1a42
JS
4091 if (backing_file && backing_file->value.s) {
4092 uint64_t size;
f88e1a42
JS
4093 char buf[32];
4094
f88e1a42
JS
4095 bs = bdrv_new("");
4096
96df67d1 4097 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
f88e1a42 4098 if (ret < 0) {
96df67d1 4099 error_report("Could not open '%s'", backing_file->value.s);
f88e1a42
JS
4100 goto out;
4101 }
4102 bdrv_get_geometry(bs, &size);
4103 size *= 512;
4104
4105 snprintf(buf, sizeof(buf), "%" PRId64, size);
4106 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
4107 } else {
4108 error_report("Image creation needs a size parameter");
4f70f249 4109 ret = -EINVAL;
f88e1a42
JS
4110 goto out;
4111 }
4112 }
4113
4114 printf("Formatting '%s', fmt=%s ", filename, fmt);
4115 print_option_parameters(param);
4116 puts("");
4117
4118 ret = bdrv_create(drv, filename, param);
4119
4120 if (ret < 0) {
4121 if (ret == -ENOTSUP) {
4122 error_report("Formatting or formatting option not supported for "
4123 "file format '%s'", fmt);
4124 } else if (ret == -EFBIG) {
4125 error_report("The image size is too large for file format '%s'",
4126 fmt);
4127 } else {
4128 error_report("%s: error while creating %s: %s", filename, fmt,
4129 strerror(-ret));
4130 }
4131 }
4132
4133out:
4134 free_option_parameters(create_options);
4135 free_option_parameters(param);
4136
4137 if (bs) {
4138 bdrv_delete(bs);
4139 }
4f70f249
JS
4140
4141 return ret;
f88e1a42 4142}
/* Create a background block job on bs.
 *
 * Fails with QERR_DEVICE_IN_USE if bs already has a job or is otherwise
 * in use.  On success, bs is marked in-use and owns the returned job.
 * If an initial speed is given and the job type rejects it, everything
 * is rolled back and NULL is returned with *errp set. */
void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
                       int64_t speed, BlockDriverCompletionFunc *cb,
                       void *opaque, Error **errp)
{
    BlockJob *job;

    if (bs->job || bdrv_in_use(bs)) {
        error_set(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
        return NULL;
    }
    bdrv_set_in_use(bs, 1);

    job = g_malloc0(job_type->instance_size);
    job->job_type = job_type;
    job->bs = bs;
    job->cb = cb;
    job->opaque = opaque;
    bs->job = job;

    /* Only set speed when necessary to avoid NotSupported error */
    if (speed != 0) {
        Error *local_err = NULL;

        block_job_set_speed(job, speed, &local_err);
        if (error_is_set(&local_err)) {
            /* Roll back: release the job and the in-use claim. */
            bs->job = NULL;
            g_free(job);
            bdrv_set_in_use(bs, 0);
            error_propagate(errp, local_err);
            return NULL;
        }
    }
    return job;
}
4178
/* Finish a block job: deliver ret to its completion callback, then free
 * the job and release the in-use claim on bs. */
void block_job_complete(BlockJob *job, int ret)
{
    BlockDriverState *bs = job->bs;

    assert(bs->job == job);
    job->cb(job->opaque, ret);
    bs->job = NULL;
    g_free(job);
    bdrv_set_in_use(bs, 0);
}
4189
882ec7ce 4190void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
eeec61f2 4191{
9e6636c7 4192 Error *local_err = NULL;
9f25eccc 4193
eeec61f2 4194 if (!job->job_type->set_speed) {
9e6636c7
SH
4195 error_set(errp, QERR_NOT_SUPPORTED);
4196 return;
eeec61f2 4197 }
882ec7ce 4198 job->job_type->set_speed(job, speed, &local_err);
9e6636c7
SH
4199 if (error_is_set(&local_err)) {
4200 error_propagate(errp, local_err);
4201 return;
9f25eccc 4202 }
9e6636c7 4203
882ec7ce 4204 job->speed = speed;
eeec61f2
SH
4205}
4206
/* Request asynchronous cancellation; the job observes the flag via
 * block_job_is_cancelled(). */
void block_job_cancel(BlockJob *job)
{
    job->cancelled = true;
}
4211
/* Return whether cancellation has been requested for this job. */
bool block_job_is_cancelled(BlockJob *job)
{
    return job->cancelled;
}
/* Cancel a job and block until it has actually finished, pumping AIO
 * completions while the job is still busy. */
void block_job_cancel_sync(BlockJob *job)
{
    BlockDriverState *bs = job->bs;

    assert(bs->job == job);
    block_job_cancel(job);
    /* Completion clears bs->job, ending the loop. */
    while (bs->job != NULL && bs->job->busy) {
        qemu_aio_wait();
    }
}