]> git.ipfire.org Git - thirdparty/qemu.git/blame - block.c
scsi: Support I/O status
[thirdparty/qemu.git] / block.c
CommitLineData
fc01f7e7
FB
1/*
2 * QEMU System Emulator block driver
5fafdf24 3 *
fc01f7e7 4 * Copyright (c) 2003 Fabrice Bellard
5fafdf24 5 *
fc01f7e7
FB
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
3990d09a 24#include "config-host.h"
faf07963 25#include "qemu-common.h"
6d519a5f 26#include "trace.h"
376253ec 27#include "monitor.h"
ea2384d3 28#include "block_int.h"
5efa9d5a 29#include "module.h"
d15e5465 30#include "qemu-objects.h"
68485420 31#include "qemu-coroutine.h"
fc01f7e7 32
71e72a19 33#ifdef CONFIG_BSD
7674e7bf
FB
34#include <sys/types.h>
35#include <sys/stat.h>
36#include <sys/ioctl.h>
72cf2d4f 37#include <sys/queue.h>
c5e97233 38#ifndef __DragonFly__
7674e7bf
FB
39#include <sys/disk.h>
40#endif
c5e97233 41#endif
7674e7bf 42
49dc768d
AL
43#ifdef _WIN32
44#include <windows.h>
45#endif
46
7d4b4ba5 47static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
f141eafe
AL
48static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
49 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
c87c0672 50 BlockDriverCompletionFunc *cb, void *opaque);
f141eafe
AL
51static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
52 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 53 BlockDriverCompletionFunc *cb, void *opaque);
b2e12bc6
CH
54static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
55 BlockDriverCompletionFunc *cb, void *opaque);
016f5cf6
AG
56static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
57 BlockDriverCompletionFunc *cb, void *opaque);
5fafdf24 58static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
83f64091
FB
59 uint8_t *buf, int nb_sectors);
60static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
61 const uint8_t *buf, int nb_sectors);
68485420
KW
62static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs,
63 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
64 BlockDriverCompletionFunc *cb, void *opaque);
65static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs,
66 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
67 BlockDriverCompletionFunc *cb, void *opaque);
f9f05dc5
KW
68static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
69 int64_t sector_num, int nb_sectors,
70 QEMUIOVector *iov);
71static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
72 int64_t sector_num, int nb_sectors,
73 QEMUIOVector *iov);
e7a8a783 74static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs);
ec530c81 75
1b7bdbc1
SH
76static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
77 QTAILQ_HEAD_INITIALIZER(bdrv_states);
7ee930d0 78
8a22f02a
SH
79static QLIST_HEAD(, BlockDriver) bdrv_drivers =
80 QLIST_HEAD_INITIALIZER(bdrv_drivers);
ea2384d3 81
f9092b10
MA
82/* The device to use for VM snapshots */
83static BlockDriverState *bs_snapshots;
84
eb852011
MA
85/* If non-zero, use only whitelisted block drivers */
86static int use_bdrv_whitelist;
87
9e0b22f4
SH
88#ifdef _WIN32
/* Return non-zero when filename starts with an ASCII letter followed by ':'
   (for example "c:" or "D:\dir"), zero otherwise. */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    int is_lower = (letter >= 'a' && letter <= 'z');
    int is_upper = (letter >= 'A' && letter <= 'Z');

    if (!is_lower && !is_upper) {
        return 0;
    }
    return filename[1] == ':';
}
95
/* Return 1 when filename is exactly a drive specification ("X:") or starts
   with one of the device prefixes tested below; return 0 otherwise. */
int is_windows_drive(const char *filename)
{
    int bare_drive = is_windows_drive_prefix(filename) &&
                     filename[2] == '\0';

    if (bare_drive) {
        return 1;
    }
    return (strstart(filename, "\\\\.\\", NULL) ||
            strstart(filename, "//./", NULL)) ? 1 : 0;
}
106#endif
107
/* Tell whether path carries a "<protocol>:" prefix.  On Windows, names
   accepted by is_windows_drive() or is_windows_drive_prefix() are never
   treated as protocol names. */
static int path_has_protocol(const char *path)
{
    const char *colon;

#ifdef _WIN32
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    colon = strchr(path, ':');
    return colon ? 1 : 0;
}
120
/* Return non-zero if path is absolute.  Everything up to and including the
   first ':' (if any) is skipped before looking for a leading directory
   separator. */
int path_is_absolute(const char *path)
{
    const char *colon;
    const char *start;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (path[0] == '/' || path[0] == '\\') {
        return 1;
    }
#endif

    colon = strchr(path, ':');
    start = colon ? colon + 1 : path;

#ifdef _WIN32
    if (*start == '\\') {
        return 1;
    }
#endif
    return *start == '/';
}
140
83f64091
FB
/* Build in dest (at most dest_size bytes, always NUL-terminated when
   dest_size > 0) the path designated by filename.  An absolute filename is
   copied as is; otherwise it is appended to the directory part of
   base_path, i.e. everything up to the last separator or, failing that, up
   to the first ':'.  URLs are supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_proto, *after_sep, *dir_end;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* position just past the "<protocol>:" part, if there is one */
    after_proto = strchr(base_path, ':');
    after_proto = after_proto ? after_proto + 1 : base_path;

    /* position just past the last directory separator, if there is one */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *bslash = strrchr(base_path, '\\');
        if (!after_sep || bslash > after_sep) {
            after_sep = bslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* keep whichever prefix is longer, clamped to the destination size */
    dir_end = (after_sep > after_proto) ? after_sep : after_proto;
    prefix_len = dir_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
184
5efa9d5a 185void bdrv_register(BlockDriver *bdrv)
ea2384d3 186{
68485420
KW
187 if (bdrv->bdrv_co_readv) {
188 /* Emulate AIO by coroutines, and sync by AIO */
189 bdrv->bdrv_aio_readv = bdrv_co_aio_readv_em;
190 bdrv->bdrv_aio_writev = bdrv_co_aio_writev_em;
191 bdrv->bdrv_read = bdrv_read_em;
192 bdrv->bdrv_write = bdrv_write_em;
f9f05dc5
KW
193 } else {
194 bdrv->bdrv_co_readv = bdrv_co_readv_em;
195 bdrv->bdrv_co_writev = bdrv_co_writev_em;
196
197 if (!bdrv->bdrv_aio_readv) {
198 /* add AIO emulation layer */
199 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
200 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
201 } else if (!bdrv->bdrv_read) {
202 /* add synchronous IO emulation layer */
203 bdrv->bdrv_read = bdrv_read_em;
204 bdrv->bdrv_write = bdrv_write_em;
205 }
83f64091 206 }
b2e12bc6
CH
207
208 if (!bdrv->bdrv_aio_flush)
209 bdrv->bdrv_aio_flush = bdrv_aio_flush_em;
210
8a22f02a 211 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
ea2384d3 212}
b338082b
FB
213
214/* create a new block device (by default it is empty) */
215BlockDriverState *bdrv_new(const char *device_name)
216{
1b7bdbc1 217 BlockDriverState *bs;
b338082b 218
7267c094 219 bs = g_malloc0(sizeof(BlockDriverState));
b338082b 220 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
ea2384d3 221 if (device_name[0] != '\0') {
1b7bdbc1 222 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
ea2384d3 223 }
28a7282a 224 bdrv_iostatus_disable(bs);
b338082b
FB
225 return bs;
226}
227
ea2384d3
FB
228BlockDriver *bdrv_find_format(const char *format_name)
229{
230 BlockDriver *drv1;
8a22f02a
SH
231 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
232 if (!strcmp(drv1->format_name, format_name)) {
ea2384d3 233 return drv1;
8a22f02a 234 }
ea2384d3
FB
235 }
236 return NULL;
237}
238
eb852011
MA
239static int bdrv_is_whitelisted(BlockDriver *drv)
240{
241 static const char *whitelist[] = {
242 CONFIG_BDRV_WHITELIST
243 };
244 const char **p;
245
246 if (!whitelist[0])
247 return 1; /* no whitelist, anything goes */
248
249 for (p = whitelist; *p; p++) {
250 if (!strcmp(drv->format_name, *p)) {
251 return 1;
252 }
253 }
254 return 0;
255}
256
257BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
258{
259 BlockDriver *drv = bdrv_find_format(format_name);
260 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
261}
262
0e7e1989
KW
263int bdrv_create(BlockDriver *drv, const char* filename,
264 QEMUOptionParameter *options)
ea2384d3
FB
265{
266 if (!drv->bdrv_create)
267 return -ENOTSUP;
0e7e1989
KW
268
269 return drv->bdrv_create(filename, options);
ea2384d3
FB
270}
271
84a12e66
CH
272int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
273{
274 BlockDriver *drv;
275
b50cbabc 276 drv = bdrv_find_protocol(filename);
84a12e66 277 if (drv == NULL) {
16905d71 278 return -ENOENT;
84a12e66
CH
279 }
280
281 return bdrv_create(drv, filename, options);
282}
283
d5249393 284#ifdef _WIN32
95389c86 285void get_tmp_filename(char *filename, int size)
d5249393 286{
3b9f94e1 287 char temp_dir[MAX_PATH];
3b46e624 288
3b9f94e1
FB
289 GetTempPath(MAX_PATH, temp_dir);
290 GetTempFileName(temp_dir, "qem", 0, filename);
d5249393
FB
291}
292#else
/* Store in filename (at most size bytes) the name of a temporary file
   created with mkstemp() under $TMPDIR, or under /tmp when TMPDIR is not
   set.  The file is created and closed immediately. */
void get_tmp_filename(char *filename, int size)
{
    const char *tmpdir;
    int fd;

    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);

    fd = mkstemp(filename);
    if (fd >= 0) {
        /* only close a descriptor mkstemp() actually returned */
        close(fd);
    }
}
d5249393 305#endif
fc01f7e7 306
84a12e66
CH
307/*
308 * Detect host devices. By convention, /dev/cdrom[N] is always
309 * recognized as a host CDROM.
310 */
311static BlockDriver *find_hdev_driver(const char *filename)
312{
313 int score_max = 0, score;
314 BlockDriver *drv = NULL, *d;
315
316 QLIST_FOREACH(d, &bdrv_drivers, list) {
317 if (d->bdrv_probe_device) {
318 score = d->bdrv_probe_device(filename);
319 if (score > score_max) {
320 score_max = score;
321 drv = d;
322 }
323 }
324 }
325
326 return drv;
327}
328
b50cbabc 329BlockDriver *bdrv_find_protocol(const char *filename)
83f64091
FB
330{
331 BlockDriver *drv1;
332 char protocol[128];
1cec71e3 333 int len;
83f64091 334 const char *p;
19cb3738 335
66f82cee
KW
336 /* TODO Drivers without bdrv_file_open must be specified explicitly */
337
39508e7a
CH
338 /*
339 * XXX(hch): we really should not let host device detection
340 * override an explicit protocol specification, but moving this
341 * later breaks access to device names with colons in them.
342 * Thanks to the brain-dead persistent naming schemes on udev-
343 * based Linux systems those actually are quite common.
344 */
345 drv1 = find_hdev_driver(filename);
346 if (drv1) {
347 return drv1;
348 }
349
9e0b22f4 350 if (!path_has_protocol(filename)) {
39508e7a 351 return bdrv_find_format("file");
84a12e66 352 }
9e0b22f4
SH
353 p = strchr(filename, ':');
354 assert(p != NULL);
1cec71e3
AL
355 len = p - filename;
356 if (len > sizeof(protocol) - 1)
357 len = sizeof(protocol) - 1;
358 memcpy(protocol, filename, len);
359 protocol[len] = '\0';
8a22f02a 360 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
5fafdf24 361 if (drv1->protocol_name &&
8a22f02a 362 !strcmp(drv1->protocol_name, protocol)) {
83f64091 363 return drv1;
8a22f02a 364 }
83f64091
FB
365 }
366 return NULL;
367}
368
c98ac35d 369static int find_image_format(const char *filename, BlockDriver **pdrv)
f3a5d3f8
CH
370{
371 int ret, score, score_max;
372 BlockDriver *drv1, *drv;
373 uint8_t buf[2048];
374 BlockDriverState *bs;
375
f5edb014 376 ret = bdrv_file_open(&bs, filename, 0);
c98ac35d
SW
377 if (ret < 0) {
378 *pdrv = NULL;
379 return ret;
380 }
f8ea0b00 381
08a00559
KW
382 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
383 if (bs->sg || !bdrv_is_inserted(bs)) {
1a396859 384 bdrv_delete(bs);
c98ac35d
SW
385 drv = bdrv_find_format("raw");
386 if (!drv) {
387 ret = -ENOENT;
388 }
389 *pdrv = drv;
390 return ret;
1a396859 391 }
f8ea0b00 392
83f64091
FB
393 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
394 bdrv_delete(bs);
395 if (ret < 0) {
c98ac35d
SW
396 *pdrv = NULL;
397 return ret;
83f64091
FB
398 }
399
ea2384d3 400 score_max = 0;
84a12e66 401 drv = NULL;
8a22f02a 402 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
83f64091
FB
403 if (drv1->bdrv_probe) {
404 score = drv1->bdrv_probe(buf, ret, filename);
405 if (score > score_max) {
406 score_max = score;
407 drv = drv1;
408 }
0849bf08 409 }
fc01f7e7 410 }
c98ac35d
SW
411 if (!drv) {
412 ret = -ENOENT;
413 }
414 *pdrv = drv;
415 return ret;
ea2384d3
FB
416}
417
51762288
SH
418/**
419 * Set the current 'total_sectors' value
420 */
421static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
422{
423 BlockDriver *drv = bs->drv;
424
396759ad
NB
425 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
426 if (bs->sg)
427 return 0;
428
51762288
SH
429 /* query actual device if possible, otherwise just trust the hint */
430 if (drv->bdrv_getlength) {
431 int64_t length = drv->bdrv_getlength(bs);
432 if (length < 0) {
433 return length;
434 }
435 hint = length >> BDRV_SECTOR_BITS;
436 }
437
438 bs->total_sectors = hint;
439 return 0;
440}
441
c3993cdc
SH
442/**
443 * Set open flags for a given cache mode
444 *
445 * Return 0 on success, -1 if the cache mode was invalid.
446 */
447int bdrv_parse_cache_flags(const char *mode, int *flags)
448{
449 *flags &= ~BDRV_O_CACHE_MASK;
450
451 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
452 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
92196b2f
SH
453 } else if (!strcmp(mode, "directsync")) {
454 *flags |= BDRV_O_NOCACHE;
c3993cdc
SH
455 } else if (!strcmp(mode, "writeback")) {
456 *flags |= BDRV_O_CACHE_WB;
457 } else if (!strcmp(mode, "unsafe")) {
458 *flags |= BDRV_O_CACHE_WB;
459 *flags |= BDRV_O_NO_FLUSH;
460 } else if (!strcmp(mode, "writethrough")) {
461 /* this is the default */
462 } else {
463 return -1;
464 }
465
466 return 0;
467}
468
57915332
KW
469/*
470 * Common part for opening disk images and files
471 */
472static int bdrv_open_common(BlockDriverState *bs, const char *filename,
473 int flags, BlockDriver *drv)
474{
475 int ret, open_flags;
476
477 assert(drv != NULL);
478
28dcee10
SH
479 trace_bdrv_open_common(bs, filename, flags, drv->format_name);
480
66f82cee 481 bs->file = NULL;
51762288 482 bs->total_sectors = 0;
57915332
KW
483 bs->encrypted = 0;
484 bs->valid_key = 0;
485 bs->open_flags = flags;
57915332
KW
486 bs->buffer_alignment = 512;
487
488 pstrcpy(bs->filename, sizeof(bs->filename), filename);
489
490 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
491 return -ENOTSUP;
492 }
493
494 bs->drv = drv;
7267c094 495 bs->opaque = g_malloc0(drv->instance_size);
57915332 496
a6599793 497 if (flags & BDRV_O_CACHE_WB)
57915332
KW
498 bs->enable_write_cache = 1;
499
500 /*
501 * Clear flags that are internal to the block layer before opening the
502 * image.
503 */
504 open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
505
506 /*
ebabb67a 507 * Snapshots should be writable.
57915332
KW
508 */
509 if (bs->is_temporary) {
510 open_flags |= BDRV_O_RDWR;
511 }
512
66f82cee
KW
513 /* Open the image, either directly or using a protocol */
514 if (drv->bdrv_file_open) {
515 ret = drv->bdrv_file_open(bs, filename, open_flags);
516 } else {
517 ret = bdrv_file_open(&bs->file, filename, open_flags);
518 if (ret >= 0) {
519 ret = drv->bdrv_open(bs, open_flags);
520 }
521 }
522
57915332
KW
523 if (ret < 0) {
524 goto free_and_fail;
525 }
526
527 bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
51762288
SH
528
529 ret = refresh_total_sectors(bs, bs->total_sectors);
530 if (ret < 0) {
531 goto free_and_fail;
57915332 532 }
51762288 533
57915332
KW
534#ifndef _WIN32
535 if (bs->is_temporary) {
536 unlink(filename);
537 }
538#endif
539 return 0;
540
541free_and_fail:
66f82cee
KW
542 if (bs->file) {
543 bdrv_delete(bs->file);
544 bs->file = NULL;
545 }
7267c094 546 g_free(bs->opaque);
57915332
KW
547 bs->opaque = NULL;
548 bs->drv = NULL;
549 return ret;
550}
551
b6ce07aa
KW
552/*
553 * Opens a file using a protocol (file, host_device, nbd, ...)
554 */
83f64091 555int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
ea2384d3 556{
83f64091 557 BlockDriverState *bs;
6db95603 558 BlockDriver *drv;
83f64091
FB
559 int ret;
560
b50cbabc 561 drv = bdrv_find_protocol(filename);
6db95603
CH
562 if (!drv) {
563 return -ENOENT;
564 }
565
83f64091 566 bs = bdrv_new("");
b6ce07aa 567 ret = bdrv_open_common(bs, filename, flags, drv);
83f64091
FB
568 if (ret < 0) {
569 bdrv_delete(bs);
570 return ret;
3b0d4f61 571 }
71d0770c 572 bs->growable = 1;
83f64091
FB
573 *pbs = bs;
574 return 0;
575}
576
b6ce07aa
KW
577/*
578 * Opens a disk image (raw, qcow2, vmdk, ...)
579 */
d6e9098e
KW
580int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
581 BlockDriver *drv)
ea2384d3 582{
b6ce07aa 583 int ret;
712e7874 584
83f64091 585 if (flags & BDRV_O_SNAPSHOT) {
ea2384d3
FB
586 BlockDriverState *bs1;
587 int64_t total_size;
7c96d46e 588 int is_protocol = 0;
91a073a9
KW
589 BlockDriver *bdrv_qcow2;
590 QEMUOptionParameter *options;
b6ce07aa
KW
591 char tmp_filename[PATH_MAX];
592 char backing_filename[PATH_MAX];
3b46e624 593
ea2384d3
FB
594 /* if snapshot, we create a temporary backing file and open it
595 instead of opening 'filename' directly */
33e3963e 596
ea2384d3
FB
597 /* if there is a backing file, use it */
598 bs1 = bdrv_new("");
d6e9098e 599 ret = bdrv_open(bs1, filename, 0, drv);
51d7c00c 600 if (ret < 0) {
ea2384d3 601 bdrv_delete(bs1);
51d7c00c 602 return ret;
ea2384d3 603 }
3e82990b 604 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
7c96d46e
AL
605
606 if (bs1->drv && bs1->drv->protocol_name)
607 is_protocol = 1;
608
ea2384d3 609 bdrv_delete(bs1);
3b46e624 610
ea2384d3 611 get_tmp_filename(tmp_filename, sizeof(tmp_filename));
7c96d46e
AL
612
613 /* Real path is meaningless for protocols */
614 if (is_protocol)
615 snprintf(backing_filename, sizeof(backing_filename),
616 "%s", filename);
114cdfa9
KS
617 else if (!realpath(filename, backing_filename))
618 return -errno;
7c96d46e 619
91a073a9
KW
620 bdrv_qcow2 = bdrv_find_format("qcow2");
621 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
622
3e82990b 623 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
91a073a9
KW
624 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
625 if (drv) {
626 set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
627 drv->format_name);
628 }
629
630 ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
d748768c 631 free_option_parameters(options);
51d7c00c
AL
632 if (ret < 0) {
633 return ret;
ea2384d3 634 }
91a073a9 635
ea2384d3 636 filename = tmp_filename;
91a073a9 637 drv = bdrv_qcow2;
ea2384d3
FB
638 bs->is_temporary = 1;
639 }
712e7874 640
b6ce07aa 641 /* Find the right image format driver */
6db95603 642 if (!drv) {
c98ac35d 643 ret = find_image_format(filename, &drv);
51d7c00c 644 }
6987307c 645
51d7c00c 646 if (!drv) {
51d7c00c 647 goto unlink_and_fail;
ea2384d3 648 }
b6ce07aa
KW
649
650 /* Open the image */
651 ret = bdrv_open_common(bs, filename, flags, drv);
652 if (ret < 0) {
6987307c
CH
653 goto unlink_and_fail;
654 }
655
b6ce07aa
KW
656 /* If there is a backing file, use it */
657 if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
658 char backing_filename[PATH_MAX];
659 int back_flags;
660 BlockDriver *back_drv = NULL;
661
662 bs->backing_hd = bdrv_new("");
df2dbb4a
SH
663
664 if (path_has_protocol(bs->backing_file)) {
665 pstrcpy(backing_filename, sizeof(backing_filename),
666 bs->backing_file);
667 } else {
668 path_combine(backing_filename, sizeof(backing_filename),
669 filename, bs->backing_file);
670 }
671
672 if (bs->backing_format[0] != '\0') {
b6ce07aa 673 back_drv = bdrv_find_format(bs->backing_format);
df2dbb4a 674 }
b6ce07aa
KW
675
676 /* backing files always opened read-only */
677 back_flags =
678 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
679
680 ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
681 if (ret < 0) {
682 bdrv_close(bs);
683 return ret;
684 }
685 if (bs->is_temporary) {
686 bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
687 } else {
688 /* base image inherits from "parent" */
689 bs->backing_hd->keep_read_only = bs->keep_read_only;
690 }
691 }
692
693 if (!bdrv_key_required(bs)) {
7d4b4ba5 694 bdrv_dev_change_media_cb(bs, true);
b6ce07aa
KW
695 }
696
697 return 0;
698
699unlink_and_fail:
700 if (bs->is_temporary) {
701 unlink(filename);
702 }
703 return ret;
704}
705
fc01f7e7
FB
706void bdrv_close(BlockDriverState *bs)
707{
19cb3738 708 if (bs->drv) {
f9092b10
MA
709 if (bs == bs_snapshots) {
710 bs_snapshots = NULL;
711 }
557df6ac 712 if (bs->backing_hd) {
ea2384d3 713 bdrv_delete(bs->backing_hd);
557df6ac
SH
714 bs->backing_hd = NULL;
715 }
ea2384d3 716 bs->drv->bdrv_close(bs);
7267c094 717 g_free(bs->opaque);
ea2384d3
FB
718#ifdef _WIN32
719 if (bs->is_temporary) {
720 unlink(bs->filename);
721 }
67b915a5 722#endif
ea2384d3
FB
723 bs->opaque = NULL;
724 bs->drv = NULL;
b338082b 725
66f82cee
KW
726 if (bs->file != NULL) {
727 bdrv_close(bs->file);
728 }
729
7d4b4ba5 730 bdrv_dev_change_media_cb(bs, false);
b338082b
FB
731 }
732}
733
2bc93fed
MK
734void bdrv_close_all(void)
735{
736 BlockDriverState *bs;
737
738 QTAILQ_FOREACH(bs, &bdrv_states, list) {
739 bdrv_close(bs);
740 }
741}
742
d22b2f41
RH
743/* make a BlockDriverState anonymous by removing from bdrv_state list.
744 Also, NULL terminate the device_name to prevent double remove */
745void bdrv_make_anon(BlockDriverState *bs)
746{
747 if (bs->device_name[0] != '\0') {
748 QTAILQ_REMOVE(&bdrv_states, bs, list);
749 }
750 bs->device_name[0] = '\0';
751}
752
b338082b
FB
753void bdrv_delete(BlockDriverState *bs)
754{
fa879d62 755 assert(!bs->dev);
18846dee 756
1b7bdbc1 757 /* remove from list, if necessary */
d22b2f41 758 bdrv_make_anon(bs);
34c6f050 759
b338082b 760 bdrv_close(bs);
66f82cee
KW
761 if (bs->file != NULL) {
762 bdrv_delete(bs->file);
763 }
764
f9092b10 765 assert(bs != bs_snapshots);
7267c094 766 g_free(bs);
fc01f7e7
FB
767}
768
fa879d62
MA
769int bdrv_attach_dev(BlockDriverState *bs, void *dev)
770/* TODO change to DeviceState *dev when all users are qdevified */
18846dee 771{
fa879d62 772 if (bs->dev) {
18846dee
MA
773 return -EBUSY;
774 }
fa879d62 775 bs->dev = dev;
28a7282a 776 bdrv_iostatus_reset(bs);
18846dee
MA
777 return 0;
778}
779
fa879d62
MA
780/* TODO qdevified devices don't use this, remove when devices are qdevified */
781void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
18846dee 782{
fa879d62
MA
783 if (bdrv_attach_dev(bs, dev) < 0) {
784 abort();
785 }
786}
787
788void bdrv_detach_dev(BlockDriverState *bs, void *dev)
789/* TODO change to DeviceState *dev when all users are qdevified */
790{
791 assert(bs->dev == dev);
792 bs->dev = NULL;
0e49de52
MA
793 bs->dev_ops = NULL;
794 bs->dev_opaque = NULL;
29e05f20 795 bs->buffer_alignment = 512;
18846dee
MA
796}
797
fa879d62
MA
798/* TODO change to return DeviceState * when all users are qdevified */
799void *bdrv_get_attached_dev(BlockDriverState *bs)
18846dee 800{
fa879d62 801 return bs->dev;
18846dee
MA
802}
803
0e49de52
MA
804void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
805 void *opaque)
806{
807 bs->dev_ops = ops;
808 bs->dev_opaque = opaque;
2c6942fa
MA
809 if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
810 bs_snapshots = NULL;
811 }
0e49de52
MA
812}
813
7d4b4ba5 814static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
0e49de52 815{
145feb17 816 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
7d4b4ba5 817 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
145feb17
MA
818 }
819}
820
2c6942fa
MA
821bool bdrv_dev_has_removable_media(BlockDriverState *bs)
822{
823 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
824}
825
e4def80b
MA
826bool bdrv_dev_is_tray_open(BlockDriverState *bs)
827{
828 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
829 return bs->dev_ops->is_tray_open(bs->dev_opaque);
830 }
831 return false;
832}
833
145feb17
MA
834static void bdrv_dev_resize_cb(BlockDriverState *bs)
835{
836 if (bs->dev_ops && bs->dev_ops->resize_cb) {
837 bs->dev_ops->resize_cb(bs->dev_opaque);
0e49de52
MA
838 }
839}
840
f107639a
MA
841bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
842{
843 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
844 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
845 }
846 return false;
847}
848
e97fc193
AL
849/*
850 * Run consistency checks on an image
851 *
e076f338 852 * Returns 0 if the check could be completed (it doesn't mean that the image is
a1c7273b 853 * free of errors) or -errno when an internal error occurred. The results of the
e076f338 854 * check are stored in res.
e97fc193 855 */
e076f338 856int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
e97fc193
AL
857{
858 if (bs->drv->bdrv_check == NULL) {
859 return -ENOTSUP;
860 }
861
e076f338 862 memset(res, 0, sizeof(*res));
9ac228e0 863 return bs->drv->bdrv_check(bs, res);
e97fc193
AL
864}
865
8a426614
KW
866#define COMMIT_BUF_SECTORS 2048
867
33e3963e
FB
868/* commit COW file into the raw image */
869int bdrv_commit(BlockDriverState *bs)
870{
19cb3738 871 BlockDriver *drv = bs->drv;
ee181196 872 BlockDriver *backing_drv;
8a426614
KW
873 int64_t sector, total_sectors;
874 int n, ro, open_flags;
4dca4b63 875 int ret = 0, rw_ret = 0;
8a426614 876 uint8_t *buf;
4dca4b63
NS
877 char filename[1024];
878 BlockDriverState *bs_rw, *bs_ro;
33e3963e 879
19cb3738
FB
880 if (!drv)
881 return -ENOMEDIUM;
4dca4b63
NS
882
883 if (!bs->backing_hd) {
884 return -ENOTSUP;
33e3963e
FB
885 }
886
4dca4b63
NS
887 if (bs->backing_hd->keep_read_only) {
888 return -EACCES;
889 }
ee181196
KW
890
891 backing_drv = bs->backing_hd->drv;
4dca4b63
NS
892 ro = bs->backing_hd->read_only;
893 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
894 open_flags = bs->backing_hd->open_flags;
895
896 if (ro) {
897 /* re-open as RW */
898 bdrv_delete(bs->backing_hd);
899 bs->backing_hd = NULL;
900 bs_rw = bdrv_new("");
ee181196
KW
901 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
902 backing_drv);
4dca4b63
NS
903 if (rw_ret < 0) {
904 bdrv_delete(bs_rw);
905 /* try to re-open read-only */
906 bs_ro = bdrv_new("");
ee181196
KW
907 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
908 backing_drv);
4dca4b63
NS
909 if (ret < 0) {
910 bdrv_delete(bs_ro);
911 /* drive not functional anymore */
912 bs->drv = NULL;
913 return ret;
914 }
915 bs->backing_hd = bs_ro;
916 return rw_ret;
917 }
918 bs->backing_hd = bs_rw;
ea2384d3 919 }
33e3963e 920
6ea44308 921 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
7267c094 922 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
8a426614
KW
923
924 for (sector = 0; sector < total_sectors; sector += n) {
925 if (drv->bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
926
927 if (bdrv_read(bs, sector, buf, n) != 0) {
928 ret = -EIO;
929 goto ro_cleanup;
930 }
931
932 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
933 ret = -EIO;
934 goto ro_cleanup;
935 }
ea2384d3 936 }
33e3963e 937 }
95389c86 938
1d44952f
CH
939 if (drv->bdrv_make_empty) {
940 ret = drv->bdrv_make_empty(bs);
941 bdrv_flush(bs);
942 }
95389c86 943
3f5075ae
CH
944 /*
945 * Make sure all data we wrote to the backing device is actually
946 * stable on disk.
947 */
948 if (bs->backing_hd)
949 bdrv_flush(bs->backing_hd);
4dca4b63
NS
950
951ro_cleanup:
7267c094 952 g_free(buf);
4dca4b63
NS
953
954 if (ro) {
955 /* re-open as RO */
956 bdrv_delete(bs->backing_hd);
957 bs->backing_hd = NULL;
958 bs_ro = bdrv_new("");
ee181196
KW
959 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
960 backing_drv);
4dca4b63
NS
961 if (ret < 0) {
962 bdrv_delete(bs_ro);
963 /* drive not functional anymore */
964 bs->drv = NULL;
965 return ret;
966 }
967 bs->backing_hd = bs_ro;
968 bs->backing_hd->keep_read_only = 0;
969 }
970
1d44952f 971 return ret;
33e3963e
FB
972}
973
6ab4b5ab
MA
974void bdrv_commit_all(void)
975{
976 BlockDriverState *bs;
977
978 QTAILQ_FOREACH(bs, &bdrv_states, list) {
979 bdrv_commit(bs);
980 }
981}
982
756e6736
KW
983/*
984 * Return values:
985 * 0 - success
986 * -EINVAL - backing format specified, but no file
987 * -ENOSPC - can't update the backing file because no space is left in the
988 * image file header
989 * -ENOTSUP - format driver doesn't support changing the backing file
990 */
991int bdrv_change_backing_file(BlockDriverState *bs,
992 const char *backing_file, const char *backing_fmt)
993{
994 BlockDriver *drv = bs->drv;
995
996 if (drv->bdrv_change_backing_file != NULL) {
997 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
998 } else {
999 return -ENOTSUP;
1000 }
1001}
1002
71d0770c
AL
1003static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1004 size_t size)
1005{
1006 int64_t len;
1007
1008 if (!bdrv_is_inserted(bs))
1009 return -ENOMEDIUM;
1010
1011 if (bs->growable)
1012 return 0;
1013
1014 len = bdrv_getlength(bs);
1015
fbb7b4e0
KW
1016 if (offset < 0)
1017 return -EIO;
1018
1019 if ((offset > len) || (len - offset < size))
71d0770c
AL
1020 return -EIO;
1021
1022 return 0;
1023}
1024
1025static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1026 int nb_sectors)
1027{
eb5a3165
JS
1028 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1029 nb_sectors * BDRV_SECTOR_SIZE);
71d0770c
AL
1030}
1031
e7a8a783
KW
1032static inline bool bdrv_has_async_rw(BlockDriver *drv)
1033{
1034 return drv->bdrv_co_readv != bdrv_co_readv_em
1035 || drv->bdrv_aio_readv != bdrv_aio_readv_em;
1036}
1037
1038static inline bool bdrv_has_async_flush(BlockDriver *drv)
1039{
1040 return drv->bdrv_aio_flush != bdrv_aio_flush_em;
1041}
1042
19cb3738 1043/* return < 0 if error. See bdrv_write() for the return codes */
5fafdf24 1044int bdrv_read(BlockDriverState *bs, int64_t sector_num,
fc01f7e7
FB
1045 uint8_t *buf, int nb_sectors)
1046{
ea2384d3
FB
1047 BlockDriver *drv = bs->drv;
1048
19cb3738
FB
1049 if (!drv)
1050 return -ENOMEDIUM;
e7a8a783
KW
1051
1052 if (bdrv_has_async_rw(drv) && qemu_in_coroutine()) {
1053 QEMUIOVector qiov;
1054 struct iovec iov = {
1055 .iov_base = (void *)buf,
1056 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1057 };
1058
1059 qemu_iovec_init_external(&qiov, &iov, 1);
1060 return bdrv_co_readv(bs, sector_num, nb_sectors, &qiov);
1061 }
1062
71d0770c
AL
1063 if (bdrv_check_request(bs, sector_num, nb_sectors))
1064 return -EIO;
b338082b 1065
eda578e5 1066 return drv->bdrv_read(bs, sector_num, buf, nb_sectors);
fc01f7e7
FB
1067}
1068
7cd1e32a 1069static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
a55eb92c 1070 int nb_sectors, int dirty)
7cd1e32a
LS
1071{
1072 int64_t start, end;
c6d22830 1073 unsigned long val, idx, bit;
a55eb92c 1074
6ea44308 1075 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
c6d22830 1076 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
a55eb92c
JK
1077
1078 for (; start <= end; start++) {
c6d22830
JK
1079 idx = start / (sizeof(unsigned long) * 8);
1080 bit = start % (sizeof(unsigned long) * 8);
1081 val = bs->dirty_bitmap[idx];
1082 if (dirty) {
6d59fec1 1083 if (!(val & (1UL << bit))) {
aaa0eb75 1084 bs->dirty_count++;
6d59fec1 1085 val |= 1UL << bit;
aaa0eb75 1086 }
c6d22830 1087 } else {
6d59fec1 1088 if (val & (1UL << bit)) {
aaa0eb75 1089 bs->dirty_count--;
6d59fec1 1090 val &= ~(1UL << bit);
aaa0eb75 1091 }
c6d22830
JK
1092 }
1093 bs->dirty_bitmap[idx] = val;
7cd1e32a
LS
1094 }
1095}
1096
5fafdf24 1097/* Return < 0 if error. Important errors are:
19cb3738
FB
1098 -EIO generic I/O error (may happen for all errors)
1099 -ENOMEDIUM No media inserted.
1100 -EINVAL Invalid sector number or nb_sectors
1101 -EACCES Trying to write a read-only device
1102*/
5fafdf24 1103int bdrv_write(BlockDriverState *bs, int64_t sector_num,
fc01f7e7
FB
1104 const uint8_t *buf, int nb_sectors)
1105{
83f64091 1106 BlockDriver *drv = bs->drv;
e7a8a783 1107
19cb3738
FB
1108 if (!bs->drv)
1109 return -ENOMEDIUM;
e7a8a783
KW
1110
1111 if (bdrv_has_async_rw(drv) && qemu_in_coroutine()) {
1112 QEMUIOVector qiov;
1113 struct iovec iov = {
1114 .iov_base = (void *)buf,
1115 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1116 };
1117
1118 qemu_iovec_init_external(&qiov, &iov, 1);
1119 return bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
1120 }
1121
0849bf08 1122 if (bs->read_only)
19cb3738 1123 return -EACCES;
71d0770c
AL
1124 if (bdrv_check_request(bs, sector_num, nb_sectors))
1125 return -EIO;
a55eb92c 1126
c6d22830 1127 if (bs->dirty_bitmap) {
7cd1e32a
LS
1128 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1129 }
a55eb92c 1130
294cc35f
KW
1131 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1132 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1133 }
1134
42fb2807 1135 return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
83f64091
FB
1136}
1137
eda578e5
AL
1138int bdrv_pread(BlockDriverState *bs, int64_t offset,
1139 void *buf, int count1)
83f64091 1140{
6ea44308 1141 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
83f64091
FB
1142 int len, nb_sectors, count;
1143 int64_t sector_num;
9a8c4cce 1144 int ret;
83f64091
FB
1145
1146 count = count1;
1147 /* first read to align to sector start */
6ea44308 1148 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
83f64091
FB
1149 if (len > count)
1150 len = count;
6ea44308 1151 sector_num = offset >> BDRV_SECTOR_BITS;
83f64091 1152 if (len > 0) {
9a8c4cce
KW
1153 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1154 return ret;
6ea44308 1155 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
83f64091
FB
1156 count -= len;
1157 if (count == 0)
1158 return count1;
1159 sector_num++;
1160 buf += len;
1161 }
1162
1163 /* read the sectors "in place" */
6ea44308 1164 nb_sectors = count >> BDRV_SECTOR_BITS;
83f64091 1165 if (nb_sectors > 0) {
9a8c4cce
KW
1166 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1167 return ret;
83f64091 1168 sector_num += nb_sectors;
6ea44308 1169 len = nb_sectors << BDRV_SECTOR_BITS;
83f64091
FB
1170 buf += len;
1171 count -= len;
1172 }
1173
1174 /* add data from the last sector */
1175 if (count > 0) {
9a8c4cce
KW
1176 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1177 return ret;
83f64091
FB
1178 memcpy(buf, tmp_buf, count);
1179 }
1180 return count1;
1181}
1182
eda578e5
AL
1183int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1184 const void *buf, int count1)
83f64091 1185{
6ea44308 1186 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
83f64091
FB
1187 int len, nb_sectors, count;
1188 int64_t sector_num;
9a8c4cce 1189 int ret;
83f64091
FB
1190
1191 count = count1;
1192 /* first write to align to sector start */
6ea44308 1193 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
83f64091
FB
1194 if (len > count)
1195 len = count;
6ea44308 1196 sector_num = offset >> BDRV_SECTOR_BITS;
83f64091 1197 if (len > 0) {
9a8c4cce
KW
1198 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1199 return ret;
6ea44308 1200 memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
9a8c4cce
KW
1201 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1202 return ret;
83f64091
FB
1203 count -= len;
1204 if (count == 0)
1205 return count1;
1206 sector_num++;
1207 buf += len;
1208 }
1209
1210 /* write the sectors "in place" */
6ea44308 1211 nb_sectors = count >> BDRV_SECTOR_BITS;
83f64091 1212 if (nb_sectors > 0) {
9a8c4cce
KW
1213 if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1214 return ret;
83f64091 1215 sector_num += nb_sectors;
6ea44308 1216 len = nb_sectors << BDRV_SECTOR_BITS;
83f64091
FB
1217 buf += len;
1218 count -= len;
1219 }
1220
1221 /* add data from the last sector */
1222 if (count > 0) {
9a8c4cce
KW
1223 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1224 return ret;
83f64091 1225 memcpy(tmp_buf, buf, count);
9a8c4cce
KW
1226 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1227 return ret;
83f64091
FB
1228 }
1229 return count1;
1230}
83f64091 1231
f08145fe
KW
1232/*
1233 * Writes to the file and ensures that no writes are reordered across this
1234 * request (acts as a barrier)
1235 *
1236 * Returns 0 on success, -errno in error cases.
1237 */
1238int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1239 const void *buf, int count)
1240{
1241 int ret;
1242
1243 ret = bdrv_pwrite(bs, offset, buf, count);
1244 if (ret < 0) {
1245 return ret;
1246 }
1247
92196b2f
SH
1248 /* No flush needed for cache modes that use O_DSYNC */
1249 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
f08145fe
KW
1250 bdrv_flush(bs);
1251 }
1252
1253 return 0;
1254}
1255
da1fa91d
KW
1256int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
1257 int nb_sectors, QEMUIOVector *qiov)
1258{
1259 BlockDriver *drv = bs->drv;
1260
1261 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
1262
1263 if (!drv) {
1264 return -ENOMEDIUM;
1265 }
1266 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1267 return -EIO;
1268 }
1269
1270 return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
1271}
1272
1273int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1274 int nb_sectors, QEMUIOVector *qiov)
1275{
1276 BlockDriver *drv = bs->drv;
1277
1278 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1279
1280 if (!bs->drv) {
1281 return -ENOMEDIUM;
1282 }
1283 if (bs->read_only) {
1284 return -EACCES;
1285 }
1286 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1287 return -EIO;
1288 }
1289
1290 if (bs->dirty_bitmap) {
1291 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1292 }
1293
1294 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1295 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1296 }
1297
1298 return drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1299}
1300
83f64091
FB
1301/**
1302 * Truncate file to 'offset' bytes (needed only for file protocols)
1303 */
1304int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1305{
1306 BlockDriver *drv = bs->drv;
51762288 1307 int ret;
83f64091 1308 if (!drv)
19cb3738 1309 return -ENOMEDIUM;
83f64091
FB
1310 if (!drv->bdrv_truncate)
1311 return -ENOTSUP;
59f2689d
NS
1312 if (bs->read_only)
1313 return -EACCES;
8591675f
MT
1314 if (bdrv_in_use(bs))
1315 return -EBUSY;
51762288
SH
1316 ret = drv->bdrv_truncate(bs, offset);
1317 if (ret == 0) {
1318 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
145feb17 1319 bdrv_dev_resize_cb(bs);
51762288
SH
1320 }
1321 return ret;
83f64091
FB
1322}
1323
4a1d5e1f
FZ
1324/**
1325 * Length of a allocated file in bytes. Sparse files are counted by actual
1326 * allocated space. Return < 0 if error or unknown.
1327 */
1328int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1329{
1330 BlockDriver *drv = bs->drv;
1331 if (!drv) {
1332 return -ENOMEDIUM;
1333 }
1334 if (drv->bdrv_get_allocated_file_size) {
1335 return drv->bdrv_get_allocated_file_size(bs);
1336 }
1337 if (bs->file) {
1338 return bdrv_get_allocated_file_size(bs->file);
1339 }
1340 return -ENOTSUP;
1341}
1342
83f64091
FB
1343/**
1344 * Length of a file in bytes. Return < 0 if error or unknown.
1345 */
1346int64_t bdrv_getlength(BlockDriverState *bs)
1347{
1348 BlockDriver *drv = bs->drv;
1349 if (!drv)
19cb3738 1350 return -ENOMEDIUM;
51762288 1351
2c6942fa 1352 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
46a4e4e6
SH
1353 if (drv->bdrv_getlength) {
1354 return drv->bdrv_getlength(bs);
1355 }
83f64091 1356 }
46a4e4e6 1357 return bs->total_sectors * BDRV_SECTOR_SIZE;
fc01f7e7
FB
1358}
1359
19cb3738 1360/* return 0 as number of sectors if no device present or error */
96b8f136 1361void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
fc01f7e7 1362{
19cb3738
FB
1363 int64_t length;
1364 length = bdrv_getlength(bs);
1365 if (length < 0)
1366 length = 0;
1367 else
6ea44308 1368 length = length >> BDRV_SECTOR_BITS;
19cb3738 1369 *nb_sectors_ptr = length;
fc01f7e7 1370}
cf98951b 1371
f3d54fc4
AL
/* One 16-byte entry of the MSDOS partition table, as read from disk. */
struct partition {
    uint8_t boot_ind;       /* 0x80 - active */
    uint8_t head;           /* starting head */
    uint8_t sector;         /* starting sector */
    uint8_t cyl;            /* starting cylinder */
    uint8_t sys_ind;        /* What partition type */
    uint8_t end_head;       /* end head */
    uint8_t end_sector;     /* end sector */
    uint8_t end_cyl;        /* end cylinder */
    uint32_t start_sect;    /* starting sector counting from 0 */
    uint32_t nr_sects;      /* nr of sectors in partition */
} QEMU_PACKED;
f3d54fc4
AL
1384
1385/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1386static int guess_disk_lchs(BlockDriverState *bs,
1387 int *pcylinders, int *pheads, int *psectors)
1388{
eb5a3165 1389 uint8_t buf[BDRV_SECTOR_SIZE];
f3d54fc4
AL
1390 int ret, i, heads, sectors, cylinders;
1391 struct partition *p;
1392 uint32_t nr_sects;
a38131b6 1393 uint64_t nb_sectors;
f3d54fc4
AL
1394
1395 bdrv_get_geometry(bs, &nb_sectors);
1396
1397 ret = bdrv_read(bs, 0, buf, 1);
1398 if (ret < 0)
1399 return -1;
1400 /* test msdos magic */
1401 if (buf[510] != 0x55 || buf[511] != 0xaa)
1402 return -1;
1403 for(i = 0; i < 4; i++) {
1404 p = ((struct partition *)(buf + 0x1be)) + i;
1405 nr_sects = le32_to_cpu(p->nr_sects);
1406 if (nr_sects && p->end_head) {
1407 /* We make the assumption that the partition terminates on
1408 a cylinder boundary */
1409 heads = p->end_head + 1;
1410 sectors = p->end_sector & 63;
1411 if (sectors == 0)
1412 continue;
1413 cylinders = nb_sectors / (heads * sectors);
1414 if (cylinders < 1 || cylinders > 16383)
1415 continue;
1416 *pheads = heads;
1417 *psectors = sectors;
1418 *pcylinders = cylinders;
1419#if 0
1420 printf("guessed geometry: LCHS=%d %d %d\n",
1421 cylinders, heads, sectors);
1422#endif
1423 return 0;
1424 }
1425 }
1426 return -1;
1427}
1428
1429void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
1430{
1431 int translation, lba_detected = 0;
1432 int cylinders, heads, secs;
a38131b6 1433 uint64_t nb_sectors;
f3d54fc4
AL
1434
1435 /* if a geometry hint is available, use it */
1436 bdrv_get_geometry(bs, &nb_sectors);
1437 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
1438 translation = bdrv_get_translation_hint(bs);
1439 if (cylinders != 0) {
1440 *pcyls = cylinders;
1441 *pheads = heads;
1442 *psecs = secs;
1443 } else {
1444 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
1445 if (heads > 16) {
1446 /* if heads > 16, it means that a BIOS LBA
1447 translation was active, so the default
1448 hardware geometry is OK */
1449 lba_detected = 1;
1450 goto default_geometry;
1451 } else {
1452 *pcyls = cylinders;
1453 *pheads = heads;
1454 *psecs = secs;
1455 /* disable any translation to be in sync with
1456 the logical geometry */
1457 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
1458 bdrv_set_translation_hint(bs,
1459 BIOS_ATA_TRANSLATION_NONE);
1460 }
1461 }
1462 } else {
1463 default_geometry:
1464 /* if no geometry, use a standard physical disk geometry */
1465 cylinders = nb_sectors / (16 * 63);
1466
1467 if (cylinders > 16383)
1468 cylinders = 16383;
1469 else if (cylinders < 2)
1470 cylinders = 2;
1471 *pcyls = cylinders;
1472 *pheads = 16;
1473 *psecs = 63;
1474 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
1475 if ((*pcyls * *pheads) <= 131072) {
1476 bdrv_set_translation_hint(bs,
1477 BIOS_ATA_TRANSLATION_LARGE);
1478 } else {
1479 bdrv_set_translation_hint(bs,
1480 BIOS_ATA_TRANSLATION_LBA);
1481 }
1482 }
1483 }
1484 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
1485 }
1486}
1487
5fafdf24 1488void bdrv_set_geometry_hint(BlockDriverState *bs,
b338082b
FB
1489 int cyls, int heads, int secs)
1490{
1491 bs->cyls = cyls;
1492 bs->heads = heads;
1493 bs->secs = secs;
1494}
1495
46d4767d
FB
1496void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
1497{
1498 bs->translation = translation;
1499}
1500
5fafdf24 1501void bdrv_get_geometry_hint(BlockDriverState *bs,
b338082b
FB
1502 int *pcyls, int *pheads, int *psecs)
1503{
1504 *pcyls = bs->cyls;
1505 *pheads = bs->heads;
1506 *psecs = bs->secs;
1507}
1508
5bbdbb46
BS
1509/* Recognize floppy formats */
1510typedef struct FDFormat {
1511 FDriveType drive;
1512 uint8_t last_sect;
1513 uint8_t max_track;
1514 uint8_t max_head;
1515} FDFormat;
1516
1517static const FDFormat fd_formats[] = {
1518 /* First entry is default format */
1519 /* 1.44 MB 3"1/2 floppy disks */
1520 { FDRIVE_DRV_144, 18, 80, 1, },
1521 { FDRIVE_DRV_144, 20, 80, 1, },
1522 { FDRIVE_DRV_144, 21, 80, 1, },
1523 { FDRIVE_DRV_144, 21, 82, 1, },
1524 { FDRIVE_DRV_144, 21, 83, 1, },
1525 { FDRIVE_DRV_144, 22, 80, 1, },
1526 { FDRIVE_DRV_144, 23, 80, 1, },
1527 { FDRIVE_DRV_144, 24, 80, 1, },
1528 /* 2.88 MB 3"1/2 floppy disks */
1529 { FDRIVE_DRV_288, 36, 80, 1, },
1530 { FDRIVE_DRV_288, 39, 80, 1, },
1531 { FDRIVE_DRV_288, 40, 80, 1, },
1532 { FDRIVE_DRV_288, 44, 80, 1, },
1533 { FDRIVE_DRV_288, 48, 80, 1, },
1534 /* 720 kB 3"1/2 floppy disks */
1535 { FDRIVE_DRV_144, 9, 80, 1, },
1536 { FDRIVE_DRV_144, 10, 80, 1, },
1537 { FDRIVE_DRV_144, 10, 82, 1, },
1538 { FDRIVE_DRV_144, 10, 83, 1, },
1539 { FDRIVE_DRV_144, 13, 80, 1, },
1540 { FDRIVE_DRV_144, 14, 80, 1, },
1541 /* 1.2 MB 5"1/4 floppy disks */
1542 { FDRIVE_DRV_120, 15, 80, 1, },
1543 { FDRIVE_DRV_120, 18, 80, 1, },
1544 { FDRIVE_DRV_120, 18, 82, 1, },
1545 { FDRIVE_DRV_120, 18, 83, 1, },
1546 { FDRIVE_DRV_120, 20, 80, 1, },
1547 /* 720 kB 5"1/4 floppy disks */
1548 { FDRIVE_DRV_120, 9, 80, 1, },
1549 { FDRIVE_DRV_120, 11, 80, 1, },
1550 /* 360 kB 5"1/4 floppy disks */
1551 { FDRIVE_DRV_120, 9, 40, 1, },
1552 { FDRIVE_DRV_120, 9, 40, 0, },
1553 { FDRIVE_DRV_120, 10, 41, 1, },
1554 { FDRIVE_DRV_120, 10, 42, 1, },
1555 /* 320 kB 5"1/4 floppy disks */
1556 { FDRIVE_DRV_120, 8, 40, 1, },
1557 { FDRIVE_DRV_120, 8, 40, 0, },
1558 /* 360 kB must match 5"1/4 better than 3"1/2... */
1559 { FDRIVE_DRV_144, 9, 80, 0, },
1560 /* end */
1561 { FDRIVE_DRV_NONE, -1, -1, 0, },
1562};
1563
1564void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
1565 int *max_track, int *last_sect,
1566 FDriveType drive_in, FDriveType *drive)
1567{
1568 const FDFormat *parse;
1569 uint64_t nb_sectors, size;
1570 int i, first_match, match;
1571
1572 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
1573 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
1574 /* User defined disk */
1575 } else {
1576 bdrv_get_geometry(bs, &nb_sectors);
1577 match = -1;
1578 first_match = -1;
1579 for (i = 0; ; i++) {
1580 parse = &fd_formats[i];
1581 if (parse->drive == FDRIVE_DRV_NONE) {
1582 break;
1583 }
1584 if (drive_in == parse->drive ||
1585 drive_in == FDRIVE_DRV_NONE) {
1586 size = (parse->max_head + 1) * parse->max_track *
1587 parse->last_sect;
1588 if (nb_sectors == size) {
1589 match = i;
1590 break;
1591 }
1592 if (first_match == -1) {
1593 first_match = i;
1594 }
1595 }
1596 }
1597 if (match == -1) {
1598 if (first_match == -1) {
1599 match = 1;
1600 } else {
1601 match = first_match;
1602 }
1603 parse = &fd_formats[match];
1604 }
1605 *nb_heads = parse->max_head + 1;
1606 *max_track = parse->max_track;
1607 *last_sect = parse->last_sect;
1608 *drive = parse->drive;
1609 }
1610}
1611
46d4767d
FB
1612int bdrv_get_translation_hint(BlockDriverState *bs)
1613{
1614 return bs->translation;
1615}
1616
abd7f68d
MA
1617void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
1618 BlockErrorAction on_write_error)
1619{
1620 bs->on_read_error = on_read_error;
1621 bs->on_write_error = on_write_error;
1622}
1623
1624BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
1625{
1626 return is_read ? bs->on_read_error : bs->on_write_error;
1627}
1628
b338082b
FB
1629int bdrv_is_read_only(BlockDriverState *bs)
1630{
1631 return bs->read_only;
1632}
1633
985a03b0
TS
1634int bdrv_is_sg(BlockDriverState *bs)
1635{
1636 return bs->sg;
1637}
1638
e900a7b7
CH
1639int bdrv_enable_write_cache(BlockDriverState *bs)
1640{
1641 return bs->enable_write_cache;
1642}
1643
ea2384d3
FB
1644int bdrv_is_encrypted(BlockDriverState *bs)
1645{
1646 if (bs->backing_hd && bs->backing_hd->encrypted)
1647 return 1;
1648 return bs->encrypted;
1649}
1650
c0f4ce77
AL
1651int bdrv_key_required(BlockDriverState *bs)
1652{
1653 BlockDriverState *backing_hd = bs->backing_hd;
1654
1655 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
1656 return 1;
1657 return (bs->encrypted && !bs->valid_key);
1658}
1659
ea2384d3
FB
1660int bdrv_set_key(BlockDriverState *bs, const char *key)
1661{
1662 int ret;
1663 if (bs->backing_hd && bs->backing_hd->encrypted) {
1664 ret = bdrv_set_key(bs->backing_hd, key);
1665 if (ret < 0)
1666 return ret;
1667 if (!bs->encrypted)
1668 return 0;
1669 }
fd04a2ae
SH
1670 if (!bs->encrypted) {
1671 return -EINVAL;
1672 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
1673 return -ENOMEDIUM;
1674 }
c0f4ce77 1675 ret = bs->drv->bdrv_set_key(bs, key);
bb5fc20f
AL
1676 if (ret < 0) {
1677 bs->valid_key = 0;
1678 } else if (!bs->valid_key) {
1679 bs->valid_key = 1;
1680 /* call the change callback now, we skipped it on open */
7d4b4ba5 1681 bdrv_dev_change_media_cb(bs, true);
bb5fc20f 1682 }
c0f4ce77 1683 return ret;
ea2384d3
FB
1684}
1685
1686void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
1687{
19cb3738 1688 if (!bs->drv) {
ea2384d3
FB
1689 buf[0] = '\0';
1690 } else {
1691 pstrcpy(buf, buf_size, bs->drv->format_name);
1692 }
1693}
1694
5fafdf24 1695void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
ea2384d3
FB
1696 void *opaque)
1697{
1698 BlockDriver *drv;
1699
8a22f02a 1700 QLIST_FOREACH(drv, &bdrv_drivers, list) {
ea2384d3
FB
1701 it(opaque, drv->format_name);
1702 }
1703}
1704
b338082b
FB
1705BlockDriverState *bdrv_find(const char *name)
1706{
1707 BlockDriverState *bs;
1708
1b7bdbc1
SH
1709 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1710 if (!strcmp(name, bs->device_name)) {
b338082b 1711 return bs;
1b7bdbc1 1712 }
b338082b
FB
1713 }
1714 return NULL;
1715}
1716
2f399b0a
MA
1717BlockDriverState *bdrv_next(BlockDriverState *bs)
1718{
1719 if (!bs) {
1720 return QTAILQ_FIRST(&bdrv_states);
1721 }
1722 return QTAILQ_NEXT(bs, list);
1723}
1724
51de9760 1725void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
81d0912d
FB
1726{
1727 BlockDriverState *bs;
1728
1b7bdbc1 1729 QTAILQ_FOREACH(bs, &bdrv_states, list) {
51de9760 1730 it(opaque, bs);
81d0912d
FB
1731 }
1732}
1733
ea2384d3
FB
1734const char *bdrv_get_device_name(BlockDriverState *bs)
1735{
1736 return bs->device_name;
1737}
1738
205ef796 1739int bdrv_flush(BlockDriverState *bs)
7a6cba61 1740{
016f5cf6 1741 if (bs->open_flags & BDRV_O_NO_FLUSH) {
205ef796
KW
1742 return 0;
1743 }
1744
e7a8a783
KW
1745 if (bs->drv && bdrv_has_async_flush(bs->drv) && qemu_in_coroutine()) {
1746 return bdrv_co_flush_em(bs);
1747 }
1748
205ef796
KW
1749 if (bs->drv && bs->drv->bdrv_flush) {
1750 return bs->drv->bdrv_flush(bs);
016f5cf6
AG
1751 }
1752
205ef796
KW
1753 /*
1754 * Some block drivers always operate in either writethrough or unsafe mode
1755 * and don't support bdrv_flush therefore. Usually qemu doesn't know how
1756 * the server works (because the behaviour is hardcoded or depends on
1757 * server-side configuration), so we can't ensure that everything is safe
1758 * on disk. Returning an error doesn't work because that would break guests
1759 * even if the server operates in writethrough mode.
1760 *
1761 * Let's hope the user knows what he's doing.
1762 */
1763 return 0;
7a6cba61
PB
1764}
1765
c6ca28d6
AL
1766void bdrv_flush_all(void)
1767{
1768 BlockDriverState *bs;
1769
1b7bdbc1 1770 QTAILQ_FOREACH(bs, &bdrv_states, list) {
c602a489 1771 if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
c6ca28d6 1772 bdrv_flush(bs);
1b7bdbc1
SH
1773 }
1774 }
c6ca28d6
AL
1775}
1776
f2feebbd
KW
1777int bdrv_has_zero_init(BlockDriverState *bs)
1778{
1779 assert(bs->drv);
1780
336c1c12
KW
1781 if (bs->drv->bdrv_has_zero_init) {
1782 return bs->drv->bdrv_has_zero_init(bs);
f2feebbd
KW
1783 }
1784
1785 return 1;
1786}
1787
bb8bf76f
CH
1788int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
1789{
1790 if (!bs->drv) {
1791 return -ENOMEDIUM;
1792 }
1793 if (!bs->drv->bdrv_discard) {
1794 return 0;
1795 }
1796 return bs->drv->bdrv_discard(bs, sector_num, nb_sectors);
1797}
1798
f58c7b35
TS
1799/*
1800 * Returns true iff the specified sector is present in the disk image. Drivers
1801 * not implementing the functionality are assumed to not support backing files,
1802 * hence all their sectors are reported as allocated.
1803 *
1804 * 'pnum' is set to the number of sectors (including and immediately following
1805 * the specified sector) that are known to be in the same
1806 * allocated/unallocated state.
1807 *
1808 * 'nb_sectors' is the max value 'pnum' should be set to.
1809 */
1810int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1811 int *pnum)
1812{
1813 int64_t n;
1814 if (!bs->drv->bdrv_is_allocated) {
1815 if (sector_num >= bs->total_sectors) {
1816 *pnum = 0;
1817 return 0;
1818 }
1819 n = bs->total_sectors - sector_num;
1820 *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1821 return 1;
1822 }
1823 return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1824}
1825
2582bfed
LC
1826void bdrv_mon_event(const BlockDriverState *bdrv,
1827 BlockMonEventAction action, int is_read)
1828{
1829 QObject *data;
1830 const char *action_str;
1831
1832 switch (action) {
1833 case BDRV_ACTION_REPORT:
1834 action_str = "report";
1835 break;
1836 case BDRV_ACTION_IGNORE:
1837 action_str = "ignore";
1838 break;
1839 case BDRV_ACTION_STOP:
1840 action_str = "stop";
1841 break;
1842 default:
1843 abort();
1844 }
1845
1846 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1847 bdrv->device_name,
1848 action_str,
1849 is_read ? "read" : "write");
1850 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1851
1852 qobject_decref(data);
1853}
1854
d15e5465 1855static void bdrv_print_dict(QObject *obj, void *opaque)
b338082b 1856{
d15e5465
LC
1857 QDict *bs_dict;
1858 Monitor *mon = opaque;
1859
1860 bs_dict = qobject_to_qdict(obj);
1861
d8aeeb31 1862 monitor_printf(mon, "%s: removable=%d",
d15e5465 1863 qdict_get_str(bs_dict, "device"),
d15e5465
LC
1864 qdict_get_bool(bs_dict, "removable"));
1865
1866 if (qdict_get_bool(bs_dict, "removable")) {
1867 monitor_printf(mon, " locked=%d", qdict_get_bool(bs_dict, "locked"));
e4def80b
MA
1868 monitor_printf(mon, " tray-open=%d",
1869 qdict_get_bool(bs_dict, "tray-open"));
d15e5465 1870 }
d15e5465
LC
1871 if (qdict_haskey(bs_dict, "inserted")) {
1872 QDict *qdict = qobject_to_qdict(qdict_get(bs_dict, "inserted"));
1873
1874 monitor_printf(mon, " file=");
1875 monitor_print_filename(mon, qdict_get_str(qdict, "file"));
1876 if (qdict_haskey(qdict, "backing_file")) {
1877 monitor_printf(mon, " backing_file=");
1878 monitor_print_filename(mon, qdict_get_str(qdict, "backing_file"));
1879 }
1880 monitor_printf(mon, " ro=%d drv=%s encrypted=%d",
1881 qdict_get_bool(qdict, "ro"),
1882 qdict_get_str(qdict, "drv"),
1883 qdict_get_bool(qdict, "encrypted"));
1884 } else {
1885 monitor_printf(mon, " [not inserted]");
1886 }
1887
1888 monitor_printf(mon, "\n");
1889}
1890
1891void bdrv_info_print(Monitor *mon, const QObject *data)
1892{
1893 qlist_iter(qobject_to_qlist(data), bdrv_print_dict, mon);
1894}
1895
d15e5465
LC
1896void bdrv_info(Monitor *mon, QObject **ret_data)
1897{
1898 QList *bs_list;
b338082b
FB
1899 BlockDriverState *bs;
1900
d15e5465
LC
1901 bs_list = qlist_new();
1902
1b7bdbc1 1903 QTAILQ_FOREACH(bs, &bdrv_states, list) {
d15e5465 1904 QObject *bs_obj;
e4def80b 1905 QDict *bs_dict;
d15e5465 1906
d8aeeb31 1907 bs_obj = qobject_from_jsonf("{ 'device': %s, 'type': 'unknown', "
d15e5465 1908 "'removable': %i, 'locked': %i }",
2c6942fa
MA
1909 bs->device_name,
1910 bdrv_dev_has_removable_media(bs),
f107639a 1911 bdrv_dev_is_medium_locked(bs));
e4def80b 1912 bs_dict = qobject_to_qdict(bs_obj);
d15e5465 1913
e4def80b
MA
1914 if (bdrv_dev_has_removable_media(bs)) {
1915 qdict_put(bs_dict, "tray-open",
1916 qbool_from_int(bdrv_dev_is_tray_open(bs)));
1917 }
19cb3738 1918 if (bs->drv) {
d15e5465 1919 QObject *obj;
d15e5465
LC
1920
1921 obj = qobject_from_jsonf("{ 'file': %s, 'ro': %i, 'drv': %s, "
1922 "'encrypted': %i }",
1923 bs->filename, bs->read_only,
1924 bs->drv->format_name,
1925 bdrv_is_encrypted(bs));
fef30743 1926 if (bs->backing_file[0] != '\0') {
d15e5465
LC
1927 QDict *qdict = qobject_to_qdict(obj);
1928 qdict_put(qdict, "backing_file",
1929 qstring_from_str(bs->backing_file));
376253ec 1930 }
d15e5465
LC
1931
1932 qdict_put_obj(bs_dict, "inserted", obj);
b338082b 1933 }
d15e5465 1934 qlist_append_obj(bs_list, bs_obj);
b338082b 1935 }
d15e5465
LC
1936
1937 *ret_data = QOBJECT(bs_list);
b338082b 1938}
a36e69dd 1939
218a536a 1940static void bdrv_stats_iter(QObject *data, void *opaque)
a36e69dd 1941{
218a536a
LC
1942 QDict *qdict;
1943 Monitor *mon = opaque;
1944
1945 qdict = qobject_to_qdict(data);
1946 monitor_printf(mon, "%s:", qdict_get_str(qdict, "device"));
1947
1948 qdict = qobject_to_qdict(qdict_get(qdict, "stats"));
1949 monitor_printf(mon, " rd_bytes=%" PRId64
1950 " wr_bytes=%" PRId64
1951 " rd_operations=%" PRId64
1952 " wr_operations=%" PRId64
e8045d67 1953 " flush_operations=%" PRId64
c488c7f6
CH
1954 " wr_total_time_ns=%" PRId64
1955 " rd_total_time_ns=%" PRId64
1956 " flush_total_time_ns=%" PRId64
218a536a
LC
1957 "\n",
1958 qdict_get_int(qdict, "rd_bytes"),
1959 qdict_get_int(qdict, "wr_bytes"),
1960 qdict_get_int(qdict, "rd_operations"),
e8045d67 1961 qdict_get_int(qdict, "wr_operations"),
c488c7f6
CH
1962 qdict_get_int(qdict, "flush_operations"),
1963 qdict_get_int(qdict, "wr_total_time_ns"),
1964 qdict_get_int(qdict, "rd_total_time_ns"),
1965 qdict_get_int(qdict, "flush_total_time_ns"));
218a536a
LC
1966}
1967
1968void bdrv_stats_print(Monitor *mon, const QObject *data)
1969{
1970 qlist_iter(qobject_to_qlist(data), bdrv_stats_iter, mon);
1971}
1972
294cc35f
KW
1973static QObject* bdrv_info_stats_bs(BlockDriverState *bs)
1974{
1975 QObject *res;
1976 QDict *dict;
1977
1978 res = qobject_from_jsonf("{ 'stats': {"
1979 "'rd_bytes': %" PRId64 ","
1980 "'wr_bytes': %" PRId64 ","
1981 "'rd_operations': %" PRId64 ","
1982 "'wr_operations': %" PRId64 ","
e8045d67 1983 "'wr_highest_offset': %" PRId64 ","
c488c7f6
CH
1984 "'flush_operations': %" PRId64 ","
1985 "'wr_total_time_ns': %" PRId64 ","
1986 "'rd_total_time_ns': %" PRId64 ","
1987 "'flush_total_time_ns': %" PRId64
294cc35f 1988 "} }",
a597e79c
CH
1989 bs->nr_bytes[BDRV_ACCT_READ],
1990 bs->nr_bytes[BDRV_ACCT_WRITE],
1991 bs->nr_ops[BDRV_ACCT_READ],
1992 bs->nr_ops[BDRV_ACCT_WRITE],
5ffbbc67 1993 bs->wr_highest_sector *
e8045d67 1994 (uint64_t)BDRV_SECTOR_SIZE,
c488c7f6
CH
1995 bs->nr_ops[BDRV_ACCT_FLUSH],
1996 bs->total_time_ns[BDRV_ACCT_WRITE],
1997 bs->total_time_ns[BDRV_ACCT_READ],
1998 bs->total_time_ns[BDRV_ACCT_FLUSH]);
294cc35f
KW
1999 dict = qobject_to_qdict(res);
2000
2001 if (*bs->device_name) {
2002 qdict_put(dict, "device", qstring_from_str(bs->device_name));
2003 }
2004
2005 if (bs->file) {
2006 QObject *parent = bdrv_info_stats_bs(bs->file);
2007 qdict_put_obj(dict, "parent", parent);
2008 }
2009
2010 return res;
2011}
2012
218a536a
LC
2013void bdrv_info_stats(Monitor *mon, QObject **ret_data)
2014{
2015 QObject *obj;
2016 QList *devices;
a36e69dd
TS
2017 BlockDriverState *bs;
2018
218a536a
LC
2019 devices = qlist_new();
2020
1b7bdbc1 2021 QTAILQ_FOREACH(bs, &bdrv_states, list) {
294cc35f 2022 obj = bdrv_info_stats_bs(bs);
218a536a 2023 qlist_append_obj(devices, obj);
a36e69dd 2024 }
218a536a
LC
2025
2026 *ret_data = QOBJECT(devices);
a36e69dd 2027}
ea2384d3 2028
045df330
AL
2029const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2030{
2031 if (bs->backing_hd && bs->backing_hd->encrypted)
2032 return bs->backing_file;
2033 else if (bs->encrypted)
2034 return bs->filename;
2035 else
2036 return NULL;
2037}
2038
5fafdf24 2039void bdrv_get_backing_filename(BlockDriverState *bs,
83f64091
FB
2040 char *filename, int filename_size)
2041{
b783e409 2042 if (!bs->backing_file) {
83f64091
FB
2043 pstrcpy(filename, filename_size, "");
2044 } else {
2045 pstrcpy(filename, filename_size, bs->backing_file);
2046 }
2047}
2048
5fafdf24 2049int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
faea38e7
FB
2050 const uint8_t *buf, int nb_sectors)
2051{
2052 BlockDriver *drv = bs->drv;
2053 if (!drv)
19cb3738 2054 return -ENOMEDIUM;
faea38e7
FB
2055 if (!drv->bdrv_write_compressed)
2056 return -ENOTSUP;
fbb7b4e0
KW
2057 if (bdrv_check_request(bs, sector_num, nb_sectors))
2058 return -EIO;
a55eb92c 2059
c6d22830 2060 if (bs->dirty_bitmap) {
7cd1e32a
LS
2061 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2062 }
a55eb92c 2063
faea38e7
FB
2064 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2065}
3b46e624 2066
faea38e7
FB
2067int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2068{
2069 BlockDriver *drv = bs->drv;
2070 if (!drv)
19cb3738 2071 return -ENOMEDIUM;
faea38e7
FB
2072 if (!drv->bdrv_get_info)
2073 return -ENOTSUP;
2074 memset(bdi, 0, sizeof(*bdi));
2075 return drv->bdrv_get_info(bs, bdi);
2076}
2077
45566e9c
CH
2078int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2079 int64_t pos, int size)
178e08a5
AL
2080{
2081 BlockDriver *drv = bs->drv;
2082 if (!drv)
2083 return -ENOMEDIUM;
7cdb1f6d
MK
2084 if (drv->bdrv_save_vmstate)
2085 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2086 if (bs->file)
2087 return bdrv_save_vmstate(bs->file, buf, pos, size);
2088 return -ENOTSUP;
178e08a5
AL
2089}
2090
45566e9c
CH
2091int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2092 int64_t pos, int size)
178e08a5
AL
2093{
2094 BlockDriver *drv = bs->drv;
2095 if (!drv)
2096 return -ENOMEDIUM;
7cdb1f6d
MK
2097 if (drv->bdrv_load_vmstate)
2098 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2099 if (bs->file)
2100 return bdrv_load_vmstate(bs->file, buf, pos, size);
2101 return -ENOTSUP;
178e08a5
AL
2102}
2103
8b9b0cc2
KW
2104void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2105{
2106 BlockDriver *drv = bs->drv;
2107
2108 if (!drv || !drv->bdrv_debug_event) {
2109 return;
2110 }
2111
2112 return drv->bdrv_debug_event(bs, event);
2113
2114}
2115
faea38e7
FB
2116/**************************************************************/
2117/* handling of snapshots */
2118
feeee5ac
MDCF
2119int bdrv_can_snapshot(BlockDriverState *bs)
2120{
2121 BlockDriver *drv = bs->drv;
07b70bfb 2122 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
feeee5ac
MDCF
2123 return 0;
2124 }
2125
2126 if (!drv->bdrv_snapshot_create) {
2127 if (bs->file != NULL) {
2128 return bdrv_can_snapshot(bs->file);
2129 }
2130 return 0;
2131 }
2132
2133 return 1;
2134}
2135
199630b6
BS
2136int bdrv_is_snapshot(BlockDriverState *bs)
2137{
2138 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2139}
2140
f9092b10
MA
2141BlockDriverState *bdrv_snapshots(void)
2142{
2143 BlockDriverState *bs;
2144
3ac906f7 2145 if (bs_snapshots) {
f9092b10 2146 return bs_snapshots;
3ac906f7 2147 }
f9092b10
MA
2148
2149 bs = NULL;
2150 while ((bs = bdrv_next(bs))) {
2151 if (bdrv_can_snapshot(bs)) {
3ac906f7
MA
2152 bs_snapshots = bs;
2153 return bs;
f9092b10
MA
2154 }
2155 }
2156 return NULL;
f9092b10
MA
2157}
2158
5fafdf24 2159int bdrv_snapshot_create(BlockDriverState *bs,
faea38e7
FB
2160 QEMUSnapshotInfo *sn_info)
2161{
2162 BlockDriver *drv = bs->drv;
2163 if (!drv)
19cb3738 2164 return -ENOMEDIUM;
7cdb1f6d
MK
2165 if (drv->bdrv_snapshot_create)
2166 return drv->bdrv_snapshot_create(bs, sn_info);
2167 if (bs->file)
2168 return bdrv_snapshot_create(bs->file, sn_info);
2169 return -ENOTSUP;
faea38e7
FB
2170}
2171
5fafdf24 2172int bdrv_snapshot_goto(BlockDriverState *bs,
faea38e7
FB
2173 const char *snapshot_id)
2174{
2175 BlockDriver *drv = bs->drv;
7cdb1f6d
MK
2176 int ret, open_ret;
2177
faea38e7 2178 if (!drv)
19cb3738 2179 return -ENOMEDIUM;
7cdb1f6d
MK
2180 if (drv->bdrv_snapshot_goto)
2181 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2182
2183 if (bs->file) {
2184 drv->bdrv_close(bs);
2185 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2186 open_ret = drv->bdrv_open(bs, bs->open_flags);
2187 if (open_ret < 0) {
2188 bdrv_delete(bs->file);
2189 bs->drv = NULL;
2190 return open_ret;
2191 }
2192 return ret;
2193 }
2194
2195 return -ENOTSUP;
faea38e7
FB
2196}
2197
2198int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2199{
2200 BlockDriver *drv = bs->drv;
2201 if (!drv)
19cb3738 2202 return -ENOMEDIUM;
7cdb1f6d
MK
2203 if (drv->bdrv_snapshot_delete)
2204 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2205 if (bs->file)
2206 return bdrv_snapshot_delete(bs->file, snapshot_id);
2207 return -ENOTSUP;
faea38e7
FB
2208}
2209
5fafdf24 2210int bdrv_snapshot_list(BlockDriverState *bs,
faea38e7
FB
2211 QEMUSnapshotInfo **psn_info)
2212{
2213 BlockDriver *drv = bs->drv;
2214 if (!drv)
19cb3738 2215 return -ENOMEDIUM;
7cdb1f6d
MK
2216 if (drv->bdrv_snapshot_list)
2217 return drv->bdrv_snapshot_list(bs, psn_info);
2218 if (bs->file)
2219 return bdrv_snapshot_list(bs->file, psn_info);
2220 return -ENOTSUP;
faea38e7
FB
2221}
2222
51ef6727 2223int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2224 const char *snapshot_name)
2225{
2226 BlockDriver *drv = bs->drv;
2227 if (!drv) {
2228 return -ENOMEDIUM;
2229 }
2230 if (!bs->read_only) {
2231 return -EINVAL;
2232 }
2233 if (drv->bdrv_snapshot_load_tmp) {
2234 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2235 }
2236 return -ENOTSUP;
2237}
2238
faea38e7
FB
#define NB_SUFFIXES 4

/*
 * Format @size into @buf (at most @buf_size bytes) in a compact form.
 *
 * Values up to 999 are printed as plain integers. Larger values are scaled
 * by powers of 1024 and suffixed with K, M, G or T: one decimal place is
 * shown while the scaled value is below 10, otherwise the value is rounded
 * to the nearest integer. Values too large for T are still printed in T.
 *
 * Returns @buf.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char unit_chars[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx = 0;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    /* Pick the first unit in which the value is below 1000, or the last. */
    while (idx < NB_SUFFIXES - 1 && size >= 1000 * unit) {
        unit *= 1024;
        idx++;
    }

    if (size < 10 * unit) {
        snprintf(buf, buf_size, "%0.1f%c",
                 (double)size / unit, unit_chars[idx]);
    } else {
        snprintf(buf, buf_size, "%" PRId64 "%c",
                 (size + (unit >> 1)) / unit, unit_chars[idx]);
    }
    return buf;
}
2268
2269char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2270{
2271 char buf1[128], date_buf[128], clock_buf[128];
3b9f94e1
FB
2272#ifdef _WIN32
2273 struct tm *ptm;
2274#else
faea38e7 2275 struct tm tm;
3b9f94e1 2276#endif
faea38e7
FB
2277 time_t ti;
2278 int64_t secs;
2279
2280 if (!sn) {
5fafdf24
TS
2281 snprintf(buf, buf_size,
2282 "%-10s%-20s%7s%20s%15s",
faea38e7
FB
2283 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2284 } else {
2285 ti = sn->date_sec;
3b9f94e1
FB
2286#ifdef _WIN32
2287 ptm = localtime(&ti);
2288 strftime(date_buf, sizeof(date_buf),
2289 "%Y-%m-%d %H:%M:%S", ptm);
2290#else
faea38e7
FB
2291 localtime_r(&ti, &tm);
2292 strftime(date_buf, sizeof(date_buf),
2293 "%Y-%m-%d %H:%M:%S", &tm);
3b9f94e1 2294#endif
faea38e7
FB
2295 secs = sn->vm_clock_nsec / 1000000000;
2296 snprintf(clock_buf, sizeof(clock_buf),
2297 "%02d:%02d:%02d.%03d",
2298 (int)(secs / 3600),
2299 (int)((secs / 60) % 60),
5fafdf24 2300 (int)(secs % 60),
faea38e7
FB
2301 (int)((sn->vm_clock_nsec / 1000000) % 1000));
2302 snprintf(buf, buf_size,
5fafdf24 2303 "%-10s%-20s%7s%20s%15s",
faea38e7
FB
2304 sn->id_str, sn->name,
2305 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2306 date_buf,
2307 clock_buf);
2308 }
2309 return buf;
2310}
2311
ea2384d3 2312/**************************************************************/
83f64091 2313/* async I/Os */
ea2384d3 2314
3b69e4b9 2315BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
f141eafe 2316 QEMUIOVector *qiov, int nb_sectors,
3b69e4b9 2317 BlockDriverCompletionFunc *cb, void *opaque)
83f64091
FB
2318{
2319 BlockDriver *drv = bs->drv;
83f64091 2320
bbf0a440
SH
2321 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2322
19cb3738 2323 if (!drv)
ce1a14dc 2324 return NULL;
71d0770c
AL
2325 if (bdrv_check_request(bs, sector_num, nb_sectors))
2326 return NULL;
3b46e624 2327
a597e79c
CH
2328 return drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
2329 cb, opaque);
ea2384d3
FB
2330}
2331
4dcafbb1
MT
2332typedef struct BlockCompleteData {
2333 BlockDriverCompletionFunc *cb;
2334 void *opaque;
2335 BlockDriverState *bs;
2336 int64_t sector_num;
2337 int nb_sectors;
2338} BlockCompleteData;
2339
2340static void block_complete_cb(void *opaque, int ret)
2341{
2342 BlockCompleteData *b = opaque;
2343
2344 if (b->bs->dirty_bitmap) {
2345 set_dirty_bitmap(b->bs, b->sector_num, b->nb_sectors, 1);
2346 }
2347 b->cb(b->opaque, ret);
7267c094 2348 g_free(b);
4dcafbb1
MT
2349}
2350
2351static BlockCompleteData *blk_dirty_cb_alloc(BlockDriverState *bs,
2352 int64_t sector_num,
2353 int nb_sectors,
2354 BlockDriverCompletionFunc *cb,
2355 void *opaque)
2356{
7267c094 2357 BlockCompleteData *blkdata = g_malloc0(sizeof(BlockCompleteData));
4dcafbb1
MT
2358
2359 blkdata->bs = bs;
2360 blkdata->cb = cb;
2361 blkdata->opaque = opaque;
2362 blkdata->sector_num = sector_num;
2363 blkdata->nb_sectors = nb_sectors;
2364
2365 return blkdata;
2366}
2367
f141eafe
AL
2368BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2369 QEMUIOVector *qiov, int nb_sectors,
2370 BlockDriverCompletionFunc *cb, void *opaque)
ea2384d3 2371{
83f64091 2372 BlockDriver *drv = bs->drv;
a36e69dd 2373 BlockDriverAIOCB *ret;
4dcafbb1 2374 BlockCompleteData *blk_cb_data;
ea2384d3 2375
bbf0a440
SH
2376 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2377
19cb3738 2378 if (!drv)
ce1a14dc 2379 return NULL;
83f64091 2380 if (bs->read_only)
ce1a14dc 2381 return NULL;
71d0770c
AL
2382 if (bdrv_check_request(bs, sector_num, nb_sectors))
2383 return NULL;
83f64091 2384
c6d22830 2385 if (bs->dirty_bitmap) {
4dcafbb1
MT
2386 blk_cb_data = blk_dirty_cb_alloc(bs, sector_num, nb_sectors, cb,
2387 opaque);
2388 cb = &block_complete_cb;
2389 opaque = blk_cb_data;
7cd1e32a 2390 }
a55eb92c 2391
f141eafe
AL
2392 ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
2393 cb, opaque);
a36e69dd
TS
2394
2395 if (ret) {
294cc35f
KW
2396 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
2397 bs->wr_highest_sector = sector_num + nb_sectors - 1;
2398 }
a36e69dd
TS
2399 }
2400
2401 return ret;
83f64091
FB
2402}
2403
40b4f539
KW
2404
2405typedef struct MultiwriteCB {
2406 int error;
2407 int num_requests;
2408 int num_callbacks;
2409 struct {
2410 BlockDriverCompletionFunc *cb;
2411 void *opaque;
2412 QEMUIOVector *free_qiov;
2413 void *free_buf;
2414 } callbacks[];
2415} MultiwriteCB;
2416
2417static void multiwrite_user_cb(MultiwriteCB *mcb)
2418{
2419 int i;
2420
2421 for (i = 0; i < mcb->num_callbacks; i++) {
2422 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
1e1ea48d
SH
2423 if (mcb->callbacks[i].free_qiov) {
2424 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2425 }
7267c094 2426 g_free(mcb->callbacks[i].free_qiov);
f8a83245 2427 qemu_vfree(mcb->callbacks[i].free_buf);
40b4f539
KW
2428 }
2429}
2430
2431static void multiwrite_cb(void *opaque, int ret)
2432{
2433 MultiwriteCB *mcb = opaque;
2434
6d519a5f
SH
2435 trace_multiwrite_cb(mcb, ret);
2436
cb6d3ca0 2437 if (ret < 0 && !mcb->error) {
40b4f539 2438 mcb->error = ret;
40b4f539
KW
2439 }
2440
2441 mcb->num_requests--;
2442 if (mcb->num_requests == 0) {
de189a1b 2443 multiwrite_user_cb(mcb);
7267c094 2444 g_free(mcb);
40b4f539
KW
2445 }
2446}
2447
2448static int multiwrite_req_compare(const void *a, const void *b)
2449{
77be4366
CH
2450 const BlockRequest *req1 = a, *req2 = b;
2451
2452 /*
2453 * Note that we can't simply subtract req2->sector from req1->sector
2454 * here as that could overflow the return value.
2455 */
2456 if (req1->sector > req2->sector) {
2457 return 1;
2458 } else if (req1->sector < req2->sector) {
2459 return -1;
2460 } else {
2461 return 0;
2462 }
40b4f539
KW
2463}
2464
2465/*
2466 * Takes a bunch of requests and tries to merge them. Returns the number of
2467 * requests that remain after merging.
2468 */
2469static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2470 int num_reqs, MultiwriteCB *mcb)
2471{
2472 int i, outidx;
2473
2474 // Sort requests by start sector
2475 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2476
2477 // Check if adjacent requests touch the same clusters. If so, combine them,
2478 // filling up gaps with zero sectors.
2479 outidx = 0;
2480 for (i = 1; i < num_reqs; i++) {
2481 int merge = 0;
2482 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2483
2484 // This handles the cases that are valid for all block drivers, namely
2485 // exactly sequential writes and overlapping writes.
2486 if (reqs[i].sector <= oldreq_last) {
2487 merge = 1;
2488 }
2489
2490 // The block driver may decide that it makes sense to combine requests
2491 // even if there is a gap of some sectors between them. In this case,
2492 // the gap is filled with zeros (therefore only applicable for yet
2493 // unused space in format like qcow2).
2494 if (!merge && bs->drv->bdrv_merge_requests) {
2495 merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2496 }
2497
e2a305fb
CH
2498 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2499 merge = 0;
2500 }
2501
40b4f539
KW
2502 if (merge) {
2503 size_t size;
7267c094 2504 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
40b4f539
KW
2505 qemu_iovec_init(qiov,
2506 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2507
2508 // Add the first request to the merged one. If the requests are
2509 // overlapping, drop the last sectors of the first request.
2510 size = (reqs[i].sector - reqs[outidx].sector) << 9;
2511 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2512
2513 // We might need to add some zeros between the two requests
2514 if (reqs[i].sector > oldreq_last) {
2515 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2516 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2517 memset(buf, 0, zero_bytes);
2518 qemu_iovec_add(qiov, buf, zero_bytes);
2519 mcb->callbacks[i].free_buf = buf;
2520 }
2521
2522 // Add the second request
2523 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2524
cbf1dff2 2525 reqs[outidx].nb_sectors = qiov->size >> 9;
40b4f539
KW
2526 reqs[outidx].qiov = qiov;
2527
2528 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2529 } else {
2530 outidx++;
2531 reqs[outidx].sector = reqs[i].sector;
2532 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2533 reqs[outidx].qiov = reqs[i].qiov;
2534 }
2535 }
2536
2537 return outidx + 1;
2538}
2539
2540/*
2541 * Submit multiple AIO write requests at once.
2542 *
2543 * On success, the function returns 0 and all requests in the reqs array have
2544 * been submitted. In error case this function returns -1, and any of the
2545 * requests may or may not be submitted yet. In particular, this means that the
2546 * callback will be called for some of the requests, for others it won't. The
2547 * caller must check the error field of the BlockRequest to wait for the right
2548 * callbacks (if error != 0, no callback will be called).
2549 *
2550 * The implementation may modify the contents of the reqs array, e.g. to merge
2551 * requests. However, the fields opaque and error are left unmodified as they
2552 * are used to signal failure for a single request to the caller.
2553 */
2554int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2555{
2556 BlockDriverAIOCB *acb;
2557 MultiwriteCB *mcb;
2558 int i;
2559
301db7c2
RH
2560 /* don't submit writes if we don't have a medium */
2561 if (bs->drv == NULL) {
2562 for (i = 0; i < num_reqs; i++) {
2563 reqs[i].error = -ENOMEDIUM;
2564 }
2565 return -1;
2566 }
2567
40b4f539
KW
2568 if (num_reqs == 0) {
2569 return 0;
2570 }
2571
2572 // Create MultiwriteCB structure
7267c094 2573 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
40b4f539
KW
2574 mcb->num_requests = 0;
2575 mcb->num_callbacks = num_reqs;
2576
2577 for (i = 0; i < num_reqs; i++) {
2578 mcb->callbacks[i].cb = reqs[i].cb;
2579 mcb->callbacks[i].opaque = reqs[i].opaque;
2580 }
2581
2582 // Check for mergable requests
2583 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2584
6d519a5f
SH
2585 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2586
453f9a16
KW
2587 /*
2588 * Run the aio requests. As soon as one request can't be submitted
2589 * successfully, fail all requests that are not yet submitted (we must
2590 * return failure for all requests anyway)
2591 *
2592 * num_requests cannot be set to the right value immediately: If
2593 * bdrv_aio_writev fails for some request, num_requests would be too high
2594 * and therefore multiwrite_cb() would never recognize the multiwrite
2595 * request as completed. We also cannot use the loop variable i to set it
2596 * when the first request fails because the callback may already have been
2597 * called for previously submitted requests. Thus, num_requests must be
2598 * incremented for each request that is submitted.
2599 *
2600 * The problem that callbacks may be called early also means that we need
2601 * to take care that num_requests doesn't become 0 before all requests are
2602 * submitted - multiwrite_cb() would consider the multiwrite request
2603 * completed. A dummy request that is "completed" by a manual call to
2604 * multiwrite_cb() takes care of this.
2605 */
2606 mcb->num_requests = 1;
2607
6d519a5f 2608 // Run the aio requests
40b4f539 2609 for (i = 0; i < num_reqs; i++) {
453f9a16 2610 mcb->num_requests++;
40b4f539
KW
2611 acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2612 reqs[i].nb_sectors, multiwrite_cb, mcb);
2613
2614 if (acb == NULL) {
2615 // We can only fail the whole thing if no request has been
2616 // submitted yet. Otherwise we'll wait for the submitted AIOs to
2617 // complete and report the error in the callback.
453f9a16 2618 if (i == 0) {
6d519a5f 2619 trace_bdrv_aio_multiwrite_earlyfail(mcb);
40b4f539
KW
2620 goto fail;
2621 } else {
6d519a5f 2622 trace_bdrv_aio_multiwrite_latefail(mcb, i);
7eb58a6c 2623 multiwrite_cb(mcb, -EIO);
40b4f539
KW
2624 break;
2625 }
40b4f539
KW
2626 }
2627 }
2628
453f9a16
KW
2629 /* Complete the dummy request */
2630 multiwrite_cb(mcb, 0);
2631
40b4f539
KW
2632 return 0;
2633
2634fail:
453f9a16
KW
2635 for (i = 0; i < mcb->num_callbacks; i++) {
2636 reqs[i].error = -EIO;
2637 }
7267c094 2638 g_free(mcb);
40b4f539
KW
2639 return -1;
2640}
2641
b2e12bc6
CH
2642BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
2643 BlockDriverCompletionFunc *cb, void *opaque)
2644{
2645 BlockDriver *drv = bs->drv;
2646
a13aac04
SH
2647 trace_bdrv_aio_flush(bs, opaque);
2648
016f5cf6
AG
2649 if (bs->open_flags & BDRV_O_NO_FLUSH) {
2650 return bdrv_aio_noop_em(bs, cb, opaque);
2651 }
2652
b2e12bc6
CH
2653 if (!drv)
2654 return NULL;
b2e12bc6
CH
2655 return drv->bdrv_aio_flush(bs, cb, opaque);
2656}
2657
83f64091 2658void bdrv_aio_cancel(BlockDriverAIOCB *acb)
83f64091 2659{
6bbff9a0 2660 acb->pool->cancel(acb);
83f64091
FB
2661}
2662
ce1a14dc 2663
83f64091
FB
2664/**************************************************************/
2665/* async block device emulation */
2666
c16b5a2c
CH
2667typedef struct BlockDriverAIOCBSync {
2668 BlockDriverAIOCB common;
2669 QEMUBH *bh;
2670 int ret;
2671 /* vector translation state */
2672 QEMUIOVector *qiov;
2673 uint8_t *bounce;
2674 int is_write;
2675} BlockDriverAIOCBSync;
2676
2677static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
2678{
b666d239
KW
2679 BlockDriverAIOCBSync *acb =
2680 container_of(blockacb, BlockDriverAIOCBSync, common);
6a7ad299 2681 qemu_bh_delete(acb->bh);
36afc451 2682 acb->bh = NULL;
c16b5a2c
CH
2683 qemu_aio_release(acb);
2684}
2685
2686static AIOPool bdrv_em_aio_pool = {
2687 .aiocb_size = sizeof(BlockDriverAIOCBSync),
2688 .cancel = bdrv_aio_cancel_em,
2689};
2690
ce1a14dc 2691static void bdrv_aio_bh_cb(void *opaque)
83f64091 2692{
ce1a14dc 2693 BlockDriverAIOCBSync *acb = opaque;
f141eafe 2694
f141eafe
AL
2695 if (!acb->is_write)
2696 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
ceb42de8 2697 qemu_vfree(acb->bounce);
ce1a14dc 2698 acb->common.cb(acb->common.opaque, acb->ret);
6a7ad299 2699 qemu_bh_delete(acb->bh);
36afc451 2700 acb->bh = NULL;
ce1a14dc 2701 qemu_aio_release(acb);
83f64091 2702}
beac80cd 2703
f141eafe
AL
2704static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
2705 int64_t sector_num,
2706 QEMUIOVector *qiov,
2707 int nb_sectors,
2708 BlockDriverCompletionFunc *cb,
2709 void *opaque,
2710 int is_write)
2711
83f64091 2712{
ce1a14dc 2713 BlockDriverAIOCBSync *acb;
ce1a14dc 2714
c16b5a2c 2715 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
f141eafe
AL
2716 acb->is_write = is_write;
2717 acb->qiov = qiov;
e268ca52 2718 acb->bounce = qemu_blockalign(bs, qiov->size);
f141eafe 2719
ce1a14dc
PB
2720 if (!acb->bh)
2721 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
f141eafe
AL
2722
2723 if (is_write) {
2724 qemu_iovec_to_buffer(acb->qiov, acb->bounce);
2725 acb->ret = bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
2726 } else {
2727 acb->ret = bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
2728 }
2729
ce1a14dc 2730 qemu_bh_schedule(acb->bh);
f141eafe 2731
ce1a14dc 2732 return &acb->common;
beac80cd
FB
2733}
2734
f141eafe
AL
2735static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
2736 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
ce1a14dc 2737 BlockDriverCompletionFunc *cb, void *opaque)
beac80cd 2738{
f141eafe
AL
2739 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
2740}
83f64091 2741
f141eafe
AL
2742static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
2743 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2744 BlockDriverCompletionFunc *cb, void *opaque)
2745{
2746 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
beac80cd 2747}
beac80cd 2748
68485420
KW
2749
2750typedef struct BlockDriverAIOCBCoroutine {
2751 BlockDriverAIOCB common;
2752 BlockRequest req;
2753 bool is_write;
2754 QEMUBH* bh;
2755} BlockDriverAIOCBCoroutine;
2756
2757static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
2758{
2759 qemu_aio_flush();
2760}
2761
2762static AIOPool bdrv_em_co_aio_pool = {
2763 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
2764 .cancel = bdrv_aio_co_cancel_em,
2765};
2766
2767static void bdrv_co_rw_bh(void *opaque)
2768{
2769 BlockDriverAIOCBCoroutine *acb = opaque;
2770
2771 acb->common.cb(acb->common.opaque, acb->req.error);
2772 qemu_bh_delete(acb->bh);
2773 qemu_aio_release(acb);
2774}
2775
2776static void coroutine_fn bdrv_co_rw(void *opaque)
2777{
2778 BlockDriverAIOCBCoroutine *acb = opaque;
2779 BlockDriverState *bs = acb->common.bs;
2780
2781 if (!acb->is_write) {
2782 acb->req.error = bs->drv->bdrv_co_readv(bs, acb->req.sector,
2783 acb->req.nb_sectors, acb->req.qiov);
2784 } else {
2785 acb->req.error = bs->drv->bdrv_co_writev(bs, acb->req.sector,
2786 acb->req.nb_sectors, acb->req.qiov);
2787 }
2788
2789 acb->bh = qemu_bh_new(bdrv_co_rw_bh, acb);
2790 qemu_bh_schedule(acb->bh);
2791}
2792
2793static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
2794 int64_t sector_num,
2795 QEMUIOVector *qiov,
2796 int nb_sectors,
2797 BlockDriverCompletionFunc *cb,
2798 void *opaque,
2799 bool is_write)
2800{
2801 Coroutine *co;
2802 BlockDriverAIOCBCoroutine *acb;
2803
2804 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2805 acb->req.sector = sector_num;
2806 acb->req.nb_sectors = nb_sectors;
2807 acb->req.qiov = qiov;
2808 acb->is_write = is_write;
2809
2810 co = qemu_coroutine_create(bdrv_co_rw);
2811 qemu_coroutine_enter(co, acb);
2812
2813 return &acb->common;
2814}
2815
2816static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs,
2817 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2818 BlockDriverCompletionFunc *cb, void *opaque)
2819{
2820 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque,
2821 false);
2822}
2823
2824static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs,
2825 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2826 BlockDriverCompletionFunc *cb, void *opaque)
2827{
2828 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque,
2829 true);
2830}
2831
b2e12bc6
CH
2832static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
2833 BlockDriverCompletionFunc *cb, void *opaque)
2834{
2835 BlockDriverAIOCBSync *acb;
2836
2837 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2838 acb->is_write = 1; /* don't bounce in the completion hadler */
2839 acb->qiov = NULL;
2840 acb->bounce = NULL;
2841 acb->ret = 0;
2842
2843 if (!acb->bh)
2844 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2845
2846 bdrv_flush(bs);
2847 qemu_bh_schedule(acb->bh);
2848 return &acb->common;
2849}
2850
016f5cf6
AG
2851static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
2852 BlockDriverCompletionFunc *cb, void *opaque)
2853{
2854 BlockDriverAIOCBSync *acb;
2855
2856 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2857 acb->is_write = 1; /* don't bounce in the completion handler */
2858 acb->qiov = NULL;
2859 acb->bounce = NULL;
2860 acb->ret = 0;
2861
2862 if (!acb->bh) {
2863 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2864 }
2865
2866 qemu_bh_schedule(acb->bh);
2867 return &acb->common;
2868}
2869
83f64091
FB
2870/**************************************************************/
2871/* sync block device emulation */
ea2384d3 2872
83f64091
FB
/* Completion callback that stores @ret in the int that @opaque points to. */
static void bdrv_rw_em_cb(void *opaque, int ret)
{
    int *result = opaque;

    *result = ret;
}
2877
83f64091
FB
2878#define NOT_DONE 0x7fffffff
2879
5fafdf24 2880static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
83f64091 2881 uint8_t *buf, int nb_sectors)
7a6cba61 2882{
ce1a14dc
PB
2883 int async_ret;
2884 BlockDriverAIOCB *acb;
f141eafe
AL
2885 struct iovec iov;
2886 QEMUIOVector qiov;
83f64091 2887
83f64091 2888 async_ret = NOT_DONE;
3f4cb3d3 2889 iov.iov_base = (void *)buf;
eb5a3165 2890 iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
f141eafe
AL
2891 qemu_iovec_init_external(&qiov, &iov, 1);
2892 acb = bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors,
2893 bdrv_rw_em_cb, &async_ret);
65d6b3d8
KW
2894 if (acb == NULL) {
2895 async_ret = -1;
2896 goto fail;
2897 }
baf35cb9 2898
83f64091
FB
2899 while (async_ret == NOT_DONE) {
2900 qemu_aio_wait();
2901 }
baf35cb9 2902
65d6b3d8
KW
2903
2904fail:
83f64091 2905 return async_ret;
7a6cba61
PB
2906}
2907
83f64091
FB
2908static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
2909 const uint8_t *buf, int nb_sectors)
2910{
ce1a14dc
PB
2911 int async_ret;
2912 BlockDriverAIOCB *acb;
f141eafe
AL
2913 struct iovec iov;
2914 QEMUIOVector qiov;
83f64091 2915
83f64091 2916 async_ret = NOT_DONE;
f141eafe 2917 iov.iov_base = (void *)buf;
eb5a3165 2918 iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
f141eafe
AL
2919 qemu_iovec_init_external(&qiov, &iov, 1);
2920 acb = bdrv_aio_writev(bs, sector_num, &qiov, nb_sectors,
2921 bdrv_rw_em_cb, &async_ret);
65d6b3d8
KW
2922 if (acb == NULL) {
2923 async_ret = -1;
2924 goto fail;
2925 }
83f64091
FB
2926 while (async_ret == NOT_DONE) {
2927 qemu_aio_wait();
2928 }
65d6b3d8
KW
2929
2930fail:
83f64091
FB
2931 return async_ret;
2932}
ea2384d3
FB
2933
2934void bdrv_init(void)
2935{
5efa9d5a 2936 module_call_init(MODULE_INIT_BLOCK);
ea2384d3 2937}
ce1a14dc 2938
eb852011
MA
2939void bdrv_init_with_whitelist(void)
2940{
2941 use_bdrv_whitelist = 1;
2942 bdrv_init();
2943}
2944
c16b5a2c
CH
2945void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
2946 BlockDriverCompletionFunc *cb, void *opaque)
ce1a14dc 2947{
ce1a14dc
PB
2948 BlockDriverAIOCB *acb;
2949
6bbff9a0
AL
2950 if (pool->free_aiocb) {
2951 acb = pool->free_aiocb;
2952 pool->free_aiocb = acb->next;
ce1a14dc 2953 } else {
7267c094 2954 acb = g_malloc0(pool->aiocb_size);
6bbff9a0 2955 acb->pool = pool;
ce1a14dc
PB
2956 }
2957 acb->bs = bs;
2958 acb->cb = cb;
2959 acb->opaque = opaque;
2960 return acb;
2961}
2962
2963void qemu_aio_release(void *p)
2964{
6bbff9a0
AL
2965 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
2966 AIOPool *pool = acb->pool;
2967 acb->next = pool->free_aiocb;
2968 pool->free_aiocb = acb;
ce1a14dc 2969}
19cb3738 2970
f9f05dc5
KW
2971/**************************************************************/
2972/* Coroutine block device emulation */
2973
2974typedef struct CoroutineIOCompletion {
2975 Coroutine *coroutine;
2976 int ret;
2977} CoroutineIOCompletion;
2978
2979static void bdrv_co_io_em_complete(void *opaque, int ret)
2980{
2981 CoroutineIOCompletion *co = opaque;
2982
2983 co->ret = ret;
2984 qemu_coroutine_enter(co->coroutine, NULL);
2985}
2986
2987static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
2988 int nb_sectors, QEMUIOVector *iov,
2989 bool is_write)
2990{
2991 CoroutineIOCompletion co = {
2992 .coroutine = qemu_coroutine_self(),
2993 };
2994 BlockDriverAIOCB *acb;
2995
2996 if (is_write) {
2997 acb = bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
2998 bdrv_co_io_em_complete, &co);
2999 } else {
3000 acb = bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
3001 bdrv_co_io_em_complete, &co);
3002 }
3003
59370aaa 3004 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
f9f05dc5
KW
3005 if (!acb) {
3006 return -EIO;
3007 }
3008 qemu_coroutine_yield();
3009
3010 return co.ret;
3011}
3012
3013static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
3014 int64_t sector_num, int nb_sectors,
3015 QEMUIOVector *iov)
3016{
3017 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
3018}
3019
3020static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
3021 int64_t sector_num, int nb_sectors,
3022 QEMUIOVector *iov)
3023{
3024 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
3025}
3026
e7a8a783
KW
3027static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs)
3028{
3029 CoroutineIOCompletion co = {
3030 .coroutine = qemu_coroutine_self(),
3031 };
3032 BlockDriverAIOCB *acb;
3033
3034 acb = bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
3035 if (!acb) {
3036 return -EIO;
3037 }
3038 qemu_coroutine_yield();
3039 return co.ret;
3040}
3041
19cb3738
FB
3042/**************************************************************/
3043/* removable device support */
3044
3045/**
3046 * Return TRUE if the media is present
3047 */
3048int bdrv_is_inserted(BlockDriverState *bs)
3049{
3050 BlockDriver *drv = bs->drv;
a1aff5bf 3051
19cb3738
FB
3052 if (!drv)
3053 return 0;
3054 if (!drv->bdrv_is_inserted)
a1aff5bf
MA
3055 return 1;
3056 return drv->bdrv_is_inserted(bs);
19cb3738
FB
3057}
3058
3059/**
8e49ca46
MA
3060 * Return whether the media changed since the last call to this
3061 * function, or -ENOTSUP if we don't know. Most drivers don't know.
19cb3738
FB
3062 */
3063int bdrv_media_changed(BlockDriverState *bs)
3064{
3065 BlockDriver *drv = bs->drv;
19cb3738 3066
8e49ca46
MA
3067 if (drv && drv->bdrv_media_changed) {
3068 return drv->bdrv_media_changed(bs);
3069 }
3070 return -ENOTSUP;
19cb3738
FB
3071}
3072
3073/**
3074 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3075 */
fdec4404 3076void bdrv_eject(BlockDriverState *bs, int eject_flag)
19cb3738
FB
3077{
3078 BlockDriver *drv = bs->drv;
19cb3738 3079
822e1cd1
MA
3080 if (drv && drv->bdrv_eject) {
3081 drv->bdrv_eject(bs, eject_flag);
19cb3738
FB
3082 }
3083}
3084
19cb3738
FB
3085/**
3086 * Lock or unlock the media (if it is locked, the user won't be able
3087 * to eject it manually).
3088 */
025e849a 3089void bdrv_lock_medium(BlockDriverState *bs, bool locked)
19cb3738
FB
3090{
3091 BlockDriver *drv = bs->drv;
3092
025e849a 3093 trace_bdrv_lock_medium(bs, locked);
b8c6d095 3094
025e849a
MA
3095 if (drv && drv->bdrv_lock_medium) {
3096 drv->bdrv_lock_medium(bs, locked);
19cb3738
FB
3097 }
3098}
985a03b0
TS
3099
3100/* needed for generic scsi interface */
3101
3102int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3103{
3104 BlockDriver *drv = bs->drv;
3105
3106 if (drv && drv->bdrv_ioctl)
3107 return drv->bdrv_ioctl(bs, req, buf);
3108 return -ENOTSUP;
3109}
7d780669 3110
221f715d
AL
3111BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3112 unsigned long int req, void *buf,
3113 BlockDriverCompletionFunc *cb, void *opaque)
7d780669 3114{
221f715d 3115 BlockDriver *drv = bs->drv;
7d780669 3116
221f715d
AL
3117 if (drv && drv->bdrv_aio_ioctl)
3118 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3119 return NULL;
7d780669 3120}
e268ca52 3121
7b6f9300
MA
3122void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
3123{
3124 bs->buffer_alignment = align;
3125}
7cd1e32a 3126
e268ca52
AL
3127void *qemu_blockalign(BlockDriverState *bs, size_t size)
3128{
3129 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3130}
7cd1e32a
LS
3131
3132void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3133{
3134 int64_t bitmap_size;
a55eb92c 3135
aaa0eb75 3136 bs->dirty_count = 0;
a55eb92c 3137 if (enable) {
c6d22830
JK
3138 if (!bs->dirty_bitmap) {
3139 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3140 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3141 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
a55eb92c 3142
7267c094 3143 bs->dirty_bitmap = g_malloc0(bitmap_size);
a55eb92c 3144 }
7cd1e32a 3145 } else {
c6d22830 3146 if (bs->dirty_bitmap) {
7267c094 3147 g_free(bs->dirty_bitmap);
c6d22830 3148 bs->dirty_bitmap = NULL;
a55eb92c 3149 }
7cd1e32a
LS
3150 }
3151}
3152
3153int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3154{
6ea44308 3155 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
a55eb92c 3156
c6d22830
JK
3157 if (bs->dirty_bitmap &&
3158 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
6d59fec1
MT
3159 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3160 (1UL << (chunk % (sizeof(unsigned long) * 8))));
7cd1e32a
LS
3161 } else {
3162 return 0;
3163 }
3164}
3165
a55eb92c
JK
3166void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3167 int nr_sectors)
7cd1e32a
LS
3168{
3169 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
3170}
aaa0eb75
LS
3171
3172int64_t bdrv_get_dirty_count(BlockDriverState *bs)
3173{
3174 return bs->dirty_count;
3175}
f88e1a42 3176
db593f25
MT
3177void bdrv_set_in_use(BlockDriverState *bs, int in_use)
3178{
3179 assert(bs->in_use != in_use);
3180 bs->in_use = in_use;
3181}
3182
3183int bdrv_in_use(BlockDriverState *bs)
3184{
3185 return bs->in_use;
3186}
3187
28a7282a
LC
3188void bdrv_iostatus_enable(BlockDriverState *bs)
3189{
3190 bs->iostatus = BDRV_IOS_OK;
3191}
3192
3193/* The I/O status is only enabled if the drive explicitly
3194 * enables it _and_ the VM is configured to stop on errors */
3195bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3196{
3197 return (bs->iostatus != BDRV_IOS_INVAL &&
3198 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3199 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3200 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3201}
3202
3203void bdrv_iostatus_disable(BlockDriverState *bs)
3204{
3205 bs->iostatus = BDRV_IOS_INVAL;
3206}
3207
3208void bdrv_iostatus_reset(BlockDriverState *bs)
3209{
3210 if (bdrv_iostatus_is_enabled(bs)) {
3211 bs->iostatus = BDRV_IOS_OK;
3212 }
3213}
3214
3215/* XXX: Today this is set by device models because it makes the implementation
3216 quite simple. However, the block layer knows about the error, so it's
3217 possible to implement this without device models being involved */
3218void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3219{
3220 if (bdrv_iostatus_is_enabled(bs) && bs->iostatus == BDRV_IOS_OK) {
3221 assert(error >= 0);
3222 bs->iostatus = error == ENOSPC ? BDRV_IOS_ENOSPC : BDRV_IOS_FAILED;
3223 }
3224}
3225
a597e79c
CH
3226void
3227bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
3228 enum BlockAcctType type)
3229{
3230 assert(type < BDRV_MAX_IOTYPE);
3231
3232 cookie->bytes = bytes;
c488c7f6 3233 cookie->start_time_ns = get_clock();
a597e79c
CH
3234 cookie->type = type;
3235}
3236
3237void
3238bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
3239{
3240 assert(cookie->type < BDRV_MAX_IOTYPE);
3241
3242 bs->nr_bytes[cookie->type] += cookie->bytes;
3243 bs->nr_ops[cookie->type]++;
c488c7f6 3244 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
a597e79c
CH
3245}
3246
f88e1a42
JS
3247int bdrv_img_create(const char *filename, const char *fmt,
3248 const char *base_filename, const char *base_fmt,
3249 char *options, uint64_t img_size, int flags)
3250{
3251 QEMUOptionParameter *param = NULL, *create_options = NULL;
d220894e 3252 QEMUOptionParameter *backing_fmt, *backing_file, *size;
f88e1a42
JS
3253 BlockDriverState *bs = NULL;
3254 BlockDriver *drv, *proto_drv;
96df67d1 3255 BlockDriver *backing_drv = NULL;
f88e1a42
JS
3256 int ret = 0;
3257
3258 /* Find driver and parse its options */
3259 drv = bdrv_find_format(fmt);
3260 if (!drv) {
3261 error_report("Unknown file format '%s'", fmt);
4f70f249 3262 ret = -EINVAL;
f88e1a42
JS
3263 goto out;
3264 }
3265
3266 proto_drv = bdrv_find_protocol(filename);
3267 if (!proto_drv) {
3268 error_report("Unknown protocol '%s'", filename);
4f70f249 3269 ret = -EINVAL;
f88e1a42
JS
3270 goto out;
3271 }
3272
3273 create_options = append_option_parameters(create_options,
3274 drv->create_options);
3275 create_options = append_option_parameters(create_options,
3276 proto_drv->create_options);
3277
3278 /* Create parameter list with default values */
3279 param = parse_option_parameters("", create_options, param);
3280
3281 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3282
3283 /* Parse -o options */
3284 if (options) {
3285 param = parse_option_parameters(options, create_options, param);
3286 if (param == NULL) {
3287 error_report("Invalid options for file format '%s'.", fmt);
4f70f249 3288 ret = -EINVAL;
f88e1a42
JS
3289 goto out;
3290 }
3291 }
3292
3293 if (base_filename) {
3294 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3295 base_filename)) {
3296 error_report("Backing file not supported for file format '%s'",
3297 fmt);
4f70f249 3298 ret = -EINVAL;
f88e1a42
JS
3299 goto out;
3300 }
3301 }
3302
3303 if (base_fmt) {
3304 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3305 error_report("Backing file format not supported for file "
3306 "format '%s'", fmt);
4f70f249 3307 ret = -EINVAL;
f88e1a42
JS
3308 goto out;
3309 }
3310 }
3311
792da93a
JS
3312 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3313 if (backing_file && backing_file->value.s) {
3314 if (!strcmp(filename, backing_file->value.s)) {
3315 error_report("Error: Trying to create an image with the "
3316 "same filename as the backing file");
4f70f249 3317 ret = -EINVAL;
792da93a
JS
3318 goto out;
3319 }
3320 }
3321
f88e1a42
JS
3322 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3323 if (backing_fmt && backing_fmt->value.s) {
96df67d1
SH
3324 backing_drv = bdrv_find_format(backing_fmt->value.s);
3325 if (!backing_drv) {
f88e1a42
JS
3326 error_report("Unknown backing file format '%s'",
3327 backing_fmt->value.s);
4f70f249 3328 ret = -EINVAL;
f88e1a42
JS
3329 goto out;
3330 }
3331 }
3332
3333 // The size for the image must always be specified, with one exception:
3334 // If we are using a backing file, we can obtain the size from there
d220894e
KW
3335 size = get_option_parameter(param, BLOCK_OPT_SIZE);
3336 if (size && size->value.n == -1) {
f88e1a42
JS
3337 if (backing_file && backing_file->value.s) {
3338 uint64_t size;
f88e1a42
JS
3339 char buf[32];
3340
f88e1a42
JS
3341 bs = bdrv_new("");
3342
96df67d1 3343 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
f88e1a42 3344 if (ret < 0) {
96df67d1 3345 error_report("Could not open '%s'", backing_file->value.s);
f88e1a42
JS
3346 goto out;
3347 }
3348 bdrv_get_geometry(bs, &size);
3349 size *= 512;
3350
3351 snprintf(buf, sizeof(buf), "%" PRId64, size);
3352 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3353 } else {
3354 error_report("Image creation needs a size parameter");
4f70f249 3355 ret = -EINVAL;
f88e1a42
JS
3356 goto out;
3357 }
3358 }
3359
3360 printf("Formatting '%s', fmt=%s ", filename, fmt);
3361 print_option_parameters(param);
3362 puts("");
3363
3364 ret = bdrv_create(drv, filename, param);
3365
3366 if (ret < 0) {
3367 if (ret == -ENOTSUP) {
3368 error_report("Formatting or formatting option not supported for "
3369 "file format '%s'", fmt);
3370 } else if (ret == -EFBIG) {
3371 error_report("The image size is too large for file format '%s'",
3372 fmt);
3373 } else {
3374 error_report("%s: error while creating %s: %s", filename, fmt,
3375 strerror(-ret));
3376 }
3377 }
3378
3379out:
3380 free_option_parameters(create_options);
3381 free_option_parameters(param);
3382
3383 if (bs) {
3384 bdrv_delete(bs);
3385 }
4f70f249
JS
3386
3387 return ret;
f88e1a42 3388}