block: drop redundant bdrv_flush implementation
1/*
2 * QEMU System Emulator block driver
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24#include "config-host.h"
25#include "qemu-common.h"
26#include "trace.h"
27#include "monitor.h"
28#include "block_int.h"
29#include "module.h"
30#include "qemu-objects.h"
31#include "qemu-coroutine.h"
32
33#ifdef CONFIG_BSD
34#include <sys/types.h>
35#include <sys/stat.h>
36#include <sys/ioctl.h>
37#include <sys/queue.h>
38#ifndef __DragonFly__
39#include <sys/disk.h>
40#endif
41#endif
42
43#ifdef _WIN32
44#include <windows.h>
45#endif
46
47#define NOT_DONE 0x7fffffff /* used while an emulated sync operation is in progress */
48
49static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
50static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
51 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
52 BlockDriverCompletionFunc *cb, void *opaque);
53static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
54 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
55 BlockDriverCompletionFunc *cb, void *opaque);
56static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
57 int64_t sector_num, int nb_sectors,
58 QEMUIOVector *iov);
59static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
60 int64_t sector_num, int nb_sectors,
61 QEMUIOVector *iov);
62static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
63 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
64static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
65 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
66static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
67 int64_t sector_num,
68 QEMUIOVector *qiov,
69 int nb_sectors,
70 BlockDriverCompletionFunc *cb,
71 void *opaque,
72 bool is_write);
73static void coroutine_fn bdrv_co_do_rw(void *opaque);
74
75static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
76 QTAILQ_HEAD_INITIALIZER(bdrv_states);
77
78static QLIST_HEAD(, BlockDriver) bdrv_drivers =
79 QLIST_HEAD_INITIALIZER(bdrv_drivers);
80
81/* The device to use for VM snapshots */
82static BlockDriverState *bs_snapshots;
83
84/* If non-zero, use only whitelisted block drivers */
85static int use_bdrv_whitelist;
86
87#ifdef _WIN32
88static int is_windows_drive_prefix(const char *filename)
89{
90 return (((filename[0] >= 'a' && filename[0] <= 'z') ||
91 (filename[0] >= 'A' && filename[0] <= 'Z')) &&
92 filename[1] == ':');
93}
94
95int is_windows_drive(const char *filename)
96{
97 if (is_windows_drive_prefix(filename) &&
98 filename[2] == '\0')
99 return 1;
100 if (strstart(filename, "\\\\.\\", NULL) ||
101 strstart(filename, "//./", NULL))
102 return 1;
103 return 0;
104}
105#endif
106
107/* check if the path starts with "<protocol>:" */
108static int path_has_protocol(const char *path)
109{
110#ifdef _WIN32
111 if (is_windows_drive(path) ||
112 is_windows_drive_prefix(path)) {
113 return 0;
114 }
115#endif
116
117 return strchr(path, ':') != NULL;
118}
119
120int path_is_absolute(const char *path)
121{
122 const char *p;
123#ifdef _WIN32
124 /* specific case for names like: "\\.\d:" */
125 if (*path == '/' || *path == '\\')
126 return 1;
127#endif
128 p = strchr(path, ':');
129 if (p)
130 p++;
131 else
132 p = path;
133#ifdef _WIN32
134 return (*p == '/' || *p == '\\');
135#else
136 return (*p == '/');
137#endif
138}
139
140/* if filename is absolute, just copy it to dest. Otherwise, build a
 141 path to it by treating it as relative to base_path. URLs are
 142 supported. */
143void path_combine(char *dest, int dest_size,
144 const char *base_path,
145 const char *filename)
146{
147 const char *p, *p1;
148 int len;
149
150 if (dest_size <= 0)
151 return;
152 if (path_is_absolute(filename)) {
153 pstrcpy(dest, dest_size, filename);
154 } else {
155 p = strchr(base_path, ':');
156 if (p)
157 p++;
158 else
159 p = base_path;
160 p1 = strrchr(base_path, '/');
161#ifdef _WIN32
162 {
163 const char *p2;
164 p2 = strrchr(base_path, '\\');
165 if (!p1 || p2 > p1)
166 p1 = p2;
167 }
168#endif
169 if (p1)
170 p1++;
171 else
172 p1 = base_path;
173 if (p1 > p)
174 p = p1;
175 len = p - base_path;
176 if (len > dest_size - 1)
177 len = dest_size - 1;
178 memcpy(dest, base_path, len);
179 dest[len] = '\0';
180 pstrcat(dest, dest_size, filename);
181 }
182}
183
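/*
 * Illustrative sketch (not part of the original file): how path_combine()
 * resolves a backing file name relative to an image path.  Guarded with
 * "#if 0" so it does not affect the build; the example paths are made up.
 */
#if 0
static void example_path_combine(void)
{
    char dest[PATH_MAX];

    /* relative name: appended to the directory of the base image */
    path_combine(dest, sizeof(dest), "/images/vm/disk.qcow2", "base.qcow2");
    /* dest is now "/images/vm/base.qcow2" */

    /* absolute name (or one with a protocol prefix): copied unchanged */
    path_combine(dest, sizeof(dest), "/images/vm/disk.qcow2", "/srv/base.raw");
    /* dest is now "/srv/base.raw" */
}
#endif
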
184void bdrv_register(BlockDriver *bdrv)
185{
186 /* Block drivers without coroutine functions need emulation */
187 if (!bdrv->bdrv_co_readv) {
188 bdrv->bdrv_co_readv = bdrv_co_readv_em;
189 bdrv->bdrv_co_writev = bdrv_co_writev_em;
190
 191 /* bdrv_co_readv_em()/bdrv_co_writev_em() work in terms of aio, so if
192 * the block driver lacks aio we need to emulate that too.
193 */
194 if (!bdrv->bdrv_aio_readv) {
195 /* add AIO emulation layer */
196 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
197 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
198 }
199 }
200
201 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
202}
203
204/* create a new block device (by default it is empty) */
205BlockDriverState *bdrv_new(const char *device_name)
206{
207 BlockDriverState *bs;
208
209 bs = g_malloc0(sizeof(BlockDriverState));
210 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
211 if (device_name[0] != '\0') {
212 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
213 }
214 bdrv_iostatus_disable(bs);
215 return bs;
216}
217
218BlockDriver *bdrv_find_format(const char *format_name)
219{
220 BlockDriver *drv1;
221 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
222 if (!strcmp(drv1->format_name, format_name)) {
223 return drv1;
224 }
225 }
226 return NULL;
227}
228
229static int bdrv_is_whitelisted(BlockDriver *drv)
230{
231 static const char *whitelist[] = {
232 CONFIG_BDRV_WHITELIST
233 };
234 const char **p;
235
236 if (!whitelist[0])
237 return 1; /* no whitelist, anything goes */
238
239 for (p = whitelist; *p; p++) {
240 if (!strcmp(drv->format_name, *p)) {
241 return 1;
242 }
243 }
244 return 0;
245}
246
247BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
248{
249 BlockDriver *drv = bdrv_find_format(format_name);
250 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
251}
252
253int bdrv_create(BlockDriver *drv, const char* filename,
254 QEMUOptionParameter *options)
255{
256 if (!drv->bdrv_create)
257 return -ENOTSUP;
258
259 return drv->bdrv_create(filename, options);
260}
261
262int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
263{
264 BlockDriver *drv;
265
266 drv = bdrv_find_protocol(filename);
267 if (drv == NULL) {
268 return -ENOENT;
269 }
270
271 return bdrv_create(drv, filename, options);
272}
273
274#ifdef _WIN32
275void get_tmp_filename(char *filename, int size)
276{
277 char temp_dir[MAX_PATH];
278
279 GetTempPath(MAX_PATH, temp_dir);
280 GetTempFileName(temp_dir, "qem", 0, filename);
281}
282#else
283void get_tmp_filename(char *filename, int size)
284{
285 int fd;
286 const char *tmpdir;
287 /* XXX: race condition possible */
288 tmpdir = getenv("TMPDIR");
289 if (!tmpdir)
290 tmpdir = "/tmp";
291 snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
292 fd = mkstemp(filename);
293 close(fd);
294}
295#endif
296
297/*
298 * Detect host devices. By convention, /dev/cdrom[N] is always
299 * recognized as a host CDROM.
300 */
301static BlockDriver *find_hdev_driver(const char *filename)
302{
303 int score_max = 0, score;
304 BlockDriver *drv = NULL, *d;
305
306 QLIST_FOREACH(d, &bdrv_drivers, list) {
307 if (d->bdrv_probe_device) {
308 score = d->bdrv_probe_device(filename);
309 if (score > score_max) {
310 score_max = score;
311 drv = d;
312 }
313 }
314 }
315
316 return drv;
317}
318
319BlockDriver *bdrv_find_protocol(const char *filename)
320{
321 BlockDriver *drv1;
322 char protocol[128];
323 int len;
324 const char *p;
325
326 /* TODO Drivers without bdrv_file_open must be specified explicitly */
327
328 /*
329 * XXX(hch): we really should not let host device detection
330 * override an explicit protocol specification, but moving this
331 * later breaks access to device names with colons in them.
332 * Thanks to the brain-dead persistent naming schemes on udev-
333 * based Linux systems those actually are quite common.
334 */
335 drv1 = find_hdev_driver(filename);
336 if (drv1) {
337 return drv1;
338 }
339
340 if (!path_has_protocol(filename)) {
341 return bdrv_find_format("file");
342 }
343 p = strchr(filename, ':');
344 assert(p != NULL);
345 len = p - filename;
346 if (len > sizeof(protocol) - 1)
347 len = sizeof(protocol) - 1;
348 memcpy(protocol, filename, len);
349 protocol[len] = '\0';
350 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
351 if (drv1->protocol_name &&
352 !strcmp(drv1->protocol_name, protocol)) {
353 return drv1;
354 }
355 }
356 return NULL;
357}
358
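/*
 * Illustrative sketch (not part of the original file): protocol resolution as
 * implemented by bdrv_find_protocol() above.  Not compiled ("#if 0"); the
 * file names are examples only and the result depends on which drivers were
 * registered via bdrv_register().
 */
#if 0
static void example_find_protocol(void)
{
    BlockDriver *drv;

    /* no "<protocol>:" prefix -> the plain "file" protocol driver */
    drv = bdrv_find_protocol("/var/lib/images/test.qcow2");

    /* explicit protocol prefix -> matched against drv->protocol_name */
    drv = bdrv_find_protocol("nbd:localhost:10809");

    if (drv) {
        printf("selected protocol driver: %s\n", drv->format_name);
    }
}
#endif
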
359static int find_image_format(const char *filename, BlockDriver **pdrv)
360{
361 int ret, score, score_max;
362 BlockDriver *drv1, *drv;
363 uint8_t buf[2048];
364 BlockDriverState *bs;
365
366 ret = bdrv_file_open(&bs, filename, 0);
367 if (ret < 0) {
368 *pdrv = NULL;
369 return ret;
370 }
371
372 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
373 if (bs->sg || !bdrv_is_inserted(bs)) {
374 bdrv_delete(bs);
375 drv = bdrv_find_format("raw");
376 if (!drv) {
377 ret = -ENOENT;
378 }
379 *pdrv = drv;
380 return ret;
381 }
382
383 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
384 bdrv_delete(bs);
385 if (ret < 0) {
386 *pdrv = NULL;
387 return ret;
388 }
389
390 score_max = 0;
391 drv = NULL;
392 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
393 if (drv1->bdrv_probe) {
394 score = drv1->bdrv_probe(buf, ret, filename);
395 if (score > score_max) {
396 score_max = score;
397 drv = drv1;
398 }
399 }
400 }
401 if (!drv) {
402 ret = -ENOENT;
403 }
404 *pdrv = drv;
405 return ret;
406}
407
408/**
409 * Set the current 'total_sectors' value
410 */
411static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
412{
413 BlockDriver *drv = bs->drv;
414
415 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
416 if (bs->sg)
417 return 0;
418
419 /* query actual device if possible, otherwise just trust the hint */
420 if (drv->bdrv_getlength) {
421 int64_t length = drv->bdrv_getlength(bs);
422 if (length < 0) {
423 return length;
424 }
425 hint = length >> BDRV_SECTOR_BITS;
426 }
427
428 bs->total_sectors = hint;
429 return 0;
430}
431
432/**
433 * Set open flags for a given cache mode
434 *
435 * Return 0 on success, -1 if the cache mode was invalid.
436 */
437int bdrv_parse_cache_flags(const char *mode, int *flags)
438{
439 *flags &= ~BDRV_O_CACHE_MASK;
440
441 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
442 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
443 } else if (!strcmp(mode, "directsync")) {
444 *flags |= BDRV_O_NOCACHE;
445 } else if (!strcmp(mode, "writeback")) {
446 *flags |= BDRV_O_CACHE_WB;
447 } else if (!strcmp(mode, "unsafe")) {
448 *flags |= BDRV_O_CACHE_WB;
449 *flags |= BDRV_O_NO_FLUSH;
450 } else if (!strcmp(mode, "writethrough")) {
451 /* this is the default */
452 } else {
453 return -1;
454 }
455
456 return 0;
457}
458
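/*
 * Illustrative sketch (not part of the original file): how a -drive
 * "cache=..." value maps onto open flags via bdrv_parse_cache_flags().
 * Guarded with "#if 0" so it does not affect the build.
 */
#if 0
static int example_parse_cache_mode(const char *mode)
{
    int flags = BDRV_O_RDWR;

    if (bdrv_parse_cache_flags(mode, &flags) < 0) {
        return -1;              /* unknown cache mode string */
    }

    /*
     * "none"         -> BDRV_O_NOCACHE | BDRV_O_CACHE_WB  (O_DIRECT, writeback)
     * "directsync"   -> BDRV_O_NOCACHE                    (O_DIRECT, writethrough)
     * "writeback"    -> BDRV_O_CACHE_WB
     * "unsafe"       -> BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH
     * "writethrough" -> no cache bits set (the default)
     */
    return flags;
}
#endif
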
459/*
460 * Common part for opening disk images and files
461 */
462static int bdrv_open_common(BlockDriverState *bs, const char *filename,
463 int flags, BlockDriver *drv)
464{
465 int ret, open_flags;
466
467 assert(drv != NULL);
468
469 trace_bdrv_open_common(bs, filename, flags, drv->format_name);
470
471 bs->file = NULL;
472 bs->total_sectors = 0;
473 bs->encrypted = 0;
474 bs->valid_key = 0;
475 bs->open_flags = flags;
476 bs->buffer_alignment = 512;
477
478 pstrcpy(bs->filename, sizeof(bs->filename), filename);
479
480 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
481 return -ENOTSUP;
482 }
483
484 bs->drv = drv;
485 bs->opaque = g_malloc0(drv->instance_size);
486
487 if (flags & BDRV_O_CACHE_WB)
488 bs->enable_write_cache = 1;
489
490 /*
491 * Clear flags that are internal to the block layer before opening the
492 * image.
493 */
494 open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
495
496 /*
497 * Snapshots should be writable.
498 */
499 if (bs->is_temporary) {
500 open_flags |= BDRV_O_RDWR;
501 }
502
503 /* Open the image, either directly or using a protocol */
504 if (drv->bdrv_file_open) {
505 ret = drv->bdrv_file_open(bs, filename, open_flags);
506 } else {
507 ret = bdrv_file_open(&bs->file, filename, open_flags);
508 if (ret >= 0) {
509 ret = drv->bdrv_open(bs, open_flags);
510 }
511 }
512
513 if (ret < 0) {
514 goto free_and_fail;
515 }
516
517 bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
518
519 ret = refresh_total_sectors(bs, bs->total_sectors);
520 if (ret < 0) {
521 goto free_and_fail;
522 }
523
524#ifndef _WIN32
525 if (bs->is_temporary) {
526 unlink(filename);
527 }
528#endif
529 return 0;
530
531free_and_fail:
532 if (bs->file) {
533 bdrv_delete(bs->file);
534 bs->file = NULL;
535 }
536 g_free(bs->opaque);
537 bs->opaque = NULL;
538 bs->drv = NULL;
539 return ret;
540}
541
542/*
543 * Opens a file using a protocol (file, host_device, nbd, ...)
544 */
545int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
546{
547 BlockDriverState *bs;
548 BlockDriver *drv;
549 int ret;
550
551 drv = bdrv_find_protocol(filename);
552 if (!drv) {
553 return -ENOENT;
554 }
555
556 bs = bdrv_new("");
557 ret = bdrv_open_common(bs, filename, flags, drv);
558 if (ret < 0) {
559 bdrv_delete(bs);
560 return ret;
561 }
562 bs->growable = 1;
563 *pbs = bs;
564 return 0;
565}
566
567/*
568 * Opens a disk image (raw, qcow2, vmdk, ...)
569 */
570int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
571 BlockDriver *drv)
572{
573 int ret;
574
575 if (flags & BDRV_O_SNAPSHOT) {
576 BlockDriverState *bs1;
577 int64_t total_size;
578 int is_protocol = 0;
579 BlockDriver *bdrv_qcow2;
580 QEMUOptionParameter *options;
581 char tmp_filename[PATH_MAX];
582 char backing_filename[PATH_MAX];
583
584 /* if snapshot, we create a temporary backing file and open it
585 instead of opening 'filename' directly */
586
587 /* if there is a backing file, use it */
588 bs1 = bdrv_new("");
589 ret = bdrv_open(bs1, filename, 0, drv);
590 if (ret < 0) {
591 bdrv_delete(bs1);
592 return ret;
593 }
594 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
595
596 if (bs1->drv && bs1->drv->protocol_name)
597 is_protocol = 1;
598
599 bdrv_delete(bs1);
600
601 get_tmp_filename(tmp_filename, sizeof(tmp_filename));
602
603 /* Real path is meaningless for protocols */
604 if (is_protocol)
605 snprintf(backing_filename, sizeof(backing_filename),
606 "%s", filename);
607 else if (!realpath(filename, backing_filename))
608 return -errno;
609
610 bdrv_qcow2 = bdrv_find_format("qcow2");
611 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
612
613 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
614 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
615 if (drv) {
616 set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
617 drv->format_name);
618 }
619
620 ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
621 free_option_parameters(options);
622 if (ret < 0) {
623 return ret;
624 }
625
626 filename = tmp_filename;
627 drv = bdrv_qcow2;
628 bs->is_temporary = 1;
629 }
630
631 /* Find the right image format driver */
632 if (!drv) {
633 ret = find_image_format(filename, &drv);
634 }
635
636 if (!drv) {
637 goto unlink_and_fail;
638 }
639
640 /* Open the image */
641 ret = bdrv_open_common(bs, filename, flags, drv);
642 if (ret < 0) {
643 goto unlink_and_fail;
644 }
645
646 /* If there is a backing file, use it */
647 if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
648 char backing_filename[PATH_MAX];
649 int back_flags;
650 BlockDriver *back_drv = NULL;
651
652 bs->backing_hd = bdrv_new("");
653
654 if (path_has_protocol(bs->backing_file)) {
655 pstrcpy(backing_filename, sizeof(backing_filename),
656 bs->backing_file);
657 } else {
658 path_combine(backing_filename, sizeof(backing_filename),
659 filename, bs->backing_file);
660 }
661
662 if (bs->backing_format[0] != '\0') {
663 back_drv = bdrv_find_format(bs->backing_format);
664 }
665
 666 /* backing files are always opened read-only */
667 back_flags =
668 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
669
670 ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
671 if (ret < 0) {
672 bdrv_close(bs);
673 return ret;
674 }
675 if (bs->is_temporary) {
676 bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
677 } else {
678 /* base image inherits from "parent" */
679 bs->backing_hd->keep_read_only = bs->keep_read_only;
680 }
681 }
682
683 if (!bdrv_key_required(bs)) {
684 bdrv_dev_change_media_cb(bs, true);
685 }
686
687 return 0;
688
689unlink_and_fail:
690 if (bs->is_temporary) {
691 unlink(filename);
692 }
693 return ret;
694}
695
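/*
 * Illustrative sketch (not part of the original file): a minimal
 * open/read/close sequence using the functions above.  Guarded with "#if 0"
 * so it does not affect the build; the image path and device name are
 * examples only.
 */
#if 0
static int example_open_and_read(void)
{
    BlockDriverState *bs = bdrv_new("example-disk");
    uint8_t sector0[BDRV_SECTOR_SIZE];
    int ret;

    /* NULL driver: let find_image_format() probe the image format */
    ret = bdrv_open(bs, "/var/lib/images/test.qcow2", BDRV_O_RDWR, NULL);
    if (ret < 0) {
        bdrv_delete(bs);
        return ret;
    }

    ret = bdrv_read(bs, 0, sector0, 1);

    bdrv_delete(bs);            /* closes the image and frees bs */
    return ret;
}
#endif
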
696void bdrv_close(BlockDriverState *bs)
697{
698 if (bs->drv) {
699 if (bs == bs_snapshots) {
700 bs_snapshots = NULL;
701 }
702 if (bs->backing_hd) {
703 bdrv_delete(bs->backing_hd);
704 bs->backing_hd = NULL;
705 }
706 bs->drv->bdrv_close(bs);
707 g_free(bs->opaque);
708#ifdef _WIN32
709 if (bs->is_temporary) {
710 unlink(bs->filename);
711 }
712#endif
713 bs->opaque = NULL;
714 bs->drv = NULL;
715
716 if (bs->file != NULL) {
717 bdrv_close(bs->file);
718 }
719
720 bdrv_dev_change_media_cb(bs, false);
721 }
722}
723
724void bdrv_close_all(void)
725{
726 BlockDriverState *bs;
727
728 QTAILQ_FOREACH(bs, &bdrv_states, list) {
729 bdrv_close(bs);
730 }
731}
732
733/* make a BlockDriverState anonymous by removing it from the bdrv_states list.
 734 Also, NUL-terminate the device_name to prevent a double remove */
735void bdrv_make_anon(BlockDriverState *bs)
736{
737 if (bs->device_name[0] != '\0') {
738 QTAILQ_REMOVE(&bdrv_states, bs, list);
739 }
740 bs->device_name[0] = '\0';
741}
742
743void bdrv_delete(BlockDriverState *bs)
744{
745 assert(!bs->dev);
746
747 /* remove from list, if necessary */
748 bdrv_make_anon(bs);
749
750 bdrv_close(bs);
751 if (bs->file != NULL) {
752 bdrv_delete(bs->file);
753 }
754
755 assert(bs != bs_snapshots);
756 g_free(bs);
757}
758
759int bdrv_attach_dev(BlockDriverState *bs, void *dev)
760/* TODO change to DeviceState *dev when all users are qdevified */
761{
762 if (bs->dev) {
763 return -EBUSY;
764 }
765 bs->dev = dev;
766 bdrv_iostatus_reset(bs);
767 return 0;
768}
769
770/* TODO qdevified devices don't use this, remove when devices are qdevified */
771void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
772{
773 if (bdrv_attach_dev(bs, dev) < 0) {
774 abort();
775 }
776}
777
778void bdrv_detach_dev(BlockDriverState *bs, void *dev)
779/* TODO change to DeviceState *dev when all users are qdevified */
780{
781 assert(bs->dev == dev);
782 bs->dev = NULL;
783 bs->dev_ops = NULL;
784 bs->dev_opaque = NULL;
785 bs->buffer_alignment = 512;
786}
787
788/* TODO change to return DeviceState * when all users are qdevified */
789void *bdrv_get_attached_dev(BlockDriverState *bs)
790{
791 return bs->dev;
792}
793
794void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
795 void *opaque)
796{
797 bs->dev_ops = ops;
798 bs->dev_opaque = opaque;
799 if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
800 bs_snapshots = NULL;
801 }
802}
803
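/*
 * Illustrative sketch (not part of the original file): how a device model
 * hooks into the callbacks invoked through bs->dev_ops below.  Not compiled
 * ("#if 0"); "ExampleDevice" and the callback body are hypothetical.
 */
#if 0
typedef struct ExampleDevice {
    BlockDriverState *bs;
    bool media_present;
} ExampleDevice;

static void example_change_media_cb(void *opaque, bool load)
{
    ExampleDevice *dev = opaque;
    dev->media_present = load;  /* e.g. raise a guest "medium changed" event */
}

static const BlockDevOps example_block_ops = {
    .change_media_cb = example_change_media_cb,
    /* .is_tray_open, .is_medium_locked and .resize_cb are optional */
};

static void example_attach(ExampleDevice *dev, BlockDriverState *bs)
{
    bdrv_attach_dev_nofail(bs, dev);
    bdrv_set_dev_ops(bs, &example_block_ops, dev);
    dev->bs = bs;
}
#endif
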
804static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
805{
806 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
807 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
808 }
809}
810
811bool bdrv_dev_has_removable_media(BlockDriverState *bs)
812{
813 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
814}
815
816bool bdrv_dev_is_tray_open(BlockDriverState *bs)
817{
818 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
819 return bs->dev_ops->is_tray_open(bs->dev_opaque);
820 }
821 return false;
822}
823
824static void bdrv_dev_resize_cb(BlockDriverState *bs)
825{
826 if (bs->dev_ops && bs->dev_ops->resize_cb) {
827 bs->dev_ops->resize_cb(bs->dev_opaque);
828 }
829}
830
831bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
832{
833 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
834 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
835 }
836 return false;
837}
838
839/*
840 * Run consistency checks on an image
841 *
842 * Returns 0 if the check could be completed (it doesn't mean that the image is
843 * free of errors) or -errno when an internal error occurred. The results of the
844 * check are stored in res.
845 */
846int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
847{
848 if (bs->drv->bdrv_check == NULL) {
849 return -ENOTSUP;
850 }
851
852 memset(res, 0, sizeof(*res));
853 return bs->drv->bdrv_check(bs, res);
854}
855
856#define COMMIT_BUF_SECTORS 2048
857
858/* commit COW file into the backing image */
859int bdrv_commit(BlockDriverState *bs)
860{
861 BlockDriver *drv = bs->drv;
862 BlockDriver *backing_drv;
863 int64_t sector, total_sectors;
864 int n, ro, open_flags;
865 int ret = 0, rw_ret = 0;
866 uint8_t *buf;
867 char filename[1024];
868 BlockDriverState *bs_rw, *bs_ro;
869
870 if (!drv)
871 return -ENOMEDIUM;
872
873 if (!bs->backing_hd) {
874 return -ENOTSUP;
875 }
876
877 if (bs->backing_hd->keep_read_only) {
878 return -EACCES;
879 }
880
881 backing_drv = bs->backing_hd->drv;
882 ro = bs->backing_hd->read_only;
883 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
884 open_flags = bs->backing_hd->open_flags;
885
886 if (ro) {
887 /* re-open as RW */
888 bdrv_delete(bs->backing_hd);
889 bs->backing_hd = NULL;
890 bs_rw = bdrv_new("");
891 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
892 backing_drv);
893 if (rw_ret < 0) {
894 bdrv_delete(bs_rw);
895 /* try to re-open read-only */
896 bs_ro = bdrv_new("");
897 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
898 backing_drv);
899 if (ret < 0) {
900 bdrv_delete(bs_ro);
901 /* drive not functional anymore */
902 bs->drv = NULL;
903 return ret;
904 }
905 bs->backing_hd = bs_ro;
906 return rw_ret;
907 }
908 bs->backing_hd = bs_rw;
909 }
910
911 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
912 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
913
914 for (sector = 0; sector < total_sectors; sector += n) {
915 if (drv->bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
916
917 if (bdrv_read(bs, sector, buf, n) != 0) {
918 ret = -EIO;
919 goto ro_cleanup;
920 }
921
922 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
923 ret = -EIO;
924 goto ro_cleanup;
925 }
926 }
927 }
928
929 if (drv->bdrv_make_empty) {
930 ret = drv->bdrv_make_empty(bs);
931 bdrv_flush(bs);
932 }
933
934 /*
935 * Make sure all data we wrote to the backing device is actually
936 * stable on disk.
937 */
938 if (bs->backing_hd)
939 bdrv_flush(bs->backing_hd);
940
941ro_cleanup:
942 g_free(buf);
943
944 if (ro) {
945 /* re-open as RO */
946 bdrv_delete(bs->backing_hd);
947 bs->backing_hd = NULL;
948 bs_ro = bdrv_new("");
949 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
950 backing_drv);
951 if (ret < 0) {
952 bdrv_delete(bs_ro);
953 /* drive not functional anymore */
954 bs->drv = NULL;
955 return ret;
956 }
957 bs->backing_hd = bs_ro;
958 bs->backing_hd->keep_read_only = 0;
959 }
960
961 return ret;
962}
963
964void bdrv_commit_all(void)
965{
966 BlockDriverState *bs;
967
968 QTAILQ_FOREACH(bs, &bdrv_states, list) {
969 bdrv_commit(bs);
970 }
971}
972
973/*
974 * Return values:
975 * 0 - success
976 * -EINVAL - backing format specified, but no file
977 * -ENOSPC - can't update the backing file because no space is left in the
978 * image file header
979 * -ENOTSUP - format driver doesn't support changing the backing file
980 */
981int bdrv_change_backing_file(BlockDriverState *bs,
982 const char *backing_file, const char *backing_fmt)
983{
984 BlockDriver *drv = bs->drv;
985
986 if (drv->bdrv_change_backing_file != NULL) {
987 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
988 } else {
989 return -ENOTSUP;
990 }
991}
992
993static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
994 size_t size)
995{
996 int64_t len;
997
998 if (!bdrv_is_inserted(bs))
999 return -ENOMEDIUM;
1000
1001 if (bs->growable)
1002 return 0;
1003
1004 len = bdrv_getlength(bs);
1005
1006 if (offset < 0)
1007 return -EIO;
1008
1009 if ((offset > len) || (len - offset < size))
1010 return -EIO;
1011
1012 return 0;
1013}
1014
1015static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1016 int nb_sectors)
1017{
1018 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1019 nb_sectors * BDRV_SECTOR_SIZE);
1020}
1021
1022typedef struct RwCo {
1023 BlockDriverState *bs;
1024 int64_t sector_num;
1025 int nb_sectors;
1026 QEMUIOVector *qiov;
1027 bool is_write;
1028 int ret;
1029} RwCo;
1030
1031static void coroutine_fn bdrv_rw_co_entry(void *opaque)
1032{
1033 RwCo *rwco = opaque;
1034
1035 if (!rwco->is_write) {
1036 rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
1037 rwco->nb_sectors, rwco->qiov);
1038 } else {
1039 rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
1040 rwco->nb_sectors, rwco->qiov);
1041 }
1042}
1043
1044/*
1045 * Process a synchronous request using coroutines
1046 */
1047static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
1048 int nb_sectors, bool is_write)
1049{
1050 QEMUIOVector qiov;
1051 struct iovec iov = {
1052 .iov_base = (void *)buf,
1053 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1054 };
1055 Coroutine *co;
1056 RwCo rwco = {
1057 .bs = bs,
1058 .sector_num = sector_num,
1059 .nb_sectors = nb_sectors,
1060 .qiov = &qiov,
1061 .is_write = is_write,
1062 .ret = NOT_DONE,
1063 };
1064
1065 qemu_iovec_init_external(&qiov, &iov, 1);
1066
1067 if (qemu_in_coroutine()) {
1068 /* Fast-path if already in coroutine context */
1069 bdrv_rw_co_entry(&rwco);
1070 } else {
1071 co = qemu_coroutine_create(bdrv_rw_co_entry);
1072 qemu_coroutine_enter(co, &rwco);
1073 while (rwco.ret == NOT_DONE) {
1074 qemu_aio_wait();
1075 }
1076 }
1077 return rwco.ret;
1078}
1079
1080/* return < 0 if error. See bdrv_write() for the return codes */
1081int bdrv_read(BlockDriverState *bs, int64_t sector_num,
1082 uint8_t *buf, int nb_sectors)
1083{
1084 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
1085}
1086
1087static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
1088 int nb_sectors, int dirty)
1089{
1090 int64_t start, end;
1091 unsigned long val, idx, bit;
1092
1093 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
1094 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
1095
1096 for (; start <= end; start++) {
1097 idx = start / (sizeof(unsigned long) * 8);
1098 bit = start % (sizeof(unsigned long) * 8);
1099 val = bs->dirty_bitmap[idx];
1100 if (dirty) {
1101 if (!(val & (1UL << bit))) {
1102 bs->dirty_count++;
1103 val |= 1UL << bit;
1104 }
1105 } else {
1106 if (val & (1UL << bit)) {
1107 bs->dirty_count--;
1108 val &= ~(1UL << bit);
1109 }
1110 }
1111 bs->dirty_bitmap[idx] = val;
1112 }
1113}
1114
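/*
 * Illustrative sketch (not part of the original file): reading a single
 * sector's dirty state back with the same chunk/word/bit arithmetic used by
 * set_dirty_bitmap() above.  "example_sector_is_dirty" is a hypothetical
 * helper, guarded with "#if 0" so it does not affect the build.
 */
#if 0
static int example_sector_is_dirty(BlockDriverState *bs, int64_t sector_num)
{
    int64_t chunk = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
    unsigned long idx = chunk / (sizeof(unsigned long) * 8);
    unsigned long bit = chunk % (sizeof(unsigned long) * 8);

    if (!bs->dirty_bitmap) {
        return 0;               /* dirty tracking not enabled */
    }
    return (bs->dirty_bitmap[idx] & (1UL << bit)) != 0;
}
#endif
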
1115/* Return < 0 if error. Important errors are:
1116 -EIO generic I/O error (may happen for all errors)
1117 -ENOMEDIUM No media inserted.
1118 -EINVAL Invalid sector number or nb_sectors
 1119 -EACCES Trying to write to a read-only device
1120*/
1121int bdrv_write(BlockDriverState *bs, int64_t sector_num,
1122 const uint8_t *buf, int nb_sectors)
1123{
1124 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
1125}
1126
1127int bdrv_pread(BlockDriverState *bs, int64_t offset,
1128 void *buf, int count1)
1129{
1130 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
1131 int len, nb_sectors, count;
1132 int64_t sector_num;
1133 int ret;
1134
1135 count = count1;
1136 /* first read to align to sector start */
1137 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
1138 if (len > count)
1139 len = count;
1140 sector_num = offset >> BDRV_SECTOR_BITS;
1141 if (len > 0) {
1142 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1143 return ret;
1144 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
1145 count -= len;
1146 if (count == 0)
1147 return count1;
1148 sector_num++;
1149 buf += len;
1150 }
1151
1152 /* read the sectors "in place" */
1153 nb_sectors = count >> BDRV_SECTOR_BITS;
1154 if (nb_sectors > 0) {
1155 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1156 return ret;
1157 sector_num += nb_sectors;
1158 len = nb_sectors << BDRV_SECTOR_BITS;
1159 buf += len;
1160 count -= len;
1161 }
1162
1163 /* add data from the last sector */
1164 if (count > 0) {
1165 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1166 return ret;
1167 memcpy(buf, tmp_buf, count);
1168 }
1169 return count1;
1170}
1171
1172int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1173 const void *buf, int count1)
1174{
1175 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
1176 int len, nb_sectors, count;
1177 int64_t sector_num;
1178 int ret;
1179
1180 count = count1;
1181 /* first write to align to sector start */
1182 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
1183 if (len > count)
1184 len = count;
1185 sector_num = offset >> BDRV_SECTOR_BITS;
1186 if (len > 0) {
1187 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1188 return ret;
1189 memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
1190 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1191 return ret;
1192 count -= len;
1193 if (count == 0)
1194 return count1;
1195 sector_num++;
1196 buf += len;
1197 }
1198
1199 /* write the sectors "in place" */
1200 nb_sectors = count >> BDRV_SECTOR_BITS;
1201 if (nb_sectors > 0) {
1202 if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1203 return ret;
1204 sector_num += nb_sectors;
1205 len = nb_sectors << BDRV_SECTOR_BITS;
1206 buf += len;
1207 count -= len;
1208 }
1209
1210 /* add data from the last sector */
1211 if (count > 0) {
1212 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1213 return ret;
1214 memcpy(tmp_buf, buf, count);
1215 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1216 return ret;
1217 }
1218 return count1;
1219}
1220
1221/*
1222 * Writes to the file and ensures that no writes are reordered across this
1223 * request (acts as a barrier)
1224 *
1225 * Returns 0 on success, -errno in error cases.
1226 */
1227int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1228 const void *buf, int count)
1229{
1230 int ret;
1231
1232 ret = bdrv_pwrite(bs, offset, buf, count);
1233 if (ret < 0) {
1234 return ret;
1235 }
1236
1237 /* No flush needed for cache modes that use O_DSYNC */
1238 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
1239 bdrv_flush(bs);
1240 }
1241
1242 return 0;
1243}
1244
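/*
 * Illustrative sketch (not part of the original file): using
 * bdrv_pwrite_sync() for a metadata update that must be stable on disk before
 * the caller continues (a typical pattern for image format headers).  Not
 * compiled ("#if 0"); the header offset and layout are made up.
 */
#if 0
static int example_update_header_field(BlockDriverState *bs, uint64_t value)
{
    uint64_t be_value = cpu_to_be64(value);

    /* writes 8 bytes at byte offset 16 and flushes, unless the cache mode
     * already guarantees durability (see the BDRV_O_CACHE_WB check above) */
    return bdrv_pwrite_sync(bs, 16, &be_value, sizeof(be_value));
}
#endif
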
1245/*
1246 * Handle a read request in coroutine context
1247 */
1248static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
1249 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1250{
1251 BlockDriver *drv = bs->drv;
1252
1253 if (!drv) {
1254 return -ENOMEDIUM;
1255 }
1256 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1257 return -EIO;
1258 }
1259
1260 return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
1261}
1262
1263int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
1264 int nb_sectors, QEMUIOVector *qiov)
1265{
1266 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
1267
1268 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov);
1269}
1270
1271/*
1272 * Handle a write request in coroutine context
1273 */
1274static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
1275 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1276{
1277 BlockDriver *drv = bs->drv;
1278 int ret;
1279
1280 if (!bs->drv) {
1281 return -ENOMEDIUM;
1282 }
1283 if (bs->read_only) {
1284 return -EACCES;
1285 }
1286 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1287 return -EIO;
1288 }
1289
1290 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1291
1292 if (bs->dirty_bitmap) {
1293 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1294 }
1295
1296 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1297 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1298 }
1299
1300 return ret;
1301}
1302
1303int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1304 int nb_sectors, QEMUIOVector *qiov)
1305{
1306 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1307
1308 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov);
1309}
1310
1311/**
1312 * Truncate file to 'offset' bytes (needed only for file protocols)
1313 */
1314int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1315{
1316 BlockDriver *drv = bs->drv;
1317 int ret;
1318 if (!drv)
1319 return -ENOMEDIUM;
1320 if (!drv->bdrv_truncate)
1321 return -ENOTSUP;
1322 if (bs->read_only)
1323 return -EACCES;
1324 if (bdrv_in_use(bs))
1325 return -EBUSY;
1326 ret = drv->bdrv_truncate(bs, offset);
1327 if (ret == 0) {
1328 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
1329 bdrv_dev_resize_cb(bs);
1330 }
1331 return ret;
1332}
1333
1334/**
 1335 * Length of an allocated file in bytes. Sparse files are counted by actual
1336 * allocated space. Return < 0 if error or unknown.
1337 */
1338int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1339{
1340 BlockDriver *drv = bs->drv;
1341 if (!drv) {
1342 return -ENOMEDIUM;
1343 }
1344 if (drv->bdrv_get_allocated_file_size) {
1345 return drv->bdrv_get_allocated_file_size(bs);
1346 }
1347 if (bs->file) {
1348 return bdrv_get_allocated_file_size(bs->file);
1349 }
1350 return -ENOTSUP;
1351}
1352
1353/**
1354 * Length of a file in bytes. Return < 0 if error or unknown.
1355 */
1356int64_t bdrv_getlength(BlockDriverState *bs)
1357{
1358 BlockDriver *drv = bs->drv;
1359 if (!drv)
1360 return -ENOMEDIUM;
1361
1362 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
1363 if (drv->bdrv_getlength) {
1364 return drv->bdrv_getlength(bs);
1365 }
1366 }
1367 return bs->total_sectors * BDRV_SECTOR_SIZE;
1368}
1369
1370/* return 0 as the number of sectors if no device is present or on error */
1371void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
1372{
1373 int64_t length;
1374 length = bdrv_getlength(bs);
1375 if (length < 0)
1376 length = 0;
1377 else
1378 length = length >> BDRV_SECTOR_BITS;
1379 *nb_sectors_ptr = length;
1380}
1381
1382struct partition {
1383 uint8_t boot_ind; /* 0x80 - active */
1384 uint8_t head; /* starting head */
1385 uint8_t sector; /* starting sector */
1386 uint8_t cyl; /* starting cylinder */
1387 uint8_t sys_ind; /* What partition type */
1388 uint8_t end_head; /* end head */
1389 uint8_t end_sector; /* end sector */
1390 uint8_t end_cyl; /* end cylinder */
1391 uint32_t start_sect; /* starting sector counting from 0 */
1392 uint32_t nr_sects; /* nr of sectors in partition */
1393} QEMU_PACKED;
1394
1395/* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if it could not be guessed */
1396static int guess_disk_lchs(BlockDriverState *bs,
1397 int *pcylinders, int *pheads, int *psectors)
1398{
1399 uint8_t buf[BDRV_SECTOR_SIZE];
1400 int ret, i, heads, sectors, cylinders;
1401 struct partition *p;
1402 uint32_t nr_sects;
1403 uint64_t nb_sectors;
1404
1405 bdrv_get_geometry(bs, &nb_sectors);
1406
1407 ret = bdrv_read(bs, 0, buf, 1);
1408 if (ret < 0)
1409 return -1;
1410 /* test msdos magic */
1411 if (buf[510] != 0x55 || buf[511] != 0xaa)
1412 return -1;
1413 for(i = 0; i < 4; i++) {
1414 p = ((struct partition *)(buf + 0x1be)) + i;
1415 nr_sects = le32_to_cpu(p->nr_sects);
1416 if (nr_sects && p->end_head) {
1417 /* We make the assumption that the partition terminates on
1418 a cylinder boundary */
1419 heads = p->end_head + 1;
1420 sectors = p->end_sector & 63;
1421 if (sectors == 0)
1422 continue;
1423 cylinders = nb_sectors / (heads * sectors);
1424 if (cylinders < 1 || cylinders > 16383)
1425 continue;
1426 *pheads = heads;
1427 *psectors = sectors;
1428 *pcylinders = cylinders;
1429#if 0
1430 printf("guessed geometry: LCHS=%d %d %d\n",
1431 cylinders, heads, sectors);
1432#endif
1433 return 0;
1434 }
1435 }
1436 return -1;
1437}
1438
1439void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
1440{
1441 int translation, lba_detected = 0;
1442 int cylinders, heads, secs;
1443 uint64_t nb_sectors;
1444
1445 /* if a geometry hint is available, use it */
1446 bdrv_get_geometry(bs, &nb_sectors);
1447 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
1448 translation = bdrv_get_translation_hint(bs);
1449 if (cylinders != 0) {
1450 *pcyls = cylinders;
1451 *pheads = heads;
1452 *psecs = secs;
1453 } else {
1454 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
1455 if (heads > 16) {
1456 /* if heads > 16, it means that a BIOS LBA
1457 translation was active, so the default
1458 hardware geometry is OK */
1459 lba_detected = 1;
1460 goto default_geometry;
1461 } else {
1462 *pcyls = cylinders;
1463 *pheads = heads;
1464 *psecs = secs;
1465 /* disable any translation to be in sync with
1466 the logical geometry */
1467 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
1468 bdrv_set_translation_hint(bs,
1469 BIOS_ATA_TRANSLATION_NONE);
1470 }
1471 }
1472 } else {
1473 default_geometry:
1474 /* if no geometry, use a standard physical disk geometry */
1475 cylinders = nb_sectors / (16 * 63);
1476
1477 if (cylinders > 16383)
1478 cylinders = 16383;
1479 else if (cylinders < 2)
1480 cylinders = 2;
1481 *pcyls = cylinders;
1482 *pheads = 16;
1483 *psecs = 63;
1484 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
1485 if ((*pcyls * *pheads) <= 131072) {
1486 bdrv_set_translation_hint(bs,
1487 BIOS_ATA_TRANSLATION_LARGE);
1488 } else {
1489 bdrv_set_translation_hint(bs,
1490 BIOS_ATA_TRANSLATION_LBA);
1491 }
1492 }
1493 }
1494 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
1495 }
1496}
1497
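/*
 * Illustrative sketch (not part of the original file): a caller obtaining a
 * BIOS-compatible geometry for a drive, as IDE-style device models do.
 * Guarded with "#if 0" so it does not affect the build.
 */
#if 0
static void example_geometry(BlockDriverState *bs)
{
    int cylinders, heads, secs;

    /* uses a user-supplied hint if set, else the partition table, else a
     * 16-head / 63-sector default with cylinders derived from the size */
    bdrv_guess_geometry(bs, &cylinders, &heads, &secs);
    printf("geometry: %d/%d/%d\n", cylinders, heads, secs);
}
#endif
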
1498void bdrv_set_geometry_hint(BlockDriverState *bs,
1499 int cyls, int heads, int secs)
1500{
1501 bs->cyls = cyls;
1502 bs->heads = heads;
1503 bs->secs = secs;
1504}
1505
1506void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
1507{
1508 bs->translation = translation;
1509}
1510
1511void bdrv_get_geometry_hint(BlockDriverState *bs,
1512 int *pcyls, int *pheads, int *psecs)
1513{
1514 *pcyls = bs->cyls;
1515 *pheads = bs->heads;
1516 *psecs = bs->secs;
1517}
1518
1519/* Recognize floppy formats */
1520typedef struct FDFormat {
1521 FDriveType drive;
1522 uint8_t last_sect;
1523 uint8_t max_track;
1524 uint8_t max_head;
1525} FDFormat;
1526
1527static const FDFormat fd_formats[] = {
1528 /* First entry is default format */
1529 /* 1.44 MB 3"1/2 floppy disks */
1530 { FDRIVE_DRV_144, 18, 80, 1, },
1531 { FDRIVE_DRV_144, 20, 80, 1, },
1532 { FDRIVE_DRV_144, 21, 80, 1, },
1533 { FDRIVE_DRV_144, 21, 82, 1, },
1534 { FDRIVE_DRV_144, 21, 83, 1, },
1535 { FDRIVE_DRV_144, 22, 80, 1, },
1536 { FDRIVE_DRV_144, 23, 80, 1, },
1537 { FDRIVE_DRV_144, 24, 80, 1, },
1538 /* 2.88 MB 3"1/2 floppy disks */
1539 { FDRIVE_DRV_288, 36, 80, 1, },
1540 { FDRIVE_DRV_288, 39, 80, 1, },
1541 { FDRIVE_DRV_288, 40, 80, 1, },
1542 { FDRIVE_DRV_288, 44, 80, 1, },
1543 { FDRIVE_DRV_288, 48, 80, 1, },
1544 /* 720 kB 3"1/2 floppy disks */
1545 { FDRIVE_DRV_144, 9, 80, 1, },
1546 { FDRIVE_DRV_144, 10, 80, 1, },
1547 { FDRIVE_DRV_144, 10, 82, 1, },
1548 { FDRIVE_DRV_144, 10, 83, 1, },
1549 { FDRIVE_DRV_144, 13, 80, 1, },
1550 { FDRIVE_DRV_144, 14, 80, 1, },
1551 /* 1.2 MB 5"1/4 floppy disks */
1552 { FDRIVE_DRV_120, 15, 80, 1, },
1553 { FDRIVE_DRV_120, 18, 80, 1, },
1554 { FDRIVE_DRV_120, 18, 82, 1, },
1555 { FDRIVE_DRV_120, 18, 83, 1, },
1556 { FDRIVE_DRV_120, 20, 80, 1, },
1557 /* 720 kB 5"1/4 floppy disks */
1558 { FDRIVE_DRV_120, 9, 80, 1, },
1559 { FDRIVE_DRV_120, 11, 80, 1, },
1560 /* 360 kB 5"1/4 floppy disks */
1561 { FDRIVE_DRV_120, 9, 40, 1, },
1562 { FDRIVE_DRV_120, 9, 40, 0, },
1563 { FDRIVE_DRV_120, 10, 41, 1, },
1564 { FDRIVE_DRV_120, 10, 42, 1, },
1565 /* 320 kB 5"1/4 floppy disks */
1566 { FDRIVE_DRV_120, 8, 40, 1, },
1567 { FDRIVE_DRV_120, 8, 40, 0, },
1568 /* 360 kB must match 5"1/4 better than 3"1/2... */
1569 { FDRIVE_DRV_144, 9, 80, 0, },
1570 /* end */
1571 { FDRIVE_DRV_NONE, -1, -1, 0, },
1572};
1573
1574void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
1575 int *max_track, int *last_sect,
1576 FDriveType drive_in, FDriveType *drive)
1577{
1578 const FDFormat *parse;
1579 uint64_t nb_sectors, size;
1580 int i, first_match, match;
1581
1582 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
1583 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
1584 /* User defined disk */
1585 } else {
1586 bdrv_get_geometry(bs, &nb_sectors);
1587 match = -1;
1588 first_match = -1;
1589 for (i = 0; ; i++) {
1590 parse = &fd_formats[i];
1591 if (parse->drive == FDRIVE_DRV_NONE) {
1592 break;
1593 }
1594 if (drive_in == parse->drive ||
1595 drive_in == FDRIVE_DRV_NONE) {
1596 size = (parse->max_head + 1) * parse->max_track *
1597 parse->last_sect;
1598 if (nb_sectors == size) {
1599 match = i;
1600 break;
1601 }
1602 if (first_match == -1) {
1603 first_match = i;
1604 }
1605 }
1606 }
1607 if (match == -1) {
1608 if (first_match == -1) {
1609 match = 1;
1610 } else {
1611 match = first_match;
1612 }
1613 parse = &fd_formats[match];
1614 }
1615 *nb_heads = parse->max_head + 1;
1616 *max_track = parse->max_track;
1617 *last_sect = parse->last_sect;
1618 *drive = parse->drive;
1619 }
1620}
1621
1622int bdrv_get_translation_hint(BlockDriverState *bs)
1623{
1624 return bs->translation;
1625}
1626
1627void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
1628 BlockErrorAction on_write_error)
1629{
1630 bs->on_read_error = on_read_error;
1631 bs->on_write_error = on_write_error;
1632}
1633
1634BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
1635{
1636 return is_read ? bs->on_read_error : bs->on_write_error;
1637}
1638
1639int bdrv_is_read_only(BlockDriverState *bs)
1640{
1641 return bs->read_only;
1642}
1643
1644int bdrv_is_sg(BlockDriverState *bs)
1645{
1646 return bs->sg;
1647}
1648
1649int bdrv_enable_write_cache(BlockDriverState *bs)
1650{
1651 return bs->enable_write_cache;
1652}
1653
1654int bdrv_is_encrypted(BlockDriverState *bs)
1655{
1656 if (bs->backing_hd && bs->backing_hd->encrypted)
1657 return 1;
1658 return bs->encrypted;
1659}
1660
1661int bdrv_key_required(BlockDriverState *bs)
1662{
1663 BlockDriverState *backing_hd = bs->backing_hd;
1664
1665 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
1666 return 1;
1667 return (bs->encrypted && !bs->valid_key);
1668}
1669
1670int bdrv_set_key(BlockDriverState *bs, const char *key)
1671{
1672 int ret;
1673 if (bs->backing_hd && bs->backing_hd->encrypted) {
1674 ret = bdrv_set_key(bs->backing_hd, key);
1675 if (ret < 0)
1676 return ret;
1677 if (!bs->encrypted)
1678 return 0;
1679 }
1680 if (!bs->encrypted) {
1681 return -EINVAL;
1682 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
1683 return -ENOMEDIUM;
1684 }
1685 ret = bs->drv->bdrv_set_key(bs, key);
1686 if (ret < 0) {
1687 bs->valid_key = 0;
1688 } else if (!bs->valid_key) {
1689 bs->valid_key = 1;
1690 /* call the change callback now, we skipped it on open */
1691 bdrv_dev_change_media_cb(bs, true);
1692 }
1693 return ret;
1694}
1695
1696void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
1697{
1698 if (!bs->drv) {
1699 buf[0] = '\0';
1700 } else {
1701 pstrcpy(buf, buf_size, bs->drv->format_name);
1702 }
1703}
1704
1705void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
1706 void *opaque)
1707{
1708 BlockDriver *drv;
1709
1710 QLIST_FOREACH(drv, &bdrv_drivers, list) {
1711 it(opaque, drv->format_name);
1712 }
1713}
1714
1715BlockDriverState *bdrv_find(const char *name)
1716{
1717 BlockDriverState *bs;
1718
1719 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1720 if (!strcmp(name, bs->device_name)) {
1721 return bs;
1722 }
1723 }
1724 return NULL;
1725}
1726
1727BlockDriverState *bdrv_next(BlockDriverState *bs)
1728{
1729 if (!bs) {
1730 return QTAILQ_FIRST(&bdrv_states);
1731 }
1732 return QTAILQ_NEXT(bs, list);
1733}
1734
1735void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
1736{
1737 BlockDriverState *bs;
1738
1739 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1740 it(opaque, bs);
1741 }
1742}
1743
1744const char *bdrv_get_device_name(BlockDriverState *bs)
1745{
1746 return bs->device_name;
1747}
1748
1749void bdrv_flush_all(void)
1750{
1751 BlockDriverState *bs;
1752
1753 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1754 if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
1755 bdrv_flush(bs);
1756 }
1757 }
1758}
1759
1760int bdrv_has_zero_init(BlockDriverState *bs)
1761{
1762 assert(bs->drv);
1763
1764 if (bs->drv->bdrv_has_zero_init) {
1765 return bs->drv->bdrv_has_zero_init(bs);
1766 }
1767
1768 return 1;
1769}
1770
1771int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
1772{
1773 if (!bs->drv) {
1774 return -ENOMEDIUM;
1775 }
1776 if (!bs->drv->bdrv_discard) {
1777 return 0;
1778 }
1779 return bs->drv->bdrv_discard(bs, sector_num, nb_sectors);
1780}
1781
1782/*
1783 * Returns true iff the specified sector is present in the disk image. Drivers
1784 * not implementing the functionality are assumed to not support backing files,
1785 * hence all their sectors are reported as allocated.
1786 *
1787 * 'pnum' is set to the number of sectors (including and immediately following
1788 * the specified sector) that are known to be in the same
1789 * allocated/unallocated state.
1790 *
1791 * 'nb_sectors' is the max value 'pnum' should be set to.
1792 */
1793int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1794 int *pnum)
1795{
1796 int64_t n;
1797 if (!bs->drv->bdrv_is_allocated) {
1798 if (sector_num >= bs->total_sectors) {
1799 *pnum = 0;
1800 return 0;
1801 }
1802 n = bs->total_sectors - sector_num;
1803 *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1804 return 1;
1805 }
1806 return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1807}
1808
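/*
 * Illustrative sketch (not part of the original file): walking an image with
 * bdrv_is_allocated() to count how many sectors are backed by the top image
 * rather than by its backing file.  Not compiled ("#if 0").
 */
#if 0
static int64_t example_count_allocated_sectors(BlockDriverState *bs)
{
    int64_t sector_num = 0, allocated = 0;
    int64_t total = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
    int n;

    while (sector_num < total) {
        int nb = 65536;                 /* query in 32 MB steps */
        if (nb > total - sector_num) {
            nb = total - sector_num;
        }
        if (bdrv_is_allocated(bs, sector_num, nb, &n)) {
            allocated += n;
        }
        sector_num += n;
    }
    return allocated;
}
#endif
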
1809void bdrv_mon_event(const BlockDriverState *bdrv,
1810 BlockMonEventAction action, int is_read)
1811{
1812 QObject *data;
1813 const char *action_str;
1814
1815 switch (action) {
1816 case BDRV_ACTION_REPORT:
1817 action_str = "report";
1818 break;
1819 case BDRV_ACTION_IGNORE:
1820 action_str = "ignore";
1821 break;
1822 case BDRV_ACTION_STOP:
1823 action_str = "stop";
1824 break;
1825 default:
1826 abort();
1827 }
1828
1829 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1830 bdrv->device_name,
1831 action_str,
1832 is_read ? "read" : "write");
1833 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1834
1835 qobject_decref(data);
1836}
1837
1838static void bdrv_print_dict(QObject *obj, void *opaque)
1839{
1840 QDict *bs_dict;
1841 Monitor *mon = opaque;
1842
1843 bs_dict = qobject_to_qdict(obj);
1844
1845 monitor_printf(mon, "%s: removable=%d",
1846 qdict_get_str(bs_dict, "device"),
1847 qdict_get_bool(bs_dict, "removable"));
1848
1849 if (qdict_get_bool(bs_dict, "removable")) {
1850 monitor_printf(mon, " locked=%d", qdict_get_bool(bs_dict, "locked"));
1851 monitor_printf(mon, " tray-open=%d",
1852 qdict_get_bool(bs_dict, "tray-open"));
1853 }
1854
1855 if (qdict_haskey(bs_dict, "io-status")) {
1856 monitor_printf(mon, " io-status=%s", qdict_get_str(bs_dict, "io-status"));
1857 }
1858
1859 if (qdict_haskey(bs_dict, "inserted")) {
1860 QDict *qdict = qobject_to_qdict(qdict_get(bs_dict, "inserted"));
1861
1862 monitor_printf(mon, " file=");
1863 monitor_print_filename(mon, qdict_get_str(qdict, "file"));
1864 if (qdict_haskey(qdict, "backing_file")) {
1865 monitor_printf(mon, " backing_file=");
1866 monitor_print_filename(mon, qdict_get_str(qdict, "backing_file"));
1867 }
1868 monitor_printf(mon, " ro=%d drv=%s encrypted=%d",
1869 qdict_get_bool(qdict, "ro"),
1870 qdict_get_str(qdict, "drv"),
1871 qdict_get_bool(qdict, "encrypted"));
1872 } else {
1873 monitor_printf(mon, " [not inserted]");
1874 }
1875
1876 monitor_printf(mon, "\n");
1877}
1878
1879void bdrv_info_print(Monitor *mon, const QObject *data)
1880{
1881 qlist_iter(qobject_to_qlist(data), bdrv_print_dict, mon);
1882}
1883
1884static const char *const io_status_name[BDRV_IOS_MAX] = {
1885 [BDRV_IOS_OK] = "ok",
1886 [BDRV_IOS_FAILED] = "failed",
1887 [BDRV_IOS_ENOSPC] = "nospace",
1888};
1889
1890void bdrv_info(Monitor *mon, QObject **ret_data)
1891{
1892 QList *bs_list;
1893 BlockDriverState *bs;
1894
1895 bs_list = qlist_new();
1896
1897 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1898 QObject *bs_obj;
1899 QDict *bs_dict;
1900
1901 bs_obj = qobject_from_jsonf("{ 'device': %s, 'type': 'unknown', "
1902 "'removable': %i, 'locked': %i }",
1903 bs->device_name,
1904 bdrv_dev_has_removable_media(bs),
1905 bdrv_dev_is_medium_locked(bs));
1906 bs_dict = qobject_to_qdict(bs_obj);
1907
1908 if (bdrv_dev_has_removable_media(bs)) {
1909 qdict_put(bs_dict, "tray-open",
1910 qbool_from_int(bdrv_dev_is_tray_open(bs)));
1911 }
1912
1913 if (bdrv_iostatus_is_enabled(bs)) {
1914 qdict_put(bs_dict, "io-status",
1915 qstring_from_str(io_status_name[bs->iostatus]));
1916 }
1917
1918 if (bs->drv) {
1919 QObject *obj;
1920
1921 obj = qobject_from_jsonf("{ 'file': %s, 'ro': %i, 'drv': %s, "
1922 "'encrypted': %i }",
1923 bs->filename, bs->read_only,
1924 bs->drv->format_name,
1925 bdrv_is_encrypted(bs));
1926 if (bs->backing_file[0] != '\0') {
1927 QDict *qdict = qobject_to_qdict(obj);
1928 qdict_put(qdict, "backing_file",
1929 qstring_from_str(bs->backing_file));
1930 }
1931
1932 qdict_put_obj(bs_dict, "inserted", obj);
1933 }
1934 qlist_append_obj(bs_list, bs_obj);
1935 }
1936
1937 *ret_data = QOBJECT(bs_list);
1938}
1939
1940static void bdrv_stats_iter(QObject *data, void *opaque)
1941{
1942 QDict *qdict;
1943 Monitor *mon = opaque;
1944
1945 qdict = qobject_to_qdict(data);
1946 monitor_printf(mon, "%s:", qdict_get_str(qdict, "device"));
1947
1948 qdict = qobject_to_qdict(qdict_get(qdict, "stats"));
1949 monitor_printf(mon, " rd_bytes=%" PRId64
1950 " wr_bytes=%" PRId64
1951 " rd_operations=%" PRId64
1952 " wr_operations=%" PRId64
1953 " flush_operations=%" PRId64
1954 " wr_total_time_ns=%" PRId64
1955 " rd_total_time_ns=%" PRId64
1956 " flush_total_time_ns=%" PRId64
1957 "\n",
1958 qdict_get_int(qdict, "rd_bytes"),
1959 qdict_get_int(qdict, "wr_bytes"),
1960 qdict_get_int(qdict, "rd_operations"),
1961 qdict_get_int(qdict, "wr_operations"),
1962 qdict_get_int(qdict, "flush_operations"),
1963 qdict_get_int(qdict, "wr_total_time_ns"),
1964 qdict_get_int(qdict, "rd_total_time_ns"),
1965 qdict_get_int(qdict, "flush_total_time_ns"));
1966}
1967
1968void bdrv_stats_print(Monitor *mon, const QObject *data)
1969{
1970 qlist_iter(qobject_to_qlist(data), bdrv_stats_iter, mon);
1971}
1972
1973static QObject* bdrv_info_stats_bs(BlockDriverState *bs)
1974{
1975 QObject *res;
1976 QDict *dict;
1977
1978 res = qobject_from_jsonf("{ 'stats': {"
1979 "'rd_bytes': %" PRId64 ","
1980 "'wr_bytes': %" PRId64 ","
1981 "'rd_operations': %" PRId64 ","
1982 "'wr_operations': %" PRId64 ","
1983 "'wr_highest_offset': %" PRId64 ","
1984 "'flush_operations': %" PRId64 ","
1985 "'wr_total_time_ns': %" PRId64 ","
1986 "'rd_total_time_ns': %" PRId64 ","
1987 "'flush_total_time_ns': %" PRId64
1988 "} }",
1989 bs->nr_bytes[BDRV_ACCT_READ],
1990 bs->nr_bytes[BDRV_ACCT_WRITE],
1991 bs->nr_ops[BDRV_ACCT_READ],
1992 bs->nr_ops[BDRV_ACCT_WRITE],
1993 bs->wr_highest_sector *
1994 (uint64_t)BDRV_SECTOR_SIZE,
1995 bs->nr_ops[BDRV_ACCT_FLUSH],
1996 bs->total_time_ns[BDRV_ACCT_WRITE],
1997 bs->total_time_ns[BDRV_ACCT_READ],
1998 bs->total_time_ns[BDRV_ACCT_FLUSH]);
1999 dict = qobject_to_qdict(res);
2000
2001 if (*bs->device_name) {
2002 qdict_put(dict, "device", qstring_from_str(bs->device_name));
2003 }
2004
2005 if (bs->file) {
2006 QObject *parent = bdrv_info_stats_bs(bs->file);
2007 qdict_put_obj(dict, "parent", parent);
2008 }
2009
2010 return res;
2011}
2012
2013void bdrv_info_stats(Monitor *mon, QObject **ret_data)
2014{
2015 QObject *obj;
2016 QList *devices;
2017 BlockDriverState *bs;
2018
2019 devices = qlist_new();
2020
2021 QTAILQ_FOREACH(bs, &bdrv_states, list) {
2022 obj = bdrv_info_stats_bs(bs);
2023 qlist_append_obj(devices, obj);
2024 }
2025
2026 *ret_data = QOBJECT(devices);
2027}
2028
2029const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2030{
2031 if (bs->backing_hd && bs->backing_hd->encrypted)
2032 return bs->backing_file;
2033 else if (bs->encrypted)
2034 return bs->filename;
2035 else
2036 return NULL;
2037}
2038
2039void bdrv_get_backing_filename(BlockDriverState *bs,
2040 char *filename, int filename_size)
2041{
2042 if (!bs->backing_file) {
2043 pstrcpy(filename, filename_size, "");
2044 } else {
2045 pstrcpy(filename, filename_size, bs->backing_file);
2046 }
2047}
2048
2049int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
2050 const uint8_t *buf, int nb_sectors)
2051{
2052 BlockDriver *drv = bs->drv;
2053 if (!drv)
2054 return -ENOMEDIUM;
2055 if (!drv->bdrv_write_compressed)
2056 return -ENOTSUP;
2057 if (bdrv_check_request(bs, sector_num, nb_sectors))
2058 return -EIO;
2059
2060 if (bs->dirty_bitmap) {
2061 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2062 }
2063
2064 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2065}
2066
2067int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2068{
2069 BlockDriver *drv = bs->drv;
2070 if (!drv)
2071 return -ENOMEDIUM;
2072 if (!drv->bdrv_get_info)
2073 return -ENOTSUP;
2074 memset(bdi, 0, sizeof(*bdi));
2075 return drv->bdrv_get_info(bs, bdi);
2076}
2077
2078int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2079 int64_t pos, int size)
2080{
2081 BlockDriver *drv = bs->drv;
2082 if (!drv)
2083 return -ENOMEDIUM;
2084 if (drv->bdrv_save_vmstate)
2085 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2086 if (bs->file)
2087 return bdrv_save_vmstate(bs->file, buf, pos, size);
2088 return -ENOTSUP;
2089}
2090
2091int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2092 int64_t pos, int size)
2093{
2094 BlockDriver *drv = bs->drv;
2095 if (!drv)
2096 return -ENOMEDIUM;
2097 if (drv->bdrv_load_vmstate)
2098 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2099 if (bs->file)
2100 return bdrv_load_vmstate(bs->file, buf, pos, size);
2101 return -ENOTSUP;
2102}
2103
2104void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2105{
2106 BlockDriver *drv = bs->drv;
2107
2108 if (!drv || !drv->bdrv_debug_event) {
2109 return;
2110 }
2111
2112 return drv->bdrv_debug_event(bs, event);
2113
2114}
2115
2116/**************************************************************/
2117/* handling of snapshots */
2118
2119int bdrv_can_snapshot(BlockDriverState *bs)
2120{
2121 BlockDriver *drv = bs->drv;
2122 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
2123 return 0;
2124 }
2125
2126 if (!drv->bdrv_snapshot_create) {
2127 if (bs->file != NULL) {
2128 return bdrv_can_snapshot(bs->file);
2129 }
2130 return 0;
2131 }
2132
2133 return 1;
2134}
2135
2136int bdrv_is_snapshot(BlockDriverState *bs)
2137{
2138 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2139}
2140
2141BlockDriverState *bdrv_snapshots(void)
2142{
2143 BlockDriverState *bs;
2144
2145 if (bs_snapshots) {
2146 return bs_snapshots;
2147 }
2148
2149 bs = NULL;
2150 while ((bs = bdrv_next(bs))) {
2151 if (bdrv_can_snapshot(bs)) {
2152 bs_snapshots = bs;
2153 return bs;
2154 }
2155 }
2156 return NULL;
2157}
2158
2159int bdrv_snapshot_create(BlockDriverState *bs,
2160 QEMUSnapshotInfo *sn_info)
2161{
2162 BlockDriver *drv = bs->drv;
2163 if (!drv)
2164 return -ENOMEDIUM;
2165 if (drv->bdrv_snapshot_create)
2166 return drv->bdrv_snapshot_create(bs, sn_info);
2167 if (bs->file)
2168 return bdrv_snapshot_create(bs->file, sn_info);
2169 return -ENOTSUP;
2170}
2171
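/* Revert the image to the given snapshot.  Format drivers implementing
 * bdrv_snapshot_goto handle this directly.  Otherwise, when the snapshot
 * lives in the protocol layer, the format driver is closed, the snapshot
 * switch is performed on bs->file and the format driver is reopened; if the
 * reopen fails, the BlockDriverState is left without a driver and the reopen
 * error is returned. */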
2172int bdrv_snapshot_goto(BlockDriverState *bs,
2173 const char *snapshot_id)
2174{
2175 BlockDriver *drv = bs->drv;
2176 int ret, open_ret;
2177
2178 if (!drv)
2179 return -ENOMEDIUM;
2180 if (drv->bdrv_snapshot_goto)
2181 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2182
2183 if (bs->file) {
2184 drv->bdrv_close(bs);
2185 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2186 open_ret = drv->bdrv_open(bs, bs->open_flags);
2187 if (open_ret < 0) {
2188 bdrv_delete(bs->file);
2189 bs->drv = NULL;
2190 return open_ret;
2191 }
2192 return ret;
2193 }
2194
2195 return -ENOTSUP;
2196}
2197
2198int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2199{
2200 BlockDriver *drv = bs->drv;
2201 if (!drv)
2202 return -ENOMEDIUM;
2203 if (drv->bdrv_snapshot_delete)
2204 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2205 if (bs->file)
2206 return bdrv_snapshot_delete(bs->file, snapshot_id);
2207 return -ENOTSUP;
2208}
2209
2210int bdrv_snapshot_list(BlockDriverState *bs,
2211 QEMUSnapshotInfo **psn_info)
2212{
2213 BlockDriver *drv = bs->drv;
2214 if (!drv)
2215 return -ENOMEDIUM;
2216 if (drv->bdrv_snapshot_list)
2217 return drv->bdrv_snapshot_list(bs, psn_info);
2218 if (bs->file)
2219 return bdrv_snapshot_list(bs->file, psn_info);
2220 return -ENOTSUP;
2221}
2222
2223int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2224 const char *snapshot_name)
2225{
2226 BlockDriver *drv = bs->drv;
2227 if (!drv) {
2228 return -ENOMEDIUM;
2229 }
2230 if (!bs->read_only) {
2231 return -EINVAL;
2232 }
2233 if (drv->bdrv_snapshot_load_tmp) {
2234 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2235 }
2236 return -ENOTSUP;
2237}
2238
2239#define NB_SUFFIXES 4
2240
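/* Format 'size' as a short human-readable string using powers of 1024 and
 * the K/M/G/T suffixes.  For instance: 999 -> "999", 1536 -> "1.5K",
 * 1048576 -> "1.0M". */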
2241char *get_human_readable_size(char *buf, int buf_size, int64_t size)
2242{
2243 static const char suffixes[NB_SUFFIXES] = "KMGT";
2244 int64_t base;
2245 int i;
2246
2247 if (size <= 999) {
2248 snprintf(buf, buf_size, "%" PRId64, size);
2249 } else {
2250 base = 1024;
2251 for(i = 0; i < NB_SUFFIXES; i++) {
2252 if (size < (10 * base)) {
2253 snprintf(buf, buf_size, "%0.1f%c",
2254 (double)size / base,
2255 suffixes[i]);
2256 break;
2257 } else if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
2258 snprintf(buf, buf_size, "%" PRId64 "%c",
2259 ((size + (base >> 1)) / base),
2260 suffixes[i]);
2261 break;
2262 }
2263 base = base * 1024;
2264 }
2265 }
2266 return buf;
2267}
2268
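/* Format a snapshot description as one fixed-width table row in 'buf'.
 * When 'sn' is NULL, the header row (ID, TAG, VM SIZE, DATE, VM CLOCK) is
 * produced instead of a snapshot entry. */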
2269char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2270{
2271 char buf1[128], date_buf[128], clock_buf[128];
2272#ifdef _WIN32
2273 struct tm *ptm;
2274#else
2275 struct tm tm;
2276#endif
2277 time_t ti;
2278 int64_t secs;
2279
2280 if (!sn) {
2281 snprintf(buf, buf_size,
2282 "%-10s%-20s%7s%20s%15s",
2283 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2284 } else {
2285 ti = sn->date_sec;
2286#ifdef _WIN32
2287 ptm = localtime(&ti);
2288 strftime(date_buf, sizeof(date_buf),
2289 "%Y-%m-%d %H:%M:%S", ptm);
2290#else
2291 localtime_r(&ti, &tm);
2292 strftime(date_buf, sizeof(date_buf),
2293 "%Y-%m-%d %H:%M:%S", &tm);
2294#endif
2295 secs = sn->vm_clock_nsec / 1000000000;
2296 snprintf(clock_buf, sizeof(clock_buf),
2297 "%02d:%02d:%02d.%03d",
2298 (int)(secs / 3600),
2299 (int)((secs / 60) % 60),
2300 (int)(secs % 60),
2301 (int)((sn->vm_clock_nsec / 1000000) % 1000));
2302 snprintf(buf, buf_size,
2303 "%-10s%-20s%7s%20s%15s",
2304 sn->id_str, sn->name,
2305 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2306 date_buf,
2307 clock_buf);
2308 }
2309 return buf;
2310}
2311
2312/**************************************************************/
2313/* async I/Os */
2314
2315BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
2316 QEMUIOVector *qiov, int nb_sectors,
2317 BlockDriverCompletionFunc *cb, void *opaque)
2318{
2319 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2320
2321 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
2322 cb, opaque, false);
2323}
2324
2325BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2326 QEMUIOVector *qiov, int nb_sectors,
2327 BlockDriverCompletionFunc *cb, void *opaque)
2328{
2329 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2330
2331 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
2332 cb, opaque, true);
2333}
2334
2335
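/* Tracks completion of a batch of write requests submitted through
 * bdrv_aio_multiwrite().  num_requests counts the AIOs still in flight (plus
 * one dummy reference held during submission); when it reaches zero, the
 * original per-request callbacks stored in callbacks[] are invoked with the
 * first error seen, and any temporary qiov/bounce buffers created while
 * merging requests are freed. */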
2336typedef struct MultiwriteCB {
2337 int error;
2338 int num_requests;
2339 int num_callbacks;
2340 struct {
2341 BlockDriverCompletionFunc *cb;
2342 void *opaque;
2343 QEMUIOVector *free_qiov;
2344 void *free_buf;
2345 } callbacks[];
2346} MultiwriteCB;
2347
2348static void multiwrite_user_cb(MultiwriteCB *mcb)
2349{
2350 int i;
2351
2352 for (i = 0; i < mcb->num_callbacks; i++) {
2353 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
2354 if (mcb->callbacks[i].free_qiov) {
2355 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2356 }
2357 g_free(mcb->callbacks[i].free_qiov);
2358 qemu_vfree(mcb->callbacks[i].free_buf);
2359 }
2360}
2361
2362static void multiwrite_cb(void *opaque, int ret)
2363{
2364 MultiwriteCB *mcb = opaque;
2365
2366 trace_multiwrite_cb(mcb, ret);
2367
2368 if (ret < 0 && !mcb->error) {
2369 mcb->error = ret;
2370 }
2371
2372 mcb->num_requests--;
2373 if (mcb->num_requests == 0) {
2374 multiwrite_user_cb(mcb);
2375 g_free(mcb);
2376 }
2377}
2378
2379static int multiwrite_req_compare(const void *a, const void *b)
2380{
2381 const BlockRequest *req1 = a, *req2 = b;
2382
2383 /*
2384 * Note that we can't simply subtract req2->sector from req1->sector
2385 * here as that could overflow the return value.
2386 */
2387 if (req1->sector > req2->sector) {
2388 return 1;
2389 } else if (req1->sector < req2->sector) {
2390 return -1;
2391 } else {
2392 return 0;
2393 }
2394}
2395
2396/*
2397 * Takes a bunch of requests and tries to merge them. Returns the number of
2398 * requests that remain after merging.
2399 */
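/* Illustrative example (added note, not from the original source): two
 * requests covering sectors [0,8) and [8,16) are exactly sequential, so they
 * are merged into a single request for sectors [0,16) whose qiov is the
 * concatenation of the two original vectors; a gap between the requests
 * would instead be padded with a zeroed bounce buffer. */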
2400static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2401 int num_reqs, MultiwriteCB *mcb)
2402{
2403 int i, outidx;
2404
2405 // Sort requests by start sector
2406 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2407
2408 // Check if adjacent requests touch the same clusters. If so, combine them,
2409 // filling up gaps with zero sectors.
2410 outidx = 0;
2411 for (i = 1; i < num_reqs; i++) {
2412 int merge = 0;
2413 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2414
2415 // This handles the cases that are valid for all block drivers, namely
2416 // exactly sequential writes and overlapping writes.
2417 if (reqs[i].sector <= oldreq_last) {
2418 merge = 1;
2419 }
2420
2421 // The block driver may decide that it makes sense to combine requests
2422 // even if there is a gap of some sectors between them. In this case,
2423 // the gap is filled with zeros (which is therefore only applicable to
2424 // not-yet-allocated space in formats such as qcow2).
2425 if (!merge && bs->drv->bdrv_merge_requests) {
2426 merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2427 }
2428
2429 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2430 merge = 0;
2431 }
2432
2433 if (merge) {
2434 size_t size;
2435 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
2436 qemu_iovec_init(qiov,
2437 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2438
2439 // Add the first request to the merged one. If the requests are
2440 // overlapping, drop the last sectors of the first request.
2441 size = (reqs[i].sector - reqs[outidx].sector) << 9;
2442 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2443
2444 // We might need to add some zeros between the two requests
2445 if (reqs[i].sector > oldreq_last) {
2446 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2447 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2448 memset(buf, 0, zero_bytes);
2449 qemu_iovec_add(qiov, buf, zero_bytes);
2450 mcb->callbacks[i].free_buf = buf;
2451 }
2452
2453 // Add the second request
2454 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2455
2456 reqs[outidx].nb_sectors = qiov->size >> 9;
2457 reqs[outidx].qiov = qiov;
2458
2459 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2460 } else {
2461 outidx++;
2462 reqs[outidx].sector = reqs[i].sector;
2463 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2464 reqs[outidx].qiov = reqs[i].qiov;
2465 }
2466 }
2467
2468 return outidx + 1;
2469}
2470
2471/*
2472 * Submit multiple AIO write requests at once.
2473 *
2474 * On success, the function returns 0 and all requests in the reqs array have
2475 * been submitted. On error, this function returns -1 and any given request
2476 * may or may not have been submitted yet. In particular, this means that the
2477 * callback will be invoked for some requests and not for others. The caller
2478 * must check the error field of each BlockRequest to know which callbacks to
2479 * wait for (if error != 0, no callback will be called for that request).
2480 *
2481 * The implementation may modify the contents of the reqs array, e.g. to merge
2482 * requests. However, the fields opaque and error are left unmodified as they
2483 * are used to signal failure for a single request to the caller.
2484 */
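/* Usage sketch (added illustration, assuming a valid bs, two prepared
 * QEMUIOVectors qiov0/qiov1 and a completion callback my_cb):
 *
 *     BlockRequest reqs[2] = {
 *         { .sector = 0, .nb_sectors = 8, .qiov = &qiov0,
 *           .cb = my_cb, .opaque = NULL },
 *         { .sector = 8, .nb_sectors = 8, .qiov = &qiov1,
 *           .cb = my_cb, .opaque = NULL },
 *     };
 *     if (bdrv_aio_multiwrite(bs, reqs, 2) < 0) {
 *         check reqs[i].error to see which callbacks will still be called
 *     }
 */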
2485int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2486{
2487 BlockDriverAIOCB *acb;
2488 MultiwriteCB *mcb;
2489 int i;
2490
2491 /* don't submit writes if we don't have a medium */
2492 if (bs->drv == NULL) {
2493 for (i = 0; i < num_reqs; i++) {
2494 reqs[i].error = -ENOMEDIUM;
2495 }
2496 return -1;
2497 }
2498
2499 if (num_reqs == 0) {
2500 return 0;
2501 }
2502
2503 // Create MultiwriteCB structure
2504 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
2505 mcb->num_requests = 0;
2506 mcb->num_callbacks = num_reqs;
2507
2508 for (i = 0; i < num_reqs; i++) {
2509 mcb->callbacks[i].cb = reqs[i].cb;
2510 mcb->callbacks[i].opaque = reqs[i].opaque;
2511 }
2512
2513 // Check for mergeable requests
2514 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2515
2516 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2517
2518 /*
2519 * Run the aio requests. As soon as one request can't be submitted
2520 * successfully, fail all requests that are not yet submitted (we must
2521 * return failure for all requests anyway)
2522 *
2523 * num_requests cannot be set to the right value immediately: If
2524 * bdrv_aio_writev fails for some request, num_requests would be too high
2525 * and therefore multiwrite_cb() would never recognize the multiwrite
2526 * request as completed. We also cannot use the loop variable i to set it
2527 * when the first request fails because the callback may already have been
2528 * called for previously submitted requests. Thus, num_requests must be
2529 * incremented for each request that is submitted.
2530 *
2531 * The problem that callbacks may be called early also means that we need
2532 * to take care that num_requests doesn't become 0 before all requests are
2533 * submitted - multiwrite_cb() would consider the multiwrite request
2534 * completed. A dummy request that is "completed" by a manual call to
2535 * multiwrite_cb() takes care of this.
2536 */
2537 mcb->num_requests = 1;
2538
2539 // Run the aio requests
2540 for (i = 0; i < num_reqs; i++) {
2541 mcb->num_requests++;
2542 acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2543 reqs[i].nb_sectors, multiwrite_cb, mcb);
2544
2545 if (acb == NULL) {
2546 // We can only fail the whole thing if no request has been
2547 // submitted yet. Otherwise we'll wait for the submitted AIOs to
2548 // complete and report the error in the callback.
2549 if (i == 0) {
2550 trace_bdrv_aio_multiwrite_earlyfail(mcb);
2551 goto fail;
2552 } else {
2553 trace_bdrv_aio_multiwrite_latefail(mcb, i);
2554 multiwrite_cb(mcb, -EIO);
2555 break;
2556 }
2557 }
2558 }
2559
2560 /* Complete the dummy request */
2561 multiwrite_cb(mcb, 0);
2562
2563 return 0;
2564
2565fail:
2566 for (i = 0; i < mcb->num_callbacks; i++) {
2567 reqs[i].error = -EIO;
2568 }
2569 g_free(mcb);
2570 return -1;
2571}
2572
2573void bdrv_aio_cancel(BlockDriverAIOCB *acb)
2574{
2575 acb->pool->cancel(acb);
2576}
2577
2578
2579/**************************************************************/
2580/* async block device emulation */
2581
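/* Fallback AIO emulation for drivers that only provide synchronous
 * bdrv_read/bdrv_write: the request is carried out immediately through a
 * bounce buffer and the completion callback is deferred to a bottom half, so
 * callers still see the usual asynchronous completion pattern. */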
2582typedef struct BlockDriverAIOCBSync {
2583 BlockDriverAIOCB common;
2584 QEMUBH *bh;
2585 int ret;
2586 /* vector translation state */
2587 QEMUIOVector *qiov;
2588 uint8_t *bounce;
2589 int is_write;
2590} BlockDriverAIOCBSync;
2591
2592static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
2593{
2594 BlockDriverAIOCBSync *acb =
2595 container_of(blockacb, BlockDriverAIOCBSync, common);
2596 qemu_bh_delete(acb->bh);
2597 acb->bh = NULL;
2598 qemu_aio_release(acb);
2599}
2600
2601static AIOPool bdrv_em_aio_pool = {
2602 .aiocb_size = sizeof(BlockDriverAIOCBSync),
2603 .cancel = bdrv_aio_cancel_em,
2604};
2605
2606static void bdrv_aio_bh_cb(void *opaque)
2607{
2608 BlockDriverAIOCBSync *acb = opaque;
2609
2610 if (!acb->is_write)
2611 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
2612 qemu_vfree(acb->bounce);
2613 acb->common.cb(acb->common.opaque, acb->ret);
2614 qemu_bh_delete(acb->bh);
2615 acb->bh = NULL;
2616 qemu_aio_release(acb);
2617}
2618
2619static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
2620 int64_t sector_num,
2621 QEMUIOVector *qiov,
2622 int nb_sectors,
2623 BlockDriverCompletionFunc *cb,
2624 void *opaque,
2625 int is_write)
2626
2627{
2628 BlockDriverAIOCBSync *acb;
2629
2630 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2631 acb->is_write = is_write;
2632 acb->qiov = qiov;
2633 acb->bounce = qemu_blockalign(bs, qiov->size);
2634
2635 if (!acb->bh)
2636 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2637
2638 if (is_write) {
2639 qemu_iovec_to_buffer(acb->qiov, acb->bounce);
2640 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
2641 } else {
2642 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
2643 }
2644
2645 qemu_bh_schedule(acb->bh);
2646
2647 return &acb->common;
2648}
2649
2650static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
2651 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2652 BlockDriverCompletionFunc *cb, void *opaque)
2653{
2654 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
2655}
2656
2657static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
2658 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2659 BlockDriverCompletionFunc *cb, void *opaque)
2660{
2661 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
2662}
2663
2664
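/* AIOCB used by the coroutine-based request path: bdrv_aio_readv,
 * bdrv_aio_writev and bdrv_aio_flush wrap the request in a
 * BlockDriverAIOCBCoroutine, run it inside a coroutine (bdrv_co_do_rw or
 * bdrv_aio_flush_co_entry) and deliver the completion callback from a
 * bottom half (bdrv_co_em_bh). */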
2665typedef struct BlockDriverAIOCBCoroutine {
2666 BlockDriverAIOCB common;
2667 BlockRequest req;
2668 bool is_write;
2669 QEMUBH* bh;
2670} BlockDriverAIOCBCoroutine;
2671
2672static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
2673{
2674 qemu_aio_flush();
2675}
2676
2677static AIOPool bdrv_em_co_aio_pool = {
2678 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
2679 .cancel = bdrv_aio_co_cancel_em,
2680};
2681
2682static void bdrv_co_em_bh(void *opaque)
2683{
2684 BlockDriverAIOCBCoroutine *acb = opaque;
2685
2686 acb->common.cb(acb->common.opaque, acb->req.error);
2687 qemu_bh_delete(acb->bh);
2688 qemu_aio_release(acb);
2689}
2690
2691/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
2692static void coroutine_fn bdrv_co_do_rw(void *opaque)
2693{
2694 BlockDriverAIOCBCoroutine *acb = opaque;
2695 BlockDriverState *bs = acb->common.bs;
2696
2697 if (!acb->is_write) {
2698 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
2699 acb->req.nb_sectors, acb->req.qiov);
2700 } else {
2701 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
2702 acb->req.nb_sectors, acb->req.qiov);
2703 }
2704
2705 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
2706 qemu_bh_schedule(acb->bh);
2707}
2708
2709static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
2710 int64_t sector_num,
2711 QEMUIOVector *qiov,
2712 int nb_sectors,
2713 BlockDriverCompletionFunc *cb,
2714 void *opaque,
2715 bool is_write)
2716{
2717 Coroutine *co;
2718 BlockDriverAIOCBCoroutine *acb;
2719
2720 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2721 acb->req.sector = sector_num;
2722 acb->req.nb_sectors = nb_sectors;
2723 acb->req.qiov = qiov;
2724 acb->is_write = is_write;
2725
2726 co = qemu_coroutine_create(bdrv_co_do_rw);
2727 qemu_coroutine_enter(co, acb);
2728
2729 return &acb->common;
2730}
2731
2732static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
2733{
2734 BlockDriverAIOCBCoroutine *acb = opaque;
2735 BlockDriverState *bs = acb->common.bs;
2736
2737 acb->req.error = bdrv_co_flush(bs);
2738 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
2739 qemu_bh_schedule(acb->bh);
2740}
2741
2742BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
2743 BlockDriverCompletionFunc *cb, void *opaque)
2744{
2745 trace_bdrv_aio_flush(bs, opaque);
2746
2747 Coroutine *co;
2748 BlockDriverAIOCBCoroutine *acb;
2749
2750 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2751 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
2752 qemu_coroutine_enter(co, acb);
2753
2754 return &acb->common;
2755}
2756
2757void bdrv_init(void)
2758{
2759 module_call_init(MODULE_INIT_BLOCK);
2760}
2761
2762void bdrv_init_with_whitelist(void)
2763{
2764 use_bdrv_whitelist = 1;
2765 bdrv_init();
2766}
2767
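/* AIOCB allocation: each AIOPool keeps a singly linked list of released
 * AIOCBs.  qemu_aio_get() reuses an entry from that list when one is
 * available and otherwise allocates a zeroed AIOCB of pool->aiocb_size;
 * qemu_aio_release() pushes the AIOCB back onto the pool's free list. */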
2768void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
2769 BlockDriverCompletionFunc *cb, void *opaque)
2770{
2771 BlockDriverAIOCB *acb;
2772
2773 if (pool->free_aiocb) {
2774 acb = pool->free_aiocb;
2775 pool->free_aiocb = acb->next;
2776 } else {
2777 acb = g_malloc0(pool->aiocb_size);
2778 acb->pool = pool;
2779 }
2780 acb->bs = bs;
2781 acb->cb = cb;
2782 acb->opaque = opaque;
2783 return acb;
2784}
2785
2786void qemu_aio_release(void *p)
2787{
2788 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
2789 AIOPool *pool = acb->pool;
2790 acb->next = pool->free_aiocb;
2791 pool->free_aiocb = acb;
2792}
2793
2794/**************************************************************/
2795/* Coroutine block device emulation */
2796
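/* Bridge from the coroutine read/write path to a driver's callback-based AIO
 * interface: the request is issued with the driver's bdrv_aio_readv/writev,
 * the current coroutine yields, and bdrv_co_io_em_complete() re-enters it
 * with the result once the AIO callback fires. */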
2797typedef struct CoroutineIOCompletion {
2798 Coroutine *coroutine;
2799 int ret;
2800} CoroutineIOCompletion;
2801
2802static void bdrv_co_io_em_complete(void *opaque, int ret)
2803{
2804 CoroutineIOCompletion *co = opaque;
2805
2806 co->ret = ret;
2807 qemu_coroutine_enter(co->coroutine, NULL);
2808}
2809
2810static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
2811 int nb_sectors, QEMUIOVector *iov,
2812 bool is_write)
2813{
2814 CoroutineIOCompletion co = {
2815 .coroutine = qemu_coroutine_self(),
2816 };
2817 BlockDriverAIOCB *acb;
2818
2819 if (is_write) {
2820 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
2821 bdrv_co_io_em_complete, &co);
2822 } else {
2823 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
2824 bdrv_co_io_em_complete, &co);
2825 }
2826
2827 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
2828 if (!acb) {
2829 return -EIO;
2830 }
2831 qemu_coroutine_yield();
2832
2833 return co.ret;
2834}
2835
2836static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
2837 int64_t sector_num, int nb_sectors,
2838 QEMUIOVector *iov)
2839{
2840 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
2841}
2842
2843static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
2844 int64_t sector_num, int nb_sectors,
2845 QEMUIOVector *iov)
2846{
2847 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
2848}
2849
2850static void coroutine_fn bdrv_flush_co_entry(void *opaque)
2851{
2852 RwCo *rwco = opaque;
2853
2854 rwco->ret = bdrv_co_flush(rwco->bs);
2855}
2856
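/* Flush pending writes to stable storage, using the cheapest interface the
 * driver offers: nothing is done with BDRV_O_NO_FLUSH or without a medium;
 * otherwise bdrv_co_flush, bdrv_aio_flush (bridged with a coroutine yield)
 * and the legacy synchronous bdrv_flush callbacks are tried in that order.
 * Drivers providing none of them are assumed to be safe already (see the
 * comment in the final else branch below). */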
2857int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
2858{
2859 if (bs->open_flags & BDRV_O_NO_FLUSH) {
2860 return 0;
2861 } else if (!bs->drv) {
2862 return 0;
2863 } else if (bs->drv->bdrv_co_flush) {
2864 return bs->drv->bdrv_co_flush(bs);
2865 } else if (bs->drv->bdrv_aio_flush) {
2866 BlockDriverAIOCB *acb;
2867 CoroutineIOCompletion co = {
2868 .coroutine = qemu_coroutine_self(),
2869 };
2870
2871 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
2872 if (acb == NULL) {
2873 return -EIO;
2874 } else {
2875 qemu_coroutine_yield();
2876 return co.ret;
2877 }
2878 } else if (bs->drv->bdrv_flush) {
2879 return bs->drv->bdrv_flush(bs);
2880 } else {
2881 /*
2882 * Some block drivers always operate in either writethrough or unsafe
2883 * mode and therefore don't support bdrv_flush. Usually qemu doesn't
2884 * know how the server works (because the behaviour is hardcoded or
2885 * depends on server-side configuration), so we can't ensure that
2886 * everything is safe on disk. Returning an error doesn't work because
2887 * that would break guests even if the server operates in writethrough
2888 * mode.
2889 *
2890 * Let's hope the user knows what he's doing.
2891 */
2892 return 0;
2893 }
2894}
2895
2896int bdrv_flush(BlockDriverState *bs)
2897{
2898 Coroutine *co;
2899 RwCo rwco = {
2900 .bs = bs,
2901 .ret = NOT_DONE,
2902 };
2903
2904 if (qemu_in_coroutine()) {
2905 /* Fast-path if already in coroutine context */
2906 bdrv_flush_co_entry(&rwco);
2907 } else {
2908 co = qemu_coroutine_create(bdrv_flush_co_entry);
2909 qemu_coroutine_enter(co, &rwco);
2910 while (rwco.ret == NOT_DONE) {
2911 qemu_aio_wait();
2912 }
2913 }
2914
2915 return rwco.ret;
2916}
2917
2918/**************************************************************/
2919/* removable device support */
2920
2921/**
2922 * Return TRUE if the media is present
2923 */
2924int bdrv_is_inserted(BlockDriverState *bs)
2925{
2926 BlockDriver *drv = bs->drv;
2927
2928 if (!drv)
2929 return 0;
2930 if (!drv->bdrv_is_inserted)
2931 return 1;
2932 return drv->bdrv_is_inserted(bs);
2933}
2934
2935/**
2936 * Return whether the media changed since the last call to this
2937 * function, or -ENOTSUP if we don't know. Most drivers don't know.
2938 */
2939int bdrv_media_changed(BlockDriverState *bs)
2940{
2941 BlockDriver *drv = bs->drv;
2942
2943 if (drv && drv->bdrv_media_changed) {
2944 return drv->bdrv_media_changed(bs);
2945 }
2946 return -ENOTSUP;
2947}
2948
2949/**
2950 * If eject_flag is TRUE, eject the media. Otherwise, close the tray.
2951 */
2952void bdrv_eject(BlockDriverState *bs, int eject_flag)
2953{
2954 BlockDriver *drv = bs->drv;
2955
2956 if (drv && drv->bdrv_eject) {
2957 drv->bdrv_eject(bs, eject_flag);
2958 }
2959}
2960
2961/**
2962 * Lock or unlock the media (if it is locked, the user won't be able
2963 * to eject it manually).
2964 */
2965void bdrv_lock_medium(BlockDriverState *bs, bool locked)
2966{
2967 BlockDriver *drv = bs->drv;
2968
2969 trace_bdrv_lock_medium(bs, locked);
2970
2971 if (drv && drv->bdrv_lock_medium) {
2972 drv->bdrv_lock_medium(bs, locked);
2973 }
2974}
2975
2976/* needed for generic scsi interface */
2977
2978int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
2979{
2980 BlockDriver *drv = bs->drv;
2981
2982 if (drv && drv->bdrv_ioctl)
2983 return drv->bdrv_ioctl(bs, req, buf);
2984 return -ENOTSUP;
2985}
2986
2987BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
2988 unsigned long int req, void *buf,
2989 BlockDriverCompletionFunc *cb, void *opaque)
2990{
2991 BlockDriver *drv = bs->drv;
2992
2993 if (drv && drv->bdrv_aio_ioctl)
2994 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
2995 return NULL;
2996}
2997
2998void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
2999{
3000 bs->buffer_alignment = align;
3001}
3002
3003void *qemu_blockalign(BlockDriverState *bs, size_t size)
3004{
3005 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3006}
3007
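/* Dirty sector tracking: the bitmap holds one bit per chunk of
 * BDRV_SECTORS_PER_DIRTY_CHUNK sectors (rounded up to whole bytes), so
 * enabling tracking allocates a zeroed bitmap sized for the current image
 * length and disabling it frees the bitmap again. */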
3008void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3009{
3010 int64_t bitmap_size;
3011
3012 bs->dirty_count = 0;
3013 if (enable) {
3014 if (!bs->dirty_bitmap) {
3015 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3016 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3017 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
3018
3019 bs->dirty_bitmap = g_malloc0(bitmap_size);
3020 }
3021 } else {
3022 if (bs->dirty_bitmap) {
3023 g_free(bs->dirty_bitmap);
3024 bs->dirty_bitmap = NULL;
3025 }
3026 }
3027}
3028
3029int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3030{
3031 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
3032
3033 if (bs->dirty_bitmap &&
3034 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
3035 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3036 (1UL << (chunk % (sizeof(unsigned long) * 8))));
3037 } else {
3038 return 0;
3039 }
3040}
3041
3042void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3043 int nr_sectors)
3044{
3045 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
3046}
3047
3048int64_t bdrv_get_dirty_count(BlockDriverState *bs)
3049{
3050 return bs->dirty_count;
3051}
3052
3053void bdrv_set_in_use(BlockDriverState *bs, int in_use)
3054{
3055 assert(bs->in_use != in_use);
3056 bs->in_use = in_use;
3057}
3058
3059int bdrv_in_use(BlockDriverState *bs)
3060{
3061 return bs->in_use;
3062}
3063
3064void bdrv_iostatus_enable(BlockDriverState *bs)
3065{
3066 bs->iostatus = BDRV_IOS_OK;
3067}
3068
3069/* The I/O status is only enabled if the drive explicitly
3070 * enables it _and_ the VM is configured to stop on errors */
3071bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3072{
3073 return (bs->iostatus != BDRV_IOS_INVAL &&
3074 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3075 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3076 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3077}
3078
3079void bdrv_iostatus_disable(BlockDriverState *bs)
3080{
3081 bs->iostatus = BDRV_IOS_INVAL;
3082}
3083
3084void bdrv_iostatus_reset(BlockDriverState *bs)
3085{
3086 if (bdrv_iostatus_is_enabled(bs)) {
3087 bs->iostatus = BDRV_IOS_OK;
3088 }
3089}
3090
3091/* XXX: Today this is set by device models because it makes the implementation
3092 quite simple. However, the block layer knows about the error, so it's
3093 possible to implement this without device models being involved */
3094void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3095{
3096 if (bdrv_iostatus_is_enabled(bs) && bs->iostatus == BDRV_IOS_OK) {
3097 assert(error >= 0);
3098 bs->iostatus = error == ENOSPC ? BDRV_IOS_ENOSPC : BDRV_IOS_FAILED;
3099 }
3100}
3101
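/* I/O accounting: a caller fills a BlockAcctCookie when an operation starts
 * and completes it when the operation finishes, accumulating byte, operation
 * and latency counters per I/O type.  Illustrative sketch (assuming the
 * BDRV_ACCT_READ member of BlockAcctType and BDRV_SECTOR_SIZE from the block
 * headers):
 *
 *     BlockAcctCookie cookie;
 *     bdrv_acct_start(bs, &cookie, nb_sectors * BDRV_SECTOR_SIZE,
 *                     BDRV_ACCT_READ);
 *     ...issue the read and wait for it to complete...
 *     bdrv_acct_done(bs, &cookie);
 */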
3102void
3103bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
3104 enum BlockAcctType type)
3105{
3106 assert(type < BDRV_MAX_IOTYPE);
3107
3108 cookie->bytes = bytes;
3109 cookie->start_time_ns = get_clock();
3110 cookie->type = type;
3111}
3112
3113void
3114bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
3115{
3116 assert(cookie->type < BDRV_MAX_IOTYPE);
3117
3118 bs->nr_bytes[cookie->type] += cookie->bytes;
3119 bs->nr_ops[cookie->type]++;
3120 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
3121}
3122
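/* Create a new image: look up the format and protocol drivers, merge their
 * create_options, apply the -o option string plus any backing file/format
 * overrides, derive the size from the backing file when none was given, and
 * finally call bdrv_create() with the resulting parameter list. */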
3123int bdrv_img_create(const char *filename, const char *fmt,
3124 const char *base_filename, const char *base_fmt,
3125 char *options, uint64_t img_size, int flags)
3126{
3127 QEMUOptionParameter *param = NULL, *create_options = NULL;
3128 QEMUOptionParameter *backing_fmt, *backing_file, *size;
3129 BlockDriverState *bs = NULL;
3130 BlockDriver *drv, *proto_drv;
3131 BlockDriver *backing_drv = NULL;
3132 int ret = 0;
3133
3134 /* Find driver and parse its options */
3135 drv = bdrv_find_format(fmt);
3136 if (!drv) {
3137 error_report("Unknown file format '%s'", fmt);
3138 ret = -EINVAL;
3139 goto out;
3140 }
3141
3142 proto_drv = bdrv_find_protocol(filename);
3143 if (!proto_drv) {
3144 error_report("Unknown protocol '%s'", filename);
3145 ret = -EINVAL;
3146 goto out;
3147 }
3148
3149 create_options = append_option_parameters(create_options,
3150 drv->create_options);
3151 create_options = append_option_parameters(create_options,
3152 proto_drv->create_options);
3153
3154 /* Create parameter list with default values */
3155 param = parse_option_parameters("", create_options, param);
3156
3157 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3158
3159 /* Parse -o options */
3160 if (options) {
3161 param = parse_option_parameters(options, create_options, param);
3162 if (param == NULL) {
3163 error_report("Invalid options for file format '%s'.", fmt);
3164 ret = -EINVAL;
3165 goto out;
3166 }
3167 }
3168
3169 if (base_filename) {
3170 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3171 base_filename)) {
3172 error_report("Backing file not supported for file format '%s'",
3173 fmt);
3174 ret = -EINVAL;
3175 goto out;
3176 }
3177 }
3178
3179 if (base_fmt) {
3180 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3181 error_report("Backing file format not supported for file "
3182 "format '%s'", fmt);
3183 ret = -EINVAL;
3184 goto out;
3185 }
3186 }
3187
3188 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3189 if (backing_file && backing_file->value.s) {
3190 if (!strcmp(filename, backing_file->value.s)) {
3191 error_report("Error: Trying to create an image with the "
3192 "same filename as the backing file");
3193 ret = -EINVAL;
3194 goto out;
3195 }
3196 }
3197
3198 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3199 if (backing_fmt && backing_fmt->value.s) {
3200 backing_drv = bdrv_find_format(backing_fmt->value.s);
3201 if (!backing_drv) {
3202 error_report("Unknown backing file format '%s'",
3203 backing_fmt->value.s);
3204 ret = -EINVAL;
3205 goto out;
3206 }
3207 }
3208
3209 // The size for the image must always be specified, with one exception:
3210 // If we are using a backing file, we can obtain the size from there
3211 size = get_option_parameter(param, BLOCK_OPT_SIZE);
3212 if (size && size->value.n == -1) {
3213 if (backing_file && backing_file->value.s) {
3214 uint64_t size;
3215 char buf[32];
3216
3217 bs = bdrv_new("");
3218
3219 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
3220 if (ret < 0) {
3221 error_report("Could not open '%s'", backing_file->value.s);
3222 goto out;
3223 }
3224 bdrv_get_geometry(bs, &size);
3225 size *= 512;
3226
3227 snprintf(buf, sizeof(buf), "%" PRId64, size);
3228 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3229 } else {
3230 error_report("Image creation needs a size parameter");
3231 ret = -EINVAL;
3232 goto out;
3233 }
3234 }
3235
3236 printf("Formatting '%s', fmt=%s ", filename, fmt);
3237 print_option_parameters(param);
3238 puts("");
3239
3240 ret = bdrv_create(drv, filename, param);
3241
3242 if (ret < 0) {
3243 if (ret == -ENOTSUP) {
3244 error_report("Formatting or formatting option not supported for "
3245 "file format '%s'", fmt);
3246 } else if (ret == -EFBIG) {
3247 error_report("The image size is too large for file format '%s'",
3248 fmt);
3249 } else {
3250 error_report("%s: error while creating %s: %s", filename, fmt,
3251 strerror(-ret));
3252 }
3253 }
3254
3255out:
3256 free_option_parameters(create_options);
3257 free_option_parameters(param);
3258
3259 if (bs) {
3260 bdrv_delete(bs);
3261 }
3262
3263 return ret;
3264}