]> git.ipfire.org Git - thirdparty/qemu.git/blame - block/backup.c
block/backup: move in-flight requests handling from backup to block-copy
[thirdparty/qemu.git] / block / backup.c
CommitLineData
98d2c6f2
DM
1/*
2 * QEMU backup
3 *
4 * Copyright (C) 2013 Proxmox Server Solutions
5 *
6 * Authors:
7 * Dietmar Maurer (dietmar@proxmox.com)
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
10 * See the COPYING file in the top-level directory.
11 *
12 */
13
80c71a24 14#include "qemu/osdep.h"
98d2c6f2
DM
15
16#include "trace.h"
17#include "block/block.h"
18#include "block/block_int.h"
c87621ea 19#include "block/blockjob_int.h"
49d3e828 20#include "block/block_backup.h"
beb5f545 21#include "block/block-copy.h"
da34e65c 22#include "qapi/error.h"
cc7a8ea7 23#include "qapi/qmp/qerror.h"
98d2c6f2 24#include "qemu/ratelimit.h"
f348b6d1 25#include "qemu/cutils.h"
373340b2 26#include "sysemu/block-backend.h"
b2f56462 27#include "qemu/bitmap.h"
a410a7f1 28#include "qemu/error-report.h"
98d2c6f2 29
/*
 * Fallback backup cluster size (64 KiB).  backup_calculate_cluster_size()
 * returns it when the target reports no cluster size and also uses it as
 * the lower bound for the size it returns.
 */
#define BACKUP_CLUSTER_SIZE_DEFAULT (64 * 1024)
98d2c6f2 31
98d2c6f2
DM
32typedef struct BackupBlockJob {
33 BlockJob common;
2c8074c4 34 BlockDriverState *source_bs;
62aa1fbe 35
d58d8453 36 BdrvDirtyBitmap *sync_bitmap;
62aa1fbe 37
fc5d3f84 38 MirrorSyncMode sync_mode;
c8b56501 39 BitmapSyncMode bitmap_mode;
98d2c6f2
DM
40 BlockdevOnError on_source_error;
41 BlockdevOnError on_target_error;
42 CoRwlock flush_rwlock;
05df8a6a 43 uint64_t len;
cf79cdf6 44 uint64_t bytes_read;
16096a4d 45 int64_t cluster_size;
12b3e52e 46 NotifierWithReturn before_write;
a193b0f0 47
2c8074c4 48 BlockCopyState *bcs;
98d2c6f2
DM
49} BackupBlockJob;
50
bd21935b
KW
51static const BlockJobDriver backup_job_driver;
52
2c8074c4
VSO
53static void backup_progress_bytes_callback(int64_t bytes, void *opaque)
54{
55 BackupBlockJob *s = opaque;
56
57 s->bytes_read += bytes;
58 job_progress_update(&s->common.job, bytes);
59}
60
61static void backup_progress_reset_callback(void *opaque)
62{
63 BackupBlockJob *s = opaque;
64 uint64_t estimate = bdrv_get_dirty_count(s->bcs->copy_bitmap);
65
66 job_progress_set_remaining(&s->common.job, estimate);
67}
68
0bd0c443
VSO
69static int coroutine_fn backup_do_cow(BackupBlockJob *job,
70 int64_t offset, uint64_t bytes,
71 bool *error_is_read,
72 bool is_write_notifier)
73{
0bd0c443
VSO
74 int ret = 0;
75 int64_t start, end; /* bytes */
76
77 qemu_co_rwlock_rdlock(&job->flush_rwlock);
78
79 start = QEMU_ALIGN_DOWN(offset, job->cluster_size);
80 end = QEMU_ALIGN_UP(bytes + offset, job->cluster_size);
81
82 trace_backup_do_cow_enter(job, start, offset, bytes);
83
2c8074c4
VSO
84 ret = block_copy(job->bcs, start, end - start, error_is_read,
85 is_write_notifier);
0bd0c443 86
03f5d60b 87 trace_backup_do_cow_return(job, offset, bytes, ret);
98d2c6f2
DM
88
89 qemu_co_rwlock_unlock(&job->flush_rwlock);
90
91 return ret;
92}
93
94static int coroutine_fn backup_before_write_notify(
95 NotifierWithReturn *notifier,
96 void *opaque)
97{
12b3e52e 98 BackupBlockJob *job = container_of(notifier, BackupBlockJob, before_write);
98d2c6f2
DM
99 BdrvTrackedRequest *req = opaque;
100
2c8074c4 101 assert(req->bs == job->source_bs);
03f5d60b
EB
102 assert(QEMU_IS_ALIGNED(req->offset, BDRV_SECTOR_SIZE));
103 assert(QEMU_IS_ALIGNED(req->bytes, BDRV_SECTOR_SIZE));
793ed47a 104
03f5d60b 105 return backup_do_cow(job, req->offset, req->bytes, NULL, true);
98d2c6f2
DM
106}
107
b976ea3c
FZ
108static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret)
109{
110 BdrvDirtyBitmap *bm;
c23909e5
JS
111 bool sync = (((ret == 0) || (job->bitmap_mode == BITMAP_SYNC_MODE_ALWAYS)) \
112 && (job->bitmap_mode != BITMAP_SYNC_MODE_NEVER));
b976ea3c 113
c23909e5 114 if (sync) {
cf0cd293 115 /*
c23909e5
JS
116 * We succeeded, or we always intended to sync the bitmap.
117 * Delete this bitmap and install the child.
cf0cd293 118 */
2c8074c4 119 bm = bdrv_dirty_bitmap_abdicate(job->source_bs, job->sync_bitmap, NULL);
c23909e5
JS
120 } else {
121 /*
122 * We failed, or we never intended to sync the bitmap anyway.
123 * Merge the successor back into the parent, keeping all data.
124 */
2c8074c4 125 bm = bdrv_reclaim_dirty_bitmap(job->source_bs, job->sync_bitmap, NULL);
c23909e5
JS
126 }
127
128 assert(bm);
129
130 if (ret < 0 && job->bitmap_mode == BITMAP_SYNC_MODE_ALWAYS) {
131 /* If we failed and synced, merge in the bits we didn't copy: */
2c8074c4 132 bdrv_dirty_bitmap_merge_internal(bm, job->bcs->copy_bitmap,
c23909e5 133 NULL, true);
b976ea3c
FZ
134 }
135}
136
4ad35181 137static void backup_commit(Job *job)
c347b2c6 138{
4ad35181 139 BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
c347b2c6
JS
140 if (s->sync_bitmap) {
141 backup_cleanup_sync_bitmap(s, 0);
142 }
143}
144
4ad35181 145static void backup_abort(Job *job)
c347b2c6 146{
4ad35181 147 BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
c347b2c6
JS
148 if (s->sync_bitmap) {
149 backup_cleanup_sync_bitmap(s, -1);
150 }
151}
152
4ad35181 153static void backup_clean(Job *job)
e8a40bf7 154{
4ad35181 155 BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
a8389e31 156
2c8074c4 157 block_copy_state_free(s->bcs);
e8a40bf7
JS
158}
159
49d3e828
WC
160void backup_do_checkpoint(BlockJob *job, Error **errp)
161{
162 BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common);
49d3e828 163
bd21935b 164 assert(block_job_driver(job) == &backup_job_driver);
49d3e828
WC
165
166 if (backup_job->sync_mode != MIRROR_SYNC_MODE_NONE) {
167 error_setg(errp, "The backup job only supports block checkpoint in"
168 " sync=none mode");
169 return;
170 }
171
2c8074c4 172 bdrv_set_dirty_bitmap(backup_job->bcs->copy_bitmap, 0, backup_job->len);
49d3e828
WC
173}
174
98d2c6f2
DM
175static BlockErrorAction backup_error_action(BackupBlockJob *job,
176 bool read, int error)
177{
178 if (read) {
81e254dc
KW
179 return block_job_error_action(&job->common, job->on_source_error,
180 true, error);
98d2c6f2 181 } else {
81e254dc
KW
182 return block_job_error_action(&job->common, job->on_target_error,
183 false, error);
98d2c6f2
DM
184 }
185}
186
d58d8453
JS
187static bool coroutine_fn yield_and_check(BackupBlockJob *job)
188{
dee81d51
KW
189 uint64_t delay_ns;
190
daa7f2f9 191 if (job_is_cancelled(&job->common.job)) {
d58d8453
JS
192 return true;
193 }
194
0e23e382
VSO
195 /*
196 * We need to yield even for delay_ns = 0 so that bdrv_drain_all() can
197 * return. Without a yield, the VM would not reboot.
198 */
dee81d51
KW
199 delay_ns = block_job_ratelimit_get_delay(&job->common, job->bytes_read);
200 job->bytes_read = 0;
5d43e86e 201 job_sleep_ns(&job->common.job, delay_ns);
d58d8453 202
daa7f2f9 203 if (job_is_cancelled(&job->common.job)) {
d58d8453
JS
204 return true;
205 }
206
207 return false;
208}
209
c334e897 210static int coroutine_fn backup_loop(BackupBlockJob *job)
d58d8453
JS
211{
212 bool error_is_read;
a8389e31 213 int64_t offset;
62aa1fbe 214 BdrvDirtyBitmapIter *bdbi;
62aa1fbe 215 int ret = 0;
d58d8453 216
2c8074c4 217 bdbi = bdrv_dirty_iter_new(job->bcs->copy_bitmap);
62aa1fbe 218 while ((offset = bdrv_dirty_iter_next(bdbi)) != -1) {
53f1c879
VSO
219 do {
220 if (yield_and_check(job)) {
62aa1fbe 221 goto out;
53f1c879 222 }
a8389e31 223 ret = backup_do_cow(job, offset,
53f1c879
VSO
224 job->cluster_size, &error_is_read, false);
225 if (ret < 0 && backup_error_action(job, error_is_read, -ret) ==
226 BLOCK_ERROR_ACTION_REPORT)
227 {
62aa1fbe 228 goto out;
53f1c879
VSO
229 }
230 } while (ret < 0);
d58d8453
JS
231 }
232
62aa1fbe
JS
233 out:
234 bdrv_dirty_iter_free(bdbi);
235 return ret;
d58d8453
JS
236}
237
141cdcdf 238static void backup_init_copy_bitmap(BackupBlockJob *job)
8cc6dc62 239{
141cdcdf
JS
240 bool ret;
241 uint64_t estimate;
242
243 if (job->sync_mode == MIRROR_SYNC_MODE_BITMAP) {
2c8074c4 244 ret = bdrv_dirty_bitmap_merge_internal(job->bcs->copy_bitmap,
141cdcdf
JS
245 job->sync_bitmap,
246 NULL, true);
247 assert(ret);
248 } else {
7e30dd61
JS
249 if (job->sync_mode == MIRROR_SYNC_MODE_TOP) {
250 /*
251 * We can't hog the coroutine to initialize this thoroughly.
252 * Set a flag and resume work when we are able to yield safely.
253 */
2c8074c4 254 job->bcs->skip_unallocated = true;
7e30dd61 255 }
2c8074c4 256 bdrv_set_dirty_bitmap(job->bcs->copy_bitmap, 0, job->len);
141cdcdf 257 }
8cc6dc62 258
2c8074c4 259 estimate = bdrv_get_dirty_count(job->bcs->copy_bitmap);
141cdcdf 260 job_progress_set_remaining(&job->common.job, estimate);
8cc6dc62
VSO
261}
262
68702775 263static int coroutine_fn backup_run(Job *job, Error **errp)
98d2c6f2 264{
68702775 265 BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
98d2c6f2
DM
266 int ret = 0;
267
68702775 268 qemu_co_rwlock_init(&s->flush_rwlock);
98d2c6f2 269
141cdcdf 270 backup_init_copy_bitmap(s);
8cc6dc62 271
68702775 272 s->before_write.notify = backup_before_write_notify;
2c8074c4 273 bdrv_add_before_write_notifier(s->source_bs, &s->before_write);
98d2c6f2 274
7e30dd61
JS
275 if (s->sync_mode == MIRROR_SYNC_MODE_TOP) {
276 int64_t offset = 0;
277 int64_t count;
278
279 for (offset = 0; offset < s->len; ) {
280 if (yield_and_check(s)) {
281 ret = -ECANCELED;
282 goto out;
283 }
284
2c8074c4 285 ret = block_copy_reset_unallocated(s->bcs, offset, &count);
7e30dd61
JS
286 if (ret < 0) {
287 goto out;
288 }
289
290 offset += count;
291 }
2c8074c4 292 s->bcs->skip_unallocated = false;
7e30dd61
JS
293 }
294
68702775 295 if (s->sync_mode == MIRROR_SYNC_MODE_NONE) {
0e23e382
VSO
296 /*
297 * All bits are set in copy_bitmap to allow any cluster to be copied.
298 * This does not actually require them to be copied.
299 */
68702775 300 while (!job_is_cancelled(job)) {
0e23e382
VSO
301 /*
302 * Yield until the job is cancelled. We just let our before_write
303 * notify callback service CoW requests.
304 */
68702775 305 job_yield(job);
98d2c6f2 306 }
fc5d3f84 307 } else {
c334e897 308 ret = backup_loop(s);
98d2c6f2
DM
309 }
310
7e30dd61 311 out:
68702775 312 notifier_with_return_remove(&s->before_write);
98d2c6f2
DM
313
314 /* wait until pending backup_do_cow() calls have completed */
68702775
JS
315 qemu_co_rwlock_wrlock(&s->flush_rwlock);
316 qemu_co_rwlock_unlock(&s->flush_rwlock);
98d2c6f2 317
f67432a2 318 return ret;
98d2c6f2
DM
319}
320
a7815a76 321static const BlockJobDriver backup_job_driver = {
33e9e9bd
KW
322 .job_driver = {
323 .instance_size = sizeof(BackupBlockJob),
252291ea 324 .job_type = JOB_TYPE_BACKUP,
80fa2c75 325 .free = block_job_free,
b15de828 326 .user_resume = block_job_user_resume,
f67432a2 327 .run = backup_run,
4ad35181
KW
328 .commit = backup_commit,
329 .abort = backup_abort,
330 .clean = backup_clean,
bb0c9409 331 }
a7815a76
JS
332};
333
ae6b12fa
VSO
334static int64_t backup_calculate_cluster_size(BlockDriverState *target,
335 Error **errp)
336{
337 int ret;
338 BlockDriverInfo bdi;
339
340 /*
341 * If there is no backing file on the target, we cannot rely on COW if our
342 * backup cluster size is smaller than the target cluster size. Even for
343 * targets with a backing file, try to avoid COW if possible.
344 */
345 ret = bdrv_get_info(target, &bdi);
346 if (ret == -ENOTSUP && !target->backing) {
347 /* Cluster size is not defined */
348 warn_report("The target block device doesn't provide "
349 "information about the block size and it doesn't have a "
350 "backing file. The default block size of %u bytes is "
351 "used. If the actual block size of the target exceeds "
352 "this default, the backup may be unusable",
353 BACKUP_CLUSTER_SIZE_DEFAULT);
354 return BACKUP_CLUSTER_SIZE_DEFAULT;
355 } else if (ret < 0 && !target->backing) {
356 error_setg_errno(errp, -ret,
357 "Couldn't determine the cluster size of the target image, "
358 "which has no backing file");
359 error_append_hint(errp,
360 "Aborting, since this may create an unusable destination image\n");
361 return ret;
362 } else if (ret < 0 && target->backing) {
363 /* Not fatal; just trudge on ahead. */
364 return BACKUP_CLUSTER_SIZE_DEFAULT;
365 }
366
367 return MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
368}
369
111049a4 370BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
70559d49
AG
371 BlockDriverState *target, int64_t speed,
372 MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap,
c8b56501 373 BitmapSyncMode bitmap_mode,
13b9414b 374 bool compress,
98d2c6f2
DM
375 BlockdevOnError on_source_error,
376 BlockdevOnError on_target_error,
47970dfb 377 int creation_flags,
097310b5 378 BlockCompletionFunc *cb, void *opaque,
62c9e416 379 JobTxn *txn, Error **errp)
98d2c6f2
DM
380{
381 int64_t len;
91ab6883 382 BackupBlockJob *job = NULL;
ae6b12fa 383 int64_t cluster_size;
2c8074c4 384 BdrvRequestFlags write_flags;
98d2c6f2
DM
385
386 assert(bs);
387 assert(target);
98d2c6f2 388
a6c9365a
JS
389 /* QMP interface protects us from these cases */
390 assert(sync_mode != MIRROR_SYNC_MODE_INCREMENTAL);
391 assert(sync_bitmap || sync_mode != MIRROR_SYNC_MODE_BITMAP);
392
c29c1dd3
FZ
393 if (bs == target) {
394 error_setg(errp, "Source and target cannot be the same");
111049a4 395 return NULL;
c29c1dd3
FZ
396 }
397
c29c1dd3
FZ
398 if (!bdrv_is_inserted(bs)) {
399 error_setg(errp, "Device is not inserted: %s",
400 bdrv_get_device_name(bs));
111049a4 401 return NULL;
c29c1dd3
FZ
402 }
403
404 if (!bdrv_is_inserted(target)) {
405 error_setg(errp, "Device is not inserted: %s",
406 bdrv_get_device_name(target));
111049a4 407 return NULL;
c29c1dd3
FZ
408 }
409
ac850bf0 410 if (compress && !block_driver_can_compress(target->drv)) {
13b9414b
PB
411 error_setg(errp, "Compression is not supported for this drive %s",
412 bdrv_get_device_name(target));
111049a4 413 return NULL;
13b9414b
PB
414 }
415
c29c1dd3 416 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
111049a4 417 return NULL;
c29c1dd3
FZ
418 }
419
420 if (bdrv_op_is_blocked(target, BLOCK_OP_TYPE_BACKUP_TARGET, errp)) {
111049a4 421 return NULL;
c29c1dd3
FZ
422 }
423
1a2b8b40 424 if (sync_bitmap) {
b30ffbef
JS
425 /* If we need to write to this bitmap, check that we can: */
426 if (bitmap_mode != BITMAP_SYNC_MODE_NEVER &&
427 bdrv_dirty_bitmap_check(sync_bitmap, BDRV_BITMAP_DEFAULT, errp)) {
428 return NULL;
429 }
430
d58d8453
JS
431 /* Create a new bitmap, and freeze/disable this one. */
432 if (bdrv_dirty_bitmap_create_successor(bs, sync_bitmap, errp) < 0) {
111049a4 433 return NULL;
d58d8453 434 }
d58d8453
JS
435 }
436
98d2c6f2
DM
437 len = bdrv_getlength(bs);
438 if (len < 0) {
439 error_setg_errno(errp, -len, "unable to get length for '%s'",
440 bdrv_get_device_name(bs));
d58d8453 441 goto error;
98d2c6f2
DM
442 }
443
ae6b12fa
VSO
444 cluster_size = backup_calculate_cluster_size(target, errp);
445 if (cluster_size < 0) {
446 goto error;
447 }
448
05df8a6a 449 /* job->len is fixed, so we can't allow resize */
2c8074c4 450 job = block_job_create(job_id, &backup_job_driver, txn, bs, 0, BLK_PERM_ALL,
c6cc12bf 451 speed, creation_flags, cb, opaque, errp);
98d2c6f2 452 if (!job) {
d58d8453 453 goto error;
98d2c6f2
DM
454 }
455
2c8074c4 456 job->source_bs = bs;
98d2c6f2
DM
457 job->on_source_error = on_source_error;
458 job->on_target_error = on_target_error;
fc5d3f84 459 job->sync_mode = sync_mode;
c8b56501
JS
460 job->sync_bitmap = sync_bitmap;
461 job->bitmap_mode = bitmap_mode;
4c9bca7e 462
a1ed82b4 463 /*
372c67ea
VSO
464 * If source is in backing chain of target assume that target is going to be
465 * used for "image fleecing", i.e. it should represent a kind of snapshot of
466 * source at backup-start point in time. And target is going to be read by
467 * somebody (for example, used as NBD export) during backup job.
468 *
469 * In this case, we need to add BDRV_REQ_SERIALISING write flag to avoid
470 * intersection of backup writes and third party reads from target,
471 * otherwise reading from target we may occasionally read already updated by
472 * guest data.
473 *
474 * For more information see commit f8d59dfb40bb and test
475 * tests/qemu-iotests/222
a1ed82b4 476 */
2c8074c4
VSO
477 write_flags = (bdrv_chain_contains(target, bs) ? BDRV_REQ_SERIALISING : 0) |
478 (compress ? BDRV_REQ_WRITE_COMPRESSED : 0),
479
480 job->bcs = block_copy_state_new(bs, target, cluster_size, write_flags,
481 backup_progress_bytes_callback,
482 backup_progress_reset_callback, job, errp);
483 if (!job->bcs) {
484 goto error;
485 }
a1ed82b4 486
ae6b12fa 487 job->cluster_size = cluster_size;
4c9bca7e 488
2c8074c4 489 /* Required permissions are already taken by block-copy-state target */
76d554e2
KW
490 block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
491 &error_abort);
05df8a6a 492 job->len = len;
111049a4
JS
493
494 return &job->common;
d58d8453
JS
495
496 error:
497 if (sync_bitmap) {
498 bdrv_reclaim_dirty_bitmap(bs, sync_bitmap, NULL);
499 }
91ab6883 500 if (job) {
4ad35181
KW
501 backup_clean(&job->common.job);
502 job_early_fail(&job->common.job);
91ab6883 503 }
111049a4
JS
504
505 return NULL;
98d2c6f2 506}