/*
 * block_copy API
 *
 * Copyright (C) 2013 Proxmox Server Solutions
 * Copyright (c) 2019 Virtuozzo International GmbH.
 *
 * Authors:
 *  Dietmar Maurer (dietmar@proxmox.com)
 *  Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"

#include "trace.h"
#include "qapi/error.h"
#include "block/block-copy.h"
#include "sysemu/block-backend.h"

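/*
 * Intersecting in-flight requests are serialised: before copying, a caller
 * waits until no in-flight request overlaps its byte range, registers its own
 * request, and removes it again (waking any waiters) once its copy loop is
 * done. Two half-open ranges [start, end) and [req->start_byte, req->end_byte)
 * overlap iff end > req->start_byte && start < req->end_byte, which is exactly
 * the check in block_copy_wait_inflight_reqs() below.
 */
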
static void coroutine_fn block_copy_wait_inflight_reqs(BlockCopyState *s,
                                                       int64_t start,
                                                       int64_t end)
{
    BlockCopyInFlightReq *req;
    bool waited;

    do {
        waited = false;
        QLIST_FOREACH(req, &s->inflight_reqs, list) {
            if (end > req->start_byte && start < req->end_byte) {
                qemu_co_queue_wait(&req->wait_queue, NULL);
                waited = true;
                break;
            }
        }
    } while (waited);
}

static void block_copy_inflight_req_begin(BlockCopyState *s,
                                          BlockCopyInFlightReq *req,
                                          int64_t start, int64_t end)
{
    req->start_byte = start;
    req->end_byte = end;
    qemu_co_queue_init(&req->wait_queue);
    QLIST_INSERT_HEAD(&s->inflight_reqs, req, list);
}

static void coroutine_fn block_copy_inflight_req_end(BlockCopyInFlightReq *req)
{
    QLIST_REMOVE(req, list);
    qemu_co_queue_restart_all(&req->wait_queue);
}

void block_copy_state_free(BlockCopyState *s)
{
    if (!s) {
        return;
    }

    bdrv_release_dirty_bitmap(s->copy_bitmap);
    g_free(s);
}

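/*
 * Create the shared copy state for copying from @source to @target in units
 * of @cluster_size bytes. A dirty bitmap with @cluster_size granularity is
 * created on the source to track which clusters remain to be copied.
 * Returns NULL and sets @errp on failure.
 */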
BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
                                     int64_t cluster_size,
                                     BdrvRequestFlags write_flags, Error **errp)
{
    BlockCopyState *s;
    BdrvDirtyBitmap *copy_bitmap;
    uint32_t max_transfer =
            MIN_NON_ZERO(INT_MAX, MIN_NON_ZERO(source->bs->bl.max_transfer,
                                               target->bs->bl.max_transfer));

    copy_bitmap = bdrv_create_dirty_bitmap(source->bs, cluster_size, NULL,
                                           errp);
    if (!copy_bitmap) {
        return NULL;
    }
    bdrv_disable_dirty_bitmap(copy_bitmap);

    s = g_new(BlockCopyState, 1);
    *s = (BlockCopyState) {
        .source = source,
        .target = target,
        .copy_bitmap = copy_bitmap,
        .cluster_size = cluster_size,
        .len = bdrv_dirty_bitmap_size(copy_bitmap),
        .write_flags = write_flags,
    };

    s->copy_range_size = QEMU_ALIGN_DOWN(max_transfer, cluster_size);
    /*
     * Set use_copy_range, considering the following:
     * 1. Compression is not supported for copy_range.
     * 2. copy_range does not respect max_transfer (it's a TODO), so we factor
     *    that in here. If max_transfer is smaller than the cluster size, we do
     *    not use copy_range (in that case it's zero after aligning down
     *    above).
     */
    s->use_copy_range =
        !(write_flags & BDRV_REQ_WRITE_COMPRESSED) && s->copy_range_size > 0;

    QLIST_INIT(&s->inflight_reqs);

    return s;
}

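/*
 * Illustrative numbers for the use_copy_range decision above (assuming the
 * common 64 KiB cluster size used by backup): with max_transfer == 1 MiB,
 * copy_range_size aligns down to 1 MiB and copy_range stays enabled; with
 * max_transfer == 60 KiB, QEMU_ALIGN_DOWN(60 KiB, 64 KiB) == 0, so
 * use_copy_range ends up false and every cluster goes through the bounce
 * buffer. Requesting BDRV_REQ_WRITE_COMPRESSED likewise forces the
 * bounce-buffer path.
 */
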
void block_copy_set_callbacks(
        BlockCopyState *s,
        ProgressBytesCallbackFunc progress_bytes_callback,
        ProgressResetCallbackFunc progress_reset_callback,
        void *progress_opaque)
{
    s->progress_bytes_callback = progress_bytes_callback;
    s->progress_reset_callback = progress_reset_callback;
    s->progress_opaque = progress_opaque;
}

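/*
 * Hedged usage sketch, not lifted from a real caller: the callback and
 * variable names (my_progress_bytes, my_progress_reset, job) are hypothetical
 * and error handling is elided. @start and @bytes must be cluster-aligned.
 *
 *     BlockCopyState *s = block_copy_state_new(source, target, cluster_size,
 *                                              write_flags, errp);
 *     if (s) {
 *         block_copy_set_callbacks(s, my_progress_bytes, my_progress_reset,
 *                                  job);
 *         ret = block_copy(s, start, bytes, &error_is_read);
 *         block_copy_state_free(s);
 *     }
 */
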
/*
 * Copy range to target with a bounce buffer and return the bytes copied. On
 * error, return a negative error number.
 */
static int coroutine_fn block_copy_with_bounce_buffer(BlockCopyState *s,
                                                      int64_t start,
                                                      int64_t end,
                                                      bool *error_is_read)
{
    int ret;
    int nbytes;
    void *bounce_buffer = qemu_blockalign(s->source->bs, s->cluster_size);

    assert(QEMU_IS_ALIGNED(start, s->cluster_size));
    bdrv_reset_dirty_bitmap(s->copy_bitmap, start, s->cluster_size);
    nbytes = MIN(s->cluster_size, s->len - start);

    ret = bdrv_co_pread(s->source, start, nbytes, bounce_buffer, 0);
    if (ret < 0) {
        trace_block_copy_with_bounce_buffer_read_fail(s, start, ret);
        if (error_is_read) {
            *error_is_read = true;
        }
        goto fail;
    }

    ret = bdrv_co_pwrite(s->target, start, nbytes, bounce_buffer,
                         s->write_flags);
    if (ret < 0) {
        trace_block_copy_with_bounce_buffer_write_fail(s, start, ret);
        if (error_is_read) {
            *error_is_read = false;
        }
        goto fail;
    }

    qemu_vfree(bounce_buffer);

    return nbytes;
fail:
    qemu_vfree(bounce_buffer);
    bdrv_set_dirty_bitmap(s->copy_bitmap, start, s->cluster_size);
    return ret;
}

/*
 * Copy range to target and return the bytes copied. On error, return a
 * negative error number.
 */
static int coroutine_fn block_copy_with_offload(BlockCopyState *s,
                                                int64_t start,
                                                int64_t end)
{
    int ret;
    int nr_clusters;
    int nbytes;

    assert(QEMU_IS_ALIGNED(s->copy_range_size, s->cluster_size));
    assert(QEMU_IS_ALIGNED(start, s->cluster_size));
    nbytes = MIN(s->copy_range_size, MIN(end, s->len) - start);
    nr_clusters = DIV_ROUND_UP(nbytes, s->cluster_size);
    bdrv_reset_dirty_bitmap(s->copy_bitmap, start,
                            s->cluster_size * nr_clusters);
    ret = bdrv_co_copy_range(s->source, start, s->target, start, nbytes,
                             0, s->write_flags);
    if (ret < 0) {
        trace_block_copy_with_offload_fail(s, start, ret);
        bdrv_set_dirty_bitmap(s->copy_bitmap, start,
                              s->cluster_size * nr_clusters);
        return ret;
    }

    return nbytes;
}

/*
 * Check if the cluster starting at @offset is allocated or not.
 * Return via @pnum the number of contiguous clusters sharing this allocation.
 */
static int block_copy_is_cluster_allocated(BlockCopyState *s, int64_t offset,
                                           int64_t *pnum)
{
    BlockDriverState *bs = s->source->bs;
    int64_t count, total_count = 0;
    int64_t bytes = s->len - offset;
    int ret;

    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));

    while (true) {
        ret = bdrv_is_allocated(bs, offset, bytes, &count);
        if (ret < 0) {
            return ret;
        }

        total_count += count;

        if (ret || count == 0) {
            /*
             * ret: partial segment(s) are considered allocated.
             * otherwise: unallocated tail is treated as an entire segment.
             */
            *pnum = DIV_ROUND_UP(total_count, s->cluster_size);
            return ret;
        }

        /* Unallocated segment(s) with uncertain following segment(s) */
        if (total_count >= s->cluster_size) {
            *pnum = total_count / s->cluster_size;
            return 0;
        }

        offset += count;
        bytes -= count;
    }
}

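/*
 * Worked example for the helper above, assuming a 64 KiB cluster size and
 * that bdrv_is_allocated() reports each extent in a single call: if 96 KiB
 * starting at @offset are allocated, the first iteration returns 1 with
 * count == 96 KiB, so *pnum = DIV_ROUND_UP(96 KiB, 64 KiB) = 2 clusters are
 * reported as allocated (a partially allocated cluster counts as allocated).
 * If those 96 KiB are unallocated instead, total_count >= cluster_size after
 * the first iteration and *pnum = 1 cluster is reported as unallocated,
 * leaving the partially covered second cluster for a later call.
 */
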
/*
 * Reset bits in copy_bitmap starting at offset if they represent unallocated
 * data in the image. May reset subsequent contiguous bits.
 * @return 0 when the cluster at @offset was unallocated,
 *         1 otherwise, and -ret on error.
 */
int64_t block_copy_reset_unallocated(BlockCopyState *s,
                                     int64_t offset, int64_t *count)
{
    int ret;
    int64_t clusters, bytes;

    ret = block_copy_is_cluster_allocated(s, offset, &clusters);
    if (ret < 0) {
        return ret;
    }

    bytes = clusters * s->cluster_size;

    if (!ret) {
        bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
        s->progress_reset_callback(s->progress_opaque);
    }

    *count = bytes;
    return ret;
}

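/*
 * Copy the dirty clusters in [start, start + bytes) from source to target.
 * Clusters whose bits are clear in copy_bitmap are skipped; when
 * skip_unallocated is set, unallocated regions are dropped from the bitmap
 * instead of being copied. copy_range is tried first while use_copy_range is
 * set and we fall back to the bounce-buffer path on failure. Returns 0 on
 * success or a negative error number; *error_is_read (if not NULL) tells
 * whether a failure happened on the read or the write side.
 */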
int coroutine_fn block_copy(BlockCopyState *s,
                            int64_t start, uint64_t bytes,
                            bool *error_is_read)
{
    int ret = 0;
    int64_t end = bytes + start; /* bytes */
    int64_t status_bytes;
    BlockCopyInFlightReq req;

    /*
     * The block_copy() caller is responsible for keeping source and target in
     * the same aio context.
     */
    assert(bdrv_get_aio_context(s->source->bs) ==
           bdrv_get_aio_context(s->target->bs));

    assert(QEMU_IS_ALIGNED(start, s->cluster_size));
    assert(QEMU_IS_ALIGNED(end, s->cluster_size));

    block_copy_wait_inflight_reqs(s, start, end);
    block_copy_inflight_req_begin(s, &req, start, end);

    while (start < end) {
        int64_t dirty_end;

        if (!bdrv_dirty_bitmap_get(s->copy_bitmap, start)) {
            trace_block_copy_skip(s, start);
            start += s->cluster_size;
            continue; /* already copied */
        }

        dirty_end = bdrv_dirty_bitmap_next_zero(s->copy_bitmap, start,
                                                (end - start));
        if (dirty_end < 0) {
            dirty_end = end;
        }

        if (s->skip_unallocated) {
            ret = block_copy_reset_unallocated(s, start, &status_bytes);
            if (ret == 0) {
                trace_block_copy_skip_range(s, start, status_bytes);
                start += status_bytes;
                continue;
            }
            /* Clamp to known allocated region */
            dirty_end = MIN(dirty_end, start + status_bytes);
        }

        trace_block_copy_process(s, start);

        if (s->use_copy_range) {
            ret = block_copy_with_offload(s, start, dirty_end);
            if (ret < 0) {
                s->use_copy_range = false;
            }
        }
        if (!s->use_copy_range) {
            ret = block_copy_with_bounce_buffer(s, start, dirty_end,
                                                error_is_read);
        }
        if (ret < 0) {
            break;
        }

        start += ret;
        s->progress_bytes_callback(ret, s->progress_opaque);
        ret = 0;
    }

    block_copy_inflight_req_end(&req);

    return ret;
}