/*
 * block_copy API
 *
 * Copyright (C) 2013 Proxmox Server Solutions
 * Copyright (c) 2019 Virtuozzo International GmbH.
 *
 * Authors:
 *  Dietmar Maurer (dietmar@proxmox.com)
 *  Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"

#include "trace.h"
#include "qapi/error.h"
#include "block/block-copy.h"
#include "sysemu/block-backend.h"

static void coroutine_fn block_copy_wait_inflight_reqs(BlockCopyState *s,
                                                       int64_t start,
                                                       int64_t end)
{
    BlockCopyInFlightReq *req;
    bool waited;

    do {
        waited = false;
        QLIST_FOREACH(req, &s->inflight_reqs, list) {
            if (end > req->start_byte && start < req->end_byte) {
                qemu_co_queue_wait(&req->wait_queue, NULL);
                waited = true;
                break;
            }
        }
    } while (waited);
}

static void block_copy_inflight_req_begin(BlockCopyState *s,
                                          BlockCopyInFlightReq *req,
                                          int64_t start, int64_t end)
{
    req->start_byte = start;
    req->end_byte = end;
    qemu_co_queue_init(&req->wait_queue);
    QLIST_INSERT_HEAD(&s->inflight_reqs, req, list);
}

static void coroutine_fn block_copy_inflight_req_end(BlockCopyInFlightReq *req)
{
    QLIST_REMOVE(req, list);
    qemu_co_queue_restart_all(&req->wait_queue);
}

void block_copy_state_free(BlockCopyState *s)
{
    if (!s) {
        return;
    }

    bdrv_release_dirty_bitmap(s->copy_bitmap);
    g_free(s);
}

BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
                                     int64_t cluster_size,
                                     BdrvRequestFlags write_flags, Error **errp)
{
    BlockCopyState *s;
    BdrvDirtyBitmap *copy_bitmap;
    uint32_t max_transfer =
            MIN_NON_ZERO(INT_MAX, MIN_NON_ZERO(source->bs->bl.max_transfer,
                                               target->bs->bl.max_transfer));

    copy_bitmap = bdrv_create_dirty_bitmap(source->bs, cluster_size, NULL,
                                           errp);
    if (!copy_bitmap) {
        return NULL;
    }
    bdrv_disable_dirty_bitmap(copy_bitmap);

    s = g_new(BlockCopyState, 1);
    *s = (BlockCopyState) {
        .source = source,
        .target = target,
        .copy_bitmap = copy_bitmap,
        .cluster_size = cluster_size,
        .len = bdrv_dirty_bitmap_size(copy_bitmap),
        .write_flags = write_flags,
    };

    s->copy_range_size = QEMU_ALIGN_DOWN(max_transfer, cluster_size);
    /*
     * Set use_copy_range, considering the following:
     * 1. Compression is not supported for copy_range.
     * 2. copy_range does not respect max_transfer (it's a TODO), so we factor
     *    that in here. If max_transfer is smaller than the cluster_size, we
     *    do not use copy_range (in that case it's zero after aligning down
     *    above).
     */
    s->use_copy_range =
        !(write_flags & BDRV_REQ_WRITE_COMPRESSED) && s->copy_range_size > 0;

    QLIST_INIT(&s->inflight_reqs);

    return s;
}

void block_copy_set_callbacks(
        BlockCopyState *s,
        ProgressBytesCallbackFunc progress_bytes_callback,
        ProgressResetCallbackFunc progress_reset_callback,
        void *progress_opaque)
{
    s->progress_bytes_callback = progress_bytes_callback;
    s->progress_reset_callback = progress_reset_callback;
    s->progress_opaque = progress_opaque;
}

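/*
 * Usage sketch (illustrative only, not part of the original file): how a
 * caller such as the backup job might wire the progress callbacks above.
 * All example_* names are invented for illustration, and the block is kept
 * behind an (equally hypothetical) BLOCK_COPY_EXAMPLE guard.
 */
#ifdef BLOCK_COPY_EXAMPLE
static void example_progress_bytes(int64_t bytes, void *opaque)
{
    uint64_t *done = opaque;

    *done += bytes; /* account only bytes that were actually copied */
}

static void example_progress_reset(void *opaque)
{
    /*
     * Called when block_copy_reset_unallocated() drops clusters from
     * copy_bitmap; a real caller would recompute its remaining-work
     * estimate from the dirty bitmap here.
     */
}

static void example_setup(BlockCopyState *s, uint64_t *done)
{
    block_copy_set_callbacks(s, example_progress_bytes,
                             example_progress_reset, done);
}
#endif /* BLOCK_COPY_EXAMPLE */
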
/*
 * Copy a range to the target with a bounce buffer and return the number of
 * bytes copied. If an error occurs, return a negative error number.
 */
static int coroutine_fn block_copy_with_bounce_buffer(BlockCopyState *s,
                                                      int64_t start,
                                                      int64_t end,
                                                      bool *error_is_read)
{
    int ret;
    int nbytes;
    void *bounce_buffer = qemu_blockalign(s->source->bs, s->cluster_size);

    assert(QEMU_IS_ALIGNED(start, s->cluster_size));
    bdrv_reset_dirty_bitmap(s->copy_bitmap, start, s->cluster_size);
    nbytes = MIN(s->cluster_size, s->len - start);

    ret = bdrv_co_pread(s->source, start, nbytes, bounce_buffer, 0);
    if (ret < 0) {
        trace_block_copy_with_bounce_buffer_read_fail(s, start, ret);
        if (error_is_read) {
            *error_is_read = true;
        }
        goto fail;
    }

    ret = bdrv_co_pwrite(s->target, start, nbytes, bounce_buffer,
                         s->write_flags);
    if (ret < 0) {
        trace_block_copy_with_bounce_buffer_write_fail(s, start, ret);
        if (error_is_read) {
            *error_is_read = false;
        }
        goto fail;
    }

    qemu_vfree(bounce_buffer);

    return nbytes;
fail:
    qemu_vfree(bounce_buffer);
    bdrv_set_dirty_bitmap(s->copy_bitmap, start, s->cluster_size);
    return ret;
}

/*
 * Copy a range to the target using copy offloading and return the number of
 * bytes copied. If an error occurs, return a negative error number.
 */
static int coroutine_fn block_copy_with_offload(BlockCopyState *s,
                                                int64_t start,
                                                int64_t end)
{
    int ret;
    int nr_clusters;
    int nbytes;

    assert(QEMU_IS_ALIGNED(s->copy_range_size, s->cluster_size));
    assert(QEMU_IS_ALIGNED(start, s->cluster_size));
    nbytes = MIN(s->copy_range_size, MIN(end, s->len) - start);
    nr_clusters = DIV_ROUND_UP(nbytes, s->cluster_size);
    bdrv_reset_dirty_bitmap(s->copy_bitmap, start,
                            s->cluster_size * nr_clusters);
    ret = bdrv_co_copy_range(s->source, start, s->target, start, nbytes,
                             0, s->write_flags);
    if (ret < 0) {
        trace_block_copy_with_offload_fail(s, start, ret);
        bdrv_set_dirty_bitmap(s->copy_bitmap, start,
                              s->cluster_size * nr_clusters);
        return ret;
    }

    return nbytes;
}

/*
 * Check whether the cluster starting at @offset is allocated or not.
 * Return via @pnum the number of contiguous clusters sharing this allocation.
 */
static int block_copy_is_cluster_allocated(BlockCopyState *s, int64_t offset,
                                           int64_t *pnum)
{
    BlockDriverState *bs = s->source->bs;
    int64_t count, total_count = 0;
    int64_t bytes = s->len - offset;
    int ret;

    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));

    while (true) {
        ret = bdrv_is_allocated(bs, offset, bytes, &count);
        if (ret < 0) {
            return ret;
        }

        total_count += count;

        if (ret || count == 0) {
            /*
             * ret: partial segment(s) are considered allocated.
             * otherwise: unallocated tail is treated as an entire segment.
             */
            *pnum = DIV_ROUND_UP(total_count, s->cluster_size);
            return ret;
        }

        /* Unallocated segment(s) with uncertain following segment(s) */
        if (total_count >= s->cluster_size) {
            *pnum = total_count / s->cluster_size;
            return 0;
        }

        offset += count;
        bytes -= count;
    }
}

/*
 * Reset bits in copy_bitmap starting at @offset if they represent unallocated
 * data in the image. May reset subsequent contiguous bits.
 * @return 0 when the cluster at @offset was unallocated,
 *         1 otherwise, and a negative error number on error.
 */
int64_t block_copy_reset_unallocated(BlockCopyState *s,
                                     int64_t offset, int64_t *count)
{
    int ret;
    int64_t clusters, bytes;

    ret = block_copy_is_cluster_allocated(s, offset, &clusters);
    if (ret < 0) {
        return ret;
    }

    bytes = clusters * s->cluster_size;

    if (!ret) {
        bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
        s->progress_reset_callback(s->progress_opaque);
    }

    *count = bytes;
    return ret;
}

int coroutine_fn block_copy(BlockCopyState *s,
                            int64_t start, uint64_t bytes,
                            bool *error_is_read)
{
    int ret = 0;
    int64_t end = bytes + start; /* bytes */
    int64_t status_bytes;
    BlockCopyInFlightReq req;

    /*
     * The caller of block_copy() is responsible for keeping source and
     * target in the same aio context.
     */
    assert(bdrv_get_aio_context(s->source->bs) ==
           bdrv_get_aio_context(s->target->bs));

    assert(QEMU_IS_ALIGNED(start, s->cluster_size));
    assert(QEMU_IS_ALIGNED(end, s->cluster_size));

    block_copy_wait_inflight_reqs(s, start, end);
    block_copy_inflight_req_begin(s, &req, start, end);

    while (start < end) {
        int64_t dirty_end;

        if (!bdrv_dirty_bitmap_get(s->copy_bitmap, start)) {
            trace_block_copy_skip(s, start);
            start += s->cluster_size;
            continue; /* already copied */
        }

        dirty_end = bdrv_dirty_bitmap_next_zero(s->copy_bitmap, start,
                                                (end - start));
        if (dirty_end < 0) {
            dirty_end = end;
        }

        if (s->skip_unallocated) {
            ret = block_copy_reset_unallocated(s, start, &status_bytes);
            if (ret == 0) {
                trace_block_copy_skip_range(s, start, status_bytes);
                start += status_bytes;
                continue;
            }
            /* Clamp to known allocated region */
            dirty_end = MIN(dirty_end, start + status_bytes);
        }

        trace_block_copy_process(s, start);

        if (s->use_copy_range) {
            ret = block_copy_with_offload(s, start, dirty_end);
            if (ret < 0) {
                s->use_copy_range = false;
            }
        }
        if (!s->use_copy_range) {
            ret = block_copy_with_bounce_buffer(s, start, dirty_end,
                                                error_is_read);
        }
        if (ret < 0) {
            break;
        }

        start += ret;
        s->progress_bytes_callback(ret, s->progress_opaque);
        ret = 0;
    }

    block_copy_inflight_req_end(&req);

    return ret;
}
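
/*
 * Usage sketch (illustrative only, not part of the original file): driving
 * block_copy() over the whole device. A caller like the backup job aligns
 * its range up to the cluster size in the same way; block_copy() itself
 * clamps the final cluster against s->len. The example_* name and the
 * BLOCK_COPY_EXAMPLE guard are invented for illustration.
 */
#ifdef BLOCK_COPY_EXAMPLE
static int coroutine_fn example_copy_all(BlockCopyState *s,
                                         bool *error_is_read)
{
    /* Both request bounds must be cluster-aligned */
    return block_copy(s, 0, QEMU_ALIGN_UP(s->len, s->cluster_size),
                      error_is_read);
}
#endif /* BLOCK_COPY_EXAMPLE */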