]>
Commit | Line | Data |
---|---|---|
67467ea5 | 1 | #include <limits.h> |
d0ed0ab8 AS |
2 | #include <sys/types.h> |
3 | #include <sys/stat.h> | |
4 | #include "basefs_allocator.h" | |
5 | #include "block_range.h" | |
6 | #include "hashmap.h" | |
7 | #include "base_fs.h" | |
8 | ||
9 | struct base_fs_allocator { | |
555a0fce | 10 | struct ext2fs_hashmap *entries; |
d0ed0ab8 | 11 | struct basefs_entry *cur_entry; |
58ecfa7f DA |
12 | /* The next expected logical block to allocate for cur_entry. */ |
13 | blk64_t next_lblk; | |
af4d3f89 DA |
14 | /* Blocks which are definitely owned by a single inode in BaseFS. */ |
15 | ext2fs_block_bitmap exclusive_block_map; | |
0f3291fd DA |
16 | /* Blocks which are available to the first inode that requests it. */ |
17 | ext2fs_block_bitmap dedup_block_map; | |
d0ed0ab8 AS |
18 | }; |
19 | ||
20 | static errcode_t basefs_block_allocator(ext2_filsys, blk64_t, blk64_t *, | |
21 | struct blk_alloc_ctx *ctx); | |
22 | ||
af4d3f89 DA |
23 | /* |
24 | * Free any reserved, but unconsumed block ranges in the allocator. This both | |
25 | * frees the block_range_list data structure and unreserves exclusive blocks | |
26 | * from the block map. | |
27 | */ | |
269f4c1b | 28 | static void fs_free_blocks_range(ext2_filsys fs, |
af4d3f89 | 29 | struct base_fs_allocator *allocator, |
269f4c1b | 30 | struct block_range_list *list) |
d0ed0ab8 | 31 | { |
af4d3f89 DA |
32 | ext2fs_block_bitmap exclusive_map = allocator->exclusive_block_map; |
33 | ||
34 | blk64_t block; | |
35 | while (list->head) { | |
36 | block = consume_next_block(list); | |
37 | if (ext2fs_test_block_bitmap2(exclusive_map, block)) { | |
38 | ext2fs_unmark_block_bitmap2(fs->block_map, block); | |
39 | ext2fs_unmark_block_bitmap2(exclusive_map, block); | |
40 | } | |
d0ed0ab8 AS |
41 | } |
42 | } | |
43 | ||
c2481ace DA |
44 | /* |
45 | * Free any blocks in the bitmap that were reserved but never used. This is | |
46 | * needed to free dedup_block_map and ensure the free block bitmap is | |
47 | * internally consistent. | |
48 | */ | |
49 | static void fs_free_blocks_bitmap(ext2_filsys fs, ext2fs_block_bitmap bitmap) | |
50 | { | |
51 | blk64_t block = 0; | |
52 | blk64_t start = fs->super->s_first_data_block; | |
53 | blk64_t end = ext2fs_blocks_count(fs->super) - 1; | |
54 | errcode_t retval; | |
55 | ||
56 | for (;;) { | |
57 | retval = ext2fs_find_first_set_block_bitmap2(bitmap, start, end, | |
58 | &block); | |
59 | if (retval) | |
60 | break; | |
61 | ext2fs_unmark_block_bitmap2(fs->block_map, block); | |
62 | start = block + 1; | |
63 | } | |
64 | } | |
65 | ||
dcbe79c4 DA |
66 | static void basefs_allocator_free(ext2_filsys fs, |
67 | struct base_fs_allocator *allocator) | |
68 | { | |
69 | struct basefs_entry *e; | |
70 | struct ext2fs_hashmap_entry *it = NULL; | |
71 | struct ext2fs_hashmap *entries = allocator->entries; | |
72 | ||
73 | if (entries) { | |
74 | while ((e = ext2fs_hashmap_iter_in_order(entries, &it))) { | |
af4d3f89 | 75 | fs_free_blocks_range(fs, allocator, &e->blocks); |
dcbe79c4 DA |
76 | delete_block_ranges(&e->blocks); |
77 | } | |
78 | ext2fs_hashmap_free(entries); | |
79 | } | |
c2481ace | 80 | fs_free_blocks_bitmap(fs, allocator->dedup_block_map); |
af4d3f89 | 81 | ext2fs_free_block_bitmap(allocator->exclusive_block_map); |
0f3291fd | 82 | ext2fs_free_block_bitmap(allocator->dedup_block_map); |
dcbe79c4 DA |
83 | free(allocator); |
84 | } | |
85 | ||
af4d3f89 DA |
86 | /* |
87 | * Build a bitmap of which blocks are definitely owned by exactly one file in | |
88 | * Base FS. Blocks which are not valid or are de-duplicated are skipped. This | |
89 | * is called during allocator initialization, to ensure that libext2fs does | |
90 | * not allocate which we want to re-use. | |
0f3291fd DA |
91 | * |
92 | * If a block was allocated in the initial filesystem, it can never be re-used, | |
93 | * so it will appear in neither the exclusive or dedup set. If a block is used | |
94 | * by multiple files, it will be removed from the owned set and instead added | |
95 | * to the dedup set. | |
96 | * | |
97 | * The dedup set is not removed from fs->block_map. This allows us to re-use | |
98 | * dedup blocks separately and not have them be allocated outside of file data. | |
af4d3f89 DA |
99 | */ |
100 | static void fs_reserve_block(ext2_filsys fs, | |
101 | struct base_fs_allocator *allocator, | |
102 | blk64_t block) | |
103 | { | |
104 | ext2fs_block_bitmap exclusive_map = allocator->exclusive_block_map; | |
0f3291fd | 105 | ext2fs_block_bitmap dedup_map = allocator->dedup_block_map; |
af4d3f89 DA |
106 | |
107 | if (block >= ext2fs_blocks_count(fs->super)) | |
108 | return; | |
109 | ||
110 | if (ext2fs_test_block_bitmap2(fs->block_map, block)) { | |
0f3291fd DA |
111 | if (!ext2fs_test_block_bitmap2(exclusive_map, block)) |
112 | return; | |
af4d3f89 | 113 | ext2fs_unmark_block_bitmap2(exclusive_map, block); |
0f3291fd | 114 | ext2fs_mark_block_bitmap2(dedup_map, block); |
af4d3f89 DA |
115 | } else { |
116 | ext2fs_mark_block_bitmap2(fs->block_map, block); | |
117 | ext2fs_mark_block_bitmap2(exclusive_map, block); | |
118 | } | |
119 | } | |
120 | ||
269f4c1b | 121 | static void fs_reserve_blocks_range(ext2_filsys fs, |
af4d3f89 | 122 | struct base_fs_allocator *allocator, |
269f4c1b | 123 | struct block_range_list *list) |
d0ed0ab8 | 124 | { |
af4d3f89 | 125 | blk64_t block; |
269f4c1b | 126 | struct block_range *blocks = list->head; |
af4d3f89 | 127 | |
d0ed0ab8 | 128 | while (blocks) { |
af4d3f89 DA |
129 | for (block = blocks->start; block <= blocks->end; block++) |
130 | fs_reserve_block(fs, allocator, block); | |
d0ed0ab8 AS |
131 | blocks = blocks->next; |
132 | } | |
133 | } | |
134 | ||
dcbe79c4 DA |
135 | /* |
136 | * For each file in the base FS map, ensure that its blocks are reserved in | |
137 | * the actual block map. This prevents libext2fs from allocating them for | |
138 | * general purpose use, and ensures that if the file needs data blocks, they | |
139 | * can be re-acquired exclusively for that file. | |
67467ea5 DA |
140 | * |
141 | * If a file in the base map is missing, or not a regular file in the new | |
142 | * filesystem, then it's skipped to ensure that its blocks are reusable. | |
dcbe79c4 | 143 | */ |
67467ea5 DA |
144 | static errcode_t fs_reserve_blocks(ext2_filsys fs, |
145 | struct base_fs_allocator *allocator, | |
146 | const char *src_dir) | |
d0ed0ab8 | 147 | { |
67467ea5 DA |
148 | int nbytes; |
149 | char full_path[PATH_MAX]; | |
150 | const char *sep = "/"; | |
151 | struct stat st; | |
d0ed0ab8 | 152 | struct basefs_entry *e; |
555a0fce | 153 | struct ext2fs_hashmap_entry *it = NULL; |
dcbe79c4 DA |
154 | struct ext2fs_hashmap *entries = allocator->entries; |
155 | ||
67467ea5 DA |
156 | if (strlen(src_dir) && src_dir[strlen(src_dir) - 1] == '/') |
157 | sep = ""; | |
158 | ||
159 | while ((e = ext2fs_hashmap_iter_in_order(entries, &it))) { | |
160 | nbytes = snprintf(full_path, sizeof(full_path), "%s%s%s", | |
161 | src_dir, sep, e->path); | |
162 | if (nbytes >= sizeof(full_path)) | |
163 | return ENAMETOOLONG; | |
164 | if (lstat(full_path, &st) || !S_ISREG(st.st_mode)) | |
165 | continue; | |
af4d3f89 | 166 | fs_reserve_blocks_range(fs, allocator, &e->blocks); |
67467ea5 DA |
167 | } |
168 | return 0; | |
dcbe79c4 DA |
169 | } |
170 | ||
171 | errcode_t base_fs_alloc_load(ext2_filsys fs, const char *file, | |
67467ea5 | 172 | const char *mountpoint, const char *src_dir) |
dcbe79c4 DA |
173 | { |
174 | errcode_t retval = 0; | |
d0ed0ab8 | 175 | struct base_fs_allocator *allocator; |
d0ed0ab8 | 176 | |
dcbe79c4 DA |
177 | allocator = calloc(1, sizeof(*allocator)); |
178 | if (!allocator) { | |
179 | retval = ENOMEM; | |
180 | goto out; | |
181 | } | |
d0ed0ab8 AS |
182 | |
183 | retval = ext2fs_read_bitmaps(fs); | |
184 | if (retval) | |
af4d3f89 | 185 | goto err_load; |
d0ed0ab8 AS |
186 | |
187 | allocator->cur_entry = NULL; | |
dcbe79c4 DA |
188 | allocator->entries = basefs_parse(file, mountpoint); |
189 | if (!allocator->entries) { | |
190 | retval = EIO; | |
af4d3f89 | 191 | goto err_load; |
dcbe79c4 | 192 | } |
af4d3f89 DA |
193 | retval = ext2fs_allocate_block_bitmap(fs, "exclusive map", |
194 | &allocator->exclusive_block_map); | |
195 | if (retval) | |
196 | goto err_load; | |
0f3291fd DA |
197 | retval = ext2fs_allocate_block_bitmap(fs, "dedup map", |
198 | &allocator->dedup_block_map); | |
199 | if (retval) | |
200 | goto err_load; | |
dcbe79c4 | 201 | |
67467ea5 DA |
202 | retval = fs_reserve_blocks(fs, allocator, src_dir); |
203 | if (retval) | |
204 | goto err_load; | |
d0ed0ab8 | 205 | |
70964797 | 206 | /* Override the default allocator */ |
d0ed0ab8 AS |
207 | fs->get_alloc_block2 = basefs_block_allocator; |
208 | fs->priv_data = allocator; | |
209 | ||
dcbe79c4 | 210 | goto out; |
d0ed0ab8 | 211 | |
af4d3f89 | 212 | err_load: |
dcbe79c4 DA |
213 | basefs_allocator_free(fs, allocator); |
214 | out: | |
215 | return retval; | |
d0ed0ab8 AS |
216 | } |
217 | ||
af4d3f89 | 218 | /* Try and acquire the next usable block from the Base FS map. */ |
c25ec972 DA |
219 | static errcode_t get_next_block(ext2_filsys fs, struct base_fs_allocator *allocator, |
220 | struct block_range_list* list, blk64_t *ret) | |
af4d3f89 DA |
221 | { |
222 | blk64_t block; | |
223 | ext2fs_block_bitmap exclusive_map = allocator->exclusive_block_map; | |
0f3291fd | 224 | ext2fs_block_bitmap dedup_map = allocator->dedup_block_map; |
af4d3f89 | 225 | |
c25ec972 DA |
226 | if (!list->head) |
227 | return EXT2_ET_BLOCK_ALLOC_FAIL; | |
228 | ||
229 | block = consume_next_block(list); | |
230 | if (block >= ext2fs_blocks_count(fs->super)) | |
231 | return EXT2_ET_BLOCK_ALLOC_FAIL; | |
232 | if (ext2fs_test_block_bitmap2(exclusive_map, block)) { | |
233 | ext2fs_unmark_block_bitmap2(exclusive_map, block); | |
234 | *ret = block; | |
235 | return 0; | |
236 | } | |
237 | if (ext2fs_test_block_bitmap2(dedup_map, block)) { | |
238 | ext2fs_unmark_block_bitmap2(dedup_map, block); | |
239 | *ret = block; | |
240 | return 0; | |
af4d3f89 | 241 | } |
c25ec972 | 242 | return EXT2_ET_BLOCK_ALLOC_FAIL; |
af4d3f89 DA |
243 | } |
244 | ||
58ecfa7f DA |
245 | /* |
246 | * BaseFS lists blocks in logical block order. However, the allocator hook is | |
247 | * only called if a block needs to be allocated. In the case of a deduplicated | |
248 | * block, or a hole, the hook is not invoked. This means the next block | |
249 | * allocation request will be out of sequence. For example, consider if BaseFS | |
250 | * specifies the following (0 being a hole): | |
251 | * 1 2 3 0 4 5 | |
252 | * | |
253 | * If the new file has a hole at logical block 0, we could accidentally | |
254 | * shift the entire expected block list as follows: | |
255 | * 0 1 2 0 3 4 | |
256 | * | |
257 | * To account for this, we track the next expected logical block in the | |
258 | * allocator. If the current request is for a later logical block, we skip and | |
259 | * free the intermediate physical blocks that would have been allocated. This | |
260 | * ensures the original block assignment is respected. | |
261 | */ | |
262 | static void skip_blocks(ext2_filsys fs, struct base_fs_allocator *allocator, | |
263 | struct blk_alloc_ctx *ctx) | |
264 | { | |
265 | blk64_t block; | |
266 | struct block_range_list *list = &allocator->cur_entry->blocks; | |
267 | ext2fs_block_bitmap exclusive_map = allocator->exclusive_block_map; | |
268 | ||
269 | while (list->head && allocator->next_lblk < ctx->lblk) { | |
270 | block = consume_next_block(list); | |
271 | if (block >= ext2fs_blocks_count(fs->super)) | |
272 | continue; | |
273 | if (ext2fs_test_block_bitmap2(exclusive_map, block)) { | |
274 | ext2fs_unmark_block_bitmap2(exclusive_map, block); | |
275 | ext2fs_unmark_block_bitmap2(fs->block_map, block); | |
276 | } | |
277 | allocator->next_lblk++; | |
278 | } | |
279 | } | |
280 | ||
d0ed0ab8 AS |
281 | static errcode_t basefs_block_allocator(ext2_filsys fs, blk64_t goal, |
282 | blk64_t *ret, struct blk_alloc_ctx *ctx) | |
283 | { | |
284 | errcode_t retval; | |
d0ed0ab8 AS |
285 | struct base_fs_allocator *allocator = fs->priv_data; |
286 | struct basefs_entry *e = allocator->cur_entry; | |
67467ea5 | 287 | ext2fs_block_bitmap dedup_map = allocator->dedup_block_map; |
d0ed0ab8 | 288 | |
af4d3f89 | 289 | if (e && ctx && (ctx->flags & BLOCK_ALLOC_DATA)) { |
58ecfa7f DA |
290 | if (allocator->next_lblk < ctx->lblk) |
291 | skip_blocks(fs, allocator, ctx); | |
292 | allocator->next_lblk = ctx->lblk + 1; | |
293 | ||
af4d3f89 DA |
294 | if (!get_next_block(fs, allocator, &e->blocks, ret)) |
295 | return 0; | |
d0ed0ab8 | 296 | } |
af4d3f89 DA |
297 | |
298 | retval = ext2fs_new_block2(fs, goal, fs->block_map, ret); | |
67467ea5 DA |
299 | if (!retval) { |
300 | ext2fs_mark_block_bitmap2(fs->block_map, *ret); | |
301 | return 0; | |
302 | } | |
c25ec972 DA |
303 | if (retval != EXT2_ET_BLOCK_ALLOC_FAIL) |
304 | return retval; | |
305 | ||
306 | /* Try to steal a block from the dedup pool. */ | |
307 | retval = ext2fs_find_first_set_block_bitmap2(dedup_map, | |
308 | fs->super->s_first_data_block, | |
309 | ext2fs_blocks_count(fs->super) - 1, ret); | |
310 | if (!retval) { | |
311 | ext2fs_unmark_block_bitmap2(dedup_map, *ret); | |
312 | return 0; | |
313 | } | |
314 | ||
315 | /* | |
316 | * As a last resort, take any block from our file's list. This | |
317 | * risks bloating the diff, but means we are more likely to | |
318 | * successfully build an image. | |
319 | */ | |
320 | while (e->blocks.head) { | |
321 | if (!get_next_block(fs, allocator, &e->blocks, ret)) | |
67467ea5 | 322 | return 0; |
67467ea5 | 323 | } |
c25ec972 | 324 | return EXT2_ET_BLOCK_ALLOC_FAIL; |
d0ed0ab8 AS |
325 | } |
326 | ||
327 | void base_fs_alloc_cleanup(ext2_filsys fs) | |
328 | { | |
dcbe79c4 | 329 | basefs_allocator_free(fs, fs->priv_data); |
d0ed0ab8 AS |
330 | fs->priv_data = NULL; |
331 | fs->get_alloc_block2 = NULL; | |
d0ed0ab8 AS |
332 | } |
333 | ||
334 | errcode_t base_fs_alloc_set_target(ext2_filsys fs, const char *target_path, | |
335 | const char *name EXT2FS_ATTR((unused)), | |
336 | ext2_ino_t parent_ino EXT2FS_ATTR((unused)), | |
337 | ext2_ino_t root EXT2FS_ATTR((unused)), mode_t mode) | |
338 | { | |
339 | struct base_fs_allocator *allocator = fs->priv_data; | |
340 | ||
341 | if (mode != S_IFREG) | |
342 | return 0; | |
343 | ||
58ecfa7f | 344 | if (allocator) { |
555a0fce JQ |
345 | allocator->cur_entry = ext2fs_hashmap_lookup(allocator->entries, |
346 | target_path, | |
347 | strlen(target_path)); | |
58ecfa7f DA |
348 | allocator->next_lblk = 0; |
349 | } | |
d0ed0ab8 AS |
350 | return 0; |
351 | } | |
352 | ||
353 | errcode_t base_fs_alloc_unset_target(ext2_filsys fs, | |
354 | const char *target_path EXT2FS_ATTR((unused)), | |
355 | const char *name EXT2FS_ATTR((unused)), | |
356 | ext2_ino_t parent_ino EXT2FS_ATTR((unused)), | |
357 | ext2_ino_t root EXT2FS_ATTR((unused)), mode_t mode) | |
358 | { | |
359 | struct base_fs_allocator *allocator = fs->priv_data; | |
360 | ||
361 | if (!allocator || !allocator->cur_entry || mode != S_IFREG) | |
362 | return 0; | |
363 | ||
af4d3f89 | 364 | fs_free_blocks_range(fs, allocator, &allocator->cur_entry->blocks); |
269f4c1b | 365 | delete_block_ranges(&allocator->cur_entry->blocks); |
d0ed0ab8 AS |
366 | return 0; |
367 | } |