]>
Commit | Line | Data |
---|---|---|
8c1be37e LP |
1 | /*** |
2 | This file is part of systemd. | |
3 | ||
4 | Copyright 2016 Lennart Poettering | |
5 | ||
6 | systemd is free software; you can redistribute it and/or modify it | |
7 | under the terms of the GNU Lesser General Public License as published by | |
8 | the Free Software Foundation; either version 2.1 of the License, or | |
9 | (at your option) any later version. | |
10 | ||
11 | systemd is distributed in the hope that it will be useful, but | |
12 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | Lesser General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU Lesser General Public License | |
17 | along with systemd; If not, see <http://www.gnu.org/licenses/>. | |
18 | ***/ | |
19 | ||
20 | #include <sys/mount.h> | |
21 | ||
22 | #include "architecture.h" | |
23 | #include "blkid-util.h" | |
24 | #include "dissect-image.h" | |
25 | #include "gpt.h" | |
26 | #include "mount-util.h" | |
27 | #include "path-util.h" | |
28 | #include "stat-util.h" | |
29 | #include "string-table.h" | |
30 | #include "string-util.h" | |
31 | #include "udev-util.h" | |
32 | ||
33 | int dissect_image(int fd, DissectedImage **ret) { | |
34 | ||
35 | #ifdef HAVE_BLKID | |
36 | _cleanup_udev_enumerate_unref_ struct udev_enumerate *e = NULL; | |
37 | bool is_gpt, is_mbr, generic_rw, multiple_generic = false; | |
38 | _cleanup_udev_device_unref_ struct udev_device *d = NULL; | |
39 | _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL; | |
40 | _cleanup_blkid_free_probe_ blkid_probe b = NULL; | |
41 | _cleanup_udev_unref_ struct udev *udev = NULL; | |
42 | _cleanup_free_ char *generic_node = NULL; | |
43 | const char *pttype = NULL, *usage = NULL; | |
44 | struct udev_list_entry *first, *item; | |
45 | blkid_partlist pl; | |
46 | int r, generic_nr; | |
47 | struct stat st; | |
48 | unsigned i; | |
49 | ||
50 | assert(fd >= 0); | |
51 | assert(ret); | |
52 | ||
53 | /* Probes a disk image, and returns information about what it found in *ret. | |
54 | * | |
55 | * Returns -ENOPKG if no suitable partition table or file system could be found. */ | |
56 | ||
57 | if (fstat(fd, &st) < 0) | |
58 | return -errno; | |
59 | ||
60 | if (!S_ISBLK(st.st_mode)) | |
61 | return -ENOTBLK; | |
62 | ||
63 | b = blkid_new_probe(); | |
64 | if (!b) | |
65 | return -ENOMEM; | |
66 | ||
67 | errno = 0; | |
68 | r = blkid_probe_set_device(b, fd, 0, 0); | |
69 | if (r != 0) { | |
70 | if (errno == 0) | |
71 | return -ENOMEM; | |
72 | ||
73 | return -errno; | |
74 | } | |
75 | ||
76 | blkid_probe_enable_superblocks(b, 1); | |
77 | blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE|BLKID_SUBLKS_USAGE); | |
78 | blkid_probe_enable_partitions(b, 1); | |
79 | blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS); | |
80 | ||
81 | errno = 0; | |
82 | r = blkid_do_safeprobe(b); | |
83 | if (r == -2 || r == 1) { | |
84 | log_debug("Failed to identify any partition table."); | |
85 | return -ENOPKG; | |
86 | } | |
87 | if (r != 0) { | |
88 | if (errno == 0) | |
89 | return -EIO; | |
90 | ||
91 | return -errno; | |
92 | } | |
93 | ||
94 | m = new0(DissectedImage, 1); | |
95 | if (!m) | |
96 | return -ENOMEM; | |
97 | ||
98 | (void) blkid_probe_lookup_value(b, "USAGE", &usage, NULL); | |
99 | if (streq_ptr(usage, "filesystem")) { | |
100 | _cleanup_free_ char *t = NULL, *n = NULL; | |
101 | const char *fstype = NULL; | |
102 | ||
103 | /* OK, we have found a file system, that's our root partition then. */ | |
104 | (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL); | |
105 | ||
106 | if (fstype) { | |
107 | t = strdup(fstype); | |
108 | if (!t) | |
109 | return -ENOMEM; | |
110 | } | |
111 | ||
112 | if (asprintf(&n, "/dev/block/%u:%u", major(st.st_rdev), minor(st.st_rdev)) < 0) | |
113 | return -ENOMEM; | |
114 | ||
115 | m->partitions[PARTITION_ROOT] = (DissectedPartition) { | |
116 | .found = true, | |
117 | .rw = true, | |
118 | .partno = -1, | |
119 | .architecture = _ARCHITECTURE_INVALID, | |
120 | .fstype = t, | |
121 | .node = n, | |
122 | }; | |
123 | ||
124 | t = n = NULL; | |
125 | ||
126 | *ret = m; | |
127 | m = NULL; | |
128 | ||
129 | return 0; | |
130 | } | |
131 | ||
132 | (void) blkid_probe_lookup_value(b, "PTTYPE", &pttype, NULL); | |
133 | if (!pttype) | |
134 | return -ENOPKG; | |
135 | ||
136 | is_gpt = streq_ptr(pttype, "gpt"); | |
137 | is_mbr = streq_ptr(pttype, "dos"); | |
138 | ||
139 | if (!is_gpt && !is_mbr) | |
140 | return -ENOPKG; | |
141 | ||
142 | errno = 0; | |
143 | pl = blkid_probe_get_partitions(b); | |
144 | if (!pl) { | |
145 | if (errno == 0) | |
146 | return -ENOMEM; | |
147 | ||
148 | return -errno; | |
149 | } | |
150 | ||
151 | udev = udev_new(); | |
152 | if (!udev) | |
153 | return -errno; | |
154 | ||
155 | d = udev_device_new_from_devnum(udev, 'b', st.st_rdev); | |
156 | if (!d) | |
157 | return -ENOMEM; | |
158 | ||
159 | for (i = 0;; i++) { | |
160 | int n, z; | |
161 | ||
162 | if (i >= 10) { | |
163 | log_debug("Kernel partitions never appeared."); | |
164 | return -ENXIO; | |
165 | } | |
166 | ||
167 | e = udev_enumerate_new(udev); | |
168 | if (!e) | |
169 | return -errno; | |
170 | ||
171 | r = udev_enumerate_add_match_parent(e, d); | |
172 | if (r < 0) | |
173 | return r; | |
174 | ||
175 | r = udev_enumerate_scan_devices(e); | |
176 | if (r < 0) | |
177 | return r; | |
178 | ||
179 | /* Count the partitions enumerated by the kernel */ | |
180 | n = 0; | |
181 | first = udev_enumerate_get_list_entry(e); | |
182 | udev_list_entry_foreach(item, first) | |
183 | n++; | |
184 | ||
185 | /* Count the partitions enumerated by blkid */ | |
186 | z = blkid_partlist_numof_partitions(pl); | |
187 | if (n == z + 1) | |
188 | break; | |
189 | if (n > z + 1) { | |
190 | log_debug("blkid and kernel partition list do not match."); | |
191 | return -EIO; | |
192 | } | |
193 | if (n < z + 1) { | |
194 | unsigned j; | |
195 | ||
196 | /* The kernel has probed fewer partitions than blkid? Maybe the kernel prober is still running | |
197 | * or it got EBUSY because udev already opened the device. Let's reprobe the device, which is a | |
198 | * synchronous call that waits until probing is complete. */ | |
199 | ||
200 | for (j = 0; j < 20; j++) { | |
201 | ||
202 | r = ioctl(fd, BLKRRPART, 0); | |
203 | if (r < 0) | |
204 | r = -errno; | |
205 | if (r >= 0 || r != -EBUSY) | |
206 | break; | |
207 | ||
208 | /* If something else has the device open, such as an udev rule, the ioctl will return | |
209 | * EBUSY. Since there's no way to wait until it isn't busy anymore, let's just wait a | |
210 | * bit, and try again. | |
211 | * | |
212 | * This is really something they should fix in the kernel! */ | |
213 | ||
214 | usleep(50 * USEC_PER_MSEC); | |
215 | } | |
216 | ||
217 | if (r < 0) | |
218 | return r; | |
219 | } | |
220 | ||
221 | e = udev_enumerate_unref(e); | |
222 | } | |
223 | ||
224 | first = udev_enumerate_get_list_entry(e); | |
225 | udev_list_entry_foreach(item, first) { | |
226 | _cleanup_udev_device_unref_ struct udev_device *q; | |
227 | unsigned long long flags; | |
228 | blkid_partition pp; | |
229 | const char *node; | |
230 | dev_t qn; | |
231 | int nr; | |
232 | ||
233 | q = udev_device_new_from_syspath(udev, udev_list_entry_get_name(item)); | |
234 | if (!q) | |
235 | return -errno; | |
236 | ||
237 | qn = udev_device_get_devnum(q); | |
238 | if (major(qn) == 0) | |
239 | continue; | |
240 | ||
241 | if (st.st_rdev == qn) | |
242 | continue; | |
243 | ||
244 | node = udev_device_get_devnode(q); | |
245 | if (!node) | |
246 | continue; | |
247 | ||
248 | pp = blkid_partlist_devno_to_partition(pl, qn); | |
249 | if (!pp) | |
250 | continue; | |
251 | ||
252 | flags = blkid_partition_get_flags(pp); | |
253 | ||
254 | nr = blkid_partition_get_partno(pp); | |
255 | if (nr < 0) | |
256 | continue; | |
257 | ||
258 | if (is_gpt) { | |
259 | int designator = _PARTITION_DESIGNATOR_INVALID, architecture = _ARCHITECTURE_INVALID; | |
260 | const char *stype, *fstype = NULL; | |
261 | sd_id128_t type_id; | |
262 | bool rw = true; | |
263 | ||
264 | if (flags & GPT_FLAG_NO_AUTO) | |
265 | continue; | |
266 | ||
267 | stype = blkid_partition_get_type_string(pp); | |
268 | if (!stype) | |
269 | continue; | |
270 | ||
271 | if (sd_id128_from_string(stype, &type_id) < 0) | |
272 | continue; | |
273 | ||
274 | if (sd_id128_equal(type_id, GPT_HOME)) { | |
275 | designator = PARTITION_HOME; | |
276 | rw = !(flags & GPT_FLAG_READ_ONLY); | |
277 | } else if (sd_id128_equal(type_id, GPT_SRV)) { | |
278 | designator = PARTITION_SRV; | |
279 | rw = !(flags & GPT_FLAG_READ_ONLY); | |
280 | } else if (sd_id128_equal(type_id, GPT_ESP)) { | |
281 | designator = PARTITION_ESP; | |
282 | fstype = "vfat"; | |
283 | } | |
284 | #ifdef GPT_ROOT_NATIVE | |
285 | else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE)) { | |
286 | designator = PARTITION_ROOT; | |
287 | architecture = native_architecture(); | |
288 | rw = !(flags & GPT_FLAG_READ_ONLY); | |
289 | } | |
290 | #endif | |
291 | #ifdef GPT_ROOT_SECONDARY | |
292 | else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY)) { | |
293 | designator = PARTITION_ROOT_SECONDARY; | |
294 | architecture = SECONDARY_ARCHITECTURE; | |
295 | rw = !(flags & GPT_FLAG_READ_ONLY); | |
296 | } | |
297 | #endif | |
298 | else if (sd_id128_equal(type_id, GPT_SWAP)) { | |
299 | designator = PARTITION_SWAP; | |
300 | fstype = "swap"; | |
301 | } else if (sd_id128_equal(type_id, GPT_LINUX_GENERIC)) { | |
302 | ||
303 | if (generic_node) | |
304 | multiple_generic = true; | |
305 | else { | |
306 | generic_nr = nr; | |
307 | generic_rw = !(flags & GPT_FLAG_READ_ONLY); | |
308 | generic_node = strdup(node); | |
309 | if (!generic_node) | |
310 | return -ENOMEM; | |
311 | } | |
312 | } | |
313 | ||
314 | if (designator != _PARTITION_DESIGNATOR_INVALID) { | |
315 | _cleanup_free_ char *t = NULL, *n = NULL; | |
316 | ||
317 | /* First one wins */ | |
318 | if (m->partitions[designator].found) | |
319 | continue; | |
320 | ||
321 | if (fstype) { | |
322 | t = strdup(fstype); | |
323 | if (!t) | |
324 | return -ENOMEM; | |
325 | } | |
326 | ||
327 | n = strdup(node); | |
328 | if (!n) | |
329 | return -ENOMEM; | |
330 | ||
331 | m->partitions[designator] = (DissectedPartition) { | |
332 | .found = true, | |
333 | .partno = nr, | |
334 | .rw = rw, | |
335 | .architecture = architecture, | |
336 | .node = n, | |
337 | .fstype = t, | |
338 | }; | |
339 | ||
340 | n = t = NULL; | |
341 | } | |
342 | ||
343 | } else if (is_mbr) { | |
344 | ||
345 | if (flags != 0x80) /* Bootable flag */ | |
346 | continue; | |
347 | ||
348 | if (blkid_partition_get_type(pp) != 0x83) /* Linux partition */ | |
349 | continue; | |
350 | ||
351 | if (generic_node) | |
352 | multiple_generic = true; | |
353 | else { | |
354 | generic_nr = nr; | |
355 | generic_rw = true; | |
356 | generic_node = strdup(node); | |
357 | if (!generic_node) | |
358 | return -ENOMEM; | |
359 | } | |
360 | } | |
361 | } | |
362 | ||
363 | if (!m->partitions[PARTITION_ROOT].found) { | |
364 | /* No root partition found? Then let's see if ther's one for the secondary architecture. And if not | |
365 | * either, then check if there's a single generic one, and use that. */ | |
366 | ||
367 | if (m->partitions[PARTITION_ROOT_SECONDARY].found) { | |
368 | m->partitions[PARTITION_ROOT] = m->partitions[PARTITION_ROOT_SECONDARY]; | |
369 | zero(m->partitions[PARTITION_ROOT_SECONDARY]); | |
370 | } else if (generic_node) { | |
371 | ||
372 | if (multiple_generic) | |
373 | return -ENOTUNIQ; | |
374 | ||
375 | m->partitions[PARTITION_ROOT] = (DissectedPartition) { | |
376 | .found = true, | |
377 | .rw = generic_rw, | |
378 | .partno = generic_nr, | |
379 | .architecture = _ARCHITECTURE_INVALID, | |
380 | .node = generic_node, | |
381 | }; | |
382 | ||
383 | generic_node = NULL; | |
384 | } else | |
385 | return -ENXIO; | |
386 | } | |
387 | ||
388 | /* Fill in file system types if we don't know them yet. */ | |
389 | for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) { | |
390 | const char *fstype; | |
391 | ||
392 | if (!m->partitions[i].found) /* not found? */ | |
393 | continue; | |
394 | ||
395 | if (m->partitions[i].fstype) /* already know the type? */ | |
396 | continue; | |
397 | ||
398 | if (!m->partitions[i].node) /* have no device node for? */ | |
399 | continue; | |
400 | ||
401 | if (b) | |
402 | blkid_free_probe(b); | |
403 | ||
404 | b = blkid_new_probe_from_filename(m->partitions[i].node); | |
405 | if (!b) | |
406 | return -ENOMEM; | |
407 | ||
408 | blkid_probe_enable_superblocks(b, 1); | |
409 | blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE); | |
410 | ||
411 | errno = 0; | |
412 | r = blkid_do_safeprobe(b); | |
413 | if (r == -2 || r == 1) { | |
414 | log_debug("Failed to identify any partition type on partition %i", m->partitions[i].partno); | |
415 | continue; | |
416 | } | |
417 | if (r != 0) { | |
418 | if (errno == 0) | |
419 | return -EIO; | |
420 | ||
421 | return -errno; | |
422 | } | |
423 | ||
424 | (void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL); | |
425 | if (fstype) { | |
426 | char *t; | |
427 | ||
428 | t = strdup(fstype); | |
429 | if (!t) | |
430 | return -ENOMEM; | |
431 | ||
432 | m->partitions[i].fstype = t; | |
433 | } | |
434 | } | |
435 | ||
436 | *ret = m; | |
437 | m = NULL; | |
438 | ||
439 | return 0; | |
440 | #else | |
441 | return -EOPNOTSUPP; | |
442 | #endif | |
443 | } | |
444 | ||
445 | DissectedImage* dissected_image_unref(DissectedImage *m) { | |
446 | unsigned i; | |
447 | ||
448 | if (!m) | |
449 | return NULL; | |
450 | ||
451 | for (i = 0; i < _PARTITION_DESIGNATOR_MAX; i++) { | |
452 | free(m->partitions[i].fstype); | |
453 | free(m->partitions[i].node); | |
454 | } | |
455 | ||
456 | free(m); | |
457 | return NULL; | |
458 | } | |
459 | ||
460 | static int mount_partition(DissectedPartition *m, const char *where, const char *directory, DissectedImageMountFlags flags) { | |
461 | const char *p, *options = NULL; | |
462 | bool rw; | |
463 | ||
464 | assert(m); | |
465 | assert(where); | |
466 | ||
467 | if (!m->found || !m->node || !m->fstype) | |
468 | return 0; | |
469 | ||
470 | rw = m->rw && !(flags & DISSECTED_IMAGE_READ_ONLY); | |
471 | ||
472 | if (directory) | |
473 | p = strjoina(where, directory); | |
474 | else | |
475 | p = where; | |
476 | ||
477 | /* Not supported for now. */ | |
478 | if (streq(m->fstype, "crypto_LUKS")) | |
479 | return -EOPNOTSUPP; | |
480 | ||
481 | /* If this is a loopback device then let's mount the image with discard, so that the underlying file remains | |
482 | * sparse when possible. */ | |
483 | if ((flags & DISSECTED_IMAGE_DISCARD_ON_LOOP) && | |
484 | STR_IN_SET(m->fstype, "btrfs", "ext4", "vfat", "xfs")) { | |
485 | const char *l; | |
486 | ||
487 | l = path_startswith(m->node, "/dev"); | |
488 | if (l && startswith(l, "loop")) | |
489 | options = "discard"; | |
490 | } | |
491 | ||
492 | return mount_verbose(LOG_DEBUG, m->node, p, m->fstype, MS_NODEV|(rw ? 0 : MS_RDONLY), options); | |
493 | } | |
494 | ||
495 | int dissected_image_mount(DissectedImage *m, const char *where, DissectedImageMountFlags flags) { | |
496 | int r; | |
497 | ||
498 | assert(m); | |
499 | assert(where); | |
500 | ||
501 | if (!m->partitions[PARTITION_ROOT].found) | |
502 | return -ENXIO; | |
503 | ||
504 | r = mount_partition(m->partitions + PARTITION_ROOT, where, NULL, flags); | |
505 | if (r < 0) | |
506 | return r; | |
507 | ||
508 | r = mount_partition(m->partitions + PARTITION_HOME, where, "/home", flags); | |
509 | if (r < 0) | |
510 | return r; | |
511 | ||
512 | r = mount_partition(m->partitions + PARTITION_SRV, where, "/srv", flags); | |
513 | if (r < 0) | |
514 | return r; | |
515 | ||
516 | if (m->partitions[PARTITION_ESP].found) { | |
517 | const char *mp, *x; | |
518 | ||
519 | /* Mount the ESP to /efi if it exists and is empty. If it doesn't exist, use /boot instead. */ | |
520 | ||
521 | mp = "/efi"; | |
522 | x = strjoina(where, mp); | |
523 | r = dir_is_empty(x); | |
524 | if (r == -ENOENT) { | |
525 | mp = "/boot"; | |
526 | x = strjoina(where, mp); | |
527 | r = dir_is_empty(x); | |
528 | } | |
529 | if (r > 0) { | |
530 | r = mount_partition(m->partitions + PARTITION_ESP, where, mp, flags); | |
531 | if (r < 0) | |
532 | return r; | |
533 | } | |
534 | } | |
535 | ||
536 | return 0; | |
537 | } | |
538 | ||
539 | static const char *const partition_designator_table[] = { | |
540 | [PARTITION_ROOT] = "root", | |
541 | [PARTITION_ROOT_SECONDARY] = "root-secondary", | |
542 | [PARTITION_HOME] = "home", | |
543 | [PARTITION_SRV] = "srv", | |
544 | [PARTITION_ESP] = "esp", | |
545 | [PARTITION_SWAP] = "swap", | |
546 | }; | |
547 | ||
548 | DEFINE_STRING_TABLE_LOOKUP(partition_designator, int); |