]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/storagetm/storagetm.c
storagetm: add new systemd-storagetm component
[thirdparty/systemd.git] / src / storagetm / storagetm.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <getopt.h>
4 #include <sys/file.h>
5
6 #include "af-list.h"
7 #include "alloc-util.h"
8 #include "blockdev-util.h"
9 #include "build.h"
10 #include "daemon-util.h"
11 #include "device-util.h"
12 #include "fd-util.h"
13 #include "fileio.h"
14 #include "format-util.h"
15 #include "fs-util.h"
16 #include "local-addresses.h"
17 #include "loop-util.h"
18 #include "main-func.h"
19 #include "parse-argument.h"
20 #include "path-util.h"
21 #include "pretty-print.h"
22 #include "process-util.h"
23 #include "random-util.h"
24 #include "recurse-dir.h"
25 #include "socket-util.h"
26 #include "terminal-util.h"
27 #include "udev-util.h"
28
/* Command line state, filled in by parse_argv(). */
static int arg_all = 0;            /* Incremented once for every --all/-a on the command line */
static char *arg_nqn = NULL;       /* NQN, either taken from --nqn= or generated in parse_argv() */
static char **arg_devices = NULL;  /* Copy of the positional arguments (paths to expose) */

STATIC_DESTRUCTOR_REGISTER(arg_devices, strv_freep);
STATIC_DESTRUCTOR_REGISTER(arg_nqn, freep);
35
36 static int help(void) {
37 _cleanup_free_ char *link = NULL;
38 int r;
39
40 r = terminal_urlify_man("systemd-storagetm", "8", &link);
41 if (r < 0)
42 return log_oom();
43
44 printf("%s [OPTIONS...] [DEVICE...]\n"
45 "\n%sExpose a block device or regular file as NVMe-TCP volume.%s\n\n"
46 " -h --help Show this help\n"
47 " --version Show package version\n"
48 " --nqn=STRING Select NQN (NVMe Qualified Name)\n"
49 " -a --all Expose all devices\n"
50 "\nSee the %s for details.\n",
51 program_invocation_short_name,
52 ansi_highlight(),
53 ansi_normal(),
54 link);
55
56 return 0;
57 }
58
59 static int parse_argv(int argc, char *argv[]) {
60
61 enum {
62 ARG_NQN = 0x100,
63 ARG_VERSION,
64 };
65
66 static const struct option options[] = {
67 { "help", no_argument, NULL, 'h' },
68 { "version", no_argument, NULL, ARG_VERSION },
69 { "nqn", required_argument, NULL, ARG_NQN },
70 { "all", no_argument, NULL, 'a' },
71 {}
72 };
73
74 int r, c;
75
76 assert(argc >= 0);
77 assert(argv);
78
79 while ((c = getopt_long(argc, argv, "ha", options, NULL)) >= 0)
80
81 switch (c) {
82
83 case 'h':
84 return help();
85
86 case ARG_VERSION:
87 return version();
88
89 case ARG_NQN:
90 if (!filename_is_valid(optarg))
91 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "NQN invalid: %s", optarg);
92
93 if (free_and_strdup(&arg_nqn, optarg) < 0)
94 return log_oom();
95
96 break;
97
98 case 'a':
99 arg_all++;
100 break;
101
102 case '?':
103 return -EINVAL;
104
105 default:
106 assert_not_reached();
107 }
108
109 if (arg_all > 0) {
110 if (argc > optind)
111 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Expects no further arguments if --all/-a is specified.");
112 } else {
113 if (optind >= argc)
114 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Expecting device name or --all/-a.");
115
116 for (int i = optind; i < argc; i++)
117 if (!path_is_valid(argv[i]))
118 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Invalid path: %s", argv[i]);
119
120 arg_devices = strv_copy(argv + optind);
121 }
122
123 if (!arg_nqn) {
124 sd_id128_t id;
125
126 r = sd_id128_get_machine_app_specific(SD_ID128_MAKE(b4,f9,4e,52,b8,e2,45,db,88,84,6e,2e,c3,f4,ef,18), &id);
127 if (r < 0)
128 return log_error_errno(r, "Failed to get machine ID: %m");
129
130 /* See NVM Express Base Specification 2.0c, 4.5 "NVMe Qualified Names" */
131 if (asprintf(&arg_nqn, "nqn.2023-10.io.systemd:storagetm." SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(id)) < 0)
132 return log_oom();
133 }
134
135 return 1;
136 }
137
/* One exposed device or file, together with the configfs objects backing it. */
typedef struct NvmeSubsystem {
        char *name;                 /* Name of our subsystem directory below the subsystems/ dir */
        char *device;               /* Path of the device node/file, as passed to nvme_subsystem_add() */
        struct stat device_stat;    /* fstat() data of the device; used as key in the subsystems hashmap */
        int device_fd;              /* The opened device or regular file */
        int nvme_all_subsystems_fd; /* The /sys/kernel/config/nvmet/subsystems/ dir, that contains all subsystems */
        int nvme_our_subsystem_fd;  /* Our private subsystem dir below it. */
} NvmeSubsystem;
146
147 static NvmeSubsystem* nvme_subsystem_free(NvmeSubsystem *s) {
148 if (!s)
149 return NULL;
150
151 free(s->name);
152 safe_close(s->nvme_all_subsystems_fd);
153 safe_close(s->nvme_our_subsystem_fd);
154 safe_close(s->device_fd);
155 free(s->device);
156
157 return mfree(s);
158 }
159
160 static int nvme_subsystem_unlink(NvmeSubsystem *s) {
161 int r;
162
163 assert(s);
164
165 if (s->nvme_our_subsystem_fd >= 0) {
166 _cleanup_close_ int namespaces_fd = -EBADF;
167
168 namespaces_fd = openat(s->nvme_our_subsystem_fd, "namespaces", O_CLOEXEC|O_DIRECTORY|O_RDONLY);
169 if (namespaces_fd < 0)
170 log_warning_errno(errno, "Failed to open 'namespaces' directory of subsystem '%s': %m", s->name);
171 else {
172 _cleanup_free_ DirectoryEntries *de = NULL;
173
174 r = readdir_all(namespaces_fd, RECURSE_DIR_SORT|RECURSE_DIR_IGNORE_DOT, &de);
175 if (r < 0)
176 log_warning_errno(r, "Failed to read 'namespaces' dir of subsystem '%s', ignoring: %m", s->name);
177 else {
178 FOREACH_ARRAY(ee, de->entries, de->n_entries) {
179 _cleanup_free_ char *enable_fn = NULL;
180 const struct dirent *e = *ee;
181
182 enable_fn = path_join(e->d_name, "enable");
183 if (!enable_fn)
184 return log_oom();
185
186 r = write_string_file_at(namespaces_fd, enable_fn, "0", WRITE_STRING_FILE_DISABLE_BUFFER);
187 if (r < 0)
188 log_warning_errno(r, "Failed to disable namespace '%s' of NVME subsystem '%s', ignoring: %m", e->d_name, s->name);
189
190 if (unlinkat(namespaces_fd, e->d_name, AT_REMOVEDIR) < 0 && errno != ENOENT)
191 log_warning_errno(errno, "Failed to remove namespace '%s' of NVME subsystem '%s', ignoring: %m", e->d_name, s->name);
192 }
193 }
194 }
195
196 s->nvme_our_subsystem_fd = safe_close(s->nvme_our_subsystem_fd);
197 }
198
199 if (s->nvme_all_subsystems_fd >= 0 && s->name) {
200 if (unlinkat(s->nvme_all_subsystems_fd, s->name, AT_REMOVEDIR) < 0 && errno != ENOENT)
201 log_warning_errno(errno, "Failed to remove NVME subsystem '%s', ignoring: %m", s->name);
202
203 s->nvme_all_subsystems_fd = safe_close(s->nvme_all_subsystems_fd); /* Invalidate the subsystems/ dir fd, to remember we unlinked the thing already */
204
205 log_info("NVME subsystem '%s' removed.", s->name);
206 }
207
208 return 0;
209 }
210
211 static NvmeSubsystem *nvme_subsystem_destroy(NvmeSubsystem *s) {
212 if (!s)
213 return NULL;
214
215 (void) nvme_subsystem_unlink(s);
216
217 return nvme_subsystem_free(s);
218 }
219
220 DEFINE_TRIVIAL_CLEANUP_FUNC(NvmeSubsystem*, nvme_subsystem_destroy);
221
222 static int nvme_subsystem_add(const char *node, int consumed_fd, NvmeSubsystem **ret) {
223 _cleanup_close_ int fd = consumed_fd; /* always take possession of the fd */
224 int r;
225
226 assert(node);
227 assert(ret);
228
229 _cleanup_free_ char *fname = NULL;
230 r = path_extract_filename(node, &fname);
231 if (r < 0)
232 return log_error_errno(r, "Failed to extract file name from path: %s", node);
233
234 _cleanup_free_ char *j = NULL;
235 j = strjoin(arg_nqn, ".", fname);
236 if (!j)
237 return log_oom();
238
239 if (fd < 0) {
240 fd = RET_NERRNO(open(node, O_RDONLY|O_CLOEXEC|O_NONBLOCK));
241 if (fd < 0)
242 return log_error_errno(fd, "Failed to open '%s': %m", node);
243 }
244
245 struct stat st;
246 if (fstat(fd, &st) < 0)
247 return log_error_errno(errno, "Failed to fstat '%s': %m", node);
248 if (!S_ISBLK(st.st_mode)) {
249 r = stat_verify_regular(&st);
250 if (r < 0)
251 return log_error_errno(r, "Not a block device or regular file, refusing: %s", node);
252 }
253
254 /* Let's lock this device continuously while we are operating on it */
255 r = lock_generic_with_timeout(fd, LOCK_BSD, LOCK_EX, 10 * USEC_PER_SEC);
256 if (r < 0)
257 return log_error_errno(r, "Failed to lock block device: %m");
258
259 _cleanup_close_ int subsystems_fd = -EBADF;
260 subsystems_fd = RET_NERRNO(open("/sys/kernel/config/nvmet/subsystems", O_DIRECTORY|O_CLOEXEC|O_RDONLY));
261 if (subsystems_fd < 0)
262 return log_error_errno(subsystems_fd, "Failed to open /sys/kernel/config/nvmet/subsystems: %m");
263
264 _cleanup_close_ int subsystem_fd = -EBADF;
265 subsystem_fd = open_mkdir_at(subsystems_fd, j, O_EXCL|O_RDONLY|O_CLOEXEC, 0777);
266 if (subsystem_fd < 0)
267 return log_error_errno(subsystem_fd, "Failed to create NVME subsystem '%s': %m", j);
268
269 r = write_string_file_at(subsystem_fd, "attr_allow_any_host", "1", WRITE_STRING_FILE_DISABLE_BUFFER);
270 if (r < 0)
271 return log_error_errno(r, "Failed to set 'attr_allow_any_host' flag: %m");
272
273 _cleanup_close_ int namespace_fd = -EBADF;
274 namespace_fd = open_mkdir_at(subsystem_fd, "namespaces/1", O_EXCL|O_RDONLY|O_CLOEXEC, 0777);
275 if (namespace_fd < 0)
276 return log_error_errno(namespace_fd, "Failed to create NVME namespace '1': %m");
277
278 /* We use /proc/$PID/fd/$FD rather than /proc/self/fd/$FD, because this string is visible to others
279 * via configfs, and by including the PID it's clear to who the stuff belongs. */
280 r = write_string_file_at(namespace_fd, "device_path", FORMAT_PROC_PID_FD_PATH(0, fd), WRITE_STRING_FILE_DISABLE_BUFFER);
281 if (r < 0)
282 return log_error_errno(r, "Failed to write 'device_path' attribute: %m");
283
284 r = write_string_file_at(namespace_fd, "enable", "1", WRITE_STRING_FILE_DISABLE_BUFFER);
285 if (r < 0)
286 return log_error_errno(r, "Failed to write 'enable' attribute: %m");
287
288 _cleanup_(nvme_subsystem_destroyp) NvmeSubsystem *subsys = NULL;
289
290 subsys = new(NvmeSubsystem, 1);
291 if (!subsys)
292 return log_oom();
293
294 *subsys = (NvmeSubsystem) {
295 .name = TAKE_PTR(j),
296 .device_fd = TAKE_FD(fd),
297 .nvme_all_subsystems_fd = TAKE_FD(subsystems_fd),
298 .nvme_our_subsystem_fd = TAKE_FD(subsystem_fd),
299 .device_stat = st,
300 };
301
302 subsys->device = strdup(node);
303 if (!subsys->device)
304 return log_oom();
305
306 *ret = TAKE_PTR(subsys);
307 return 0;
308 }
309
/* One NVMe-TCP port we created in configfs, bound to a single IP address family. */
typedef struct NvmePort {
        uint16_t portnr;   /* used for both the IP and the NVME port number */
        int ip_family;     /* AF_INET or AF_INET6 */

        int nvme_ports_fd; /* The ports/ directory the port directory was created in */
        int nvme_port_fd;  /* Our own port directory below it */
} NvmePort;
318
319 static NvmePort *nvme_port_free(NvmePort *p) {
320 if (!p)
321 return NULL;
322
323 safe_close(p->nvme_port_fd);
324 safe_close(p->nvme_ports_fd);
325
326 return mfree(p);
327 }
328
329 static int nvme_port_unlink(NvmePort *p) {
330 int r, ret = 0;
331
332 assert(p);
333
334 if (p->nvme_port_fd >= 0) {
335 _cleanup_close_ int subsystems_dir_fd = -EBADF;
336
337 subsystems_dir_fd = openat(p->nvme_port_fd, "subsystems", O_DIRECTORY|O_RDONLY|O_CLOEXEC);
338 if (subsystems_dir_fd < 0)
339 log_warning_errno(errno, "Failed to open 'subsystems' dir of port %" PRIu16 ", ignoring: %m", p->portnr);
340 else {
341 _cleanup_free_ DirectoryEntries *de = NULL;
342
343 r = readdir_all(subsystems_dir_fd, RECURSE_DIR_SORT|RECURSE_DIR_IGNORE_DOT, &de);
344 if (r < 0)
345 log_warning_errno(r, "Failed to read 'subsystems' dir of port %" PRIu16 ", ignoring: %m", p->portnr);
346 else
347 FOREACH_ARRAY(ee, de->entries, de->n_entries) {
348 const struct dirent *e = *ee;
349
350 if (unlinkat(subsystems_dir_fd, e->d_name, 0) < 0 && errno != ENOENT)
351 log_warning_errno(errno, "Failed to remove 'subsystems' symlink '%s' of port %" PRIu16 ", ignoring: %m", e->d_name, p->portnr);
352 }
353 }
354
355 p->nvme_port_fd = safe_close(p->nvme_port_fd);
356 }
357
358 if (p->nvme_ports_fd >= 0) {
359 _cleanup_free_ char *fn = NULL;
360 if (asprintf(&fn, "%" PRIu16, p->portnr) < 0)
361 return log_oom();
362
363 if (unlinkat(p->nvme_ports_fd, fn, AT_REMOVEDIR) < 0) {
364 if (errno == ENOENT)
365 ret = 0;
366 else
367 ret = log_warning_errno(errno, "Failed to remove port '%" PRIu16 ", ignoring: %m", p->portnr);
368 } else
369 ret = 1;
370
371 p->nvme_ports_fd = safe_close(p->nvme_ports_fd);
372 }
373
374 return ret;
375 }
376
377 static NvmePort *nvme_port_destroy(NvmePort *p) {
378 if (!p)
379 return NULL;
380
381 (void) nvme_port_unlink(p);
382
383 return nvme_port_free(p);
384 }
385
386 DEFINE_TRIVIAL_CLEANUP_FUNC(NvmePort*, nvme_port_destroy);
387
388 static int nvme_port_add_portnr(
389 int ports_fd,
390 uint16_t portnr,
391 int ip_family,
392 int *ret_fd) {
393
394 int r;
395
396 assert(ports_fd >= 0);
397 assert(IN_SET(ip_family, AF_INET, AF_INET6));
398 assert(ret_fd);
399
400 _cleanup_free_ char *fname = NULL;
401 if (asprintf(&fname, "%" PRIu16, portnr) < 0)
402 return log_oom();
403
404 _cleanup_close_ int port_fd = -EBADF;
405 port_fd = open_mkdir_at(ports_fd, fname, O_EXCL|O_RDONLY|O_CLOEXEC, 0777);
406 if (port_fd < 0) {
407 if (port_fd != -EEXIST)
408 return log_error_errno(port_fd, "Failed to create port %" PRIu16 ": %m", portnr);
409
410 *ret_fd = -EBADF;
411 return 0;
412 }
413
414 r = write_string_file_at(port_fd, "addr_adrfam", af_to_ipv4_ipv6(ip_family), WRITE_STRING_FILE_DISABLE_BUFFER);
415 if (r < 0)
416 return log_error_errno(r, "Failed to set address family on NVME port %" PRIu16 ": %m", portnr);
417
418 r = write_string_file_at(port_fd, "addr_trtype", "tcp", WRITE_STRING_FILE_DISABLE_BUFFER);
419 if (r < 0)
420 return log_error_errno(r, "Failed to set transport type on NVME port %" PRIu16 ": %m", portnr);
421
422 r = write_string_file_at(port_fd, "addr_trsvcid", fname, WRITE_STRING_FILE_DISABLE_BUFFER);
423 if (r < 0)
424 return log_error_errno(r, "Failed to set IP port on NVME port %" PRIu16 ": %m", portnr);
425
426 r = write_string_file_at(port_fd, "addr_traddr", ip_family == AF_INET6 ? "::" : "0.0.0.0", WRITE_STRING_FILE_DISABLE_BUFFER);
427 if (r < 0)
428 return log_error_errno(r, "Failed to set IP address on NVME port %" PRIu16 ": %m", portnr);
429
430 *ret_fd = TAKE_FD(port_fd);
431 return 1;
432 }
433
434 static uint16_t calculate_start_port(const char *name, int ip_family) {
435 struct siphash state;
436 uint16_t nr;
437
438 assert(name);
439 assert(IN_SET(ip_family, AF_INET, AF_INET6));
440
441 /* Use some fixed key Lennart pulled from /dev/urandom, so that we are deterministic */
442 siphash24_init(&state, SD_ID128_MAKE(d1,0b,67,b5,e2,b7,4a,91,8d,6b,27,b6,35,c1,9f,d9).bytes);
443 siphash24_compress_string(name, &state);
444 siphash24_compress(&ip_family, sizeof(ip_family), &state);
445
446 nr = 1024U + siphash24_finalize(&state) % (0xFFFFU - 1024U);
447 SET_FLAG(nr, 1, ip_family == AF_INET6); /* Lowest bit reflects family */
448
449 return nr;
450 }
451
/* Picks a random port number at or above 1024 to try next. As for the start port, the lowest bit is
 * set for AF_INET6 and cleared for AF_INET. */
static uint16_t calculate_next_port(int ip_family) {
        assert(IN_SET(ip_family, AF_INET, AF_INET6));

        uint16_t port = 1024U + random_u64_range(0xFFFFU - 1024U);

        /* Lowest bit reflects family */
        SET_FLAG(port, 1, ip_family == AF_INET6);

        return port;
}
462
463 static int nvme_port_add(const char *name, int ip_family, NvmePort **ret) {
464 int r;
465
466 assert(name);
467 assert(IN_SET(ip_family, AF_INET, AF_INET6));
468 assert(ret);
469
470 _cleanup_close_ int ports_fd = -EBADF;
471 ports_fd = RET_NERRNO(open("/sys/kernel/config/nvmet/ports", O_DIRECTORY|O_RDONLY|O_CLOEXEC));
472 if (ports_fd < 0)
473 return log_error_errno(ports_fd, "Failed to open /sys/kernel/config/nvmet/ports: %m");
474
475 _cleanup_close_ int port_fd = -EBADF;
476 uint16_t portnr = calculate_start_port(name, ip_family);
477 for (unsigned attempt = 0;; attempt++) {
478 r = nvme_port_add_portnr(ports_fd, portnr, ip_family, &port_fd);
479 if (r < 0)
480 return r;
481 if (r > 0)
482 break;
483
484 if (attempt > 16)
485 return log_error_errno(SYNTHETIC_ERRNO(EBUSY), "Can't find free NVME port after %u attempts.", attempt);
486
487 log_debug_errno(port_fd, "NVME port %" PRIu16 " exists already, randomizing port.", portnr);
488
489 portnr = calculate_next_port(ip_family);
490 }
491
492 _cleanup_(nvme_port_destroyp) NvmePort *p = new(NvmePort, 1);
493 if (!p)
494 return log_oom();
495
496 *p = (NvmePort) {
497 .portnr = portnr,
498 .nvme_ports_fd = TAKE_FD(ports_fd),
499 .nvme_port_fd = TAKE_FD(port_fd),
500 .ip_family = ip_family,
501 };
502
503 *ret = TAKE_PTR(p);
504 return 0;
505 }
506
507 static int nvme_port_link_subsystem(NvmePort *port, NvmeSubsystem *subsys) {
508 assert(port);
509 assert(subsys);
510
511 _cleanup_free_ char *target = NULL, *linkname = NULL;
512 target = path_join("/sys/kernel/config/nvmet/subsystems", subsys->name);
513 if (!target)
514 return log_oom();
515
516 linkname = path_join("subsystems", subsys->name);
517 if (!linkname)
518 return log_oom();
519
520 if (symlinkat(target, port->nvme_port_fd, linkname) < 0)
521 return log_error_errno(errno, "Failed to link subsystem '%s' to port %" PRIu16 ": %m", subsys->name, port->portnr);
522
523 return 0;
524 }
525
526 static int nvme_port_unlink_subsystem(NvmePort *port, NvmeSubsystem *subsys) {
527 assert(port);
528 assert(subsys);
529
530 _cleanup_free_ char *linkname = NULL;
531 linkname = path_join("subsystems", subsys->name);
532 if (!linkname)
533 return log_oom();
534
535 if (unlinkat(port->nvme_port_fd, linkname, 0) < 0 && errno != ENOENT)
536 return log_error_errno(errno, "Failed to unlink subsystem '%s' to port %" PRIu16 ": %m", subsys->name, port->portnr);
537
538 return 0;
539 }
540
541 static int nvme_subsystem_report(NvmeSubsystem *subsystem, NvmePort *ipv4, NvmePort *ipv6) {
542 assert(subsystem);
543
544 _cleanup_free_ struct local_address *addresses = NULL;
545 int n_addresses;
546 n_addresses = local_addresses(NULL, 0, AF_UNSPEC, &addresses);
547 if (n_addresses < 0)
548 return log_error_errno(n_addresses, "Failed to determine local IP addresses: %m");
549
550 log_notice("NVMe-TCP: %s %s%s%s (%s)",
551 special_glyph(SPECIAL_GLYPH_ARROW_RIGHT),
552 emoji_enabled() ? special_glyph(SPECIAL_GLYPH_COMPUTER_DISK) : "", emoji_enabled() ? " " : "",
553 subsystem->name, subsystem->device);
554
555 FOREACH_ARRAY(a, addresses, n_addresses) {
556 NvmePort *port = a->family == AF_INET ? ipv4 : ipv6;
557
558 if (!port)
559 continue;
560
561 log_info(" %s Try for specific device: nvme connect -t tcp -n '%s' -a %s -s %" PRIu16,
562 special_glyph(a >= addresses + (n_addresses - 1) ? SPECIAL_GLYPH_TREE_RIGHT : SPECIAL_GLYPH_TREE_BRANCH),
563 subsystem->name,
564 IN_ADDR_TO_STRING(a->family, &a->address),
565 port->portnr);
566 }
567
568 return 0;
569 }
570
571 static int nvme_port_report(NvmePort *port) {
572 if (!port)
573 return 0;
574
575 _cleanup_free_ struct local_address *addresses = NULL;
576 int n_addresses;
577 n_addresses = local_addresses(NULL, 0, port->ip_family, &addresses);
578 if (n_addresses < 0)
579 return log_error_errno(n_addresses, "Failed to determine local IP addresses: %m");
580
581 log_notice("NVMe-TCP: %s %s%sListening on %s (port %" PRIu16 ")",
582 special_glyph(SPECIAL_GLYPH_ARROW_RIGHT),
583 emoji_enabled() ? special_glyph(SPECIAL_GLYPH_WORLD) : "", emoji_enabled() ? " " : "",
584 af_to_ipv4_ipv6(port->ip_family),
585 port->portnr);
586
587 FOREACH_ARRAY(a, addresses, n_addresses)
588 log_info(" %s Try for all devices: nvme connect-all -t tcp -a %s -s %" PRIu16,
589 special_glyph(a >= addresses + (n_addresses - 1) ? SPECIAL_GLYPH_TREE_RIGHT : SPECIAL_GLYPH_TREE_BRANCH),
590 IN_ADDR_TO_STRING(a->family, &a->address),
591 port->portnr);
592
593 return 0;
594 }
595
596 typedef struct Context {
597 Hashmap *subsystems;
598 NvmePort *ipv4_port, *ipv6_port;
599
600 bool display_refresh_scheduled;
601 } Context;
602
603 static void device_hash_func(const struct stat *q, struct siphash *state) {
604 assert(q);
605
606 if (S_ISBLK(q->st_mode) || S_ISCHR(q->st_mode)) {
607 mode_t m = q->st_mode & S_IFMT;
608 siphash24_compress(&m, sizeof(m), state);
609 siphash24_compress(&q->st_rdev, sizeof(q->st_rdev), state);
610 return;
611 }
612
613 return inode_hash_func(q, state);
614 }
615
616 static int device_compare_func(const struct stat *a, const struct stat *b) {
617 int r;
618
619 assert(a);
620 assert(b);
621
622 r = CMP(a->st_mode & S_IFMT, b->st_mode & S_IFMT);
623 if (r != 0)
624 return r;
625
626 if (S_ISBLK(a->st_mode) || S_ISCHR(a->st_mode)) {
627 r = CMP(major(a->st_rdev), major(b->st_rdev));
628 if (r != 0)
629 return r;
630
631 r = CMP(minor(a->st_rdev), minor(b->st_rdev));
632 if (r != 0)
633 return r;
634
635 return 0;
636 }
637
638 return inode_compare_func(a, b);
639 }
640
641 DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
642 nvme_subsystem_hash_ops,
643 struct stat,
644 device_hash_func,
645 device_compare_func,
646 NvmeSubsystem,
647 nvme_subsystem_destroy);
648
649 static void context_done(Context *c) {
650 assert(c);
651
652 c->ipv4_port = nvme_port_destroy(c->ipv4_port);
653 c->ipv6_port = nvme_port_destroy(c->ipv6_port);
654
655 c->subsystems = hashmap_free(c->subsystems);
656 }
657
658 static void device_track_back(sd_device *d, sd_device **ret) {
659 int r;
660
661 assert(d);
662 assert(ret);
663
664 const char *devname = NULL;
665 (void) sd_device_get_devname(d, &devname);
666
667 _cleanup_(sd_device_unrefp) sd_device *d_originating = NULL;
668 r = block_device_get_originating(d, &d_originating);
669 if (r < 0)
670 log_device_debug_errno(d, r, "Failed to get originating device for '%s', ignoring: %m", strna(devname));
671
672 sd_device *d_whole = NULL;
673 r = block_device_get_whole_disk(d_originating ?: d, &d_whole); /* does not ref returned device */
674 if (r < 0)
675 log_device_debug_errno(d, r, "Failed to get whole device for '%s', ignoring: %m", strna(devname));
676
677 *ret = d_whole ? sd_device_ref(d_whole) : d_originating ? TAKE_PTR(d_originating) : sd_device_ref(d);
678 }
679
680 static int device_is_same(sd_device *a, sd_device *b) {
681 dev_t devnum_a, devnum_b;
682 int r;
683
684 assert(a);
685 assert(b);
686
687 r = sd_device_get_devnum(a, &devnum_a);
688 if (r < 0)
689 return r;
690
691 r = sd_device_get_devnum(b, &devnum_b);
692 if (r < 0)
693 return r;
694
695 return devnum_a == devnum_b;
696 }
697
698 static bool device_is_allowed(sd_device *d) {
699 int r;
700
701 assert(d);
702
703 if (arg_all >= 2) /* If --all is specified twice we allow even the root fs to shared */
704 return true;
705
706 const char *devname;
707 r = sd_device_get_devname(d, &devname);
708 if (r < 0)
709 return log_device_error_errno(d, r, "Failed to get device name: %m");
710
711 dev_t root_devnum;
712 r = get_block_device("/", &root_devnum);
713 if (r < 0) {
714 log_warning_errno(r, "Failed to get backing device of the root file system: %m");
715 return false; /* Better safe */
716 }
717 if (root_devnum == 0) /* Not backed by a block device? */
718 return true;
719
720 _cleanup_(sd_device_unrefp) sd_device *root_device = NULL;
721 r = sd_device_new_from_devnum(&root_device, 'b', root_devnum);
722 if (r < 0) {
723 log_warning_errno(r, "Failed to get root block device, assuming device '%s' is same as root device: %m", devname);
724 return false;
725 }
726
727 _cleanup_(sd_device_unrefp) sd_device *whole_root_device = NULL;
728 device_track_back(root_device, &whole_root_device);
729
730 _cleanup_(sd_device_unrefp) sd_device *whole_d = NULL;
731 device_track_back(d, &whole_d);
732
733 r = device_is_same(whole_root_device, whole_d);
734 if (r < 0) {
735 log_warning_errno(r, "Failed to determine if root device and device '%s' are the same, assuming they are: %m", devname);
736 return false; /* Better safe */
737 }
738
739 return !r;
740 }
741
742 static int device_added(Context *c, sd_device *device) {
743 _cleanup_close_ int fd = -EBADF;
744 int r;
745
746 assert(c);
747 assert(device);
748
749 const char *sysname;
750 r = sd_device_get_sysname(device, &sysname);
751 if (r < 0)
752 return log_device_error_errno(device, r, "Failed to get device name: %m");
753
754 log_device_debug(device, "new block device '%s'", sysname);
755
756 if (STARTSWITH_SET(sysname, "loop", "zram")) /* Ignore some devices */
757 return 0;
758
759 const char *devname;
760 r = sd_device_get_devname(device, &devname);
761 if (r < 0)
762 return log_device_error_errno(device, r, "Failed to get device node path: %m");
763
764 struct stat lookup_key = {
765 .st_mode = S_IFBLK,
766 };
767
768 r = sd_device_get_devnum(device, &lookup_key.st_rdev);
769 if (r < 0)
770 return log_device_error_errno(device, r, "Failed to get major/minor from device: %m");
771
772 if (hashmap_contains(c->subsystems, &lookup_key)) {
773 log_debug("Device '%s' already seen.", devname);
774 return 0;
775 }
776
777 if (!device_is_allowed(device)) {
778 log_device_debug(device, "Not exposing device '%s', as it is backed by root disk.", devname);
779 return 0;
780 }
781
782 fd = sd_device_open(device, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
783 if (fd < 0) {
784 log_device_warning_errno(device, fd, "Failed to open newly acquired device '%s', ignoring device: %m", devname);
785 return 0;
786 }
787
788 _cleanup_(nvme_subsystem_destroyp) NvmeSubsystem *s = NULL;
789 r = nvme_subsystem_add(devname, TAKE_FD(fd), &s);
790 if (r < 0)
791 return r;
792
793 if (c->ipv4_port) {
794 r = nvme_port_link_subsystem(c->ipv4_port, s);
795 if (r < 0)
796 return r;
797 }
798
799 if (c->ipv6_port) {
800 r = nvme_port_link_subsystem(c->ipv6_port, s);
801 if (r < 0)
802 return r;
803 }
804
805 r = hashmap_ensure_put(&c->subsystems, &nvme_subsystem_hash_ops, &s->device_stat, s);
806 if (r < 0)
807 return log_error_errno(r, "Failed to add subsystem to hash table: %m");
808
809 (void) nvme_subsystem_report(s, c->ipv4_port, c->ipv6_port);
810
811 TAKE_PTR(s);
812 return 1;
813 }
814
815 static int device_removed(Context *c, sd_device *device) {
816 int r;
817
818 assert(device);
819
820 struct stat lookup_key = {
821 .st_mode = S_IFBLK,
822 };
823
824 r = sd_device_get_devnum(device, &lookup_key.st_rdev);
825 if (r < 0)
826 return log_device_error_errno(device, r, "Failed to get major/minor from device: %m");
827
828 NvmeSubsystem *s = hashmap_remove(c->subsystems, &lookup_key);
829 if (!s)
830 return 0;
831
832 log_device_debug(device, "removed block device '%s'", s->name);
833
834 if (c->ipv4_port)
835 (void) nvme_port_unlink_subsystem(c->ipv4_port, s);
836 if (c->ipv6_port)
837 (void) nvme_port_unlink_subsystem(c->ipv6_port, s);
838
839 s = nvme_subsystem_destroy(s);
840 return 1;
841 }
842
843 static int device_monitor_handler(sd_device_monitor *monitor, sd_device *device, void *userdata) {
844 Context *c = ASSERT_PTR(userdata);
845
846 if (device_for_action(device, SD_DEVICE_REMOVE))
847 device_removed(c, device);
848 else
849 device_added(c, device);
850
851 return 0;
852 }
853
854 static int on_display_refresh(sd_event_source *s, uint64_t usec, void *userdata) {
855 Context *c = ASSERT_PTR(userdata);
856
857 assert(s);
858
859 c->display_refresh_scheduled = false;
860
861 if (isatty(STDERR_FILENO) > 0)
862 fputs(ANSI_HOME_CLEAR, stderr);
863
864 (void) nvme_port_report(c->ipv4_port);
865 (void) nvme_port_report(c->ipv6_port);
866
867 NvmeSubsystem *i;
868 HASHMAP_FOREACH(i, c->subsystems)
869 (void) nvme_subsystem_report(i, c->ipv4_port, c->ipv6_port);
870
871 return 0;
872 }
873
874 static int on_address_change(sd_netlink *rtnl, sd_netlink_message *mm, void *userdata) {
875 Context *c = ASSERT_PTR(userdata);
876 int r, family;
877
878 assert(rtnl);
879 assert(mm);
880
881 r = sd_rtnl_message_addr_get_family(mm, &family);
882 if (r < 0) {
883 log_warning_errno(r, "Failed to get address family from netlink address message, ignoring: %m");
884 return 0;
885 }
886
887 if (!c->display_refresh_scheduled) {
888 r = sd_event_add_time_relative(
889 sd_netlink_get_event(rtnl),
890 /* ret_slot= */ NULL,
891 CLOCK_MONOTONIC,
892 750 * USEC_PER_MSEC,
893 0,
894 on_display_refresh,
895 c);
896 if (r < 0)
897 log_warning_errno(r, "Failed to schedule display refresh, ignoring: %m");
898 else
899 c->display_refresh_scheduled = true;
900 }
901
902 return 0;
903 }
904
905 static int run(int argc, char* argv[]) {
906 _cleanup_(sd_device_monitor_unrefp) sd_device_monitor *monitor = NULL;
907 _cleanup_(sd_event_unrefp) sd_event *event = NULL;
908 _cleanup_(context_done) Context context = {};
909 int r;
910
911 log_show_color(true);
912 log_parse_environment();
913 log_open();
914
915 r = parse_argv(argc, argv);
916 if (r <= 0)
917 return r;
918
919 r = sd_event_new(&event);
920 if (r < 0)
921 return log_error_errno(r, "Failed to allocate event loop: %m");
922
923 r = sd_event_set_signal_exit(event, true);
924 if (r < 0)
925 return log_error_errno(r, "Failed to install exit signal handlers: %m");
926
927 STRV_FOREACH(i, arg_devices) {
928 _cleanup_(nvme_subsystem_destroyp) NvmeSubsystem *subsys = NULL;
929
930 r = nvme_subsystem_add(*i, -EBADF, &subsys);
931 if (r < 0)
932 return r;
933
934 r = hashmap_ensure_put(&context.subsystems, &nvme_subsystem_hash_ops, &subsys->device_stat, subsys);
935 if (r == -EEXIST) {
936 log_warning_errno(r, "Duplicate device '%s' specified, skipping: %m", *i);
937 continue;
938 }
939 if (r < 0)
940 return log_error_errno(r, "Failed to add subsystem to hash table: %m");
941
942 TAKE_PTR(subsys);
943 }
944
945 r = nvme_port_add(arg_nqn, AF_INET, &context.ipv4_port);
946 if (r < 0)
947 return r;
948
949 nvme_port_report(context.ipv4_port);
950
951 if (socket_ipv6_is_enabled()) {
952 r = nvme_port_add(arg_nqn, AF_INET6, &context.ipv6_port);
953 if (r < 0)
954 return r;
955
956 nvme_port_report(context.ipv6_port);
957 }
958
959 NvmeSubsystem *i;
960 HASHMAP_FOREACH(i, context.subsystems) {
961 if (context.ipv4_port) {
962 r = nvme_port_link_subsystem(context.ipv4_port, i);
963 if (r < 0)
964 return r;
965 }
966
967 if (context.ipv6_port) {
968 r = nvme_port_link_subsystem(context.ipv6_port, i);
969 if (r < 0)
970 return r;
971 }
972
973 (void) nvme_subsystem_report(i, context.ipv4_port, context.ipv6_port);
974 }
975
976 if (arg_all > 0) {
977 r = sd_device_monitor_new(&monitor);
978 if (r < 0)
979 return log_error_errno(r, "Failed to allocate device monitor: %m");
980
981 r = sd_device_monitor_filter_add_match_subsystem_devtype(monitor, "block", "disk");
982 if (r < 0)
983 return log_error_errno(r, "Failed to configure device monitor match: %m");
984
985 r = sd_device_monitor_attach_event(monitor, event);
986 if (r < 0)
987 return log_error_errno(r, "Failed to attach device monitor to event loop: %m");
988
989 r = sd_device_monitor_start(monitor, device_monitor_handler, &context);
990 if (r < 0)
991 return log_error_errno(r, "Failed to start device monitor: %m");
992
993 _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *enumerator = NULL;
994 r = sd_device_enumerator_new(&enumerator);
995 if (r < 0)
996 return log_error_errno(r, "Failed to allocate enumerator: %m");
997
998 r = sd_device_enumerator_add_match_subsystem(enumerator, "block", /* match= */ true);
999 if (r < 0)
1000 return log_error_errno(r, "Failed to match block devices: %m");
1001
1002 r = sd_device_enumerator_add_match_property(enumerator, "DEVTYPE", "disk");
1003 if (r < 0)
1004 return log_error_errno(r, "Failed to match whole block devices: %m");
1005
1006 r = sd_device_enumerator_add_nomatch_sysname(enumerator, "loop*");
1007 if (r < 0)
1008 return log_error_errno(r, "Failed to exclude loop devices: %m");
1009
1010 FOREACH_DEVICE(enumerator, device)
1011 device_added(&context, device);
1012 }
1013
1014 _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
1015 r = sd_netlink_open(&rtnl);
1016 if (r < 0)
1017 return log_error_errno(r, "Failed to connect to netlink: %m");
1018
1019 r = sd_netlink_attach_event(rtnl, event, SD_EVENT_PRIORITY_NORMAL);
1020 if (r < 0)
1021 return log_error_errno(r, "Failed to attach netlink socket to event loop: %m");
1022
1023 r = sd_netlink_add_match(rtnl, /* ret_slot= */ NULL, RTM_NEWADDR, on_address_change, /* destroy_callback= */ NULL, &context, "storagetm-newaddr");
1024 if (r < 0)
1025 return log_error_errno(r, "Failed to subscribe to RTM_NEWADDR events: %m");
1026
1027 r = sd_netlink_add_match(rtnl, /* ret_slot= */ NULL, RTM_DELADDR, on_address_change, /* destroy_callback= */ NULL, &context, "storagetm-deladdr");
1028 if (r < 0)
1029 return log_error_errno(r, "Failed to subscribe to RTM_DELADDR events: %m");
1030
1031 if (isatty(0) > 0)
1032 log_info("Hit Ctrl-C to exit target mode.");
1033
1034 _unused_ _cleanup_(notify_on_cleanup) const char *notify_message =
1035 notify_start("READY=1\n"
1036 "STATUS=Exposing disks in target mode...",
1037 NOTIFY_STOPPING);
1038
1039 r = sd_event_loop(event);
1040 if (r < 0)
1041 return log_error_errno(r, "Failed to run event loop: %m");
1042
1043 log_info("Exiting target mode.");
1044 return r;
1045 }
1046
1047 DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run);