From 4e345324988656fabb767cc6043fdf51b2cbfa21 Mon Sep 17 00:00:00 2001 From: Karel Zak Date: Thu, 15 Aug 2024 10:26:31 +0200 Subject: [PATCH] libmount: add support for listmount() The new listmount() syscall returns a list of unique mount IDs (just uint64_t per node, nothing else). It makes it very fast and efficient. * libmount supports two scenarios: - fetch the whole mount table by mnt_table_fetch_listmount(); this is an alternative to mnt_table_parse_file() - on demand; this mode is an extension to the current functionality, when enabled by mnt_table_enable_listmount(), then mnt_table_next_fs() will ask the kernel for data by listmount. If mnt_table_next_fs() iterates on the mount table in reverse order (MNT_ITER_BACKWARD), then it reads mount nodes from the kernel in reverse order too. The advantage of the on-demand mode is that on machines with a huge mount table (thousands of nodes), we can work with only a subset of the table (usually the last few nodes with the most recently mounted filesystems), and the kernel does not have to compose a complete huge table. This should be an improvement over the mountinfo file. The default is to read 512 nodes (IDs) by one listmount() call. This size can be altered by mnt_table_listmount_set_stepsiz(). The default size should be large enough for usual Linux machines. It's also possible to set a sub-tree by mnt_table_listmount_set_id() and a namespace by mnt_table_listmount_set_ns(). If libmnt_statmnt (on-demand statmount()) is assigned to the table, then all filesystems in the table are automatically assigned to this statmount() setup too. This allows for a completely on-demand scenario. 
tb = mnt_new_table(); sm = mnt_new_statmnt(); mnt_table_refer_statmnt(tb, sm); /* enable statmount() */ mnt_table_enable_listmount(tb, 1); /* enable listmount() */ while (mnt_table_next_fs(tb, itr, &fs) == 0) { if (strcmp("vfat", mnt_fs_get_fstype(fs)) == 0) print("%s", mnt_fs_get_fs_options(fs)); } In this example, mnt_table_next_fs() serves as the frontend for listmount() and mnt_fs_get_...() serves as the frontend for statmount(). The fs-options are read from kernel only for "vfat" filesystems. Signed-off-by: Karel Zak --- libmount/docs/libmount-sections.txt | 5 + libmount/meson.build | 1 + libmount/src/Makemodule.am | 1 + libmount/src/libmount.h.in | 10 +- libmount/src/libmount.sym | 5 + libmount/src/mountP.h | 9 + libmount/src/tab.c | 23 +- libmount/src/tab_listmount.c | 374 ++++++++++++++++++++++++++++ 8 files changed, 425 insertions(+), 3 deletions(-) create mode 100644 libmount/src/tab_listmount.c diff --git a/libmount/docs/libmount-sections.txt b/libmount/docs/libmount-sections.txt index 79c6055e5..13259fa51 100644 --- a/libmount/docs/libmount-sections.txt +++ b/libmount/docs/libmount-sections.txt @@ -368,7 +368,9 @@ mnt_table_add_fs mnt_table_append_intro_comment mnt_table_append_trailing_comment mnt_table_enable_comments +mnt_table_enable_listmount mnt_table_enable_noautofs +mnt_table_fetch_listmount mnt_table_find_devno mnt_table_find_fs mnt_table_find_mountpoint @@ -391,6 +393,9 @@ mnt_table_is_empty mnt_table_is_fs_mounted mnt_table_is_noautofs mnt_table_last_fs +mnt_table_listmount_set_id +mnt_table_listmount_set_ns +mnt_table_listmount_set_stepsiz mnt_table_move_fs mnt_table_next_child_fs mnt_table_next_fs diff --git a/libmount/meson.build b/libmount/meson.build index 335523d26..05b31d4d4 100644 --- a/libmount/meson.build +++ b/libmount/meson.build @@ -44,6 +44,7 @@ lib_mount_sources = ''' if LINUX lib_mount_sources += ''' src/fs_statmount.c + src/tab_listmount.c src/hooks.c src/monitor.c src/optlist.c diff --git a/libmount/src/Makemodule.am 
b/libmount/src/Makemodule.am index 9c87e0abe..49f6d6f03 100644 --- a/libmount/src/Makemodule.am +++ b/libmount/src/Makemodule.am @@ -31,6 +31,7 @@ libmount_la_SOURCES += \ libmount/src/context_mount.c \ libmount/src/context_umount.c \ libmount/src/fs_statmount.c \ + libmount/src/tab_listmount.c \ libmount/src/hooks.c \ libmount/src/hook_mount.c \ libmount/src/hook_mount_legacy.c \ diff --git a/libmount/src/libmount.h.in b/libmount/src/libmount.h.in index 89bae3a0c..555c7be34 100644 --- a/libmount/src/libmount.h.in +++ b/libmount/src/libmount.h.in @@ -592,7 +592,7 @@ extern int mnt_fs_refer_statmnt(struct libmnt_fs *fs, struct libmnt_statmnt *sm) extern struct libmnt_statmnt *mnt_fs_get_statmnt(struct libmnt_fs *fs); extern int mnt_fs_fetch_statmount(struct libmnt_fs *fs, uint64_t mask); -/* tab-parse.c */ +/* tab_parse.c */ extern struct libmnt_table *mnt_new_table_from_file(const char *filename) __ul_attribute__((warn_unused_result)); extern struct libmnt_table *mnt_new_table_from_dir(const char *dirname) @@ -690,6 +690,14 @@ extern int mnt_table_find_next_fs(struct libmnt_table *tb, extern int mnt_table_is_fs_mounted(struct libmnt_table *tb, struct libmnt_fs *fstab_fs); +/* tab_listmount.c */ +extern int mnt_table_listmount_set_id(struct libmnt_table *tb, uint64_t id); +extern int mnt_table_listmount_set_ns(struct libmnt_table *tb, uint64_t ns); +extern int mnt_table_listmount_set_stepsiz(struct libmnt_table *tb, size_t sz); + +extern int mnt_table_enable_listmount(struct libmnt_table *tb, int enable); +extern int mnt_table_fetch_listmount(struct libmnt_table *tb); + /* tab_update.c */ extern struct libmnt_update *mnt_new_update(void) __ul_attribute__((warn_unused_result)); diff --git a/libmount/src/libmount.sym b/libmount/src/libmount.sym index 761eabdc9..ecf68b865 100644 --- a/libmount/src/libmount.sym +++ b/libmount/src/libmount.sym @@ -396,6 +396,11 @@ MOUNT_2_41 { mnt_ref_statmnt; mnt_statmnt_disable_fetching; mnt_statmnt_set_mask; + 
mnt_table_enable_listmount; + mnt_table_fetch_listmount; + mnt_table_listmount_set_id; + mnt_table_listmount_set_ns; + mnt_table_listmount_set_stepsiz; mnt_table_refer_statmnt; mnt_unref_statmnt; } MOUNT_2_40; diff --git a/libmount/src/mountP.h b/libmount/src/mountP.h index 6ea1d89cf..ee275f261 100644 --- a/libmount/src/mountP.h +++ b/libmount/src/mountP.h @@ -99,6 +99,9 @@ struct libmnt_test { extern int mnt_run_test(struct libmnt_test *tests, int argc, char *argv[]); #endif +/* private tab_listmount.c */ +struct libmnt_listmnt; + /* utils.c */ extern int mnt_valid_tagname(const char *tagname); @@ -161,6 +164,11 @@ extern int __mnt_table_is_fs_mounted( struct libmnt_table *tb, extern int mnt_table_enable_noautofs(struct libmnt_table *tb, int ignore); extern int mnt_table_is_noautofs(struct libmnt_table *tb); +/* tab_listmount.c */ +extern int mnt_table_next_lsmnt(struct libmnt_table *tb, int direction); +extern int mnt_table_reset_listmount(struct libmnt_table *tb); +extern int mnt_table_want_listmount(struct libmnt_table *tb); + /* * Generic iterator */ @@ -297,6 +305,7 @@ struct libmnt_table { int (*fltrcb)(struct libmnt_fs *fs, void *data); void *fltrcb_data; + struct libmnt_listmnt *lsmnt; /* listmount() stuff */ struct libmnt_statmnt *stmnt; /* statmount() stuff */ int noautofs; /* ignore autofs mounts */ diff --git a/libmount/src/tab.c b/libmount/src/tab.c index fbd216206..7880d6491 100644 --- a/libmount/src/tab.c +++ b/libmount/src/tab.c @@ -116,6 +116,8 @@ int mnt_reset_table(struct libmnt_table *tb) } tb->nents = 0; + mnt_table_reset_listmount(tb); + return 0; } @@ -173,6 +175,9 @@ void mnt_free_table(struct libmnt_table *tb) free(tb->comm_intro); free(tb->comm_tail); + free(tb->lsmnt); + tb->lsmnt = NULL; + mnt_unref_statmnt(tb->stmnt); tb->stmnt = NULL; @@ -432,7 +437,6 @@ int mnt_table_refer_statmnt(struct libmnt_table *tb, struct libmnt_statmnt *sm) return 0; } - /** * mnt_table_find_fs: * @tb: tab pointer @@ -493,7 +497,6 @@ int 
mnt_table_add_fs(struct libmnt_table *tb, struct libmnt_fs *fs) DBG(TAB, ul_debugobj(tb, "add entry: %s %s", mnt_fs_get_source(fs), mnt_fs_get_target(fs))); - if (tb->stmnt) mnt_fs_refer_statmnt(fs, tb->stmnt); @@ -853,6 +856,22 @@ int mnt_table_next_fs(struct libmnt_table *tb, struct libmnt_iter *itr, struct l if (fs) *fs = NULL; + if (mnt_table_want_listmount(tb) && + (list_empty(&tb->ents) || itr->p == itr->head)) { + struct list_head *prev = NULL; + + if (itr->p) + prev = IS_ITER_FORWARD(itr) ? itr->p->prev : itr->p->next; + rc = mnt_table_next_lsmnt(tb, itr->direction); + if (rc) + return rc; + MNT_ITER_INIT(itr, &tb->ents); + if (prev) { + itr->p = prev; + MNT_ITER_ITERATE(itr); + } + } + if (!itr->head) MNT_ITER_INIT(itr, &tb->ents); if (itr->p != itr->head) { diff --git a/libmount/src/tab_listmount.c b/libmount/src/tab_listmount.c new file mode 100644 index 000000000..2d11af6e9 --- /dev/null +++ b/libmount/src/tab_listmount.c @@ -0,0 +1,374 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +/* + * This file is part of libmount from util-linux project. + * + * Copyright (C) 2024 Karel Zak + * + * libmount is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + */ +#include "mountP.h" + +/* +* This struct is not shared between multiple tables, so reference counting is +* not used for it. 
+ */
+struct libmnt_listmnt {
+
+	uint64_t	id;		/* node ID (LSMT_ROOT for "/") */
+	uint64_t	ns;		/* namespace ID or zero for the current */
+	uint64_t	last;		/* last ID from previous listmount() call */
+	size_t		stepsiz;	/* how many IDs read in one step */
+	uint64_t	*list;		/* buffer for IDs (same allocation as this struct) */
+
+	unsigned int	enabled : 1,	/* on-demand listmount status */
+			done : 1,	/* we already have all data */
+			reverse : 1;	/* current setting */
+};
+
+/* default number of IDs read by one listmount() call */
+#define MNT_LSMNT_STEPSIZ	512
+
+/*
+ * Creates the listmount() setup for @tb, or re-creates it if it was
+ * allocated for a different @stepsiz. Zero @stepsiz means
+ * MNT_LSMNT_STEPSIZ.
+ *
+ * On the first call (no setup yet) the kernel is probed by a one-item
+ * reverse ul_listmount() call; any result other than one returned ID is
+ * reported as -ENOSYS.
+ *
+ * When the setup is re-created, id, ns, last, enabled and reverse are copied
+ * from the old setup and the old one is released. When the existing setup
+ * already has the requested size, it is kept untouched.
+ *
+ * Returns: 0 on success, -EINVAL on invalid arguments, -ENOSYS if the probe
+ * fails, -ENOMEM on allocation error.
+ */
+static int table_init_listmount(struct libmnt_table *tb, size_t stepsiz)
+{
+	struct libmnt_listmnt *old, *ls;
+
+	if (!tb)
+		return -EINVAL;
+	if (!stepsiz)
+		stepsiz = MNT_LSMNT_STEPSIZ;
+
+	/* struct and ID buffer are allocated together; refuse sizes where
+	 * the byte count would overflow */
+	if (stepsiz > (SIZE_MAX - sizeof(struct libmnt_listmnt)) / sizeof(uint64_t))
+		return -EINVAL;
+
+	old = ls = tb->lsmnt;
+
+	/* check if supported by current kernel */
+	if (!ls) {
+		uint64_t dummy;
+
+		errno = 0;
+		if (ul_listmount(LSMT_ROOT, 0, 0, &dummy, 1, LISTMOUNT_REVERSE) != 1) {
+			if (errno == ENOSYS)
+				DBG(TAB, ul_debugobj(tb, "listmount: unsuppported"));
+			if (errno == EINVAL)
+				DBG(TAB, ul_debugobj(tb, "listmount: reverse unsuppported"));
+			return -ENOSYS;
+		}
+	}
+
+	/* reset if allocated for a different size */
+	if (ls && ls->stepsiz != stepsiz)
+		ls = NULL;
+
+	/* alloc libmnt_listmnt together with list buffer */
+	if (!ls) {
+		char *x = calloc(1,	sizeof(struct libmnt_listmnt)
+				+ (sizeof(uint64_t) * stepsiz));
+		if (!x)
+			return -ENOMEM;
+
+		ls = (struct libmnt_listmnt *) x;
+		ls->list = (uint64_t *) (x + sizeof(struct libmnt_listmnt));
+		ls->stepsiz = stepsiz;
+		ls->id = LSMT_ROOT;	/* default */
+	}
+
+	/* reuse old setting; only if a new struct replaces the old one,
+	 * otherwise we would free the struct that stays in the table */
+	if (old && old != ls) {
+		ls->id = old->id;
+		ls->ns = old->ns;
+		ls->last = old->last;
+		ls->enabled = old->enabled;
+		ls->reverse = old->reverse;
+		free(old);
+	}
+
+	tb->lsmnt = ls;
+
+	DBG(TAB, ul_debugobj(tb, "listmount: init [step=%zu]", ls->stepsiz));
+	return 0;
+}
+
+/**
+ * mnt_table_listmount_set_id:
+ * @tb: mount table
+ * @id: root ID
+ *
+ * Set root ID for the table
if the table is read from kernel by
+ * listmount() syscall. The default is to read all filesystems; use
+ * statx(STATX_MNT_ID_UNIQUE) for subdirectory.
+ *
+ * Returns: 0 on success, < 0 on error
+ * Since: 2.41
+ */
+int mnt_table_listmount_set_id(struct libmnt_table *tb, uint64_t id)
+{
+	int rc = 0;
+
+	if (!tb)
+		return -EINVAL;
+	/* create the default setup on first use */
+	if (!tb->lsmnt && (rc = table_init_listmount(tb, 0)) != 0)
+		return rc;
+	tb->lsmnt->id = id;
+	return 0;
+}
+
+/**
+ * mnt_table_listmount_set_ns:
+ * @tb: mount table
+ * @ns: namespace ID
+ *
+ * Set namespace ID for listmount().
+ *
+ * Returns: 0 on success, < 0 on error
+ * Since: 2.41
+ */
+int mnt_table_listmount_set_ns(struct libmnt_table *tb, uint64_t ns)
+{
+	int rc = 0;
+
+	if (!tb)
+		return -EINVAL;
+	/* create the default setup on first use */
+	if (!tb->lsmnt && (rc = table_init_listmount(tb, 0)) != 0)
+		return rc;
+	tb->lsmnt->ns = ns;
+	return 0;
+}
+
+/**
+ * mnt_table_listmount_set_stepsiz:
+ * @tb: mount table
+ * @sz: number of nodes read by one listmount() call, zero for the default
+ *
+ * The listmount() setup is (re)allocated for the requested number of IDs. If
+ * the table already uses the requested size, nothing is changed.
+ *
+ * Returns: 0 on success, < 0 on error
+ * Since: 2.41
+ */
+int mnt_table_listmount_set_stepsiz(struct libmnt_table *tb, size_t sz)
+{
+	size_t want = sz ? sz : MNT_LSMNT_STEPSIZ;
+
+	if (!tb)
+		return -EINVAL;
+
+	/* Already set up for this size. Do not call the initializer here;
+	 * for an unchanged size it releases the setup that is still
+	 * referenced by the table. */
+	if (tb->lsmnt && tb->lsmnt->stepsiz == want)
+		return 0;
+
+	return table_init_listmount(tb, sz);
+}
+
+/*
+ * This function is called by mnt_reset_table() and the table must already be
+ * empty. It clears the "done" and "reverse" flags and the last seen ID; the
+ * root ID, namespace, step size and on-demand status are kept.
+ *
+ * Returns 0 on success (or if there is nothing to reset), -EINVAL if the
+ * table is not empty.
+ *
+ * Private; not exported to library API!
+ **/
+int mnt_table_reset_listmount(struct libmnt_table *tb)
+{
+	if (!tb || !tb->lsmnt)
+		return 0;
+	if (tb->nents)
+		return -EINVAL;
+
+	tb->lsmnt->done = 0;
+	tb->lsmnt->reverse = 0;
+	tb->lsmnt->last = 0;
+	return 0;
+}
+
+/**
+ * mnt_table_enable_listmount:
+ * @tb: table
+ * @enable: 0 or 1
+ *
+ * Enable or disable on-demand listmount() to make it usable by
+ * mnt_table_next_fs(). This function does not affect
+ * mnt_table_fetch_listmount().
+ *
+ * If on-demand listmount() is being enabled and the table has no listmount()
+ * setup yet, the setup is created with the default step size. If that fails
+ * (for example, listmount() is not supported), the request is ignored.
+ *
+ * Returns: old status (1 or 0)
+ * Since: 2.41
+ */
+int mnt_table_enable_listmount(struct libmnt_table *tb, int enable)
+{
+	int old = 0;
+
+	if (!tb)
+		return 0;
+
+	/* enabling on a table without setup must not be a silent no-op */
+	if (!tb->lsmnt && enable && table_init_listmount(tb, 0) != 0)
+		return 0;
+
+	if (tb->lsmnt) {
+		old = tb->lsmnt->enabled;
+		/* one-bit field: normalize, otherwise e.g. 2 is stored as 0 */
+		tb->lsmnt->enabled = enable ? 1 : 0;
+		DBG(TAB, ul_debugobj(tb, "listmount() %s",
+					enable ? "on" : "off"));
+	}
+	return old;
+}
+
+/* private; returns 1 if on-demand listmount() possible */
+int mnt_table_want_listmount(struct libmnt_table *tb)
+{
+	return tb && tb->lsmnt && tb->lsmnt->enabled;
+}
+
+/*
+ * Add new entries from ls->list[] to the table; @nitems is the number of
+ * valid items in the list. Zero IDs are skipped.
+ *
+ * The position for the first insert is the current first entry of the table
+ * if @reverse is set, otherwise the last one (no position if the table has no
+ * entry). Each inserted filesystem becomes the position for the next insert.
+ *
+ * Returns 0 on success, -ENOMEM or the mnt_table_insert_fs() error code.
+ */
+static int lsmnt_to_table(
+		struct libmnt_table *tb, struct libmnt_listmnt *ls,
+		size_t nitems, int reverse)
+{
+	int rc = 0;
+	size_t i;
+	struct libmnt_fs *prev = NULL;
+
+	if (reverse)
+		mnt_table_first_fs(tb, &prev);
+	else
+		mnt_table_last_fs(tb, &prev);
+	/* hold the position; the reference is dropped when we move on */
+	if (prev)
+		mnt_ref_fs(prev);
+
+	DBG(TAB, ul_debugobj(tb, "listmount: insert %zu", nitems));
+
+	for (i = 0; rc == 0 && i < nitems; i++) {
+		struct libmnt_fs *fs;
+		uint64_t id = ls->list[i];
+
+		if (!id)
+			continue;
+
+		fs = mnt_new_fs();
+		if (fs) {
+			mnt_fs_set_uniq_id(fs, id);
+			if (ls->ns)
+				mnt_fs_set_ns(fs, ls->ns);
+
+			rc = mnt_table_insert_fs(tb, reverse, prev, fs);
+		} else
+			rc = -ENOMEM;
+
+		/* the new fs (NULL on allocation error) is the next position */
+		mnt_unref_fs(prev);
+		prev = fs;
+	}
+
+	mnt_unref_fs(prev);
+	return rc;
+}
+
+/*
+ * Private function, backend of mnt_table_next_fs().
+ *
+ * Return: 0 on success, 1 if no more data, <0 on error.
+ */
+int mnt_table_next_lsmnt(struct libmnt_table *tb, int direction)
+{
+	ssize_t n;
+	int reverse = direction == MNT_ITER_BACKWARD;
+	struct libmnt_listmnt *ls = NULL;
+	int rc = 0;
+
+	if (!tb || !tb->lsmnt)
+		return -EINVAL;
+	if (tb->lsmnt->done || !tb->lsmnt->enabled)
+		return 1;
+
+	ls = tb->lsmnt;
+
+	/* disable on-demand fetching while the table is being modified;
+	 * it is enabled again at the "done" label */
+	mnt_table_enable_listmount(tb, 0);
+
+	/* read all to avoid mixing order in the table */
+	if (!mnt_table_is_empty(tb) && ls->reverse != reverse) {
+		rc = mnt_table_fetch_listmount(tb);
+		goto done;
+	}
+
+	ls->reverse = reverse;
+
+	DBG(TAB, ul_debugobj(tb, "listmount: call "
+		"[id=%" PRIu64", ns=%" PRIu64
+		", last=%" PRIu64", sz=%zu %s]",
+		ls->id, ls->ns,
+		ls->last, ls->stepsiz,
+		ls->reverse ? "reverse" : ""));
+
+	/* continue behind the last ID returned by the previous call */
+	n = ul_listmount(ls->id, ls->ns, ls->last, ls->list, ls->stepsiz,
+			reverse ? LISTMOUNT_REVERSE : 0);
+	if (n < 0) {
+		rc = -errno;
+		goto done;
+	}
+
+	/* fewer IDs than requested; do not ask again */
+	if (n < (ssize_t) ls->stepsiz)
+		ls->done = 1;
+	if (n > 0) {
+		ls->last = ls->list[ n - 1 ];
+		rc = lsmnt_to_table(tb, ls, n, reverse);
+	} else
+		rc = 1;		/* nothing added, no more data */
+done:
+	mnt_table_enable_listmount(tb, 1);
+
+	DBG(TAB, ul_debugobj(tb, "listmount: on-demand done [rc=%d]", rc));
+	return rc;
+}
+
+/**
+ * mnt_table_fetch_listmount:
+ * @tb: table instance
+ *
+ * By default, this function reads all mount nodes in the current namespace
+ * from the kernel and adds them to the @tb table. This default behavior can
+ * be modified using mnt_table_listmount_set_...().
+ *
+ * The table is reset (all file systems removed) before new data is added.
+ *
+ * Return: 0 on success, <0 on error.
+ * Since: 2.41
+ */
+int mnt_table_fetch_listmount(struct libmnt_table *tb)
+{
+	int rc = 0, stmnt_status = 0, lsmnt_status = 0;
+	struct libmnt_listmnt *ls = NULL;
+	ssize_t n;
+
+	if (!tb)
+		return -EINVAL;
+
+	DBG(TAB, ul_debugobj(tb, "listmount: fetching all"));
+
+	/* create the default setup on first use */
+	if (!tb->lsmnt && (rc = table_init_listmount(tb, 0)) != 0)
+		return rc;
+
+	/* disable on-demand statmount() */
+	if (tb->stmnt)
+		stmnt_status = mnt_statmnt_disable_fetching(tb->stmnt, 1);
+	/* disable on-demand listmount() */
+	lsmnt_status = mnt_table_enable_listmount(tb, 0);
+
+	mnt_reset_table(tb);
+
+	ls = tb->lsmnt;
+
+	/* read the IDs step by step; a short (or empty) step is the last one */
+	do {
+		DBG(TAB, ul_debugobj(tb, "listmount: call "
+			"[id=%" PRIu64", ns=%" PRIu64
+			", last=%" PRIu64", sz=%zu]",
+			ls->id, ls->ns,
+			ls->last, ls->stepsiz));
+
+		n = ul_listmount(ls->id, ls->ns, ls->last,
+					ls->list, ls->stepsiz, 0);
+		if (n < 0) {
+			rc = -errno;
+			break;
+		}
+		/* nothing returned (e.g. the previous step was the last
+		 * complete one); list[n - 1] would be outside the buffer */
+		if (n == 0)
+			break;
+
+		ls->last = ls->list[ n - 1 ];
+		rc = lsmnt_to_table(tb, ls, n, 0);
+
+	} while (rc == 0 && n == (ssize_t) ls->stepsiz);
+
+	/* Avoid using on-demand mnt_table_next_lsmnt() if we already
+	 * have all the necessary data (or on error) */
+	tb->lsmnt->done = 1;
+
+	/* restore */
+	if (tb->stmnt)
+		mnt_statmnt_disable_fetching(tb->stmnt, stmnt_status);
+	mnt_table_enable_listmount(tb, lsmnt_status);
+
+	DBG(TAB, ul_debugobj(tb, "listmount: fetching done [rc=%d]", rc));
+
+	return rc;
+}
+
+
-- 
2.47.2