]> git.ipfire.org Git - thirdparty/util-linux.git/commitdiff
lsblk: add --dedup <column>
authorKarel Zak <kzak@redhat.com>
Thu, 18 Oct 2018 13:46:07 +0000 (15:46 +0200)
committerKarel Zak <kzak@redhat.com>
Fri, 7 Dec 2018 11:33:34 +0000 (12:33 +0100)
The target use-cases are systems with a large number of multi-path
devices or systems with duplicate (copied) filesystems.

The feature is flexible enough to use an arbitrary column (for example
WWN or UUID, ...) as the de-duplication key.

For example, a tree with multi-path devices sd{c,d,e,f}:

./lsblk
NAME        MAJ:MIN RM   SIZE RO TYPE  MOUNTPOINT
sda           8:0    0 223.6G  0 disk
├─sda1        8:1    0   200M  0 part  /boot/efi
├─sda2        8:2    0   200M  0 part  /boot
├─sda3        8:3    0 130.3G  0 part
├─sda4        8:4    0    50G  0 part  /
└─sda5        8:5    0  42.9G  0 part
sdb           8:16   0  74.5G  0 disk
└─sdb1        8:17   0  74.5G  0 part  /home/archive
sdc           8:32   0   100M  0 disk
└─mpatha    253:0    0   100M  0 mpath
  ├─mpatha1 253:1    0    50M  0 part
  └─mpatha2 253:2    0    49M  0 part
sdd           8:48   0   100M  0 disk
└─mpatha    253:0    0   100M  0 mpath
  ├─mpatha1 253:1    0    50M  0 part
  └─mpatha2 253:2    0    49M  0 part
sde           8:64   0   100M  0 disk
└─mpatha    253:0    0   100M  0 mpath
  ├─mpatha1 253:1    0    50M  0 part
  └─mpatha2 253:2    0    49M  0 part
sdf           8:80   0   100M  0 disk
└─mpatha    253:0    0   100M  0 mpath
  ├─mpatha1 253:1    0    50M  0 part
  └─mpatha2 253:2    0    49M  0 part

De-duplicate by WWN:

./lsblk -M WWN
NAME        MAJ:MIN RM   SIZE RO TYPE  MOUNTPOINT
sda           8:0    0 223.6G  0 disk
├─sda1        8:1    0   200M  0 part  /boot/efi
├─sda2        8:2    0   200M  0 part  /boot
├─sda3        8:3    0 130.3G  0 part
├─sda4        8:4    0    50G  0 part  /
└─sda5        8:5    0  42.9G  0 part
sdb           8:16   0  74.5G  0 disk
└─sdb1        8:17   0  74.5G  0 part  /home/archive
sdc           8:32   0   100M  0 disk
└─mpatha    253:0    0   100M  0 mpath
  ├─mpatha1 253:1    0    50M  0 part
  └─mpatha2 253:2    0    49M  0 part

Addresses: https://github.com/karelzak/util-linux/issues/616
Signed-off-by: Karel Zak <kzak@redhat.com>
misc-utils/lsblk-devtree.c
misc-utils/lsblk.8
misc-utils/lsblk.c
misc-utils/lsblk.h

index aab01d3e3489c97766bb03b5881aabf92724954f..1df3e4e2edf9962be0c1cd806acc199b9a3c23dc 100644 (file)
@@ -82,6 +82,7 @@ void lsblk_unref_device(struct lsblk_device *dev)
                free(dev->dm_name);
                free(dev->filename);
                free(dev->mountpoint);
+               free(dev->dedupkey);
 
                ul_unref_path(dev->sysfs);
 
@@ -129,30 +130,40 @@ int lsblk_device_new_dependence(struct lsblk_device *parent, struct lsblk_device
        return 0;
 }
 
-int lsblk_device_next_child(struct lsblk_device *dev,
+static int device_next_dependence(struct lsblk_device *dev,
                          struct lsblk_iter *itr,
-                         struct lsblk_device **child)
+                         struct lsblk_devdep **dp)
 {
        int rc = 1;
 
-       if (!dev || !itr || !child)
+       if (!dev || !itr || !dp)
                return -EINVAL;
-       *child = NULL;
+       *dp = NULL;
 
        if (!itr->head)
                LSBLK_ITER_INIT(itr, &dev->deps);
        if (itr->p != itr->head) {
-               struct lsblk_devdep *dp = NULL;
-
-               LSBLK_ITER_ITERATE(itr, dp, struct lsblk_devdep, ls_deps);
-
-               *child = dp->child;
+               LSBLK_ITER_ITERATE(itr, *dp, struct lsblk_devdep, ls_deps);
                rc = 0;
        }
 
        return rc;
 }
 
+/* Get the next child of @dev; returns 0 on success, 1 at the end, <0 on error */
+int lsblk_device_next_child(struct lsblk_device *dev, struct lsblk_iter *itr,
+                         struct lsblk_device **child)
+{
+       struct lsblk_devdep *dp = NULL;
+       int rc;
+
+       if (!child)             /* validate before the iterator is advanced */
+               return -EINVAL;
+       rc = device_next_dependence(dev, itr, &dp);
+       *child = rc == 0 ? dp->child : NULL;
+       return rc;
+}
+
 
 struct lsblk_devtree *lsblk_new_devtree()
 {
@@ -296,3 +307,97 @@ int lsblk_devtree_remove_device(struct lsblk_devtree *tr, struct lsblk_device *d
        return 0;
 }
 
+/* Returns 1 if @dev duplicates @pattern (the same key, but another device) and
+ * 0 otherwise. A partition that only repeats the key of its whole-disk never
+ * matches; the whole-disk may have no key at all (NULL). */
+static int device_dedupkey_is_equal(struct lsblk_device *dev, struct lsblk_device *pattern)
+{
+       assert(pattern->dedupkey);
+       if (!dev->dedupkey || dev == pattern)
+               return 0;
+       if (strcmp(dev->dedupkey, pattern->dedupkey) == 0) {
+               if (!device_is_partition(dev) || !dev->wholedisk->dedupkey ||
+                    strcmp(dev->dedupkey, dev->wholedisk->dedupkey) != 0) {
+                       DBG(DEV, ul_debugobj(dev, "%s: match deduplication pattern", dev->name));
+                       return 1;
+               }
+       }
+       return 0;
+}
+
+/* Walk all dependencies of @dev (depth-first): drop those whose child matches
+ * @pattern, and descend into the children that stay. */
+static void device_dedup_dependencies(struct lsblk_device *dev, struct lsblk_device *pattern)
+{
+       struct lsblk_devdep *dep;
+       struct lsblk_iter it;
+
+       lsblk_reset_iter(&it, LSBLK_ITER_FORWARD);
+       while (device_next_dependence(dev, &it, &dep) == 0) {
+               struct lsblk_device *kid = dep->child;
+
+               if (!device_dedupkey_is_equal(kid, pattern)) {
+                       device_dedup_dependencies(kid, pattern);
+                       continue;
+               }
+               DBG(DEV, ul_debugobj(dev, "remove duplicate dependence: 0x%p [%s]",
+                                       kid, kid->name));
+               device_remove_dependence(dev, dep);
+       }
+}
+
+/* Remove from @tr everything that duplicates @pattern: matching root devices are
+ * unlinked from the list of roots, other roots are searched recursively. */
+static void devtree_dedup(struct lsblk_devtree *tr, struct lsblk_device *pattern)
+{
+       struct lsblk_device *root = NULL;
+       struct lsblk_iter it;
+
+       lsblk_reset_iter(&it, LSBLK_ITER_FORWARD);
+       DBG(TREE, ul_debugobj(tr, "de-duplicate by key: %s", pattern->dedupkey));
+       while (lsblk_devtree_next_root(tr, &it, &root) == 0) {
+               if (!device_dedupkey_is_equal(root, pattern)) {
+                       device_dedup_dependencies(root, pattern);
+                       continue;
+               }
+               DBG(TREE, ul_debugobj(tr, "remove duplicate device: 0x%p [%s]",
+                                       root, root->name));
+               /* the root list holds no reference (ls_devices is the primary one) */
+               list_del_init(&root->ls_roots);
+       }
+}
+
+/* list_sort() callback: order devices by their device number (maj:min) */
+static int cmp_devices_devno(struct list_head *a, struct list_head *b,
+                         __attribute__((__unused__)) void *data)
+{
+       struct lsblk_device *lhs = list_entry(a, struct lsblk_device, ls_devices);
+       struct lsblk_device *rhs = list_entry(b, struct lsblk_device, ls_devices);
+
+       return cmp_numbers(makedev(lhs->maj, lhs->min), makedev(rhs->maj, rhs->min));
+}
+
+/* Use the devices with a key (in devno order) as patterns for devtree_dedup();
+ * always returns 0. Note that dev->dedupkey has to be already set. */
+int lsblk_devtree_deduplicate_devices(struct lsblk_devtree *tr)
+{
+       struct lsblk_device *pattern = NULL;
+       struct lsblk_iter itr;
+       char *last = NULL;
+
+       list_sort(&tr->devices, cmp_devices_devno, NULL);
+       lsblk_reset_iter(&itr, LSBLK_ITER_FORWARD);
+       while (lsblk_devtree_next_device(tr, &itr, &pattern) == 0) {
+               if (!pattern->dedupkey)
+                       continue;
+               /* the whole-disk may have no key (e.g. UUID); don't strcmp() NULL */
+               if (device_is_partition(pattern) && pattern->wholedisk->dedupkey &&
+                   strcmp(pattern->dedupkey, pattern->wholedisk->dedupkey) == 0)
+                       continue;
+               if (last && strcmp(pattern->dedupkey, last) == 0)
+                       continue;
+               devtree_dedup(tr, pattern);
+               last = pattern->dedupkey;
+       }
+       return 0;
+}
index 7cc2788db643c9a16b2e2b0a63133151bddc315d..d2a58ed085a680df2a321edb96ae33058d66b97e 100644 (file)
@@ -88,6 +88,14 @@ Use JSON output format.
 .BR \-l , " \-\-list"
 Produce output in the form of a list.
 .TP
+.BR \-M , " \-\-dedup " \fIcolumn\fP
+Use \fIcolumn\fP as a de-duplication key to de-duplicate the output tree. If the
+key is not available for the device, or the device is a partition and the parental
+whole-disk device provides the same key, then the device is always printed.
+
+The usual use case is to de-duplicate output on systems with multi-path devices,
+for example by \fB\-M WWN\fR.
+.TP
 .BR \-m , " \-\-perms"
 Output info about device owner, group and mode.  This option is equivalent to
 .BR -o\ NAME,SIZE,OWNER,GROUP,MODE .
index 35926a17d71876e209a96324953dfd78c125694a..95c3cdc4d4d6df0e1c445d42aa513f53357adb87 100644 (file)
@@ -1641,6 +1641,43 @@ static int cmp_u64_cells(struct libscols_cell *a,
        return *adata == *bdata ? 0 : *adata >= *bdata ? 1 : -1;
 }
 
+/* Set dev->dedupkey from the column @id for @dev and recursively for its children.
+ * The key from a previous visit (device reachable from more parents) is released. */
+static void device_set_dedupkey(struct lsblk_device *dev, struct lsblk_device *parent, int id)
+{
+       struct lsblk_iter itr;
+       struct lsblk_device *child = NULL;
+
+       free(dev->dedupkey);    /* don't leak the key from a previous visit */
+       dev->dedupkey = device_get_data(dev, parent, id, NULL);
+       if (dev->dedupkey)
+               DBG(DEV, ul_debugobj(dev, "%s: de-duplication key: %s", dev->name, dev->dedupkey));
+
+       if (dev->npartitions == 0)
+               /* For partitions we often read from parental whole-disk sysfs,
+                * otherwise we can close */
+               ul_path_close_dirfd(dev->sysfs);
+
+       lsblk_reset_iter(&itr, LSBLK_ITER_FORWARD);
+
+       while (lsblk_device_next_child(dev, &itr, &child) == 0)
+               device_set_dedupkey(child, dev, id);
+
+       /* Let's be careful with number of open files */
+       ul_path_close_dirfd(dev->sysfs);
+}
+
+/* Set the de-duplication key (column @id) for all devices reachable from roots */
+static void devtree_set_dedupkeys(struct lsblk_devtree *tr, int id)
+{
+       struct lsblk_device *root = NULL;
+       struct lsblk_iter it;
+
+       lsblk_reset_iter(&it, LSBLK_ITER_FORWARD);
+       while (lsblk_devtree_next_root(tr, &it, &root) == 0)
+               device_set_dedupkey(root, NULL, id);    /* roots have no parent */
+}
+
 static void __attribute__((__noreturn__)) usage(void)
 {
        FILE *out = stdout;
@@ -1665,6 +1702,7 @@ static void __attribute__((__noreturn__)) usage(void)
        fputs(_(" -J, --json           use JSON output format\n"), out);
        fputs(_(" -l, --list           use list format output\n"), out);
        fputs(_(" -T, --tree           use tree format output\n"), out);
+       fputs(_(" -M, --dedup <column> de-duplicate output by <column>\n"), out);
        fputs(_(" -m, --perms          output info about permissions\n"), out);
        fputs(_(" -n, --noheadings     don't print headings\n"), out);
        fputs(_(" -o, --output <list>  output columns\n"), out);
@@ -1699,7 +1737,11 @@ static void check_sysdevblock(void)
 
 int main(int argc, char *argv[])
 {
-       struct lsblk _ls = { .sort_id = -1, .flags = LSBLK_TREE };
+       struct lsblk _ls = {
+               .sort_id = -1,
+               .dedup_id = -1,
+               .flags = LSBLK_TREE
+       };
        struct lsblk_devtree *tr = NULL;
        int c, status = EXIT_FAILURE;
        char *outarg = NULL;
@@ -1715,6 +1757,7 @@ int main(int argc, char *argv[])
                { "bytes",      no_argument,       NULL, 'b' },
                { "nodeps",     no_argument,       NULL, 'd' },
                { "discard",    no_argument,       NULL, 'D' },
+               { "dedup",      required_argument, NULL, 'M' },
                { "zoned",      no_argument,       NULL, 'z' },
                { "help",       no_argument,       NULL, 'h' },
                { "json",       no_argument,       NULL, 'J' },
@@ -1763,7 +1806,7 @@ int main(int argc, char *argv[])
        lsblk_init_debug();
 
        while((c = getopt_long(argc, argv,
-                              "abdDze:fhJlnmo:OpPiI:rstVSTx:", longopts, NULL)) != -1) {
+                              "abdDze:fhJlnmM:o:OpPiI:rstVSTx:", longopts, NULL)) != -1) {
 
                err_exclusive_options(c, longopts, excl, excl_st);
 
@@ -1880,6 +1923,12 @@ int main(int argc, char *argv[])
                case 'V':
                        printf(UTIL_LINUX_VERSION);
                        return EXIT_SUCCESS;
+               case 'M':
+                       lsblk->dedup_id = column_name_to_id(optarg, strlen(optarg));
+                       if (lsblk->dedup_id >= 0)
+                               break;
+                       errtryhelp(EXIT_FAILURE);
+                       break;
                case 'x':
                        lsblk->flags &= ~LSBLK_TREE; /* disable the default */
                        lsblk->sort_id = column_name_to_id(optarg, strlen(optarg));
@@ -1928,6 +1977,12 @@ int main(int argc, char *argv[])
                lsblk->sort_hidden = 1;
        }
 
+       if (lsblk->dedup_id >= 0 && column_id_to_number(lsblk->dedup_id) < 0) {
+               /* the deduplication column is not between output columns -- add as hidden */
+               add_column(lsblk->dedup_id);
+               lsblk->dedup_hidden = 1;
+       }
+
        lsblk_mnt_init();
        scols_init_debug(0);
        ul_path_init_debug();
@@ -1955,6 +2010,8 @@ int main(int argc, char *argv[])
                        fl &= ~SCOLS_FL_TREE;
                if (lsblk->sort_hidden && lsblk->sort_id == id)
                        fl |= SCOLS_FL_HIDDEN;
+               if (lsblk->dedup_hidden && lsblk->dedup_id == id)
+                       fl |= SCOLS_FL_HIDDEN;
 
                cl = scols_table_new_column(lsblk->table, ci->name, ci->whint, fl);
                if (!cl) {
@@ -2012,6 +2069,11 @@ int main(int argc, char *argv[])
                                          EXIT_SUCCESS;         /* all success */
        }
 
+       if (lsblk->dedup_id > -1) {
+               devtree_set_dedupkeys(tr, lsblk->dedup_id);
+               lsblk_devtree_deduplicate_devices(tr);
+       }
+
        devtree_to_scols(tr, lsblk->table);
 
        if (lsblk->sort_col)
index 6a2ad6aa24496309bafe54bc2c6bc306ed180483..236c84934ca45ed21a64dfee0b6d4b9087e72df9 100644 (file)
@@ -32,9 +32,12 @@ UL_DEBUG_DECLARE_MASK(lsblk);
 
 struct lsblk {
        struct libscols_table *table;   /* output table */
+
        struct libscols_column *sort_col;/* sort output by this column */
        int sort_id;
 
+       int dedup_id;
+
        const char *sysroot;
        int flags;                      /* LSBLK_* */
 
@@ -45,6 +48,7 @@ struct lsblk {
        unsigned int scsi:1;            /* print only device with HCTL (SCSI) */
        unsigned int paths:1;           /* print devnames with "/dev" prefix */
        unsigned int sort_hidden:1;     /* sort column not between output columns */
+       unsigned int dedup_hidden :1;   /* deduplication column not between output columns */
        unsigned int force_tree_order:1;/* sort lines by parent->tree relation */
 };
 
@@ -91,6 +95,7 @@ struct lsblk_device {
        char *dm_name;          /* DM name (dm/block) */
 
        char *filename;         /* path to device node */
+       char *dedupkey;         /* de-duplication key */
 
        struct path_cxt *sysfs;
 
@@ -200,5 +205,6 @@ int lsblk_devtree_next_device(struct lsblk_devtree *tr,
 int lsblk_devtree_has_device(struct lsblk_devtree *tr, struct lsblk_device *dev);
 struct lsblk_device *lsblk_devtree_get_device(struct lsblk_devtree *tr, const char *name);
 int lsblk_devtree_remove_device(struct lsblk_devtree *tr, struct lsblk_device *dev);
+int lsblk_devtree_deduplicate_devices(struct lsblk_devtree *tr);
 
 #endif /* UTIL_LINUX_LSBLK_H */