]> git.ipfire.org Git - people/ms/linux.git/commitdiff
dm bio prison v2: new interface for the bio prison
author Joe Thornber <ejt@redhat.com>
Fri, 21 Oct 2016 14:06:40 +0000 (10:06 -0400)
committer Mike Snitzer <snitzer@redhat.com>
Tue, 7 Mar 2017 16:30:16 +0000 (11:30 -0500)
The deferred set is gone and all methods have _v2 appended to the end of
their names to allow for continued use of the original bio prison in DM
thin-provisioning.

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
drivers/md/Makefile
drivers/md/dm-bio-prison-v1.c [moved from drivers/md/dm-bio-prison.c with 94% similarity]
drivers/md/dm-bio-prison-v1.h [moved from drivers/md/dm-bio-prison.h with 99% similarity]
drivers/md/dm-bio-prison-v2.c [new file with mode: 0644]
drivers/md/dm-bio-prison-v2.h [new file with mode: 0644]
drivers/md/dm-cache-target.c
drivers/md/dm-thin.c

index 3cbda1af87a0d66e8ac2cdf2b60df0f817971d90..d378b1db7852b762e82122bdadfd37736daa1a88 100644 (file)
@@ -11,6 +11,7 @@ dm-snapshot-y += dm-snap.o dm-exception-store.o dm-snap-transient.o \
 dm-mirror-y    += dm-raid1.o
 dm-log-userspace-y \
                += dm-log-userspace-base.o dm-log-userspace-transfer.o
+dm-bio-prison-y += dm-bio-prison-v1.o dm-bio-prison-v2.o
 dm-thin-pool-y += dm-thin.o dm-thin-metadata.o
 dm-cache-y     += dm-cache-target.o dm-cache-metadata.o dm-cache-policy.o
 dm-cache-smq-y   += dm-cache-policy-smq.o
similarity index 94%
rename from drivers/md/dm-bio-prison.c
rename to drivers/md/dm-bio-prison-v1.c
index 03af174485d3066c62659f252c5306fc86fa3f68..ae7da2c30a5781353f39ef54b9b5e895e5738319 100644 (file)
@@ -5,7 +5,8 @@
  */
 
 #include "dm.h"
-#include "dm-bio-prison.h"
+#include "dm-bio-prison-v1.h"
+#include "dm-bio-prison-v2.h"
 
 #include <linux/spinlock.h>
 #include <linux/mempool.h>
@@ -398,7 +399,7 @@ EXPORT_SYMBOL_GPL(dm_deferred_set_add_work);
 
 /*----------------------------------------------------------------*/
 
-static int __init dm_bio_prison_init(void)
+static int __init dm_bio_prison_init_v1(void)
 {
        _cell_cache = KMEM_CACHE(dm_bio_prison_cell, 0);
        if (!_cell_cache)
@@ -407,12 +408,51 @@ static int __init dm_bio_prison_init(void)
        return 0;
 }
 
-static void __exit dm_bio_prison_exit(void)
+static void dm_bio_prison_exit_v1(void) /* not __exit: _exits[] also invokes this from dm_bio_prison_init()'s failure path */
 {
        kmem_cache_destroy(_cell_cache);
        _cell_cache = NULL;
 }
 
+static int (*_inits[])(void) __initdata = {    /* called in this order by dm_bio_prison_init() */
+       dm_bio_prison_init_v1,
+       dm_bio_prison_init_v2,
+};
+
+static void (*_exits[])(void) = {      /* _exits[i] undoes _inits[i]; always walked from the highest index down */
+       dm_bio_prison_exit_v1,
+       dm_bio_prison_exit_v2,
+};
+
+/*
+ * Bring up every bio prison version listed in _inits[], in table order.
+ *
+ * If one initialiser fails, the versions that had already come up are torn
+ * down again, most recent first, and the failing initialiser's error code
+ * is returned.  Returns 0 when every initialiser succeeded.
+ */
+static int __init dm_bio_prison_init(void)
+{
+       const int nr_versions = ARRAY_SIZE(_inits);
+       int idx, err = 0;
+
+       for (idx = 0; idx < nr_versions; idx++) {
+               err = _inits[idx]();
+               if (err)
+                       break;
+       }
+
+       if (err) {
+               /* idx is the entry that failed; unwind only those before it */
+               while (idx--)
+                       _exits[idx]();
+       }
+
+       return err;
+}
+
+/*
+ * Module unload: run every entry of _exits[], last entry first.
+ */
+static void __exit dm_bio_prison_exit(void)
+{
+       int idx;
+
+       for (idx = ARRAY_SIZE(_exits) - 1; idx >= 0; idx--)
+               _exits[idx]();
+}
+
 /*
  * module hooks
  */
similarity index 99%
rename from drivers/md/dm-bio-prison.h
rename to drivers/md/dm-bio-prison-v1.h
index 54352f009bfd5ad3cf011896536c4aa5baee18c6..cddd4ac07e2cb2664d3e8b478193f7fe699ee85a 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2011-2012 Red Hat, Inc.
+ * Copyright (C) 2011-2017 Red Hat, Inc.
  *
  * This file is released under the GPL.
  */
diff --git a/drivers/md/dm-bio-prison-v2.c b/drivers/md/dm-bio-prison-v2.c
new file mode 100644 (file)
index 0000000..c9b11f7
--- /dev/null
@@ -0,0 +1,369 @@
+/*
+ * Copyright (C) 2012-2017 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm.h"
+#include "dm-bio-prison-v2.h"
+
+#include <linux/spinlock.h>
+#include <linux/mempool.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/rwsem.h>
+
+/*----------------------------------------------------------------*/
+
+#define MIN_CELLS 1024
+
+struct dm_bio_prison_v2 {
+       struct workqueue_struct *wq;    /* quiesce continuations are queued on this */
+
+       spinlock_t lock;                /* held (irqsave) around every lookup/update of cells */
+       mempool_t *cell_pool;           /* backs dm_bio_prison_alloc_cell_v2() / dm_bio_prison_free_cell_v2() */
+       struct rb_root cells;           /* cells currently in use, ordered by cmp_keys() */
+};
+
+static struct kmem_cache *_cell_cache;
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Allocate and initialise an empty prison.
+ *
+ * @wq is stored in the prison and is the workqueue on which quiesce
+ * continuations get queued.  The cell mempool is created on top of
+ * _cell_cache with a minimum of MIN_CELLS elements.
+ *
+ * Returns the new prison, or NULL if either allocation fails.
+ */
+struct dm_bio_prison_v2 *dm_bio_prison_create_v2(struct workqueue_struct *wq)
+{
+       struct dm_bio_prison_v2 *prison;
+
+       prison = kmalloc(sizeof(*prison), GFP_KERNEL);
+       if (!prison)
+               return NULL;
+
+       prison->cell_pool = mempool_create_slab_pool(MIN_CELLS, _cell_cache);
+       if (!prison->cell_pool) {
+               kfree(prison);
+               return NULL;
+       }
+
+       /* kmalloc() does not zero, so every remaining field is set here */
+       prison->wq = wq;
+       prison->cells = RB_ROOT;
+       spin_lock_init(&prison->lock);
+
+       return prison;
+}
+EXPORT_SYMBOL_GPL(dm_bio_prison_create_v2);
+
+/*
+ * Tear down a prison: destroy its cell mempool, then free the prison
+ * itself.  @prison is dereferenced unconditionally, so it must not be NULL.
+ */
+void dm_bio_prison_destroy_v2(struct dm_bio_prison_v2 *prison)
+{
+       mempool_t *pool = prison->cell_pool;
+
+       mempool_destroy(pool);
+       kfree(prison);
+}
+EXPORT_SYMBOL_GPL(dm_bio_prison_destroy_v2);
+
+/*
+ * Hand out a cell from the prison's mempool.  @gfp is passed straight
+ * through to mempool_alloc(), whose result is returned unchanged.
+ */
+struct dm_bio_prison_cell_v2 *dm_bio_prison_alloc_cell_v2(struct dm_bio_prison_v2 *prison, gfp_t gfp)
+{
+       mempool_t *pool = prison->cell_pool;
+
+       return mempool_alloc(pool, gfp);
+}
+EXPORT_SYMBOL_GPL(dm_bio_prison_alloc_cell_v2);
+
+/*
+ * Give @cell back to the prison's mempool.
+ */
+void dm_bio_prison_free_cell_v2(struct dm_bio_prison_v2 *prison,
+                               struct dm_bio_prison_cell_v2 *cell)
+{
+       mempool_t *pool = prison->cell_pool;
+
+       mempool_free(cell, pool);
+}
+EXPORT_SYMBOL_GPL(dm_bio_prison_free_cell_v2);
+
+/*
+ * Reset @cell to a pristine state (all fields zeroed, empty bio list) and
+ * stamp it with a copy of @key.
+ */
+static void __setup_new_cell(struct dm_cell_key_v2 *key,
+                            struct dm_bio_prison_cell_v2 *cell)
+{
+       memset(cell, 0, sizeof(*cell));
+       bio_list_init(&cell->bios);
+       memcpy(&cell->key, key, sizeof(*key));
+}
+
+/*
+ * Three-way comparison of two keys, used to order cells in the rb-tree.
+ *
+ * Keys are ordered by ->virtual first, then by ->dev.  Within the same
+ * device the block ranges decide: @lhs sorts before @rhs if it ends at or
+ * before the point where @rhs begins, and after it if it begins at or
+ * beyond the point where @rhs ends.  Ranges that overlap compare equal.
+ *
+ * Returns -1, 0 or 1.
+ */
+static int cmp_keys(struct dm_cell_key_v2 *lhs,
+                   struct dm_cell_key_v2 *rhs)
+{
+       if (lhs->virtual != rhs->virtual)
+               return (lhs->virtual < rhs->virtual) ? -1 : 1;
+
+       if (lhs->dev != rhs->dev)
+               return (lhs->dev < rhs->dev) ? -1 : 1;
+
+       if (lhs->block_end <= rhs->block_begin)
+               return -1;
+
+       if (lhs->block_begin >= rhs->block_end)
+               return 1;
+
+       /* same device, overlapping ranges */
+       return 0;
+}
+
+/*
+ * Look @key up in the prison's rb-tree.
+ *
+ * If a cell whose key compares equal already exists, *result is pointed at
+ * it and true is returned; @cell_prealloc is left untouched.  Otherwise
+ * @cell_prealloc is initialised for @key, linked into the tree, handed back
+ * through *result, and false is returned.
+ */
+static bool __find_or_insert(struct dm_bio_prison_v2 *prison,
+                            struct dm_cell_key_v2 *key,
+                            struct dm_bio_prison_cell_v2 *cell_prealloc,
+                            struct dm_bio_prison_cell_v2 **result)
+{
+       struct rb_node **link = &prison->cells.rb_node;
+       struct rb_node *parent = NULL;
+
+       while (*link) {
+               struct dm_bio_prison_cell_v2 *candidate =
+                       container_of(*link, struct dm_bio_prison_cell_v2, node);
+               int order = cmp_keys(key, &candidate->key);
+
+               if (!order) {
+                       *result = candidate;
+                       return true;
+               }
+
+               /* descend, remembering where a new node would hang */
+               parent = *link;
+               link = (order < 0) ? &parent->rb_left : &parent->rb_right;
+       }
+
+       __setup_new_cell(key, cell_prealloc);
+       rb_link_node(&cell_prealloc->node, parent, link);
+       rb_insert_color(&cell_prealloc->node, &prison->cells);
+       *result = cell_prealloc;
+
+       return false;
+}
+
+/*
+ * Try to take a shared lock on the cell for @key.
+ *
+ * If no cell existed, @cell_prealloc is inserted with a shared count of one
+ * and the lock is granted.  If a cell existed and holds an exclusive lock
+ * whose level is at or above @lock_level, @inmate is parked on the cell's
+ * bio list and the lock is refused.  In every other case the shared count
+ * is bumped and the lock is granted.
+ *
+ * *cell is always set to the cell that was found or inserted.
+ * Returns true if the shared lock was granted.
+ */
+static bool __get(struct dm_bio_prison_v2 *prison,
+                 struct dm_cell_key_v2 *key,
+                 unsigned lock_level,
+                 struct bio *inmate,
+                 struct dm_bio_prison_cell_v2 *cell_prealloc,
+                 struct dm_bio_prison_cell_v2 **cell)
+{
+       struct dm_bio_prison_cell_v2 *found;
+
+       if (!__find_or_insert(prison, key, cell_prealloc, cell)) {
+               /* brand new cell: we are its first shared holder */
+               (*cell)->shared_count = 1;
+               return true;
+       }
+
+       found = *cell;
+       if (found->exclusive_lock && lock_level <= found->exclusive_level) {
+               bio_list_add(&found->bios, inmate);
+               return false;
+       }
+
+       found->shared_count++;
+       return true;
+}
+
+/*
+ * Take a shared lock on the cell for @key, with the prison spinlock held
+ * (interrupts saved/restored) around the lookup.
+ *
+ * Returns true if the shared lock was granted.  Returns false if an
+ * exclusive lock at @lock_level or above is held, in which case @inmate has
+ * been added to the cell's bio list instead.  *cell_result is set to the
+ * cell that was used; it equals @cell_prealloc when no matching cell
+ * existed beforehand.
+ */
+bool dm_cell_get_v2(struct dm_bio_prison_v2 *prison,
+                   struct dm_cell_key_v2 *key,
+                   unsigned lock_level,
+                   struct bio *inmate,
+                   struct dm_bio_prison_cell_v2 *cell_prealloc,
+                   struct dm_bio_prison_cell_v2 **cell_result)
+{
+       bool r;         /* __get() returns bool, as do the put/unlock wrappers */
+       unsigned long flags;
+
+       spin_lock_irqsave(&prison->lock, flags);
+       r = __get(prison, key, lock_level, inmate, cell_prealloc, cell_result);
+       spin_unlock_irqrestore(&prison->lock, flags);
+
+       return r;
+}
+EXPORT_SYMBOL_GPL(dm_cell_get_v2);
+
+/*
+ * Drop one shared reference on @cell (it is a BUG to call this with none
+ * held).
+ *
+ * When the last shared reference goes away and no exclusive lock is held,
+ * the cell is removed from the tree and true is returned.  If an exclusive
+ * lock is held instead, any pending quiesce continuation is queued on the
+ * prison's workqueue and cleared, and the cell stays in the tree.
+ *
+ * Returns true only when the cell was removed from the tree.
+ */
+static bool __put(struct dm_bio_prison_v2 *prison,
+                 struct dm_bio_prison_cell_v2 *cell)
+{
+       BUG_ON(!cell->shared_count);
+       cell->shared_count--;
+
+       // FIXME: shared locks granted above the lock level could starve this
+       if (cell->shared_count)
+               return false;
+
+       if (!cell->exclusive_lock) {
+               rb_erase(&cell->node, &prison->cells);
+               return true;
+       }
+
+       if (cell->quiesce_continuation) {
+               queue_work(prison->wq, cell->quiesce_continuation);
+               cell->quiesce_continuation = NULL;
+       }
+
+       return false;
+}
+
+/*
+ * Locked wrapper around __put(): drops one shared reference on @cell with
+ * the prison spinlock held.  Returns true if the cell was removed from the
+ * prison.
+ */
+bool dm_cell_put_v2(struct dm_bio_prison_v2 *prison,
+                   struct dm_bio_prison_cell_v2 *cell)
+{
+       unsigned long irq_flags;
+       bool released;
+
+       spin_lock_irqsave(&prison->lock, irq_flags);
+       released = __put(prison, cell);
+       spin_unlock_irqrestore(&prison->lock, irq_flags);
+
+       return released;
+}
+EXPORT_SYMBOL_GPL(dm_cell_put_v2);
+
+/*
+ * Take the exclusive lock on the cell for @key at @lock_level.
+ *
+ * Returns -EBUSY (leaving *cell_result untouched) if an existing cell
+ * already holds an exclusive lock.  Otherwise the cell, existing or freshly
+ * inserted from @cell_prealloc, is marked exclusively locked and returned
+ * through *cell_result; the return value is then 1 if shared holders are
+ * still present and 0 if there are none.
+ */
+static int __lock(struct dm_bio_prison_v2 *prison,
+                 struct dm_cell_key_v2 *key,
+                 unsigned lock_level,
+                 struct dm_bio_prison_cell_v2 *cell_prealloc,
+                 struct dm_bio_prison_cell_v2 **cell_result)
+{
+       struct dm_bio_prison_cell_v2 *cell;
+       bool existed = __find_or_insert(prison, key, cell_prealloc, &cell);
+
+       if (existed && cell->exclusive_lock)
+               return -EBUSY;
+
+       /* on insertion, cell is cell_prealloc and has no shared holders */
+       if (!existed)
+               cell->shared_count = 0;
+
+       cell->exclusive_lock = true;
+       cell->exclusive_level = lock_level;
+       *cell_result = cell;
+
+       // FIXME: we don't yet know what level these shared locks
+       // were taken at, so have to quiesce them all.
+       return cell->shared_count > 0;
+}
+
+/*
+ * Locked wrapper around __lock(): takes the exclusive lock for @key with
+ * the prison spinlock held.  Returns __lock()'s result: -EBUSY, 0 (no
+ * quiescing needed) or 1 (quiescing needed).
+ */
+int dm_cell_lock_v2(struct dm_bio_prison_v2 *prison,
+                   struct dm_cell_key_v2 *key,
+                   unsigned lock_level,
+                   struct dm_bio_prison_cell_v2 *cell_prealloc,
+                   struct dm_bio_prison_cell_v2 **cell_result)
+{
+       unsigned long irq_flags;
+       int status;
+
+       spin_lock_irqsave(&prison->lock, irq_flags);
+       status = __lock(prison, key, lock_level, cell_prealloc, cell_result);
+       spin_unlock_irqrestore(&prison->lock, irq_flags);
+
+       return status;
+}
+EXPORT_SYMBOL_GPL(dm_cell_lock_v2);
+
+/*
+ * Arrange for @continuation to run once @cell has no shared holders.
+ *
+ * With shared holders present it is stored on the cell, to be queued by
+ * __put() when the last one drops.  With none present it is queued on the
+ * prison's workqueue straight away.
+ */
+static void __quiesce(struct dm_bio_prison_v2 *prison,
+                     struct dm_bio_prison_cell_v2 *cell,
+                     struct work_struct *continuation)
+{
+       if (cell->shared_count) {
+               cell->quiesce_continuation = continuation;
+               return;
+       }
+
+       queue_work(prison->wq, continuation);
+}
+
+/*
+ * Locked wrapper around __quiesce(): @continuation is queued on the
+ * prison's workqueue now if @cell has no shared holders, otherwise it is
+ * stored on the cell for later.
+ */
+void dm_cell_quiesce_v2(struct dm_bio_prison_v2 *prison,
+                       struct dm_bio_prison_cell_v2 *cell,
+                       struct work_struct *continuation)
+{
+       unsigned long irq_flags;
+
+       spin_lock_irqsave(&prison->lock, irq_flags);
+       __quiesce(prison, cell, continuation);
+       spin_unlock_irqrestore(&prison->lock, irq_flags);
+}
+EXPORT_SYMBOL_GPL(dm_cell_quiesce_v2);
+
+/*
+ * Change the level of an exclusive lock that is already held on @cell.
+ *
+ * Returns -EINVAL if @cell is not exclusively locked.  Otherwise the level
+ * is set to @new_lock_level (no check is made that it is higher than the
+ * old one) and 1 is returned if shared holders are present, 0 if not.
+ */
+static int __promote(struct dm_bio_prison_v2 *prison,
+                    struct dm_bio_prison_cell_v2 *cell,
+                    unsigned new_lock_level)
+{
+       if (cell->exclusive_lock) {
+               cell->exclusive_level = new_lock_level;
+               return cell->shared_count > 0;
+       }
+
+       return -EINVAL;
+}
+
+/*
+ * Locked wrapper around __promote(): changes the level of the exclusive
+ * lock held on @cell.  Returns -EINVAL if no exclusive lock is held, else
+ * 0 (no quiescing needed) or 1 (quiescing needed).
+ */
+int dm_cell_lock_promote_v2(struct dm_bio_prison_v2 *prison,
+                           struct dm_bio_prison_cell_v2 *cell,
+                           unsigned new_lock_level)
+{
+       unsigned long irq_flags;
+       int status;
+
+       spin_lock_irqsave(&prison->lock, irq_flags);
+       status = __promote(prison, cell, new_lock_level);
+       spin_unlock_irqrestore(&prison->lock, irq_flags);
+
+       return status;
+}
+EXPORT_SYMBOL_GPL(dm_cell_lock_promote_v2);
+
+/*
+ * Release the exclusive lock on @cell (it is a BUG to call this when none
+ * is held).
+ *
+ * Every bio parked on the cell is moved onto @bios.  If shared holders
+ * remain, the cell stays in the tree with the exclusive flag cleared and
+ * false is returned.  Otherwise the cell is removed from the tree and true
+ * is returned.
+ */
+static bool __unlock(struct dm_bio_prison_v2 *prison,
+                    struct dm_bio_prison_cell_v2 *cell,
+                    struct bio_list *bios)
+{
+       BUG_ON(!cell->exclusive_lock);
+
+       bio_list_merge(bios, &cell->bios);
+       bio_list_init(&cell->bios);
+
+       if (!cell->shared_count) {
+               rb_erase(&cell->node, &prison->cells);
+               return true;
+       }
+
+       cell->exclusive_lock = false;
+       return false;
+}
+
+/*
+ * Locked wrapper around __unlock(): releases the exclusive lock on @cell
+ * and moves its parked bios onto @bios.  Returns true if the cell was
+ * removed from the prison.
+ */
+bool dm_cell_unlock_v2(struct dm_bio_prison_v2 *prison,
+                      struct dm_bio_prison_cell_v2 *cell,
+                      struct bio_list *bios)
+{
+       unsigned long irq_flags;
+       bool released;
+
+       spin_lock_irqsave(&prison->lock, irq_flags);
+       released = __unlock(prison, cell, bios);
+       spin_unlock_irqrestore(&prison->lock, irq_flags);
+
+       return released;
+}
+EXPORT_SYMBOL_GPL(dm_cell_unlock_v2);
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Create the slab cache that the v2 cell mempools are built on.
+ * Returns 0 on success, -ENOMEM if the cache could not be created.
+ */
+int __init dm_bio_prison_init_v2(void)
+{
+       _cell_cache = KMEM_CACHE(dm_bio_prison_cell_v2, 0);
+
+       return _cell_cache ? 0 : -ENOMEM;
+}
+
+void dm_bio_prison_exit_v2(void)
+{
+       kmem_cache_destroy(_cell_cache);        /* undoes dm_bio_prison_init_v2() */
+       _cell_cache = NULL;                     /* do not keep a pointer to the destroyed cache */
+}
diff --git a/drivers/md/dm-bio-prison-v2.h b/drivers/md/dm-bio-prison-v2.h
new file mode 100644 (file)
index 0000000..6e04234
--- /dev/null
@@ -0,0 +1,152 @@
+/*
+ * Copyright (C) 2011-2017 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+
+#ifndef DM_BIO_PRISON_V2_H
+#define DM_BIO_PRISON_V2_H
+
+#include "persistent-data/dm-block-manager.h" /* FIXME: for dm_block_t */
+#include "dm-thin-metadata.h" /* FIXME: for dm_thin_id */
+
+#include <linux/bio.h>
+#include <linux/rbtree.h>
+#include <linux/workqueue.h>
+
+/*----------------------------------------------------------------*/
+
+int dm_bio_prison_init_v2(void);
+void dm_bio_prison_exit_v2(void);
+
+/*
+ * Sometimes we can't deal with a bio straight away.  We put them in prison
+ * where they can't cause any mischief.  Bios are put in a cell identified
+ * by a key, multiple bios can be in the same cell.  When the cell is
+ * subsequently unlocked the bios become available.
+ */
+struct dm_bio_prison_v2;
+
+/*
+ * Keys define a range of blocks within either a virtual or physical
+ * device.
+ *
+ * The range is half-open: block_begin is included, block_end is not.
+ * Keys are ordered by virtual, then dev, then block range; two keys with
+ * the same virtual and dev whose ranges overlap are treated as the same
+ * key.
+ */
+struct dm_cell_key_v2 {
+       int virtual;
+       dm_thin_id dev;
+       dm_block_t block_begin, block_end;
+};
+
+/*
+ * Treat this as opaque, only in header so callers can manage allocation
+ * themselves.
+ */
+struct dm_bio_prison_cell_v2 {
+       // FIXME: pack these
+       bool exclusive_lock;            /* an exclusive lock is currently held on this cell */
+       unsigned exclusive_level;       /* shared requests at or below this level are held back */
+       unsigned shared_count;          /* number of shared locks currently granted */
+       struct work_struct *quiesce_continuation; /* queued when shared_count drops to zero under an exclusive lock */
+
+       struct rb_node node;            /* linkage in the prison's rb-tree of cells */
+       struct dm_cell_key_v2 key;      /* copy of the key this cell was created for */
+       struct bio_list bios;           /* bios held back by the exclusive lock; handed over at unlock */
+};
+
+struct dm_bio_prison_v2 *dm_bio_prison_create_v2(struct workqueue_struct *wq);
+void dm_bio_prison_destroy_v2(struct dm_bio_prison_v2 *prison);
+
+/*
+ * These two functions just wrap a mempool.  This is a transitory step:
+ * Eventually all bio prison clients should manage their own cell memory.
+ *
+ * Like mempool_alloc(), dm_bio_prison_alloc_cell_v2() can only fail if called
+ * in interrupt context or passed GFP_NOWAIT.
+ */
+struct dm_bio_prison_cell_v2 *dm_bio_prison_alloc_cell_v2(struct dm_bio_prison_v2 *prison,
+                                                   gfp_t gfp);
+void dm_bio_prison_free_cell_v2(struct dm_bio_prison_v2 *prison,
+                               struct dm_bio_prison_cell_v2 *cell);
+
+/*
+ * Shared locks have a bio associated with them.
+ *
+ * If the lock is granted the caller can continue to use the bio, and must
+ * call dm_cell_put_v2() to drop the reference count when finished using it.
+ *
+ * If the lock cannot be granted then the bio will be tracked within the
+ * cell, and later given to the holder of the exclusive lock.
+ *
+ * See dm_cell_lock_v2() for discussion of the lock_level parameter.
+ *
+ * Compare *cell_result with cell_prealloc to see if the prealloc was used.
+ * If cell_prealloc was used then inmate wasn't added to it.
+ *
+ * Returns true if the lock is granted.
+ */
+bool dm_cell_get_v2(struct dm_bio_prison_v2 *prison,
+                   struct dm_cell_key_v2 *key,
+                   unsigned lock_level,
+                   struct bio *inmate,
+                   struct dm_bio_prison_cell_v2 *cell_prealloc,
+                   struct dm_bio_prison_cell_v2 **cell_result);
+
+/*
+ * Decrement the shared reference count for the lock.  Returns true if
+ * returning ownership of the cell (ie. you should free it).
+ */
+bool dm_cell_put_v2(struct dm_bio_prison_v2 *prison,
+                   struct dm_bio_prison_cell_v2 *cell);
+
+/*
+ * Locks a cell.  No associated bio.  Exclusive locks get priority.  These
+ * locks constrain whether the io locks are granted according to level.
+ *
+ * Shared locks will still be granted if the lock_level is > (not = to) the
+ * exclusive lock level.
+ *
+ * If an _exclusive_ lock is already held then -EBUSY is returned.
+ *
+ * Return values:
+ *  < 0 - error
+ *  0   - locked; no quiescing needed
+ *  1   - locked; quiescing needed
+ */
+int dm_cell_lock_v2(struct dm_bio_prison_v2 *prison,
+                   struct dm_cell_key_v2 *key,
+                   unsigned lock_level,
+                   struct dm_bio_prison_cell_v2 *cell_prealloc,
+                   struct dm_bio_prison_cell_v2 **cell_result);
+
+void dm_cell_quiesce_v2(struct dm_bio_prison_v2 *prison,
+                       struct dm_bio_prison_cell_v2 *cell,
+                       struct work_struct *continuation);
+
+/*
+ * Promotes an _exclusive_ lock to a higher lock level.
+ *
+ * Return values:
+ *  < 0 - error
+ *  0   - promoted; no quiescing needed
+ *  1   - promoted; quiescing needed
+ */
+int dm_cell_lock_promote_v2(struct dm_bio_prison_v2 *prison,
+                           struct dm_bio_prison_cell_v2 *cell,
+                           unsigned new_lock_level);
+
+/*
+ * Adds any held bios to the bio list.
+ *
+ * There may be shared locks still held at this point even if you quiesced
+ * (ie. different lock levels).
+ *
+ * Returns true if returning ownership of the cell (ie. you should free
+ * it).
+ */
+bool dm_cell_unlock_v2(struct dm_bio_prison_v2 *prison,
+                      struct dm_bio_prison_cell_v2 *cell,
+                      struct bio_list *bios);
+
+/*----------------------------------------------------------------*/
+
+#endif
index 9c689b34e6e792105d64f2bb4835e11e37c91578..2eaa414e1509eee67e5a699df39f40b8edd1f860 100644 (file)
@@ -5,7 +5,7 @@
  */
 
 #include "dm.h"
-#include "dm-bio-prison.h"
+#include "dm-bio-prison-v1.h"
 #include "dm-bio-record.h"
 #include "dm-cache-metadata.h"
 
index 2b266a2b5035b9fa699eb1afdf4c5dc53da54c66..9b3e2fcbfb1b2028b7feb7e116b679e6ce3856ca 100644 (file)
@@ -5,7 +5,7 @@
  */
 
 #include "dm-thin-metadata.h"
-#include "dm-bio-prison.h"
+#include "dm-bio-prison-v1.h"
 #include "dm.h"
 
 #include <linux/device-mapper.h>