git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.10-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 23 Jul 2022 15:04:24 +0000 (17:04 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 23 Jul 2022 15:04:24 +0000 (17:04 +0200)
added patches:
block-fix-bounce_clone_bio-for-passthrough-bios.patch
block-split-bio_kmalloc-from-bio_alloc_bioset.patch
docs-net-explain-struct-net_device-lifetime.patch
net-inline-rollback_registered.patch
net-inline-rollback_registered_many.patch
net-make-free_netdev-more-lenient-with-unregistering-devices.patch
net-make-sure-devices-go-through-netdev_wait_all_refs.patch
net-move-net_set_todo-inside-rollback_registered.patch
net-move-rollback_registered_many.patch

queue-5.10/block-fix-bounce_clone_bio-for-passthrough-bios.patch [new file with mode: 0644]
queue-5.10/block-split-bio_kmalloc-from-bio_alloc_bioset.patch [new file with mode: 0644]
queue-5.10/docs-net-explain-struct-net_device-lifetime.patch [new file with mode: 0644]
queue-5.10/net-inline-rollback_registered.patch [new file with mode: 0644]
queue-5.10/net-inline-rollback_registered_many.patch [new file with mode: 0644]
queue-5.10/net-make-free_netdev-more-lenient-with-unregistering-devices.patch [new file with mode: 0644]
queue-5.10/net-make-sure-devices-go-through-netdev_wait_all_refs.patch [new file with mode: 0644]
queue-5.10/net-move-net_set_todo-inside-rollback_registered.patch [new file with mode: 0644]
queue-5.10/net-move-rollback_registered_many.patch [new file with mode: 0644]
queue-5.10/series

diff --git a/queue-5.10/block-fix-bounce_clone_bio-for-passthrough-bios.patch b/queue-5.10/block-fix-bounce_clone_bio-for-passthrough-bios.patch
new file mode 100644
index 0000000..48d8be1
--- /dev/null
@@ -0,0 +1,77 @@
+From b90994c6ab623baf9268df9710692f14920ce9d2 Mon Sep 17 00:00:00 2001
+From: Christoph Hellwig <hch@lst.de>
+Date: Wed, 24 Feb 2021 08:24:05 +0100
+Subject: block: fix bounce_clone_bio for passthrough bios
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit b90994c6ab623baf9268df9710692f14920ce9d2 upstream.
+
+Now that bio_alloc_bioset does not fall back to kmalloc for a NULL
+bio_set, handle that case explicitly and simplify the calling
+conventions.
+
+Based on an earlier patch from Chaitanya Kulkarni.
+
+Fixes: 3175199ab0ac ("block: split bio_kmalloc from bio_alloc_bioset")
+Reported-by: Chaitanya Kulkarni <Chaitanya.Kulkarni@wdc.com>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Tadeusz Struk <tadeusz.struk@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ block/bounce.c |   17 +++++++++--------
+ 1 file changed, 9 insertions(+), 8 deletions(-)
+
+--- a/block/bounce.c
++++ b/block/bounce.c
+@@ -214,8 +214,7 @@ static void bounce_end_io_read_isa(struc
+       __bounce_end_io_read(bio, &isa_page_pool);
+ }
+-static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask,
+-              struct bio_set *bs)
++static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask)
+ {
+       struct bvec_iter iter;
+       struct bio_vec bv;
+@@ -242,8 +241,11 @@ static struct bio *bounce_clone_bio(stru
+        *    asking for trouble and would force extra work on
+        *    __bio_clone_fast() anyways.
+        */
+-
+-      bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), bs);
++      if (bio_is_passthrough(bio_src))
++              bio = bio_kmalloc(gfp_mask, bio_segments(bio_src));
++      else
++              bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src),
++                                     &bounce_bio_set);
+       if (!bio)
+               return NULL;
+       bio->bi_disk            = bio_src->bi_disk;
+@@ -294,7 +296,6 @@ static void __blk_queue_bounce(struct re
+       unsigned i = 0;
+       bool bounce = false;
+       int sectors = 0;
+-      bool passthrough = bio_is_passthrough(*bio_orig);
+       bio_for_each_segment(from, *bio_orig, iter) {
+               if (i++ < BIO_MAX_PAGES)
+@@ -305,14 +306,14 @@ static void __blk_queue_bounce(struct re
+       if (!bounce)
+               return;
+-      if (!passthrough && sectors < bio_sectors(*bio_orig)) {
++      if (!bio_is_passthrough(*bio_orig) &&
++          sectors < bio_sectors(*bio_orig)) {
+               bio = bio_split(*bio_orig, sectors, GFP_NOIO, &bounce_bio_split);
+               bio_chain(bio, *bio_orig);
+               submit_bio_noacct(*bio_orig);
+               *bio_orig = bio;
+       }
+-      bio = bounce_clone_bio(*bio_orig, GFP_NOIO, passthrough ? NULL :
+-                      &bounce_bio_set);
++      bio = bounce_clone_bio(*bio_orig, GFP_NOIO);
+       /*
+        * Bvec table can't be updated by bio_for_each_segment_all(),
diff --git a/queue-5.10/block-split-bio_kmalloc-from-bio_alloc_bioset.patch b/queue-5.10/block-split-bio_kmalloc-from-bio_alloc_bioset.patch
new file mode 100644
index 0000000..fc26efb
--- /dev/null
@@ -0,0 +1,267 @@
+From 3175199ab0ac8c874ec25c6bf169f74888917435 Mon Sep 17 00:00:00 2001
+From: Christoph Hellwig <hch@lst.de>
+Date: Tue, 26 Jan 2021 15:52:34 +0100
+Subject: block: split bio_kmalloc from bio_alloc_bioset
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 3175199ab0ac8c874ec25c6bf169f74888917435 upstream.
+
+bio_kmalloc shares almost no logic with the bio_set based fast path
+in bio_alloc_bioset.  Split it into an entirely separate implementation.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
+Acked-by: Damien Le Moal <damien.lemoal@wdc.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Tadeusz Struk <tadeusz.struk@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ block/bio.c         |  174 ++++++++++++++++++++++++++--------------------------
+ include/linux/bio.h |    6 -
+ 2 files changed, 90 insertions(+), 90 deletions(-)
+
+--- a/block/bio.c
++++ b/block/bio.c
+@@ -405,122 +405,101 @@ static void punt_bios_to_rescuer(struct
+  * @nr_iovecs:        number of iovecs to pre-allocate
+  * @bs:               the bio_set to allocate from.
+  *
+- * Description:
+- *   If @bs is NULL, uses kmalloc() to allocate the bio; else the allocation is
+- *   backed by the @bs's mempool.
+- *
+- *   When @bs is not NULL, if %__GFP_DIRECT_RECLAIM is set then bio_alloc will
+- *   always be able to allocate a bio. This is due to the mempool guarantees.
+- *   To make this work, callers must never allocate more than 1 bio at a time
+- *   from this pool. Callers that need to allocate more than 1 bio must always
+- *   submit the previously allocated bio for IO before attempting to allocate
+- *   a new one. Failure to do so can cause deadlocks under memory pressure.
+- *
+- *   Note that when running under submit_bio_noacct() (i.e. any block
+- *   driver), bios are not submitted until after you return - see the code in
+- *   submit_bio_noacct() that converts recursion into iteration, to prevent
+- *   stack overflows.
+- *
+- *   This would normally mean allocating multiple bios under
+- *   submit_bio_noacct() would be susceptible to deadlocks, but we have
+- *   deadlock avoidance code that resubmits any blocked bios from a rescuer
+- *   thread.
+- *
+- *   However, we do not guarantee forward progress for allocations from other
+- *   mempools. Doing multiple allocations from the same mempool under
+- *   submit_bio_noacct() should be avoided - instead, use bio_set's front_pad
+- *   for per bio allocations.
++ * Allocate a bio from the mempools in @bs.
+  *
+- *   RETURNS:
+- *   Pointer to new bio on success, NULL on failure.
++ * If %__GFP_DIRECT_RECLAIM is set then bio_alloc will always be able to
++ * allocate a bio.  This is due to the mempool guarantees.  To make this work,
++ * callers must never allocate more than 1 bio at a time from the general pool.
++ * Callers that need to allocate more than 1 bio must always submit the
++ * previously allocated bio for IO before attempting to allocate a new one.
++ * Failure to do so can cause deadlocks under memory pressure.
++ *
++ * Note that when running under submit_bio_noacct() (i.e. any block driver),
++ * bios are not submitted until after you return - see the code in
++ * submit_bio_noacct() that converts recursion into iteration, to prevent
++ * stack overflows.
++ *
++ * This would normally mean allocating multiple bios under submit_bio_noacct()
++ * would be susceptible to deadlocks, but we have
++ * deadlock avoidance code that resubmits any blocked bios from a rescuer
++ * thread.
++ *
++ * However, we do not guarantee forward progress for allocations from other
++ * mempools. Doing multiple allocations from the same mempool under
++ * submit_bio_noacct() should be avoided - instead, use bio_set's front_pad
++ * for per bio allocations.
++ *
++ * Returns: Pointer to new bio on success, NULL on failure.
+  */
+ struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs,
+                            struct bio_set *bs)
+ {
+       gfp_t saved_gfp = gfp_mask;
+-      unsigned front_pad;
+-      unsigned inline_vecs;
+-      struct bio_vec *bvl = NULL;
+       struct bio *bio;
+       void *p;
+-      if (!bs) {
+-              if (nr_iovecs > UIO_MAXIOV)
+-                      return NULL;
+-
+-              p = kmalloc(struct_size(bio, bi_inline_vecs, nr_iovecs), gfp_mask);
+-              front_pad = 0;
+-              inline_vecs = nr_iovecs;
+-      } else {
+-              /* should not use nobvec bioset for nr_iovecs > 0 */
+-              if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) &&
+-                               nr_iovecs > 0))
+-                      return NULL;
+-              /*
+-               * submit_bio_noacct() converts recursion to iteration; this
+-               * means if we're running beneath it, any bios we allocate and
+-               * submit will not be submitted (and thus freed) until after we
+-               * return.
+-               *
+-               * This exposes us to a potential deadlock if we allocate
+-               * multiple bios from the same bio_set() while running
+-               * underneath submit_bio_noacct(). If we were to allocate
+-               * multiple bios (say a stacking block driver that was splitting
+-               * bios), we would deadlock if we exhausted the mempool's
+-               * reserve.
+-               *
+-               * We solve this, and guarantee forward progress, with a rescuer
+-               * workqueue per bio_set. If we go to allocate and there are
+-               * bios on current->bio_list, we first try the allocation
+-               * without __GFP_DIRECT_RECLAIM; if that fails, we punt those
+-               * bios we would be blocking to the rescuer workqueue before
+-               * we retry with the original gfp_flags.
+-               */
+-
+-              if (current->bio_list &&
+-                  (!bio_list_empty(&current->bio_list[0]) ||
+-                   !bio_list_empty(&current->bio_list[1])) &&
+-                  bs->rescue_workqueue)
+-                      gfp_mask &= ~__GFP_DIRECT_RECLAIM;
++      /* should not use nobvec bioset for nr_iovecs > 0 */
++      if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) && nr_iovecs > 0))
++              return NULL;
++      /*
++       * submit_bio_noacct() converts recursion to iteration; this means if
++       * we're running beneath it, any bios we allocate and submit will not be
++       * submitted (and thus freed) until after we return.
++       *
++       * This exposes us to a potential deadlock if we allocate multiple bios
++       * from the same bio_set() while running underneath submit_bio_noacct().
++       * If we were to allocate multiple bios (say a stacking block driver
++       * that was splitting bios), we would deadlock if we exhausted the
++       * mempool's reserve.
++       *
++       * We solve this, and guarantee forward progress, with a rescuer
++       * workqueue per bio_set. If we go to allocate and there are bios on
++       * current->bio_list, we first try the allocation without
++       * __GFP_DIRECT_RECLAIM; if that fails, we punt those bios we would be
++       * blocking to the rescuer workqueue before we retry with the original
++       * gfp_flags.
++       */
++      if (current->bio_list &&
++          (!bio_list_empty(&current->bio_list[0]) ||
++           !bio_list_empty(&current->bio_list[1])) &&
++          bs->rescue_workqueue)
++              gfp_mask &= ~__GFP_DIRECT_RECLAIM;
++
++      p = mempool_alloc(&bs->bio_pool, gfp_mask);
++      if (!p && gfp_mask != saved_gfp) {
++              punt_bios_to_rescuer(bs);
++              gfp_mask = saved_gfp;
+               p = mempool_alloc(&bs->bio_pool, gfp_mask);
+-              if (!p && gfp_mask != saved_gfp) {
+-                      punt_bios_to_rescuer(bs);
+-                      gfp_mask = saved_gfp;
+-                      p = mempool_alloc(&bs->bio_pool, gfp_mask);
+-              }
+-
+-              front_pad = bs->front_pad;
+-              inline_vecs = BIO_INLINE_VECS;
+       }
+-
+       if (unlikely(!p))
+               return NULL;
+-      bio = p + front_pad;
+-      bio_init(bio, NULL, 0);
+-
+-      if (nr_iovecs > inline_vecs) {
++      bio = p + bs->front_pad;
++      if (nr_iovecs > BIO_INLINE_VECS) {
+               unsigned long idx = 0;
++              struct bio_vec *bvl = NULL;
+               bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, &bs->bvec_pool);
+               if (!bvl && gfp_mask != saved_gfp) {
+                       punt_bios_to_rescuer(bs);
+                       gfp_mask = saved_gfp;
+-                      bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, &bs->bvec_pool);
++                      bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx,
++                                       &bs->bvec_pool);
+               }
+               if (unlikely(!bvl))
+                       goto err_free;
+               bio->bi_flags |= idx << BVEC_POOL_OFFSET;
++              bio_init(bio, bvl, bvec_nr_vecs(idx));
+       } else if (nr_iovecs) {
+-              bvl = bio->bi_inline_vecs;
++              bio_init(bio, bio->bi_inline_vecs, BIO_INLINE_VECS);
++      } else {
++              bio_init(bio, NULL, 0);
+       }
+       bio->bi_pool = bs;
+-      bio->bi_max_vecs = nr_iovecs;
+-      bio->bi_io_vec = bvl;
+       return bio;
+ err_free:
+@@ -529,6 +508,31 @@ err_free:
+ }
+ EXPORT_SYMBOL(bio_alloc_bioset);
++/**
++ * bio_kmalloc - kmalloc a bio for I/O
++ * @gfp_mask:   the GFP_* mask given to the slab allocator
++ * @nr_iovecs:        number of iovecs to pre-allocate
++ *
++ * Use kmalloc to allocate and initialize a bio.
++ *
++ * Returns: Pointer to new bio on success, NULL on failure.
++ */
++struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs)
++{
++      struct bio *bio;
++
++      if (nr_iovecs > UIO_MAXIOV)
++              return NULL;
++
++      bio = kmalloc(struct_size(bio, bi_inline_vecs, nr_iovecs), gfp_mask);
++      if (unlikely(!bio))
++              return NULL;
++      bio_init(bio, nr_iovecs ? bio->bi_inline_vecs : NULL, nr_iovecs);
++      bio->bi_pool = NULL;
++      return bio;
++}
++EXPORT_SYMBOL(bio_kmalloc);
++
+ void zero_fill_bio_iter(struct bio *bio, struct bvec_iter start)
+ {
+       unsigned long flags;
+--- a/include/linux/bio.h
++++ b/include/linux/bio.h
+@@ -390,6 +390,7 @@ extern int biovec_init_pool(mempool_t *p
+ extern int bioset_init_from_src(struct bio_set *bs, struct bio_set *src);
+ extern struct bio *bio_alloc_bioset(gfp_t, unsigned int, struct bio_set *);
++struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs);
+ extern void bio_put(struct bio *);
+ extern void __bio_clone_fast(struct bio *, struct bio *);
+@@ -402,11 +403,6 @@ static inline struct bio *bio_alloc(gfp_
+       return bio_alloc_bioset(gfp_mask, nr_iovecs, &fs_bio_set);
+ }
+-static inline struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs)
+-{
+-      return bio_alloc_bioset(gfp_mask, nr_iovecs, NULL);
+-}
+-
+ extern blk_qc_t submit_bio(struct bio *);
+ extern void bio_endio(struct bio *);
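
To illustrate the backported API: after this split, a caller that used to pass a NULL bio_set to bio_alloc_bioset() must call bio_kmalloc() instead, while mempool-backed allocation keeps using bio_alloc_bioset() with a real bio_set. A minimal sketch under that assumption (my_alloc_bio is a hypothetical helper, not part of the patch):

    #include <linux/bio.h>

    /* Sketch only: choose the allocator the way callers must after this
     * patch; a NULL bio_set is no longer accepted by bio_alloc_bioset(). */
    static struct bio *my_alloc_bio(unsigned int nr_vecs, struct bio_set *bs)
    {
            struct bio *bio;

            if (!bs)        /* no mempool backing: plain kmalloc'ed bio */
                    bio = bio_kmalloc(GFP_NOIO, nr_vecs);
            else            /* mempool-backed fast path */
                    bio = bio_alloc_bioset(GFP_NOIO, nr_vecs, bs);
            if (!bio)
                    return NULL;
            /* bio_kmalloc() leaves bi_pool NULL, so a later bio_put()
             * frees it with kfree(); bioset bios return to their pool. */
            return bio;
    }

Either way the bio is still released with bio_put(), as before the split.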
diff --git a/queue-5.10/docs-net-explain-struct-net_device-lifetime.patch b/queue-5.10/docs-net-explain-struct-net_device-lifetime.patch
new file mode 100644
index 0000000..36c6a4c
--- /dev/null
@@ -0,0 +1,221 @@
+From foo@baz Sat Jul 23 05:03:39 PM CEST 2022
+From: Fedor Pchelkin <pchelkin@ispras.ru>
+Date: Fri, 15 Jul 2022 19:26:26 +0300
+Subject: docs: net: explain struct net_device lifetime
+To: stable@vger.kernel.org, Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Fedor Pchelkin <pchelkin@ispras.ru>, Jakub Kicinski <kuba@kernel.org>, Alexey Khoroshilov <khoroshilov@ispras.ru>
+Message-ID: <20220715162632.332718-2-pchelkin@ispras.ru>
+
+From: Fedor Pchelkin <pchelkin@ispras.ru>
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+commit 2b446e650b418f9a9e75f99852e2f2560cabfa17 upstream.
+
+Explain the two basic flows of struct net_device's operation.
+
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Fedor Pchelkin <pchelkin@ispras.ru>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/networking/netdevices.rst |  171 ++++++++++++++++++++++++++++++--
+ net/core/rtnetlink.c                    |    2 
+ 2 files changed, 166 insertions(+), 7 deletions(-)
+
+--- a/Documentation/networking/netdevices.rst
++++ b/Documentation/networking/netdevices.rst
+@@ -10,18 +10,177 @@ Introduction
+ The following is a random collection of documentation regarding
+ network devices.
+-struct net_device allocation rules
+-==================================
++struct net_device lifetime rules
++================================
+ Network device structures need to persist even after module is unloaded and
+ must be allocated with alloc_netdev_mqs() and friends.
+ If device has registered successfully, it will be freed on last use
+-by free_netdev(). This is required to handle the pathologic case cleanly
+-(example: rmmod mydriver </sys/class/net/myeth/mtu )
++by free_netdev(). This is required to handle the pathological case cleanly
++(example: ``rmmod mydriver </sys/class/net/myeth/mtu``)
+-alloc_netdev_mqs()/alloc_netdev() reserve extra space for driver
++alloc_netdev_mqs() / alloc_netdev() reserve extra space for driver
+ private data which gets freed when the network device is freed. If
+ separately allocated data is attached to the network device
+-(netdev_priv(dev)) then it is up to the module exit handler to free that.
++(netdev_priv()) then it is up to the module exit handler to free that.
++
++There are two groups of APIs for registering struct net_device.
++First group can be used in normal contexts where ``rtnl_lock`` is not already
++held: register_netdev(), unregister_netdev().
++Second group can be used when ``rtnl_lock`` is already held:
++register_netdevice(), unregister_netdevice(), free_netdev().
++
++Simple drivers
++--------------
++
++Most drivers (especially device drivers) handle lifetime of struct net_device
++in context where ``rtnl_lock`` is not held (e.g. driver probe and remove paths).
++
++In that case the struct net_device registration is done using
++the register_netdev(), and unregister_netdev() functions:
++
++.. code-block:: c
++
++  int probe()
++  {
++    struct my_device_priv *priv;
++    int err;
++
++    dev = alloc_netdev_mqs(...);
++    if (!dev)
++      return -ENOMEM;
++    priv = netdev_priv(dev);
++
++    /* ... do all device setup before calling register_netdev() ...
++     */
++
++    err = register_netdev(dev);
++    if (err)
++      goto err_undo;
++
++    /* net_device is visible to the user! */
++
++  err_undo:
++    /* ... undo the device setup ... */
++    free_netdev(dev);
++    return err;
++  }
++
++  void remove()
++  {
++    unregister_netdev(dev);
++    free_netdev(dev);
++  }
++
++Note that after calling register_netdev() the device is visible in the system.
++Users can open it and start sending / receiving traffic immediately,
++or run any other callback, so all initialization must be done prior to
++registration.
++
++unregister_netdev() closes the device and waits for all users to be done
++with it. The memory of struct net_device itself may still be referenced
++by sysfs but all operations on that device will fail.
++
++free_netdev() can be called after unregister_netdev() returns or when
++register_netdev() failed.
++
++Device management under RTNL
++----------------------------
++
++Registering struct net_device while in context which already holds
++the ``rtnl_lock`` requires extra care. In those scenarios most drivers
++will want to make use of struct net_device's ``needs_free_netdev``
++and ``priv_destructor`` members for freeing of state.
++
++Example flow of netdev handling under ``rtnl_lock``:
++
++.. code-block:: c
++
++  static void my_setup(struct net_device *dev)
++  {
++    dev->needs_free_netdev = true;
++  }
++
++  static void my_destructor(struct net_device *dev)
++  {
++    some_obj_destroy(priv->obj);
++    some_uninit(priv);
++  }
++
++  int create_link()
++  {
++    struct my_device_priv *priv;
++    int err;
++
++    ASSERT_RTNL();
++
++    dev = alloc_netdev(sizeof(*priv), "net%d", NET_NAME_UNKNOWN, my_setup);
++    if (!dev)
++      return -ENOMEM;
++    priv = netdev_priv(dev);
++
++    /* Implicit constructor */
++    err = some_init(priv);
++    if (err)
++      goto err_free_dev;
++
++    priv->obj = some_obj_create();
++    if (!priv->obj) {
++      err = -ENOMEM;
++      goto err_some_uninit;
++    }
++    /* End of constructor, set the destructor: */
++    dev->priv_destructor = my_destructor;
++
++    err = register_netdevice(dev);
++    if (err)
++      /* register_netdevice() calls destructor on failure */
++      goto err_free_dev;
++
++    /* If anything fails now unregister_netdevice() (or unregister_netdev())
++     * will take care of calling my_destructor and free_netdev().
++     */
++
++    return 0;
++
++  err_some_uninit:
++    some_uninit(priv);
++  err_free_dev:
++    free_netdev(dev);
++    return err;
++  }
++
++If struct net_device.priv_destructor is set it will be called by the core
++some time after unregister_netdevice(); it will also be called if
++register_netdevice() fails. The callback may be invoked with or without
++``rtnl_lock`` held.
++
++There is no explicit constructor callback; the driver "constructs" the private
++netdev state after allocating it and before registration.
++
++Setting struct net_device.needs_free_netdev makes core call free_netdev()
++automatically after unregister_netdevice() when all references to the device
++are gone. It only takes effect after a successful call to register_netdevice()
++so if register_netdevice() fails the driver is responsible for calling
++free_netdev().
++
++free_netdev() is safe to call on error paths right after unregister_netdevice()
++or when register_netdevice() fails. Parts of netdev (de)registration process
++happen after ``rtnl_lock`` is released, therefore in those cases free_netdev()
++will defer some of the processing until ``rtnl_lock`` is released.
++
++Devices spawned from struct rtnl_link_ops should never free the
++struct net_device directly.
++
++.ndo_init and .ndo_uninit
++~~~~~~~~~~~~~~~~~~~~~~~~~
++
++``.ndo_init`` and ``.ndo_uninit`` callbacks are called during net_device
++registration and de-registration, under ``rtnl_lock``. Drivers can use
++those e.g. when parts of their init process need to run under ``rtnl_lock``.
++
++``.ndo_init`` runs before device is visible in the system, ``.ndo_uninit``
++runs during de-registering after device is closed but other subsystems
++may still have outstanding references to the netdevice.
+ MTU
+ ===
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -3444,7 +3444,7 @@ replay:
+       if (ops->newlink) {
+               err = ops->newlink(link_net ? : net, dev, tb, data, extack);
+-              /* Drivers should call free_netdev() in ->destructor
++              /* Drivers should set dev->needs_free_netdev
+                * and unregister it on failure after registration
+                * so that device could be finally freed in rtnl_unlock.
+                */
diff --git a/queue-5.10/net-inline-rollback_registered.patch b/queue-5.10/net-inline-rollback_registered.patch
new file mode 100644
index 0000000..c912663
--- /dev/null
@@ -0,0 +1,67 @@
+From foo@baz Sat Jul 23 05:03:39 PM CEST 2022
+From: Fedor Pchelkin <pchelkin@ispras.ru>
+Date: Fri, 15 Jul 2022 19:26:30 +0300
+Subject: net: inline rollback_registered()
+To: stable@vger.kernel.org, Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Fedor Pchelkin <pchelkin@ispras.ru>, Jakub Kicinski <kuba@kernel.org>, Alexey Khoroshilov <khoroshilov@ispras.ru>, Edwin Peer <edwin.peer@broadcom.com>
+Message-ID: <20220715162632.332718-6-pchelkin@ispras.ru>
+
+From: Fedor Pchelkin <pchelkin@ispras.ru>
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+commit 037e56bd965e1bc72c2fa9684ac25b56839a338e upstream.
+
+rollback_registered() is a local helper; it's common for driver
+code to call unregister_netdevice_queue(dev, NULL) when they
+want to unregister netdevices under rtnl_lock. Inline
+rollback_registered() and adjust the only remaining caller.
+
+Reviewed-by: Edwin Peer <edwin.peer@broadcom.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Fedor Pchelkin <pchelkin@ispras.ru>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c |   17 ++++++-----------
+ 1 file changed, 6 insertions(+), 11 deletions(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -9601,15 +9601,6 @@ static void rollback_registered_many(str
+       }
+ }
+-static void rollback_registered(struct net_device *dev)
+-{
+-      LIST_HEAD(single);
+-
+-      list_add(&dev->unreg_list, &single);
+-      rollback_registered_many(&single);
+-      list_del(&single);
+-}
+-
+ static netdev_features_t netdev_sync_upper_features(struct net_device *lower,
+       struct net_device *upper, netdev_features_t features)
+ {
+@@ -10148,7 +10139,7 @@ int register_netdevice(struct net_device
+       if (ret) {
+               /* Expect explicit free_netdev() on failure */
+               dev->needs_free_netdev = false;
+-              rollback_registered(dev);
++              unregister_netdevice_queue(dev, NULL);
+               goto out;
+       }
+       /*
+@@ -10755,7 +10746,11 @@ void unregister_netdevice_queue(struct n
+       if (head) {
+               list_move_tail(&dev->unreg_list, head);
+       } else {
+-              rollback_registered(dev);
++              LIST_HEAD(single);
++
++              list_add(&dev->unreg_list, &single);
++              rollback_registered_many(&single);
++              list_del(&single);
+       }
+ }
+ EXPORT_SYMBOL(unregister_netdevice_queue);
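
The driver pattern the commit message refers to (unregistering netdevices while already holding rtnl_lock) looks roughly like the sketch below; my_remove_one, my_remove_group and the devs array are hypothetical, not part of the patch:

    #include <linux/netdevice.h>
    #include <linux/rtnetlink.h>

    /* Single device, no batching: a NULL head makes
     * unregister_netdevice_queue() unregister immediately. */
    static void my_remove_one(struct net_device *dev)
    {
            ASSERT_RTNL();
            unregister_netdevice_queue(dev, NULL);
    }

    /* Batched form: queue each device on a list, then let
     * unregister_netdevice_many() pay for synchronize_net() and the
     * notifier round-trips once for the whole group. */
    static void my_remove_group(struct net_device *devs[], int n)
    {
            LIST_HEAD(unreg_list);
            int i;

            ASSERT_RTNL();
            for (i = 0; i < n; i++)
                    unregister_netdevice_queue(devs[i], &unreg_list);
            unregister_netdevice_many(&unreg_list);
    }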
diff --git a/queue-5.10/net-inline-rollback_registered_many.patch b/queue-5.10/net-inline-rollback_registered_many.patch
new file mode 100644
index 0000000..bed90cb
--- /dev/null
@@ -0,0 +1,99 @@
+From foo@baz Sat Jul 23 05:03:39 PM CEST 2022
+From: Fedor Pchelkin <pchelkin@ispras.ru>
+Date: Fri, 15 Jul 2022 19:26:32 +0300
+Subject: net: inline rollback_registered_many()
+To: stable@vger.kernel.org, Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Fedor Pchelkin <pchelkin@ispras.ru>, Jakub Kicinski <kuba@kernel.org>, Alexey Khoroshilov <khoroshilov@ispras.ru>, Edwin Peer <edwin.peer@broadcom.com>
+Message-ID: <20220715162632.332718-8-pchelkin@ispras.ru>
+
+From: Fedor Pchelkin <pchelkin@ispras.ru>
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+commit 0cbe1e57a7b93517100b0eb63d8e445cfbeb630c upstream.
+
+Similar to the change for rollback_registered() -
+rollback_registered_many() was a part of unregister_netdevice_many()
+minus the net_set_todo(), which is no longer needed.
+
+Functionally this patch moves the list_empty() check back after:
+
+       BUG_ON(dev_boot_phase);
+       ASSERT_RTNL();
+
+but I can't find any reason why that would be an issue.
+
+Reviewed-by: Edwin Peer <edwin.peer@broadcom.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Fedor Pchelkin <pchelkin@ispras.ru>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c |   22 ++++++++--------------
+ 1 file changed, 8 insertions(+), 14 deletions(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -5750,7 +5750,7 @@ static void flush_all_backlogs(void)
+       }
+       /* we can have in flight packet[s] on the cpus we are not flushing,
+-       * synchronize_net() in rollback_registered_many() will take care of
++       * synchronize_net() in unregister_netdevice_many() will take care of
+        * them
+        */
+       for_each_cpu(cpu, &flush_cpus)
+@@ -10633,8 +10633,6 @@ void synchronize_net(void)
+ }
+ EXPORT_SYMBOL(synchronize_net);
+-static void rollback_registered_many(struct list_head *head);
+-
+ /**
+  *    unregister_netdevice_queue - remove device from the kernel
+  *    @dev: device
+@@ -10658,8 +10656,7 @@ void unregister_netdevice_queue(struct n
+               LIST_HEAD(single);
+               list_add(&dev->unreg_list, &single);
+-              rollback_registered_many(&single);
+-              list_del(&single);
++              unregister_netdevice_many(&single);
+       }
+ }
+ EXPORT_SYMBOL(unregister_netdevice_queue);
+@@ -10673,21 +10670,15 @@ EXPORT_SYMBOL(unregister_netdevice_queue
+  */
+ void unregister_netdevice_many(struct list_head *head)
+ {
+-      if (!list_empty(head)) {
+-              rollback_registered_many(head);
+-              list_del(head);
+-      }
+-}
+-EXPORT_SYMBOL(unregister_netdevice_many);
+-
+-static void rollback_registered_many(struct list_head *head)
+-{
+       struct net_device *dev, *tmp;
+       LIST_HEAD(close_head);
+       BUG_ON(dev_boot_phase);
+       ASSERT_RTNL();
++      if (list_empty(head))
++              return;
++
+       list_for_each_entry_safe(dev, tmp, head, unreg_list) {
+               /* Some devices call without registering
+                * for initialization unwind. Remove those
+@@ -10771,7 +10762,10 @@ static void rollback_registered_many(str
+               dev_put(dev);
+               net_set_todo(dev);
+       }
++
++      list_del(head);
+ }
++EXPORT_SYMBOL(unregister_netdevice_many);
+ /**
+  *    unregister_netdev - remove device from the kernel
diff --git a/queue-5.10/net-make-free_netdev-more-lenient-with-unregistering-devices.patch b/queue-5.10/net-make-free_netdev-more-lenient-with-unregistering-devices.patch
new file mode 100644
index 0000000..84e5f1a
--- /dev/null
@@ -0,0 +1,117 @@
+From foo@baz Sat Jul 23 05:03:39 PM CEST 2022
+From: Fedor Pchelkin <pchelkin@ispras.ru>
+Date: Fri, 15 Jul 2022 19:26:27 +0300
+Subject: net: make free_netdev() more lenient with unregistering devices
+To: stable@vger.kernel.org, Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Fedor Pchelkin <pchelkin@ispras.ru>, Jakub Kicinski <kuba@kernel.org>, Alexey Khoroshilov <khoroshilov@ispras.ru>
+Message-ID: <20220715162632.332718-3-pchelkin@ispras.ru>
+
+From: Fedor Pchelkin <pchelkin@ispras.ru>
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+commit c269a24ce057abfc31130960e96ab197ef6ab196 upstream.
+
+There are two flavors of handling netdev registration:
+ - ones called without holding rtnl_lock: register_netdev() and
+   unregister_netdev(); and
+ - those called with rtnl_lock held: register_netdevice() and
+   unregister_netdevice().
+
+While the semantics of the former are pretty clear, the same can't
+be said about the latter. The netdev_todo mechanism is utilized to
+perform some of the device unregistering tasks and it hooks into
+rtnl_unlock() so the locked variants can't actually finish the work.
+In general free_netdev() does not mix well with locked calls. Most
+drivers operating under rtnl_lock set dev->needs_free_netdev to true
+and expect core to make the free_netdev() call some time later.
+
+The part where this becomes most problematic is error paths. There is
+no way to unwind the state cleanly after a call to register_netdevice(),
+since unreg can't be performed fully without dropping locks.
+
+Make free_netdev() more lenient, and defer the freeing if device
+is being unregistered. This allows error paths to simply call
+free_netdev() both after register_netdevice() failed, and after
+a call to unregister_netdevice() but before dropping rtnl_lock.
+
+Simplify the error paths which are currently doing gymnastics
+around free_netdev() handling.
+
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Fedor Pchelkin <pchelkin@ispras.ru>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/8021q/vlan.c     |    4 +---
+ net/core/dev.c       |   11 +++++++++++
+ net/core/rtnetlink.c |   23 ++++++-----------------
+ 3 files changed, 18 insertions(+), 20 deletions(-)
+
+--- a/net/8021q/vlan.c
++++ b/net/8021q/vlan.c
+@@ -278,9 +278,7 @@ static int register_vlan_device(struct n
+       return 0;
+ out_free_newdev:
+-      if (new_dev->reg_state == NETREG_UNINITIALIZED ||
+-          new_dev->reg_state == NETREG_UNREGISTERED)
+-              free_netdev(new_dev);
++      free_netdev(new_dev);
+       return err;
+ }
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -10683,6 +10683,17 @@ void free_netdev(struct net_device *dev)
+       struct napi_struct *p, *n;
+       might_sleep();
++
++      /* When called immediately after register_netdevice() failed the unwind
++       * handling may still be dismantling the device. Handle that case by
++       * deferring the free.
++       */
++      if (dev->reg_state == NETREG_UNREGISTERING) {
++              ASSERT_RTNL();
++              dev->needs_free_netdev = true;
++              return;
++      }
++
+       netif_free_tx_queues(dev);
+       netif_free_rx_queues(dev);
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -3442,26 +3442,15 @@ replay:
+       dev->ifindex = ifm->ifi_index;
+-      if (ops->newlink) {
++      if (ops->newlink)
+               err = ops->newlink(link_net ? : net, dev, tb, data, extack);
+-              /* Drivers should set dev->needs_free_netdev
+-               * and unregister it on failure after registration
+-               * so that device could be finally freed in rtnl_unlock.
+-               */
+-              if (err < 0) {
+-                      /* If device is not registered at all, free it now */
+-                      if (dev->reg_state == NETREG_UNINITIALIZED ||
+-                          dev->reg_state == NETREG_UNREGISTERED)
+-                              free_netdev(dev);
+-                      goto out;
+-              }
+-      } else {
++      else
+               err = register_netdevice(dev);
+-              if (err < 0) {
+-                      free_netdev(dev);
+-                      goto out;
+-              }
++      if (err < 0) {
++              free_netdev(dev);
++              goto out;
+       }
++
+       err = rtnl_configure_link(dev, ifm);
+       if (err < 0)
+               goto out_unregister;
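
The payoff of the lenient free_netdev() for drivers working under rtnl_lock: failure paths can call free_netdev() unconditionally, without checking reg_state first. A hedged sketch of the resulting error handling, assuming this patch is applied (my_setup and my_create_link are hypothetical):

    #include <linux/netdevice.h>
    #include <linux/rtnetlink.h>

    static void my_setup(struct net_device *dev)
    {
            /* hypothetical setup callback */
    }

    static int my_create_link(void)
    {
            struct net_device *dev;
            int err;

            ASSERT_RTNL();
            dev = alloc_netdev(0, "my%d", NET_NAME_UNKNOWN, my_setup);
            if (!dev)
                    return -ENOMEM;

            err = register_netdevice(dev);
            if (err) {
                    /* Safe even if the unwind is still in flight: with
                     * reg_state == NETREG_UNREGISTERING, free_netdev()
                     * just sets needs_free_netdev and defers the free. */
                    free_netdev(dev);
                    return err;
            }
            return 0;
    }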
diff --git a/queue-5.10/net-make-sure-devices-go-through-netdev_wait_all_refs.patch b/queue-5.10/net-make-sure-devices-go-through-netdev_wait_all_refs.patch
new file mode 100644
index 0000000..41e0ecd
--- /dev/null
@@ -0,0 +1,68 @@
+From foo@baz Sat Jul 23 05:03:39 PM CEST 2022
+From: Fedor Pchelkin <pchelkin@ispras.ru>
+Date: Fri, 15 Jul 2022 19:26:28 +0300
+Subject: net: make sure devices go through netdev_wait_all_refs
+To: stable@vger.kernel.org, Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Fedor Pchelkin <pchelkin@ispras.ru>, Jakub Kicinski <kuba@kernel.org>, Alexey Khoroshilov <khoroshilov@ispras.ru>, Hulk Robot <hulkci@huawei.com>, Yang Yingliang <yangyingliang@huawei.com>
+Message-ID: <20220715162632.332718-4-pchelkin@ispras.ru>
+
+From: Fedor Pchelkin <pchelkin@ispras.ru>
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+commit 766b0515d5bec4b780750773ed3009b148df8c0a upstream.
+
+If register_netdevice() fails at the very last stage - the
+notifier call - some subsystems may have already seen it and
+grabbed a reference. struct net_device can't be freed right
+away without calling netdev_wait_all_refs().
+
+Now that we have a clean interface in the form of dev->needs_free_netdev
+and lenient free_netdev() we can undo what commit 93ee31f14f6f ("[NET]:
+Fix free_netdev on register_netdev failure.") has done and complete
+the unregistration path by bringing the net_set_todo() call back.
+
+After registration fails, the user is still expected to explicitly
+free the net_device, so make sure ->needs_free_netdev is cleared,
+otherwise rolling back the registration will cause the old double
+free for callers who release rtnl_lock before the free.
+
+This also solves the problem of priv_destructor not being called
+on notifier error.
+
+net_set_todo() will be moved back into unregister_netdevice_queue()
+in a follow up.
+
+Reported-by: Hulk Robot <hulkci@huawei.com>
+Reported-by: Yang Yingliang <yangyingliang@huawei.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Fedor Pchelkin <pchelkin@ispras.ru>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c |   14 ++++----------
+ 1 file changed, 4 insertions(+), 10 deletions(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -10144,17 +10144,11 @@ int register_netdevice(struct net_device
+       ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
+       ret = notifier_to_errno(ret);
+       if (ret) {
++              /* Expect explicit free_netdev() on failure */
++              dev->needs_free_netdev = false;
+               rollback_registered(dev);
+-              rcu_barrier();
+-
+-              dev->reg_state = NETREG_UNREGISTERED;
+-              /* We should put the kobject that hold in
+-               * netdev_unregister_kobject(), otherwise
+-               * the net device cannot be freed when
+-               * driver calls free_netdev(), because the
+-               * kobject is being hold.
+-               */
+-              kobject_put(&dev->dev.kobj);
++              net_set_todo(dev);
++              goto out;
+       }
+       /*
+        *      Prevent userspace races by waiting until the network
diff --git a/queue-5.10/net-move-net_set_todo-inside-rollback_registered.patch b/queue-5.10/net-move-net_set_todo-inside-rollback_registered.patch
new file mode 100644
index 0000000..0505001
--- /dev/null
@@ -0,0 +1,80 @@
+From foo@baz Sat Jul 23 05:03:39 PM CEST 2022
+From: Fedor Pchelkin <pchelkin@ispras.ru>
+Date: Fri, 15 Jul 2022 19:26:29 +0300
+Subject: net: move net_set_todo inside rollback_registered()
+To: stable@vger.kernel.org, Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Fedor Pchelkin <pchelkin@ispras.ru>, Jakub Kicinski <kuba@kernel.org>, Alexey Khoroshilov <khoroshilov@ispras.ru>, Edwin Peer <edwin.peer@broadcom.com>
+Message-ID: <20220715162632.332718-5-pchelkin@ispras.ru>
+
+From: Fedor Pchelkin <pchelkin@ispras.ru>
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+commit 2014beea7eb165c745706b13659a0f1d0a9a2a61 upstream.
+
+Commit 93ee31f14f6f ("[NET]: Fix free_netdev on register_netdev
+failure.") moved net_set_todo() outside of rollback_registered()
+so that rollback_registered() can be used in the failure path of
+register_netdevice() but without risking a double free.
+
+Since commit cf124db566e6 ("net: Fix inconsistent teardown and
+release of private netdev state."), however, we have a better
+way of handling that condition, since destructors don't call
+free_netdev() directly.
+
+After the change in commit c269a24ce057 ("net: make free_netdev()
+more lenient with unregistering devices") we can now move
+net_set_todo() back.
+
+Reviewed-by: Edwin Peer <edwin.peer@broadcom.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Fedor Pchelkin <pchelkin@ispras.ru>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c |   11 +++--------
+ 1 file changed, 3 insertions(+), 8 deletions(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -9595,8 +9595,10 @@ static void rollback_registered_many(str
+       synchronize_net();
+-      list_for_each_entry(dev, head, unreg_list)
++      list_for_each_entry(dev, head, unreg_list) {
+               dev_put(dev);
++              net_set_todo(dev);
++      }
+ }
+ static void rollback_registered(struct net_device *dev)
+@@ -10147,7 +10149,6 @@ int register_netdevice(struct net_device
+               /* Expect explicit free_netdev() on failure */
+               dev->needs_free_netdev = false;
+               rollback_registered(dev);
+-              net_set_todo(dev);
+               goto out;
+       }
+       /*
+@@ -10755,8 +10756,6 @@ void unregister_netdevice_queue(struct n
+               list_move_tail(&dev->unreg_list, head);
+       } else {
+               rollback_registered(dev);
+-              /* Finish processing unregister after unlock */
+-              net_set_todo(dev);
+       }
+ }
+ EXPORT_SYMBOL(unregister_netdevice_queue);
+@@ -10770,12 +10769,8 @@ EXPORT_SYMBOL(unregister_netdevice_queue
+  */
+ void unregister_netdevice_many(struct list_head *head)
+ {
+-      struct net_device *dev;
+-
+       if (!list_empty(head)) {
+               rollback_registered_many(head);
+-              list_for_each_entry(dev, head, unreg_list)
+-                      net_set_todo(dev);
+               list_del(head);
+       }
+ }
diff --git a/queue-5.10/net-move-rollback_registered_many.patch b/queue-5.10/net-move-rollback_registered_many.patch
new file mode 100644
index 0000000..29d9e5b
--- /dev/null
@@ -0,0 +1,239 @@
+From foo@baz Sat Jul 23 05:03:39 PM CEST 2022
+From: Fedor Pchelkin <pchelkin@ispras.ru>
+Date: Fri, 15 Jul 2022 19:26:31 +0300
+Subject: net: move rollback_registered_many()
+To: stable@vger.kernel.org, Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Fedor Pchelkin <pchelkin@ispras.ru>, Jakub Kicinski <kuba@kernel.org>, Alexey Khoroshilov <khoroshilov@ispras.ru>, Edwin Peer <edwin.peer@broadcom.com>
+Message-ID: <20220715162632.332718-7-pchelkin@ispras.ru>
+
+From: Fedor Pchelkin <pchelkin@ispras.ru>
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+commit bcfe2f1a3818d9dca945b6aca4ae741cb1f75329 upstream.
+
+Move rollback_registered_many() and add a temporary
+forward declaration to make merging the code into
+unregister_netdevice_many() easier to review.
+
+No functional changes.
+
+Reviewed-by: Edwin Peer <edwin.peer@broadcom.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Fedor Pchelkin <pchelkin@ispras.ru>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c |  188 ++++++++++++++++++++++++++++-----------------------------
+ 1 file changed, 95 insertions(+), 93 deletions(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -9508,99 +9508,6 @@ static void net_set_todo(struct net_devi
+       dev_net(dev)->dev_unreg_count++;
+ }
+-static void rollback_registered_many(struct list_head *head)
+-{
+-      struct net_device *dev, *tmp;
+-      LIST_HEAD(close_head);
+-
+-      BUG_ON(dev_boot_phase);
+-      ASSERT_RTNL();
+-
+-      list_for_each_entry_safe(dev, tmp, head, unreg_list) {
+-              /* Some devices call without registering
+-               * for initialization unwind. Remove those
+-               * devices and proceed with the remaining.
+-               */
+-              if (dev->reg_state == NETREG_UNINITIALIZED) {
+-                      pr_debug("unregister_netdevice: device %s/%p never was registered\n",
+-                               dev->name, dev);
+-
+-                      WARN_ON(1);
+-                      list_del(&dev->unreg_list);
+-                      continue;
+-              }
+-              dev->dismantle = true;
+-              BUG_ON(dev->reg_state != NETREG_REGISTERED);
+-      }
+-
+-      /* If device is running, close it first. */
+-      list_for_each_entry(dev, head, unreg_list)
+-              list_add_tail(&dev->close_list, &close_head);
+-      dev_close_many(&close_head, true);
+-
+-      list_for_each_entry(dev, head, unreg_list) {
+-              /* And unlink it from device chain. */
+-              unlist_netdevice(dev);
+-
+-              dev->reg_state = NETREG_UNREGISTERING;
+-      }
+-      flush_all_backlogs();
+-
+-      synchronize_net();
+-
+-      list_for_each_entry(dev, head, unreg_list) {
+-              struct sk_buff *skb = NULL;
+-
+-              /* Shutdown queueing discipline. */
+-              dev_shutdown(dev);
+-
+-              dev_xdp_uninstall(dev);
+-
+-              /* Notify protocols, that we are about to destroy
+-               * this device. They should clean all the things.
+-               */
+-              call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
+-
+-              if (!dev->rtnl_link_ops ||
+-                  dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
+-                      skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
+-                                                   GFP_KERNEL, NULL, 0);
+-
+-              /*
+-               *      Flush the unicast and multicast chains
+-               */
+-              dev_uc_flush(dev);
+-              dev_mc_flush(dev);
+-
+-              netdev_name_node_alt_flush(dev);
+-              netdev_name_node_free(dev->name_node);
+-
+-              if (dev->netdev_ops->ndo_uninit)
+-                      dev->netdev_ops->ndo_uninit(dev);
+-
+-              if (skb)
+-                      rtmsg_ifinfo_send(skb, dev, GFP_KERNEL);
+-
+-              /* Notifier chain MUST detach us all upper devices. */
+-              WARN_ON(netdev_has_any_upper_dev(dev));
+-              WARN_ON(netdev_has_any_lower_dev(dev));
+-
+-              /* Remove entries from kobject tree */
+-              netdev_unregister_kobject(dev);
+-#ifdef CONFIG_XPS
+-              /* Remove XPS queueing entries */
+-              netif_reset_xps_queues_gt(dev, 0);
+-#endif
+-      }
+-
+-      synchronize_net();
+-
+-      list_for_each_entry(dev, head, unreg_list) {
+-              dev_put(dev);
+-              net_set_todo(dev);
+-      }
+-}
+-
+ static netdev_features_t netdev_sync_upper_features(struct net_device *lower,
+       struct net_device *upper, netdev_features_t features)
+ {
+@@ -10726,6 +10633,8 @@ void synchronize_net(void)
+ }
+ EXPORT_SYMBOL(synchronize_net);
++static void rollback_registered_many(struct list_head *head);
++
+ /**
+  *    unregister_netdevice_queue - remove device from the kernel
+  *    @dev: device
+@@ -10771,6 +10680,99 @@ void unregister_netdevice_many(struct li
+ }
+ EXPORT_SYMBOL(unregister_netdevice_many);
++static void rollback_registered_many(struct list_head *head)
++{
++      struct net_device *dev, *tmp;
++      LIST_HEAD(close_head);
++
++      BUG_ON(dev_boot_phase);
++      ASSERT_RTNL();
++
++      list_for_each_entry_safe(dev, tmp, head, unreg_list) {
++              /* Some devices call without registering
++               * for initialization unwind. Remove those
++               * devices and proceed with the remaining.
++               */
++              if (dev->reg_state == NETREG_UNINITIALIZED) {
++                      pr_debug("unregister_netdevice: device %s/%p never was registered\n",
++                               dev->name, dev);
++
++                      WARN_ON(1);
++                      list_del(&dev->unreg_list);
++                      continue;
++              }
++              dev->dismantle = true;
++              BUG_ON(dev->reg_state != NETREG_REGISTERED);
++      }
++
++      /* If device is running, close it first. */
++      list_for_each_entry(dev, head, unreg_list)
++              list_add_tail(&dev->close_list, &close_head);
++      dev_close_many(&close_head, true);
++
++      list_for_each_entry(dev, head, unreg_list) {
++              /* And unlink it from device chain. */
++              unlist_netdevice(dev);
++
++              dev->reg_state = NETREG_UNREGISTERING;
++      }
++      flush_all_backlogs();
++
++      synchronize_net();
++
++      list_for_each_entry(dev, head, unreg_list) {
++              struct sk_buff *skb = NULL;
++
++              /* Shutdown queueing discipline. */
++              dev_shutdown(dev);
++
++              dev_xdp_uninstall(dev);
++
++              /* Notify protocols, that we are about to destroy
++               * this device. They should clean all the things.
++               */
++              call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
++
++              if (!dev->rtnl_link_ops ||
++                  dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
++                      skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
++                                                   GFP_KERNEL, NULL, 0);
++
++              /*
++               *      Flush the unicast and multicast chains
++               */
++              dev_uc_flush(dev);
++              dev_mc_flush(dev);
++
++              netdev_name_node_alt_flush(dev);
++              netdev_name_node_free(dev->name_node);
++
++              if (dev->netdev_ops->ndo_uninit)
++                      dev->netdev_ops->ndo_uninit(dev);
++
++              if (skb)
++                      rtmsg_ifinfo_send(skb, dev, GFP_KERNEL);
++
++              /* Notifier chain MUST detach us all upper devices. */
++              WARN_ON(netdev_has_any_upper_dev(dev));
++              WARN_ON(netdev_has_any_lower_dev(dev));
++
++              /* Remove entries from kobject tree */
++              netdev_unregister_kobject(dev);
++#ifdef CONFIG_XPS
++              /* Remove XPS queueing entries */
++              netif_reset_xps_queues_gt(dev, 0);
++#endif
++      }
++
++      synchronize_net();
++
++      list_for_each_entry(dev, head, unreg_list) {
++              dev_put(dev);
++              net_set_todo(dev);
++      }
++}
++
+ /**
+  *    unregister_netdev - remove device from the kernel
+  *    @dev: device
diff --git a/queue-5.10/series b/queue-5.10/series
index a180deda5c8c89ff33ca86833e3993ed5068792b..2f7c45e97e81ed57be564a453ca1120ed7345c74 100644
@@ -4,3 +4,12 @@ mlxsw-spectrum_router-fix-ipv4-nexthop-gateway-indication.patch
 lockdown-fix-kexec-lockdown-bypass-with-ima-policy.patch
 io_uring-use-original-task-for-req-identity-in-io_identity_cow.patch
 xen-gntdev-ignore-failure-to-unmap-invalid_grant_handle.patch
+block-split-bio_kmalloc-from-bio_alloc_bioset.patch
+block-fix-bounce_clone_bio-for-passthrough-bios.patch
+docs-net-explain-struct-net_device-lifetime.patch
+net-make-free_netdev-more-lenient-with-unregistering-devices.patch
+net-make-sure-devices-go-through-netdev_wait_all_refs.patch
+net-move-net_set_todo-inside-rollback_registered.patch
+net-inline-rollback_registered.patch
+net-move-rollback_registered_many.patch
+net-inline-rollback_registered_many.patch