git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.9-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 26 Aug 2019 16:50:45 +0000 (18:50 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 26 Aug 2019 16:50:45 +0000 (18:50 +0200)
added patches:
mm-zsmalloc.c-fix-race-condition-in-zs_destroy_pool.patch
mm-zsmalloc.c-migration-can-leave-pages-in-zs_empty-indefinitely.patch
xfs-fix-missing-ilock-unlock-when-xfs_setattr_nonsize-fails-due-to-edquot.patch

queue-4.9/mm-zsmalloc.c-fix-race-condition-in-zs_destroy_pool.patch [new file with mode: 0644]
queue-4.9/mm-zsmalloc.c-migration-can-leave-pages-in-zs_empty-indefinitely.patch [new file with mode: 0644]
queue-4.9/series
queue-4.9/xfs-fix-missing-ilock-unlock-when-xfs_setattr_nonsize-fails-due-to-edquot.patch [new file with mode: 0644]

diff --git a/queue-4.9/mm-zsmalloc.c-fix-race-condition-in-zs_destroy_pool.patch b/queue-4.9/mm-zsmalloc.c-fix-race-condition-in-zs_destroy_pool.patch
new file mode 100644 (file)
index 0000000..aacf142
--- /dev/null
@@ -0,0 +1,171 @@
+From 701d678599d0c1623aaf4139c03eea260a75b027 Mon Sep 17 00:00:00 2001
+From: Henry Burns <henryburns@google.com>
+Date: Sat, 24 Aug 2019 17:55:06 -0700
+Subject: mm/zsmalloc.c: fix race condition in zs_destroy_pool
+
+From: Henry Burns <henryburns@google.com>
+
+commit 701d678599d0c1623aaf4139c03eea260a75b027 upstream.
+
+In zs_destroy_pool() we call flush_work(&pool->free_work).  However, we
+have no guarantee that migration isn't happening in the background at
+that time.
+
+Since migration can't directly free pages, it relies on free_work being
+scheduled to free the pages.  But there's nothing preventing an
+in-progress migrate from queuing the work *after*
+zs_unregister_migration() has called flush_work(), which would leave
+pages still pointing at the inode when we free it.
+
+Since we know at destroy time all objects should be free, no new
+migrations can come in (since zs_page_isolate() fails for fully-free
+zspages).  This means it is sufficient to track a "# isolated zspages"
+count by class, and have the destroy logic ensure all such pages have
+drained before proceeding.  Keeping that state under the class spinlock
+keeps the logic straightforward.
+
+In this case a memory leak could lead to an eventual crash if compaction
+hits the leaked page.  This crash would only occur if people are
+changing their zswap backend at runtime (which eventually starts
+destruction).
+
+Link: http://lkml.kernel.org/r/20190809181751.219326-2-henryburns@google.com
+Fixes: 48b4800a1c6a ("zsmalloc: page migration support")
+Signed-off-by: Henry Burns <henryburns@google.com>
+Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Cc: Henry Burns <henrywolfeburns@gmail.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Shakeel Butt <shakeelb@google.com>
+Cc: Jonathan Adams <jwadams@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/zsmalloc.c |   61 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
+ 1 file changed, 59 insertions(+), 2 deletions(-)
+
+--- a/mm/zsmalloc.c
++++ b/mm/zsmalloc.c
+@@ -52,6 +52,7 @@
+ #include <linux/zpool.h>
+ #include <linux/mount.h>
+ #include <linux/migrate.h>
++#include <linux/wait.h>
+ #include <linux/pagemap.h>
+ #define ZSPAGE_MAGIC  0x58
+@@ -265,6 +266,10 @@ struct zs_pool {
+ #ifdef CONFIG_COMPACTION
+       struct inode *inode;
+       struct work_struct free_work;
++      /* A wait queue for when migration races with async_free_zspage() */
++      struct wait_queue_head migration_wait;
++      atomic_long_t isolated_pages;
++      bool destroying;
+ #endif
+ };
+@@ -1951,6 +1956,19 @@ static void putback_zspage_deferred(stru
+ }
++static inline void zs_pool_dec_isolated(struct zs_pool *pool)
++{
++      VM_BUG_ON(atomic_long_read(&pool->isolated_pages) <= 0);
++      atomic_long_dec(&pool->isolated_pages);
++      /*
++       * There's no possibility of racing, since wait_for_isolated_drain()
++       * checks the isolated count under &class->lock after enqueuing
++       * on migration_wait.
++       */
++      if (atomic_long_read(&pool->isolated_pages) == 0 && pool->destroying)
++              wake_up_all(&pool->migration_wait);
++}
++
+ static void replace_sub_page(struct size_class *class, struct zspage *zspage,
+                               struct page *newpage, struct page *oldpage)
+ {
+@@ -2020,6 +2038,7 @@ bool zs_page_isolate(struct page *page,
+        */
+       if (!list_empty(&zspage->list) && !is_zspage_isolated(zspage)) {
+               get_zspage_mapping(zspage, &class_idx, &fullness);
++              atomic_long_inc(&pool->isolated_pages);
+               remove_zspage(class, zspage, fullness);
+       }
+@@ -2108,8 +2127,16 @@ int zs_page_migrate(struct address_space
+        * Page migration is done so let's putback isolated zspage to
+        * the list if @page is final isolated subpage in the zspage.
+        */
+-      if (!is_zspage_isolated(zspage))
++      if (!is_zspage_isolated(zspage)) {
++              /*
++               * We cannot race with zs_destroy_pool() here because we wait
++               * for isolation to hit zero before we start destroying.
++               * Also, we ensure that everyone can see pool->destroying before
++               * we start waiting.
++               */
+               putback_zspage_deferred(pool, class, zspage);
++              zs_pool_dec_isolated(pool);
++      }
+       reset_page(page);
+       put_page(page);
+@@ -2161,8 +2188,8 @@ void zs_page_putback(struct page *page)
+                * so let's defer.
+                */
+               putback_zspage_deferred(pool, class, zspage);
++              zs_pool_dec_isolated(pool);
+       }
+-
+       spin_unlock(&class->lock);
+ }
+@@ -2185,8 +2212,36 @@ static int zs_register_migration(struct
+       return 0;
+ }
++static bool pool_isolated_are_drained(struct zs_pool *pool)
++{
++      return atomic_long_read(&pool->isolated_pages) == 0;
++}
++
++/* Function for resolving migration */
++static void wait_for_isolated_drain(struct zs_pool *pool)
++{
++
++      /*
++       * We're in the process of destroying the pool, so there are no
++       * active allocations. zs_page_isolate() fails for completely free
++       * zspages, so we need only wait for the zs_pool's isolated
++       * count to hit zero.
++       */
++      wait_event(pool->migration_wait,
++                 pool_isolated_are_drained(pool));
++}
++
+ static void zs_unregister_migration(struct zs_pool *pool)
+ {
++      pool->destroying = true;
++      /*
++       * We need a memory barrier here to ensure global visibility of
++       * pool->destroying. Thus pool->isolated pages will either be 0 in which
++       * case we don't care, or it will be > 0 and pool->destroying will
++       * ensure that we wake up once isolation hits 0.
++       */
++      smp_mb();
++      wait_for_isolated_drain(pool); /* This can block */
+       flush_work(&pool->free_work);
+       iput(pool->inode);
+ }
+@@ -2433,6 +2488,8 @@ struct zs_pool *zs_create_pool(const cha
+       if (!pool->name)
+               goto err;
++      init_waitqueue_head(&pool->migration_wait);
++
+       if (create_cache(pool))
+               goto err;
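The core of the fix above is a drain-wait handshake: migration bumps a per-pool
isolated count, pool teardown sets a destroying flag, issues a memory barrier,
and then sleeps until the count drains to zero before flushing free_work and
dropping the inode.  What follows is a minimal userspace sketch of that
handshake only, not the kernel code: the names (fake_pool, worker,
pool_destroy) are invented, and a pthread mutex/condvar stands in for the
kernel's atomic counter, smp_mb() and wait queue.  Build with gcc -pthread.

/*
 * Illustrative userspace sketch of the drain-wait handshake only; this is
 * NOT the kernel code.  A pthread mutex/condvar stands in for the kernel's
 * atomic counter, smp_mb() and wait queue, and all names here are made up.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

struct fake_pool {
        pthread_mutex_t lock;
        pthread_cond_t drained;         /* stands in for migration_wait */
        long isolated_pages;            /* stands in for pool->isolated_pages */
        bool destroying;
};

static void pool_inc_isolated(struct fake_pool *pool)
{
        pthread_mutex_lock(&pool->lock);
        pool->isolated_pages++;
        pthread_mutex_unlock(&pool->lock);
}

static void pool_dec_isolated(struct fake_pool *pool)
{
        pthread_mutex_lock(&pool->lock);
        pool->isolated_pages--;
        /* Wake the destroyer once the last isolated page has been put back. */
        if (pool->isolated_pages == 0 && pool->destroying)
                pthread_cond_broadcast(&pool->drained);
        pthread_mutex_unlock(&pool->lock);
}

/* "Migration": briefly holds a page isolated, then puts it back. */
static void *worker(void *arg)
{
        struct fake_pool *pool = arg;

        pool_inc_isolated(pool);
        usleep(100 * 1000);             /* pretend to migrate the page */
        pool_dec_isolated(pool);
        return NULL;
}

/* Mirrors the shape of zs_unregister_migration(): flag, then wait for drain. */
static void pool_destroy(struct fake_pool *pool)
{
        pthread_mutex_lock(&pool->lock);
        pool->destroying = true;
        while (pool->isolated_pages != 0)
                pthread_cond_wait(&pool->drained, &pool->lock);
        pthread_mutex_unlock(&pool->lock);
        printf("all isolated pages drained, safe to tear down\n");
}

int main(void)
{
        struct fake_pool pool = {
                .lock = PTHREAD_MUTEX_INITIALIZER,
                .drained = PTHREAD_COND_INITIALIZER,
        };
        pthread_t t;

        pthread_create(&t, NULL, worker, &pool);
        usleep(10 * 1000);              /* let the worker isolate a page first */
        pool_destroy(&pool);
        pthread_join(&t, NULL);
        return 0;
}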
diff --git a/queue-4.9/mm-zsmalloc.c-migration-can-leave-pages-in-zs_empty-indefinitely.patch b/queue-4.9/mm-zsmalloc.c-migration-can-leave-pages-in-zs_empty-indefinitely.patch
new file mode 100644 (file)
index 0000000..5f5d0dd
--- /dev/null
@@ -0,0 +1,87 @@
+From 1a87aa03597efa9641e92875b883c94c7f872ccb Mon Sep 17 00:00:00 2001
+From: Henry Burns <henryburns@google.com>
+Date: Sat, 24 Aug 2019 17:55:03 -0700
+Subject: mm/zsmalloc.c: migration can leave pages in ZS_EMPTY indefinitely
+
+From: Henry Burns <henryburns@google.com>
+
+commit 1a87aa03597efa9641e92875b883c94c7f872ccb upstream.
+
+In zs_page_migrate() we call putback_zspage() after we have finished
+migrating all pages in this zspage.  However, the return value is
+ignored.  If a zs_free() races in between zs_page_isolate() and
+zs_page_migrate(), freeing the last object in the zspage,
+putback_zspage() will leave the page in ZS_EMPTY for potentially an
+unbounded amount of time.
+
+To fix this, we need to do the same thing as zs_page_putback() does:
+schedule free_work to occur.
+
+To avoid duplicated code, move the sequence to a new
+putback_zspage_deferred() function which both zs_page_migrate() and
+zs_page_putback() call.
+
+Link: http://lkml.kernel.org/r/20190809181751.219326-1-henryburns@google.com
+Fixes: 48b4800a1c6a ("zsmalloc: page migration support")
+Signed-off-by: Henry Burns <henryburns@google.com>
+Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Cc: Henry Burns <henrywolfeburns@gmail.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Shakeel Butt <shakeelb@google.com>
+Cc: Jonathan Adams <jwadams@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/zsmalloc.c |   19 +++++++++++++++----
+ 1 file changed, 15 insertions(+), 4 deletions(-)
+
+--- a/mm/zsmalloc.c
++++ b/mm/zsmalloc.c
+@@ -1939,6 +1939,18 @@ static void dec_zspage_isolation(struct
+       zspage->isolated--;
+ }
++static void putback_zspage_deferred(struct zs_pool *pool,
++                                  struct size_class *class,
++                                  struct zspage *zspage)
++{
++      enum fullness_group fg;
++
++      fg = putback_zspage(class, zspage);
++      if (fg == ZS_EMPTY)
++              schedule_work(&pool->free_work);
++
++}
++
+ static void replace_sub_page(struct size_class *class, struct zspage *zspage,
+                               struct page *newpage, struct page *oldpage)
+ {
+@@ -2097,7 +2109,7 @@ int zs_page_migrate(struct address_space
+        * the list if @page is final isolated subpage in the zspage.
+        */
+       if (!is_zspage_isolated(zspage))
+-              putback_zspage(class, zspage);
++              putback_zspage_deferred(pool, class, zspage);
+       reset_page(page);
+       put_page(page);
+@@ -2144,14 +2156,13 @@ void zs_page_putback(struct page *page)
+       spin_lock(&class->lock);
+       dec_zspage_isolation(zspage);
+       if (!is_zspage_isolated(zspage)) {
+-              fg = putback_zspage(class, zspage);
+               /*
+                * Due to page_lock, we cannot free zspage immediately
+                * so let's defer.
+                */
+-              if (fg == ZS_EMPTY)
+-                      schedule_work(&pool->free_work);
++              putback_zspage_deferred(pool, class, zspage);
+       }
++
+       spin_unlock(&class->lock);
+ }
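The patch above boils down to checking the fullness group that
putback_zspage() reports instead of discarding it, and scheduling the pool's
free_work when the zspage turned out to be empty, since the page lock held at
that point forbids freeing inline.  Below is a tiny standalone sketch of that
shape only, not the kernel code: the names (fake_pool, objects_in_use) are
invented and a plain bool stands in for the real workqueue item.

/*
 * Minimal sketch of the deferred-putback shape; NOT the kernel code.
 * A bool stands in for the pool's free_work item, and the names below
 * (fake_pool, objects_in_use) are invented for illustration.
 */
#include <stdbool.h>
#include <stdio.h>

enum fullness_group { ZS_EMPTY, ZS_ALMOST_EMPTY, ZS_FULL };

struct fake_pool {
        bool free_work_pending;         /* stands in for pool->free_work */
};

/* Pretend putback: reports how full the zspage is after reinsertion. */
static enum fullness_group putback_zspage(int objects_in_use)
{
        return objects_in_use == 0 ? ZS_EMPTY : ZS_FULL;
}

static void schedule_free_work(struct fake_pool *pool)
{
        pool->free_work_pending = true; /* the zspage is freed later, not here */
}

/* The consolidated helper that both the migrate and putback paths call. */
static void putback_zspage_deferred(struct fake_pool *pool, int objects_in_use)
{
        if (putback_zspage(objects_in_use) == ZS_EMPTY)
                schedule_free_work(pool);
}

int main(void)
{
        struct fake_pool pool = { .free_work_pending = false };

        /* A racing zs_free() dropped the last object during migration ... */
        putback_zspage_deferred(&pool, 0);

        /* ... so the now-empty zspage is queued for freeing, not leaked. */
        printf("free_work scheduled: %s\n",
               pool.free_work_pending ? "yes" : "no");
        return 0;
}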
diff --git a/queue-4.9/series b/queue-4.9/series
index b8e7da35714d2028fddf7917d6a2a46b6f495b5c..b9040311ba7efa86c0b6aa25659859165d3eba77 100644 (file)
--- a/queue-4.9/series
@@ -39,3 +39,6 @@ dm-space-map-metadata-fix-missing-store-of-apply_bops-return-value.patch
 dm-table-fix-invalid-memory-accesses-with-too-high-sector-number.patch
 genirq-properly-pair-kobject_del-with-kobject_add.patch
 mm-page_owner-handle-thp-splits-correctly.patch
+mm-zsmalloc.c-migration-can-leave-pages-in-zs_empty-indefinitely.patch
+mm-zsmalloc.c-fix-race-condition-in-zs_destroy_pool.patch
+xfs-fix-missing-ilock-unlock-when-xfs_setattr_nonsize-fails-due-to-edquot.patch
diff --git a/queue-4.9/xfs-fix-missing-ilock-unlock-when-xfs_setattr_nonsize-fails-due-to-edquot.patch b/queue-4.9/xfs-fix-missing-ilock-unlock-when-xfs_setattr_nonsize-fails-due-to-edquot.patch
new file mode 100644 (file)
index 0000000..0075c19
--- /dev/null
@@ -0,0 +1,63 @@
+From 1fb254aa983bf190cfd685d40c64a480a9bafaee Mon Sep 17 00:00:00 2001
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+Date: Thu, 22 Aug 2019 20:55:54 -0700
+Subject: xfs: fix missing ILOCK unlock when xfs_setattr_nonsize fails due to EDQUOT
+
+From: Darrick J. Wong <darrick.wong@oracle.com>
+
+commit 1fb254aa983bf190cfd685d40c64a480a9bafaee upstream.
+
+Benjamin Moody reported to Debian that XFS partially wedges when a chgrp
+fails on account of being out of disk quota.  I ran his reproducer
+script:
+
+# adduser dummy
+# adduser dummy plugdev
+
+# dd if=/dev/zero bs=1M count=100 of=test.img
+# mkfs.xfs test.img
+# mount -t xfs -o gquota test.img /mnt
+# mkdir -p /mnt/dummy
+# chown -c dummy /mnt/dummy
+# xfs_quota -xc 'limit -g bsoft=100k bhard=100k plugdev' /mnt
+
+(and then as user dummy)
+
+$ dd if=/dev/urandom bs=1M count=50 of=/mnt/dummy/foo
+$ chgrp plugdev /mnt/dummy/foo
+
+and saw:
+
+================================================
+WARNING: lock held when returning to user space!
+5.3.0-rc5 #rc5 Tainted: G        W
+------------------------------------------------
+chgrp/47006 is leaving the kernel with locks still held!
+1 lock held by chgrp/47006:
+ #0: 000000006664ea2d (&xfs_nondir_ilock_class){++++}, at: xfs_ilock+0xd2/0x290 [xfs]
+
+...which is clearly caused by xfs_setattr_nonsize failing to unlock the
+ILOCK after the xfs_qm_vop_chown_reserve call fails.  Add the missing
+unlock.
+
+Reported-by: benjamin.moody@gmail.com
+Fixes: 253f4911f297 ("xfs: better xfs_trans_alloc interface")
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Tested-by: Salvatore Bonaccorso <carnil@debian.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_iops.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/xfs/xfs_iops.c
++++ b/fs/xfs/xfs_iops.c
+@@ -774,6 +774,7 @@ xfs_setattr_nonsize(
+ out_cancel:
+       xfs_trans_cancel(tp);
++      xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ out_dqrele:
+       xfs_qm_dqrele(udqp);
+       xfs_qm_dqrele(gdqp);
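The one-line fix mirrors a general unwind rule: every exit path taken after
xfs_ilock() must pair it with xfs_iunlock(), including the out_cancel error
path reached when the quota reservation fails with -EDQUOT.  The sketch below
shows only that unwind ordering, not the XFS code: the names (setattr_nonsize,
reserve_quota) are invented and a pthread mutex stands in for the inode ILOCK.

/*
 * Userspace sketch of the unwind ordering only; NOT the XFS code.  A pthread
 * mutex stands in for the inode ILOCK and every name below is invented.
 */
#include <errno.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t ilock = PTHREAD_MUTEX_INITIALIZER;

static int reserve_quota(int blocks)
{
        return blocks > 100 ? -EDQUOT : 0;      /* pretend the group quota is full */
}

static int setattr_nonsize(int blocks)
{
        int error;

        pthread_mutex_lock(&ilock);             /* xfs_ilock(ip, XFS_ILOCK_EXCL) */

        error = reserve_quota(blocks);
        if (error)
                goto out_cancel;

        /* ... join the inode, commit the transaction ... */
        pthread_mutex_unlock(&ilock);
        return 0;

out_cancel:
        /* cancel the transaction ... */
        pthread_mutex_unlock(&ilock);           /* the unlock the patch adds */
        /* fall through to dropping the dquot references ... */
        return error;
}

int main(void)
{
        /* Forcing the -EDQUOT path now returns with the lock dropped instead
         * of tripping the "lock held when returning to user space!" splat. */
        printf("setattr_nonsize: %d\n", setattr_nonsize(500));
        return 0;
}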