From 44d4609e32c7154d360938e021ddbef88902d1f6 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 26 Aug 2019 19:06:44 +0200
Subject: [PATCH] 4.14-stable patches

added patches:
	mm-zsmalloc.c-fix-race-condition-in-zs_destroy_pool.patch
	mm-zsmalloc.c-migration-can-leave-pages-in-zs_empty-indefinitely.patch
	xfs-fix-missing-ilock-unlock-when-xfs_setattr_nonsize-fails-due-to-edquot.patch
---
 ...ix-race-condition-in-zs_destroy_pool.patch | 171 ++++++++++++++++++
 ...leave-pages-in-zs_empty-indefinitely.patch |  87 +++++++++
 queue-4.14/series                             |   3 +
 ..._setattr_nonsize-fails-due-to-edquot.patch |  63 +++++++
 4 files changed, 324 insertions(+)
 create mode 100644 queue-4.14/mm-zsmalloc.c-fix-race-condition-in-zs_destroy_pool.patch
 create mode 100644 queue-4.14/mm-zsmalloc.c-migration-can-leave-pages-in-zs_empty-indefinitely.patch
 create mode 100644 queue-4.14/xfs-fix-missing-ilock-unlock-when-xfs_setattr_nonsize-fails-due-to-edquot.patch

diff --git a/queue-4.14/mm-zsmalloc.c-fix-race-condition-in-zs_destroy_pool.patch b/queue-4.14/mm-zsmalloc.c-fix-race-condition-in-zs_destroy_pool.patch
new file mode 100644
index 00000000000..61696d4e22d
--- /dev/null
+++ b/queue-4.14/mm-zsmalloc.c-fix-race-condition-in-zs_destroy_pool.patch
@@ -0,0 +1,171 @@
+From 701d678599d0c1623aaf4139c03eea260a75b027 Mon Sep 17 00:00:00 2001
+From: Henry Burns <henryburns@google.com>
+Date: Sat, 24 Aug 2019 17:55:06 -0700
+Subject: mm/zsmalloc.c: fix race condition in zs_destroy_pool
+
+From: Henry Burns <henryburns@google.com>
+
+commit 701d678599d0c1623aaf4139c03eea260a75b027 upstream.
+
+In zs_destroy_pool() we call flush_work(&pool->free_work). However, we
+have no guarantee that migration isn't happening in the background at
+that time.
+
+Since migration can't directly free pages, it relies on free_work being
+scheduled to free the pages. But there's nothing preventing an
+in-progress migrate from queuing the work *after*
+zs_unregister_migration() has called flush_work(). Which would mean
+pages still pointing at the inode when we free it.
+
+Since we know at destroy time all objects should be free, no new
+migrations can come in (since zs_page_isolate() fails for fully-free
+zspages). This means it is sufficient to track a "# isolated zspages"
+count by class, and have the destroy logic ensure all such pages have
+drained before proceeding. Keeping that state under the class spinlock
+keeps the logic straightforward.
+
+In this case a memory leak could lead to an eventual crash if compaction
+hits the leaked page. This crash would only occur if people are
+changing their zswap backend at runtime (which eventually starts
+destruction).
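+
+In condensed form, the handshake this patch adds can be modelled as a
+self-contained userspace program. This is a sketch only: pthread and
+C11 atomics stand in for the kernel wait queue and smp_mb(), and the
+names merely mirror the kernel ones in the diff below.
+
+	#include <pthread.h>
+	#include <stdatomic.h>
+	#include <stdbool.h>
+	#include <stdio.h>
+
+	static atomic_long isolated_pages;	/* models pool->isolated_pages */
+	static atomic_bool destroying;		/* models pool->destroying */
+	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
+	static pthread_cond_t migration_wait = PTHREAD_COND_INITIALIZER;
+
+	/* models zs_pool_dec_isolated(): drop the count, wake the destroyer */
+	static void dec_isolated(void)
+	{
+		if (atomic_fetch_sub(&isolated_pages, 1) == 1 &&
+		    atomic_load(&destroying)) {
+			pthread_mutex_lock(&lock);
+			pthread_cond_broadcast(&migration_wait);	/* wake_up_all() */
+			pthread_mutex_unlock(&lock);
+		}
+	}
+
+	/* models an in-flight zs_page_migrate() finishing after destroy began */
+	static void *migrate_worker(void *arg)
+	{
+		(void)arg;
+		dec_isolated();
+		return NULL;
+	}
+
+	int main(void)
+	{
+		pthread_t t;
+
+		atomic_store(&isolated_pages, 1);	/* one migration in flight */
+		pthread_create(&t, NULL, migrate_worker, NULL);
+
+		/* models zs_unregister_migration(): flag, barrier, then drain */
+		atomic_store(&destroying, true);	/* seq_cst store plays the smp_mb() role */
+		pthread_mutex_lock(&lock);
+		while (atomic_load(&isolated_pages) != 0)	/* wait_for_isolated_drain() */
+			pthread_cond_wait(&migration_wait, &lock);
+		pthread_mutex_unlock(&lock);
+
+		pthread_join(t, NULL);
+		puts("drained; now safe to flush free_work and iput the inode");
+		return 0;
+	}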
+
+Link: http://lkml.kernel.org/r/20190809181751.219326-2-henryburns@google.com
+Fixes: 48b4800a1c6a ("zsmalloc: page migration support")
+Signed-off-by: Henry Burns <henryburns@google.com>
+Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Cc: Henry Burns <henrywolfeburns@gmail.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Shakeel Butt <shakeelb@google.com>
+Cc: Jonathan Adams <jwadams@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/zsmalloc.c |   61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
+ 1 file changed, 59 insertions(+), 2 deletions(-)
+
+--- a/mm/zsmalloc.c
++++ b/mm/zsmalloc.c
+@@ -52,6 +52,7 @@
+ #include <linux/zpool.h>
+ #include <linux/mount.h>
+ #include <linux/migrate.h>
++#include <linux/wait.h>
+ #include <linux/pagemap.h>
+ 
+ #define ZSPAGE_MAGIC	0x58
+@@ -267,6 +268,10 @@ struct zs_pool {
+ #ifdef CONFIG_COMPACTION
+ 	struct inode *inode;
+ 	struct work_struct free_work;
++	/* A wait queue for when migration races with async_free_zspage() */
++	struct wait_queue_head migration_wait;
++	atomic_long_t isolated_pages;
++	bool destroying;
+ #endif
+ };
+ 
+@@ -1890,6 +1895,19 @@ static void putback_zspage_deferred(stru
+ 
+ }
+ 
++static inline void zs_pool_dec_isolated(struct zs_pool *pool)
++{
++	VM_BUG_ON(atomic_long_read(&pool->isolated_pages) <= 0);
++	atomic_long_dec(&pool->isolated_pages);
++	/*
++	 * There's no possibility of racing, since wait_for_isolated_drain()
++	 * checks the isolated count under &class->lock after enqueuing
++	 * on migration_wait.
++	 */
++	if (atomic_long_read(&pool->isolated_pages) == 0 && pool->destroying)
++		wake_up_all(&pool->migration_wait);
++}
++
+ static void replace_sub_page(struct size_class *class, struct zspage *zspage,
+ 				struct page *newpage, struct page *oldpage)
+ {
+@@ -1959,6 +1977,7 @@ bool zs_page_isolate(struct page *page,
+ 	 */
+ 	if (!list_empty(&zspage->list) && !is_zspage_isolated(zspage)) {
+ 		get_zspage_mapping(zspage, &class_idx, &fullness);
++		atomic_long_inc(&pool->isolated_pages);
+ 		remove_zspage(class, zspage, fullness);
+ 	}
+ 
+@@ -2058,8 +2077,16 @@ int zs_page_migrate(struct address_space
+ 	 * Page migration is done so let's putback isolated zspage to
+ 	 * the list if @page is final isolated subpage in the zspage.
+ 	 */
+-	if (!is_zspage_isolated(zspage))
++	if (!is_zspage_isolated(zspage)) {
++		/*
++		 * We cannot race with zs_destroy_pool() here because we wait
++		 * for isolation to hit zero before we start destroying.
++		 * Also, we ensure that everyone can see pool->destroying before
++		 * we start waiting.
++		 */
+ 		putback_zspage_deferred(pool, class, zspage);
++		zs_pool_dec_isolated(pool);
++	}
+ 
+ 	reset_page(page);
+ 	put_page(page);
+@@ -2110,8 +2137,8 @@ void zs_page_putback(struct page *page)
+ 		 * so let's defer.
+ 		 */
+ 		putback_zspage_deferred(pool, class, zspage);
++		zs_pool_dec_isolated(pool);
+ 	}
+-
+ 	spin_unlock(&class->lock);
+ }
+ 
+@@ -2134,8 +2161,36 @@ static int zs_register_migration(struct
+ 	return 0;
+ }
+ 
++static bool pool_isolated_are_drained(struct zs_pool *pool)
++{
++	return atomic_long_read(&pool->isolated_pages) == 0;
++}
++
++/* Function for resolving migration */
++static void wait_for_isolated_drain(struct zs_pool *pool)
++{
++
++	/*
++	 * We're in the process of destroying the pool, so there are no
++	 * active allocations. zs_page_isolate() fails for completely free
++	 * zspages, so we need only wait for the zs_pool's isolated
++	 * count to hit zero.
++	 */
++	wait_event(pool->migration_wait,
++		   pool_isolated_are_drained(pool));
++}
++
+ static void zs_unregister_migration(struct zs_pool *pool)
+ {
++	pool->destroying = true;
++	/*
++	 * We need a memory barrier here to ensure global visibility of
++	 * pool->destroying. Thus pool->isolated pages will either be 0 in which
++	 * case we don't care, or it will be > 0 and pool->destroying will
++	 * ensure that we wake up once isolation hits 0.
++	 */
++	smp_mb();
++	wait_for_isolated_drain(pool); /* This can block */
+ 	flush_work(&pool->free_work);
+ 	iput(pool->inode);
+ }
+@@ -2376,6 +2431,8 @@ struct zs_pool *zs_create_pool(const cha
+ 	if (!pool->name)
+ 		goto err;
+ 
++	init_waitqueue_head(&pool->migration_wait);
++
+ 	if (create_cache(pool))
+ 		goto err;
+ 
diff --git a/queue-4.14/mm-zsmalloc.c-migration-can-leave-pages-in-zs_empty-indefinitely.patch b/queue-4.14/mm-zsmalloc.c-migration-can-leave-pages-in-zs_empty-indefinitely.patch
new file mode 100644
index 00000000000..f84b092f838
--- /dev/null
+++ b/queue-4.14/mm-zsmalloc.c-migration-can-leave-pages-in-zs_empty-indefinitely.patch
@@ -0,0 +1,87 @@
+From 1a87aa03597efa9641e92875b883c94c7f872ccb Mon Sep 17 00:00:00 2001
+From: Henry Burns <henryburns@google.com>
+Date: Sat, 24 Aug 2019 17:55:03 -0700
+Subject: mm/zsmalloc.c: migration can leave pages in ZS_EMPTY indefinitely
+
+From: Henry Burns <henryburns@google.com>
+
+commit 1a87aa03597efa9641e92875b883c94c7f872ccb upstream.
+
+In zs_page_migrate() we call putback_zspage() after we have finished
+migrating all pages in this zspage. However, the return value is
+ignored. If a zs_free() races in between zs_page_isolate() and
+zs_page_migrate(), freeing the last object in the zspage,
+putback_zspage() will leave the page in ZS_EMPTY for potentially an
+unbounded amount of time.
+
+To fix this, we need to do the same thing as zs_page_putback() does:
+schedule free_work to occur.
+
+To avoid duplicated code, move the sequence to a new
+putback_zspage_deferred() function which both zs_page_migrate() and
+zs_page_putback() call.
+
+Link: http://lkml.kernel.org/r/20190809181751.219326-1-henryburns@google.com
+Fixes: 48b4800a1c6a ("zsmalloc: page migration support")
+Signed-off-by: Henry Burns <henryburns@google.com>
+Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Cc: Henry Burns <henrywolfeburns@gmail.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Shakeel Butt <shakeelb@google.com>
+Cc: Jonathan Adams <jwadams@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/zsmalloc.c |   19 +++++++++++++++----
+ 1 file changed, 15 insertions(+), 4 deletions(-)
+
+--- a/mm/zsmalloc.c
++++ b/mm/zsmalloc.c
+@@ -1878,6 +1878,18 @@ static void dec_zspage_isolation(struct
+ 	zspage->isolated--;
+ }
+ 
++static void putback_zspage_deferred(struct zs_pool *pool,
++					struct size_class *class,
++					struct zspage *zspage)
++{
++	enum fullness_group fg;
++
++	fg = putback_zspage(class, zspage);
++	if (fg == ZS_EMPTY)
++		schedule_work(&pool->free_work);
++
++}
++
+ static void replace_sub_page(struct size_class *class, struct zspage *zspage,
+ 				struct page *newpage, struct page *oldpage)
+ {
+@@ -2047,7 +2059,7 @@ int zs_page_migrate(struct address_space
+ 	 * the list if @page is final isolated subpage in the zspage.
+ 	 */
+ 	if (!is_zspage_isolated(zspage))
+-		putback_zspage(class, zspage);
++		putback_zspage_deferred(pool, class, zspage);
+ 
+ 	reset_page(page);
+ 	put_page(page);
+@@ -2093,14 +2105,13 @@ void zs_page_putback(struct page *page)
+ 	spin_lock(&class->lock);
+ 	dec_zspage_isolation(zspage);
+ 	if (!is_zspage_isolated(zspage)) {
+-		fg = putback_zspage(class, zspage);
+ 		/*
+ 		 * Due to page_lock, we cannot free zspage immediately
+ 		 * so let's defer.
+ 		 */
+-		if (fg == ZS_EMPTY)
+-			schedule_work(&pool->free_work);
++		putback_zspage_deferred(pool, class, zspage);
+ 	}
++
+ 	spin_unlock(&class->lock);
+ }
+ 
diff --git a/queue-4.14/series b/queue-4.14/series
index bd2d850872d..41eba2ba919 100644
--- a/queue-4.14/series
+++ b/queue-4.14/series
@@ -55,3 +55,6 @@ dm-zoned-improve-error-handling-in-i-o-map-code.patch
 dm-zoned-properly-handle-backing-device-failure.patch
 genirq-properly-pair-kobject_del-with-kobject_add.patch
 mm-page_owner-handle-thp-splits-correctly.patch
+mm-zsmalloc.c-migration-can-leave-pages-in-zs_empty-indefinitely.patch
+mm-zsmalloc.c-fix-race-condition-in-zs_destroy_pool.patch
+xfs-fix-missing-ilock-unlock-when-xfs_setattr_nonsize-fails-due-to-edquot.patch
diff --git a/queue-4.14/xfs-fix-missing-ilock-unlock-when-xfs_setattr_nonsize-fails-due-to-edquot.patch b/queue-4.14/xfs-fix-missing-ilock-unlock-when-xfs_setattr_nonsize-fails-due-to-edquot.patch
new file mode 100644
index 00000000000..9dcca1589e1
--- /dev/null
+++ b/queue-4.14/xfs-fix-missing-ilock-unlock-when-xfs_setattr_nonsize-fails-due-to-edquot.patch
@@ -0,0 +1,63 @@
+From 1fb254aa983bf190cfd685d40c64a480a9bafaee Mon Sep 17 00:00:00 2001
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+Date: Thu, 22 Aug 2019 20:55:54 -0700
+Subject: xfs: fix missing ILOCK unlock when xfs_setattr_nonsize fails due to EDQUOT
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit 1fb254aa983bf190cfd685d40c64a480a9bafaee upstream.
+
+Benjamin Moody reported to Debian that XFS partially wedges when a chgrp
+fails on account of being out of disk quota. I ran his reproducer
+script:
+
+# adduser dummy
+# adduser dummy plugdev
+
+# dd if=/dev/zero bs=1M count=100 of=test.img
+# mkfs.xfs test.img
+# mount -t xfs -o gquota test.img /mnt
+# mkdir -p /mnt/dummy
+# chown -c dummy /mnt/dummy
+# xfs_quota -xc 'limit -g bsoft=100k bhard=100k plugdev' /mnt
+
+(and then as user dummy)
+
+$ dd if=/dev/urandom bs=1M count=50 of=/mnt/dummy/foo
+$ chgrp plugdev /mnt/dummy/foo
+
+and saw:
+
+================================================
+WARNING: lock held when returning to user space!
+5.3.0-rc5 #rc5 Tainted: G        W
+------------------------------------------------
+chgrp/47006 is leaving the kernel with locks still held!
+1 lock held by chgrp/47006:
+ #0: 000000006664ea2d (&xfs_nondir_ilock_class){++++}, at: xfs_ilock+0xd2/0x290 [xfs]
+
+...which is clearly caused by xfs_setattr_nonsize failing to unlock the
+ILOCK after the xfs_qm_vop_chown_reserve call fails. Add the missing
+unlock.
+
+Reported-by: benjamin.moody@gmail.com
+Fixes: 253f4911f297 ("xfs: better xfs_trans_alloc interface")
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Tested-by: Salvatore Bonaccorso <carnil@debian.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_iops.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/xfs/xfs_iops.c
++++ b/fs/xfs/xfs_iops.c
+@@ -789,6 +789,7 @@ xfs_setattr_nonsize(
+ 
+ out_cancel:
+ 	xfs_trans_cancel(tp);
++	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ out_dqrele:
+ 	xfs_qm_dqrele(udqp);
+ 	xfs_qm_dqrele(gdqp);
-- 
2.47.3
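
The invariant the XFS patch restores is general: every return taken after
xfs_ilock() must pair with an xfs_iunlock(). A minimal self-contained model
of the fixed error path follows; the names are hypothetical, a pthread mutex
stands in for the ILOCK, and a stub stands in for the quota reservation.

	#include <errno.h>
	#include <pthread.h>
	#include <stdio.h>

	static pthread_mutex_t ilock = PTHREAD_MUTEX_INITIALIZER;	/* stands in for XFS_ILOCK_EXCL */

	/* stands in for xfs_qm_vop_chown_reserve(); fails with EDQUOT on demand */
	static int reserve_quota(int should_fail)
	{
		return should_fail ? -EDQUOT : 0;
	}

	/* models the shape of xfs_setattr_nonsize()'s locking, not its real body */
	static int setattr_nonsize(int should_fail)
	{
		int error;

		pthread_mutex_lock(&ilock);	/* xfs_ilock(ip, XFS_ILOCK_EXCL) */

		error = reserve_quota(should_fail);
		if (error)
			goto out_cancel;	/* before the fix, this path returned with the lock held */

		pthread_mutex_unlock(&ilock);	/* commit path releases the lock */
		return 0;

	out_cancel:
		/* xfs_trans_cancel(tp) would go here */
		pthread_mutex_unlock(&ilock);	/* the one-line fix: unlock on the error path too */
		return error;
	}

	int main(void)
	{
		printf("ok path: %d\n", setattr_nonsize(0));
		printf("EDQUOT path: %d\n", setattr_nonsize(1));	/* lock released either way */
		return 0;
	}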