From 44d4609e32c7154d360938e021ddbef88902d1f6 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 26 Aug 2019 19:06:44 +0200
Subject: [PATCH] 4.14-stable patches

added patches:
	mm-zsmalloc.c-fix-race-condition-in-zs_destroy_pool.patch
	mm-zsmalloc.c-migration-can-leave-pages-in-zs_empty-indefinitely.patch
	xfs-fix-missing-ilock-unlock-when-xfs_setattr_nonsize-fails-due-to-edquot.patch
---
 ...ix-race-condition-in-zs_destroy_pool.patch | 171 ++++++++++++++++++
 ...leave-pages-in-zs_empty-indefinitely.patch |  87 +++++++++
 queue-4.14/series                             |   3 +
 ..._setattr_nonsize-fails-due-to-edquot.patch |  63 +++++++
 4 files changed, 324 insertions(+)
 create mode 100644 queue-4.14/mm-zsmalloc.c-fix-race-condition-in-zs_destroy_pool.patch
 create mode 100644 queue-4.14/mm-zsmalloc.c-migration-can-leave-pages-in-zs_empty-indefinitely.patch
 create mode 100644 queue-4.14/xfs-fix-missing-ilock-unlock-when-xfs_setattr_nonsize-fails-due-to-edquot.patch

diff --git a/queue-4.14/mm-zsmalloc.c-fix-race-condition-in-zs_destroy_pool.patch b/queue-4.14/mm-zsmalloc.c-fix-race-condition-in-zs_destroy_pool.patch
new file mode 100644
index 00000000000..61696d4e22d
--- /dev/null
+++ b/queue-4.14/mm-zsmalloc.c-fix-race-condition-in-zs_destroy_pool.patch
@@ -0,0 +1,171 @@
+From 701d678599d0c1623aaf4139c03eea260a75b027 Mon Sep 17 00:00:00 2001
+From: Henry Burns <henryburns@google.com>
+Date: Sat, 24 Aug 2019 17:55:06 -0700
+Subject: mm/zsmalloc.c: fix race condition in zs_destroy_pool
+
+From: Henry Burns <henryburns@google.com>
+
+commit 701d678599d0c1623aaf4139c03eea260a75b027 upstream.
+
+In zs_destroy_pool() we call flush_work(&pool->free_work). However, we
+have no guarantee that migration isn't happening in the background at
+that time.
+
+Since migration can't directly free pages, it relies on free_work being
+scheduled to free the pages. But there's nothing preventing an
+in-progress migrate from queuing the work *after*
+zs_unregister_migration() has called flush_work(). Which would mean
+pages still pointing at the inode when we free it.
+
+Since we know at destroy time all objects should be free, no new
+migrations can come in (since zs_page_isolate() fails for fully-free
+zspages). This means it is sufficient to track a "# isolated zspages"
+count by class, and have the destroy logic ensure all such pages have
+drained before proceeding. Keeping that state under the class spinlock
+keeps the logic straightforward.
+
+In this case a memory leak could lead to an eventual crash if compaction
+hits the leaked page. This crash would only occur if people are
+changing their zswap backend at runtime (which eventually starts
+destruction).
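+
+In condensed form, the handshake this patch adds can be modelled as a
+self-contained userspace program. This is a sketch only: pthread and
+C11 atomics stand in for the kernel wait queue and smp_mb(), and the
+names merely mirror the kernel ones in the diff below.
+
+	#include <pthread.h>
+	#include <stdatomic.h>
+	#include <stdbool.h>
+	#include <stdio.h>
+
+	static atomic_long isolated_pages;	/* models pool->isolated_pages */
+	static atomic_bool destroying;		/* models pool->destroying */
+	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
+	static pthread_cond_t migration_wait = PTHREAD_COND_INITIALIZER;
+
+	/* models zs_pool_dec_isolated(): drop the count, wake the destroyer */
+	static void dec_isolated(void)
+	{
+		if (atomic_fetch_sub(&isolated_pages, 1) == 1 &&
+		    atomic_load(&destroying)) {
+			pthread_mutex_lock(&lock);
+			pthread_cond_broadcast(&migration_wait);	/* wake_up_all() */
+			pthread_mutex_unlock(&lock);
+		}
+	}
+
+	/* models an in-flight zs_page_migrate() finishing after destroy began */
+	static void *migrate_worker(void *arg)
+	{
+		(void)arg;
+		dec_isolated();
+		return NULL;
+	}
+
+	int main(void)
+	{
+		pthread_t t;
+
+		atomic_store(&isolated_pages, 1);	/* one migration in flight */
+		pthread_create(&t, NULL, migrate_worker, NULL);
+
+		/* models zs_unregister_migration(): flag, barrier, then drain */
+		atomic_store(&destroying, true);	/* seq_cst store plays the smp_mb() role */
+		pthread_mutex_lock(&lock);
+		while (atomic_load(&isolated_pages) != 0)	/* wait_for_isolated_drain() */
+			pthread_cond_wait(&migration_wait, &lock);
+		pthread_mutex_unlock(&lock);
+
+		pthread_join(t, NULL);
+		puts("drained; now safe to flush free_work and iput the inode");
+		return 0;
+	}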
+
+Link: http://lkml.kernel.org/r/20190809181751.219326-2-henryburns@google.com
+Fixes: 48b4800a1c6a ("zsmalloc: page migration support")
+Signed-off-by: Henry Burns <henryburns@google.com>
+Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Cc: Henry Burns <henrywolfeburns@gmail.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Shakeel Butt <shakeelb@google.com>
+Cc: Jonathan Adams <jwadams@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/zsmalloc.c |   61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
+ 1 file changed, 59 insertions(+), 2 deletions(-)
+
+--- a/mm/zsmalloc.c
++++ b/mm/zsmalloc.c
+@@ -52,6 +52,7 @@
+ #include <linux/zpool.h>
+ #include <linux/mount.h>
+ #include <linux/migrate.h>
++#include <linux/wait.h>
+ #include <linux/pagemap.h>
+ 
+ #define ZSPAGE_MAGIC	0x58
+@@ -267,6 +268,10 @@ struct zs_pool {
+ #ifdef CONFIG_COMPACTION
+ 	struct inode *inode;
+ 	struct work_struct free_work;
++	/* A wait queue for when migration races with async_free_zspage() */
++	struct wait_queue_head migration_wait;
++	atomic_long_t isolated_pages;
++	bool destroying;
+ #endif
+ };
+ 
+@@ -1890,6 +1895,19 @@ static void putback_zspage_deferred(stru
+ 
+ }
+ 
++static inline void zs_pool_dec_isolated(struct zs_pool *pool)
++{
++	VM_BUG_ON(atomic_long_read(&pool->isolated_pages) <= 0);
++	atomic_long_dec(&pool->isolated_pages);
++	/*
++	 * There's no possibility of racing, since wait_for_isolated_drain()
++	 * checks the isolated count under &class->lock after enqueuing
++	 * on migration_wait.
++	 */
++	if (atomic_long_read(&pool->isolated_pages) == 0 && pool->destroying)
++		wake_up_all(&pool->migration_wait);
++}
++
+ static void replace_sub_page(struct size_class *class, struct zspage *zspage,
+ 				struct page *newpage, struct page *oldpage)
+ {
+@@ -1959,6 +1977,7 @@ bool zs_page_isolate(struct page *page,
+ 	 */
+ 	if (!list_empty(&zspage->list) && !is_zspage_isolated(zspage)) {
+ 		get_zspage_mapping(zspage, &class_idx, &fullness);
++		atomic_long_inc(&pool->isolated_pages);
+ 		remove_zspage(class, zspage, fullness);
+ 	}
+ 
+@@ -2058,8 +2077,16 @@ int zs_page_migrate(struct address_space
+ 	 * Page migration is done so let's putback isolated zspage to
+ 	 * the list if @page is final isolated subpage in the zspage.
+ 	 */
+-	if (!is_zspage_isolated(zspage))
++	if (!is_zspage_isolated(zspage)) {
++		/*
++		 * We cannot race with zs_destroy_pool() here because we wait
++		 * for isolation to hit zero before we start destroying.
++		 * Also, we ensure that everyone can see pool->destroying before
++		 * we start waiting.
++		 */
+ 		putback_zspage_deferred(pool, class, zspage);
++		zs_pool_dec_isolated(pool);
++	}
+ 
+ 	reset_page(page);
+ 	put_page(page);
+@@ -2110,8 +2137,8 @@ void zs_page_putback(struct page *page)
+ 		 * so let's defer.
+ 		 */
+ 		putback_zspage_deferred(pool, class, zspage);
++		zs_pool_dec_isolated(pool);
+ 	}
+-
+ 	spin_unlock(&class->lock);
+ }
+ 
+@@ -2134,8 +2161,36 @@ static int zs_register_migration(struct
+ 	return 0;
+ }
+ 
++static bool pool_isolated_are_drained(struct zs_pool *pool)
++{
++	return atomic_long_read(&pool->isolated_pages) == 0;
++}
++
++/* Function for resolving migration */
++static void wait_for_isolated_drain(struct zs_pool *pool)
++{
++
++	/*
++	 * We're in the process of destroying the pool, so there are no
++	 * active allocations. zs_page_isolate() fails for completely free
++	 * zspages, so we need only wait for the zs_pool's isolated
++	 * count to hit zero.
++	 */
++	wait_event(pool->migration_wait,
++		   pool_isolated_are_drained(pool));
++}
++
+ static void zs_unregister_migration(struct zs_pool *pool)
+ {
++	pool->destroying = true;
++	/*
++	 * We need a memory barrier here to ensure global visibility of
++	 * pool->destroying. Thus pool->isolated pages will either be 0 in which
++	 * case we don't care, or it will be > 0 and pool->destroying will
++	 * ensure that we wake up once isolation hits 0.
++	 */
++	smp_mb();
++	wait_for_isolated_drain(pool); /* This can block */
+ 	flush_work(&pool->free_work);
+ 	iput(pool->inode);
+ }
+@@ -2376,6 +2431,8 @@ struct zs_pool *zs_create_pool(const cha
+ 	if (!pool->name)
+ 		goto err;
+ 
++	init_waitqueue_head(&pool->migration_wait);
++
+ 	if (create_cache(pool))
+ 		goto err;
+ 
diff --git a/queue-4.14/mm-zsmalloc.c-migration-can-leave-pages-in-zs_empty-indefinitely.patch b/queue-4.14/mm-zsmalloc.c-migration-can-leave-pages-in-zs_empty-indefinitely.patch
new file mode 100644
index 00000000000..f84b092f838
--- /dev/null
+++ b/queue-4.14/mm-zsmalloc.c-migration-can-leave-pages-in-zs_empty-indefinitely.patch
@@ -0,0 +1,87 @@
+From 1a87aa03597efa9641e92875b883c94c7f872ccb Mon Sep 17 00:00:00 2001
+From: Henry Burns <henryburns@google.com>
+Date: Sat, 24 Aug 2019 17:55:03 -0700
+Subject: mm/zsmalloc.c: migration can leave pages in ZS_EMPTY indefinitely
+
+From: Henry Burns <henryburns@google.com>
+
+commit 1a87aa03597efa9641e92875b883c94c7f872ccb upstream.
+
+In zs_page_migrate() we call putback_zspage() after we have finished
+migrating all pages in this zspage. However, the return value is
+ignored. If a zs_free() races in between zs_page_isolate() and
+zs_page_migrate(), freeing the last object in the zspage,
+putback_zspage() will leave the page in ZS_EMPTY for potentially an
+unbounded amount of time.
+
+To fix this, we need to do the same thing as zs_page_putback() does:
+schedule free_work to occur.
+
+To avoid duplicated code, move the sequence to a new
+putback_zspage_deferred() function which both zs_page_migrate() and
+zs_page_putback() call.
+
+Link: http://lkml.kernel.org/r/20190809181751.219326-1-henryburns@google.com
+Fixes: 48b4800a1c6a ("zsmalloc: page migration support")
+Signed-off-by: Henry Burns <henryburns@google.com>
+Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Cc: Henry Burns <henrywolfeburns@gmail.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Shakeel Butt <shakeelb@google.com>
+Cc: Jonathan Adams <jwadams@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/zsmalloc.c |   19 +++++++++++++++----
+ 1 file changed, 15 insertions(+), 4 deletions(-)
+
+--- a/mm/zsmalloc.c
++++ b/mm/zsmalloc.c
+@@ -1878,6 +1878,18 @@ static void dec_zspage_isolation(struct
+ 	zspage->isolated--;
+ }
+ 
++static void putback_zspage_deferred(struct zs_pool *pool,
++					struct size_class *class,
++					struct zspage *zspage)
++{
++	enum fullness_group fg;
++
++	fg = putback_zspage(class, zspage);
++	if (fg == ZS_EMPTY)
++		schedule_work(&pool->free_work);
++
++}
++
+ static void replace_sub_page(struct size_class *class, struct zspage *zspage,
+ 				struct page *newpage, struct page *oldpage)
+ {
+@@ -2047,7 +2059,7 @@ int zs_page_migrate(struct address_space
+ 	 * the list if @page is final isolated subpage in the zspage.
+ 	 */
+ 	if (!is_zspage_isolated(zspage))
+-		putback_zspage(class, zspage);
++		putback_zspage_deferred(pool, class, zspage);
+ 
+ 	reset_page(page);
+ 	put_page(page);
+@@ -2093,14 +2105,13 @@ void zs_page_putback(struct page *page)
+ 	spin_lock(&class->lock);
+ 	dec_zspage_isolation(zspage);
+ 	if (!is_zspage_isolated(zspage)) {
+-		fg = putback_zspage(class, zspage);
+ 		/*
+ 		 * Due to page_lock, we cannot free zspage immediately
+ 		 * so let's defer.
+ 		 */
+-		if (fg == ZS_EMPTY)
+-			schedule_work(&pool->free_work);
++		putback_zspage_deferred(pool, class, zspage);
+ 	}
++
+ 	spin_unlock(&class->lock);
+ }
+ 
diff --git a/queue-4.14/series b/queue-4.14/series
index bd2d850872d..41eba2ba919 100644
--- a/queue-4.14/series
+++ b/queue-4.14/series
@@ -55,3 +55,6 @@ dm-zoned-improve-error-handling-in-i-o-map-code.patch
 dm-zoned-properly-handle-backing-device-failure.patch
 genirq-properly-pair-kobject_del-with-kobject_add.patch
 mm-page_owner-handle-thp-splits-correctly.patch
+mm-zsmalloc.c-migration-can-leave-pages-in-zs_empty-indefinitely.patch
+mm-zsmalloc.c-fix-race-condition-in-zs_destroy_pool.patch
+xfs-fix-missing-ilock-unlock-when-xfs_setattr_nonsize-fails-due-to-edquot.patch
diff --git a/queue-4.14/xfs-fix-missing-ilock-unlock-when-xfs_setattr_nonsize-fails-due-to-edquot.patch b/queue-4.14/xfs-fix-missing-ilock-unlock-when-xfs_setattr_nonsize-fails-due-to-edquot.patch
new file mode 100644
index 00000000000..9dcca1589e1
--- /dev/null
+++ b/queue-4.14/xfs-fix-missing-ilock-unlock-when-xfs_setattr_nonsize-fails-due-to-edquot.patch
@@ -0,0 +1,63 @@
+From 1fb254aa983bf190cfd685d40c64a480a9bafaee Mon Sep 17 00:00:00 2001
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+Date: Thu, 22 Aug 2019 20:55:54 -0700
+Subject: xfs: fix missing ILOCK unlock when xfs_setattr_nonsize fails due to EDQUOT
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit 1fb254aa983bf190cfd685d40c64a480a9bafaee upstream.
+
+Benjamin Moody reported to Debian that XFS partially wedges when a chgrp
+fails on account of being out of disk quota. I ran his reproducer
+script:
+
+# adduser dummy
+# adduser dummy plugdev
+
+# dd if=/dev/zero bs=1M count=100 of=test.img
+# mkfs.xfs test.img
+# mount -t xfs -o gquota test.img /mnt
+# mkdir -p /mnt/dummy
+# chown -c dummy /mnt/dummy
+# xfs_quota -xc 'limit -g bsoft=100k bhard=100k plugdev' /mnt
+
+(and then as user dummy)
+
+$ dd if=/dev/urandom bs=1M count=50 of=/mnt/dummy/foo
+$ chgrp plugdev /mnt/dummy/foo
+
+and saw:
+
+================================================
+WARNING: lock held when returning to user space!
+5.3.0-rc5 #rc5 Tainted: G        W
+------------------------------------------------
+chgrp/47006 is leaving the kernel with locks still held!
+1 lock held by chgrp/47006:
+ #0: 000000006664ea2d (&xfs_nondir_ilock_class){++++}, at: xfs_ilock+0xd2/0x290 [xfs]
+
+...which is clearly caused by xfs_setattr_nonsize failing to unlock the
+ILOCK after the xfs_qm_vop_chown_reserve call fails. Add the missing
+unlock.
+
+Reported-by: benjamin.moody@gmail.com
+Fixes: 253f4911f297 ("xfs: better xfs_trans_alloc interface")
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Tested-by: Salvatore Bonaccorso <carnil@debian.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xfs/xfs_iops.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/xfs/xfs_iops.c
++++ b/fs/xfs/xfs_iops.c
+@@ -789,6 +789,7 @@ xfs_setattr_nonsize(
+ 
+ out_cancel:
+ 	xfs_trans_cancel(tp);
++	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ out_dqrele:
+ 	xfs_qm_dqrele(udqp);
+ 	xfs_qm_dqrele(gdqp);
-- 
2.47.3
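
The invariant the XFS patch restores is general: every return taken after
xfs_ilock() must pair with an xfs_iunlock(). A minimal self-contained model
of the fixed error path follows; the names are hypothetical, a pthread mutex
stands in for the ILOCK, and a stub stands in for the quota reservation.

	#include <errno.h>
	#include <pthread.h>
	#include <stdio.h>

	static pthread_mutex_t ilock = PTHREAD_MUTEX_INITIALIZER;	/* stands in for XFS_ILOCK_EXCL */

	/* stands in for xfs_qm_vop_chown_reserve(); fails with EDQUOT on demand */
	static int reserve_quota(int should_fail)
	{
		return should_fail ? -EDQUOT : 0;
	}

	/* models the shape of xfs_setattr_nonsize()'s locking, not its real body */
	static int setattr_nonsize(int should_fail)
	{
		int error;

		pthread_mutex_lock(&ilock);	/* xfs_ilock(ip, XFS_ILOCK_EXCL) */

		error = reserve_quota(should_fail);
		if (error)
			goto out_cancel;	/* before the fix, this path returned with the lock held */

		pthread_mutex_unlock(&ilock);	/* commit path releases the lock */
		return 0;

	out_cancel:
		/* xfs_trans_cancel(tp) would go here */
		pthread_mutex_unlock(&ilock);	/* the one-line fix: unlock on the error path too */
		return error;
	}

	int main(void)
	{
		printf("ok path: %d\n", setattr_nonsize(0));
		printf("EDQUOT path: %d\n", setattr_nonsize(1));	/* lock released either way */
		return 0;
	}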