From 4d0d74105fc8d74ce0316b802c2e627f614b1814 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 10 Feb 2021 15:06:42 +0100 Subject: [PATCH] 4.9-stable patches added patches: mm-memcontrol-fix-null-pointer-crash-in-test_clear_page_writeback.patch --- queue-4.19/series | 1 + ...r-crash-in-test_clear_page_writeback.patch | 280 ++++++++++++++++++ queue-4.9/series | 1 + queue-5.4/series | 1 + 4 files changed, 283 insertions(+) create mode 100644 queue-4.19/series create mode 100644 queue-4.9/mm-memcontrol-fix-null-pointer-crash-in-test_clear_page_writeback.patch create mode 100644 queue-4.9/series create mode 100644 queue-5.4/series diff --git a/queue-4.19/series b/queue-4.19/series new file mode 100644 index 00000000000..cd3ec3ba600 --- /dev/null +++ b/queue-4.19/series @@ -0,0 +1 @@ +tracing-kprobe-fix-to-support-kretprobe-events-on-unloaded-modules.patch diff --git a/queue-4.9/mm-memcontrol-fix-null-pointer-crash-in-test_clear_page_writeback.patch b/queue-4.9/mm-memcontrol-fix-null-pointer-crash-in-test_clear_page_writeback.patch new file mode 100644 index 00000000000..7ad67dc0604 --- /dev/null +++ b/queue-4.9/mm-memcontrol-fix-null-pointer-crash-in-test_clear_page_writeback.patch @@ -0,0 +1,280 @@ +From 739f79fc9db1b38f96b5a5109b247a650fbebf6d Mon Sep 17 00:00:00 2001 +From: Johannes Weiner +Date: Fri, 18 Aug 2017 15:15:48 -0700 +Subject: mm: memcontrol: fix NULL pointer crash in test_clear_page_writeback() + +From: Johannes Weiner + +commit 739f79fc9db1b38f96b5a5109b247a650fbebf6d upstream. + +Jaegeuk and Brad report a NULL pointer crash when writeback ending tries +to update the memcg stats: + + BUG: unable to handle kernel NULL pointer dereference at 00000000000003b0 + IP: test_clear_page_writeback+0x12e/0x2c0 + [...] + RIP: 0010:test_clear_page_writeback+0x12e/0x2c0 + Call Trace: + + end_page_writeback+0x47/0x70 + f2fs_write_end_io+0x76/0x180 [f2fs] + bio_endio+0x9f/0x120 + blk_update_request+0xa8/0x2f0 + scsi_end_request+0x39/0x1d0 + scsi_io_completion+0x211/0x690 + scsi_finish_command+0xd9/0x120 + scsi_softirq_done+0x127/0x150 + __blk_mq_complete_request_remote+0x13/0x20 + flush_smp_call_function_queue+0x56/0x110 + generic_smp_call_function_single_interrupt+0x13/0x30 + smp_call_function_single_interrupt+0x27/0x40 + call_function_single_interrupt+0x89/0x90 + RIP: 0010:native_safe_halt+0x6/0x10 + + (gdb) l *(test_clear_page_writeback+0x12e) + 0xffffffff811bae3e is in test_clear_page_writeback (./include/linux/memcontrol.h:619). + 614 mod_node_page_state(page_pgdat(page), idx, val); + 615 if (mem_cgroup_disabled() || !page->mem_cgroup) + 616 return; + 617 mod_memcg_state(page->mem_cgroup, idx, val); + 618 pn = page->mem_cgroup->nodeinfo[page_to_nid(page)]; + 619 this_cpu_add(pn->lruvec_stat->count[idx], val); + 620 } + 621 + 622 unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, + 623 gfp_t gfp_mask, + +The issue is that writeback doesn't hold a page reference and the page +might get freed after PG_writeback is cleared (and the mapping is +unlocked) in test_clear_page_writeback(). The stat functions looking up +the page's node or zone are safe, as those attributes are static across +allocation and free cycles. But page->mem_cgroup is not, and it will +get cleared if we race with truncation or migration. + +It appears this race window has been around for a while, but less likely +to trigger when the memcg stats were updated first thing after +PG_writeback is cleared. Recent changes reshuffled this code to update +the global node stats before the memcg ones, though, stretching the race +window out to an extent where people can reproduce the problem. + +Update test_clear_page_writeback() to look up and pin page->mem_cgroup +before clearing PG_writeback, then not use that pointer afterward. It +is a partial revert of 62cccb8c8e7a ("mm: simplify lock_page_memcg()") +but leaves the pageref-holding callsites that aren't affected alone. + +Link: http://lkml.kernel.org/r/20170809183825.GA26387@cmpxchg.org +Fixes: 62cccb8c8e7a ("mm: simplify lock_page_memcg()") +Signed-off-by: Johannes Weiner +Reported-by: Jaegeuk Kim +Tested-by: Jaegeuk Kim +Reported-by: Bradley Bolen +Tested-by: Brad Bolen +Cc: Vladimir Davydov +Cc: Michal Hocko +Cc: [4.6+] +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +[guptap@codeaurora.org: Resolved merge conflicts] +Signed-off-by: Prakash Gupta +Signed-off-by: Florian Fainelli +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/memcontrol.h | 33 ++++++++++++++++++++++++++++----- + mm/memcontrol.c | 43 +++++++++++++++++++++++++++++++------------ + mm/page-writeback.c | 14 +++++++++++--- + 3 files changed, 70 insertions(+), 20 deletions(-) + +--- a/include/linux/memcontrol.h ++++ b/include/linux/memcontrol.h +@@ -490,9 +490,21 @@ bool mem_cgroup_oom_synchronize(bool wai + extern int do_swap_account; + #endif + +-void lock_page_memcg(struct page *page); ++struct mem_cgroup *lock_page_memcg(struct page *page); ++void __unlock_page_memcg(struct mem_cgroup *memcg); + void unlock_page_memcg(struct page *page); + ++static inline void __mem_cgroup_update_page_stat(struct page *page, ++ struct mem_cgroup *memcg, ++ enum mem_cgroup_stat_index idx, ++ int val) ++{ ++ VM_BUG_ON(!(rcu_read_lock_held() || PageLocked(page))); ++ ++ if (memcg && memcg->stat) ++ this_cpu_add(memcg->stat->count[idx], val); ++} ++ + /** + * mem_cgroup_update_page_stat - update page state statistics + * @page: the page +@@ -508,13 +520,12 @@ void unlock_page_memcg(struct page *page + * mem_cgroup_update_page_stat(page, state, -1); + * unlock_page(page) or unlock_page_memcg(page) + */ ++ + static inline void mem_cgroup_update_page_stat(struct page *page, + enum mem_cgroup_stat_index idx, int val) + { +- VM_BUG_ON(!(rcu_read_lock_held() || PageLocked(page))); + +- if (page->mem_cgroup) +- this_cpu_add(page->mem_cgroup->stat->count[idx], val); ++ __mem_cgroup_update_page_stat(page, page->mem_cgroup, idx, val); + } + + static inline void mem_cgroup_inc_page_stat(struct page *page, +@@ -709,7 +720,12 @@ mem_cgroup_print_oom_info(struct mem_cgr + { + } + +-static inline void lock_page_memcg(struct page *page) ++static inline struct mem_cgroup *lock_page_memcg(struct page *page) ++{ ++ return NULL; ++} ++ ++static inline void __unlock_page_memcg(struct mem_cgroup *memcg) + { + } + +@@ -745,6 +761,13 @@ static inline void mem_cgroup_update_pag + { + } + ++static inline void __mem_cgroup_update_page_stat(struct page *page, ++ struct mem_cgroup *memcg, ++ enum mem_cgroup_stat_index idx, ++ int nr) ++{ ++} ++ + static inline void mem_cgroup_inc_page_stat(struct page *page, + enum mem_cgroup_stat_index idx) + { +--- a/mm/memcontrol.c ++++ b/mm/memcontrol.c +@@ -1638,9 +1638,13 @@ cleanup: + * @page: the page + * + * This function protects unlocked LRU pages from being moved to +- * another cgroup and stabilizes their page->mem_cgroup binding. ++ * another cgroup. ++ * ++ * It ensures lifetime of the returned memcg. Caller is responsible ++ * for the lifetime of the page; __unlock_page_memcg() is available ++ * when @page might get freed inside the locked section. + */ +-void lock_page_memcg(struct page *page) ++struct mem_cgroup *lock_page_memcg(struct page *page) + { + struct mem_cgroup *memcg; + unsigned long flags; +@@ -1649,18 +1653,24 @@ void lock_page_memcg(struct page *page) + * The RCU lock is held throughout the transaction. The fast + * path can get away without acquiring the memcg->move_lock + * because page moving starts with an RCU grace period. +- */ ++ * ++ * The RCU lock also protects the memcg from being freed when ++ * the page state that is going to change is the only thing ++ * preventing the page itself from being freed. E.g. writeback ++ * doesn't hold a page reference and relies on PG_writeback to ++ * keep off truncation, migration and so forth. ++ */ + rcu_read_lock(); + + if (mem_cgroup_disabled()) +- return; ++ return NULL; + again: + memcg = page->mem_cgroup; + if (unlikely(!memcg)) +- return; ++ return NULL; + + if (atomic_read(&memcg->moving_account) <= 0) +- return; ++ return memcg; + + spin_lock_irqsave(&memcg->move_lock, flags); + if (memcg != page->mem_cgroup) { +@@ -1676,18 +1686,18 @@ again: + memcg->move_lock_task = current; + memcg->move_lock_flags = flags; + +- return; ++ return memcg; + } + EXPORT_SYMBOL(lock_page_memcg); + + /** +- * unlock_page_memcg - unlock a page->mem_cgroup binding +- * @page: the page ++ * __unlock_page_memcg - unlock and unpin a memcg ++ * @memcg: the memcg ++ * ++ * Unlock and unpin a memcg returned by lock_page_memcg(). + */ +-void unlock_page_memcg(struct page *page) ++void __unlock_page_memcg(struct mem_cgroup *memcg) + { +- struct mem_cgroup *memcg = page->mem_cgroup; +- + if (memcg && memcg->move_lock_task == current) { + unsigned long flags = memcg->move_lock_flags; + +@@ -1699,6 +1709,15 @@ void unlock_page_memcg(struct page *page + + rcu_read_unlock(); + } ++ ++/** ++ * unlock_page_memcg - unlock a page->mem_cgroup binding ++ * @page: the page ++ */ ++void unlock_page_memcg(struct page *page) ++{ ++ __unlock_page_memcg(page->mem_cgroup); ++} + EXPORT_SYMBOL(unlock_page_memcg); + + /* +--- a/mm/page-writeback.c ++++ b/mm/page-writeback.c +@@ -2717,9 +2717,10 @@ EXPORT_SYMBOL(clear_page_dirty_for_io); + int test_clear_page_writeback(struct page *page) + { + struct address_space *mapping = page_mapping(page); ++ struct mem_cgroup *memcg; + int ret; + +- lock_page_memcg(page); ++ memcg = lock_page_memcg(page); + if (mapping && mapping_use_writeback_tags(mapping)) { + struct inode *inode = mapping->host; + struct backing_dev_info *bdi = inode_to_bdi(inode); +@@ -2747,13 +2748,20 @@ int test_clear_page_writeback(struct pag + } else { + ret = TestClearPageWriteback(page); + } ++ /* ++ * NOTE: Page might be free now! Writeback doesn't hold a page ++ * reference on its own, it relies on truncation to wait for ++ * the clearing of PG_writeback. The below can only access ++ * page state that is static across allocation cycles. ++ */ + if (ret) { +- mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_WRITEBACK); ++ __mem_cgroup_update_page_stat(page, memcg, ++ MEM_CGROUP_STAT_WRITEBACK, -1); + dec_node_page_state(page, NR_WRITEBACK); + dec_zone_page_state(page, NR_ZONE_WRITE_PENDING); + inc_node_page_state(page, NR_WRITTEN); + } +- unlock_page_memcg(page); ++ __unlock_page_memcg(memcg); + return ret; + } + diff --git a/queue-4.9/series b/queue-4.9/series new file mode 100644 index 00000000000..1bb534521c8 --- /dev/null +++ b/queue-4.9/series @@ -0,0 +1 @@ +mm-memcontrol-fix-null-pointer-crash-in-test_clear_page_writeback.patch diff --git a/queue-5.4/series b/queue-5.4/series new file mode 100644 index 00000000000..cd3ec3ba600 --- /dev/null +++ b/queue-5.4/series @@ -0,0 +1 @@ +tracing-kprobe-fix-to-support-kretprobe-events-on-unloaded-modules.patch -- 2.47.2