--- /dev/null
+From 739f79fc9db1b38f96b5a5109b247a650fbebf6d Mon Sep 17 00:00:00 2001
+From: Johannes Weiner <hannes@cmpxchg.org>
+Date: Fri, 18 Aug 2017 15:15:48 -0700
+Subject: mm: memcontrol: fix NULL pointer crash in test_clear_page_writeback()
+
+From: Johannes Weiner <hannes@cmpxchg.org>
+
+commit 739f79fc9db1b38f96b5a5109b247a650fbebf6d upstream.
+
+Jaegeuk and Brad report a NULL pointer crash when writeback ending tries
+to update the memcg stats:
+
+ BUG: unable to handle kernel NULL pointer dereference at 00000000000003b0
+ IP: test_clear_page_writeback+0x12e/0x2c0
+ [...]
+ RIP: 0010:test_clear_page_writeback+0x12e/0x2c0
+ Call Trace:
+ <IRQ>
+ end_page_writeback+0x47/0x70
+ f2fs_write_end_io+0x76/0x180 [f2fs]
+ bio_endio+0x9f/0x120
+ blk_update_request+0xa8/0x2f0
+ scsi_end_request+0x39/0x1d0
+ scsi_io_completion+0x211/0x690
+ scsi_finish_command+0xd9/0x120
+ scsi_softirq_done+0x127/0x150
+ __blk_mq_complete_request_remote+0x13/0x20
+ flush_smp_call_function_queue+0x56/0x110
+ generic_smp_call_function_single_interrupt+0x13/0x30
+ smp_call_function_single_interrupt+0x27/0x40
+ call_function_single_interrupt+0x89/0x90
+ RIP: 0010:native_safe_halt+0x6/0x10
+
+ (gdb) l *(test_clear_page_writeback+0x12e)
+ 0xffffffff811bae3e is in test_clear_page_writeback (./include/linux/memcontrol.h:619).
+ 614 mod_node_page_state(page_pgdat(page), idx, val);
+ 615 if (mem_cgroup_disabled() || !page->mem_cgroup)
+ 616 return;
+ 617 mod_memcg_state(page->mem_cgroup, idx, val);
+ 618 pn = page->mem_cgroup->nodeinfo[page_to_nid(page)];
+ 619 this_cpu_add(pn->lruvec_stat->count[idx], val);
+ 620 }
+ 621
+ 622 unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
+ 623 gfp_t gfp_mask,
+
+The issue is that writeback doesn't hold a page reference and the page
+might get freed after PG_writeback is cleared (and the mapping is
+unlocked) in test_clear_page_writeback(). The stat functions looking up
+the page's node or zone are safe, as those attributes are static across
+allocation and free cycles. But page->mem_cgroup is not, and it will
+get cleared if we race with truncation or migration.
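+
+Schematically, the race between the writeback-end path and truncation
+is (an illustrative interleaving, not literal source):
+
+   CPU 0 (end_page_writeback)            CPU 1 (truncate/migrate)
+   TestClearPageWriteback(page)
+                                         wait_on_page_writeback() returns
+                                         page->mem_cgroup cleared, page freed
+   page->mem_cgroup->stat update    <--- NULL pointer dereference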
+
+It appears this race window has been around for a while, but it was
+less likely to trigger while the memcg stats were updated first thing
+after PG_writeback was cleared. Recent changes reshuffled this code to
+update the global node stats before the memcg ones, though, stretching
+the race window out to the point where people can reproduce the problem.
+
+Update test_clear_page_writeback() to look up and pin page->mem_cgroup
+before clearing PG_writeback, and to use only that pinned pointer for
+the memcg stats update afterward. This is a partial revert of
+62cccb8c8e7a ("mm: simplify lock_page_memcg()"), but it leaves alone
+the pageref-holding call sites that aren't affected.
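+
+With this change, the writeback-end path follows the pattern below (a
+condensed sketch of the mm/page-writeback.c hunk in this patch; the
+bdi/writeback-tag bookkeeping is elided):
+
+   struct mem_cgroup *memcg;
+
+   memcg = lock_page_memcg(page);     /* pins memcg under RCU */
+   ret = TestClearPageWriteback(page);
+   /* the page may be freed from here on; use only the pinned memcg */
+   if (ret)
+           __mem_cgroup_update_page_stat(page, memcg,
+                                         MEM_CGROUP_STAT_WRITEBACK, -1);
+   __unlock_page_memcg(memcg);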
+
+Link: http://lkml.kernel.org/r/20170809183825.GA26387@cmpxchg.org
+Fixes: 62cccb8c8e7a ("mm: simplify lock_page_memcg()")
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Reported-by: Jaegeuk Kim <jaegeuk@kernel.org>
+Tested-by: Jaegeuk Kim <jaegeuk@kernel.org>
+Reported-by: Bradley Bolen <bradleybolen@gmail.com>
+Tested-by: Brad Bolen <bradleybolen@gmail.com>
+Cc: Vladimir Davydov <vdavydov@virtuozzo.com>
+Cc: Michal Hocko <mhocko@suse.cz>
+Cc: <stable@vger.kernel.org> [4.6+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+[guptap@codeaurora.org: Resolved merge conflicts]
+Signed-off-by: Prakash Gupta <guptap@codeaurora.org>
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/memcontrol.h | 33 ++++++++++++++++++++++++++++-----
+ mm/memcontrol.c | 43 +++++++++++++++++++++++++++++++------------
+ mm/page-writeback.c | 14 +++++++++++---
+ 3 files changed, 70 insertions(+), 20 deletions(-)
+
+--- a/include/linux/memcontrol.h
++++ b/include/linux/memcontrol.h
+@@ -490,9 +490,21 @@ bool mem_cgroup_oom_synchronize(bool wai
+ extern int do_swap_account;
+ #endif
+
+-void lock_page_memcg(struct page *page);
++struct mem_cgroup *lock_page_memcg(struct page *page);
++void __unlock_page_memcg(struct mem_cgroup *memcg);
+ void unlock_page_memcg(struct page *page);
+
++static inline void __mem_cgroup_update_page_stat(struct page *page,
++ struct mem_cgroup *memcg,
++ enum mem_cgroup_stat_index idx,
++ int val)
++{
++ VM_BUG_ON(!(rcu_read_lock_held() || PageLocked(page)));
++
++ if (memcg && memcg->stat)
++ this_cpu_add(memcg->stat->count[idx], val);
++}
++
+ /**
+ * mem_cgroup_update_page_stat - update page state statistics
+ * @page: the page
+@@ -508,13 +520,12 @@ void unlock_page_memcg(struct page *page
+ * mem_cgroup_update_page_stat(page, state, -1);
+ * unlock_page(page) or unlock_page_memcg(page)
+ */
++
+ static inline void mem_cgroup_update_page_stat(struct page *page,
+ enum mem_cgroup_stat_index idx, int val)
+ {
+- VM_BUG_ON(!(rcu_read_lock_held() || PageLocked(page)));
+
+- if (page->mem_cgroup)
+- this_cpu_add(page->mem_cgroup->stat->count[idx], val);
++ __mem_cgroup_update_page_stat(page, page->mem_cgroup, idx, val);
+ }
+
+ static inline void mem_cgroup_inc_page_stat(struct page *page,
+@@ -709,7 +720,12 @@ mem_cgroup_print_oom_info(struct mem_cgr
+ {
+ }
+
+-static inline void lock_page_memcg(struct page *page)
++static inline struct mem_cgroup *lock_page_memcg(struct page *page)
++{
++ return NULL;
++}
++
++static inline void __unlock_page_memcg(struct mem_cgroup *memcg)
+ {
+ }
+
+@@ -745,6 +761,13 @@ static inline void mem_cgroup_update_pag
+ {
+ }
+
++static inline void __mem_cgroup_update_page_stat(struct page *page,
++ struct mem_cgroup *memcg,
++ enum mem_cgroup_stat_index idx,
++ int nr)
++{
++}
++
+ static inline void mem_cgroup_inc_page_stat(struct page *page,
+ enum mem_cgroup_stat_index idx)
+ {
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -1638,9 +1638,13 @@ cleanup:
+ * @page: the page
+ *
+ * This function protects unlocked LRU pages from being moved to
+- * another cgroup and stabilizes their page->mem_cgroup binding.
++ * another cgroup.
++ *
++ * It ensures lifetime of the returned memcg. Caller is responsible
++ * for the lifetime of the page; __unlock_page_memcg() is available
++ * when @page might get freed inside the locked section.
+ */
+-void lock_page_memcg(struct page *page)
++struct mem_cgroup *lock_page_memcg(struct page *page)
+ {
+ struct mem_cgroup *memcg;
+ unsigned long flags;
+@@ -1649,18 +1653,24 @@ void lock_page_memcg(struct page *page)
+ * The RCU lock is held throughout the transaction. The fast
+ * path can get away without acquiring the memcg->move_lock
+ * because page moving starts with an RCU grace period.
+- */
++ *
++ * The RCU lock also protects the memcg from being freed when
++ * the page state that is going to change is the only thing
++ * preventing the page itself from being freed. E.g. writeback
++ * doesn't hold a page reference and relies on PG_writeback to
++ * keep off truncation, migration and so forth.
++ */
+ rcu_read_lock();
+
+ if (mem_cgroup_disabled())
+- return;
++ return NULL;
+ again:
+ memcg = page->mem_cgroup;
+ if (unlikely(!memcg))
+- return;
++ return NULL;
+
+ if (atomic_read(&memcg->moving_account) <= 0)
+- return;
++ return memcg;
+
+ spin_lock_irqsave(&memcg->move_lock, flags);
+ if (memcg != page->mem_cgroup) {
+@@ -1676,18 +1686,18 @@ again:
+ memcg->move_lock_task = current;
+ memcg->move_lock_flags = flags;
+
+- return;
++ return memcg;
+ }
+ EXPORT_SYMBOL(lock_page_memcg);
+
+ /**
+- * unlock_page_memcg - unlock a page->mem_cgroup binding
+- * @page: the page
++ * __unlock_page_memcg - unlock and unpin a memcg
++ * @memcg: the memcg
++ *
++ * Unlock and unpin a memcg returned by lock_page_memcg().
+ */
+-void unlock_page_memcg(struct page *page)
++void __unlock_page_memcg(struct mem_cgroup *memcg)
+ {
+- struct mem_cgroup *memcg = page->mem_cgroup;
+-
+ if (memcg && memcg->move_lock_task == current) {
+ unsigned long flags = memcg->move_lock_flags;
+
+@@ -1699,6 +1709,15 @@ void unlock_page_memcg(struct page *page
+
+ rcu_read_unlock();
+ }
++
++/**
++ * unlock_page_memcg - unlock a page->mem_cgroup binding
++ * @page: the page
++ */
++void unlock_page_memcg(struct page *page)
++{
++ __unlock_page_memcg(page->mem_cgroup);
++}
+ EXPORT_SYMBOL(unlock_page_memcg);
+
+ /*
+--- a/mm/page-writeback.c
++++ b/mm/page-writeback.c
+@@ -2717,9 +2717,10 @@ EXPORT_SYMBOL(clear_page_dirty_for_io);
+ int test_clear_page_writeback(struct page *page)
+ {
+ struct address_space *mapping = page_mapping(page);
++ struct mem_cgroup *memcg;
+ int ret;
+
+- lock_page_memcg(page);
++ memcg = lock_page_memcg(page);
+ if (mapping && mapping_use_writeback_tags(mapping)) {
+ struct inode *inode = mapping->host;
+ struct backing_dev_info *bdi = inode_to_bdi(inode);
+@@ -2747,13 +2748,20 @@ int test_clear_page_writeback(struct pag
+ } else {
+ ret = TestClearPageWriteback(page);
+ }
++ /*
++ * NOTE: Page might be free now! Writeback doesn't hold a page
++ * reference on its own, it relies on truncation to wait for
++ * the clearing of PG_writeback. The below can only access
++ * page state that is static across allocation cycles.
++ */
+ if (ret) {
+- mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_WRITEBACK);
++ __mem_cgroup_update_page_stat(page, memcg,
++ MEM_CGROUP_STAT_WRITEBACK, -1);
+ dec_node_page_state(page, NR_WRITEBACK);
+ dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
+ inc_node_page_state(page, NR_WRITTEN);
+ }
+- unlock_page_memcg(page);
++ __unlock_page_memcg(memcg);
+ return ret;
+ }
+