From: Greg Kroah-Hartman
Date: Sun, 11 Oct 2020 08:48:33 +0000 (+0200)
Subject: 5.8-stable patches
X-Git-Tag: v4.4.239~41
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=1e6858cd91dafbc3f699fe612dc587f2881f2f95;p=thirdparty%2Fkernel%2Fstable-queue.git

5.8-stable patches

added patches:
	i2c-imx-fix-reset-of-i2sr_ial-flag.patch
	mm-khugepaged-fix-filemap-page_to_pgoff-page-offset.patch
	net-introduce-helper-sendpage_ok-in-include-linux-net.h.patch
	nvme-tcp-check-page-by-sendpage_ok-before-calling-kernel_sendpage.patch
	tcp-use-sendpage_ok-to-detect-misused-.sendpage.patch
---

diff --git a/queue-5.8/i2c-imx-fix-reset-of-i2sr_ial-flag.patch b/queue-5.8/i2c-imx-fix-reset-of-i2sr_ial-flag.patch
new file mode 100644
index 00000000000..9f3244998dd
--- /dev/null
+++ b/queue-5.8/i2c-imx-fix-reset-of-i2sr_ial-flag.patch
@@ -0,0 +1,71 @@
+From fa4d30556883f2eaab425b88ba9904865a4d00f3 Mon Sep 17 00:00:00 2001
+From: Christian Eggers
+Date: Wed, 7 Oct 2020 10:45:22 +0200
+Subject: i2c: imx: Fix reset of I2SR_IAL flag
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Christian Eggers
+
+commit fa4d30556883f2eaab425b88ba9904865a4d00f3 upstream.
+
+According to the "VFxxx Controller Reference Manual" (and the comment
+block starting at line 97), Vybrid requires writing a one for clearing
+an interrupt flag. Syncing the method for clearing I2SR_IIF in
+i2c_imx_isr().
+
+Signed-off-by: Christian Eggers
+Fixes: 4b775022f6fd ("i2c: imx: add struct to hold more configurable quirks")
+Reviewed-by: Uwe Kleine-König
+Cc: stable@vger.kernel.org
+Signed-off-by: Wolfram Sang
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/i2c/busses/i2c-imx.c |   20 +++++++++++++++-----
+ 1 file changed, 15 insertions(+), 5 deletions(-)
+
+--- a/drivers/i2c/busses/i2c-imx.c
++++ b/drivers/i2c/busses/i2c-imx.c
+@@ -412,6 +412,19 @@ static void i2c_imx_dma_free(struct imx_
+ 	dma->chan_using = NULL;
+ }
+ 
++static void i2c_imx_clear_irq(struct imx_i2c_struct *i2c_imx, unsigned int bits)
++{
++	unsigned int temp;
++
++	/*
++	 * i2sr_clr_opcode is the value to clear all interrupts. Here we want to
++	 * clear only <bits>, so we write ~i2sr_clr_opcode with just <bits>
++	 * toggled. This is required because i.MX needs W0C and Vybrid uses W1C.
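++	 *
++	 * Worked example (flag values as defined in this driver, quoted here
++	 * for illustration: I2SR_IAL is 0x10, I2SR_IIF is 0x02): on i.MX
++	 * (i2sr_clr_opcode == 0x00, W0C), clearing I2SR_IAL writes
++	 * ~0x00 ^ 0x10, low byte 0xef: a 0 to IAL and 1s elsewhere, leaving
++	 * the other flags untouched. On Vybrid (i2sr_clr_opcode ==
++	 * I2SR_IAL | I2SR_IIF == 0x12, W1C), it writes ~0x12 ^ 0x10, low
++	 * byte 0xfd: a 1 to IAL and a 0 to IIF, again touching nothing else.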
++	 */
++	temp = ~i2c_imx->hwdata->i2sr_clr_opcode ^ bits;
++	imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2SR);
++}
++
+ static int i2c_imx_bus_busy(struct imx_i2c_struct *i2c_imx, int for_busy, bool atomic)
+ {
+ 	unsigned long orig_jiffies = jiffies;
+@@ -424,8 +437,7 @@ static int i2c_imx_bus_busy(struct imx_i
+ 
+ 		/* check for arbitration lost */
+ 		if (temp & I2SR_IAL) {
+-			temp &= ~I2SR_IAL;
+-			imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2SR);
++			i2c_imx_clear_irq(i2c_imx, I2SR_IAL);
+ 			return -EAGAIN;
+ 		}
+ 
+@@ -623,9 +635,7 @@ static irqreturn_t i2c_imx_isr(int irq, 
+ 	if (temp & I2SR_IIF) {
+ 		/* save status register */
+ 		i2c_imx->i2csr = temp;
+-		temp &= ~I2SR_IIF;
+-		temp |= (i2c_imx->hwdata->i2sr_clr_opcode & I2SR_IIF);
+-		imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2SR);
++		i2c_imx_clear_irq(i2c_imx, I2SR_IIF);
+ 		wake_up(&i2c_imx->queue);
+ 		return IRQ_HANDLED;
+ 	}
diff --git a/queue-5.8/mm-khugepaged-fix-filemap-page_to_pgoff-page-offset.patch b/queue-5.8/mm-khugepaged-fix-filemap-page_to_pgoff-page-offset.patch
new file mode 100644
index 00000000000..146d4094ddd
--- /dev/null
+++ b/queue-5.8/mm-khugepaged-fix-filemap-page_to_pgoff-page-offset.patch
@@ -0,0 +1,106 @@
+From 033b5d77551167f8c24ca862ce83d3e0745f9245 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins
+Date: Fri, 9 Oct 2020 20:07:59 -0700
+Subject: mm/khugepaged: fix filemap page_to_pgoff(page) != offset
+
+From: Hugh Dickins
+
+commit 033b5d77551167f8c24ca862ce83d3e0745f9245 upstream.
+
+There have been elusive reports of filemap_fault() hitting its
+VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page) on kernels built
+with CONFIG_READ_ONLY_THP_FOR_FS=y.
+
+Suren has hit it on a kernel with CONFIG_READ_ONLY_THP_FOR_FS=y and
+CONFIG_NUMA is not set: and he has analyzed it down to how khugepaged
+without NUMA reuses the same huge page after collapse_file() failed
+(whereas NUMA targets its allocation to the respective node each time).
+And most of us were usually testing with CONFIG_NUMA=y kernels.
+
+collapse_file(old start)
+  new_page = khugepaged_alloc_page(hpage)
+  __SetPageLocked(new_page)
+  new_page->index = start // hpage->index=old offset
+  new_page->mapping = mapping
+  xas_store(&xas, new_page)
+
+                          filemap_fault
+                            page = find_get_page(mapping, offset)
+                            // if offset falls inside hpage then
+                            // compound_head(page) == hpage
+                            lock_page_maybe_drop_mmap()
+                              __lock_page(page)
+
+  // collapse fails
+  xas_store(&xas, old page)
+  new_page->mapping = NULL
+  unlock_page(new_page)
+
+collapse_file(new start)
+  new_page = khugepaged_alloc_page(hpage)
+  __SetPageLocked(new_page)
+  new_page->index = start // hpage->index=new offset
+  new_page->mapping = mapping // mapping becomes valid again
+
+                            // since compound_head(page) == hpage
+                            // page_to_pgoff(page) got changed
+                            VM_BUG_ON_PAGE(page_to_pgoff(page) != offset)
+
+An initial patch replaced __SetPageLocked() by lock_page(), which did
+fix the race which Suren illustrates above. But testing showed that it's
+not good enough: if the racing task's __lock_page() gets delayed long
+after its find_get_page(), then it may follow collapse_file(new start)'s
+successful final unlock_page(), and crash on the same VM_BUG_ON_PAGE.
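+
+For reference, the offset check that fires here relies on the compound
+head; in outline (a simplified sketch of page_to_pgoff() from
+include/linux/pagemap.h, with the hugetlb special case omitted):
+
+	static inline pgoff_t page_to_pgoff(struct page *page)
+	{
+		/* tail pages carry no index of their own: derive from head */
+		struct page *head = compound_head(page);
+
+		return head->index + (page - head);
+	}
+
+so once collapse_file(new start) has rewritten hpage->index, a racing
+lookup that already resolved a tail page of hpage computes a different
+offset from the one it originally looked up.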
+
+It could be fixed by relaxing filemap_fault()'s VM_BUG_ON_PAGE to a
+check and retry (as is done for mapping), with similar relaxations in
+find_lock_entry() and pagecache_get_page(): but it's not obvious what
+else might get caught out; and khugepaged non-NUMA appears to be unique
+in exposing a page to page cache, then revoking, without going through
+a full cycle of freeing before reuse.
+
+Instead, non-NUMA khugepaged_prealloc_page() releases the old page
+if anyone else has a reference to it (1% of cases when I tested).
+
+Although never reported on huge tmpfs, I believe its find_lock_entry()
+has been at similar risk; but huge tmpfs does not rely on khugepaged
+for its normal working nearly so much as READ_ONLY_THP_FOR_FS does.
+
+Reported-by: Denis Lisov
+Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=206569
+Link: https://lore.kernel.org/linux-mm/?q=20200219144635.3b7417145de19b65f258c943%40linux-foundation.org
+Reported-by: Qian Cai
+Link: https://lore.kernel.org/linux-xfs/?q=20200616013309.GB815%40lca.pw
+Reported-and-analyzed-by: Suren Baghdasaryan
+Fixes: 87c460a0bded ("mm/khugepaged: collapse_shmem() without freezing new_page")
+Signed-off-by: Hugh Dickins
+Cc: stable@vger.kernel.org # v4.9+
+Reviewed-by: Matthew Wilcox (Oracle)
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ mm/khugepaged.c |   12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+--- a/mm/khugepaged.c
++++ b/mm/khugepaged.c
+@@ -914,6 +914,18 @@ static struct page *khugepaged_alloc_hug
+ 
+ static bool khugepaged_prealloc_page(struct page **hpage, bool *wait)
+ {
++	/*
++	 * If the hpage allocated earlier was briefly exposed in page cache
++	 * before collapse_file() failed, it is possible that racing lookups
++	 * have not yet completed, and would then be unpleasantly surprised by
++	 * finding the hpage reused for the same mapping at a different offset.
++	 * Just release the previous allocation if there is any danger of that.
++	 */
++	if (*hpage && page_count(*hpage) > 1) {
++		put_page(*hpage);
++		*hpage = NULL;
++	}
++
+ 	if (!*hpage)
+ 		*hpage = khugepaged_alloc_hugepage(wait);
+ 
diff --git a/queue-5.8/net-introduce-helper-sendpage_ok-in-include-linux-net.h.patch b/queue-5.8/net-introduce-helper-sendpage_ok-in-include-linux-net.h.patch
new file mode 100644
index 00000000000..9d1eed3738a
--- /dev/null
+++ b/queue-5.8/net-introduce-helper-sendpage_ok-in-include-linux-net.h.patch
@@ -0,0 +1,76 @@
+From c381b07941adc2274ce552daf86c94701c5e265a Mon Sep 17 00:00:00 2001
+From: Coly Li
+Date: Fri, 2 Oct 2020 16:27:28 +0800
+Subject: net: introduce helper sendpage_ok() in include/linux/net.h
+
+From: Coly Li
+
+commit c381b07941adc2274ce552daf86c94701c5e265a upstream.
+
+The original problem was from nvme-over-tcp code, which mistakenly used
+kernel_sendpage() to send pages allocated by __get_free_pages() without
+the __GFP_COMP flag. Such pages have no refcount (page_count is 0) on
+their tail pages; sending them by kernel_sendpage() may trigger a kernel
+panic from a corrupted kernel heap, because these pages are incorrectly
+freed in the network stack as page_count 0 pages.
+
+This patch introduces a helper sendpage_ok(). It returns true if the
+page being checked:
+- is not a slab page: PageSlab(page) is false.
+- has a page refcount: page_count(page) is not zero.
+
+All drivers that want to send a page to the remote end by
+kernel_sendpage() may use this helper to check whether the page is OK.
+If the helper does not return true, the driver should fall back to a
+non-sendpage method (e.g. sock_no_sendpage()) to handle the page.
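+
+Callers are then expected to branch on the helper; this is exactly the
+pattern that the nvme-tcp patch later in this series adopts:
+
+	/* sock, page, offset, len, flags as in the caller's sendpage path */
+	if (sendpage_ok(page))
+		ret = kernel_sendpage(sock, page, offset, len, flags);
+	else
+		ret = sock_no_sendpage(sock, page, offset, len, flags);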
+
+Signed-off-by: Coly Li
+Cc: Chaitanya Kulkarni
+Cc: Christoph Hellwig
+Cc: Hannes Reinecke
+Cc: Jan Kara
+Cc: Jens Axboe
+Cc: Mikhail Skorzhinskii
+Cc: Philipp Reisner
+Cc: Sagi Grimberg
+Cc: Vlastimil Babka
+Cc: stable@vger.kernel.org
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ include/linux/net.h |   16 ++++++++++++++++
+ 1 file changed, 16 insertions(+)
+
+--- a/include/linux/net.h
++++ b/include/linux/net.h
+@@ -21,6 +21,7 @@
+ #include <linux/rcupdate.h>
+ #include <linux/once.h>
+ #include <linux/fs.h>
++#include <linux/mm.h>
+ 
+ #include <uapi/linux/net.h>
+ 
+@@ -290,6 +291,21 @@ do { \
+ #define net_get_random_once_wait(buf, nbytes)			\
+ 	get_random_once_wait((buf), (nbytes))
+ 
++/*
++ * E.g. XFS meta- & log-data is in slab pages, or bcache meta
++ * data pages, or other high order pages allocated by
++ * __get_free_pages() without __GFP_COMP, which have a page_count
++ * of 0 and/or have PageSlab() set. We cannot use send_page for
++ * those, as that does get_page(); put_page(); and would cause
++ * either a VM_BUG directly, or __page_cache_release a page that
++ * would actually still be referenced by someone, leading to some
++ * obscure delayed Oops somewhere else.
++ */
++static inline bool sendpage_ok(struct page *page)
++{
++	return !PageSlab(page) && page_count(page) >= 1;
++}
++
+ int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec,
+ 		   size_t num, size_t len);
+ int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
diff --git a/queue-5.8/nvme-tcp-check-page-by-sendpage_ok-before-calling-kernel_sendpage.patch b/queue-5.8/nvme-tcp-check-page-by-sendpage_ok-before-calling-kernel_sendpage.patch
new file mode 100644
index 00000000000..7dfcb76cb18
--- /dev/null
+++ b/queue-5.8/nvme-tcp-check-page-by-sendpage_ok-before-calling-kernel_sendpage.patch
@@ -0,0 +1,58 @@
+From 7d4194abfc4de13a2663c7fee6891de8360f7a52 Mon Sep 17 00:00:00 2001
+From: Coly Li
+Date: Fri, 2 Oct 2020 16:27:30 +0800
+Subject: nvme-tcp: check page by sendpage_ok() before calling kernel_sendpage()
+
+From: Coly Li
+
+commit 7d4194abfc4de13a2663c7fee6891de8360f7a52 upstream.
+
+Currently nvme_tcp_try_send_data() doesn't use kernel_sendpage() to
+send slab pages. But pages allocated by __get_free_pages() without
+__GFP_COMP, which also have a refcount of 0, are still sent by
+kernel_sendpage() to the remote end; this is problematic.
+
+The newly introduced helper sendpage_ok() checks both the PageSlab tag
+and the page_count counter, and returns true if the page is OK to be
+sent by kernel_sendpage().
+
+This patch fixes the page checking issue of nvme_tcp_try_send_data()
+with sendpage_ok(). If sendpage_ok() returns true, the page is sent by
+kernel_sendpage(); otherwise sock_no_sendpage() is used to handle it.
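+
+For reference, the problematic allocation pattern looks roughly like
+this (an illustrative sketch, not code from this patch):
+
+	/* A 4-page buffer; without __GFP_COMP its pages are independent. */
+	unsigned long buf = __get_free_pages(GFP_KERNEL, 2);
+	struct page *tail = virt_to_page((void *)buf) + 1;
+
+	/*
+	 * page_count(tail) == 0 here, so the get_page()/put_page() pair in
+	 * the sendpage path would either trip a VM_BUG_ON or free a page
+	 * the caller still owns.
+	 */
+	bool ok = sendpage_ok(tail);	/* false: use sock_no_sendpage() */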
+
+Signed-off-by: Coly Li
+Cc: Chaitanya Kulkarni
+Cc: Christoph Hellwig
+Cc: Hannes Reinecke
+Cc: Jan Kara
+Cc: Jens Axboe
+Cc: Mikhail Skorzhinskii
+Cc: Philipp Reisner
+Cc: Sagi Grimberg
+Cc: Vlastimil Babka
+Cc: stable@vger.kernel.org
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/nvme/host/tcp.c |    7 +++----
+ 1 file changed, 3 insertions(+), 4 deletions(-)
+
+--- a/drivers/nvme/host/tcp.c
++++ b/drivers/nvme/host/tcp.c
+@@ -889,12 +889,11 @@ static int nvme_tcp_try_send_data(struct
+ 		else
+ 			flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
+ 
+-		/* can't zcopy slab pages */
+-		if (unlikely(PageSlab(page))) {
+-			ret = sock_no_sendpage(queue->sock, page, offset, len,
++		if (sendpage_ok(page)) {
++			ret = kernel_sendpage(queue->sock, page, offset, len,
+ 				flags);
+ 		} else {
+-			ret = kernel_sendpage(queue->sock, page, offset, len,
++			ret = sock_no_sendpage(queue->sock, page, offset, len,
+ 				flags);
+ 		}
+ 		if (ret <= 0)
diff --git a/queue-5.8/series b/queue-5.8/series
index c236bb89804..137b9013e85 100644
--- a/queue-5.8/series
+++ b/queue-5.8/series
@@ -27,3 +27,8 @@ nvme-core-put-ctrl-ref-when-module-ref-get-fail.patch
 macsec-avoid-use-after-free-in-macsec_handle_frame.patch
 risc-v-make-sure-memblock-reserves-the-memory-containing-dt.patch
 gpiolib-disable-compat-read-code-in-uml-case.patch
+i2c-imx-fix-reset-of-i2sr_ial-flag.patch
+mm-khugepaged-fix-filemap-page_to_pgoff-page-offset.patch
+net-introduce-helper-sendpage_ok-in-include-linux-net.h.patch
+tcp-use-sendpage_ok-to-detect-misused-.sendpage.patch
+nvme-tcp-check-page-by-sendpage_ok-before-calling-kernel_sendpage.patch
diff --git a/queue-5.8/tcp-use-sendpage_ok-to-detect-misused-.sendpage.patch b/queue-5.8/tcp-use-sendpage_ok-to-detect-misused-.sendpage.patch
new file mode 100644
index 00000000000..46d68c00ad8
--- /dev/null
+++ b/queue-5.8/tcp-use-sendpage_ok-to-detect-misused-.sendpage.patch
@@ -0,0 +1,46 @@
+From cf83a17edeeb36195596d2dae060a7c381db35f1 Mon Sep 17 00:00:00 2001
+From: Coly Li
+Date: Fri, 2 Oct 2020 16:27:31 +0800
+Subject: tcp: use sendpage_ok() to detect misused .sendpage
+
+From: Coly Li
+
+commit cf83a17edeeb36195596d2dae060a7c381db35f1 upstream.
+
+commit a10674bf2406 ("tcp: detecting the misuse of .sendpage for Slab
+objects") adds the checks for Slab pages, but pages that don't have a
+page_count are still missing from the check.
+
+The network layer's sendpage method is not designed to send page_count
+0 pages either; therefore both PageSlab() and page_count() should be
+checked for the page being sent. This is exactly what sendpage_ok()
+does.
+
+This patch uses sendpage_ok() in do_tcp_sendpages() to detect misused
+.sendpage, to make the code more robust.
+
+Fixes: a10674bf2406 ("tcp: detecting the misuse of .sendpage for Slab objects")
+Suggested-by: Eric Dumazet
+Signed-off-by: Coly Li
+Cc: Vasily Averin
+Cc: David S. Miller
+Cc: stable@vger.kernel.org
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ net/ipv4/tcp.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -970,7 +970,8 @@ ssize_t do_tcp_sendpages(struct sock *sk
+ 	long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
+ 
+ 	if (IS_ENABLED(CONFIG_DEBUG_VM) &&
+-	    WARN_ONCE(PageSlab(page), "page must not be a Slab one"))
++	    WARN_ONCE(!sendpage_ok(page),
++		      "page must not be a Slab one and have page_count > 0"))
+ 		return -EINVAL;
+ 
+ 	/* Wait for a connection to finish. One exception is TCP Fast Open