From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sun, 12 Aug 2018 15:14:57 +0000 (+0200)
Subject: 4.17-stable patches
X-Git-Tag: v4.18.1~43
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=e62e2ba869080a7cd558193c5f568974555ba397;p=thirdparty%2Fkernel%2Fstable-queue.git

4.17-stable patches

added patches:
	bpf-sockmap-fix-bpf_tcp_sendmsg-sock-error-handling.patch
	bpf-sockmap-fix-leak-in-bpf_tcp_sendmsg-wait-for-mem-path.patch
	xen-netfront-don-t-cache-skb_shinfo.patch
	zram-remove-bd_cap_synchronous_io-with-writeback-feature.patch
---

diff --git a/queue-4.17/bpf-sockmap-fix-bpf_tcp_sendmsg-sock-error-handling.patch b/queue-4.17/bpf-sockmap-fix-bpf_tcp_sendmsg-sock-error-handling.patch
new file mode 100644
index 00000000000..aaeeacbd381
--- /dev/null
+++ b/queue-4.17/bpf-sockmap-fix-bpf_tcp_sendmsg-sock-error-handling.patch
@@ -0,0 +1,39 @@
+From 5121700b346b6160ccc9411194e3f1f417c340d1 Mon Sep 17 00:00:00 2001
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Wed, 8 Aug 2018 19:23:13 +0200
+Subject: bpf, sockmap: fix bpf_tcp_sendmsg sock error handling
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+commit 5121700b346b6160ccc9411194e3f1f417c340d1 upstream.
+
+While working on bpf_tcp_sendmsg() code, I noticed that when a
+sk->sk_err is set we error out with err = sk->sk_err. However
+this is problematic since sk->sk_err is a positive error value
+and therefore we will neither go into sk_stream_error() nor will
+we report an error back to user space. I had this case with EPIPE
+and user space was thinking sendmsg() succeeded since EPIPE is
+a positive value, thinking we submitted 32 bytes. Fix it by
+negating the sk->sk_err value.
+
+Fixes: 4f738adba30a ("bpf: create tcp_bpf_ulp allowing BPF to monitor socket TX/RX data")
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: John Fastabend <john.fastabend@gmail.com>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/bpf/sockmap.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/bpf/sockmap.c
++++ b/kernel/bpf/sockmap.c
+@@ -952,7 +952,7 @@ static int bpf_tcp_sendmsg(struct sock *
+ 		int copy;
+ 
+ 		if (sk->sk_err) {
+-			err = sk->sk_err;
++			err = -sk->sk_err;
+ 			goto out_err;
+ 		}
+ 
diff --git a/queue-4.17/bpf-sockmap-fix-leak-in-bpf_tcp_sendmsg-wait-for-mem-path.patch b/queue-4.17/bpf-sockmap-fix-leak-in-bpf_tcp_sendmsg-wait-for-mem-path.patch
new file mode 100644
index 00000000000..be40af36bd8
--- /dev/null
+++ b/queue-4.17/bpf-sockmap-fix-leak-in-bpf_tcp_sendmsg-wait-for-mem-path.patch
@@ -0,0 +1,49 @@
+From 7c81c71730456845e6212dccbf00098faa66740f Mon Sep 17 00:00:00 2001
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Wed, 8 Aug 2018 19:23:14 +0200
+Subject: bpf, sockmap: fix leak in bpf_tcp_sendmsg wait for mem path
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+commit 7c81c71730456845e6212dccbf00098faa66740f upstream.
+
+In bpf_tcp_sendmsg() the sk_alloc_sg() may fail. In the case of
+ENOMEM, it may also mean that we've partially filled the scatterlist
+entries with pages. Later jumping to sk_stream_wait_memory()
+we could further fail with an error for several reasons, however
+we fail to call free_start_sg() if the local sk_msg_buff was used.
+
+Fixes: 4f738adba30a ("bpf: create tcp_bpf_ulp allowing BPF to monitor socket TX/RX data")
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: John Fastabend <john.fastabend@gmail.com>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/bpf/sockmap.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/kernel/bpf/sockmap.c
++++ b/kernel/bpf/sockmap.c
+@@ -947,7 +947,7 @@ static int bpf_tcp_sendmsg(struct sock *
+ 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+ 
+ 	while (msg_data_left(msg)) {
+-		struct sk_msg_buff *m;
++		struct sk_msg_buff *m = NULL;
+ 		bool enospc = false;
+ 		int copy;
+ 
+@@ -1015,8 +1015,11 @@ wait_for_sndbuf:
+ 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ wait_for_memory:
+ 		err = sk_stream_wait_memory(sk, &timeo);
+-		if (err)
++		if (err) {
++			if (m && m != psock->cork)
++				free_start_sg(sk, m);
+ 			goto out_err;
++		}
+ 	}
+ out_err:
+ 	if (err < 0)
diff --git a/queue-4.17/series b/queue-4.17/series
index a840d3a49d2..1910e3fb382 100644
--- a/queue-4.17/series
+++ b/queue-4.17/series
@@ -3,3 +3,7 @@ parisc-define-mb-and-add-memory-barriers-to-assembler-unlock-sequences.patch
 mark-hi-and-tasklet-softirq-synchronous.patch
 stop_machine-disable-preemption-after-queueing-stopper-threads.patch
 sched-deadline-update-rq_clock-of-later_rq-when-pushing-a-task.patch
+zram-remove-bd_cap_synchronous_io-with-writeback-feature.patch
+xen-netfront-don-t-cache-skb_shinfo.patch
+bpf-sockmap-fix-leak-in-bpf_tcp_sendmsg-wait-for-mem-path.patch
+bpf-sockmap-fix-bpf_tcp_sendmsg-sock-error-handling.patch
diff --git a/queue-4.17/xen-netfront-don-t-cache-skb_shinfo.patch b/queue-4.17/xen-netfront-don-t-cache-skb_shinfo.patch
new file mode 100644
index 00000000000..8d978ce2478
--- /dev/null
+++ b/queue-4.17/xen-netfront-don-t-cache-skb_shinfo.patch
@@ -0,0 +1,52 @@
+From d472b3a6cf63cd31cae1ed61930f07e6cd6671b5 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Thu, 9 Aug 2018 16:42:16 +0200
+Subject: xen/netfront: don't cache skb_shinfo()
+
+From: Juergen Gross <jgross@suse.com>
+
+commit d472b3a6cf63cd31cae1ed61930f07e6cd6671b5 upstream.
+
+skb_shinfo() can change when calling __pskb_pull_tail(): Don't cache
+its return value.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Wei Liu <wei.liu2@citrix.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/xen-netfront.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/xen-netfront.c
++++ b/drivers/net/xen-netfront.c
+@@ -894,7 +894,6 @@ static RING_IDX xennet_fill_frags(struct
+ 				  struct sk_buff *skb,
+ 				  struct sk_buff_head *list)
+ {
+-	struct skb_shared_info *shinfo = skb_shinfo(skb);
+ 	RING_IDX cons = queue->rx.rsp_cons;
+ 	struct sk_buff *nskb;
+ 
+@@ -903,15 +902,16 @@ static RING_IDX xennet_fill_frags(struct
+ 			RING_GET_RESPONSE(&queue->rx, ++cons);
+ 		skb_frag_t *nfrag = &skb_shinfo(nskb)->frags[0];
+ 
+-		if (shinfo->nr_frags == MAX_SKB_FRAGS) {
++		if (skb_shinfo(skb)->nr_frags == MAX_SKB_FRAGS) {
+ 			unsigned int pull_to = NETFRONT_SKB_CB(skb)->pull_to;
+ 
+ 			BUG_ON(pull_to <= skb_headlen(skb));
+ 			__pskb_pull_tail(skb, pull_to - skb_headlen(skb));
+ 		}
+-		BUG_ON(shinfo->nr_frags >= MAX_SKB_FRAGS);
++		BUG_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS);
+ 
+-		skb_add_rx_frag(skb, shinfo->nr_frags, skb_frag_page(nfrag),
++		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
++				skb_frag_page(nfrag),
+ 				rx->offset, rx->status, PAGE_SIZE);
+ 
+ 		skb_shinfo(nskb)->nr_frags = 0;
diff --git a/queue-4.17/zram-remove-bd_cap_synchronous_io-with-writeback-feature.patch b/queue-4.17/zram-remove-bd_cap_synchronous_io-with-writeback-feature.patch
new file mode 100644
index 00000000000..6a932ae6e87
--- /dev/null
+++ b/queue-4.17/zram-remove-bd_cap_synchronous_io-with-writeback-feature.patch
@@ -0,0 +1,108 @@
+From 4f7a7beaee77275671654f7b9f3f9e73ca16ec65 Mon Sep 17 00:00:00 2001
+From: Minchan Kim <minchan@kernel.org>
+Date: Fri, 10 Aug 2018 17:23:10 -0700
+Subject: zram: remove BD_CAP_SYNCHRONOUS_IO with writeback feature
+
+From: Minchan Kim <minchan@kernel.org>
+
+commit 4f7a7beaee77275671654f7b9f3f9e73ca16ec65 upstream.
+
+If zram supports writeback feature, it's no longer a
+BD_CAP_SYNCHRONOUS_IO device because zram does asynchronous IO operations
+for incompressible pages.
+
+Do not pretend to be synchronous IO device.  It makes the system very
+sluggish due to waiting for IO completion from upper layers.
+
+Furthermore, it causes a use-after-free problem because swap thinks the
+operation is done when the IO function returns so it can free the page
+(e.g., lock_page_or_retry and goto out_release in do_swap_page) but in
+fact, IO is asynchronous so the driver could access a just freed page
+afterward.
+
+This patch fixes the problem.
+
+  BUG: Bad page state in process qemu-system-x86  pfn:3dfab21
+  page:ffffdfb137eac840 count:0 mapcount:0 mapping:0000000000000000 index:0x1
+  flags: 0x17fffc000000008(uptodate)
+  raw: 017fffc000000008 dead000000000100 dead000000000200 0000000000000000
+  raw: 0000000000000001 0000000000000000 00000000ffffffff 0000000000000000
+  page dumped because: PAGE_FLAGS_CHECK_AT_PREP flag set
+  bad because of flags: 0x8(uptodate)
+  CPU: 4 PID: 1039 Comm: qemu-system-x86 Tainted: G    B 4.18.0-rc5+ #1
+  Hardware name: Supermicro Super Server/X10SRL-F, BIOS 2.0b 05/02/2017
+  Call Trace:
+    dump_stack+0x5c/0x7b
+    bad_page+0xba/0x120
+    get_page_from_freelist+0x1016/0x1250
+    __alloc_pages_nodemask+0xfa/0x250
+    alloc_pages_vma+0x7c/0x1c0
+    do_swap_page+0x347/0x920
+    __handle_mm_fault+0x7b4/0x1110
+    handle_mm_fault+0xfc/0x1f0
+    __get_user_pages+0x12f/0x690
+    get_user_pages_unlocked+0x148/0x1f0
+    __gfn_to_pfn_memslot+0xff/0x3c0 [kvm]
+    try_async_pf+0x87/0x230 [kvm]
+    tdp_page_fault+0x132/0x290 [kvm]
+    kvm_mmu_page_fault+0x74/0x570 [kvm]
+    kvm_arch_vcpu_ioctl_run+0x9b3/0x1990 [kvm]
+    kvm_vcpu_ioctl+0x388/0x5d0 [kvm]
+    do_vfs_ioctl+0xa2/0x630
+    ksys_ioctl+0x70/0x80
+    __x64_sys_ioctl+0x16/0x20
+    do_syscall_64+0x55/0x100
+    entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+Link: https://lore.kernel.org/lkml/0516ae2d-b0fd-92c5-aa92-112ba7bd32fc@contabo.de/
+Link: http://lkml.kernel.org/r/20180802051112.86174-1-minchan@kernel.org
+[minchan@kernel.org: fix changelog, add comment]
+ Link: https://lore.kernel.org/lkml/0516ae2d-b0fd-92c5-aa92-112ba7bd32fc@contabo.de/
+ Link: http://lkml.kernel.org/r/20180802051112.86174-1-minchan@kernel.org
+ Link: http://lkml.kernel.org/r/20180805233722.217347-1-minchan@kernel.org
+[akpm@linux-foundation.org: coding-style fixes]
+Signed-off-by: Minchan Kim <minchan@kernel.org>
+Reported-by: Tino Lehnig <tino.lehnig@contabo.de>
+Tested-by: Tino Lehnig <tino.lehnig@contabo.de>
+Cc: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: <stable@vger.kernel.org>	[4.15+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/block/zram/zram_drv.c |   15 ++++++++++++++-
+ 1 file changed, 14 insertions(+), 1 deletion(-)
+
+--- a/drivers/block/zram/zram_drv.c
++++ b/drivers/block/zram/zram_drv.c
+@@ -280,7 +280,8 @@ static void reset_bdev(struct zram *zram
+ 	zram->backing_dev = NULL;
+ 	zram->old_block_size = 0;
+ 	zram->bdev = NULL;
+-
++	zram->disk->queue->backing_dev_info->capabilities |=
++				BDI_CAP_SYNCHRONOUS_IO;
+ 	kvfree(zram->bitmap);
+ 	zram->bitmap = NULL;
+ }
+@@ -382,6 +383,18 @@ static ssize_t backing_dev_store(struct
+ 	zram->backing_dev = backing_dev;
+ 	zram->bitmap = bitmap;
+ 	zram->nr_pages = nr_pages;
++	/*
++	 * With writeback feature, zram does asynchronous IO so it's no longer
++	 * synchronous device so let's remove synchronous io flag. Otherwise,
++	 * upper layer(e.g., swap) could wait IO completion rather than
++	 * (submit and return), which will cause system sluggish.
++	 * Furthermore, when the IO function returns(e.g., swap_readpage),
++	 * upper layer expects IO was done so it could deallocate the page
++	 * freely but in fact, IO is going on so finally could cause
++	 * use-after-free when the IO is really done.
++	 */
++	zram->disk->queue->backing_dev_info->capabilities &=
++			~BDI_CAP_SYNCHRONOUS_IO;
+ 	up_write(&zram->init_lock);
+ 
+ 	pr_info("setup backing device %s\n", file_name);