git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.17-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 12 Aug 2018 15:14:57 +0000 (17:14 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 12 Aug 2018 15:14:57 +0000 (17:14 +0200)
added patches:
bpf-sockmap-fix-bpf_tcp_sendmsg-sock-error-handling.patch
bpf-sockmap-fix-leak-in-bpf_tcp_sendmsg-wait-for-mem-path.patch
xen-netfront-don-t-cache-skb_shinfo.patch
zram-remove-bd_cap_synchronous_io-with-writeback-feature.patch

queue-4.17/bpf-sockmap-fix-bpf_tcp_sendmsg-sock-error-handling.patch [new file with mode: 0644]
queue-4.17/bpf-sockmap-fix-leak-in-bpf_tcp_sendmsg-wait-for-mem-path.patch [new file with mode: 0644]
queue-4.17/series
queue-4.17/xen-netfront-don-t-cache-skb_shinfo.patch [new file with mode: 0644]
queue-4.17/zram-remove-bd_cap_synchronous_io-with-writeback-feature.patch [new file with mode: 0644]

diff --git a/queue-4.17/bpf-sockmap-fix-bpf_tcp_sendmsg-sock-error-handling.patch b/queue-4.17/bpf-sockmap-fix-bpf_tcp_sendmsg-sock-error-handling.patch
new file mode 100644
index 0000000..aaeeacb
--- /dev/null
+++ b/queue-4.17/bpf-sockmap-fix-bpf_tcp_sendmsg-sock-error-handling.patch
@@ -0,0 +1,39 @@
+From 5121700b346b6160ccc9411194e3f1f417c340d1 Mon Sep 17 00:00:00 2001
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Wed, 8 Aug 2018 19:23:13 +0200
+Subject: bpf, sockmap: fix bpf_tcp_sendmsg sock error handling
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+commit 5121700b346b6160ccc9411194e3f1f417c340d1 upstream.
+
+While working on the bpf_tcp_sendmsg() code, I noticed that when
+sk->sk_err is set we error out with err = sk->sk_err. This is
+problematic since sk->sk_err holds a positive error value, so we
+neither go into sk_stream_error() nor report an error back to user
+space. I hit this case with EPIPE: because EPIPE is a positive
+value (32), user space thought sendmsg() succeeded and had
+submitted 32 bytes. Fix it by negating the sk->sk_err value.
+
+Fixes: 4f738adba30a ("bpf: create tcp_bpf_ulp allowing BPF to monitor socket TX/RX data")
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: John Fastabend <john.fastabend@gmail.com>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/bpf/sockmap.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/bpf/sockmap.c
++++ b/kernel/bpf/sockmap.c
+@@ -952,7 +952,7 @@ static int bpf_tcp_sendmsg(struct sock *
+               int copy;
+               if (sk->sk_err) {
+-                      err = sk->sk_err;
++                      err = -sk->sk_err;
+                       goto out_err;
+               }
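The fix above hinges on the kernel convention that sendmsg() returns a negative errno on failure and a positive value only as a byte count. A minimal, hypothetical C sketch of that error path (invented names, not the actual sockmap code) shows why forwarding the positive sk->sk_err value looks like success to the caller:

/*
 * Sketch only: sendmsg() callers treat a negative return as -errno and a
 * positive return as "bytes sent", so forwarding sk->sk_err unchanged
 * (a positive value, e.g. EPIPE == 32) looks like a 32-byte send.
 */
static int sendmsg_error_path(int sk_err, int copied)
{
	int err = 0;

	if (sk_err) {
		/* buggy version: err = sk_err;   (stays positive, e.g. 32) */
		err = -sk_err;                  /* fixed: negative errno */
		goto out_err;
	}
	return copied;

out_err:
	if (err < 0)                    /* only a negative err counts as failure */
		return err;             /* caller sees -EPIPE */
	return copied ? copied : err;   /* buggy path: caller would see 32 */
}
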
diff --git a/queue-4.17/bpf-sockmap-fix-leak-in-bpf_tcp_sendmsg-wait-for-mem-path.patch b/queue-4.17/bpf-sockmap-fix-leak-in-bpf_tcp_sendmsg-wait-for-mem-path.patch
new file mode 100644
index 0000000..be40af3
--- /dev/null
+++ b/queue-4.17/bpf-sockmap-fix-leak-in-bpf_tcp_sendmsg-wait-for-mem-path.patch
@@ -0,0 +1,49 @@
+From 7c81c71730456845e6212dccbf00098faa66740f Mon Sep 17 00:00:00 2001
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Wed, 8 Aug 2018 19:23:14 +0200
+Subject: bpf, sockmap: fix leak in bpf_tcp_sendmsg wait for mem path
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+commit 7c81c71730456845e6212dccbf00098faa66740f upstream.
+
+In bpf_tcp_sendmsg(), sk_alloc_sg() may fail. In the ENOMEM case
+this can also mean that we have already partially filled the
+scatterlist entries with pages. When we then jump to
+sk_stream_wait_memory() and it fails for one of several reasons,
+we never call free_start_sg() if the local sk_msg_buff was used,
+leaking those pages.
+
+Fixes: 4f738adba30a ("bpf: create tcp_bpf_ulp allowing BPF to monitor socket TX/RX data")
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: John Fastabend <john.fastabend@gmail.com>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/bpf/sockmap.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/kernel/bpf/sockmap.c
++++ b/kernel/bpf/sockmap.c
+@@ -947,7 +947,7 @@ static int bpf_tcp_sendmsg(struct sock *
+       timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+       while (msg_data_left(msg)) {
+-              struct sk_msg_buff *m;
++              struct sk_msg_buff *m = NULL;
+               bool enospc = false;
+               int copy;
+@@ -1015,8 +1015,11 @@ wait_for_sndbuf:
+               set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ wait_for_memory:
+               err = sk_stream_wait_memory(sk, &timeo);
+-              if (err)
++              if (err) {
++                      if (m && m != psock->cork)
++                              free_start_sg(sk, m);
+                       goto out_err;
++              }
+       }
+ out_err:
+       if (err < 0)
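The leak described above follows a common pattern: a locally owned buffer gets partially filled, a later wait fails, and the partial fill is never released. A simplified, hypothetical C sketch of that pattern and of the m != psock->cork style guard (helper names are invented; this is not the sockmap code):

#include <errno.h>
#include <stdlib.h>

struct msg_buff { void *pages[8]; int nr; };

/* Stand-in for sk_alloc_sg(): pretend two entries were filled before failing. */
static int fill_pages(struct msg_buff *m)
{
	m->pages[m->nr++] = malloc(4096);
	m->pages[m->nr++] = malloc(4096);
	return -ENOMEM;
}

/* Stand-in for sk_stream_wait_memory(): the wait fails as well. */
static int wait_for_memory(void)
{
	return -EAGAIN;
}

static void free_partial(struct msg_buff *m)
{
	while (m->nr > 0)
		free(m->pages[--m->nr]);
}

static int send_step(struct msg_buff *cork)
{
	struct msg_buff local = { .nr = 0 };
	struct msg_buff *m = cork ? cork : &local;      /* local buffer unless corked */
	int err = fill_pages(m);                        /* may partially fill, then fail */

	if (err == -ENOMEM) {
		err = wait_for_memory();
		if (err) {
			if (m != cork)          /* mirrors the fix: free only the local buffer */
				free_partial(m);
			return err;
		}
	}
	return 0;
}
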
diff --git a/queue-4.17/series b/queue-4.17/series
index a840d3a49d2f91ba26bc674bebc15ca5b63b5d9c..1910e3fb382ae0460ea310d1ed5dedc180254e1b 100644
--- a/queue-4.17/series
+++ b/queue-4.17/series
@@ -3,3 +3,7 @@ parisc-define-mb-and-add-memory-barriers-to-assembler-unlock-sequences.patch
 mark-hi-and-tasklet-softirq-synchronous.patch
 stop_machine-disable-preemption-after-queueing-stopper-threads.patch
 sched-deadline-update-rq_clock-of-later_rq-when-pushing-a-task.patch
+zram-remove-bd_cap_synchronous_io-with-writeback-feature.patch
+xen-netfront-don-t-cache-skb_shinfo.patch
+bpf-sockmap-fix-leak-in-bpf_tcp_sendmsg-wait-for-mem-path.patch
+bpf-sockmap-fix-bpf_tcp_sendmsg-sock-error-handling.patch
diff --git a/queue-4.17/xen-netfront-don-t-cache-skb_shinfo.patch b/queue-4.17/xen-netfront-don-t-cache-skb_shinfo.patch
new file mode 100644
index 0000000..8d978ce
--- /dev/null
+++ b/queue-4.17/xen-netfront-don-t-cache-skb_shinfo.patch
@@ -0,0 +1,52 @@
+From d472b3a6cf63cd31cae1ed61930f07e6cd6671b5 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Thu, 9 Aug 2018 16:42:16 +0200
+Subject: xen/netfront: don't cache skb_shinfo()
+
+From: Juergen Gross <jgross@suse.com>
+
+commit d472b3a6cf63cd31cae1ed61930f07e6cd6671b5 upstream.
+
+skb_shinfo() can change when calling __pskb_pull_tail(): Don't cache
+its return value.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Wei Liu <wei.liu2@citrix.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/xen-netfront.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/xen-netfront.c
++++ b/drivers/net/xen-netfront.c
+@@ -894,7 +894,6 @@ static RING_IDX xennet_fill_frags(struct
+                                 struct sk_buff *skb,
+                                 struct sk_buff_head *list)
+ {
+-      struct skb_shared_info *shinfo = skb_shinfo(skb);
+       RING_IDX cons = queue->rx.rsp_cons;
+       struct sk_buff *nskb;
+@@ -903,15 +902,16 @@ static RING_IDX xennet_fill_frags(struct
+                       RING_GET_RESPONSE(&queue->rx, ++cons);
+               skb_frag_t *nfrag = &skb_shinfo(nskb)->frags[0];
+-              if (shinfo->nr_frags == MAX_SKB_FRAGS) {
++              if (skb_shinfo(skb)->nr_frags == MAX_SKB_FRAGS) {
+                       unsigned int pull_to = NETFRONT_SKB_CB(skb)->pull_to;
+                       BUG_ON(pull_to <= skb_headlen(skb));
+                       __pskb_pull_tail(skb, pull_to - skb_headlen(skb));
+               }
+-              BUG_ON(shinfo->nr_frags >= MAX_SKB_FRAGS);
++              BUG_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS);
+-              skb_add_rx_frag(skb, shinfo->nr_frags, skb_frag_page(nfrag),
++              skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
++                              skb_frag_page(nfrag),
+                               rx->offset, rx->status, PAGE_SIZE);
+               skb_shinfo(nskb)->nr_frags = 0;
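The bug fixed above is a cached pointer invalidated by a later call: __pskb_pull_tail() may reallocate the skb head, which moves the shared info area that skb_shinfo() points at. A hypothetical, self-contained C analogy (not the netfront code) of why the pointer has to be re-read after every call that may relocate the data:

#include <stdlib.h>
#include <string.h>

struct buf { char *data; size_t len; };

/* Stand-in for __pskb_pull_tail(): growing the buffer may move the allocation. */
static void grow(struct buf *b, size_t extra)
{
	char *p = realloc(b->data, b->len + extra);
	if (!p)
		return;
	memset(p + b->len, 0, extra);
	b->data = p;
	b->len += extra;
}

static void broken(struct buf *b)
{
	char *p = b->data;      /* cached pointer, like the cached skb_shinfo(skb) */
	grow(b, 4096);          /* may relocate the data behind our back */
	p[0] = 1;               /* potential use-after-free through the stale pointer */
}

static void fixed(struct buf *b)
{
	grow(b, 4096);
	b->data[0] = 1;         /* re-read the pointer after the call, as the patch does */
}
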
diff --git a/queue-4.17/zram-remove-bd_cap_synchronous_io-with-writeback-feature.patch b/queue-4.17/zram-remove-bd_cap_synchronous_io-with-writeback-feature.patch
new file mode 100644
index 0000000..6a932ae
--- /dev/null
+++ b/queue-4.17/zram-remove-bd_cap_synchronous_io-with-writeback-feature.patch
@@ -0,0 +1,108 @@
+From 4f7a7beaee77275671654f7b9f3f9e73ca16ec65 Mon Sep 17 00:00:00 2001
+From: Minchan Kim <minchan@kernel.org>
+Date: Fri, 10 Aug 2018 17:23:10 -0700
+Subject: zram: remove BD_CAP_SYNCHRONOUS_IO with writeback feature
+
+From: Minchan Kim <minchan@kernel.org>
+
+commit 4f7a7beaee77275671654f7b9f3f9e73ca16ec65 upstream.
+
+If zram supports the writeback feature, it is no longer a
+BD_CAP_SYNCHRONOUS_IO device because zram does asynchronous IO
+operations for incompressible pages.
+
+Do not pretend to be a synchronous IO device. It makes the system very
+sluggish because upper layers end up waiting for IO completion.
+
+Furthermore, it causes a use-after-free problem: swap thinks the
+operation is done when the IO function returns, so it can free the page
+(e.g., lock_page_or_retry and goto out_release in do_swap_page), but in
+fact the IO is asynchronous, so the driver could access the just-freed
+page afterward.
+
+This patch fixes the problem.
+
+  BUG: Bad page state in process qemu-system-x86  pfn:3dfab21
+  page:ffffdfb137eac840 count:0 mapcount:0 mapping:0000000000000000 index:0x1
+  flags: 0x17fffc000000008(uptodate)
+  raw: 017fffc000000008 dead000000000100 dead000000000200 0000000000000000
+  raw: 0000000000000001 0000000000000000 00000000ffffffff 0000000000000000
+  page dumped because: PAGE_FLAGS_CHECK_AT_PREP flag set
+  bad because of flags: 0x8(uptodate)
+  CPU: 4 PID: 1039 Comm: qemu-system-x86 Tainted: G    B 4.18.0-rc5+ #1
+  Hardware name: Supermicro Super Server/X10SRL-F, BIOS 2.0b 05/02/2017
+  Call Trace:
+    dump_stack+0x5c/0x7b
+    bad_page+0xba/0x120
+    get_page_from_freelist+0x1016/0x1250
+    __alloc_pages_nodemask+0xfa/0x250
+    alloc_pages_vma+0x7c/0x1c0
+    do_swap_page+0x347/0x920
+    __handle_mm_fault+0x7b4/0x1110
+    handle_mm_fault+0xfc/0x1f0
+    __get_user_pages+0x12f/0x690
+    get_user_pages_unlocked+0x148/0x1f0
+    __gfn_to_pfn_memslot+0xff/0x3c0 [kvm]
+    try_async_pf+0x87/0x230 [kvm]
+    tdp_page_fault+0x132/0x290 [kvm]
+    kvm_mmu_page_fault+0x74/0x570 [kvm]
+    kvm_arch_vcpu_ioctl_run+0x9b3/0x1990 [kvm]
+    kvm_vcpu_ioctl+0x388/0x5d0 [kvm]
+    do_vfs_ioctl+0xa2/0x630
+    ksys_ioctl+0x70/0x80
+    __x64_sys_ioctl+0x16/0x20
+    do_syscall_64+0x55/0x100
+    entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+Link: https://lore.kernel.org/lkml/0516ae2d-b0fd-92c5-aa92-112ba7bd32fc@contabo.de/
+Link: http://lkml.kernel.org/r/20180802051112.86174-1-minchan@kernel.org
+[minchan@kernel.org: fix changelog, add comment]
+ Link: https://lore.kernel.org/lkml/0516ae2d-b0fd-92c5-aa92-112ba7bd32fc@contabo.de/
+ Link: http://lkml.kernel.org/r/20180802051112.86174-1-minchan@kernel.org
+ Link: http://lkml.kernel.org/r/20180805233722.217347-1-minchan@kernel.org
+[akpm@linux-foundation.org: coding-style fixes]
+Signed-off-by: Minchan Kim <minchan@kernel.org>
+Reported-by: Tino Lehnig <tino.lehnig@contabo.de>
+Tested-by: Tino Lehnig <tino.lehnig@contabo.de>
+Cc: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: <stable@vger.kernel.org>   [4.15+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/block/zram/zram_drv.c |   15 ++++++++++++++-
+ 1 file changed, 14 insertions(+), 1 deletion(-)
+
+--- a/drivers/block/zram/zram_drv.c
++++ b/drivers/block/zram/zram_drv.c
+@@ -280,7 +280,8 @@ static void reset_bdev(struct zram *zram
+       zram->backing_dev = NULL;
+       zram->old_block_size = 0;
+       zram->bdev = NULL;
+-
++      zram->disk->queue->backing_dev_info->capabilities |=
++                              BDI_CAP_SYNCHRONOUS_IO;
+       kvfree(zram->bitmap);
+       zram->bitmap = NULL;
+ }
+@@ -382,6 +383,18 @@ static ssize_t backing_dev_store(struct
+       zram->backing_dev = backing_dev;
+       zram->bitmap = bitmap;
+       zram->nr_pages = nr_pages;
++      /*
++       * With the writeback feature, zram does asynchronous IO, so it is no
++       * longer a synchronous device; remove the synchronous io flag. Otherwise,
++       * the upper layer (e.g., swap) could wait for IO completion rather than
++       * (submit and return), which would make the system sluggish.
++       * Furthermore, when the IO function returns (e.g., swap_readpage), the
++       * upper layer expects the IO to be done, so it could free the page,
++       * but in fact the IO is still in flight, which could finally cause a
++       * use-after-free when the IO really completes.
++       */
++      zram->disk->queue->backing_dev_info->capabilities &=
++                      ~BDI_CAP_SYNCHRONOUS_IO;
+       up_write(&zram->init_lock);
+       pr_info("setup backing device %s\n", file_name);
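For reference, the shape of the zram change above is a capability-bit toggle tied to whether a writeback backing device is attached. A hypothetical sketch of that toggle (stand-in names and flags, not the zram code):

#include <stdbool.h>

#define CAP_SYNCHRONOUS_IO (1u << 0)    /* stand-in for BDI_CAP_SYNCHRONOUS_IO */

struct zram_like {
	unsigned int capabilities;
	bool has_backing_dev;
};

static void attach_backing_dev(struct zram_like *z)
{
	z->has_backing_dev = true;
	/* IO may now complete asynchronously; stop advertising synchronous IO */
	z->capabilities &= ~CAP_SYNCHRONOUS_IO;
}

static void reset_backing_dev(struct zram_like *z)
{
	z->has_backing_dev = false;
	/* all IO is handled inline again; advertise synchronous IO once more */
	z->capabilities |= CAP_SYNCHRONOUS_IO;
}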