From: Greg Kroah-Hartman Date: Sun, 12 Aug 2018 15:14:57 +0000 (+0200) Subject: 4.17-stable patches X-Git-Tag: v4.18.1~43 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=e62e2ba869080a7cd558193c5f568974555ba397;p=thirdparty%2Fkernel%2Fstable-queue.git 4.17-stable patches added patches: bpf-sockmap-fix-bpf_tcp_sendmsg-sock-error-handling.patch bpf-sockmap-fix-leak-in-bpf_tcp_sendmsg-wait-for-mem-path.patch xen-netfront-don-t-cache-skb_shinfo.patch zram-remove-bd_cap_synchronous_io-with-writeback-feature.patch --- diff --git a/queue-4.17/bpf-sockmap-fix-bpf_tcp_sendmsg-sock-error-handling.patch b/queue-4.17/bpf-sockmap-fix-bpf_tcp_sendmsg-sock-error-handling.patch new file mode 100644 index 00000000000..aaeeacbd381 --- /dev/null +++ b/queue-4.17/bpf-sockmap-fix-bpf_tcp_sendmsg-sock-error-handling.patch @@ -0,0 +1,39 @@ +From 5121700b346b6160ccc9411194e3f1f417c340d1 Mon Sep 17 00:00:00 2001 +From: Daniel Borkmann +Date: Wed, 8 Aug 2018 19:23:13 +0200 +Subject: bpf, sockmap: fix bpf_tcp_sendmsg sock error handling + +From: Daniel Borkmann + +commit 5121700b346b6160ccc9411194e3f1f417c340d1 upstream. + +While working on bpf_tcp_sendmsg() code, I noticed that when a +sk->sk_err is set we error out with err = sk->sk_err. However +this is problematic since sk->sk_err is a positive error value +and therefore we will neither go into sk_stream_error() nor will +we report an error back to user space. I had this case with EPIPE +and user space was thinking sendmsg() succeeded since EPIPE is +a positive value, thinking we submitted 32 bytes. Fix it by +negating the sk->sk_err value. 
+ +Fixes: 4f738adba30a ("bpf: create tcp_bpf_ulp allowing BPF to monitor socket TX/RX data") +Signed-off-by: Daniel Borkmann +Acked-by: John Fastabend +Signed-off-by: Alexei Starovoitov +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/bpf/sockmap.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/bpf/sockmap.c ++++ b/kernel/bpf/sockmap.c +@@ -952,7 +952,7 @@ static int bpf_tcp_sendmsg(struct sock * + int copy; + + if (sk->sk_err) { +- err = sk->sk_err; ++ err = -sk->sk_err; + goto out_err; + } + diff --git a/queue-4.17/bpf-sockmap-fix-leak-in-bpf_tcp_sendmsg-wait-for-mem-path.patch b/queue-4.17/bpf-sockmap-fix-leak-in-bpf_tcp_sendmsg-wait-for-mem-path.patch new file mode 100644 index 00000000000..be40af36bd8 --- /dev/null +++ b/queue-4.17/bpf-sockmap-fix-leak-in-bpf_tcp_sendmsg-wait-for-mem-path.patch @@ -0,0 +1,49 @@ +From 7c81c71730456845e6212dccbf00098faa66740f Mon Sep 17 00:00:00 2001 +From: Daniel Borkmann +Date: Wed, 8 Aug 2018 19:23:14 +0200 +Subject: bpf, sockmap: fix leak in bpf_tcp_sendmsg wait for mem path + +From: Daniel Borkmann + +commit 7c81c71730456845e6212dccbf00098faa66740f upstream. + +In bpf_tcp_sendmsg() the sk_alloc_sg() may fail. In the case of +ENOMEM, it may also mean that we've partially filled the scatterlist +entries with pages. Later jumping to sk_stream_wait_memory() +we could further fail with an error for several reasons, however +we miss to call free_start_sg() if the local sk_msg_buff was used. 
+ +Fixes: 4f738adba30a ("bpf: create tcp_bpf_ulp allowing BPF to monitor socket TX/RX data") +Signed-off-by: Daniel Borkmann +Acked-by: John Fastabend +Signed-off-by: Alexei Starovoitov +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/bpf/sockmap.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/kernel/bpf/sockmap.c ++++ b/kernel/bpf/sockmap.c +@@ -947,7 +947,7 @@ static int bpf_tcp_sendmsg(struct sock * + timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); + + while (msg_data_left(msg)) { +- struct sk_msg_buff *m; ++ struct sk_msg_buff *m = NULL; + bool enospc = false; + int copy; + +@@ -1015,8 +1015,11 @@ wait_for_sndbuf: + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + wait_for_memory: + err = sk_stream_wait_memory(sk, &timeo); +- if (err) ++ if (err) { ++ if (m && m != psock->cork) ++ free_start_sg(sk, m); + goto out_err; ++ } + } + out_err: + if (err < 0) diff --git a/queue-4.17/series b/queue-4.17/series index a840d3a49d2..1910e3fb382 100644 --- a/queue-4.17/series +++ b/queue-4.17/series @@ -3,3 +3,7 @@ parisc-define-mb-and-add-memory-barriers-to-assembler-unlock-sequences.patch mark-hi-and-tasklet-softirq-synchronous.patch stop_machine-disable-preemption-after-queueing-stopper-threads.patch sched-deadline-update-rq_clock-of-later_rq-when-pushing-a-task.patch +zram-remove-bd_cap_synchronous_io-with-writeback-feature.patch +xen-netfront-don-t-cache-skb_shinfo.patch +bpf-sockmap-fix-leak-in-bpf_tcp_sendmsg-wait-for-mem-path.patch +bpf-sockmap-fix-bpf_tcp_sendmsg-sock-error-handling.patch diff --git a/queue-4.17/xen-netfront-don-t-cache-skb_shinfo.patch b/queue-4.17/xen-netfront-don-t-cache-skb_shinfo.patch new file mode 100644 index 00000000000..8d978ce2478 --- /dev/null +++ b/queue-4.17/xen-netfront-don-t-cache-skb_shinfo.patch @@ -0,0 +1,52 @@ +From d472b3a6cf63cd31cae1ed61930f07e6cd6671b5 Mon Sep 17 00:00:00 2001 +From: Juergen Gross +Date: Thu, 9 Aug 2018 16:42:16 +0200 +Subject: xen/netfront: don't cache skb_shinfo() + 
+From: Juergen Gross + +commit d472b3a6cf63cd31cae1ed61930f07e6cd6671b5 upstream. + +skb_shinfo() can change when calling __pskb_pull_tail(): Don't cache +its return value. + +Cc: stable@vger.kernel.org +Signed-off-by: Juergen Gross +Reviewed-by: Wei Liu +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/xen-netfront.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/drivers/net/xen-netfront.c ++++ b/drivers/net/xen-netfront.c +@@ -894,7 +894,6 @@ static RING_IDX xennet_fill_frags(struct + struct sk_buff *skb, + struct sk_buff_head *list) + { +- struct skb_shared_info *shinfo = skb_shinfo(skb); + RING_IDX cons = queue->rx.rsp_cons; + struct sk_buff *nskb; + +@@ -903,15 +902,16 @@ static RING_IDX xennet_fill_frags(struct + RING_GET_RESPONSE(&queue->rx, ++cons); + skb_frag_t *nfrag = &skb_shinfo(nskb)->frags[0]; + +- if (shinfo->nr_frags == MAX_SKB_FRAGS) { ++ if (skb_shinfo(skb)->nr_frags == MAX_SKB_FRAGS) { + unsigned int pull_to = NETFRONT_SKB_CB(skb)->pull_to; + + BUG_ON(pull_to <= skb_headlen(skb)); + __pskb_pull_tail(skb, pull_to - skb_headlen(skb)); + } +- BUG_ON(shinfo->nr_frags >= MAX_SKB_FRAGS); ++ BUG_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS); + +- skb_add_rx_frag(skb, shinfo->nr_frags, skb_frag_page(nfrag), ++ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, ++ skb_frag_page(nfrag), + rx->offset, rx->status, PAGE_SIZE); + + skb_shinfo(nskb)->nr_frags = 0; diff --git a/queue-4.17/zram-remove-bd_cap_synchronous_io-with-writeback-feature.patch b/queue-4.17/zram-remove-bd_cap_synchronous_io-with-writeback-feature.patch new file mode 100644 index 00000000000..6a932ae6e87 --- /dev/null +++ b/queue-4.17/zram-remove-bd_cap_synchronous_io-with-writeback-feature.patch @@ -0,0 +1,108 @@ +From 4f7a7beaee77275671654f7b9f3f9e73ca16ec65 Mon Sep 17 00:00:00 2001 +From: Minchan Kim +Date: Fri, 10 Aug 2018 17:23:10 -0700 +Subject: zram: remove BD_CAP_SYNCHRONOUS_IO with writeback feature + +From: Minchan Kim + 
+commit 4f7a7beaee77275671654f7b9f3f9e73ca16ec65 upstream. + +If zram supports writeback feature, it's no longer a +BDI_CAP_SYNCHRONOUS_IO device because zram does asynchronous IO operations +for incompressible pages. + +Do not pretend to be a synchronous IO device. It makes the system very +sluggish due to waiting for IO completion from upper layers. + +Furthermore, it causes a use-after-free problem because swap thinks the +operation is done when the IO function returns so it can free the page +(e.g., lock_page_or_retry and goto out_release in do_swap_page) but in +fact, IO is asynchronous so the driver could access a just freed page +afterward. + +This patch fixes the problem. + + BUG: Bad page state in process qemu-system-x86 pfn:3dfab21 + page:ffffdfb137eac840 count:0 mapcount:0 mapping:0000000000000000 index:0x1 + flags: 0x17fffc000000008(uptodate) + raw: 017fffc000000008 dead000000000100 dead000000000200 0000000000000000 + raw: 0000000000000001 0000000000000000 00000000ffffffff 0000000000000000 + page dumped because: PAGE_FLAGS_CHECK_AT_PREP flag set + bad because of flags: 0x8(uptodate) + CPU: 4 PID: 1039 Comm: qemu-system-x86 Tainted: G B 4.18.0-rc5+ #1 + Hardware name: Supermicro Super Server/X10SRL-F, BIOS 2.0b 05/02/2017 + Call Trace: + dump_stack+0x5c/0x7b + bad_page+0xba/0x120 + get_page_from_freelist+0x1016/0x1250 + __alloc_pages_nodemask+0xfa/0x250 + alloc_pages_vma+0x7c/0x1c0 + do_swap_page+0x347/0x920 + __handle_mm_fault+0x7b4/0x1110 + handle_mm_fault+0xfc/0x1f0 + __get_user_pages+0x12f/0x690 + get_user_pages_unlocked+0x148/0x1f0 + __gfn_to_pfn_memslot+0xff/0x3c0 [kvm] + try_async_pf+0x87/0x230 [kvm] + tdp_page_fault+0x132/0x290 [kvm] + kvm_mmu_page_fault+0x74/0x570 [kvm] + kvm_arch_vcpu_ioctl_run+0x9b3/0x1990 [kvm] + kvm_vcpu_ioctl+0x388/0x5d0 [kvm] + do_vfs_ioctl+0xa2/0x630 + ksys_ioctl+0x70/0x80 + __x64_sys_ioctl+0x16/0x20 + do_syscall_64+0x55/0x100 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +Link: 
https://lore.kernel.org/lkml/0516ae2d-b0fd-92c5-aa92-112ba7bd32fc@contabo.de/ +Link: http://lkml.kernel.org/r/20180802051112.86174-1-minchan@kernel.org +[minchan@kernel.org: fix changelog, add comment] + Link: https://lore.kernel.org/lkml/0516ae2d-b0fd-92c5-aa92-112ba7bd32fc@contabo.de/ + Link: http://lkml.kernel.org/r/20180802051112.86174-1-minchan@kernel.org + Link: http://lkml.kernel.org/r/20180805233722.217347-1-minchan@kernel.org +[akpm@linux-foundation.org: coding-style fixes] +Signed-off-by: Minchan Kim +Reported-by: Tino Lehnig +Tested-by: Tino Lehnig +Cc: Sergey Senozhatsky +Cc: Jens Axboe +Cc: [4.15+] +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/block/zram/zram_drv.c | 15 ++++++++++++++- + 1 file changed, 14 insertions(+), 1 deletion(-) + +--- a/drivers/block/zram/zram_drv.c ++++ b/drivers/block/zram/zram_drv.c +@@ -280,7 +280,8 @@ static void reset_bdev(struct zram *zram + zram->backing_dev = NULL; + zram->old_block_size = 0; + zram->bdev = NULL; +- ++ zram->disk->queue->backing_dev_info->capabilities |= ++ BDI_CAP_SYNCHRONOUS_IO; + kvfree(zram->bitmap); + zram->bitmap = NULL; + } +@@ -382,6 +383,18 @@ static ssize_t backing_dev_store(struct + zram->backing_dev = backing_dev; + zram->bitmap = bitmap; + zram->nr_pages = nr_pages; ++ /* ++ * With writeback feature, zram does asynchronous IO so it's no longer ++ * synchronous device so let's remove synchronous io flag. Othewise, ++ * upper layer(e.g., swap) could wait IO completion rather than ++ * (submit and return), which will cause system sluggish. ++ * Furthermore, when the IO function returns(e.g., swap_readpage), ++ * upper layer expects IO was done so it could deallocate the page ++ * freely but in fact, IO is going on so finally could cause ++ * use-after-free when the IO is really done. 
++ */ ++ zram->disk->queue->backing_dev_info->capabilities &= ++ ~BDI_CAP_SYNCHRONOUS_IO; + up_write(&zram->init_lock); + + pr_info("setup backing device %s\n", file_name);