From: Greg Kroah-Hartman Date: Tue, 11 Apr 2017 18:45:45 +0000 (+0200) Subject: 3.18 patches X-Git-Tag: v4.4.61~1 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=3bbf1c72bcb19308616ea5c22ddb387e1d9db59b;p=thirdparty%2Fkernel%2Fstable-queue.git 3.18 patches --- diff --git a/queue-3.18/cpmac-remove-hopeless-warning.patch b/queue-3.18/cpmac-remove-hopeless-warning.patch new file mode 100644 index 00000000000..6aeeea377ef --- /dev/null +++ b/queue-3.18/cpmac-remove-hopeless-warning.patch @@ -0,0 +1,36 @@ +From d43e6fb4ac4abfe4ef7c102833ed02330ad701e0 Mon Sep 17 00:00:00 2001 +From: Arnd Bergmann +Date: Mon, 16 Jan 2017 14:20:54 +0100 +Subject: cpmac: remove hopeless #warning + +From: Arnd Bergmann + +commit d43e6fb4ac4abfe4ef7c102833ed02330ad701e0 upstream. + +The #warning was present 10 years ago when the driver first got merged. +As the platform is rather obsolete by now, it seems very unlikely that +the warning will cause anyone to fix the code properly. + +kernelci.org reports the warning for every build in the meantime, so +I think it's better to just turn it into a code comment to reduce +noise. + +Signed-off-by: Arnd Bergmann +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/ethernet/ti/cpmac.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/ti/cpmac.c ++++ b/drivers/net/ethernet/ti/cpmac.c +@@ -1235,7 +1235,7 @@ int cpmac_init(void) + goto fail_alloc; + } + +-#warning FIXME: unhardcode gpio&reset bits ++ /* FIXME: unhardcode gpio&reset bits */ + ar7_gpio_disable(26); + ar7_gpio_disable(27); + ar7_device_reset(AR7_RESET_BIT_CPMAC_LO); diff --git a/queue-3.18/dccp-unlock-sock-before-calling-sk_free.patch b/queue-3.18/dccp-unlock-sock-before-calling-sk_free.patch new file mode 100644 index 00000000000..f18b73174ea --- /dev/null +++ b/queue-3.18/dccp-unlock-sock-before-calling-sk_free.patch @@ -0,0 +1,81 @@ +From d5afb6f9b6bb2c57bd0c05e76e12489dc0d037d9 Mon Sep 17 00:00:00 2001 +From: Arnaldo Carvalho de Melo +Date: Wed, 1 Mar 2017 16:35:07 -0300 +Subject: dccp: Unlock sock before calling sk_free() + +From: Arnaldo Carvalho de Melo + +commit d5afb6f9b6bb2c57bd0c05e76e12489dc0d037d9 upstream. + +The code where sk_clone() came from created a new socket and locked it, +but then, on the error path didn't unlock it. + +This problem stayed there for a long while, till b0691c8ee7c2 ("net: +Unlock sock before calling sk_free()") fixed it, but unfortunately the +callers of sk_clone() (now sk_clone_locked()) were not audited and the +one in dccp_create_openreq_child() remained. + +Now in the age of the syskaller fuzzer, this was finally uncovered, as +reported by Dmitry: + + ---- 8< ---- + +I've got the following report while running syzkaller fuzzer on +86292b33d4b7 ("Merge branch 'akpm' (patches from Andrew)") + + [ BUG: held lock freed! ] + 4.10.0+ #234 Not tainted + ------------------------- + syz-executor6/6898 is freeing memory + ffff88006286cac0-ffff88006286d3b7, with a lock still held there! 
+ (slock-AF_INET6){+.-...}, at: [] spin_lock + include/linux/spinlock.h:299 [inline] + (slock-AF_INET6){+.-...}, at: [] + sk_clone_lock+0x3d9/0x12c0 net/core/sock.c:1504 + 5 locks held by syz-executor6/6898: + #0: (sk_lock-AF_INET6){+.+.+.}, at: [] lock_sock + include/net/sock.h:1460 [inline] + #0: (sk_lock-AF_INET6){+.+.+.}, at: [] + inet_stream_connect+0x44/0xa0 net/ipv4/af_inet.c:681 + #1: (rcu_read_lock){......}, at: [] + inet6_csk_xmit+0x12a/0x5d0 net/ipv6/inet6_connection_sock.c:126 + #2: (rcu_read_lock){......}, at: [] __skb_unlink + include/linux/skbuff.h:1767 [inline] + #2: (rcu_read_lock){......}, at: [] __skb_dequeue + include/linux/skbuff.h:1783 [inline] + #2: (rcu_read_lock){......}, at: [] + process_backlog+0x264/0x730 net/core/dev.c:4835 + #3: (rcu_read_lock){......}, at: [] + ip6_input_finish+0x0/0x1700 net/ipv6/ip6_input.c:59 + #4: (slock-AF_INET6){+.-...}, at: [] spin_lock + include/linux/spinlock.h:299 [inline] + #4: (slock-AF_INET6){+.-...}, at: [] + sk_clone_lock+0x3d9/0x12c0 net/core/sock.c:1504 + +Fix it just like was done by b0691c8ee7c2 ("net: Unlock sock before calling +sk_free()"). + +Reported-by: Dmitry Vyukov +Cc: Cong Wang +Cc: Eric Dumazet +Cc: Gerrit Renker +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/20170301153510.GE15145@kernel.org +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman + +--- + net/dccp/minisocks.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/dccp/minisocks.c ++++ b/net/dccp/minisocks.c +@@ -135,6 +135,7 @@ struct sock *dccp_create_openreq_child(s + /* It is still raw copy of parent, so invalidate + * destructor and make plain sk_free() */ + newsk->sk_destruct = NULL; ++ bh_unlock_sock(newsk); + sk_free(newsk); + return NULL; + } diff --git a/queue-3.18/drm-ast-call-open_key-before-enable_mmio-in-post-code.patch b/queue-3.18/drm-ast-call-open_key-before-enable_mmio-in-post-code.patch new file mode 100644 index 00000000000..d1dbe23ead7 --- /dev/null +++ b/queue-3.18/drm-ast-call-open_key-before-enable_mmio-in-post-code.patch @@ -0,0 +1,34 @@ +From 9bb92f51558f2ef5f56c257bdcea0588f31d857e Mon Sep 17 00:00:00 2001 +From: "Y.C. Chen" +Date: Wed, 22 Feb 2017 15:14:19 +1100 +Subject: drm/ast: Call open_key before enable_mmio in POST code + +From: Y.C. Chen + +commit 9bb92f51558f2ef5f56c257bdcea0588f31d857e upstream. + +open_key enables access the registers used by enable_mmio + +Signed-off-by: Y.C. Chen +Signed-off-by: Benjamin Herrenschmidt +Acked-by: Joel Stanley +Tested-by: Y.C. 
Chen +Signed-off-by: Dave Airlie +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/ast/ast_post.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/gpu/drm/ast/ast_post.c ++++ b/drivers/gpu/drm/ast/ast_post.c +@@ -371,8 +371,8 @@ void ast_post_gpu(struct drm_device *dev + pci_write_config_dword(ast->dev->pdev, 0x04, reg); + + ast_enable_vga(dev); +- ast_enable_mmio(dev); + ast_open_key(ast); ++ ast_enable_mmio(dev); + ast_set_def_ext_reg(dev); + + if (ast->chip == AST2300 || ast->chip == AST2400) diff --git a/queue-3.18/drm-ast-fix-ast2400-post-failure-without-bmc-fw-or-vbios.patch b/queue-3.18/drm-ast-fix-ast2400-post-failure-without-bmc-fw-or-vbios.patch new file mode 100644 index 00000000000..6074ef14f07 --- /dev/null +++ b/queue-3.18/drm-ast-fix-ast2400-post-failure-without-bmc-fw-or-vbios.patch @@ -0,0 +1,74 @@ +From 3856081eede297b617560b85e948cfb00bb395ec Mon Sep 17 00:00:00 2001 +From: "Y.C. Chen" +Date: Thu, 23 Feb 2017 15:52:33 +0800 +Subject: drm/ast: Fix AST2400 POST failure without BMC FW or VBIOS + +From: Y.C. Chen + +commit 3856081eede297b617560b85e948cfb00bb395ec upstream. + +The current POST code for the AST2300/2400 family doesn't work properly +if the chip hasn't been initialized previously by either the BMC own FW +or the VBIOS. This fixes it. + +Signed-off-by: Y.C. Chen +Signed-off-by: Benjamin Herrenschmidt +Tested-by: Y.C. 
Chen +Acked-by: Joel Stanley +Signed-off-by: Dave Airlie +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/ast/ast_post.c | 38 +++++++++++++++++++++++++++++++++++--- + 1 file changed, 35 insertions(+), 3 deletions(-) + +--- a/drivers/gpu/drm/ast/ast_post.c ++++ b/drivers/gpu/drm/ast/ast_post.c +@@ -1626,12 +1626,44 @@ static void ast_init_dram_2300(struct dr + temp |= 0x73; + ast_write32(ast, 0x12008, temp); + ++ param.dram_freq = 396; + param.dram_type = AST_DDR3; ++ temp = ast_mindwm(ast, 0x1e6e2070); + if (temp & 0x01000000) + param.dram_type = AST_DDR2; +- param.dram_chipid = ast->dram_type; +- param.dram_freq = ast->mclk; +- param.vram_size = ast->vram_size; ++ switch (temp & 0x18000000) { ++ case 0: ++ param.dram_chipid = AST_DRAM_512Mx16; ++ break; ++ default: ++ case 0x08000000: ++ param.dram_chipid = AST_DRAM_1Gx16; ++ break; ++ case 0x10000000: ++ param.dram_chipid = AST_DRAM_2Gx16; ++ break; ++ case 0x18000000: ++ param.dram_chipid = AST_DRAM_4Gx16; ++ break; ++ } ++ switch (temp & 0x0c) { ++ default: ++ case 0x00: ++ param.vram_size = AST_VIDMEM_SIZE_8M; ++ break; ++ ++ case 0x04: ++ param.vram_size = AST_VIDMEM_SIZE_16M; ++ break; ++ ++ case 0x08: ++ param.vram_size = AST_VIDMEM_SIZE_32M; ++ break; ++ ++ case 0x0c: ++ param.vram_size = AST_VIDMEM_SIZE_64M; ++ break; ++ } + + if (param.dram_type == AST_DDR3) { + get_ddr3_info(ast, &param); diff --git a/queue-3.18/drm-ast-fix-test-for-vga-enabled.patch b/queue-3.18/drm-ast-fix-test-for-vga-enabled.patch new file mode 100644 index 00000000000..43df6ed4aa7 --- /dev/null +++ b/queue-3.18/drm-ast-fix-test-for-vga-enabled.patch @@ -0,0 +1,42 @@ +From 905f21a49d388de3e99438235f3301cabf0c0ef4 Mon Sep 17 00:00:00 2001 +From: "Y.C. Chen" +Date: Wed, 22 Feb 2017 15:10:50 +1100 +Subject: drm/ast: Fix test for VGA enabled + +From: Y.C. Chen + +commit 905f21a49d388de3e99438235f3301cabf0c0ef4 upstream. 
+ +The test to see if VGA was already enabled is doing an unnecessary +second test from a register that may or may not have been initialized +to a valid value. Remove it. + +Signed-off-by: Y.C. Chen +Signed-off-by: Benjamin Herrenschmidt +Acked-by: Joel Stanley +Tested-by: Y.C. Chen +Signed-off-by: Dave Airlie +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/ast/ast_post.c | 8 ++------ + 1 file changed, 2 insertions(+), 6 deletions(-) + +--- a/drivers/gpu/drm/ast/ast_post.c ++++ b/drivers/gpu/drm/ast/ast_post.c +@@ -58,13 +58,9 @@ bool ast_is_vga_enabled(struct drm_devic + /* TODO 1180 */ + } else { + ch = ast_io_read8(ast, AST_IO_VGA_ENABLE_PORT); +- if (ch) { +- ast_open_key(ast); +- ch = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb6, 0xff); +- return ch & 0x04; +- } ++ return !!(ch & 0x01); + } +- return 0; ++ return false; + } + + static const u8 extreginfo[] = { 0x0f, 0x04, 0x1c, 0xff }; diff --git a/queue-3.18/drm-ttm-make-sure-bos-being-swapped-out-are-cacheable.patch b/queue-3.18/drm-ttm-make-sure-bos-being-swapped-out-are-cacheable.patch new file mode 100644 index 00000000000..b9ae1de7c60 --- /dev/null +++ b/queue-3.18/drm-ttm-make-sure-bos-being-swapped-out-are-cacheable.patch @@ -0,0 +1,51 @@ +From 239ac65fa5ffab71adf66e642750f940e7241d99 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Michel=20D=C3=A4nzer?= +Date: Wed, 25 Jan 2017 17:21:31 +0900 +Subject: drm/ttm: Make sure BOs being swapped out are cacheable +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Michel Dänzer + +commit 239ac65fa5ffab71adf66e642750f940e7241d99 upstream. + +The current caching state may not be tt_cached, even though the +placement contains TTM_PL_FLAG_CACHED, because placement can contain +multiple caching flags. Trying to swap out such a BO would trip up the + + BUG_ON(ttm->caching_state != tt_cached); + +in ttm_tt_swapout. 
+ +Signed-off-by: Michel Dänzer +Reviewed-by: Thomas Hellstrom +Reviewed-by: Christian König . +Reviewed-by: Sinclair Yeh +Signed-off-by: Christian König +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/gpu/drm/ttm/ttm_bo.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/ttm/ttm_bo.c ++++ b/drivers/gpu/drm/ttm/ttm_bo.c +@@ -1617,7 +1617,6 @@ static int ttm_bo_swapout(struct ttm_mem + struct ttm_buffer_object *bo; + int ret = -EBUSY; + int put_count; +- uint32_t swap_placement = (TTM_PL_FLAG_CACHED | TTM_PL_FLAG_SYSTEM); + + spin_lock(&glob->lru_lock); + list_for_each_entry(bo, &glob->swap_lru, swap) { +@@ -1653,7 +1652,8 @@ static int ttm_bo_swapout(struct ttm_mem + if (unlikely(ret != 0)) + goto out; + +- if ((bo->mem.placement & swap_placement) != swap_placement) { ++ if (bo->mem.mem_type != TTM_PL_SYSTEM || ++ bo->ttm->caching_state != tt_cached) { + struct ttm_mem_reg evict_mem; + + evict_mem = bo->mem; diff --git a/queue-3.18/ipv4-mask-tos-for-input-route.patch b/queue-3.18/ipv4-mask-tos-for-input-route.patch new file mode 100644 index 00000000000..394163c7ea7 --- /dev/null +++ b/queue-3.18/ipv4-mask-tos-for-input-route.patch @@ -0,0 +1,35 @@ +From 6e28099d38c0e50d62c1afc054e37e573adf3d21 Mon Sep 17 00:00:00 2001 +From: Julian Anastasov +Date: Sun, 26 Feb 2017 17:14:35 +0200 +Subject: ipv4: mask tos for input route + +From: Julian Anastasov + +commit 6e28099d38c0e50d62c1afc054e37e573adf3d21 upstream. + +Restore the lost masking of TOS in input route code to +allow ip rules to match it properly. + +Problem [1] noticed by Shmulik Ladkani + +[1] http://marc.info/?t=137331755300040&r=1&w=2 + +Fixes: 89aef8921bfb ("ipv4: Delete routing cache.") +Signed-off-by: Julian Anastasov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman + +--- + net/ipv4/route.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -1835,6 +1835,7 @@ int ip_route_input_noref(struct sk_buff + { + int res; + ++ tos &= IPTOS_RT_MASK; + rcu_read_lock(); + + /* Multicast recognition logic is moved from route cache to here. diff --git a/queue-3.18/l2tp-avoid-use-after-free-caused-by-l2tp_ip_backlog_recv.patch b/queue-3.18/l2tp-avoid-use-after-free-caused-by-l2tp_ip_backlog_recv.patch new file mode 100644 index 00000000000..9afb48c89ef --- /dev/null +++ b/queue-3.18/l2tp-avoid-use-after-free-caused-by-l2tp_ip_backlog_recv.patch @@ -0,0 +1,35 @@ +From 51fb60eb162ab84c5edf2ae9c63cf0b878e5547e Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Paul=20H=C3=BCber?= +Date: Sun, 26 Feb 2017 17:58:19 +0100 +Subject: l2tp: avoid use-after-free caused by l2tp_ip_backlog_recv +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Paul Hüber + +commit 51fb60eb162ab84c5edf2ae9c63cf0b878e5547e upstream. + +l2tp_ip_backlog_recv may not return -1 if the packet gets dropped. +The return value is passed up to ip_local_deliver_finish, which treats +negative values as an IP protocol number for resubmission. + +Signed-off-by: Paul Hüber +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman + +--- + net/l2tp/l2tp_ip.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/l2tp/l2tp_ip.c ++++ b/net/l2tp/l2tp_ip.c +@@ -382,7 +382,7 @@ static int l2tp_ip_backlog_recv(struct s + drop: + IP_INC_STATS(sock_net(sk), IPSTATS_MIB_INDISCARDS); + kfree_skb(skb); +- return -1; ++ return 0; + } + + /* Userspace will call sendmsg() on the tunnel socket to send L2TP diff --git a/queue-3.18/mbox_todo b/queue-3.18/mbox_todo index d406246b6c5..221e6def33a 100644 --- a/queue-3.18/mbox_todo +++ b/queue-3.18/mbox_todo @@ -674,893 +674,11 @@ index 0fcdbe7ca648..623f01772bec 100644 -- 2.12.2 -From 944690cdb5f48d03842365b7359fe090d6c2b1fa Mon Sep 17 00:00:00 2001 -From: Bart Van Assche -Date: Tue, 14 Feb 2017 10:56:30 -0800 -Subject: [PATCH 016/251] IB/srp: Avoid that duplicate responses trigger a - kernel bug -Content-Length: 1979 -Lines: 54 - -commit 6cb72bc1b40bb2c1750ee7a5ebade93bed49a5fb upstream. - -After srp_process_rsp() returns there is a short time during which -the scsi_host_find_tag() call will return a pointer to the SCSI -command that is being completed. 
If during that time a duplicate -response is received, avoid that the following call stack appears: - -BUG: unable to handle kernel NULL pointer dereference at (null) -IP: srp_recv_done+0x450/0x6b0 [ib_srp] -Oops: 0000 [#1] SMP -CPU: 10 PID: 0 Comm: swapper/10 Not tainted 4.10.0-rc7-dbg+ #1 -Call Trace: - - __ib_process_cq+0x4b/0xd0 [ib_core] - ib_poll_handler+0x1d/0x70 [ib_core] - irq_poll_softirq+0xba/0x120 - __do_softirq+0xba/0x4c0 - irq_exit+0xbe/0xd0 - smp_apic_timer_interrupt+0x38/0x50 - apic_timer_interrupt+0x90/0xa0 - -RIP: srp_recv_done+0x450/0x6b0 [ib_srp] RSP: ffff88046f483e20 - -Signed-off-by: Bart Van Assche -Cc: Israel Rukshin -Cc: Max Gurtovoy -Cc: Laurence Oberman -Cc: Steve Feeley -Reviewed-by: Leon Romanovsky -Signed-off-by: Doug Ledford -Signed-off-by: Greg Kroah-Hartman ---- - drivers/infiniband/ulp/srp/ib_srp.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c -index 5f0f4fc58f43..57a8a200e741 100644 ---- a/drivers/infiniband/ulp/srp/ib_srp.c -+++ b/drivers/infiniband/ulp/srp/ib_srp.c -@@ -1795,9 +1795,11 @@ static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp) - complete(&ch->tsk_mgmt_done); - } else { - scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag); -- if (scmnd) { -+ if (scmnd && scmnd->host_scribble) { - req = (void *)scmnd->host_scribble; - scmnd = srp_claim_req(ch, req, NULL, scmnd); -+ } else { -+ scmnd = NULL; - } - if (!scmnd) { - shost_printk(KERN_ERR, target->scsi_host, --- -2.12.2 - -From 696255449b89af5487bce53b1a65eddedc72aeff Mon Sep 17 00:00:00 2001 -From: Bart Van Assche -Date: Tue, 14 Feb 2017 10:56:31 -0800 -Subject: [PATCH 017/251] IB/srp: Fix race conditions related to task - management -Content-Length: 5896 -Lines: 169 - -commit 0a6fdbdeb1c25e31763c1fb333fa2723a7d2aba6 upstream. 
- -Avoid that srp_process_rsp() overwrites the status information -in ch if the SRP target response timed out and processing of -another task management function has already started. Avoid that -issuing multiple task management functions concurrently triggers -list corruption. This patch prevents that the following stack -trace appears in the system log: - -WARNING: CPU: 8 PID: 9269 at lib/list_debug.c:52 __list_del_entry_valid+0xbc/0xc0 -list_del corruption. prev->next should be ffffc90004bb7b00, but was ffff8804052ecc68 -CPU: 8 PID: 9269 Comm: sg_reset Tainted: G W 4.10.0-rc7-dbg+ #3 -Call Trace: - dump_stack+0x68/0x93 - __warn+0xc6/0xe0 - warn_slowpath_fmt+0x4a/0x50 - __list_del_entry_valid+0xbc/0xc0 - wait_for_completion_timeout+0x12e/0x170 - srp_send_tsk_mgmt+0x1ef/0x2d0 [ib_srp] - srp_reset_device+0x5b/0x110 [ib_srp] - scsi_ioctl_reset+0x1c7/0x290 - scsi_ioctl+0x12a/0x420 - sd_ioctl+0x9d/0x100 - blkdev_ioctl+0x51e/0x9f0 - block_ioctl+0x38/0x40 - do_vfs_ioctl+0x8f/0x700 - SyS_ioctl+0x3c/0x70 - entry_SYSCALL_64_fastpath+0x18/0xad - -Signed-off-by: Bart Van Assche -Cc: Israel Rukshin -Cc: Max Gurtovoy -Cc: Laurence Oberman -Cc: Steve Feeley -Signed-off-by: Doug Ledford -Signed-off-by: Greg Kroah-Hartman ---- - drivers/infiniband/ulp/srp/ib_srp.c | 45 ++++++++++++++++++++++++------------- - drivers/infiniband/ulp/srp/ib_srp.h | 1 + - 2 files changed, 30 insertions(+), 16 deletions(-) - -diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c -index 57a8a200e741..e397f1b0af09 100644 ---- a/drivers/infiniband/ulp/srp/ib_srp.c -+++ b/drivers/infiniband/ulp/srp/ib_srp.c -@@ -1787,12 +1787,17 @@ static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp) - if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) { - spin_lock_irqsave(&ch->lock, flags); - ch->req_lim += be32_to_cpu(rsp->req_lim_delta); -+ if (rsp->tag == ch->tsk_mgmt_tag) { -+ ch->tsk_mgmt_status = -1; -+ if (be32_to_cpu(rsp->resp_data_len) >= 4) -+ ch->tsk_mgmt_status = 
rsp->data[3]; -+ complete(&ch->tsk_mgmt_done); -+ } else { -+ shost_printk(KERN_ERR, target->scsi_host, -+ "Received tsk mgmt response too late for tag %#llx\n", -+ rsp->tag); -+ } - spin_unlock_irqrestore(&ch->lock, flags); -- -- ch->tsk_mgmt_status = -1; -- if (be32_to_cpu(rsp->resp_data_len) >= 4) -- ch->tsk_mgmt_status = rsp->data[3]; -- complete(&ch->tsk_mgmt_done); - } else { - scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag); - if (scmnd && scmnd->host_scribble) { -@@ -2471,19 +2476,18 @@ srp_change_queue_depth(struct scsi_device *sdev, int qdepth) - } - - static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun, -- u8 func) -+ u8 func, u8 *status) - { - struct srp_target_port *target = ch->target; - struct srp_rport *rport = target->rport; - struct ib_device *dev = target->srp_host->srp_dev->dev; - struct srp_iu *iu; - struct srp_tsk_mgmt *tsk_mgmt; -+ int res; - - if (!ch->connected || target->qp_in_error) - return -1; - -- init_completion(&ch->tsk_mgmt_done); -- - /* - * Lock the rport mutex to avoid that srp_create_ch_ib() is - * invoked while a task management function is being sent. 
-@@ -2506,10 +2510,16 @@ static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun, - - tsk_mgmt->opcode = SRP_TSK_MGMT; - int_to_scsilun(lun, &tsk_mgmt->lun); -- tsk_mgmt->tag = req_tag | SRP_TAG_TSK_MGMT; - tsk_mgmt->tsk_mgmt_func = func; - tsk_mgmt->task_tag = req_tag; - -+ spin_lock_irq(&ch->lock); -+ ch->tsk_mgmt_tag = (ch->tsk_mgmt_tag + 1) | SRP_TAG_TSK_MGMT; -+ tsk_mgmt->tag = ch->tsk_mgmt_tag; -+ spin_unlock_irq(&ch->lock); -+ -+ init_completion(&ch->tsk_mgmt_done); -+ - ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt, - DMA_TO_DEVICE); - if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) { -@@ -2518,13 +2528,15 @@ static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun, - - return -1; - } -+ res = wait_for_completion_timeout(&ch->tsk_mgmt_done, -+ msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)); -+ if (res > 0 && status) -+ *status = ch->tsk_mgmt_status; - mutex_unlock(&rport->mutex); - -- if (!wait_for_completion_timeout(&ch->tsk_mgmt_done, -- msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS))) -- return -1; -+ WARN_ON_ONCE(res < 0); - -- return 0; -+ return res > 0 ? 
0 : -1; - } - - static int srp_abort(struct scsi_cmnd *scmnd) -@@ -2550,7 +2562,7 @@ static int srp_abort(struct scsi_cmnd *scmnd) - shost_printk(KERN_ERR, target->scsi_host, - "Sending SRP abort for tag %#x\n", tag); - if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun, -- SRP_TSK_ABORT_TASK) == 0) -+ SRP_TSK_ABORT_TASK, NULL) == 0) - ret = SUCCESS; - else if (target->rport->state == SRP_RPORT_LOST) - ret = FAST_IO_FAIL; -@@ -2568,14 +2580,15 @@ static int srp_reset_device(struct scsi_cmnd *scmnd) - struct srp_target_port *target = host_to_target(scmnd->device->host); - struct srp_rdma_ch *ch; - int i; -+ u8 status; - - shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n"); - - ch = &target->ch[0]; - if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun, -- SRP_TSK_LUN_RESET)) -+ SRP_TSK_LUN_RESET, &status)) - return FAILED; -- if (ch->tsk_mgmt_status) -+ if (status) - return FAILED; - - for (i = 0; i < target->ch_count; i++) { -diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h -index f6af531f9f32..109eea94d0f9 100644 ---- a/drivers/infiniband/ulp/srp/ib_srp.h -+++ b/drivers/infiniband/ulp/srp/ib_srp.h -@@ -168,6 +168,7 @@ struct srp_rdma_ch { - int max_ti_iu_len; - int comp_vector; - -+ u64 tsk_mgmt_tag; - struct completion tsk_mgmt_done; - u8 tsk_mgmt_status; - bool connected; --- -2.12.2 - -From ca739e3fd7dc803d526ea5bb9b80c0d07fbca55f Mon Sep 17 00:00:00 2001 -From: Nicholas Bellinger -Date: Wed, 22 Feb 2017 22:06:32 -0800 -Subject: [PATCH 020/251] target: Fix NULL dereference during LUN lookup + - active I/O shutdown -Content-Length: 6768 -Lines: 191 - -commit bd4e2d2907fa23a11d46217064ecf80470ddae10 upstream. 
- -When transport_clear_lun_ref() is shutting down a se_lun via -configfs with new I/O in-flight, it's possible to trigger a -NULL pointer dereference in transport_lookup_cmd_lun() due -to the fact percpu_ref_get() doesn't do any __PERCPU_REF_DEAD -checking before incrementing lun->lun_ref.count after -lun->lun_ref has switched to atomic_t mode. - -This results in a NULL pointer dereference as LUN shutdown -code in core_tpg_remove_lun() continues running after the -existing ->release() -> core_tpg_lun_ref_release() callback -completes, and clears the RCU protected se_lun->lun_se_dev -pointer. - -During the OOPs, the state of lun->lun_ref in the process -which triggered the NULL pointer dereference looks like -the following on v4.1.y stable code: - -struct se_lun { - lun_link_magic = 4294932337, - lun_status = TRANSPORT_LUN_STATUS_FREE, - - ..... - - lun_se_dev = 0x0, - lun_sep = 0x0, - - ..... - - lun_ref = { - count = { - counter = 1 - }, - percpu_count_ptr = 3, - release = 0xffffffffa02fa1e0 , - confirm_switch = 0x0, - force_atomic = false, - rcu = { - next = 0xffff88154fa1a5d0, - func = 0xffffffff8137c4c0 - } - } -} - -To address this bug, use percpu_ref_tryget_live() to ensure -once __PERCPU_REF_DEAD is visable on all CPUs and ->lun_ref -has switched to atomic_t, all new I/Os will fail to obtain -a new lun->lun_ref reference. - -Also use an explicit percpu_ref_kill_and_confirm() callback -to block on ->lun_ref_comp to allow the first stage and -associated RCU grace period to complete, and then block on -->lun_ref_shutdown waiting for the final percpu_ref_put() -to drop the last reference via transport_lun_remove_cmd() -before continuing with core_tpg_remove_lun() shutdown. - -Reported-by: Rob Millner -Tested-by: Rob Millner -Cc: Rob Millner -Tested-by: Vaibhav Tandon -Cc: Vaibhav Tandon -Tested-by: Bryant G. 
Ly -Signed-off-by: Nicholas Bellinger -Signed-off-by: Greg Kroah-Hartman ---- - drivers/target/target_core_device.c | 10 ++++++++-- - drivers/target/target_core_tpg.c | 3 ++- - drivers/target/target_core_transport.c | 31 ++++++++++++++++++++++++++++++- - include/target/target_core_base.h | 1 + - 4 files changed, 41 insertions(+), 4 deletions(-) - -diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c -index 356c80fbb304..bb6a6c35324a 100644 ---- a/drivers/target/target_core_device.c -+++ b/drivers/target/target_core_device.c -@@ -77,12 +77,16 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd, u64 unpacked_lun) - &deve->read_bytes); - - se_lun = rcu_dereference(deve->se_lun); -+ -+ if (!percpu_ref_tryget_live(&se_lun->lun_ref)) { -+ se_lun = NULL; -+ goto out_unlock; -+ } -+ - se_cmd->se_lun = rcu_dereference(deve->se_lun); - se_cmd->pr_res_key = deve->pr_res_key; - se_cmd->orig_fe_lun = unpacked_lun; - se_cmd->se_cmd_flags |= SCF_SE_LUN_CMD; -- -- percpu_ref_get(&se_lun->lun_ref); - se_cmd->lun_ref_active = true; - - if ((se_cmd->data_direction == DMA_TO_DEVICE) && -@@ -96,6 +100,7 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd, u64 unpacked_lun) - goto ref_dev; - } - } -+out_unlock: - rcu_read_unlock(); - - if (!se_lun) { -@@ -826,6 +831,7 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) - xcopy_lun = &dev->xcopy_lun; - rcu_assign_pointer(xcopy_lun->lun_se_dev, dev); - init_completion(&xcopy_lun->lun_ref_comp); -+ init_completion(&xcopy_lun->lun_shutdown_comp); - INIT_LIST_HEAD(&xcopy_lun->lun_deve_list); - INIT_LIST_HEAD(&xcopy_lun->lun_dev_link); - mutex_init(&xcopy_lun->lun_tg_pt_md_mutex); -diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c -index 028854cda97b..2794c6ec5c3c 100644 ---- a/drivers/target/target_core_tpg.c -+++ b/drivers/target/target_core_tpg.c -@@ -539,7 +539,7 @@ static void core_tpg_lun_ref_release(struct percpu_ref *ref) - { - struct se_lun *lun = 
container_of(ref, struct se_lun, lun_ref); - -- complete(&lun->lun_ref_comp); -+ complete(&lun->lun_shutdown_comp); - } - - int core_tpg_register( -@@ -666,6 +666,7 @@ struct se_lun *core_tpg_alloc_lun( - lun->lun_link_magic = SE_LUN_LINK_MAGIC; - atomic_set(&lun->lun_acl_count, 0); - init_completion(&lun->lun_ref_comp); -+ init_completion(&lun->lun_shutdown_comp); - INIT_LIST_HEAD(&lun->lun_deve_list); - INIT_LIST_HEAD(&lun->lun_dev_link); - atomic_set(&lun->lun_tg_pt_secondary_offline, 0); -diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c -index befe22744802..df2059984e14 100644 ---- a/drivers/target/target_core_transport.c -+++ b/drivers/target/target_core_transport.c -@@ -2680,10 +2680,39 @@ void target_wait_for_sess_cmds(struct se_session *se_sess) - } - EXPORT_SYMBOL(target_wait_for_sess_cmds); - -+static void target_lun_confirm(struct percpu_ref *ref) -+{ -+ struct se_lun *lun = container_of(ref, struct se_lun, lun_ref); -+ -+ complete(&lun->lun_ref_comp); -+} -+ - void transport_clear_lun_ref(struct se_lun *lun) - { -- percpu_ref_kill(&lun->lun_ref); -+ /* -+ * Mark the percpu-ref as DEAD, switch to atomic_t mode, drop -+ * the initial reference and schedule confirm kill to be -+ * executed after one full RCU grace period has completed. -+ */ -+ percpu_ref_kill_and_confirm(&lun->lun_ref, target_lun_confirm); -+ /* -+ * The first completion waits for percpu_ref_switch_to_atomic_rcu() -+ * to call target_lun_confirm after lun->lun_ref has been marked -+ * as __PERCPU_REF_DEAD on all CPUs, and switches to atomic_t -+ * mode so that percpu_ref_tryget_live() lookup of lun->lun_ref -+ * fails for all new incoming I/O. -+ */ - wait_for_completion(&lun->lun_ref_comp); -+ /* -+ * The second completion waits for percpu_ref_put_many() to -+ * invoke ->release() after lun->lun_ref has switched to -+ * atomic_t mode, and lun->lun_ref.count has reached zero. 
-+ * -+ * At this point all target-core lun->lun_ref references have -+ * been dropped via transport_lun_remove_cmd(), and it's safe -+ * to proceed with the remaining LUN shutdown. -+ */ -+ wait_for_completion(&lun->lun_shutdown_comp); - } - - static bool -diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h -index 800fe16cc36f..ed66414b91f0 100644 ---- a/include/target/target_core_base.h -+++ b/include/target/target_core_base.h -@@ -740,6 +740,7 @@ struct se_lun { - struct config_group lun_group; - struct se_port_stat_grps port_stat_grps; - struct completion lun_ref_comp; -+ struct completion lun_shutdown_comp; - struct percpu_ref lun_ref; - struct list_head lun_dev_link; - struct hlist_node link; --- -2.12.2 - -From 0d80ac62b609bce00b78a656b7cdde2d8f587345 Mon Sep 17 00:00:00 2001 -From: Alex Deucher -Date: Fri, 10 Feb 2017 00:00:52 -0500 -Subject: [PATCH 025/251] drm/amdgpu: add more cases to DCE11 possible crtc - mask setup -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit -Content-Length: 1129 -Lines: 38 - -commit 4ce3bd45b351633f2a0512c587f7fcba2ce044e8 upstream. - -Add cases for asics with 3 and 5 crtcs. Fixes an artificial -limitation on asics with 3 or 5 crtcs. 
- -Fixes: -https://bugs.freedesktop.org/show_bug.cgi?id=99744 - -Reviewed-by: Michel Dänzer -Reviewed-by: Christian König -Signed-off-by: Alex Deucher -Signed-off-by: Greg Kroah-Hartman ---- - drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c -index c161eeda417b..267749a94c5a 100644 ---- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c -+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c -@@ -3704,9 +3704,15 @@ static void dce_v11_0_encoder_add(struct amdgpu_device *adev, - default: - encoder->possible_crtcs = 0x3; - break; -+ case 3: -+ encoder->possible_crtcs = 0x7; -+ break; - case 4: - encoder->possible_crtcs = 0xf; - break; -+ case 5: -+ encoder->possible_crtcs = 0x1f; -+ break; - case 6: - encoder->possible_crtcs = 0x3f; - break; --- -2.12.2 - -From 8b787652386e26c7974092f11bd477126b0d53ce Mon Sep 17 00:00:00 2001 -From: "Y.C. Chen" -Date: Wed, 22 Feb 2017 15:10:50 +1100 -Subject: [PATCH 026/251] drm/ast: Fix test for VGA enabled -Content-Length: 1240 -Lines: 38 - -commit 905f21a49d388de3e99438235f3301cabf0c0ef4 upstream. - -The test to see if VGA was already enabled is doing an unnecessary -second test from a register that may or may not have been initialized -to a valid value. Remove it. - -Signed-off-by: Y.C. Chen -Signed-off-by: Benjamin Herrenschmidt -Acked-by: Joel Stanley -Tested-by: Y.C. 
Chen -Signed-off-by: Dave Airlie -Signed-off-by: Greg Kroah-Hartman ---- - drivers/gpu/drm/ast/ast_post.c | 8 ++------ - 1 file changed, 2 insertions(+), 6 deletions(-) - -diff --git a/drivers/gpu/drm/ast/ast_post.c b/drivers/gpu/drm/ast/ast_post.c -index 810c51d92b99..4e8aaab5dd52 100644 ---- a/drivers/gpu/drm/ast/ast_post.c -+++ b/drivers/gpu/drm/ast/ast_post.c -@@ -58,13 +58,9 @@ bool ast_is_vga_enabled(struct drm_device *dev) - /* TODO 1180 */ - } else { - ch = ast_io_read8(ast, AST_IO_VGA_ENABLE_PORT); -- if (ch) { -- ast_open_key(ast); -- ch = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb6, 0xff); -- return ch & 0x04; -- } -+ return !!(ch & 0x01); - } -- return 0; -+ return false; - } - - static const u8 extreginfo[] = { 0x0f, 0x04, 0x1c, 0xff }; --- -2.12.2 - -From 93eab4f5259485e9cad0339a298b6da1dd2e6e40 Mon Sep 17 00:00:00 2001 -From: "Y.C. Chen" -Date: Wed, 22 Feb 2017 15:14:19 +1100 -Subject: [PATCH 027/251] drm/ast: Call open_key before enable_mmio in POST - code -Content-Length: 1014 -Lines: 30 - -commit 9bb92f51558f2ef5f56c257bdcea0588f31d857e upstream. - -open_key enables access the registers used by enable_mmio - -Signed-off-by: Y.C. Chen -Signed-off-by: Benjamin Herrenschmidt -Acked-by: Joel Stanley -Tested-by: Y.C. 
Chen -Signed-off-by: Dave Airlie -Signed-off-by: Greg Kroah-Hartman ---- - drivers/gpu/drm/ast/ast_post.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/drivers/gpu/drm/ast/ast_post.c b/drivers/gpu/drm/ast/ast_post.c -index 4e8aaab5dd52..50836e549867 100644 ---- a/drivers/gpu/drm/ast/ast_post.c -+++ b/drivers/gpu/drm/ast/ast_post.c -@@ -371,8 +371,8 @@ void ast_post_gpu(struct drm_device *dev) - pci_write_config_dword(ast->dev->pdev, 0x04, reg); - - ast_enable_vga(dev); -- ast_enable_mmio(dev); - ast_open_key(ast); -+ ast_enable_mmio(dev); - ast_set_def_ext_reg(dev); - - if (ast->chip == AST2300 || ast->chip == AST2400) --- -2.12.2 - -From b9cfd5517b309513e50d80b89eaae98a82a2c3b1 Mon Sep 17 00:00:00 2001 -From: "Y.C. Chen" -Date: Thu, 23 Feb 2017 15:52:33 +0800 -Subject: [PATCH 028/251] drm/ast: Fix AST2400 POST failure without BMC FW or - VBIOS -Content-Length: 2034 -Lines: 70 - -commit 3856081eede297b617560b85e948cfb00bb395ec upstream. - -The current POST code for the AST2300/2400 family doesn't work properly -if the chip hasn't been initialized previously by either the BMC own FW -or the VBIOS. This fixes it. - -Signed-off-by: Y.C. Chen -Signed-off-by: Benjamin Herrenschmidt -Tested-by: Y.C. 
Chen -Acked-by: Joel Stanley -Signed-off-by: Dave Airlie -Signed-off-by: Greg Kroah-Hartman ---- - drivers/gpu/drm/ast/ast_post.c | 38 +++++++++++++++++++++++++++++++++++--- - 1 file changed, 35 insertions(+), 3 deletions(-) - -diff --git a/drivers/gpu/drm/ast/ast_post.c b/drivers/gpu/drm/ast/ast_post.c -index 50836e549867..30672a3df8a9 100644 ---- a/drivers/gpu/drm/ast/ast_post.c -+++ b/drivers/gpu/drm/ast/ast_post.c -@@ -1626,12 +1626,44 @@ static void ast_init_dram_2300(struct drm_device *dev) - temp |= 0x73; - ast_write32(ast, 0x12008, temp); - -+ param.dram_freq = 396; - param.dram_type = AST_DDR3; -+ temp = ast_mindwm(ast, 0x1e6e2070); - if (temp & 0x01000000) - param.dram_type = AST_DDR2; -- param.dram_chipid = ast->dram_type; -- param.dram_freq = ast->mclk; -- param.vram_size = ast->vram_size; -+ switch (temp & 0x18000000) { -+ case 0: -+ param.dram_chipid = AST_DRAM_512Mx16; -+ break; -+ default: -+ case 0x08000000: -+ param.dram_chipid = AST_DRAM_1Gx16; -+ break; -+ case 0x10000000: -+ param.dram_chipid = AST_DRAM_2Gx16; -+ break; -+ case 0x18000000: -+ param.dram_chipid = AST_DRAM_4Gx16; -+ break; -+ } -+ switch (temp & 0x0c) { -+ default: -+ case 0x00: -+ param.vram_size = AST_VIDMEM_SIZE_8M; -+ break; -+ -+ case 0x04: -+ param.vram_size = AST_VIDMEM_SIZE_16M; -+ break; -+ -+ case 0x08: -+ param.vram_size = AST_VIDMEM_SIZE_32M; -+ break; -+ -+ case 0x0c: -+ param.vram_size = AST_VIDMEM_SIZE_64M; -+ break; -+ } - - if (param.dram_type == AST_DDR3) { - get_ddr3_info(ast, &param); --- -2.12.2 - -From 36fd36b900b9382af54a1e49a81cd99663b83eda Mon Sep 17 00:00:00 2001 -From: Tomeu Vizoso -Date: Mon, 20 Feb 2017 16:25:45 +0100 -Subject: [PATCH 029/251] drm/edid: Add EDID_QUIRK_FORCE_8BPC quirk for Rotel - RSX-1058 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit -Content-Length: 1596 -Lines: 44 - -commit 36fc579761b50784b63dafd0f2e796b659e0f5ee upstream. 
- -Rotel RSX-1058 is a receiver with 4 HDMI inputs and a HDMI output, all -1.1. - -When a sink that supports deep color is connected to the output, the -receiver will send EDIDs that advertise this capability, even if it -isn't possible with HDMI versions earlier than 1.3. - -Currently the kernel is assuming that deep color is possible and the -sink displays an error. - -This quirk will make sure that deep color isn't used with this -particular receiver. - -Fixes: 7a0baa623446 ("Revert "drm/i915: Disable 12bpc hdmi for now"") -Signed-off-by: Tomeu Vizoso -Link: http://patchwork.freedesktop.org/patch/msgid/20170220152545.13153-1-tomeu.vizoso@collabora.com -Cc: Matt Horan -Tested-by: Matt Horan -Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99869 -Reviewed-by: Ville Syrjälä -Signed-off-by: Ville Syrjälä -Signed-off-by: Greg Kroah-Hartman ---- - drivers/gpu/drm/drm_edid.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c -index 8c9ac021608f..cc1e16fd7e76 100644 ---- a/drivers/gpu/drm/drm_edid.c -+++ b/drivers/gpu/drm/drm_edid.c -@@ -144,6 +144,9 @@ static struct edid_quirk { - - /* Panel in Samsung NP700G7A-S01PL notebook reports 6bpc */ - { "SEC", 0xd033, EDID_QUIRK_FORCE_8BPC }, -+ -+ /* Rotel RSX-1058 forwards sink's EDID but only does HDMI 1.1*/ -+ { "ETR", 13896, EDID_QUIRK_FORCE_8BPC }, - }; - - /* --- -2.12.2 - -From 59fc34fc69066bfabf8bed21f4ce5bf312e68bb3 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Michel=20D=C3=A4nzer?= -Date: Wed, 25 Jan 2017 17:21:31 +0900 -Subject: [PATCH 030/251] drm/ttm: Make sure BOs being swapped out are - cacheable -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit -Content-Length: 1580 -Lines: 44 - -commit 239ac65fa5ffab71adf66e642750f940e7241d99 upstream. - -The current caching state may not be tt_cached, even though the -placement contains TTM_PL_FLAG_CACHED, because placement can contain -multiple caching flags. 
Trying to swap out such a BO would trip up the - - BUG_ON(ttm->caching_state != tt_cached); - -in ttm_tt_swapout. - -Signed-off-by: Michel Dänzer -Reviewed-by: Thomas Hellstrom -Reviewed-by: Christian König . -Reviewed-by: Sinclair Yeh -Signed-off-by: Christian König -Signed-off-by: Greg Kroah-Hartman ---- - drivers/gpu/drm/ttm/ttm_bo.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c -index 4ae8b56b1847..037c38bb5333 100644 ---- a/drivers/gpu/drm/ttm/ttm_bo.c -+++ b/drivers/gpu/drm/ttm/ttm_bo.c -@@ -1621,7 +1621,6 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink) - struct ttm_buffer_object *bo; - int ret = -EBUSY; - int put_count; -- uint32_t swap_placement = (TTM_PL_FLAG_CACHED | TTM_PL_FLAG_SYSTEM); - - spin_lock(&glob->lru_lock); - list_for_each_entry(bo, &glob->swap_lru, swap) { -@@ -1657,7 +1656,8 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink) - if (unlikely(ret != 0)) - goto out; - -- if ((bo->mem.placement & swap_placement) != swap_placement) { -+ if (bo->mem.mem_type != TTM_PL_SYSTEM || -+ bo->ttm->caching_state != tt_cached) { - struct ttm_mem_reg evict_mem; - - evict_mem = bo->mem; --- -2.12.2 - -From bb5b96344ed378a1d5b8cf3bd149bb86919f3b9f Mon Sep 17 00:00:00 2001 -From: Dan Carpenter -Date: Wed, 8 Feb 2017 02:46:01 +0300 -Subject: [PATCH 031/251] drm/atomic: fix an error code in mode_fixup() -Content-Length: 1297 -Lines: 34 - -commit f9ad86e42d0303eeb8e0d41bb208153022ebd9d2 upstream. - -Having "ret" be a bool type works for everything except -ret = funcs->atomic_check(). The other functions all return zero on -error but ->atomic_check() returns negative error codes. We want to -propagate the error code but instead we return 1. - -I found this bug with static analysis and I don't know if it affects -run time. 
- -Fixes: 4cd4df8080a3 ("drm/atomic: Add ->atomic_check() to encoder helpers") -Signed-off-by: Dan Carpenter -Signed-off-by: Daniel Vetter -Link: http://patchwork.freedesktop.org/patch/msgid/20170207234601.GA23981@mwanda -Signed-off-by: Greg Kroah-Hartman ---- - drivers/gpu/drm/drm_atomic_helper.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c -index 1ac29d703c12..ea443fafb934 100644 ---- a/drivers/gpu/drm/drm_atomic_helper.c -+++ b/drivers/gpu/drm/drm_atomic_helper.c -@@ -265,7 +265,7 @@ mode_fixup(struct drm_atomic_state *state) - struct drm_connector *connector; - struct drm_connector_state *conn_state; - int i; -- bool ret; -+ int ret; - - for_each_crtc_in_state(state, crtc, crtc_state, i) { - if (!crtc_state->mode_changed && --- -2.12.2 - -From 7952b6490bbce45e078c8c0e669df7a0a8f8948a Mon Sep 17 00:00:00 2001 -From: Hans de Goede -Date: Fri, 2 Dec 2016 15:29:04 +0100 -Subject: [PATCH 033/251] drm/i915/dsi: Do not clear DPOUNIT_CLOCK_GATE_DISABLE - from vlv_init_display_clock_gating -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit -Content-Length: 2606 -Lines: 62 - -commit bb98e72adaf9d19719aba35f802d4836f5d5176c upstream. - -On my Cherrytrail CUBE iwork8 Air tablet PIPE-A would get stuck on loading -i915 at boot 1 out of every 3 boots, resulting in a non functional LCD. -Once the i915 driver has successfully loaded, the panel can be disabled / -enabled without hitting this issue. - -The getting stuck is caused by vlv_init_display_clock_gating() clearing -the DPOUNIT_CLOCK_GATE_DISABLE bit in DSPCLK_GATE_D when called from -chv_pipe_power_well_ops.enable() on driver load, while a pipe is enabled -driving the DSI LCD by the BIOS. - -Clearing this bit while DSI is in use is a known issue and -intel_dsi_pre_enable() / intel_dsi_post_disable() already set / clear it -as appropriate. 
- -This commit modifies vlv_init_display_clock_gating() to leave the -DPOUNIT_CLOCK_GATE_DISABLE bit alone fixing the pipe getting stuck. - -Changes in v2: --Replace PIPE-A with "a pipe" or "the pipe" in the commit msg and -comment - -Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97330 -Signed-off-by: Hans de Goede -Reviewed-by: Ville Syrjälä -Link: http://patchwork.freedesktop.org/patch/msgid/20161202142904.25613-1-hdegoede@redhat.com -Signed-off-by: Ville Syrjälä -(cherry picked from commit 721d484563e1a51ada760089c490cbc47e909756) -Signed-off-by: Jani Nikula -Signed-off-by: River Zhou -Signed-off-by: Greg Kroah-Hartman ---- - drivers/gpu/drm/i915/intel_pm.c | 13 ++++++++++++- - 1 file changed, 12 insertions(+), 1 deletion(-) - -diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c -index 3f802163f7d4..e7c18519274a 100644 ---- a/drivers/gpu/drm/i915/intel_pm.c -+++ b/drivers/gpu/drm/i915/intel_pm.c -@@ -6803,7 +6803,18 @@ static void ivybridge_init_clock_gating(struct drm_device *dev) - - static void vlv_init_display_clock_gating(struct drm_i915_private *dev_priv) - { -- I915_WRITE(DSPCLK_GATE_D, VRHUNIT_CLOCK_GATE_DISABLE); -+ u32 val; -+ -+ /* -+ * On driver load, a pipe may be active and driving a DSI display. -+ * Preserve DPOUNIT_CLOCK_GATE_DISABLE to avoid the pipe getting stuck -+ * (and never recovering) in this case. intel_dsi_post_disable() will -+ * clear it when we turn off the display. 
-+ */ -+ val = I915_READ(DSPCLK_GATE_D); -+ val &= DPOUNIT_CLOCK_GATE_DISABLE; -+ val |= VRHUNIT_CLOCK_GATE_DISABLE; -+ I915_WRITE(DSPCLK_GATE_D, val); - - /* - * Disable trickle feed and enable pnd deadline calculation --- -2.12.2 - From 804a935963a91acd1764ba914f825dd2a29c5871 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 15 Mar 2017 09:57:56 +0800 Subject: [PATCH 037/251] Linux 4.4.54 +Status: RO Content-Length: 301 Lines: 18 @@ -2065,160 +1183,33 @@ index 582995aaaf4e..f42834c7f007 100644 -- 2.12.2 -From 5e45d834f762312e3031a8b6bba3bc2b1f9481ec Mon Sep 17 00:00:00 2001 -From: Arnd Bergmann -Date: Mon, 16 Jan 2017 14:20:54 +0100 -Subject: [PATCH 049/251] cpmac: remove hopeless #warning -Content-Length: 1108 -Lines: 32 +From 2e4aff2405af6a4573299dee361a44903c9bb717 Mon Sep 17 00:00:00 2001 +From: Ralf Baechle +Date: Tue, 20 Sep 2016 14:33:01 +0200 +Subject: [PATCH 051/251] MIPS: DEC: Avoid la pseudo-instruction in delay slots +Content-Length: 2448 +Lines: 81 + +commit 3021773c7c3e75e20b693931a19362681e744ea9 upstream. -commit d43e6fb4ac4abfe4ef7c102833ed02330ad701e0 upstream. +When expanding the la or dla pseudo-instruction in a delay slot the GNU +assembler will complain should the pseudo-instruction expand to multiple +actual instructions, since only the first of them will be in the delay +slot leading to the pseudo-instruction being only partially executed if +the branch is taken. Use of PTR_LA in the dec int-handler.S leads to +such warnings: -The #warning was present 10 years ago when the driver first got merged. -As the platform is rather obsolete by now, it seems very unlikely that -the warning will cause anyone to fix the code properly. 
+ arch/mips/dec/int-handler.S: Assembler messages: + arch/mips/dec/int-handler.S:149: Warning: macro instruction expanded into multiple instructions in a branch delay slot + arch/mips/dec/int-handler.S:198: Warning: macro instruction expanded into multiple instructions in a branch delay slot -kernelci.org reports the warning for every build in the meantime, so -I think it's better to just turn it into a code comment to reduce -noise. +Avoid this by open coding the PTR_LA macros. -Signed-off-by: Arnd Bergmann -Signed-off-by: David S. Miller +Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- - drivers/net/ethernet/ti/cpmac.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c -index d52ea3008946..7e8bce46e6b4 100644 ---- a/drivers/net/ethernet/ti/cpmac.c -+++ b/drivers/net/ethernet/ti/cpmac.c -@@ -1237,7 +1237,7 @@ int cpmac_init(void) - goto fail_alloc; - } - --#warning FIXME: unhardcode gpio&reset bits -+ /* FIXME: unhardcode gpio&reset bits */ - ar7_gpio_disable(26); - ar7_gpio_disable(27); - ar7_device_reset(AR7_RESET_BIT_CPMAC_LO); --- -2.12.2 - -From 5fad17434465a9e9ddddfb38a162e9e2e53e33a1 Mon Sep 17 00:00:00 2001 -From: Arnd Bergmann -Date: Thu, 25 Aug 2016 15:17:08 -0700 -Subject: [PATCH 050/251] mm: memcontrol: avoid unused function warning -Content-Length: 2551 -Lines: 79 - -commit 358c07fcc3b60ab08d77f1684de8bd81bcf49a1a upstream. - -A bugfix in v4.8-rc2 introduced a harmless warning when -CONFIG_MEMCG_SWAP is disabled but CONFIG_MEMCG is enabled: - - mm/memcontrol.c:4085:27: error: 'mem_cgroup_id_get_online' defined but not used [-Werror=unused-function] - static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg) - -This moves the function inside of the #ifdef block that hides the -calling function, to avoid the warning. 
- -Fixes: 1f47b61fb407 ("mm: memcontrol: fix swap counter leak on swapout from offline cgroup") -Link: http://lkml.kernel.org/r/20160824113733.2776701-1-arnd@arndb.de -Signed-off-by: Arnd Bergmann -Acked-by: Michal Hocko -Acked-by: Vladimir Davydov -Signed-off-by: Andrew Morton -Signed-off-by: Linus Torvalds -Signed-off-by: Greg Kroah-Hartman ---- - mm/memcontrol.c | 36 ++++++++++++++++++------------------ - 1 file changed, 18 insertions(+), 18 deletions(-) - -diff --git a/mm/memcontrol.c b/mm/memcontrol.c -index 43eefe9d834c..e25b93a4267d 100644 ---- a/mm/memcontrol.c -+++ b/mm/memcontrol.c -@@ -4150,24 +4150,6 @@ static void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n) - atomic_add(n, &memcg->id.ref); - } - --static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg) --{ -- while (!atomic_inc_not_zero(&memcg->id.ref)) { -- /* -- * The root cgroup cannot be destroyed, so it's refcount must -- * always be >= 1. -- */ -- if (WARN_ON_ONCE(memcg == root_mem_cgroup)) { -- VM_BUG_ON(1); -- break; -- } -- memcg = parent_mem_cgroup(memcg); -- if (!memcg) -- memcg = root_mem_cgroup; -- } -- return memcg; --} -- - static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n) - { - if (atomic_sub_and_test(n, &memcg->id.ref)) { -@@ -5751,6 +5733,24 @@ static int __init mem_cgroup_init(void) - subsys_initcall(mem_cgroup_init); - - #ifdef CONFIG_MEMCG_SWAP -+static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg) -+{ -+ while (!atomic_inc_not_zero(&memcg->id.ref)) { -+ /* -+ * The root cgroup cannot be destroyed, so it's refcount must -+ * always be >= 1. 
-+ */ -+ if (WARN_ON_ONCE(memcg == root_mem_cgroup)) { -+ VM_BUG_ON(1); -+ break; -+ } -+ memcg = parent_mem_cgroup(memcg); -+ if (!memcg) -+ memcg = root_mem_cgroup; -+ } -+ return memcg; -+} -+ - /** - * mem_cgroup_swapout - transfer a memsw charge to swap - * @page: page whose memsw charge to transfer --- -2.12.2 - -From 2e4aff2405af6a4573299dee361a44903c9bb717 Mon Sep 17 00:00:00 2001 -From: Ralf Baechle -Date: Tue, 20 Sep 2016 14:33:01 +0200 -Subject: [PATCH 051/251] MIPS: DEC: Avoid la pseudo-instruction in delay slots -Content-Length: 2448 -Lines: 81 - -commit 3021773c7c3e75e20b693931a19362681e744ea9 upstream. - -When expanding the la or dla pseudo-instruction in a delay slot the GNU -assembler will complain should the pseudo-instruction expand to multiple -actual instructions, since only the first of them will be in the delay -slot leading to the pseudo-instruction being only partially executed if -the branch is taken. Use of PTR_LA in the dec int-handler.S leads to -such warnings: - - arch/mips/dec/int-handler.S: Assembler messages: - arch/mips/dec/int-handler.S:149: Warning: macro instruction expanded into multiple instructions in a branch delay slot - arch/mips/dec/int-handler.S:198: Warning: macro instruction expanded into multiple instructions in a branch delay slot - -Avoid this by open coding the PTR_LA macros. 
- -Signed-off-by: Ralf Baechle -Signed-off-by: Greg Kroah-Hartman ---- - arch/mips/dec/int-handler.S | 40 ++++++++++++++++++++++++++++++++++++++-- - 1 file changed, 38 insertions(+), 2 deletions(-) + arch/mips/dec/int-handler.S | 40 ++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/arch/mips/dec/int-handler.S b/arch/mips/dec/int-handler.S index 8c6f508e59de..554d1da97743 100644 @@ -2395,54 +1386,12 @@ index 805355b0bd05..f0cc4c9de2bb 100644 -- 2.12.2 -From 074893495b72c043a108797ffd6297db3e4af1dc Mon Sep 17 00:00:00 2001 -From: Rik van Riel -Date: Wed, 28 Sep 2016 22:55:54 -0400 -Subject: [PATCH 053/251] tracing: Add #undef to fix compile error -Content-Length: 1319 -Lines: 35 - -commit bf7165cfa23695c51998231c4efa080fe1d3548d upstream. - -There are several trace include files that define TRACE_INCLUDE_FILE. - -Include several of them in the same .c file (as I currently have in -some code I am working on), and the compile will blow up with a -"warning: "TRACE_INCLUDE_FILE" redefined #define TRACE_INCLUDE_FILE syscalls" - -Every other include file in include/trace/events/ avoids that issue -by having a #undef TRACE_INCLUDE_FILE before the #define; syscalls.h -should have one, too. 
- -Link: http://lkml.kernel.org/r/20160928225554.13bd7ac6@annuminas.surriel.com - -Fixes: b8007ef74222 ("tracing: Separate raw syscall from syscall tracer") -Signed-off-by: Rik van Riel -Signed-off-by: Steven Rostedt (VMware) -Signed-off-by: Greg Kroah-Hartman ---- - include/trace/events/syscalls.h | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/include/trace/events/syscalls.h b/include/trace/events/syscalls.h -index 14e49c798135..b35533b94277 100644 ---- a/include/trace/events/syscalls.h -+++ b/include/trace/events/syscalls.h -@@ -1,5 +1,6 @@ - #undef TRACE_SYSTEM - #define TRACE_SYSTEM raw_syscalls -+#undef TRACE_INCLUDE_FILE - #define TRACE_INCLUDE_FILE syscalls - - #if !defined(_TRACE_EVENTS_SYSCALLS_H) || defined(TRACE_HEADER_MULTI_READ) --- -2.12.2 - From 2ca39d1300152e70977797c3e39c105adfcc0e0b Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 14 Feb 2017 14:46:42 +0530 Subject: [PATCH 054/251] powerpc: Emulation support for load/store instructions on LE +Status: RO Content-Length: 3197 Lines: 106 @@ -2558,6 +1507,7 @@ From: Janosch Frank Date: Thu, 2 Mar 2017 15:23:42 +0100 Subject: [PATCH 068/251] KVM: s390: Fix guest migration for huge guests resulting in panic +Status: RO Content-Length: 1904 Lines: 58 @@ -2620,123 +1570,6 @@ index 8345ae1f117d..05ae254f84cf 100644 -- 2.12.2 -From a084aeef5633db4f649b699785f79676cb71ba6c Mon Sep 17 00:00:00 2001 -From: Michael Holzheu -Date: Tue, 7 Feb 2017 18:09:14 +0100 -Subject: [PATCH 069/251] s390/kdump: Use "LINUX" ELF note name instead of - "CORE" -Content-Length: 3784 -Lines: 108 - -commit a4a81d8eebdc1d209d034f62a082a5131e4242b5 upstream. - -In binutils/libbfd (bfd/elf.c) it is enforced that all s390 specific ELF -notes like e.g. NT_S390_PREFIX or NT_S390_CTRS have "LINUX" specified -as note name. Otherwise the notes are ignored. - -For /proc/vmcore we currently use "CORE" for these notes. 
- -Up to now this has not been a real problem because the dump analysis tool -"crash" does not check the note name. But it will break all programs that -use libbfd for processing ELF notes. - -So fix this and use "LINUX" for all s390 specific notes to comply with -libbfd. - -Reported-by: Philipp Rudo -Reviewed-by: Philipp Rudo -Signed-off-by: Michael Holzheu -Signed-off-by: Martin Schwidefsky -Signed-off-by: Greg Kroah-Hartman ---- - arch/s390/kernel/crash_dump.c | 18 ++++++++++-------- - 1 file changed, 10 insertions(+), 8 deletions(-) - -diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c -index 171e09bb8ea2..f7c3a61040bd 100644 ---- a/arch/s390/kernel/crash_dump.c -+++ b/arch/s390/kernel/crash_dump.c -@@ -23,6 +23,8 @@ - #define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y))) - #define PTR_DIFF(x, y) ((unsigned long)(((char *) (x)) - ((unsigned long) (y)))) - -+#define LINUX_NOTE_NAME "LINUX" -+ - static struct memblock_region oldmem_region; - - static struct memblock_type oldmem_type = { -@@ -312,7 +314,7 @@ static void *nt_fpregset(void *ptr, struct save_area *sa) - static void *nt_s390_timer(void *ptr, struct save_area *sa) - { - return nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer), -- KEXEC_CORE_NOTE_NAME); -+ LINUX_NOTE_NAME); - } - - /* -@@ -321,7 +323,7 @@ static void *nt_s390_timer(void *ptr, struct save_area *sa) - static void *nt_s390_tod_cmp(void *ptr, struct save_area *sa) - { - return nt_init(ptr, NT_S390_TODCMP, &sa->clk_cmp, -- sizeof(sa->clk_cmp), KEXEC_CORE_NOTE_NAME); -+ sizeof(sa->clk_cmp), LINUX_NOTE_NAME); - } - - /* -@@ -330,7 +332,7 @@ static void *nt_s390_tod_cmp(void *ptr, struct save_area *sa) - static void *nt_s390_tod_preg(void *ptr, struct save_area *sa) - { - return nt_init(ptr, NT_S390_TODPREG, &sa->tod_reg, -- sizeof(sa->tod_reg), KEXEC_CORE_NOTE_NAME); -+ sizeof(sa->tod_reg), LINUX_NOTE_NAME); - } - - /* -@@ -339,7 +341,7 @@ static void *nt_s390_tod_preg(void *ptr, struct save_area *sa) - 
static void *nt_s390_ctrs(void *ptr, struct save_area *sa) - { - return nt_init(ptr, NT_S390_CTRS, &sa->ctrl_regs, -- sizeof(sa->ctrl_regs), KEXEC_CORE_NOTE_NAME); -+ sizeof(sa->ctrl_regs), LINUX_NOTE_NAME); - } - - /* -@@ -348,7 +350,7 @@ static void *nt_s390_ctrs(void *ptr, struct save_area *sa) - static void *nt_s390_prefix(void *ptr, struct save_area *sa) - { - return nt_init(ptr, NT_S390_PREFIX, &sa->pref_reg, -- sizeof(sa->pref_reg), KEXEC_CORE_NOTE_NAME); -+ sizeof(sa->pref_reg), LINUX_NOTE_NAME); - } - - /* -@@ -357,7 +359,7 @@ static void *nt_s390_prefix(void *ptr, struct save_area *sa) - static void *nt_s390_vx_high(void *ptr, __vector128 *vx_regs) - { - return nt_init(ptr, NT_S390_VXRS_HIGH, &vx_regs[16], -- 16 * sizeof(__vector128), KEXEC_CORE_NOTE_NAME); -+ 16 * sizeof(__vector128), LINUX_NOTE_NAME); - } - - /* -@@ -370,12 +372,12 @@ static void *nt_s390_vx_low(void *ptr, __vector128 *vx_regs) - int i; - - note = (Elf64_Nhdr *)ptr; -- note->n_namesz = strlen(KEXEC_CORE_NOTE_NAME) + 1; -+ note->n_namesz = strlen(LINUX_NOTE_NAME) + 1; - note->n_descsz = 16 * 8; - note->n_type = NT_S390_VXRS_LOW; - len = sizeof(Elf64_Nhdr); - -- memcpy(ptr + len, KEXEC_CORE_NOTE_NAME, note->n_namesz); -+ memcpy(ptr + len, LINUX_NOTE_NAME, note->n_namesz); - len = roundup(len + note->n_namesz, 4); - - ptr += len; --- -2.12.2 - From 28ec98bc2e4a175b60f45d505e715a33b93dd077 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 18 Mar 2017 19:10:23 +0800 @@ -2763,1899 +1596,6 @@ index 7f54ac081cf3..d9cc21df444d 100644 -- 2.12.2 -From 0c0be310ba29e4a053e8aac934aebe590c5da909 Mon Sep 17 00:00:00 2001 -From: Florian Westphal -Date: Thu, 18 Feb 2016 15:03:24 +0100 -Subject: [PATCH 074/251] netlink: remove mmapped netlink support -Content-Length: 42335 -Lines: 1432 - -commit d1b4c689d4130bcfd3532680b64db562300716b6 upstream. 
- -mmapped netlink has a number of unresolved issues: - -- TX zerocopy support had to be disabled more than a year ago via - commit 4682a0358639b29cf ("netlink: Always copy on mmap TX.") - because the content of the mmapped area can change after netlink - attribute validation but before message processing. - -- RX support was implemented mainly to speed up nfqueue dumping packet - payload to userspace. However, since commit ae08ce0021087a5d812d2 - ("netfilter: nfnetlink_queue: zero copy support") we avoid one copy - with the socket-based interface too (via the skb_zerocopy helper). - -The other problem is that skbs attached to mmaped netlink socket -behave different from normal skbs: - -- they don't have a shinfo area, so all functions that use skb_shinfo() -(e.g. skb_clone) cannot be used. - -- reserving headroom prevents userspace from seeing the content as -it expects message to start at skb->head. -See for instance -commit aa3a022094fa ("netlink: not trim skb for mmaped socket when dump"). - -- skbs handed e.g. to netlink_ack must have non-NULL skb->sk, else we -crash because it needs the sk to check if a tx ring is attached. - -Also not obvious, leads to non-intuitive bug fixes such as 7c7bdf359 -("netfilter: nfnetlink: use original skbuff when acking batches"). - -mmaped netlink also didn't play nicely with the skb_zerocopy helper -used by nfqueue and openvswitch. Daniel Borkmann fixed this via -commit 6bb0fef489f6 ("netlink, mmap: fix edge-case leakages in nf queue -zero-copy")' but at the cost of also needing to provide remaining -length to the allocation function. - -nfqueue also has problems when used with mmaped rx netlink: -- mmaped netlink doesn't allow use of nfqueue batch verdict messages. 
- Problem is that in the mmap case, the allocation time also determines - the ordering in which the frame will be seen by userspace (A - allocating before B means that A is located in earlier ring slot, - but this also means that B might get a lower sequence number then A - since seqno is decided later. To fix this we would need to extend the - spinlocked region to also cover the allocation and message setup which - isn't desirable. -- nfqueue can now be configured to queue large (GSO) skbs to userspace. - Queing GSO packets is faster than having to force a software segmentation - in the kernel, so this is a desirable option. However, with a mmap based - ring one has to use 64kb per ring slot element, else mmap has to fall back - to the socket path (NL_MMAP_STATUS_COPY) for all large packets. - -To use the mmap interface, userspace not only has to probe for mmap netlink -support, it also has to implement a recv/socket receive path in order to -handle messages that exceed the size of an rx ring element. - -Cc: Daniel Borkmann -Cc: Ken-ichirou MATSUZAWA -Cc: Pablo Neira Ayuso -Cc: Patrick McHardy -Cc: Thomas Graf -Signed-off-by: Florian Westphal -Signed-off-by: David S. Miller -Cc: Shi Yuejie -Signed-off-by: Greg Kroah-Hartman ---- - Documentation/networking/netlink_mmap.txt | 332 ------------- - include/uapi/linux/netlink.h | 4 + - include/uapi/linux/netlink_diag.h | 2 + - net/netlink/Kconfig | 9 - - net/netlink/af_netlink.c | 751 +----------------------------- - net/netlink/af_netlink.h | 15 - - net/netlink/diag.c | 39 -- - 7 files changed, 14 insertions(+), 1138 deletions(-) - delete mode 100644 Documentation/networking/netlink_mmap.txt - -diff --git a/Documentation/networking/netlink_mmap.txt b/Documentation/networking/netlink_mmap.txt -deleted file mode 100644 -index 54f10478e8e3..000000000000 ---- a/Documentation/networking/netlink_mmap.txt -+++ /dev/null -@@ -1,332 +0,0 @@ --This file documents how to use memory mapped I/O with netlink. 
-- --Author: Patrick McHardy -- --Overview ---------- -- --Memory mapped netlink I/O can be used to increase throughput and decrease --overhead of unicast receive and transmit operations. Some netlink subsystems --require high throughput, these are mainly the netfilter subsystems --nfnetlink_queue and nfnetlink_log, but it can also help speed up large --dump operations of f.i. the routing database. -- --Memory mapped netlink I/O used two circular ring buffers for RX and TX which --are mapped into the processes address space. -- --The RX ring is used by the kernel to directly construct netlink messages into --user-space memory without copying them as done with regular socket I/O, --additionally as long as the ring contains messages no recvmsg() or poll() --syscalls have to be issued by user-space to get more message. -- --The TX ring is used to process messages directly from user-space memory, the --kernel processes all messages contained in the ring using a single sendmsg() --call. -- --Usage overview ---------------- -- --In order to use memory mapped netlink I/O, user-space needs three main changes: -- --- ring setup --- conversion of the RX path to get messages from the ring instead of recvmsg() --- conversion of the TX path to construct messages into the ring -- --Ring setup is done using setsockopt() to provide the ring parameters to the --kernel, then a call to mmap() to map the ring into the processes address space: -- --- setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &params, sizeof(params)); --- setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, &params, sizeof(params)); --- ring = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0) -- --Usage of either ring is optional, but even if only the RX ring is used the --mapping still needs to be writable in order to update the frame status after --processing. 
-- --Conversion of the reception path involves calling poll() on the file --descriptor, once the socket is readable the frames from the ring are --processed in order until no more messages are available, as indicated by --a status word in the frame header. -- --On kernel side, in order to make use of memory mapped I/O on receive, the --originating netlink subsystem needs to support memory mapped I/O, otherwise --it will use an allocated socket buffer as usual and the contents will be -- copied to the ring on transmission, nullifying most of the performance gains. --Dumps of kernel databases automatically support memory mapped I/O. -- --Conversion of the transmit path involves changing message construction to --use memory from the TX ring instead of (usually) a buffer declared on the --stack and setting up the frame header appropriately. Optionally poll() can --be used to wait for free frames in the TX ring. -- --Structured and definitions for using memory mapped I/O are contained in --<linux/netlink.h>. -- --RX and TX rings ------------------ -- --Each ring contains a number of continuous memory blocks, containing frames of --fixed size dependent on the parameters used for ring setup. -- --Ring: [ block 0 ] -- [ frame 0 ] -- [ frame 1 ] -- [ block 1 ] -- [ frame 2 ] -- [ frame 3 ] -- ... -- [ block n ] -- [ frame 2 * n ] -- [ frame 2 * n + 1 ] -- --The blocks are only visible to the kernel, from the point of view of user-space --the ring just contains the frames in a continuous memory zone. -- --The ring parameters used for setting up the ring are defined as follows: -- --struct nl_mmap_req { -- unsigned int nm_block_size; -- unsigned int nm_block_nr; -- unsigned int nm_frame_size; -- unsigned int nm_frame_nr; --}; -- --Frames are grouped into blocks, where each block is a continuous region of memory --and holds nm_block_size / nm_frame_size frames. The total number of frames in --the ring is nm_frame_nr. 
The following invariants hold: -- --- frames_per_block = nm_block_size / nm_frame_size -- --- nm_frame_nr = frames_per_block * nm_block_nr -- --Some parameters are constrained, specifically: -- --- nm_block_size must be a multiple of the architectures memory page size. -- The getpagesize() function can be used to get the page size. -- --- nm_frame_size must be equal or larger to NL_MMAP_HDRLEN, IOW a frame must be -- able to hold at least the frame header -- --- nm_frame_size must be smaller or equal to nm_block_size -- --- nm_frame_size must be a multiple of NL_MMAP_MSG_ALIGNMENT -- --- nm_frame_nr must equal the actual number of frames as specified above. -- --When the kernel can't allocate physically continuous memory for a ring block, --it will fall back to use physically discontinuous memory. This might affect --performance negatively, in order to avoid this the nm_frame_size parameter --should be chosen to be as small as possible for the required frame size and --the number of blocks should be increased instead. -- --Ring frames -------------- -- --Each frames contain a frame header, consisting of a synchronization word and some --meta-data, and the message itself. -- --Frame: [ header message ] -- --The frame header is defined as follows: -- --struct nl_mmap_hdr { -- unsigned int nm_status; -- unsigned int nm_len; -- __u32 nm_group; -- /* credentials */ -- __u32 nm_pid; -- __u32 nm_uid; -- __u32 nm_gid; --}; -- --- nm_status is used for synchronizing processing between the kernel and user- -- space and specifies ownership of the frame as well as the operation to perform -- --- nm_len contains the length of the message contained in the data area -- --- nm_group specified the destination multicast group of message -- --- nm_pid, nm_uid and nm_gid contain the netlink pid, UID and GID of the sending -- process. These values correspond to the data available using SOCK_PASSCRED in -- the SCM_CREDENTIALS cmsg. 
-- --The possible values in the status word are: -- --- NL_MMAP_STATUS_UNUSED: -- RX ring: frame belongs to the kernel and contains no message -- for user-space. Approriate action is to invoke poll() -- to wait for new messages. -- -- TX ring: frame belongs to user-space and can be used for -- message construction. -- --- NL_MMAP_STATUS_RESERVED: -- RX ring only: frame is currently used by the kernel for message -- construction and contains no valid message yet. -- Appropriate action is to invoke poll() to wait for -- new messages. -- --- NL_MMAP_STATUS_VALID: -- RX ring: frame contains a valid message. Approriate action is -- to process the message and release the frame back to -- the kernel by setting the status to -- NL_MMAP_STATUS_UNUSED or queue the frame by setting the -- status to NL_MMAP_STATUS_SKIP. -- -- TX ring: the frame contains a valid message from user-space to -- be processed by the kernel. After completing processing -- the kernel will release the frame back to user-space by -- setting the status to NL_MMAP_STATUS_UNUSED. -- --- NL_MMAP_STATUS_COPY: -- RX ring only: a message is ready to be processed but could not be -- stored in the ring, either because it exceeded the -- frame size or because the originating subsystem does -- not support memory mapped I/O. Appropriate action is -- to invoke recvmsg() to receive the message and release -- the frame back to the kernel by setting the status to -- NL_MMAP_STATUS_UNUSED. -- --- NL_MMAP_STATUS_SKIP: -- RX ring only: user-space queued the message for later processing, but -- processed some messages following it in the ring. The -- kernel should skip this frame when looking for unused -- frames. -- --The data area of a frame begins at a offset of NL_MMAP_HDRLEN relative to the --frame header. -- --TX limitations ---------------- -- --As of Jan 2015 the message is always copied from the ring frame to an --allocated buffer due to unresolved security concerns. 
--See commit 4682a0358639b29cf ("netlink: Always copy on mmap TX."). -- --Example --------- -- --Ring setup: -- -- unsigned int block_size = 16 * getpagesize(); -- struct nl_mmap_req req = { -- .nm_block_size = block_size, -- .nm_block_nr = 64, -- .nm_frame_size = 16384, -- .nm_frame_nr = 64 * block_size / 16384, -- }; -- unsigned int ring_size; -- void *rx_ring, *tx_ring; -- -- /* Configure ring parameters */ -- if (setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &req, sizeof(req)) < 0) -- exit(1); -- if (setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, &req, sizeof(req)) < 0) -- exit(1) -- -- /* Calculate size of each individual ring */ -- ring_size = req.nm_block_nr * req.nm_block_size; -- -- /* Map RX/TX rings. The TX ring is located after the RX ring */ -- rx_ring = mmap(NULL, 2 * ring_size, PROT_READ | PROT_WRITE, -- MAP_SHARED, fd, 0); -- if ((long)rx_ring == -1L) -- exit(1); -- tx_ring = rx_ring + ring_size: -- --Message reception: -- --This example assumes some ring parameters of the ring setup are available. -- -- unsigned int frame_offset = 0; -- struct nl_mmap_hdr *hdr; -- struct nlmsghdr *nlh; -- unsigned char buf[16384]; -- ssize_t len; -- -- while (1) { -- struct pollfd pfds[1]; -- -- pfds[0].fd = fd; -- pfds[0].events = POLLIN | POLLERR; -- pfds[0].revents = 0; -- -- if (poll(pfds, 1, -1) < 0 && errno != -EINTR) -- exit(1); -- -- /* Check for errors. Error handling omitted */ -- if (pfds[0].revents & POLLERR) -- -- -- /* If no new messages, poll again */ -- if (!(pfds[0].revents & POLLIN)) -- continue; -- -- /* Process all frames */ -- while (1) { -- /* Get next frame header */ -- hdr = rx_ring + frame_offset; -- -- if (hdr->nm_status == NL_MMAP_STATUS_VALID) { -- /* Regular memory mapped frame */ -- nlh = (void *)hdr + NL_MMAP_HDRLEN; -- len = hdr->nm_len; -- -- /* Release empty message immediately. May happen -- * on error during message construction. 
-- */ -- if (len == 0) -- goto release; -- } else if (hdr->nm_status == NL_MMAP_STATUS_COPY) { -- /* Frame queued to socket receive queue */ -- len = recv(fd, buf, sizeof(buf), MSG_DONTWAIT); -- if (len <= 0) -- break; -- nlh = buf; -- } else -- /* No more messages to process, continue polling */ -- break; -- -- process_msg(nlh); --release: -- /* Release frame back to the kernel */ -- hdr->nm_status = NL_MMAP_STATUS_UNUSED; -- -- /* Advance frame offset to next frame */ -- frame_offset = (frame_offset + frame_size) % ring_size; -- } -- } -- --Message transmission: -- --This example assumes some ring parameters of the ring setup are available. --A single message is constructed and transmitted, to send multiple messages --at once they would be constructed in consecutive frames before a final call --to sendto(). -- -- unsigned int frame_offset = 0; -- struct nl_mmap_hdr *hdr; -- struct nlmsghdr *nlh; -- struct sockaddr_nl addr = { -- .nl_family = AF_NETLINK, -- }; -- -- hdr = tx_ring + frame_offset; -- if (hdr->nm_status != NL_MMAP_STATUS_UNUSED) -- /* No frame available. Use poll() to avoid. 
*/ -- exit(1); -- -- nlh = (void *)hdr + NL_MMAP_HDRLEN; -- -- /* Build message */ -- build_message(nlh); -- -- /* Fill frame header: length and status need to be set */ -- hdr->nm_len = nlh->nlmsg_len; -- hdr->nm_status = NL_MMAP_STATUS_VALID; -- -- if (sendto(fd, NULL, 0, 0, &addr, sizeof(addr)) < 0) -- exit(1); -- -- /* Advance frame offset to next frame */ -- frame_offset = (frame_offset + frame_size) % ring_size; -diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h -index f095155d8749..0dba4e4ed2be 100644 ---- a/include/uapi/linux/netlink.h -+++ b/include/uapi/linux/netlink.h -@@ -107,8 +107,10 @@ struct nlmsgerr { - #define NETLINK_PKTINFO 3 - #define NETLINK_BROADCAST_ERROR 4 - #define NETLINK_NO_ENOBUFS 5 -+#ifndef __KERNEL__ - #define NETLINK_RX_RING 6 - #define NETLINK_TX_RING 7 -+#endif - #define NETLINK_LISTEN_ALL_NSID 8 - #define NETLINK_LIST_MEMBERSHIPS 9 - #define NETLINK_CAP_ACK 10 -@@ -134,6 +136,7 @@ struct nl_mmap_hdr { - __u32 nm_gid; - }; - -+#ifndef __KERNEL__ - enum nl_mmap_status { - NL_MMAP_STATUS_UNUSED, - NL_MMAP_STATUS_RESERVED, -@@ -145,6 +148,7 @@ enum nl_mmap_status { - #define NL_MMAP_MSG_ALIGNMENT NLMSG_ALIGNTO - #define NL_MMAP_MSG_ALIGN(sz) __ALIGN_KERNEL(sz, NL_MMAP_MSG_ALIGNMENT) - #define NL_MMAP_HDRLEN NL_MMAP_MSG_ALIGN(sizeof(struct nl_mmap_hdr)) -+#endif - - #define NET_MAJOR 36 /* Major 36 is reserved for networking */ - -diff --git a/include/uapi/linux/netlink_diag.h b/include/uapi/linux/netlink_diag.h -index f2159d30d1f5..d79399394b46 100644 ---- a/include/uapi/linux/netlink_diag.h -+++ b/include/uapi/linux/netlink_diag.h -@@ -48,6 +48,8 @@ enum { - - #define NDIAG_SHOW_MEMINFO 0x00000001 /* show memory info of a socket */ - #define NDIAG_SHOW_GROUPS 0x00000002 /* show groups of a netlink socket */ -+#ifndef __KERNEL__ - #define NDIAG_SHOW_RING_CFG 0x00000004 /* show ring configuration */ -+#endif - - #endif -diff --git a/net/netlink/Kconfig b/net/netlink/Kconfig -index 2c5e95e9bfbd..5d6e8c05b3d4 
100644 ---- a/net/netlink/Kconfig -+++ b/net/netlink/Kconfig -@@ -2,15 +2,6 @@ - # Netlink Sockets - # - --config NETLINK_MMAP -- bool "NETLINK: mmaped IO" -- ---help--- -- This option enables support for memory mapped netlink IO. This -- reduces overhead by avoiding copying data between kernel- and -- userspace. -- -- If unsure, say N. -- - config NETLINK_DIAG - tristate "NETLINK: socket monitoring interface" - default n -diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c -index 360700a2f46c..8e33019d8e7b 100644 ---- a/net/netlink/af_netlink.c -+++ b/net/netlink/af_netlink.c -@@ -225,7 +225,7 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb, - - dev_hold(dev); - -- if (netlink_skb_is_mmaped(skb) || is_vmalloc_addr(skb->head)) -+ if (is_vmalloc_addr(skb->head)) - nskb = netlink_to_full_skb(skb, GFP_ATOMIC); - else - nskb = skb_clone(skb, GFP_ATOMIC); -@@ -300,610 +300,8 @@ static void netlink_rcv_wake(struct sock *sk) - wake_up_interruptible(&nlk->wait); - } - --#ifdef CONFIG_NETLINK_MMAP --static bool netlink_rx_is_mmaped(struct sock *sk) --{ -- return nlk_sk(sk)->rx_ring.pg_vec != NULL; --} -- --static bool netlink_tx_is_mmaped(struct sock *sk) --{ -- return nlk_sk(sk)->tx_ring.pg_vec != NULL; --} -- --static __pure struct page *pgvec_to_page(const void *addr) --{ -- if (is_vmalloc_addr(addr)) -- return vmalloc_to_page(addr); -- else -- return virt_to_page(addr); --} -- --static void free_pg_vec(void **pg_vec, unsigned int order, unsigned int len) --{ -- unsigned int i; -- -- for (i = 0; i < len; i++) { -- if (pg_vec[i] != NULL) { -- if (is_vmalloc_addr(pg_vec[i])) -- vfree(pg_vec[i]); -- else -- free_pages((unsigned long)pg_vec[i], order); -- } -- } -- kfree(pg_vec); --} -- --static void *alloc_one_pg_vec_page(unsigned long order) --{ -- void *buffer; -- gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | -- __GFP_NOWARN | __GFP_NORETRY; -- -- buffer = (void *)__get_free_pages(gfp_flags, order); -- if (buffer != NULL) -- return 
buffer; -- -- buffer = vzalloc((1 << order) * PAGE_SIZE); -- if (buffer != NULL) -- return buffer; -- -- gfp_flags &= ~__GFP_NORETRY; -- return (void *)__get_free_pages(gfp_flags, order); --} -- --static void **alloc_pg_vec(struct netlink_sock *nlk, -- struct nl_mmap_req *req, unsigned int order) --{ -- unsigned int block_nr = req->nm_block_nr; -- unsigned int i; -- void **pg_vec; -- -- pg_vec = kcalloc(block_nr, sizeof(void *), GFP_KERNEL); -- if (pg_vec == NULL) -- return NULL; -- -- for (i = 0; i < block_nr; i++) { -- pg_vec[i] = alloc_one_pg_vec_page(order); -- if (pg_vec[i] == NULL) -- goto err1; -- } -- -- return pg_vec; --err1: -- free_pg_vec(pg_vec, order, block_nr); -- return NULL; --} -- -- --static void --__netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, bool tx_ring, void **pg_vec, -- unsigned int order) --{ -- struct netlink_sock *nlk = nlk_sk(sk); -- struct sk_buff_head *queue; -- struct netlink_ring *ring; -- -- queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue; -- ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring; -- -- spin_lock_bh(&queue->lock); -- -- ring->frame_max = req->nm_frame_nr - 1; -- ring->head = 0; -- ring->frame_size = req->nm_frame_size; -- ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE; -- -- swap(ring->pg_vec_len, req->nm_block_nr); -- swap(ring->pg_vec_order, order); -- swap(ring->pg_vec, pg_vec); -- -- __skb_queue_purge(queue); -- spin_unlock_bh(&queue->lock); -- -- WARN_ON(atomic_read(&nlk->mapped)); -- -- if (pg_vec) -- free_pg_vec(pg_vec, order, req->nm_block_nr); --} -- --static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, -- bool tx_ring) --{ -- struct netlink_sock *nlk = nlk_sk(sk); -- struct netlink_ring *ring; -- void **pg_vec = NULL; -- unsigned int order = 0; -- -- ring = tx_ring ? 
&nlk->tx_ring : &nlk->rx_ring; -- -- if (atomic_read(&nlk->mapped)) -- return -EBUSY; -- if (atomic_read(&ring->pending)) -- return -EBUSY; -- -- if (req->nm_block_nr) { -- if (ring->pg_vec != NULL) -- return -EBUSY; -- -- if ((int)req->nm_block_size <= 0) -- return -EINVAL; -- if (!PAGE_ALIGNED(req->nm_block_size)) -- return -EINVAL; -- if (req->nm_frame_size < NL_MMAP_HDRLEN) -- return -EINVAL; -- if (!IS_ALIGNED(req->nm_frame_size, NL_MMAP_MSG_ALIGNMENT)) -- return -EINVAL; -- -- ring->frames_per_block = req->nm_block_size / -- req->nm_frame_size; -- if (ring->frames_per_block == 0) -- return -EINVAL; -- if (ring->frames_per_block * req->nm_block_nr != -- req->nm_frame_nr) -- return -EINVAL; -- -- order = get_order(req->nm_block_size); -- pg_vec = alloc_pg_vec(nlk, req, order); -- if (pg_vec == NULL) -- return -ENOMEM; -- } else { -- if (req->nm_frame_nr) -- return -EINVAL; -- } -- -- mutex_lock(&nlk->pg_vec_lock); -- if (atomic_read(&nlk->mapped) == 0) { -- __netlink_set_ring(sk, req, tx_ring, pg_vec, order); -- mutex_unlock(&nlk->pg_vec_lock); -- return 0; -- } -- -- mutex_unlock(&nlk->pg_vec_lock); -- -- if (pg_vec) -- free_pg_vec(pg_vec, order, req->nm_block_nr); -- -- return -EBUSY; --} -- --static void netlink_mm_open(struct vm_area_struct *vma) --{ -- struct file *file = vma->vm_file; -- struct socket *sock = file->private_data; -- struct sock *sk = sock->sk; -- -- if (sk) -- atomic_inc(&nlk_sk(sk)->mapped); --} -- --static void netlink_mm_close(struct vm_area_struct *vma) --{ -- struct file *file = vma->vm_file; -- struct socket *sock = file->private_data; -- struct sock *sk = sock->sk; -- -- if (sk) -- atomic_dec(&nlk_sk(sk)->mapped); --} -- --static const struct vm_operations_struct netlink_mmap_ops = { -- .open = netlink_mm_open, -- .close = netlink_mm_close, --}; -- --static int netlink_mmap(struct file *file, struct socket *sock, -- struct vm_area_struct *vma) --{ -- struct sock *sk = sock->sk; -- struct netlink_sock *nlk = nlk_sk(sk); -- struct 
netlink_ring *ring; -- unsigned long start, size, expected; -- unsigned int i; -- int err = -EINVAL; -- -- if (vma->vm_pgoff) -- return -EINVAL; -- -- mutex_lock(&nlk->pg_vec_lock); -- -- expected = 0; -- for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) { -- if (ring->pg_vec == NULL) -- continue; -- expected += ring->pg_vec_len * ring->pg_vec_pages * PAGE_SIZE; -- } -- -- if (expected == 0) -- goto out; -- -- size = vma->vm_end - vma->vm_start; -- if (size != expected) -- goto out; -- -- start = vma->vm_start; -- for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) { -- if (ring->pg_vec == NULL) -- continue; -- -- for (i = 0; i < ring->pg_vec_len; i++) { -- struct page *page; -- void *kaddr = ring->pg_vec[i]; -- unsigned int pg_num; -- -- for (pg_num = 0; pg_num < ring->pg_vec_pages; pg_num++) { -- page = pgvec_to_page(kaddr); -- err = vm_insert_page(vma, start, page); -- if (err < 0) -- goto out; -- start += PAGE_SIZE; -- kaddr += PAGE_SIZE; -- } -- } -- } -- -- atomic_inc(&nlk->mapped); -- vma->vm_ops = &netlink_mmap_ops; -- err = 0; --out: -- mutex_unlock(&nlk->pg_vec_lock); -- return err; --} -- --static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr, unsigned int nm_len) --{ --#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 -- struct page *p_start, *p_end; -- -- /* First page is flushed through netlink_{get,set}_status */ -- p_start = pgvec_to_page(hdr + PAGE_SIZE); -- p_end = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + nm_len - 1); -- while (p_start <= p_end) { -- flush_dcache_page(p_start); -- p_start++; -- } --#endif --} -- --static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr) --{ -- smp_rmb(); -- flush_dcache_page(pgvec_to_page(hdr)); -- return hdr->nm_status; --} -- --static void netlink_set_status(struct nl_mmap_hdr *hdr, -- enum nl_mmap_status status) --{ -- smp_mb(); -- hdr->nm_status = status; -- flush_dcache_page(pgvec_to_page(hdr)); --} -- --static struct nl_mmap_hdr * --__netlink_lookup_frame(const 
struct netlink_ring *ring, unsigned int pos) --{ -- unsigned int pg_vec_pos, frame_off; -- -- pg_vec_pos = pos / ring->frames_per_block; -- frame_off = pos % ring->frames_per_block; -- -- return ring->pg_vec[pg_vec_pos] + (frame_off * ring->frame_size); --} -- --static struct nl_mmap_hdr * --netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos, -- enum nl_mmap_status status) --{ -- struct nl_mmap_hdr *hdr; -- -- hdr = __netlink_lookup_frame(ring, pos); -- if (netlink_get_status(hdr) != status) -- return NULL; -- -- return hdr; --} -- --static struct nl_mmap_hdr * --netlink_current_frame(const struct netlink_ring *ring, -- enum nl_mmap_status status) --{ -- return netlink_lookup_frame(ring, ring->head, status); --} -- --static void netlink_increment_head(struct netlink_ring *ring) --{ -- ring->head = ring->head != ring->frame_max ? ring->head + 1 : 0; --} -- --static void netlink_forward_ring(struct netlink_ring *ring) --{ -- unsigned int head = ring->head; -- const struct nl_mmap_hdr *hdr; -- -- do { -- hdr = __netlink_lookup_frame(ring, ring->head); -- if (hdr->nm_status == NL_MMAP_STATUS_UNUSED) -- break; -- if (hdr->nm_status != NL_MMAP_STATUS_SKIP) -- break; -- netlink_increment_head(ring); -- } while (ring->head != head); --} -- --static bool netlink_has_valid_frame(struct netlink_ring *ring) --{ -- unsigned int head = ring->head, pos = head; -- const struct nl_mmap_hdr *hdr; -- -- do { -- hdr = __netlink_lookup_frame(ring, pos); -- if (hdr->nm_status == NL_MMAP_STATUS_VALID) -- return true; -- pos = pos != 0 ? 
pos - 1 : ring->frame_max; -- } while (pos != head); -- -- return false; --} -- --static bool netlink_dump_space(struct netlink_sock *nlk) --{ -- struct netlink_ring *ring = &nlk->rx_ring; -- struct nl_mmap_hdr *hdr; -- unsigned int n; -- -- hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED); -- if (hdr == NULL) -- return false; -- -- n = ring->head + ring->frame_max / 2; -- if (n > ring->frame_max) -- n -= ring->frame_max; -- -- hdr = __netlink_lookup_frame(ring, n); -- -- return hdr->nm_status == NL_MMAP_STATUS_UNUSED; --} -- --static unsigned int netlink_poll(struct file *file, struct socket *sock, -- poll_table *wait) --{ -- struct sock *sk = sock->sk; -- struct netlink_sock *nlk = nlk_sk(sk); -- unsigned int mask; -- int err; -- -- if (nlk->rx_ring.pg_vec != NULL) { -- /* Memory mapped sockets don't call recvmsg(), so flow control -- * for dumps is performed here. A dump is allowed to continue -- * if at least half the ring is unused. -- */ -- while (nlk->cb_running && netlink_dump_space(nlk)) { -- err = netlink_dump(sk); -- if (err < 0) { -- sk->sk_err = -err; -- sk->sk_error_report(sk); -- break; -- } -- } -- netlink_rcv_wake(sk); -- } -- -- mask = datagram_poll(file, sock, wait); -- -- /* We could already have received frames in the normal receive -- * queue, that will show up as NL_MMAP_STATUS_COPY in the ring, -- * so if mask contains pollin/etc already, there's no point -- * walking the ring. 
-- */ -- if ((mask & (POLLIN | POLLRDNORM)) != (POLLIN | POLLRDNORM)) { -- spin_lock_bh(&sk->sk_receive_queue.lock); -- if (nlk->rx_ring.pg_vec) { -- if (netlink_has_valid_frame(&nlk->rx_ring)) -- mask |= POLLIN | POLLRDNORM; -- } -- spin_unlock_bh(&sk->sk_receive_queue.lock); -- } -- -- spin_lock_bh(&sk->sk_write_queue.lock); -- if (nlk->tx_ring.pg_vec) { -- if (netlink_current_frame(&nlk->tx_ring, NL_MMAP_STATUS_UNUSED)) -- mask |= POLLOUT | POLLWRNORM; -- } -- spin_unlock_bh(&sk->sk_write_queue.lock); -- -- return mask; --} -- --static struct nl_mmap_hdr *netlink_mmap_hdr(struct sk_buff *skb) --{ -- return (struct nl_mmap_hdr *)(skb->head - NL_MMAP_HDRLEN); --} -- --static void netlink_ring_setup_skb(struct sk_buff *skb, struct sock *sk, -- struct netlink_ring *ring, -- struct nl_mmap_hdr *hdr) --{ -- unsigned int size; -- void *data; -- -- size = ring->frame_size - NL_MMAP_HDRLEN; -- data = (void *)hdr + NL_MMAP_HDRLEN; -- -- skb->head = data; -- skb->data = data; -- skb_reset_tail_pointer(skb); -- skb->end = skb->tail + size; -- skb->len = 0; -- -- skb->destructor = netlink_skb_destructor; -- NETLINK_CB(skb).flags |= NETLINK_SKB_MMAPED; -- NETLINK_CB(skb).sk = sk; --} -- --static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg, -- u32 dst_portid, u32 dst_group, -- struct scm_cookie *scm) --{ -- struct netlink_sock *nlk = nlk_sk(sk); -- struct netlink_ring *ring; -- struct nl_mmap_hdr *hdr; -- struct sk_buff *skb; -- unsigned int maxlen; -- int err = 0, len = 0; -- -- mutex_lock(&nlk->pg_vec_lock); -- -- ring = &nlk->tx_ring; -- maxlen = ring->frame_size - NL_MMAP_HDRLEN; -- -- do { -- unsigned int nm_len; -- -- hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID); -- if (hdr == NULL) { -- if (!(msg->msg_flags & MSG_DONTWAIT) && -- atomic_read(&nlk->tx_ring.pending)) -- schedule(); -- continue; -- } -- -- nm_len = ACCESS_ONCE(hdr->nm_len); -- if (nm_len > maxlen) { -- err = -EINVAL; -- goto out; -- } -- -- netlink_frame_flush_dcache(hdr, 
nm_len); -- -- skb = alloc_skb(nm_len, GFP_KERNEL); -- if (skb == NULL) { -- err = -ENOBUFS; -- goto out; -- } -- __skb_put(skb, nm_len); -- memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, nm_len); -- netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED); -- -- netlink_increment_head(ring); -- -- NETLINK_CB(skb).portid = nlk->portid; -- NETLINK_CB(skb).dst_group = dst_group; -- NETLINK_CB(skb).creds = scm->creds; -- -- err = security_netlink_send(sk, skb); -- if (err) { -- kfree_skb(skb); -- goto out; -- } -- -- if (unlikely(dst_group)) { -- atomic_inc(&skb->users); -- netlink_broadcast(sk, skb, dst_portid, dst_group, -- GFP_KERNEL); -- } -- err = netlink_unicast(sk, skb, dst_portid, -- msg->msg_flags & MSG_DONTWAIT); -- if (err < 0) -- goto out; -- len += err; -- -- } while (hdr != NULL || -- (!(msg->msg_flags & MSG_DONTWAIT) && -- atomic_read(&nlk->tx_ring.pending))); -- -- if (len > 0) -- err = len; --out: -- mutex_unlock(&nlk->pg_vec_lock); -- return err; --} -- --static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb) --{ -- struct nl_mmap_hdr *hdr; -- -- hdr = netlink_mmap_hdr(skb); -- hdr->nm_len = skb->len; -- hdr->nm_group = NETLINK_CB(skb).dst_group; -- hdr->nm_pid = NETLINK_CB(skb).creds.pid; -- hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid); -- hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid); -- netlink_frame_flush_dcache(hdr, hdr->nm_len); -- netlink_set_status(hdr, NL_MMAP_STATUS_VALID); -- -- NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED; -- kfree_skb(skb); --} -- --static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb) --{ -- struct netlink_sock *nlk = nlk_sk(sk); -- struct netlink_ring *ring = &nlk->rx_ring; -- struct nl_mmap_hdr *hdr; -- -- spin_lock_bh(&sk->sk_receive_queue.lock); -- hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED); -- if (hdr == NULL) { -- spin_unlock_bh(&sk->sk_receive_queue.lock); -- kfree_skb(skb); -- netlink_overrun(sk); -- return; -- } -- 
netlink_increment_head(ring); -- __skb_queue_tail(&sk->sk_receive_queue, skb); -- spin_unlock_bh(&sk->sk_receive_queue.lock); -- -- hdr->nm_len = skb->len; -- hdr->nm_group = NETLINK_CB(skb).dst_group; -- hdr->nm_pid = NETLINK_CB(skb).creds.pid; -- hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid); -- hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid); -- netlink_set_status(hdr, NL_MMAP_STATUS_COPY); --} -- --#else /* CONFIG_NETLINK_MMAP */ --#define netlink_rx_is_mmaped(sk) false --#define netlink_tx_is_mmaped(sk) false --#define netlink_mmap sock_no_mmap --#define netlink_poll datagram_poll --#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, scm) 0 --#endif /* CONFIG_NETLINK_MMAP */ -- - static void netlink_skb_destructor(struct sk_buff *skb) - { --#ifdef CONFIG_NETLINK_MMAP -- struct nl_mmap_hdr *hdr; -- struct netlink_ring *ring; -- struct sock *sk; -- -- /* If a packet from the kernel to userspace was freed because of an -- * error without being delivered to userspace, the kernel must reset -- * the status. In the direction userspace to kernel, the status is -- * always reset here after the packet was processed and freed. 
-- */ -- if (netlink_skb_is_mmaped(skb)) { -- hdr = netlink_mmap_hdr(skb); -- sk = NETLINK_CB(skb).sk; -- -- if (NETLINK_CB(skb).flags & NETLINK_SKB_TX) { -- netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED); -- ring = &nlk_sk(sk)->tx_ring; -- } else { -- if (!(NETLINK_CB(skb).flags & NETLINK_SKB_DELIVERED)) { -- hdr->nm_len = 0; -- netlink_set_status(hdr, NL_MMAP_STATUS_VALID); -- } -- ring = &nlk_sk(sk)->rx_ring; -- } -- -- WARN_ON(atomic_read(&ring->pending) == 0); -- atomic_dec(&ring->pending); -- sock_put(sk); -- -- skb->head = NULL; -- } --#endif - if (is_vmalloc_addr(skb->head)) { - if (!skb->cloned || - !atomic_dec_return(&(skb_shinfo(skb)->dataref))) -@@ -936,18 +334,6 @@ static void netlink_sock_destruct(struct sock *sk) - } - - skb_queue_purge(&sk->sk_receive_queue); --#ifdef CONFIG_NETLINK_MMAP -- if (1) { -- struct nl_mmap_req req; -- -- memset(&req, 0, sizeof(req)); -- if (nlk->rx_ring.pg_vec) -- __netlink_set_ring(sk, &req, false, NULL, 0); -- memset(&req, 0, sizeof(req)); -- if (nlk->tx_ring.pg_vec) -- __netlink_set_ring(sk, &req, true, NULL, 0); -- } --#endif /* CONFIG_NETLINK_MMAP */ - - if (!sock_flag(sk, SOCK_DEAD)) { - printk(KERN_ERR "Freeing alive netlink socket %p\n", sk); -@@ -1201,9 +587,6 @@ static int __netlink_create(struct net *net, struct socket *sock, - mutex_init(nlk->cb_mutex); - } - init_waitqueue_head(&nlk->wait); --#ifdef CONFIG_NETLINK_MMAP -- mutex_init(&nlk->pg_vec_lock); --#endif - - sk->sk_destruct = netlink_sock_destruct; - sk->sk_protocol = protocol; -@@ -1745,8 +1128,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, - nlk = nlk_sk(sk); - - if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || -- test_bit(NETLINK_S_CONGESTED, &nlk->state)) && -- !netlink_skb_is_mmaped(skb)) { -+ test_bit(NETLINK_S_CONGESTED, &nlk->state))) { - DECLARE_WAITQUEUE(wait, current); - if (!*timeo) { - if (!ssk || netlink_is_kernel(ssk)) -@@ -1784,14 +1166,7 @@ static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb) - - 
netlink_deliver_tap(skb); - --#ifdef CONFIG_NETLINK_MMAP -- if (netlink_skb_is_mmaped(skb)) -- netlink_queue_mmaped_skb(sk, skb); -- else if (netlink_rx_is_mmaped(sk)) -- netlink_ring_set_copied(sk, skb); -- else --#endif /* CONFIG_NETLINK_MMAP */ -- skb_queue_tail(&sk->sk_receive_queue, skb); -+ skb_queue_tail(&sk->sk_receive_queue, skb); - sk->sk_data_ready(sk); - return len; - } -@@ -1815,9 +1190,6 @@ static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation) - int delta; - - WARN_ON(skb->sk != NULL); -- if (netlink_skb_is_mmaped(skb)) -- return skb; -- - delta = skb->end - skb->tail; - if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize) - return skb; -@@ -1897,71 +1269,6 @@ struct sk_buff *__netlink_alloc_skb(struct sock *ssk, unsigned int size, - unsigned int ldiff, u32 dst_portid, - gfp_t gfp_mask) - { --#ifdef CONFIG_NETLINK_MMAP -- unsigned int maxlen, linear_size; -- struct sock *sk = NULL; -- struct sk_buff *skb; -- struct netlink_ring *ring; -- struct nl_mmap_hdr *hdr; -- -- sk = netlink_getsockbyportid(ssk, dst_portid); -- if (IS_ERR(sk)) -- goto out; -- -- ring = &nlk_sk(sk)->rx_ring; -- /* fast-path without atomic ops for common case: non-mmaped receiver */ -- if (ring->pg_vec == NULL) -- goto out_put; -- -- /* We need to account the full linear size needed as a ring -- * slot cannot have non-linear parts. 
-- */ -- linear_size = size + ldiff; -- if (ring->frame_size - NL_MMAP_HDRLEN < linear_size) -- goto out_put; -- -- skb = alloc_skb_head(gfp_mask); -- if (skb == NULL) -- goto err1; -- -- spin_lock_bh(&sk->sk_receive_queue.lock); -- /* check again under lock */ -- if (ring->pg_vec == NULL) -- goto out_free; -- -- /* check again under lock */ -- maxlen = ring->frame_size - NL_MMAP_HDRLEN; -- if (maxlen < linear_size) -- goto out_free; -- -- netlink_forward_ring(ring); -- hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED); -- if (hdr == NULL) -- goto err2; -- -- netlink_ring_setup_skb(skb, sk, ring, hdr); -- netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED); -- atomic_inc(&ring->pending); -- netlink_increment_head(ring); -- -- spin_unlock_bh(&sk->sk_receive_queue.lock); -- return skb; -- --err2: -- kfree_skb(skb); -- spin_unlock_bh(&sk->sk_receive_queue.lock); -- netlink_overrun(sk); --err1: -- sock_put(sk); -- return NULL; -- --out_free: -- kfree_skb(skb); -- spin_unlock_bh(&sk->sk_receive_queue.lock); --out_put: -- sock_put(sk); --out: --#endif - return alloc_skb(size, gfp_mask); - } - EXPORT_SYMBOL_GPL(__netlink_alloc_skb); -@@ -2242,8 +1549,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, - if (level != SOL_NETLINK) - return -ENOPROTOOPT; - -- if (optname != NETLINK_RX_RING && optname != NETLINK_TX_RING && -- optlen >= sizeof(int) && -+ if (optlen >= sizeof(int) && - get_user(val, (unsigned int __user *)optval)) - return -EFAULT; - -@@ -2296,25 +1602,6 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, - } - err = 0; - break; --#ifdef CONFIG_NETLINK_MMAP -- case NETLINK_RX_RING: -- case NETLINK_TX_RING: { -- struct nl_mmap_req req; -- -- /* Rings might consume more memory than queue limits, require -- * CAP_NET_ADMIN. 
-- */ -- if (!capable(CAP_NET_ADMIN)) -- return -EPERM; -- if (optlen < sizeof(req)) -- return -EINVAL; -- if (copy_from_user(&req, optval, sizeof(req))) -- return -EFAULT; -- err = netlink_set_ring(sk, &req, -- optname == NETLINK_TX_RING); -- break; -- } --#endif /* CONFIG_NETLINK_MMAP */ - case NETLINK_LISTEN_ALL_NSID: - if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_BROADCAST)) - return -EPERM; -@@ -2484,18 +1771,6 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) - smp_rmb(); - } - -- /* It's a really convoluted way for userland to ask for mmaped -- * sendmsg(), but that's what we've got... -- */ -- if (netlink_tx_is_mmaped(sk) && -- iter_is_iovec(&msg->msg_iter) && -- msg->msg_iter.nr_segs == 1 && -- msg->msg_iter.iov->iov_base == NULL) { -- err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, -- &scm); -- goto out; -- } -- - err = -EMSGSIZE; - if (len > sk->sk_sndbuf - 32) - goto out; -@@ -2812,8 +2087,7 @@ static int netlink_dump(struct sock *sk) - goto errout_skb; - } - -- if (!netlink_rx_is_mmaped(sk) && -- atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) -+ if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) - goto errout_skb; - - /* NLMSG_GOODSIZE is small to avoid high order allocations being -@@ -2902,16 +2176,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, - struct netlink_sock *nlk; - int ret; - -- /* Memory mapped dump requests need to be copied to avoid looping -- * on the pending state in netlink_mmap_sendmsg() while the CB hold -- * a reference to the skb. 
-- */ -- if (netlink_skb_is_mmaped(skb)) { -- skb = skb_copy(skb, GFP_KERNEL); -- if (skb == NULL) -- return -ENOBUFS; -- } else -- atomic_inc(&skb->users); -+ atomic_inc(&skb->users); - - sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid); - if (sk == NULL) { -@@ -3255,7 +2520,7 @@ static const struct proto_ops netlink_ops = { - .socketpair = sock_no_socketpair, - .accept = sock_no_accept, - .getname = netlink_getname, -- .poll = netlink_poll, -+ .poll = datagram_poll, - .ioctl = sock_no_ioctl, - .listen = sock_no_listen, - .shutdown = sock_no_shutdown, -@@ -3263,7 +2528,7 @@ static const struct proto_ops netlink_ops = { - .getsockopt = netlink_getsockopt, - .sendmsg = netlink_sendmsg, - .recvmsg = netlink_recvmsg, -- .mmap = netlink_mmap, -+ .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, - }; - -diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h -index df32cb92d9fc..ea4600aea6b0 100644 ---- a/net/netlink/af_netlink.h -+++ b/net/netlink/af_netlink.h -@@ -45,12 +45,6 @@ struct netlink_sock { - int (*netlink_bind)(struct net *net, int group); - void (*netlink_unbind)(struct net *net, int group); - struct module *module; --#ifdef CONFIG_NETLINK_MMAP -- struct mutex pg_vec_lock; -- struct netlink_ring rx_ring; -- struct netlink_ring tx_ring; -- atomic_t mapped; --#endif /* CONFIG_NETLINK_MMAP */ - - struct rhash_head node; - struct rcu_head rcu; -@@ -62,15 +56,6 @@ static inline struct netlink_sock *nlk_sk(struct sock *sk) - return container_of(sk, struct netlink_sock, sk); - } - --static inline bool netlink_skb_is_mmaped(const struct sk_buff *skb) --{ --#ifdef CONFIG_NETLINK_MMAP -- return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED; --#else -- return false; --#endif /* CONFIG_NETLINK_MMAP */ --} -- - struct netlink_table { - struct rhashtable hash; - struct hlist_head mc_list; -diff --git a/net/netlink/diag.c b/net/netlink/diag.c -index 3ee63a3cff30..8dd836a8dd60 100644 ---- a/net/netlink/diag.c -+++ b/net/netlink/diag.c 
-@@ -8,41 +8,6 @@ - - #include "af_netlink.h" - --#ifdef CONFIG_NETLINK_MMAP --static int sk_diag_put_ring(struct netlink_ring *ring, int nl_type, -- struct sk_buff *nlskb) --{ -- struct netlink_diag_ring ndr; -- -- ndr.ndr_block_size = ring->pg_vec_pages << PAGE_SHIFT; -- ndr.ndr_block_nr = ring->pg_vec_len; -- ndr.ndr_frame_size = ring->frame_size; -- ndr.ndr_frame_nr = ring->frame_max + 1; -- -- return nla_put(nlskb, nl_type, sizeof(ndr), &ndr); --} -- --static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb) --{ -- struct netlink_sock *nlk = nlk_sk(sk); -- int ret; -- -- mutex_lock(&nlk->pg_vec_lock); -- ret = sk_diag_put_ring(&nlk->rx_ring, NETLINK_DIAG_RX_RING, nlskb); -- if (!ret) -- ret = sk_diag_put_ring(&nlk->tx_ring, NETLINK_DIAG_TX_RING, -- nlskb); -- mutex_unlock(&nlk->pg_vec_lock); -- -- return ret; --} --#else --static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb) --{ -- return 0; --} --#endif -- - static int sk_diag_dump_groups(struct sock *sk, struct sk_buff *nlskb) - { - struct netlink_sock *nlk = nlk_sk(sk); -@@ -87,10 +52,6 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, - sock_diag_put_meminfo(sk, skb, NETLINK_DIAG_MEMINFO)) - goto out_nlmsg_trim; - -- if ((req->ndiag_show & NDIAG_SHOW_RING_CFG) && -- sk_diag_put_rings_cfg(sk, skb)) -- goto out_nlmsg_trim; -- - nlmsg_end(skb, nlh); - return 0; - --- -2.12.2 - -From 51a219a1371ed26ce45acc8209d6064257d00f70 Mon Sep 17 00:00:00 2001 -From: Matthias Schiffer -Date: Thu, 23 Feb 2017 17:19:41 +0100 -Subject: [PATCH 075/251] vxlan: correctly validate VXLAN ID against - VXLAN_N_VID -Content-Length: 915 -Lines: 29 - -[ Upstream commit 4e37d6911f36545b286d15073f6f2222f840e81c ] - -The incorrect check caused an off-by-one error: the maximum VID 0xffffff -was unusable. - -Fixes: d342894c5d2f ("vxlan: virtual extensible lan") -Signed-off-by: Matthias Schiffer -Acked-by: Jiri Benc -Signed-off-by: David S. 
Miller -Signed-off-by: Greg Kroah-Hartman ---- - drivers/net/vxlan.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c -index 6fa8e165878e..590750ab6564 100644 ---- a/drivers/net/vxlan.c -+++ b/drivers/net/vxlan.c -@@ -2600,7 +2600,7 @@ static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[]) - - if (data[IFLA_VXLAN_ID]) { - __u32 id = nla_get_u32(data[IFLA_VXLAN_ID]); -- if (id >= VXLAN_VID_MASK) -+ if (id >= VXLAN_N_VID) - return -ERANGE; - } - --- -2.12.2 - -From f1b3aae1f1bfdbec1956670aa3aa28d25f88d4b3 Mon Sep 17 00:00:00 2001 -From: David Forster -Date: Fri, 24 Feb 2017 14:20:32 +0000 -Subject: [PATCH 076/251] vti6: return GRE_KEY for vti6 -Content-Length: 884 -Lines: 29 - -[ Upstream commit 7dcdf941cdc96692ab99fd790c8cc68945514851 ] - -Align vti6 with vti by returning GRE_KEY flag. This enables iproute2 -to display tunnel keys on "ip -6 tunnel show" - -Signed-off-by: David Forster -Signed-off-by: David S. Miller -Signed-off-by: Greg Kroah-Hartman ---- - net/ipv6/ip6_vti.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c -index 0a8610b33d79..bdcc4d9cedd3 100644 ---- a/net/ipv6/ip6_vti.c -+++ b/net/ipv6/ip6_vti.c -@@ -680,6 +680,10 @@ vti6_parm_to_user(struct ip6_tnl_parm2 *u, const struct __ip6_tnl_parm *p) - u->link = p->link; - u->i_key = p->i_key; - u->o_key = p->o_key; -+ if (u->i_key) -+ u->i_flags |= GRE_KEY; -+ if (u->o_key) -+ u->o_flags |= GRE_KEY; - u->proto = p->proto; - - memcpy(u->name, p->name, sizeof(u->name)); --- -2.12.2 - -From 354f79125f12bcd7352704e770c0b10c4a4b424e Mon Sep 17 00:00:00 2001 -From: Julian Anastasov -Date: Sun, 26 Feb 2017 17:14:35 +0200 -Subject: [PATCH 077/251] ipv4: mask tos for input route -Content-Length: 916 -Lines: 31 - -[ Upstream commit 6e28099d38c0e50d62c1afc054e37e573adf3d21 ] - -Restore the lost masking of TOS in input route code to -allow ip rules to match it properly. 
- -Problem [1] noticed by Shmulik Ladkani - -[1] http://marc.info/?t=137331755300040&r=1&w=2 - -Fixes: 89aef8921bfb ("ipv4: Delete routing cache.") -Signed-off-by: Julian Anastasov -Signed-off-by: David S. Miller -Signed-off-by: Greg Kroah-Hartman ---- - net/ipv4/route.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/net/ipv4/route.c b/net/ipv4/route.c -index ef2f527a119b..da4d68d78590 100644 ---- a/net/ipv4/route.c -+++ b/net/ipv4/route.c -@@ -1958,6 +1958,7 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, - { - int res; - -+ tos &= IPTOS_RT_MASK; - rcu_read_lock(); - - /* Multicast recognition logic is moved from route cache to here. --- -2.12.2 - -From 2cd0afc64e333f2ef62444300418883cff0e79da Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Paul=20H=C3=BCber?= -Date: Sun, 26 Feb 2017 17:58:19 +0100 -Subject: [PATCH 078/251] l2tp: avoid use-after-free caused by - l2tp_ip_backlog_recv -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit -Content-Length: 923 -Lines: 28 - -[ Upstream commit 51fb60eb162ab84c5edf2ae9c63cf0b878e5547e ] - -l2tp_ip_backlog_recv may not return -1 if the packet gets dropped. -The return value is passed up to ip_local_deliver_finish, which treats -negative values as an IP protocol number for resubmission. - -Signed-off-by: Paul Hüber -Signed-off-by: David S. 
Miller -Signed-off-by: Greg Kroah-Hartman ---- - net/l2tp/l2tp_ip.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c -index 445b7cd0826a..48ab93842322 100644 ---- a/net/l2tp/l2tp_ip.c -+++ b/net/l2tp/l2tp_ip.c -@@ -383,7 +383,7 @@ static int l2tp_ip_backlog_recv(struct sock *sk, struct sk_buff *skb) - drop: - IP_INC_STATS(sock_net(sk), IPSTATS_MIB_INDISCARDS); - kfree_skb(skb); -- return -1; -+ return 0; - } - - /* Userspace will call sendmsg() on the tunnel socket to send L2TP --- -2.12.2 - -From f331d6445a3e4013428b06169acf3ae33614e69b Mon Sep 17 00:00:00 2001 -From: Alexander Potapenko -Date: Wed, 1 Mar 2017 12:57:20 +0100 -Subject: [PATCH 079/251] net: don't call strlen() on the user buffer in - packet_bind_spkt() -Content-Length: 3957 -Lines: 104 - -[ Upstream commit 540e2894f7905538740aaf122bd8e0548e1c34a4 ] - -KMSAN (KernelMemorySanitizer, a new error detection tool) reports use of -uninitialized memory in packet_bind_spkt(): -Acked-by: Eric Dumazet - -================================================================== -BUG: KMSAN: use of unitialized memory -CPU: 0 PID: 1074 Comm: packet Not tainted 4.8.0-rc6+ #1891 -Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs -01/01/2011 - 0000000000000000 ffff88006b6dfc08 ffffffff82559ae8 ffff88006b6dfb48 - ffffffff818a7c91 ffffffff85b9c870 0000000000000092 ffffffff85b9c550 - 0000000000000000 0000000000000092 00000000ec400911 0000000000000002 -Call Trace: - [< inline >] __dump_stack lib/dump_stack.c:15 - [] dump_stack+0x238/0x290 lib/dump_stack.c:51 - [] kmsan_report+0x276/0x2e0 mm/kmsan/kmsan.c:1003 - [] __msan_warning+0x5b/0xb0 -mm/kmsan/kmsan_instr.c:424 - [< inline >] strlen lib/string.c:484 - [] strlcpy+0x9d/0x200 lib/string.c:144 - [] packet_bind_spkt+0x144/0x230 -net/packet/af_packet.c:3132 - [] SYSC_bind+0x40d/0x5f0 net/socket.c:1370 - [] SyS_bind+0x82/0xa0 net/socket.c:1356 - [] entry_SYSCALL_64_fastpath+0x13/0x8f 
-arch/x86/entry/entry_64.o:? -chained origin: 00000000eba00911 - [] save_stack_trace+0x27/0x50 -arch/x86/kernel/stacktrace.c:67 - [< inline >] kmsan_save_stack_with_flags mm/kmsan/kmsan.c:322 - [< inline >] kmsan_save_stack mm/kmsan/kmsan.c:334 - [] kmsan_internal_chain_origin+0x118/0x1e0 -mm/kmsan/kmsan.c:527 - [] __msan_set_alloca_origin4+0xc3/0x130 -mm/kmsan/kmsan_instr.c:380 - [] SYSC_bind+0x129/0x5f0 net/socket.c:1356 - [] SyS_bind+0x82/0xa0 net/socket.c:1356 - [] entry_SYSCALL_64_fastpath+0x13/0x8f -arch/x86/entry/entry_64.o:? -origin description: ----address@SYSC_bind (origin=00000000eb400911) -================================================================== -(the line numbers are relative to 4.8-rc6, but the bug persists -upstream) - -, when I run the following program as root: - -===================================== - #include - #include - #include - #include - - int main() { - struct sockaddr addr; - memset(&addr, 0xff, sizeof(addr)); - addr.sa_family = AF_PACKET; - int fd = socket(PF_PACKET, SOCK_PACKET, htons(ETH_P_ALL)); - bind(fd, &addr, sizeof(addr)); - return 0; - } -===================================== - -This happens because addr.sa_data copied from the userspace is not -zero-terminated, and copying it with strlcpy() in packet_bind_spkt() -results in calling strlen() on the kernel copy of that non-terminated -buffer. - -Signed-off-by: Alexander Potapenko -Signed-off-by: David S. 
Miller -Signed-off-by: Greg Kroah-Hartman ---- - net/packet/af_packet.c | 8 ++++++-- - 1 file changed, 6 insertions(+), 2 deletions(-) - -diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c -index d805cd577a60..3975ac809934 100644 ---- a/net/packet/af_packet.c -+++ b/net/packet/af_packet.c -@@ -3021,7 +3021,7 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, - int addr_len) - { - struct sock *sk = sock->sk; -- char name[15]; -+ char name[sizeof(uaddr->sa_data) + 1]; - - /* - * Check legality -@@ -3029,7 +3029,11 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, - - if (addr_len != sizeof(struct sockaddr)) - return -EINVAL; -- strlcpy(name, uaddr->sa_data, sizeof(name)); -+ /* uaddr->sa_data comes from the userspace, it's not guaranteed to be -+ * zero-terminated. -+ */ -+ memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data)); -+ name[sizeof(uaddr->sa_data)] = 0; - - return packet_do_bind(sk, name, 0, pkt_sk(sk)->num); - } --- -2.12.2 - -From a70c328597045be2962098916c88ddd172caa054 Mon Sep 17 00:00:00 2001 -From: Eric Dumazet -Date: Wed, 1 Mar 2017 14:28:39 -0800 -Subject: [PATCH 080/251] net: net_enable_timestamp() can be called from irq - contexts -Content-Length: 2734 -Lines: 92 - -[ Upstream commit 13baa00ad01bb3a9f893e3a08cbc2d072fc0c15d ] - -It is now very clear that silly TCP listeners might play with -enabling/disabling timestamping while new children are added -to their accept queue. - -Meaning net_enable_timestamp() can be called from BH context -while current state of the static key is not enabled. - -Lets play safe and allow all contexts. - -The work queue is scheduled only under the problematic cases, -which are the static key enable/disable transition, to not slow down -critical paths. 
- -This extends and improves what we did in commit 5fa8bbda38c6 ("net: use -a work queue to defer net_disable_timestamp() work") - -Fixes: b90e5794c5bd ("net: dont call jump_label_dec from irq context") -Signed-off-by: Eric Dumazet -Reported-by: Dmitry Vyukov -Signed-off-by: David S. Miller -Signed-off-by: Greg Kroah-Hartman ---- - net/core/dev.c | 35 +++++++++++++++++++++++++++++++---- - 1 file changed, 31 insertions(+), 4 deletions(-) - -diff --git a/net/core/dev.c b/net/core/dev.c -index 08215a85c742..48399d8ce614 100644 ---- a/net/core/dev.c -+++ b/net/core/dev.c -@@ -1677,27 +1677,54 @@ EXPORT_SYMBOL_GPL(net_dec_ingress_queue); - static struct static_key netstamp_needed __read_mostly; - #ifdef HAVE_JUMP_LABEL - static atomic_t netstamp_needed_deferred; -+static atomic_t netstamp_wanted; - static void netstamp_clear(struct work_struct *work) - { - int deferred = atomic_xchg(&netstamp_needed_deferred, 0); -+ int wanted; - -- while (deferred--) -- static_key_slow_dec(&netstamp_needed); -+ wanted = atomic_add_return(deferred, &netstamp_wanted); -+ if (wanted > 0) -+ static_key_enable(&netstamp_needed); -+ else -+ static_key_disable(&netstamp_needed); - } - static DECLARE_WORK(netstamp_work, netstamp_clear); - #endif - - void net_enable_timestamp(void) - { -+#ifdef HAVE_JUMP_LABEL -+ int wanted; -+ -+ while (1) { -+ wanted = atomic_read(&netstamp_wanted); -+ if (wanted <= 0) -+ break; -+ if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted + 1) == wanted) -+ return; -+ } -+ atomic_inc(&netstamp_needed_deferred); -+ schedule_work(&netstamp_work); -+#else - static_key_slow_inc(&netstamp_needed); -+#endif - } - EXPORT_SYMBOL(net_enable_timestamp); - - void net_disable_timestamp(void) - { - #ifdef HAVE_JUMP_LABEL -- /* net_disable_timestamp() can be called from non process context */ -- atomic_inc(&netstamp_needed_deferred); -+ int wanted; -+ -+ while (1) { -+ wanted = atomic_read(&netstamp_wanted); -+ if (wanted <= 1) -+ break; -+ if (atomic_cmpxchg(&netstamp_wanted, 
wanted, wanted - 1) == wanted) -+ return; -+ } -+ atomic_dec(&netstamp_needed_deferred); - schedule_work(&netstamp_work); - #else - static_key_slow_dec(&netstamp_needed); --- -2.12.2 - -From 9216632bf4a0bafdc998d1c68b37b70446775900 Mon Sep 17 00:00:00 2001 -From: Arnaldo Carvalho de Melo -Date: Wed, 1 Mar 2017 16:35:07 -0300 -Subject: [PATCH 081/251] dccp: Unlock sock before calling sk_free() -Content-Length: 3158 -Lines: 77 - -[ Upstream commit d5afb6f9b6bb2c57bd0c05e76e12489dc0d037d9 ] - -The code where sk_clone() came from created a new socket and locked it, -but then, on the error path didn't unlock it. - -This problem stayed there for a long while, till b0691c8ee7c2 ("net: -Unlock sock before calling sk_free()") fixed it, but unfortunately the -callers of sk_clone() (now sk_clone_locked()) were not audited and the -one in dccp_create_openreq_child() remained. - -Now in the age of the syskaller fuzzer, this was finally uncovered, as -reported by Dmitry: - - ---- 8< ---- - -I've got the following report while running syzkaller fuzzer on -86292b33d4b7 ("Merge branch 'akpm' (patches from Andrew)") - - [ BUG: held lock freed! ] - 4.10.0+ #234 Not tainted - ------------------------- - syz-executor6/6898 is freeing memory - ffff88006286cac0-ffff88006286d3b7, with a lock still held there! 
- (slock-AF_INET6){+.-...}, at: [] spin_lock - include/linux/spinlock.h:299 [inline] - (slock-AF_INET6){+.-...}, at: [] - sk_clone_lock+0x3d9/0x12c0 net/core/sock.c:1504 - 5 locks held by syz-executor6/6898: - #0: (sk_lock-AF_INET6){+.+.+.}, at: [] lock_sock - include/net/sock.h:1460 [inline] - #0: (sk_lock-AF_INET6){+.+.+.}, at: [] - inet_stream_connect+0x44/0xa0 net/ipv4/af_inet.c:681 - #1: (rcu_read_lock){......}, at: [] - inet6_csk_xmit+0x12a/0x5d0 net/ipv6/inet6_connection_sock.c:126 - #2: (rcu_read_lock){......}, at: [] __skb_unlink - include/linux/skbuff.h:1767 [inline] - #2: (rcu_read_lock){......}, at: [] __skb_dequeue - include/linux/skbuff.h:1783 [inline] - #2: (rcu_read_lock){......}, at: [] - process_backlog+0x264/0x730 net/core/dev.c:4835 - #3: (rcu_read_lock){......}, at: [] - ip6_input_finish+0x0/0x1700 net/ipv6/ip6_input.c:59 - #4: (slock-AF_INET6){+.-...}, at: [] spin_lock - include/linux/spinlock.h:299 [inline] - #4: (slock-AF_INET6){+.-...}, at: [] - sk_clone_lock+0x3d9/0x12c0 net/core/sock.c:1504 - -Fix it just like was done by b0691c8ee7c2 ("net: Unlock sock before calling -sk_free()"). - -Reported-by: Dmitry Vyukov -Cc: Cong Wang -Cc: Eric Dumazet -Cc: Gerrit Renker -Cc: Thomas Gleixner -Link: http://lkml.kernel.org/r/20170301153510.GE15145@kernel.org -Signed-off-by: Arnaldo Carvalho de Melo -Signed-off-by: David S. 
Miller -Signed-off-by: Greg Kroah-Hartman ---- - net/dccp/minisocks.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c -index 1994f8af646b..e314caa39176 100644 ---- a/net/dccp/minisocks.c -+++ b/net/dccp/minisocks.c -@@ -122,6 +122,7 @@ struct sock *dccp_create_openreq_child(const struct sock *sk, - /* It is still raw copy of parent, so invalidate - * destructor and make plain sk_free() */ - newsk->sk_destruct = NULL; -+ bh_unlock_sock(newsk); - sk_free(newsk); - return NULL; - } --- -2.12.2 - From 2681a7853ad73bfebc3a683765a496bb283c6648 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 3 Mar 2017 14:08:21 -0800 diff --git a/queue-3.18/net-don-t-call-strlen-on-the-user-buffer-in-packet_bind_spkt.patch b/queue-3.18/net-don-t-call-strlen-on-the-user-buffer-in-packet_bind_spkt.patch new file mode 100644 index 00000000000..d4b1a5c4b6b --- /dev/null +++ b/queue-3.18/net-don-t-call-strlen-on-the-user-buffer-in-packet_bind_spkt.patch @@ -0,0 +1,108 @@ +From 540e2894f7905538740aaf122bd8e0548e1c34a4 Mon Sep 17 00:00:00 2001 +From: Alexander Potapenko +Date: Wed, 1 Mar 2017 12:57:20 +0100 +Subject: net: don't call strlen() on the user buffer in packet_bind_spkt() + +From: Alexander Potapenko + +commit 540e2894f7905538740aaf122bd8e0548e1c34a4 upstream. 
+ +KMSAN (KernelMemorySanitizer, a new error detection tool) reports use of +uninitialized memory in packet_bind_spkt(): +Acked-by: Eric Dumazet + +================================================================== +BUG: KMSAN: use of unitialized memory +CPU: 0 PID: 1074 Comm: packet Not tainted 4.8.0-rc6+ #1891 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs +01/01/2011 + 0000000000000000 ffff88006b6dfc08 ffffffff82559ae8 ffff88006b6dfb48 + ffffffff818a7c91 ffffffff85b9c870 0000000000000092 ffffffff85b9c550 + 0000000000000000 0000000000000092 00000000ec400911 0000000000000002 +Call Trace: + [< inline >] __dump_stack lib/dump_stack.c:15 + [] dump_stack+0x238/0x290 lib/dump_stack.c:51 + [] kmsan_report+0x276/0x2e0 mm/kmsan/kmsan.c:1003 + [] __msan_warning+0x5b/0xb0 +mm/kmsan/kmsan_instr.c:424 + [< inline >] strlen lib/string.c:484 + [] strlcpy+0x9d/0x200 lib/string.c:144 + [] packet_bind_spkt+0x144/0x230 +net/packet/af_packet.c:3132 + [] SYSC_bind+0x40d/0x5f0 net/socket.c:1370 + [] SyS_bind+0x82/0xa0 net/socket.c:1356 + [] entry_SYSCALL_64_fastpath+0x13/0x8f +arch/x86/entry/entry_64.o:? +chained origin: 00000000eba00911 + [] save_stack_trace+0x27/0x50 +arch/x86/kernel/stacktrace.c:67 + [< inline >] kmsan_save_stack_with_flags mm/kmsan/kmsan.c:322 + [< inline >] kmsan_save_stack mm/kmsan/kmsan.c:334 + [] kmsan_internal_chain_origin+0x118/0x1e0 +mm/kmsan/kmsan.c:527 + [] __msan_set_alloca_origin4+0xc3/0x130 +mm/kmsan/kmsan_instr.c:380 + [] SYSC_bind+0x129/0x5f0 net/socket.c:1356 + [] SyS_bind+0x82/0xa0 net/socket.c:1356 + [] entry_SYSCALL_64_fastpath+0x13/0x8f +arch/x86/entry/entry_64.o:? 
+origin description: ----address@SYSC_bind (origin=00000000eb400911) +================================================================== +(the line numbers are relative to 4.8-rc6, but the bug persists +upstream) + +, when I run the following program as root: + +===================================== + #include + #include + #include + #include + + int main() { + struct sockaddr addr; + memset(&addr, 0xff, sizeof(addr)); + addr.sa_family = AF_PACKET; + int fd = socket(PF_PACKET, SOCK_PACKET, htons(ETH_P_ALL)); + bind(fd, &addr, sizeof(addr)); + return 0; + } +===================================== + +This happens because addr.sa_data copied from the userspace is not +zero-terminated, and copying it with strlcpy() in packet_bind_spkt() +results in calling strlen() on the kernel copy of that non-terminated +buffer. + +Signed-off-by: Alexander Potapenko +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman + +--- + net/packet/af_packet.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/net/packet/af_packet.c ++++ b/net/packet/af_packet.c +@@ -2738,7 +2738,7 @@ static int packet_bind_spkt(struct socke + int addr_len) + { + struct sock *sk = sock->sk; +- char name[15]; ++ char name[sizeof(uaddr->sa_data) + 1]; + + /* + * Check legality +@@ -2746,7 +2746,11 @@ static int packet_bind_spkt(struct socke + + if (addr_len != sizeof(struct sockaddr)) + return -EINVAL; +- strlcpy(name, uaddr->sa_data, sizeof(name)); ++ /* uaddr->sa_data comes from the userspace, it's not guaranteed to be ++ * zero-terminated. 
++ */ ++ memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data)); ++ name[sizeof(uaddr->sa_data)] = 0; + + return packet_do_bind(sk, name, 0, pkt_sk(sk)->num); + } diff --git a/queue-3.18/netlink-remove-mmapped-netlink-support.patch b/queue-3.18/netlink-remove-mmapped-netlink-support.patch new file mode 100644 index 00000000000..99649a1d5b4 --- /dev/null +++ b/queue-3.18/netlink-remove-mmapped-netlink-support.patch @@ -0,0 +1,1410 @@ +From 0c0be310ba29e4a053e8aac934aebe590c5da909 Mon Sep 17 00:00:00 2001 +From: Florian Westphal +Date: Thu, 18 Feb 2016 15:03:24 +0100 +Subject: netlink: remove mmapped netlink support + +From: Florian Westphal + +commit d1b4c689d4130bcfd3532680b64db562300716b6 upstream. + +mmapped netlink has a number of unresolved issues: + +- TX zerocopy support had to be disabled more than a year ago via + commit 4682a0358639b29cf ("netlink: Always copy on mmap TX.") + because the content of the mmapped area can change after netlink + attribute validation but before message processing. + +- RX support was implemented mainly to speed up nfqueue dumping packet + payload to userspace. However, since commit ae08ce0021087a5d812d2 + ("netfilter: nfnetlink_queue: zero copy support") we avoid one copy + with the socket-based interface too (via the skb_zerocopy helper). + +The other problem is that skbs attached to mmaped netlink socket +behave different from normal skbs: + +- they don't have a shinfo area, so all functions that use skb_shinfo() +(e.g. skb_clone) cannot be used. + +- reserving headroom prevents userspace from seeing the content as +it expects message to start at skb->head. +See for instance +commit aa3a022094fa ("netlink: not trim skb for mmaped socket when dump"). + +- skbs handed e.g. to netlink_ack must have non-NULL skb->sk, else we +crash because it needs the sk to check if a tx ring is attached. + +Also not obvious, leads to non-intuitive bug fixes such as 7c7bdf359 +("netfilter: nfnetlink: use original skbuff when acking batches"). 
+ +mmaped netlink also didn't play nicely with the skb_zerocopy helper +used by nfqueue and openvswitch. Daniel Borkmann fixed this via +commit 6bb0fef489f6 ("netlink, mmap: fix edge-case leakages in nf queue +zero-copy")' but at the cost of also needing to provide remaining +length to the allocation function. + +nfqueue also has problems when used with mmaped rx netlink: +- mmaped netlink doesn't allow use of nfqueue batch verdict messages. + Problem is that in the mmap case, the allocation time also determines + the ordering in which the frame will be seen by userspace (A + allocating before B means that A is located in earlier ring slot, + but this also means that B might get a lower sequence number then A + since seqno is decided later. To fix this we would need to extend the + spinlocked region to also cover the allocation and message setup which + isn't desirable. +- nfqueue can now be configured to queue large (GSO) skbs to userspace. + Queing GSO packets is faster than having to force a software segmentation + in the kernel, so this is a desirable option. However, with a mmap based + ring one has to use 64kb per ring slot element, else mmap has to fall back + to the socket path (NL_MMAP_STATUS_COPY) for all large packets. + +To use the mmap interface, userspace not only has to probe for mmap netlink +support, it also has to implement a recv/socket receive path in order to +handle messages that exceed the size of an rx ring element. + +Cc: Daniel Borkmann +Cc: Ken-ichirou MATSUZAWA +Cc: Pablo Neira Ayuso +Cc: Patrick McHardy +Cc: Thomas Graf +Signed-off-by: Florian Westphal +Signed-off-by: David S. 
Miller +Cc: Shi Yuejie +Signed-off-by: Greg Kroah-Hartman + +--- + Documentation/networking/netlink_mmap.txt | 339 ------------- + include/uapi/linux/netlink.h | 4 + include/uapi/linux/netlink_diag.h | 2 + net/netlink/Kconfig | 9 + net/netlink/af_netlink.c | 732 ------------------------------ + net/netlink/af_netlink.h | 15 + net/netlink/diag.c | 39 - + 7 files changed, 15 insertions(+), 1125 deletions(-) + +--- a/Documentation/networking/netlink_mmap.txt ++++ /dev/null +@@ -1,339 +0,0 @@ +-This file documents how to use memory mapped I/O with netlink. +- +-Author: Patrick McHardy +- +-Overview +--------- +- +-Memory mapped netlink I/O can be used to increase throughput and decrease +-overhead of unicast receive and transmit operations. Some netlink subsystems +-require high throughput, these are mainly the netfilter subsystems +-nfnetlink_queue and nfnetlink_log, but it can also help speed up large +-dump operations of f.i. the routing database. +- +-Memory mapped netlink I/O used two circular ring buffers for RX and TX which +-are mapped into the processes address space. +- +-The RX ring is used by the kernel to directly construct netlink messages into +-user-space memory without copying them as done with regular socket I/O, +-additionally as long as the ring contains messages no recvmsg() or poll() +-syscalls have to be issued by user-space to get more message. +- +-The TX ring is used to process messages directly from user-space memory, the +-kernel processes all messages contained in the ring using a single sendmsg() +-call. 
+- +-Usage overview +--------------- +- +-In order to use memory mapped netlink I/O, user-space needs three main changes: +- +-- ring setup +-- conversion of the RX path to get messages from the ring instead of recvmsg() +-- conversion of the TX path to construct messages into the ring +- +-Ring setup is done using setsockopt() to provide the ring parameters to the +-kernel, then a call to mmap() to map the ring into the processes address space: +- +-- setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &params, sizeof(params)); +-- setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, &params, sizeof(params)); +-- ring = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0) +- +-Usage of either ring is optional, but even if only the RX ring is used the +-mapping still needs to be writable in order to update the frame status after +-processing. +- +-Conversion of the reception path involves calling poll() on the file +-descriptor, once the socket is readable the frames from the ring are +-processed in order until no more messages are available, as indicated by +-a status word in the frame header. +- +-On kernel side, in order to make use of memory mapped I/O on receive, the +-originating netlink subsystem needs to support memory mapped I/O, otherwise +-it will use an allocated socket buffer as usual and the contents will be +- copied to the ring on transmission, nullifying most of the performance gains. +-Dumps of kernel databases automatically support memory mapped I/O. +- +-Conversion of the transmit path involves changing message construction to +-use memory from the TX ring instead of (usually) a buffer declared on the +-stack and setting up the frame header appropriately. Optionally poll() can +-be used to wait for free frames in the TX ring. +- +-Structured and definitions for using memory mapped I/O are contained in +-<linux/netlink.h>.
+- +-RX and TX rings +----------------- +- +-Each ring contains a number of continuous memory blocks, containing frames of +-fixed size dependent on the parameters used for ring setup. +- +-Ring: [ block 0 ] +- [ frame 0 ] +- [ frame 1 ] +- [ block 1 ] +- [ frame 2 ] +- [ frame 3 ] +- ... +- [ block n ] +- [ frame 2 * n ] +- [ frame 2 * n + 1 ] +- +-The blocks are only visible to the kernel, from the point of view of user-space +-the ring just contains the frames in a continuous memory zone. +- +-The ring parameters used for setting up the ring are defined as follows: +- +-struct nl_mmap_req { +- unsigned int nm_block_size; +- unsigned int nm_block_nr; +- unsigned int nm_frame_size; +- unsigned int nm_frame_nr; +-}; +- +-Frames are grouped into blocks, where each block is a continuous region of memory +-and holds nm_block_size / nm_frame_size frames. The total number of frames in +-the ring is nm_frame_nr. The following invariants hold: +- +-- frames_per_block = nm_block_size / nm_frame_size +- +-- nm_frame_nr = frames_per_block * nm_block_nr +- +-Some parameters are constrained, specifically: +- +-- nm_block_size must be a multiple of the architectures memory page size. +- The getpagesize() function can be used to get the page size. +- +-- nm_frame_size must be equal or larger to NL_MMAP_HDRLEN, IOW a frame must be +- able to hold at least the frame header +- +-- nm_frame_size must be smaller or equal to nm_block_size +- +-- nm_frame_size must be a multiple of NL_MMAP_MSG_ALIGNMENT +- +-- nm_frame_nr must equal the actual number of frames as specified above. +- +-When the kernel can't allocate physically continuous memory for a ring block, +-it will fall back to use physically discontinuous memory. This might affect +-performance negatively, in order to avoid this the nm_frame_size parameter +-should be chosen to be as small as possible for the required frame size and +-the number of blocks should be increased instead. 
+- +-Ring frames +------------- +- +-Each frames contain a frame header, consisting of a synchronization word and some +-meta-data, and the message itself. +- +-Frame: [ header message ] +- +-The frame header is defined as follows: +- +-struct nl_mmap_hdr { +- unsigned int nm_status; +- unsigned int nm_len; +- __u32 nm_group; +- /* credentials */ +- __u32 nm_pid; +- __u32 nm_uid; +- __u32 nm_gid; +-}; +- +-- nm_status is used for synchronizing processing between the kernel and user- +- space and specifies ownership of the frame as well as the operation to perform +- +-- nm_len contains the length of the message contained in the data area +- +-- nm_group specified the destination multicast group of message +- +-- nm_pid, nm_uid and nm_gid contain the netlink pid, UID and GID of the sending +- process. These values correspond to the data available using SOCK_PASSCRED in +- the SCM_CREDENTIALS cmsg. +- +-The possible values in the status word are: +- +-- NL_MMAP_STATUS_UNUSED: +- RX ring: frame belongs to the kernel and contains no message +- for user-space. Approriate action is to invoke poll() +- to wait for new messages. +- +- TX ring: frame belongs to user-space and can be used for +- message construction. +- +-- NL_MMAP_STATUS_RESERVED: +- RX ring only: frame is currently used by the kernel for message +- construction and contains no valid message yet. +- Appropriate action is to invoke poll() to wait for +- new messages. +- +-- NL_MMAP_STATUS_VALID: +- RX ring: frame contains a valid message. Approriate action is +- to process the message and release the frame back to +- the kernel by setting the status to +- NL_MMAP_STATUS_UNUSED or queue the frame by setting the +- status to NL_MMAP_STATUS_SKIP. +- +- TX ring: the frame contains a valid message from user-space to +- be processed by the kernel. After completing processing +- the kernel will release the frame back to user-space by +- setting the status to NL_MMAP_STATUS_UNUSED. 
+- +-- NL_MMAP_STATUS_COPY: +- RX ring only: a message is ready to be processed but could not be +- stored in the ring, either because it exceeded the +- frame size or because the originating subsystem does +- not support memory mapped I/O. Appropriate action is +- to invoke recvmsg() to receive the message and release +- the frame back to the kernel by setting the status to +- NL_MMAP_STATUS_UNUSED. +- +-- NL_MMAP_STATUS_SKIP: +- RX ring only: user-space queued the message for later processing, but +- processed some messages following it in the ring. The +- kernel should skip this frame when looking for unused +- frames. +- +-The data area of a frame begins at a offset of NL_MMAP_HDRLEN relative to the +-frame header. +- +-TX limitations +--------------- +- +-Kernel processing usually involves validation of the message received by +-user-space, then processing its contents. The kernel must assure that +-userspace is not able to modify the message contents after they have been +-validated. In order to do so, the message is copied from the ring frame +-to an allocated buffer if either of these conditions is false: +- +-- only a single mapping of the ring exists +-- the file descriptor is not shared between processes +- +-This means that for threaded programs, the kernel will fall back to copying. +- +-Example +-------- +- +-Ring setup: +- +- unsigned int block_size = 16 * getpagesize(); +- struct nl_mmap_req req = { +- .nm_block_size = block_size, +- .nm_block_nr = 64, +- .nm_frame_size = 16384, +- .nm_frame_nr = 64 * block_size / 16384, +- }; +- unsigned int ring_size; +- void *rx_ring, *tx_ring; +- +- /* Configure ring parameters */ +- if (setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &req, sizeof(req)) < 0) +- exit(1); +- if (setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, &req, sizeof(req)) < 0) +- exit(1) +- +- /* Calculate size of each individual ring */ +- ring_size = req.nm_block_nr * req.nm_block_size; +- +- /* Map RX/TX rings. 
The TX ring is located after the RX ring */ +- rx_ring = mmap(NULL, 2 * ring_size, PROT_READ | PROT_WRITE, +- MAP_SHARED, fd, 0); +- if ((long)rx_ring == -1L) +- exit(1); +- tx_ring = rx_ring + ring_size: +- +-Message reception: +- +-This example assumes some ring parameters of the ring setup are available. +- +- unsigned int frame_offset = 0; +- struct nl_mmap_hdr *hdr; +- struct nlmsghdr *nlh; +- unsigned char buf[16384]; +- ssize_t len; +- +- while (1) { +- struct pollfd pfds[1]; +- +- pfds[0].fd = fd; +- pfds[0].events = POLLIN | POLLERR; +- pfds[0].revents = 0; +- +- if (poll(pfds, 1, -1) < 0 && errno != -EINTR) +- exit(1); +- +- /* Check for errors. Error handling omitted */ +- if (pfds[0].revents & POLLERR) +- +- +- /* If no new messages, poll again */ +- if (!(pfds[0].revents & POLLIN)) +- continue; +- +- /* Process all frames */ +- while (1) { +- /* Get next frame header */ +- hdr = rx_ring + frame_offset; +- +- if (hdr->nm_status == NL_MMAP_STATUS_VALID) { +- /* Regular memory mapped frame */ +- nlh = (void *)hdr + NL_MMAP_HDRLEN; +- len = hdr->nm_len; +- +- /* Release empty message immediately. May happen +- * on error during message construction. +- */ +- if (len == 0) +- goto release; +- } else if (hdr->nm_status == NL_MMAP_STATUS_COPY) { +- /* Frame queued to socket receive queue */ +- len = recv(fd, buf, sizeof(buf), MSG_DONTWAIT); +- if (len <= 0) +- break; +- nlh = buf; +- } else +- /* No more messages to process, continue polling */ +- break; +- +- process_msg(nlh); +-release: +- /* Release frame back to the kernel */ +- hdr->nm_status = NL_MMAP_STATUS_UNUSED; +- +- /* Advance frame offset to next frame */ +- frame_offset = (frame_offset + frame_size) % ring_size; +- } +- } +- +-Message transmission: +- +-This example assumes some ring parameters of the ring setup are available. +-A single message is constructed and transmitted, to send multiple messages +-at once they would be constructed in consecutive frames before a final call +-to sendto(). 
+- +- unsigned int frame_offset = 0; +- struct nl_mmap_hdr *hdr; +- struct nlmsghdr *nlh; +- struct sockaddr_nl addr = { +- .nl_family = AF_NETLINK, +- }; +- +- hdr = tx_ring + frame_offset; +- if (hdr->nm_status != NL_MMAP_STATUS_UNUSED) +- /* No frame available. Use poll() to avoid. */ +- exit(1); +- +- nlh = (void *)hdr + NL_MMAP_HDRLEN; +- +- /* Build message */ +- build_message(nlh); +- +- /* Fill frame header: length and status need to be set */ +- hdr->nm_len = nlh->nlmsg_len; +- hdr->nm_status = NL_MMAP_STATUS_VALID; +- +- if (sendto(fd, NULL, 0, 0, &addr, sizeof(addr)) < 0) +- exit(1); +- +- /* Advance frame offset to next frame */ +- frame_offset = (frame_offset + frame_size) % ring_size; +--- a/include/uapi/linux/netlink.h ++++ b/include/uapi/linux/netlink.h +@@ -106,8 +106,10 @@ struct nlmsgerr { + #define NETLINK_PKTINFO 3 + #define NETLINK_BROADCAST_ERROR 4 + #define NETLINK_NO_ENOBUFS 5 ++#ifndef __KERNEL__ + #define NETLINK_RX_RING 6 + #define NETLINK_TX_RING 7 ++#endif + + struct nl_pktinfo { + __u32 group; +@@ -130,6 +132,7 @@ struct nl_mmap_hdr { + __u32 nm_gid; + }; + ++#ifndef __KERNEL__ + enum nl_mmap_status { + NL_MMAP_STATUS_UNUSED, + NL_MMAP_STATUS_RESERVED, +@@ -141,6 +144,7 @@ enum nl_mmap_status { + #define NL_MMAP_MSG_ALIGNMENT NLMSG_ALIGNTO + #define NL_MMAP_MSG_ALIGN(sz) __ALIGN_KERNEL(sz, NL_MMAP_MSG_ALIGNMENT) + #define NL_MMAP_HDRLEN NL_MMAP_MSG_ALIGN(sizeof(struct nl_mmap_hdr)) ++#endif + + #define NET_MAJOR 36 /* Major 36 is reserved for networking */ + +--- a/include/uapi/linux/netlink_diag.h ++++ b/include/uapi/linux/netlink_diag.h +@@ -48,6 +48,8 @@ enum { + + #define NDIAG_SHOW_MEMINFO 0x00000001 /* show memory info of a socket */ + #define NDIAG_SHOW_GROUPS 0x00000002 /* show groups of a netlink socket */ ++#ifndef __KERNEL__ + #define NDIAG_SHOW_RING_CFG 0x00000004 /* show ring configuration */ ++#endif + + #endif +--- a/net/netlink/Kconfig ++++ b/net/netlink/Kconfig +@@ -2,15 +2,6 @@ + # Netlink Sockets + # + +-config 
NETLINK_MMAP +- bool "NETLINK: mmaped IO" +- ---help--- +- This option enables support for memory mapped netlink IO. This +- reduces overhead by avoiding copying data between kernel- and +- userspace. +- +- If unsure, say N. +- + config NETLINK_DIAG + tristate "NETLINK: socket monitoring interface" + default n +--- a/net/netlink/af_netlink.c ++++ b/net/netlink/af_netlink.c +@@ -234,7 +234,7 @@ static int __netlink_deliver_tap_skb(str + + dev_hold(dev); + +- if (netlink_skb_is_mmaped(skb) || is_vmalloc_addr(skb->head)) ++ if (is_vmalloc_addr(skb->head)) + nskb = netlink_to_full_skb(skb, GFP_ATOMIC); + else + nskb = skb_clone(skb, GFP_ATOMIC); +@@ -308,599 +308,8 @@ static void netlink_rcv_wake(struct sock + wake_up_interruptible(&nlk->wait); + } + +-#ifdef CONFIG_NETLINK_MMAP +-static bool netlink_rx_is_mmaped(struct sock *sk) +-{ +- return nlk_sk(sk)->rx_ring.pg_vec != NULL; +-} +- +-static bool netlink_tx_is_mmaped(struct sock *sk) +-{ +- return nlk_sk(sk)->tx_ring.pg_vec != NULL; +-} +- +-static __pure struct page *pgvec_to_page(const void *addr) +-{ +- if (is_vmalloc_addr(addr)) +- return vmalloc_to_page(addr); +- else +- return virt_to_page(addr); +-} +- +-static void free_pg_vec(void **pg_vec, unsigned int order, unsigned int len) +-{ +- unsigned int i; +- +- for (i = 0; i < len; i++) { +- if (pg_vec[i] != NULL) { +- if (is_vmalloc_addr(pg_vec[i])) +- vfree(pg_vec[i]); +- else +- free_pages((unsigned long)pg_vec[i], order); +- } +- } +- kfree(pg_vec); +-} +- +-static void *alloc_one_pg_vec_page(unsigned long order) +-{ +- void *buffer; +- gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | +- __GFP_NOWARN | __GFP_NORETRY; +- +- buffer = (void *)__get_free_pages(gfp_flags, order); +- if (buffer != NULL) +- return buffer; +- +- buffer = vzalloc((1 << order) * PAGE_SIZE); +- if (buffer != NULL) +- return buffer; +- +- gfp_flags &= ~__GFP_NORETRY; +- return (void *)__get_free_pages(gfp_flags, order); +-} +- +-static void **alloc_pg_vec(struct netlink_sock 
*nlk, +- struct nl_mmap_req *req, unsigned int order) +-{ +- unsigned int block_nr = req->nm_block_nr; +- unsigned int i; +- void **pg_vec; +- +- pg_vec = kcalloc(block_nr, sizeof(void *), GFP_KERNEL); +- if (pg_vec == NULL) +- return NULL; +- +- for (i = 0; i < block_nr; i++) { +- pg_vec[i] = alloc_one_pg_vec_page(order); +- if (pg_vec[i] == NULL) +- goto err1; +- } +- +- return pg_vec; +-err1: +- free_pg_vec(pg_vec, order, block_nr); +- return NULL; +-} +- +- +-static void +-__netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, bool tx_ring, void **pg_vec, +- unsigned int order) +-{ +- struct netlink_sock *nlk = nlk_sk(sk); +- struct sk_buff_head *queue; +- struct netlink_ring *ring; +- +- queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue; +- ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring; +- +- spin_lock_bh(&queue->lock); +- +- ring->frame_max = req->nm_frame_nr - 1; +- ring->head = 0; +- ring->frame_size = req->nm_frame_size; +- ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE; +- +- swap(ring->pg_vec_len, req->nm_block_nr); +- swap(ring->pg_vec_order, order); +- swap(ring->pg_vec, pg_vec); +- +- __skb_queue_purge(queue); +- spin_unlock_bh(&queue->lock); +- +- WARN_ON(atomic_read(&nlk->mapped)); +- +- if (pg_vec) +- free_pg_vec(pg_vec, order, req->nm_block_nr); +-} +- +-static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, +- bool tx_ring) +-{ +- struct netlink_sock *nlk = nlk_sk(sk); +- struct netlink_ring *ring; +- void **pg_vec = NULL; +- unsigned int order = 0; +- +- ring = tx_ring ? 
&nlk->tx_ring : &nlk->rx_ring; +- +- if (atomic_read(&nlk->mapped)) +- return -EBUSY; +- if (atomic_read(&ring->pending)) +- return -EBUSY; +- +- if (req->nm_block_nr) { +- if (ring->pg_vec != NULL) +- return -EBUSY; +- +- if ((int)req->nm_block_size <= 0) +- return -EINVAL; +- if (!PAGE_ALIGNED(req->nm_block_size)) +- return -EINVAL; +- if (req->nm_frame_size < NL_MMAP_HDRLEN) +- return -EINVAL; +- if (!IS_ALIGNED(req->nm_frame_size, NL_MMAP_MSG_ALIGNMENT)) +- return -EINVAL; +- +- ring->frames_per_block = req->nm_block_size / +- req->nm_frame_size; +- if (ring->frames_per_block == 0) +- return -EINVAL; +- if (ring->frames_per_block * req->nm_block_nr != +- req->nm_frame_nr) +- return -EINVAL; +- +- order = get_order(req->nm_block_size); +- pg_vec = alloc_pg_vec(nlk, req, order); +- if (pg_vec == NULL) +- return -ENOMEM; +- } else { +- if (req->nm_frame_nr) +- return -EINVAL; +- } +- +- mutex_lock(&nlk->pg_vec_lock); +- if (atomic_read(&nlk->mapped) == 0) { +- __netlink_set_ring(sk, req, tx_ring, pg_vec, order); +- mutex_unlock(&nlk->pg_vec_lock); +- return 0; +- } +- +- mutex_unlock(&nlk->pg_vec_lock); +- +- if (pg_vec) +- free_pg_vec(pg_vec, order, req->nm_block_nr); +- +- return -EBUSY; +-} +- +-static void netlink_mm_open(struct vm_area_struct *vma) +-{ +- struct file *file = vma->vm_file; +- struct socket *sock = file->private_data; +- struct sock *sk = sock->sk; +- +- if (sk) +- atomic_inc(&nlk_sk(sk)->mapped); +-} +- +-static void netlink_mm_close(struct vm_area_struct *vma) +-{ +- struct file *file = vma->vm_file; +- struct socket *sock = file->private_data; +- struct sock *sk = sock->sk; +- +- if (sk) +- atomic_dec(&nlk_sk(sk)->mapped); +-} +- +-static const struct vm_operations_struct netlink_mmap_ops = { +- .open = netlink_mm_open, +- .close = netlink_mm_close, +-}; +- +-static int netlink_mmap(struct file *file, struct socket *sock, +- struct vm_area_struct *vma) +-{ +- struct sock *sk = sock->sk; +- struct netlink_sock *nlk = nlk_sk(sk); +- struct 
netlink_ring *ring; +- unsigned long start, size, expected; +- unsigned int i; +- int err = -EINVAL; +- +- if (vma->vm_pgoff) +- return -EINVAL; +- +- mutex_lock(&nlk->pg_vec_lock); +- +- expected = 0; +- for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) { +- if (ring->pg_vec == NULL) +- continue; +- expected += ring->pg_vec_len * ring->pg_vec_pages * PAGE_SIZE; +- } +- +- if (expected == 0) +- goto out; +- +- size = vma->vm_end - vma->vm_start; +- if (size != expected) +- goto out; +- +- start = vma->vm_start; +- for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) { +- if (ring->pg_vec == NULL) +- continue; +- +- for (i = 0; i < ring->pg_vec_len; i++) { +- struct page *page; +- void *kaddr = ring->pg_vec[i]; +- unsigned int pg_num; +- +- for (pg_num = 0; pg_num < ring->pg_vec_pages; pg_num++) { +- page = pgvec_to_page(kaddr); +- err = vm_insert_page(vma, start, page); +- if (err < 0) +- goto out; +- start += PAGE_SIZE; +- kaddr += PAGE_SIZE; +- } +- } +- } +- +- atomic_inc(&nlk->mapped); +- vma->vm_ops = &netlink_mmap_ops; +- err = 0; +-out: +- mutex_unlock(&nlk->pg_vec_lock); +- return err; +-} +- +-static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr, unsigned int nm_len) +-{ +-#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 +- struct page *p_start, *p_end; +- +- /* First page is flushed through netlink_{get,set}_status */ +- p_start = pgvec_to_page(hdr + PAGE_SIZE); +- p_end = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + nm_len - 1); +- while (p_start <= p_end) { +- flush_dcache_page(p_start); +- p_start++; +- } +-#endif +-} +- +-static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr) +-{ +- smp_rmb(); +- flush_dcache_page(pgvec_to_page(hdr)); +- return hdr->nm_status; +-} +- +-static void netlink_set_status(struct nl_mmap_hdr *hdr, +- enum nl_mmap_status status) +-{ +- smp_mb(); +- hdr->nm_status = status; +- flush_dcache_page(pgvec_to_page(hdr)); +-} +- +-static struct nl_mmap_hdr * +-__netlink_lookup_frame(const 
struct netlink_ring *ring, unsigned int pos) +-{ +- unsigned int pg_vec_pos, frame_off; +- +- pg_vec_pos = pos / ring->frames_per_block; +- frame_off = pos % ring->frames_per_block; +- +- return ring->pg_vec[pg_vec_pos] + (frame_off * ring->frame_size); +-} +- +-static struct nl_mmap_hdr * +-netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos, +- enum nl_mmap_status status) +-{ +- struct nl_mmap_hdr *hdr; +- +- hdr = __netlink_lookup_frame(ring, pos); +- if (netlink_get_status(hdr) != status) +- return NULL; +- +- return hdr; +-} +- +-static struct nl_mmap_hdr * +-netlink_current_frame(const struct netlink_ring *ring, +- enum nl_mmap_status status) +-{ +- return netlink_lookup_frame(ring, ring->head, status); +-} +- +-static struct nl_mmap_hdr * +-netlink_previous_frame(const struct netlink_ring *ring, +- enum nl_mmap_status status) +-{ +- unsigned int prev; +- +- prev = ring->head ? ring->head - 1 : ring->frame_max; +- return netlink_lookup_frame(ring, prev, status); +-} +- +-static void netlink_increment_head(struct netlink_ring *ring) +-{ +- ring->head = ring->head != ring->frame_max ? 
ring->head + 1 : 0; +-} +- +-static void netlink_forward_ring(struct netlink_ring *ring) +-{ +- unsigned int head = ring->head, pos = head; +- const struct nl_mmap_hdr *hdr; +- +- do { +- hdr = __netlink_lookup_frame(ring, pos); +- if (hdr->nm_status == NL_MMAP_STATUS_UNUSED) +- break; +- if (hdr->nm_status != NL_MMAP_STATUS_SKIP) +- break; +- netlink_increment_head(ring); +- } while (ring->head != head); +-} +- +-static bool netlink_dump_space(struct netlink_sock *nlk) +-{ +- struct netlink_ring *ring = &nlk->rx_ring; +- struct nl_mmap_hdr *hdr; +- unsigned int n; +- +- hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED); +- if (hdr == NULL) +- return false; +- +- n = ring->head + ring->frame_max / 2; +- if (n > ring->frame_max) +- n -= ring->frame_max; +- +- hdr = __netlink_lookup_frame(ring, n); +- +- return hdr->nm_status == NL_MMAP_STATUS_UNUSED; +-} +- +-static unsigned int netlink_poll(struct file *file, struct socket *sock, +- poll_table *wait) +-{ +- struct sock *sk = sock->sk; +- struct netlink_sock *nlk = nlk_sk(sk); +- unsigned int mask; +- int err; +- +- if (nlk->rx_ring.pg_vec != NULL) { +- /* Memory mapped sockets don't call recvmsg(), so flow control +- * for dumps is performed here. A dump is allowed to continue +- * if at least half the ring is unused. 
+- */ +- while (nlk->cb_running && netlink_dump_space(nlk)) { +- err = netlink_dump(sk); +- if (err < 0) { +- sk->sk_err = -err; +- sk->sk_error_report(sk); +- break; +- } +- } +- netlink_rcv_wake(sk); +- } +- +- mask = datagram_poll(file, sock, wait); +- +- spin_lock_bh(&sk->sk_receive_queue.lock); +- if (nlk->rx_ring.pg_vec) { +- netlink_forward_ring(&nlk->rx_ring); +- if (!netlink_previous_frame(&nlk->rx_ring, NL_MMAP_STATUS_UNUSED)) +- mask |= POLLIN | POLLRDNORM; +- } +- spin_unlock_bh(&sk->sk_receive_queue.lock); +- +- spin_lock_bh(&sk->sk_write_queue.lock); +- if (nlk->tx_ring.pg_vec) { +- if (netlink_current_frame(&nlk->tx_ring, NL_MMAP_STATUS_UNUSED)) +- mask |= POLLOUT | POLLWRNORM; +- } +- spin_unlock_bh(&sk->sk_write_queue.lock); +- +- return mask; +-} +- +-static struct nl_mmap_hdr *netlink_mmap_hdr(struct sk_buff *skb) +-{ +- return (struct nl_mmap_hdr *)(skb->head - NL_MMAP_HDRLEN); +-} +- +-static void netlink_ring_setup_skb(struct sk_buff *skb, struct sock *sk, +- struct netlink_ring *ring, +- struct nl_mmap_hdr *hdr) +-{ +- unsigned int size; +- void *data; +- +- size = ring->frame_size - NL_MMAP_HDRLEN; +- data = (void *)hdr + NL_MMAP_HDRLEN; +- +- skb->head = data; +- skb->data = data; +- skb_reset_tail_pointer(skb); +- skb->end = skb->tail + size; +- skb->len = 0; +- +- skb->destructor = netlink_skb_destructor; +- NETLINK_CB(skb).flags |= NETLINK_SKB_MMAPED; +- NETLINK_CB(skb).sk = sk; +-} +- +-static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg, +- u32 dst_portid, u32 dst_group, +- struct sock_iocb *siocb) +-{ +- struct netlink_sock *nlk = nlk_sk(sk); +- struct netlink_ring *ring; +- struct nl_mmap_hdr *hdr; +- struct sk_buff *skb; +- unsigned int maxlen; +- int err = 0, len = 0; +- +- mutex_lock(&nlk->pg_vec_lock); +- +- ring = &nlk->tx_ring; +- maxlen = ring->frame_size - NL_MMAP_HDRLEN; +- +- do { +- unsigned int nm_len; +- +- hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID); +- if (hdr == NULL) { +- if 
(!(msg->msg_flags & MSG_DONTWAIT) && +- atomic_read(&nlk->tx_ring.pending)) +- schedule(); +- continue; +- } +- +- nm_len = ACCESS_ONCE(hdr->nm_len); +- if (nm_len > maxlen) { +- err = -EINVAL; +- goto out; +- } +- +- netlink_frame_flush_dcache(hdr, nm_len); +- +- skb = alloc_skb(nm_len, GFP_KERNEL); +- if (skb == NULL) { +- err = -ENOBUFS; +- goto out; +- } +- __skb_put(skb, nm_len); +- memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, nm_len); +- netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED); +- +- netlink_increment_head(ring); +- +- NETLINK_CB(skb).portid = nlk->portid; +- NETLINK_CB(skb).dst_group = dst_group; +- NETLINK_CB(skb).creds = siocb->scm->creds; +- +- err = security_netlink_send(sk, skb); +- if (err) { +- kfree_skb(skb); +- goto out; +- } +- +- if (unlikely(dst_group)) { +- atomic_inc(&skb->users); +- netlink_broadcast(sk, skb, dst_portid, dst_group, +- GFP_KERNEL); +- } +- err = netlink_unicast(sk, skb, dst_portid, +- msg->msg_flags & MSG_DONTWAIT); +- if (err < 0) +- goto out; +- len += err; +- +- } while (hdr != NULL || +- (!(msg->msg_flags & MSG_DONTWAIT) && +- atomic_read(&nlk->tx_ring.pending))); +- +- if (len > 0) +- err = len; +-out: +- mutex_unlock(&nlk->pg_vec_lock); +- return err; +-} +- +-static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb) +-{ +- struct nl_mmap_hdr *hdr; +- +- hdr = netlink_mmap_hdr(skb); +- hdr->nm_len = skb->len; +- hdr->nm_group = NETLINK_CB(skb).dst_group; +- hdr->nm_pid = NETLINK_CB(skb).creds.pid; +- hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid); +- hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid); +- netlink_frame_flush_dcache(hdr, hdr->nm_len); +- netlink_set_status(hdr, NL_MMAP_STATUS_VALID); +- +- NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED; +- kfree_skb(skb); +-} +- +-static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb) +-{ +- struct netlink_sock *nlk = nlk_sk(sk); +- struct netlink_ring *ring = &nlk->rx_ring; +- struct 
nl_mmap_hdr *hdr; +- +- spin_lock_bh(&sk->sk_receive_queue.lock); +- hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED); +- if (hdr == NULL) { +- spin_unlock_bh(&sk->sk_receive_queue.lock); +- kfree_skb(skb); +- netlink_overrun(sk); +- return; +- } +- netlink_increment_head(ring); +- __skb_queue_tail(&sk->sk_receive_queue, skb); +- spin_unlock_bh(&sk->sk_receive_queue.lock); +- +- hdr->nm_len = skb->len; +- hdr->nm_group = NETLINK_CB(skb).dst_group; +- hdr->nm_pid = NETLINK_CB(skb).creds.pid; +- hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid); +- hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid); +- netlink_set_status(hdr, NL_MMAP_STATUS_COPY); +-} +- +-#else /* CONFIG_NETLINK_MMAP */ +-#define netlink_rx_is_mmaped(sk) false +-#define netlink_tx_is_mmaped(sk) false +-#define netlink_mmap sock_no_mmap +-#define netlink_poll datagram_poll +-#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, siocb) 0 +-#endif /* CONFIG_NETLINK_MMAP */ +- + static void netlink_skb_destructor(struct sk_buff *skb) + { +-#ifdef CONFIG_NETLINK_MMAP +- struct nl_mmap_hdr *hdr; +- struct netlink_ring *ring; +- struct sock *sk; +- +- /* If a packet from the kernel to userspace was freed because of an +- * error without being delivered to userspace, the kernel must reset +- * the status. In the direction userspace to kernel, the status is +- * always reset here after the packet was processed and freed. 
+- */ +- if (netlink_skb_is_mmaped(skb)) { +- hdr = netlink_mmap_hdr(skb); +- sk = NETLINK_CB(skb).sk; +- +- if (NETLINK_CB(skb).flags & NETLINK_SKB_TX) { +- netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED); +- ring = &nlk_sk(sk)->tx_ring; +- } else { +- if (!(NETLINK_CB(skb).flags & NETLINK_SKB_DELIVERED)) { +- hdr->nm_len = 0; +- netlink_set_status(hdr, NL_MMAP_STATUS_VALID); +- } +- ring = &nlk_sk(sk)->rx_ring; +- } +- +- WARN_ON(atomic_read(&ring->pending) == 0); +- atomic_dec(&ring->pending); +- sock_put(sk); +- +- skb->head = NULL; +- } +-#endif + if (is_vmalloc_addr(skb->head)) { + if (!skb->cloned || + !atomic_dec_return(&(skb_shinfo(skb)->dataref))) +@@ -934,18 +343,6 @@ static void netlink_sock_destruct(struct + } + + skb_queue_purge(&sk->sk_receive_queue); +-#ifdef CONFIG_NETLINK_MMAP +- if (1) { +- struct nl_mmap_req req; +- +- memset(&req, 0, sizeof(req)); +- if (nlk->rx_ring.pg_vec) +- __netlink_set_ring(sk, &req, false, NULL, 0); +- memset(&req, 0, sizeof(req)); +- if (nlk->tx_ring.pg_vec) +- __netlink_set_ring(sk, &req, true, NULL, 0); +- } +-#endif /* CONFIG_NETLINK_MMAP */ + + if (!sock_flag(sk, SOCK_DEAD)) { + printk(KERN_ERR "Freeing alive netlink socket %p\n", sk); +@@ -1153,9 +550,6 @@ static int __netlink_create(struct net * + mutex_init(nlk->cb_mutex); + } + init_waitqueue_head(&nlk->wait); +-#ifdef CONFIG_NETLINK_MMAP +- mutex_init(&nlk->pg_vec_lock); +-#endif + + sk->sk_destruct = netlink_sock_destruct; + sk->sk_protocol = protocol; +@@ -1653,9 +1047,8 @@ int netlink_attachskb(struct sock *sk, s + + nlk = nlk_sk(sk); + +- if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || +- test_bit(NETLINK_CONGESTED, &nlk->state)) && +- !netlink_skb_is_mmaped(skb)) { ++ if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || ++ test_bit(NETLINK_CONGESTED, &nlk->state)) { + DECLARE_WAITQUEUE(wait, current); + if (!*timeo) { + if (!ssk || netlink_is_kernel(ssk)) +@@ -1693,14 +1086,7 @@ static int __netlink_sendskb(struct sock + + 
netlink_deliver_tap(skb); + +-#ifdef CONFIG_NETLINK_MMAP +- if (netlink_skb_is_mmaped(skb)) +- netlink_queue_mmaped_skb(sk, skb); +- else if (netlink_rx_is_mmaped(sk)) +- netlink_ring_set_copied(sk, skb); +- else +-#endif /* CONFIG_NETLINK_MMAP */ +- skb_queue_tail(&sk->sk_receive_queue, skb); ++ skb_queue_tail(&sk->sk_receive_queue, skb); + sk->sk_data_ready(sk); + return len; + } +@@ -1724,9 +1110,6 @@ static struct sk_buff *netlink_trim(stru + int delta; + + WARN_ON(skb->sk != NULL); +- if (netlink_skb_is_mmaped(skb)) +- return skb; +- + delta = skb->end - skb->tail; + if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize) + return skb; +@@ -1805,66 +1188,6 @@ EXPORT_SYMBOL(netlink_unicast); + struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size, + u32 dst_portid, gfp_t gfp_mask) + { +-#ifdef CONFIG_NETLINK_MMAP +- struct sock *sk = NULL; +- struct sk_buff *skb; +- struct netlink_ring *ring; +- struct nl_mmap_hdr *hdr; +- unsigned int maxlen; +- +- sk = netlink_getsockbyportid(ssk, dst_portid); +- if (IS_ERR(sk)) +- goto out; +- +- ring = &nlk_sk(sk)->rx_ring; +- /* fast-path without atomic ops for common case: non-mmaped receiver */ +- if (ring->pg_vec == NULL) +- goto out_put; +- +- if (ring->frame_size - NL_MMAP_HDRLEN < size) +- goto out_put; +- +- skb = alloc_skb_head(gfp_mask); +- if (skb == NULL) +- goto err1; +- +- spin_lock_bh(&sk->sk_receive_queue.lock); +- /* check again under lock */ +- if (ring->pg_vec == NULL) +- goto out_free; +- +- /* check again under lock */ +- maxlen = ring->frame_size - NL_MMAP_HDRLEN; +- if (maxlen < size) +- goto out_free; +- +- netlink_forward_ring(ring); +- hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED); +- if (hdr == NULL) +- goto err2; +- netlink_ring_setup_skb(skb, sk, ring, hdr); +- netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED); +- atomic_inc(&ring->pending); +- netlink_increment_head(ring); +- +- spin_unlock_bh(&sk->sk_receive_queue.lock); +- return skb; +- +-err2: +- 
kfree_skb(skb); +- spin_unlock_bh(&sk->sk_receive_queue.lock); +- netlink_overrun(sk); +-err1: +- sock_put(sk); +- return NULL; +- +-out_free: +- kfree_skb(skb); +- spin_unlock_bh(&sk->sk_receive_queue.lock); +-out_put: +- sock_put(sk); +-out: +-#endif + return alloc_skb(size, gfp_mask); + } + EXPORT_SYMBOL_GPL(netlink_alloc_skb); +@@ -2126,8 +1449,7 @@ static int netlink_setsockopt(struct soc + if (level != SOL_NETLINK) + return -ENOPROTOOPT; + +- if (optname != NETLINK_RX_RING && optname != NETLINK_TX_RING && +- optlen >= sizeof(int) && ++ if (optlen >= sizeof(int) && + get_user(val, (unsigned int __user *)optval)) + return -EFAULT; + +@@ -2180,25 +1502,6 @@ static int netlink_setsockopt(struct soc + } + err = 0; + break; +-#ifdef CONFIG_NETLINK_MMAP +- case NETLINK_RX_RING: +- case NETLINK_TX_RING: { +- struct nl_mmap_req req; +- +- /* Rings might consume more memory than queue limits, require +- * CAP_NET_ADMIN. +- */ +- if (!capable(CAP_NET_ADMIN)) +- return -EPERM; +- if (optlen < sizeof(req)) +- return -EINVAL; +- if (copy_from_user(&req, optval, sizeof(req))) +- return -EFAULT; +- err = netlink_set_ring(sk, &req, +- optname == NETLINK_TX_RING); +- break; +- } +-#endif /* CONFIG_NETLINK_MMAP */ + default: + err = -ENOPROTOOPT; + } +@@ -2311,13 +1614,6 @@ static int netlink_sendmsg(struct kiocb + goto out; + } + +- if (netlink_tx_is_mmaped(sk) && +- msg->msg_iov->iov_base == NULL) { +- err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, +- siocb); +- goto out; +- } +- + err = -EMSGSIZE; + if (len > sk->sk_sndbuf - 32) + goto out; +@@ -2643,8 +1939,7 @@ static int netlink_dump(struct sock *sk) + cb = &nlk->cb; + alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE); + +- if (!netlink_rx_is_mmaped(sk) && +- atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) ++ if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) + goto errout_skb; + + /* NLMSG_GOODSIZE is small to avoid high order allocations being +@@ -2721,16 +2016,7 @@ int 
__netlink_dump_start(struct sock *ss + struct netlink_sock *nlk; + int ret; + +- /* Memory mapped dump requests need to be copied to avoid looping +- * on the pending state in netlink_mmap_sendmsg() while the CB hold +- * a reference to the skb. +- */ +- if (netlink_skb_is_mmaped(skb)) { +- skb = skb_copy(skb, GFP_KERNEL); +- if (skb == NULL) +- return -ENOBUFS; +- } else +- atomic_inc(&skb->users); ++ atomic_inc(&skb->users); + + sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid); + if (sk == NULL) { +@@ -3071,7 +2357,7 @@ static const struct proto_ops netlink_op + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = netlink_getname, +- .poll = netlink_poll, ++ .poll = datagram_poll, + .ioctl = sock_no_ioctl, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, +@@ -3079,7 +2365,7 @@ static const struct proto_ops netlink_op + .getsockopt = netlink_getsockopt, + .sendmsg = netlink_sendmsg, + .recvmsg = netlink_recvmsg, +- .mmap = netlink_mmap, ++ .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, + }; + +--- a/net/netlink/af_netlink.h ++++ b/net/netlink/af_netlink.h +@@ -42,12 +42,6 @@ struct netlink_sock { + int (*netlink_bind)(int group); + void (*netlink_unbind)(int group); + struct module *module; +-#ifdef CONFIG_NETLINK_MMAP +- struct mutex pg_vec_lock; +- struct netlink_ring rx_ring; +- struct netlink_ring tx_ring; +- atomic_t mapped; +-#endif /* CONFIG_NETLINK_MMAP */ + + struct rhash_head node; + }; +@@ -57,15 +51,6 @@ static inline struct netlink_sock *nlk_s + return container_of(sk, struct netlink_sock, sk); + } + +-static inline bool netlink_skb_is_mmaped(const struct sk_buff *skb) +-{ +-#ifdef CONFIG_NETLINK_MMAP +- return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED; +-#else +- return false; +-#endif /* CONFIG_NETLINK_MMAP */ +-} +- + struct netlink_table { + struct rhashtable hash; + struct hlist_head mc_list; +--- a/net/netlink/diag.c ++++ b/net/netlink/diag.c +@@ -8,41 +8,6 @@ + + #include 
"af_netlink.h" + +-#ifdef CONFIG_NETLINK_MMAP +-static int sk_diag_put_ring(struct netlink_ring *ring, int nl_type, +- struct sk_buff *nlskb) +-{ +- struct netlink_diag_ring ndr; +- +- ndr.ndr_block_size = ring->pg_vec_pages << PAGE_SHIFT; +- ndr.ndr_block_nr = ring->pg_vec_len; +- ndr.ndr_frame_size = ring->frame_size; +- ndr.ndr_frame_nr = ring->frame_max + 1; +- +- return nla_put(nlskb, nl_type, sizeof(ndr), &ndr); +-} +- +-static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb) +-{ +- struct netlink_sock *nlk = nlk_sk(sk); +- int ret; +- +- mutex_lock(&nlk->pg_vec_lock); +- ret = sk_diag_put_ring(&nlk->rx_ring, NETLINK_DIAG_RX_RING, nlskb); +- if (!ret) +- ret = sk_diag_put_ring(&nlk->tx_ring, NETLINK_DIAG_TX_RING, +- nlskb); +- mutex_unlock(&nlk->pg_vec_lock); +- +- return ret; +-} +-#else +-static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb) +-{ +- return 0; +-} +-#endif +- + static int sk_diag_dump_groups(struct sock *sk, struct sk_buff *nlskb) + { + struct netlink_sock *nlk = nlk_sk(sk); +@@ -87,10 +52,6 @@ static int sk_diag_fill(struct sock *sk, + sock_diag_put_meminfo(sk, skb, NETLINK_DIAG_MEMINFO)) + goto out_nlmsg_trim; + +- if ((req->ndiag_show & NDIAG_SHOW_RING_CFG) && +- sk_diag_put_rings_cfg(sk, skb)) +- goto out_nlmsg_trim; +- + return nlmsg_end(skb, nlh); + + out_nlmsg_trim: diff --git a/queue-3.18/series b/queue-3.18/series index 57e65b69a50..09bfe83dd12 100644 --- a/queue-3.18/series +++ b/queue-3.18/series @@ -94,3 +94,16 @@ mvsas-fix-misleading-indentation.patch dm-flush-queued-bios-when-process-blocks-to-avoid-deadlock.patch padata-avoid-race-in-reordering.patch samples-move-mic-mpssd-example-code-from-documentation.patch +drm-ast-fix-test-for-vga-enabled.patch +drm-ast-call-open_key-before-enable_mmio-in-post-code.patch +drm-ast-fix-ast2400-post-failure-without-bmc-fw-or-vbios.patch +drm-ttm-make-sure-bos-being-swapped-out-are-cacheable.patch +cpmac-remove-hopeless-warning.patch 
+tracing-add-undef-to-fix-compile-error.patch +netlink-remove-mmapped-netlink-support.patch +vxlan-correctly-validate-vxlan-id-against-vxlan_n_vid.patch +vti6-return-gre_key-for-vti6.patch +ipv4-mask-tos-for-input-route.patch +l2tp-avoid-use-after-free-caused-by-l2tp_ip_backlog_recv.patch +net-don-t-call-strlen-on-the-user-buffer-in-packet_bind_spkt.patch +dccp-unlock-sock-before-calling-sk_free.patch diff --git a/queue-3.18/tracing-add-undef-to-fix-compile-error.patch b/queue-3.18/tracing-add-undef-to-fix-compile-error.patch new file mode 100644 index 00000000000..94914075d1f --- /dev/null +++ b/queue-3.18/tracing-add-undef-to-fix-compile-error.patch @@ -0,0 +1,39 @@ +From bf7165cfa23695c51998231c4efa080fe1d3548d Mon Sep 17 00:00:00 2001 +From: Rik van Riel +Date: Wed, 28 Sep 2016 22:55:54 -0400 +Subject: tracing: Add #undef to fix compile error + +From: Rik van Riel + +commit bf7165cfa23695c51998231c4efa080fe1d3548d upstream. + +There are several trace include files that define TRACE_INCLUDE_FILE. + +Include several of them in the same .c file (as I currently have in +some code I am working on), and the compile will blow up with a +"warning: "TRACE_INCLUDE_FILE" redefined #define TRACE_INCLUDE_FILE syscalls" + +Every other include file in include/trace/events/ avoids that issue +by having a #undef TRACE_INCLUDE_FILE before the #define; syscalls.h +should have one, too. 
+ +Link: http://lkml.kernel.org/r/20160928225554.13bd7ac6@annuminas.surriel.com + +Fixes: b8007ef74222 ("tracing: Separate raw syscall from syscall tracer") +Signed-off-by: Rik van Riel +Signed-off-by: Steven Rostedt (VMware) +Signed-off-by: Greg Kroah-Hartman + +--- + include/trace/events/syscalls.h | 1 + + 1 file changed, 1 insertion(+) + +--- a/include/trace/events/syscalls.h ++++ b/include/trace/events/syscalls.h +@@ -1,5 +1,6 @@ + #undef TRACE_SYSTEM + #define TRACE_SYSTEM raw_syscalls ++#undef TRACE_INCLUDE_FILE + #define TRACE_INCLUDE_FILE syscalls + + #if !defined(_TRACE_EVENTS_SYSCALLS_H) || defined(TRACE_HEADER_MULTI_READ) diff --git a/queue-3.18/vti6-return-gre_key-for-vti6.patch b/queue-3.18/vti6-return-gre_key-for-vti6.patch new file mode 100644 index 00000000000..fb52f4a3ca0 --- /dev/null +++ b/queue-3.18/vti6-return-gre_key-for-vti6.patch @@ -0,0 +1,33 @@ +From 7dcdf941cdc96692ab99fd790c8cc68945514851 Mon Sep 17 00:00:00 2001 +From: David Forster +Date: Fri, 24 Feb 2017 14:20:32 +0000 +Subject: vti6: return GRE_KEY for vti6 + +From: David Forster + +commit 7dcdf941cdc96692ab99fd790c8cc68945514851 upstream. + +Align vti6 with vti by returning GRE_KEY flag. This enables iproute2 +to display tunnel keys on "ip -6 tunnel show" + +Signed-off-by: David Forster +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman + +--- + net/ipv6/ip6_vti.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/net/ipv6/ip6_vti.c ++++ b/net/ipv6/ip6_vti.c +@@ -645,6 +645,10 @@ vti6_parm_to_user(struct ip6_tnl_parm2 * + u->link = p->link; + u->i_key = p->i_key; + u->o_key = p->o_key; ++ if (u->i_key) ++ u->i_flags |= GRE_KEY; ++ if (u->o_key) ++ u->o_flags |= GRE_KEY; + u->proto = p->proto; + + memcpy(u->name, p->name, sizeof(u->name)); diff --git a/queue-3.18/vxlan-correctly-validate-vxlan-id-against-vxlan_n_vid.patch b/queue-3.18/vxlan-correctly-validate-vxlan-id-against-vxlan_n_vid.patch new file mode 100644 index 00000000000..553871c05b8 --- /dev/null +++ b/queue-3.18/vxlan-correctly-validate-vxlan-id-against-vxlan_n_vid.patch @@ -0,0 +1,33 @@ +From 4e37d6911f36545b286d15073f6f2222f840e81c Mon Sep 17 00:00:00 2001 +From: Matthias Schiffer +Date: Thu, 23 Feb 2017 17:19:41 +0100 +Subject: vxlan: correctly validate VXLAN ID against VXLAN_N_VID + +From: Matthias Schiffer + +commit 4e37d6911f36545b286d15073f6f2222f840e81c upstream. + +The incorrect check caused an off-by-one error: the maximum VID 0xffffff +was unusable. + +Fixes: d342894c5d2f ("vxlan: virtual extensible lan") +Signed-off-by: Matthias Schiffer +Acked-by: Jiri Benc +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/vxlan.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/vxlan.c ++++ b/drivers/net/vxlan.c +@@ -2260,7 +2260,7 @@ static int vxlan_validate(struct nlattr + + if (data[IFLA_VXLAN_ID]) { + __u32 id = nla_get_u32(data[IFLA_VXLAN_ID]); +- if (id >= VXLAN_VID_MASK) ++ if (id >= VXLAN_N_VID) + return -ERANGE; + } +