From a202b2d1d5aac0f93a84ea2c4f1f51be8c45aebc Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 8 Jun 2018 06:55:47 +0200 Subject: [PATCH] 4.16-stable patches added patches: be2net-fix-error-detection-logic-for-be3.patch bnx2x-use-the-right-constant.patch cls_flower-fix-incorrect-idr-release-when-failing-to-modify-rule.patch dccp-don-t-free-ccid2_hc_tx_sock-struct-in-dccp_disconnect.patch enic-set-dma-mask-to-47-bit.patch ip6_tunnel-remove-magic-mtu-value-0xfff8.patch ip6mr-only-set-ip6mr_table-from-setsockopt-when-ip6mr_new_table-succeeds.patch ip_tunnel-restore-binding-to-ifaces-with-a-large-mtu.patch ipmr-properly-check-rhltable_init-return-value.patch ipv4-remove-warning-in-ip_recv_error.patch ipv6-omit-traffic-class-when-calculating-flow-hash.patch ipv6-sr-fix-memory-oob-access-in-seg6_do_srh_encap-inline.patch isdn-eicon-fix-a-missing-check-bug.patch kcm-fix-use-after-free-caused-by-clonned-sockets.patch l2tp-fix-refcount-leakage-on-pppol2tp-sockets.patch mlxsw-spectrum-forbid-creation-of-vlan-1-over-port-lag.patch net-dsa-b53-fix-for-brcm-tag-issue-in-cygnus-soc.patch net-ethernet-davinci_emac-fix-error-handling-in-probe.patch net-ipv4-add-missing-rta_table-to-rtm_ipv4_policy.patch net-metrics-add-proper-netlink-validation.patch net-mlx4-fix-irq-unsafe-spinlock-usage.patch net-mlx5e-when-rxfcs-is-set-add-fcs-data-into-checksum-calculation.patch net-netsec-reduce-dma-mask-to-40-bits.patch net-packet-refine-check-for-priv-area-size.patch net-phy-broadcom-fix-auxiliary-control-register-reads.patch net-phy-broadcom-fix-bcm_write_exp.patch net-sched-cls_api-deal-with-egdev-path-only-if-needed.patch net-sysfs-fix-memory-leak-in-xps-configuration.patch net-usb-cdc_mbim-add-flag-flag_send_zlp.patch netdev-faq-clarify-davem-s-position-for-stable-backports.patch packet-fix-reserve-calculation.patch qed-fix-mask-for-physical-address-in-ilt-entry.patch rtnetlink-validate-attributes-in-do_setlink.patch 
sctp-not-allow-transport-timeout-value-less-than-hz-5-for-hb_timer.patch team-use-netdev_features_t-instead-of-u32.patch tun-fix-null-pointer-dereference-in-xdp-redirect.patch vhost-synchronize-iotlb-message-with-dev-cleanup.patch vhost_net-flush-batched-heads-before-trying-to-busy-polling.patch virtio-net-correctly-check-num_buf-during-err-path.patch virtio-net-correctly-redirect-linearized-packet.patch virtio-net-correctly-transmit-xdp-buff-after-linearizing.patch virtio-net-fix-leaking-page-for-gso-packet-during-mergeable-xdp.patch vrf-check-the-original-netdevice-for-generating-redirect.patch --- ...et-fix-error-detection-logic-for-be3.patch | 33 ++++ queue-4.16/bnx2x-use-the-right-constant.patch | 42 ++++ ...-release-when-failing-to-modify-rule.patch | 36 ++++ ...hc_tx_sock-struct-in-dccp_disconnect.patch | 145 ++++++++++++++ queue-4.16/enic-set-dma-mask-to-47-bit.patch | 50 +++++ ...tunnel-remove-magic-mtu-value-0xfff8.patch | 70 +++++++ ...ockopt-when-ip6mr_new_table-succeeds.patch | 37 ++++ ...e-binding-to-ifaces-with-a-large-mtu.patch | 80 ++++++++ ...rly-check-rhltable_init-return-value.patch | 89 +++++++++ ...ipv4-remove-warning-in-ip_recv_error.patch | 66 +++++++ ...fic-class-when-calculating-flow-hash.patch | 66 +++++++ ...b-access-in-seg6_do_srh_encap-inline.patch | 179 +++++++++++++++++ .../isdn-eicon-fix-a-missing-check-bug.patch | 185 ++++++++++++++++++ ...after-free-caused-by-clonned-sockets.patch | 34 ++++ ...refcount-leakage-on-pppol2tp-sockets.patch | 145 ++++++++++++++ ...bid-creation-of-vlan-1-over-port-lag.patch | 79 ++++++++ ...fix-for-brcm-tag-issue-in-cygnus-soc.patch | 106 ++++++++++ ...nci_emac-fix-error-handling-in-probe.patch | 93 +++++++++ ...missing-rta_table-to-rtm_ipv4_policy.patch | 26 +++ ...etrics-add-proper-netlink-validation.patch | 129 ++++++++++++ ...t-mlx4-fix-irq-unsafe-spinlock-usage.patch | 72 +++++++ ...d-fcs-data-into-checksum-calculation.patch | 84 ++++++++ ...et-netsec-reduce-dma-mask-to-40-bits.patch | 71 +++++++ 
...cket-refine-check-for-priv-area-size.patch | 94 +++++++++ ...fix-auxiliary-control-register-reads.patch | 37 ++++ .../net-phy-broadcom-fix-bcm_write_exp.patch | 90 +++++++++ ...-deal-with-egdev-path-only-if-needed.patch | 38 ++++ ...fix-memory-leak-in-xps-configuration.patch | 43 ++++ ...-usb-cdc_mbim-add-flag-flag_send_zlp.patch | 32 +++ ...avem-s-position-for-stable-backports.patch | 41 ++++ .../packet-fix-reserve-calculation.patch | 46 +++++ ...sk-for-physical-address-in-ilt-entry.patch | 38 ++++ ...nk-validate-attributes-in-do_setlink.patch | 139 +++++++++++++ ...ut-value-less-than-hz-5-for-hb_timer.patch | 44 +++++ queue-4.16/series | 43 ++++ ...use-netdev_features_t-instead-of-u32.patch | 33 ++++ ...-pointer-dereference-in-xdp-redirect.patch | 179 +++++++++++++++++ ...onize-iotlb-message-with-dev-cleanup.patch | 57 ++++++ ...-heads-before-trying-to-busy-polling.patch | 114 +++++++++++ ...rectly-check-num_buf-during-err-path.patch | 33 ++++ ...correctly-redirect-linearized-packet.patch | 36 ++++ ...-transmit-xdp-buff-after-linearizing.patch | 35 ++++ ...-for-gso-packet-during-mergeable-xdp.patch | 53 +++++ ...al-netdevice-for-generating-redirect.patch | 48 +++++ 44 files changed, 3190 insertions(+) create mode 100644 queue-4.16/be2net-fix-error-detection-logic-for-be3.patch create mode 100644 queue-4.16/bnx2x-use-the-right-constant.patch create mode 100644 queue-4.16/cls_flower-fix-incorrect-idr-release-when-failing-to-modify-rule.patch create mode 100644 queue-4.16/dccp-don-t-free-ccid2_hc_tx_sock-struct-in-dccp_disconnect.patch create mode 100644 queue-4.16/enic-set-dma-mask-to-47-bit.patch create mode 100644 queue-4.16/ip6_tunnel-remove-magic-mtu-value-0xfff8.patch create mode 100644 queue-4.16/ip6mr-only-set-ip6mr_table-from-setsockopt-when-ip6mr_new_table-succeeds.patch create mode 100644 queue-4.16/ip_tunnel-restore-binding-to-ifaces-with-a-large-mtu.patch create mode 100644 queue-4.16/ipmr-properly-check-rhltable_init-return-value.patch create mode 
100644 queue-4.16/ipv4-remove-warning-in-ip_recv_error.patch create mode 100644 queue-4.16/ipv6-omit-traffic-class-when-calculating-flow-hash.patch create mode 100644 queue-4.16/ipv6-sr-fix-memory-oob-access-in-seg6_do_srh_encap-inline.patch create mode 100644 queue-4.16/isdn-eicon-fix-a-missing-check-bug.patch create mode 100644 queue-4.16/kcm-fix-use-after-free-caused-by-clonned-sockets.patch create mode 100644 queue-4.16/l2tp-fix-refcount-leakage-on-pppol2tp-sockets.patch create mode 100644 queue-4.16/mlxsw-spectrum-forbid-creation-of-vlan-1-over-port-lag.patch create mode 100644 queue-4.16/net-dsa-b53-fix-for-brcm-tag-issue-in-cygnus-soc.patch create mode 100644 queue-4.16/net-ethernet-davinci_emac-fix-error-handling-in-probe.patch create mode 100644 queue-4.16/net-ipv4-add-missing-rta_table-to-rtm_ipv4_policy.patch create mode 100644 queue-4.16/net-metrics-add-proper-netlink-validation.patch create mode 100644 queue-4.16/net-mlx4-fix-irq-unsafe-spinlock-usage.patch create mode 100644 queue-4.16/net-mlx5e-when-rxfcs-is-set-add-fcs-data-into-checksum-calculation.patch create mode 100644 queue-4.16/net-netsec-reduce-dma-mask-to-40-bits.patch create mode 100644 queue-4.16/net-packet-refine-check-for-priv-area-size.patch create mode 100644 queue-4.16/net-phy-broadcom-fix-auxiliary-control-register-reads.patch create mode 100644 queue-4.16/net-phy-broadcom-fix-bcm_write_exp.patch create mode 100644 queue-4.16/net-sched-cls_api-deal-with-egdev-path-only-if-needed.patch create mode 100644 queue-4.16/net-sysfs-fix-memory-leak-in-xps-configuration.patch create mode 100644 queue-4.16/net-usb-cdc_mbim-add-flag-flag_send_zlp.patch create mode 100644 queue-4.16/netdev-faq-clarify-davem-s-position-for-stable-backports.patch create mode 100644 queue-4.16/packet-fix-reserve-calculation.patch create mode 100644 queue-4.16/qed-fix-mask-for-physical-address-in-ilt-entry.patch create mode 100644 queue-4.16/rtnetlink-validate-attributes-in-do_setlink.patch create mode 100644 
queue-4.16/sctp-not-allow-transport-timeout-value-less-than-hz-5-for-hb_timer.patch create mode 100644 queue-4.16/team-use-netdev_features_t-instead-of-u32.patch create mode 100644 queue-4.16/tun-fix-null-pointer-dereference-in-xdp-redirect.patch create mode 100644 queue-4.16/vhost-synchronize-iotlb-message-with-dev-cleanup.patch create mode 100644 queue-4.16/vhost_net-flush-batched-heads-before-trying-to-busy-polling.patch create mode 100644 queue-4.16/virtio-net-correctly-check-num_buf-during-err-path.patch create mode 100644 queue-4.16/virtio-net-correctly-redirect-linearized-packet.patch create mode 100644 queue-4.16/virtio-net-correctly-transmit-xdp-buff-after-linearizing.patch create mode 100644 queue-4.16/virtio-net-fix-leaking-page-for-gso-packet-during-mergeable-xdp.patch create mode 100644 queue-4.16/vrf-check-the-original-netdevice-for-generating-redirect.patch diff --git a/queue-4.16/be2net-fix-error-detection-logic-for-be3.patch b/queue-4.16/be2net-fix-error-detection-logic-for-be3.patch new file mode 100644 index 00000000000..367c84efb68 --- /dev/null +++ b/queue-4.16/be2net-fix-error-detection-logic-for-be3.patch @@ -0,0 +1,33 @@ +From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Suresh Reddy +Date: Mon, 28 May 2018 01:26:06 -0400 +Subject: be2net: Fix error detection logic for BE3 + +From: Suresh Reddy + +[ Upstream commit d2c2725c2cdbcc108a191f50953d31c7b6556761 ] + +Check for 0xE00 (RECOVERABLE_ERR) along with ARMFW UE (0x0) +in be_detect_error() to know whether the error is valid error or not + +Fixes: 673c96e5a ("be2net: Fix UE detection logic for BE3") +Signed-off-by: Suresh Reddy +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/emulex/benet/be_main.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/emulex/benet/be_main.c ++++ b/drivers/net/ethernet/emulex/benet/be_main.c +@@ -3309,7 +3309,9 @@ void be_detect_error(struct be_adapter * + if ((val & POST_STAGE_FAT_LOG_START) + != POST_STAGE_FAT_LOG_START && + (val & POST_STAGE_ARMFW_UE) +- != POST_STAGE_ARMFW_UE) ++ != POST_STAGE_ARMFW_UE && ++ (val & POST_STAGE_RECOVERABLE_ERR) ++ != POST_STAGE_RECOVERABLE_ERR) + return; + } + diff --git a/queue-4.16/bnx2x-use-the-right-constant.patch b/queue-4.16/bnx2x-use-the-right-constant.patch new file mode 100644 index 00000000000..77695fd37f5 --- /dev/null +++ b/queue-4.16/bnx2x-use-the-right-constant.patch @@ -0,0 +1,42 @@ +From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Julia Lawall +Date: Wed, 6 Jun 2018 15:03:22 +0200 +Subject: bnx2x: use the right constant + +From: Julia Lawall + +[ Upstream commit dd612f18a49b63af8b3a5f572d999bdb197385bc ] + +Nearby code that also tests port suggests that the P0 constant should be +used when port is zero. + +The semantic match that finds this problem is as follows: +(http://coccinelle.lip6.fr/) + +// +@@ +expression e,e1; +@@ + +* e ? e1 : e1 +// + +Fixes: 6c3218c6f7e5 ("bnx2x: Adjust ETS to 578xx") +Signed-off-by: Julia Lawall +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c ++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c +@@ -588,7 +588,7 @@ static void bnx2x_ets_e3b0_nig_disabled( + * slots for the highest priority. + */ + REG_WR(bp, (port) ? 
NIG_REG_P1_TX_ARB_NUM_STRICT_ARB_SLOTS : +- NIG_REG_P1_TX_ARB_NUM_STRICT_ARB_SLOTS, 0x100); ++ NIG_REG_P0_TX_ARB_NUM_STRICT_ARB_SLOTS, 0x100); + /* Mapping between the CREDIT_WEIGHT registers and actual client + * numbers + */ diff --git a/queue-4.16/cls_flower-fix-incorrect-idr-release-when-failing-to-modify-rule.patch b/queue-4.16/cls_flower-fix-incorrect-idr-release-when-failing-to-modify-rule.patch new file mode 100644 index 00000000000..c39687d9829 --- /dev/null +++ b/queue-4.16/cls_flower-fix-incorrect-idr-release-when-failing-to-modify-rule.patch @@ -0,0 +1,36 @@ +From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Paul Blakey +Date: Wed, 30 May 2018 11:29:15 +0300 +Subject: cls_flower: Fix incorrect idr release when failing to modify rule + +From: Paul Blakey + +[ Upstream commit 8258d2da9f9f521dce7019e018360c28d116354e ] + +When we fail to modify a rule, we incorrectly release the idr handle +of the unmodified old rule. + +Fix that by checking if we need to release it. + +Fixes: fe2502e49b58 ("net_sched: remove cls_flower idr on failure") +Reported-by: Vlad Buslov +Reviewed-by: Roi Dayan +Acked-by: Jiri Pirko +Signed-off-by: Paul Blakey +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/cls_flower.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/sched/cls_flower.c ++++ b/net/sched/cls_flower.c +@@ -974,7 +974,7 @@ static int fl_change(struct net *net, st + return 0; + + errout_idr: +- if (fnew->handle) ++ if (!fold) + idr_remove(&head->handle_idr, fnew->handle); + errout: + tcf_exts_destroy(&fnew->exts); diff --git a/queue-4.16/dccp-don-t-free-ccid2_hc_tx_sock-struct-in-dccp_disconnect.patch b/queue-4.16/dccp-don-t-free-ccid2_hc_tx_sock-struct-in-dccp_disconnect.patch new file mode 100644 index 00000000000..23817bbfc8a --- /dev/null +++ b/queue-4.16/dccp-don-t-free-ccid2_hc_tx_sock-struct-in-dccp_disconnect.patch @@ -0,0 +1,145 @@ +From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Alexey Kodanev +Date: Mon, 21 May 2018 19:28:44 +0300 +Subject: dccp: don't free ccid2_hc_tx_sock struct in dccp_disconnect() + +From: Alexey Kodanev + +[ Upstream commit 2677d20677314101293e6da0094ede7b5526d2b1 ] + +Syzbot reported the use-after-free in timer_is_static_object() [1]. + +This can happen because the structure for the rto timer (ccid2_hc_tx_sock) +is removed in dccp_disconnect(), and ccid2_hc_tx_rto_expire() can be +called after that. + +The report [1] is similar to the one in commit 120e9dabaf55 ("dccp: +defer ccid_hc_tx_delete() at dismantle time"). And the fix is the same, +delay freeing ccid2_hc_tx_sock structure, so that it is freed in +dccp_sk_destruct(). 
+ +[1] + +================================================================== +BUG: KASAN: use-after-free in timer_is_static_object+0x80/0x90 +kernel/time/timer.c:607 +Read of size 8 at addr ffff8801bebb5118 by task syz-executor2/25299 + +CPU: 1 PID: 25299 Comm: syz-executor2 Not tainted 4.17.0-rc5+ #54 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS +Google 01/01/2011 +Call Trace: + + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack+0x1b9/0x294 lib/dump_stack.c:113 + print_address_description+0x6c/0x20b mm/kasan/report.c:256 + kasan_report_error mm/kasan/report.c:354 [inline] + kasan_report.cold.7+0x242/0x2fe mm/kasan/report.c:412 + __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:433 + timer_is_static_object+0x80/0x90 kernel/time/timer.c:607 + debug_object_activate+0x2d9/0x670 lib/debugobjects.c:508 + debug_timer_activate kernel/time/timer.c:709 [inline] + debug_activate kernel/time/timer.c:764 [inline] + __mod_timer kernel/time/timer.c:1041 [inline] + mod_timer+0x4d3/0x13b0 kernel/time/timer.c:1102 + sk_reset_timer+0x22/0x60 net/core/sock.c:2742 + ccid2_hc_tx_rto_expire+0x587/0x680 net/dccp/ccids/ccid2.c:147 + call_timer_fn+0x230/0x940 kernel/time/timer.c:1326 + expire_timers kernel/time/timer.c:1363 [inline] + __run_timers+0x79e/0xc50 kernel/time/timer.c:1666 + run_timer_softirq+0x4c/0x70 kernel/time/timer.c:1692 + __do_softirq+0x2e0/0xaf5 kernel/softirq.c:285 + invoke_softirq kernel/softirq.c:365 [inline] + irq_exit+0x1d1/0x200 kernel/softirq.c:405 + exiting_irq arch/x86/include/asm/apic.h:525 [inline] + smp_apic_timer_interrupt+0x17e/0x710 arch/x86/kernel/apic/apic.c:1052 + apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:863 + +... 
+Allocated by task 25374: + save_stack+0x43/0xd0 mm/kasan/kasan.c:448 + set_track mm/kasan/kasan.c:460 [inline] + kasan_kmalloc+0xc4/0xe0 mm/kasan/kasan.c:553 + kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:490 + kmem_cache_alloc+0x12e/0x760 mm/slab.c:3554 + ccid_new+0x25b/0x3e0 net/dccp/ccid.c:151 + dccp_hdlr_ccid+0x27/0x150 net/dccp/feat.c:44 + __dccp_feat_activate+0x184/0x270 net/dccp/feat.c:344 + dccp_feat_activate_values+0x3a7/0x819 net/dccp/feat.c:1538 + dccp_create_openreq_child+0x472/0x610 net/dccp/minisocks.c:128 + dccp_v4_request_recv_sock+0x12c/0xca0 net/dccp/ipv4.c:408 + dccp_v6_request_recv_sock+0x125d/0x1f10 net/dccp/ipv6.c:415 + dccp_check_req+0x455/0x6a0 net/dccp/minisocks.c:197 + dccp_v4_rcv+0x7b8/0x1f3f net/dccp/ipv4.c:841 + ip_local_deliver_finish+0x2e3/0xd80 net/ipv4/ip_input.c:215 + NF_HOOK include/linux/netfilter.h:288 [inline] + ip_local_deliver+0x1e1/0x720 net/ipv4/ip_input.c:256 + dst_input include/net/dst.h:450 [inline] + ip_rcv_finish+0x81b/0x2200 net/ipv4/ip_input.c:396 + NF_HOOK include/linux/netfilter.h:288 [inline] + ip_rcv+0xb70/0x143d net/ipv4/ip_input.c:492 + __netif_receive_skb_core+0x26f5/0x3630 net/core/dev.c:4592 + __netif_receive_skb+0x2c/0x1e0 net/core/dev.c:4657 + process_backlog+0x219/0x760 net/core/dev.c:5337 + napi_poll net/core/dev.c:5735 [inline] + net_rx_action+0x7b7/0x1930 net/core/dev.c:5801 + __do_softirq+0x2e0/0xaf5 kernel/softirq.c:285 + +Freed by task 25374: + save_stack+0x43/0xd0 mm/kasan/kasan.c:448 + set_track mm/kasan/kasan.c:460 [inline] + __kasan_slab_free+0x11a/0x170 mm/kasan/kasan.c:521 + kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528 + __cache_free mm/slab.c:3498 [inline] + kmem_cache_free+0x86/0x2d0 mm/slab.c:3756 + ccid_hc_tx_delete+0xc3/0x100 net/dccp/ccid.c:190 + dccp_disconnect+0x130/0xc66 net/dccp/proto.c:286 + dccp_close+0x3bc/0xe60 net/dccp/proto.c:1045 + inet_release+0x104/0x1f0 net/ipv4/af_inet.c:427 + inet6_release+0x50/0x70 net/ipv6/af_inet6.c:460 + sock_release+0x96/0x1b0 net/socket.c:594 + 
sock_close+0x16/0x20 net/socket.c:1149 + __fput+0x34d/0x890 fs/file_table.c:209 + ____fput+0x15/0x20 fs/file_table.c:243 + task_work_run+0x1e4/0x290 kernel/task_work.c:113 + tracehook_notify_resume include/linux/tracehook.h:191 [inline] + exit_to_usermode_loop+0x2bd/0x310 arch/x86/entry/common.c:166 + prepare_exit_to_usermode arch/x86/entry/common.c:196 [inline] + syscall_return_slowpath arch/x86/entry/common.c:265 [inline] + do_syscall_64+0x6ac/0x800 arch/x86/entry/common.c:290 + entry_SYSCALL_64_after_hwframe+0x49/0xbe + +The buggy address belongs to the object at ffff8801bebb4cc0 + which belongs to the cache ccid2_hc_tx_sock of size 1240 +The buggy address is located 1112 bytes inside of + 1240-byte region [ffff8801bebb4cc0, ffff8801bebb5198) +The buggy address belongs to the page: +page:ffffea0006faed00 count:1 mapcount:0 mapping:ffff8801bebb41c0 +index:0xffff8801bebb5240 compound_mapcount: 0 +flags: 0x2fffc0000008100(slab|head) +raw: 02fffc0000008100 ffff8801bebb41c0 ffff8801bebb5240 0000000100000003 +raw: ffff8801cdba3138 ffffea0007634120 ffff8801cdbaab40 0000000000000000 +page dumped because: kasan: bad access detected +... +================================================================== + +Reported-by: syzbot+5d47e9ec91a6f15dbd6f@syzkaller.appspotmail.com +Signed-off-by: Alexey Kodanev +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dccp/proto.c | 2 -- + 1 file changed, 2 deletions(-) + +--- a/net/dccp/proto.c ++++ b/net/dccp/proto.c +@@ -283,9 +283,7 @@ int dccp_disconnect(struct sock *sk, int + + dccp_clear_xmit_timers(sk); + ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk); +- ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); + dp->dccps_hc_rx_ccid = NULL; +- dp->dccps_hc_tx_ccid = NULL; + + __skb_queue_purge(&sk->sk_receive_queue); + __skb_queue_purge(&sk->sk_write_queue); diff --git a/queue-4.16/enic-set-dma-mask-to-47-bit.patch b/queue-4.16/enic-set-dma-mask-to-47-bit.patch new file mode 100644 index 00000000000..d8e945bec2d --- /dev/null +++ b/queue-4.16/enic-set-dma-mask-to-47-bit.patch @@ -0,0 +1,50 @@ +From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Govindarajulu Varadarajan +Date: Wed, 23 May 2018 11:17:39 -0700 +Subject: enic: set DMA mask to 47 bit + +From: Govindarajulu Varadarajan + +[ Upstream commit 322eaa06d55ebc1402a4a8d140945cff536638b4 ] + +In commit 624dbf55a359b ("driver/net: enic: Try DMA 64 first, then +failover to DMA") DMA mask was changed from 40 bits to 64 bits. +Hardware actually supports only 47 bits. + +Fixes: 624dbf55a359b ("driver/net: enic: Try DMA 64 first, then failover to DMA") +Signed-off-by: Govindarajulu Varadarajan +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/cisco/enic/enic_main.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/drivers/net/ethernet/cisco/enic/enic_main.c ++++ b/drivers/net/ethernet/cisco/enic/enic_main.c +@@ -2704,11 +2704,11 @@ static int enic_probe(struct pci_dev *pd + pci_set_master(pdev); + + /* Query PCI controller on system for DMA addressing +- * limitation for the device. Try 64-bit first, and ++ * limitation for the device. Try 47-bit first, and + * fail to 32-bit. 
+ */ + +- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); ++ err = pci_set_dma_mask(pdev, DMA_BIT_MASK(47)); + if (err) { + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (err) { +@@ -2722,10 +2722,10 @@ static int enic_probe(struct pci_dev *pd + goto err_out_release_regions; + } + } else { +- err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); ++ err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(47)); + if (err) { + dev_err(dev, "Unable to obtain %u-bit DMA " +- "for consistent allocations, aborting\n", 64); ++ "for consistent allocations, aborting\n", 47); + goto err_out_release_regions; + } + using_dac = 1; diff --git a/queue-4.16/ip6_tunnel-remove-magic-mtu-value-0xfff8.patch b/queue-4.16/ip6_tunnel-remove-magic-mtu-value-0xfff8.patch new file mode 100644 index 00000000000..e65e5bf8613 --- /dev/null +++ b/queue-4.16/ip6_tunnel-remove-magic-mtu-value-0xfff8.patch @@ -0,0 +1,70 @@ +From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Nicolas Dichtel +Date: Thu, 31 May 2018 10:59:33 +0200 +Subject: ip6_tunnel: remove magic mtu value 0xFFF8 + +From: Nicolas Dichtel + +[ Upstream commit f7ff1fde9441b4fcc8ffb6e66e6e5a00d008937e ] + +I don't know where this value comes from (probably a copy and paste and +paste and paste ...). +Let's use standard values which are a bit greater. + +Link: https://git.kernel.org/pub/scm/linux/kernel/git/davem/netdev-vger-cvs.git/commit/?id=e5afd356a411a +Signed-off-by: Nicolas Dichtel +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_tunnel.c | 11 ++++++++--- + net/ipv6/sit.c | 5 +++-- + 2 files changed, 11 insertions(+), 5 deletions(-) + +--- a/net/ipv6/ip6_tunnel.c ++++ b/net/ipv6/ip6_tunnel.c +@@ -1688,8 +1688,13 @@ int ip6_tnl_change_mtu(struct net_device + if (new_mtu < ETH_MIN_MTU) + return -EINVAL; + } +- if (new_mtu > 0xFFF8 - dev->hard_header_len) +- return -EINVAL; ++ if (tnl->parms.proto == IPPROTO_IPV6 || tnl->parms.proto == 0) { ++ if (new_mtu > IP6_MAX_MTU - dev->hard_header_len) ++ return -EINVAL; ++ } else { ++ if (new_mtu > IP_MAX_MTU - dev->hard_header_len) ++ return -EINVAL; ++ } + dev->mtu = new_mtu; + return 0; + } +@@ -1837,7 +1842,7 @@ ip6_tnl_dev_init_gen(struct net_device * + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + dev->mtu -= 8; + dev->min_mtu = ETH_MIN_MTU; +- dev->max_mtu = 0xFFF8 - dev->hard_header_len; ++ dev->max_mtu = IP6_MAX_MTU - dev->hard_header_len; + + return 0; + +--- a/net/ipv6/sit.c ++++ b/net/ipv6/sit.c +@@ -1371,7 +1371,7 @@ static void ipip6_tunnel_setup(struct ne + dev->hard_header_len = LL_MAX_HEADER + t_hlen; + dev->mtu = ETH_DATA_LEN - t_hlen; + dev->min_mtu = IPV6_MIN_MTU; +- dev->max_mtu = 0xFFF8 - t_hlen; ++ dev->max_mtu = IP6_MAX_MTU - t_hlen; + dev->flags = IFF_NOARP; + netif_keep_dst(dev); + dev->addr_len = 4; +@@ -1583,7 +1583,8 @@ static int ipip6_newlink(struct net *src + if (tb[IFLA_MTU]) { + u32 mtu = nla_get_u32(tb[IFLA_MTU]); + +- if (mtu >= IPV6_MIN_MTU && mtu <= 0xFFF8 - dev->hard_header_len) ++ if (mtu >= IPV6_MIN_MTU && ++ mtu <= IP6_MAX_MTU - dev->hard_header_len) + dev->mtu = mtu; + } + diff --git a/queue-4.16/ip6mr-only-set-ip6mr_table-from-setsockopt-when-ip6mr_new_table-succeeds.patch b/queue-4.16/ip6mr-only-set-ip6mr_table-from-setsockopt-when-ip6mr_new_table-succeeds.patch new file mode 100644 index 00000000000..204368324e4 --- /dev/null +++ b/queue-4.16/ip6mr-only-set-ip6mr_table-from-setsockopt-when-ip6mr_new_table-succeeds.patch @@ -0,0 +1,37 @@ 
+From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Sabrina Dubroca +Date: Tue, 5 Jun 2018 15:01:59 +0200 +Subject: ip6mr: only set ip6mr_table from setsockopt when ip6mr_new_table succeeds + +From: Sabrina Dubroca + +[ Upstream commit 848235edb5c93ed086700584c8ff64f6d7fc778d ] + +Currently, raw6_sk(sk)->ip6mr_table is set unconditionally during +ip6_mroute_setsockopt(MRT6_TABLE). A subsequent attempt at the same +setsockopt will fail with -ENOENT, since we haven't actually created +that table. + +A similar fix for ipv4 was included in commit 5e1859fbcc3c ("ipv4: ipmr: +various fixes and cleanups"). + +Fixes: d1db275dd3f6 ("ipv6: ip6mr: support multiple tables") +Signed-off-by: Sabrina Dubroca +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6mr.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/ipv6/ip6mr.c ++++ b/net/ipv6/ip6mr.c +@@ -1800,7 +1800,8 @@ int ip6_mroute_setsockopt(struct sock *s + ret = 0; + if (!ip6mr_new_table(net, v)) + ret = -ENOMEM; +- raw6_sk(sk)->ip6mr_table = v; ++ else ++ raw6_sk(sk)->ip6mr_table = v; + rtnl_unlock(); + return ret; + } diff --git a/queue-4.16/ip_tunnel-restore-binding-to-ifaces-with-a-large-mtu.patch b/queue-4.16/ip_tunnel-restore-binding-to-ifaces-with-a-large-mtu.patch new file mode 100644 index 00000000000..ca2f5cfcfa3 --- /dev/null +++ b/queue-4.16/ip_tunnel-restore-binding-to-ifaces-with-a-large-mtu.patch @@ -0,0 +1,80 @@ +From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Nicolas Dichtel +Date: Thu, 31 May 2018 10:59:32 +0200 +Subject: ip_tunnel: restore binding to ifaces with a large mtu + +From: Nicolas Dichtel + +[ Upstream commit 82612de1c98e610d194e34178bde3cca7dedce41 ] + +After commit f6cc9c054e77, the following conf is broken (note that the +default loopback mtu is 65536, ie IP_MAX_MTU + 1): + +$ ip tunnel add gre1 mode gre local 10.125.0.1 remote 10.125.0.2 dev lo +add tunnel "gre0" failed: Invalid argument +$ ip l a type dummy +$ ip l s dummy1 up +$ ip l s 
dummy1 mtu 65535 +$ ip tunnel add gre1 mode gre local 10.125.0.1 remote 10.125.0.2 dev dummy1 +add tunnel "gre0" failed: Invalid argument + +dev_set_mtu() doesn't allow to set a mtu which is too large. +First, let's cap the mtu returned by ip_tunnel_bind_dev(). Second, remove +the magic value 0xFFF8 and use IP_MAX_MTU instead. +0xFFF8 seems to be there for ages, I don't know why this value was used. + +With a recent kernel, it's also possible to set a mtu > IP_MAX_MTU: +$ ip l s dummy1 mtu 66000 +After that patch, it's also possible to bind an ip tunnel on that kind of +interface. + +CC: Petr Machata +CC: Ido Schimmel +Link: https://git.kernel.org/pub/scm/linux/kernel/git/davem/netdev-vger-cvs.git/commit/?id=e5afd356a411a +Fixes: f6cc9c054e77 ("ip_tunnel: Emit events for post-register MTU changes") +Signed-off-by: Nicolas Dichtel +Reviewed-by: Ido Schimmel +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_tunnel.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/net/ipv4/ip_tunnel.c ++++ b/net/ipv4/ip_tunnel.c +@@ -344,7 +344,7 @@ static int ip_tunnel_bind_dev(struct net + + if (tdev) { + hlen = tdev->hard_header_len + tdev->needed_headroom; +- mtu = tdev->mtu; ++ mtu = min(tdev->mtu, IP_MAX_MTU); + } + + dev->needed_headroom = t_hlen + hlen; +@@ -379,7 +379,7 @@ static struct ip_tunnel *ip_tunnel_creat + nt = netdev_priv(dev); + t_hlen = nt->hlen + sizeof(struct iphdr); + dev->min_mtu = ETH_MIN_MTU; +- dev->max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen; ++ dev->max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen; + ip_tunnel_add(itn, nt); + return nt; + +@@ -948,7 +948,7 @@ int __ip_tunnel_change_mtu(struct net_de + { + struct ip_tunnel *tunnel = netdev_priv(dev); + int t_hlen = tunnel->hlen + sizeof(struct iphdr); +- int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen; ++ int max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen; + + if (new_mtu < ETH_MIN_MTU) + return -EINVAL; +@@ -1119,7 +1119,7 @@ 
int ip_tunnel_newlink(struct net_device + + mtu = ip_tunnel_bind_dev(dev); + if (tb[IFLA_MTU]) { +- unsigned int max = 0xfff8 - dev->hard_header_len - nt->hlen; ++ unsigned int max = IP_MAX_MTU - dev->hard_header_len - nt->hlen; + + mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, + (unsigned int)(max - sizeof(struct iphdr))); diff --git a/queue-4.16/ipmr-properly-check-rhltable_init-return-value.patch b/queue-4.16/ipmr-properly-check-rhltable_init-return-value.patch new file mode 100644 index 00000000000..cd47ef3b5cd --- /dev/null +++ b/queue-4.16/ipmr-properly-check-rhltable_init-return-value.patch @@ -0,0 +1,89 @@ +From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Eric Dumazet +Date: Mon, 21 May 2018 10:51:53 -0700 +Subject: ipmr: properly check rhltable_init() return value + +From: Eric Dumazet + +[ Upstream commit 66fb33254f45df4b049f487aff1cbde1ef919390 ] + +commit 8fb472c09b9d ("ipmr: improve hash scalability") +added a call to rhltable_init() without checking its return value. + +This problem was then later copied to IPv6 and factorized in commit +0bbbf0e7d0e7 ("ipmr, ip6mr: Unite creation of new mr_table") + +kasan: CONFIG_KASAN_INLINE enabled +kasan: GPF could be caused by NULL-ptr deref or user memory access +general protection fault: 0000 [#1] SMP KASAN +Dumping ftrace buffer: + (ftrace buffer empty) +Modules linked in: +CPU: 1 PID: 31552 Comm: syz-executor7 Not tainted 4.17.0-rc5+ #60 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +RIP: 0010:rht_key_hashfn include/linux/rhashtable.h:277 [inline] +RIP: 0010:__rhashtable_lookup include/linux/rhashtable.h:630 [inline] +RIP: 0010:rhltable_lookup include/linux/rhashtable.h:716 [inline] +RIP: 0010:mr_mfc_find_parent+0x2ad/0xbb0 net/ipv4/ipmr_base.c:63 +RSP: 0018:ffff8801826aef70 EFLAGS: 00010203 +RAX: 0000000000000001 RBX: 0000000000000001 RCX: ffffc90001ea0000 +RDX: 0000000000000079 RSI: ffffffff8661e859 RDI: 000000000000000c +RBP: ffff8801826af1c0 R08: 
ffff8801b2212000 R09: ffffed003b5e46c2 +R10: ffffed003b5e46c2 R11: ffff8801daf23613 R12: dffffc0000000000 +R13: ffff8801826af198 R14: ffff8801cf8225c0 R15: ffff8801826af658 +FS: 00007ff7fa732700(0000) GS:ffff8801daf00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00000003ffffff9c CR3: 00000001b0210000 CR4: 00000000001406e0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + ip6mr_cache_find_parent net/ipv6/ip6mr.c:981 [inline] + ip6mr_mfc_delete+0x1fe/0x6b0 net/ipv6/ip6mr.c:1221 + ip6_mroute_setsockopt+0x15c6/0x1d70 net/ipv6/ip6mr.c:1698 + do_ipv6_setsockopt.isra.9+0x422/0x4660 net/ipv6/ipv6_sockglue.c:163 + ipv6_setsockopt+0xbd/0x170 net/ipv6/ipv6_sockglue.c:922 + rawv6_setsockopt+0x59/0x140 net/ipv6/raw.c:1060 + sock_common_setsockopt+0x9a/0xe0 net/core/sock.c:3039 + __sys_setsockopt+0x1bd/0x390 net/socket.c:1903 + __do_sys_setsockopt net/socket.c:1914 [inline] + __se_sys_setsockopt net/socket.c:1911 [inline] + __x64_sys_setsockopt+0xbe/0x150 net/socket.c:1911 + do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287 + entry_SYSCALL_64_after_hwframe+0x49/0xbe + +Fixes: 8fb472c09b9d ("ipmr: improve hash scalability") +Fixes: 0bbbf0e7d0e7 ("ipmr, ip6mr: Unite creation of new mr_table") +Signed-off-by: Eric Dumazet +Cc: Nikolay Aleksandrov +Cc: Yuval Mintz +Reported-by: syzbot +Acked-by: Nikolay Aleksandrov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ipmr.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/net/ipv4/ipmr.c ++++ b/net/ipv4/ipmr.c +@@ -356,6 +356,7 @@ static const struct rhashtable_params ip + static struct mr_table *ipmr_new_table(struct net *net, u32 id) + { + struct mr_table *mrt; ++ int err; + + /* "pimreg%u" should not exceed 16 bytes (IFNAMSIZ) */ + if (id != RT_TABLE_DEFAULT && id >= 1000000000) +@@ -371,7 +372,11 @@ static struct mr_table *ipmr_new_table(s + write_pnet(&mrt->net, net); + mrt->id = id; + +- rhltable_init(&mrt->mfc_hash, &ipmr_rht_params); ++ err = rhltable_init(&mrt->mfc_hash, &ipmr_rht_params); ++ if (err) { ++ kfree(mrt); ++ return ERR_PTR(err); ++ } + INIT_LIST_HEAD(&mrt->mfc_cache_list); + INIT_LIST_HEAD(&mrt->mfc_unres_queue); + diff --git a/queue-4.16/ipv4-remove-warning-in-ip_recv_error.patch b/queue-4.16/ipv4-remove-warning-in-ip_recv_error.patch new file mode 100644 index 00000000000..ff8d14c6d82 --- /dev/null +++ b/queue-4.16/ipv4-remove-warning-in-ip_recv_error.patch @@ -0,0 +1,66 @@ +From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Willem de Bruijn +Date: Wed, 23 May 2018 14:29:52 -0400 +Subject: ipv4: remove warning in ip_recv_error + +From: Willem de Bruijn + +[ Upstream commit 730c54d59403658a62af6517338fa8d4922c1b28 ] + +A precondition check in ip_recv_error triggered on an otherwise benign +race. Remove the warning. + +The warning triggers when passing an ipv6 socket to this ipv4 error +handling function. RaceFuzzer was able to trigger it due to a race +in setsockopt IPV6_ADDRFORM. + + --- + CPU0 + do_ipv6_setsockopt + sk->sk_socket->ops = &inet_dgram_ops; + + --- + CPU1 + sk->sk_prot->recvmsg + udp_recvmsg + ip_recv_error + WARN_ON_ONCE(sk->sk_family == AF_INET6); + + --- + CPU0 + do_ipv6_setsockopt + sk->sk_family = PF_INET; + +This socket option converts a v6 socket that is connected to a v4 peer +to an v4 socket. 
It updates the socket on the fly, changing fields in +sk as well as other structs. This is inherently non-atomic. It races +with the lockless udp_recvmsg path. + +No other code makes an assumption that these fields are updated +atomically. It is benign here, too, as ip_recv_error cares only about +the protocol of the skbs enqueued on the error queue, for which +sk_family is not a precise predictor (thanks to another issue with +IPV6_ADDRFORM). + +Link: http://lkml.kernel.org/r/20180518120826.GA19515@dragonet.kaist.ac.kr +Fixes: 7ce875e5ecb8 ("ipv4: warn once on passing AF_INET6 socket to ip_recv_error") +Reported-by: DaeRyong Jeong +Suggested-by: Eric Dumazet +Signed-off-by: Willem de Bruijn +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_sockglue.c | 2 -- + 1 file changed, 2 deletions(-) + +--- a/net/ipv4/ip_sockglue.c ++++ b/net/ipv4/ip_sockglue.c +@@ -511,8 +511,6 @@ int ip_recv_error(struct sock *sk, struc + int err; + int copied; + +- WARN_ON_ONCE(sk->sk_family == AF_INET6); +- + err = -EAGAIN; + skb = sock_dequeue_err_skb(sk); + if (!skb) diff --git a/queue-4.16/ipv6-omit-traffic-class-when-calculating-flow-hash.patch b/queue-4.16/ipv6-omit-traffic-class-when-calculating-flow-hash.patch new file mode 100644 index 00000000000..ea04115e67c --- /dev/null +++ b/queue-4.16/ipv6-omit-traffic-class-when-calculating-flow-hash.patch @@ -0,0 +1,66 @@ +From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Michal Kubecek +Date: Mon, 4 Jun 2018 11:36:05 +0200 +Subject: ipv6: omit traffic class when calculating flow hash + +From: Michal Kubecek + +[ Upstream commit fa1be7e01ea863e911349e30456706749518eeab ] + +Some of the code paths calculating flow hash for IPv6 use flowlabel member +of struct flowi6 which, despite its name, encodes both flow label and +traffic class. If traffic class changes within a TCP connection (as e.g. +ssh does), ECMP route can switch between paths. 
It's also inconsistent with +other code paths where ip6_flowlabel() (returning only flow label) is used +to feed the key. + +Use only flow label everywhere, including one place where hash key is set +using ip6_flowinfo(). + +Fixes: 51ebd3181572 ("ipv6: add support of equal cost multipath (ECMP)") +Fixes: f70ea018da06 ("net: Add functions to get skb->hash based on flow structures") +Signed-off-by: Michal Kubecek +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/ipv6.h | 5 +++++ + net/core/flow_dissector.c | 2 +- + net/ipv6/route.c | 2 +- + 3 files changed, 7 insertions(+), 2 deletions(-) + +--- a/include/net/ipv6.h ++++ b/include/net/ipv6.h +@@ -918,6 +918,11 @@ static inline __be32 ip6_make_flowinfo(u + return htonl(tclass << IPV6_TCLASS_SHIFT) | flowlabel; + } + ++static inline __be32 flowi6_get_flowlabel(const struct flowi6 *fl6) ++{ ++ return fl6->flowlabel & IPV6_FLOWLABEL_MASK; ++} ++ + /* + * Prototypes exported by ipv6 + */ +--- a/net/core/flow_dissector.c ++++ b/net/core/flow_dissector.c +@@ -1334,7 +1334,7 @@ __u32 __get_hash_from_flowi6(const struc + keys->ports.src = fl6->fl6_sport; + keys->ports.dst = fl6->fl6_dport; + keys->keyid.keyid = fl6->fl6_gre_key; +- keys->tags.flow_label = (__force u32)fl6->flowlabel; ++ keys->tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6); + keys->basic.ip_proto = fl6->flowi6_proto; + + return flow_hash_from_keys(keys); +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -1850,7 +1850,7 @@ out: + keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + keys->addrs.v6addrs.src = key_iph->saddr; + keys->addrs.v6addrs.dst = key_iph->daddr; +- keys->tags.flow_label = ip6_flowinfo(key_iph); ++ keys->tags.flow_label = ip6_flowlabel(key_iph); + keys->basic.ip_proto = key_iph->nexthdr; + } + diff --git a/queue-4.16/ipv6-sr-fix-memory-oob-access-in-seg6_do_srh_encap-inline.patch b/queue-4.16/ipv6-sr-fix-memory-oob-access-in-seg6_do_srh_encap-inline.patch new file mode 100644 index 
00000000000..77d041262fe --- /dev/null +++ b/queue-4.16/ipv6-sr-fix-memory-oob-access-in-seg6_do_srh_encap-inline.patch @@ -0,0 +1,179 @@ +From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Mathieu Xhonneux +Date: Fri, 25 May 2018 13:29:41 +0100 +Subject: ipv6: sr: fix memory OOB access in seg6_do_srh_encap/inline + +From: Mathieu Xhonneux + +[ Upstream commit bbb40a0b75209734ff9286f3326171638c9f6569 ] + +seg6_do_srh_encap and seg6_do_srh_inline can possibly do an +out-of-bounds access when adding the SRH to the packet. This no longer +happen when expanding the skb not only by the size of the SRH (+ +outer IPv6 header), but also by skb->mac_len. + +[ 53.793056] BUG: KASAN: use-after-free in seg6_do_srh_encap+0x284/0x620 +[ 53.794564] Write of size 14 at addr ffff88011975ecfa by task ping/674 + +[ 53.796665] CPU: 0 PID: 674 Comm: ping Not tainted 4.17.0-rc3-ARCH+ #90 +[ 53.796670] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), +BIOS 1.11.0-20171110_100015-anatol 04/01/2014 +[ 53.796673] Call Trace: +[ 53.796679] +[ 53.796689] dump_stack+0x71/0xab +[ 53.796700] print_address_description+0x6a/0x270 +[ 53.796707] kasan_report+0x258/0x380 +[ 53.796715] ? seg6_do_srh_encap+0x284/0x620 +[ 53.796722] memmove+0x34/0x50 +[ 53.796730] seg6_do_srh_encap+0x284/0x620 +[ 53.796741] ? seg6_do_srh+0x29b/0x360 +[ 53.796747] seg6_do_srh+0x29b/0x360 +[ 53.796756] seg6_input+0x2e/0x2e0 +[ 53.796765] lwtunnel_input+0x93/0xd0 +[ 53.796774] ipv6_rcv+0x690/0x920 +[ 53.796783] ? ip6_input+0x170/0x170 +[ 53.796791] ? eth_gro_receive+0x2d0/0x2d0 +[ 53.796800] ? ip6_input+0x170/0x170 +[ 53.796809] __netif_receive_skb_core+0xcc0/0x13f0 +[ 53.796820] ? netdev_info+0x110/0x110 +[ 53.796827] ? napi_complete_done+0xb6/0x170 +[ 53.796834] ? e1000_clean+0x6da/0xf70 +[ 53.796845] ? process_backlog+0x129/0x2a0 +[ 53.796853] process_backlog+0x129/0x2a0 +[ 53.796862] net_rx_action+0x211/0x5c0 +[ 53.796870] ? napi_complete_done+0x170/0x170 +[ 53.796887] ? 
run_rebalance_domains+0x11f/0x150 +[ 53.796891] __do_softirq+0x10e/0x39e +[ 53.796894] do_softirq_own_stack+0x2a/0x40 +[ 53.796895] +[ 53.796898] do_softirq.part.16+0x54/0x60 +[ 53.796900] __local_bh_enable_ip+0x5b/0x60 +[ 53.796903] ip6_finish_output2+0x416/0x9f0 +[ 53.796906] ? ip6_dst_lookup_flow+0x110/0x110 +[ 53.796909] ? ip6_sk_dst_lookup_flow+0x390/0x390 +[ 53.796911] ? __rcu_read_unlock+0x66/0x80 +[ 53.796913] ? ip6_mtu+0x44/0xf0 +[ 53.796916] ? ip6_output+0xfc/0x220 +[ 53.796918] ip6_output+0xfc/0x220 +[ 53.796921] ? ip6_finish_output+0x2b0/0x2b0 +[ 53.796923] ? memcpy+0x34/0x50 +[ 53.796926] ip6_send_skb+0x43/0xc0 +[ 53.796929] rawv6_sendmsg+0x1216/0x1530 +[ 53.796932] ? __orc_find+0x6b/0xc0 +[ 53.796934] ? rawv6_rcv_skb+0x160/0x160 +[ 53.796937] ? __rcu_read_unlock+0x66/0x80 +[ 53.796939] ? __rcu_read_unlock+0x66/0x80 +[ 53.796942] ? is_bpf_text_address+0x1e/0x30 +[ 53.796944] ? kernel_text_address+0xec/0x100 +[ 53.796946] ? __kernel_text_address+0xe/0x30 +[ 53.796948] ? unwind_get_return_address+0x2f/0x50 +[ 53.796950] ? __save_stack_trace+0x92/0x100 +[ 53.796954] ? save_stack+0x89/0xb0 +[ 53.796956] ? kasan_kmalloc+0xa0/0xd0 +[ 53.796958] ? kmem_cache_alloc+0xd2/0x1f0 +[ 53.796961] ? prepare_creds+0x23/0x160 +[ 53.796963] ? __x64_sys_capset+0x252/0x3e0 +[ 53.796966] ? do_syscall_64+0x69/0x160 +[ 53.796968] ? entry_SYSCALL_64_after_hwframe+0x44/0xa9 +[ 53.796971] ? __alloc_pages_nodemask+0x170/0x380 +[ 53.796973] ? __alloc_pages_slowpath+0x12c0/0x12c0 +[ 53.796977] ? tty_vhangup+0x20/0x20 +[ 53.796979] ? policy_nodemask+0x1a/0x90 +[ 53.796982] ? __mod_node_page_state+0x8d/0xa0 +[ 53.796986] ? __check_object_size+0xe7/0x240 +[ 53.796989] ? __sys_sendto+0x229/0x290 +[ 53.796991] ? rawv6_rcv_skb+0x160/0x160 +[ 53.796993] __sys_sendto+0x229/0x290 +[ 53.796996] ? __ia32_sys_getpeername+0x50/0x50 +[ 53.796999] ? commit_creds+0x2de/0x520 +[ 53.797002] ? security_capset+0x57/0x70 +[ 53.797004] ? __x64_sys_capset+0x29f/0x3e0 +[ 53.797007] ? 
__x64_sys_rt_sigsuspend+0xe0/0xe0 +[ 53.797011] ? __do_page_fault+0x664/0x770 +[ 53.797014] __x64_sys_sendto+0x74/0x90 +[ 53.797017] do_syscall_64+0x69/0x160 +[ 53.797019] entry_SYSCALL_64_after_hwframe+0x44/0xa9 +[ 53.797022] RIP: 0033:0x7f43b7a6714a +[ 53.797023] RSP: 002b:00007ffd891bd368 EFLAGS: 00000246 ORIG_RAX: +000000000000002c +[ 53.797026] RAX: ffffffffffffffda RBX: 00000000006129c0 RCX: 00007f43b7a6714a +[ 53.797028] RDX: 0000000000000040 RSI: 00000000006129c0 RDI: 0000000000000004 +[ 53.797029] RBP: 00007ffd891be640 R08: 0000000000610940 R09: 000000000000001c +[ 53.797030] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000040 +[ 53.797032] R13: 000000000060e6a0 R14: 0000000000008004 R15: 000000000040b661 + +[ 53.797171] Allocated by task 642: +[ 53.797460] kasan_kmalloc+0xa0/0xd0 +[ 53.797463] kmem_cache_alloc+0xd2/0x1f0 +[ 53.797465] getname_flags+0x40/0x210 +[ 53.797467] user_path_at_empty+0x1d/0x40 +[ 53.797469] do_faccessat+0x12a/0x320 +[ 53.797471] do_syscall_64+0x69/0x160 +[ 53.797473] entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +[ 53.797607] Freed by task 642: +[ 53.797869] __kasan_slab_free+0x130/0x180 +[ 53.797871] kmem_cache_free+0xa8/0x230 +[ 53.797872] filename_lookup+0x15b/0x230 +[ 53.797874] do_faccessat+0x12a/0x320 +[ 53.797876] do_syscall_64+0x69/0x160 +[ 53.797878] entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +[ 53.798014] The buggy address belongs to the object at ffff88011975e600 + which belongs to the cache names_cache of size 4096 +[ 53.799043] The buggy address is located 1786 bytes inside of + 4096-byte region [ffff88011975e600, ffff88011975f600) +[ 53.800013] The buggy address belongs to the page: +[ 53.800414] page:ffffea000465d600 count:1 mapcount:0 +mapping:0000000000000000 index:0x0 compound_mapcount: 0 +[ 53.801259] flags: 0x17fff0000008100(slab|head) +[ 53.801640] raw: 017fff0000008100 0000000000000000 0000000000000000 +0000000100070007 +[ 53.803147] raw: dead000000000100 dead000000000200 ffff88011b185a40 
+0000000000000000 +[ 53.803787] page dumped because: kasan: bad access detected + +[ 53.804384] Memory state around the buggy address: +[ 53.804788] ffff88011975eb80: fb fb fb fb fb fb fb fb fb fb fb fb +fb fb fb fb +[ 53.805384] ffff88011975ec00: fb fb fb fb fb fb fb fb fb fb fb fb +fb fb fb fb +[ 53.805979] >ffff88011975ec80: fb fb fb fb fb fb fb fb fb fb fb fb +fb fb fb fb +[ 53.806577] ^ +[ 53.807165] ffff88011975ed00: fb fb fb fb fb fb fb fb fb fb fb fb +fb fb fb fb +[ 53.807762] ffff88011975ed80: fb fb fb fb fb fb fb fb fb fb fb fb +fb fb fb fb +[ 53.808356] ================================================================== +[ 53.808949] Disabling lock debugging due to kernel taint + +Fixes: 6c8702c60b88 ("ipv6: sr: add support for SRH encapsulation and injection with lwtunnels") +Signed-off-by: David Lebrun +Signed-off-by: Mathieu Xhonneux +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/seg6_iptunnel.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/ipv6/seg6_iptunnel.c ++++ b/net/ipv6/seg6_iptunnel.c +@@ -103,7 +103,7 @@ int seg6_do_srh_encap(struct sk_buff *sk + hdrlen = (osrh->hdrlen + 1) << 3; + tot_len = hdrlen + sizeof(*hdr); + +- err = skb_cow_head(skb, tot_len); ++ err = skb_cow_head(skb, tot_len + skb->mac_len); + if (unlikely(err)) + return err; + +@@ -161,7 +161,7 @@ int seg6_do_srh_inline(struct sk_buff *s + + hdrlen = (osrh->hdrlen + 1) << 3; + +- err = skb_cow_head(skb, hdrlen); ++ err = skb_cow_head(skb, hdrlen + skb->mac_len); + if (unlikely(err)) + return err; + diff --git a/queue-4.16/isdn-eicon-fix-a-missing-check-bug.patch b/queue-4.16/isdn-eicon-fix-a-missing-check-bug.patch new file mode 100644 index 00000000000..7d855af77ae --- /dev/null +++ b/queue-4.16/isdn-eicon-fix-a-missing-check-bug.patch @@ -0,0 +1,185 @@ +From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Wenwen Wang +Date: Mon, 21 May 2018 01:58:07 -0500 +Subject: isdn: eicon: fix a missing-check bug + +From: Wenwen Wang 
+ +[ Upstream commit 6009d1fe6ba3bb2dab55921da60465329cc1cd89 ] + +In divasmain.c, the function divas_write() firstly invokes the function +diva_xdi_open_adapter() to open the adapter that matches with the adapter +number provided by the user, and then invokes the function diva_xdi_write() +to perform the write operation using the matched adapter. The two functions +diva_xdi_open_adapter() and diva_xdi_write() are located in diva.c. + +In diva_xdi_open_adapter(), the user command is copied to the object 'msg' +from the userspace pointer 'src' through the function pointer 'cp_fn', +which eventually calls copy_from_user() to do the copy. Then, the adapter +number 'msg.adapter' is used to find out a matched adapter from the +'adapter_queue'. A matched adapter will be returned if it is found. +Otherwise, NULL is returned to indicate the failure of the verification on +the adapter number. + +As mentioned above, if a matched adapter is returned, the function +diva_xdi_write() is invoked to perform the write operation. In this +function, the user command is copied once again from the userspace pointer +'src', which is the same as the 'src' pointer in diva_xdi_open_adapter() as +both of them are from the 'buf' pointer in divas_write(). Similarly, the +copy is achieved through the function pointer 'cp_fn', which finally calls +copy_from_user(). After the successful copy, the corresponding command +processing handler of the matched adapter is invoked to perform the write +operation. + +It is obvious that there are two copies here from userspace, one is in +diva_xdi_open_adapter(), and one is in diva_xdi_write(). Plus, both of +these two copies share the same source userspace pointer, i.e., the 'buf' +pointer in divas_write(). Given that a malicious userspace process can race +to change the content pointed by the 'buf' pointer, this can pose potential +security issues. 
For example, in the first copy, the user provides a valid +adapter number to pass the verification process and a valid adapter can be +found. Then the user can modify the adapter number to an invalid number. +This way, the user can bypass the verification process of the adapter +number and inject inconsistent data. + +This patch reuses the data copied in +diva_xdi_open_adapter() and passes it to diva_xdi_write(). This way, the +above issues can be avoided. + +Signed-off-by: Wenwen Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/isdn/hardware/eicon/diva.c | 22 +++++++++++++++------- + drivers/isdn/hardware/eicon/diva.h | 5 +++-- + drivers/isdn/hardware/eicon/divasmain.c | 18 +++++++++++------- + 3 files changed, 29 insertions(+), 16 deletions(-) + +--- a/drivers/isdn/hardware/eicon/diva.c ++++ b/drivers/isdn/hardware/eicon/diva.c +@@ -388,10 +388,10 @@ void divasa_xdi_driver_unload(void) + ** Receive and process command from user mode utility + */ + void *diva_xdi_open_adapter(void *os_handle, const void __user *src, +- int length, ++ int length, void *mptr, + divas_xdi_copy_from_user_fn_t cp_fn) + { +- diva_xdi_um_cfg_cmd_t msg; ++ diva_xdi_um_cfg_cmd_t *msg = (diva_xdi_um_cfg_cmd_t *)mptr; + diva_os_xdi_adapter_t *a = NULL; + diva_os_spin_lock_magic_t old_irql; + struct list_head *tmp; +@@ -401,21 +401,21 @@ void *diva_xdi_open_adapter(void *os_han + length, sizeof(diva_xdi_um_cfg_cmd_t))) + return NULL; + } +- if ((*cp_fn) (os_handle, &msg, src, sizeof(msg)) <= 0) { ++ if ((*cp_fn) (os_handle, msg, src, sizeof(*msg)) <= 0) { + DBG_ERR(("A: A(?) 
open, write error")) + return NULL; + } + diva_os_enter_spin_lock(&adapter_lock, &old_irql, "open_adapter"); + list_for_each(tmp, &adapter_queue) { + a = list_entry(tmp, diva_os_xdi_adapter_t, link); +- if (a->controller == (int)msg.adapter) ++ if (a->controller == (int)msg->adapter) + break; + a = NULL; + } + diva_os_leave_spin_lock(&adapter_lock, &old_irql, "open_adapter"); + + if (!a) { +- DBG_ERR(("A: A(%d) open, adapter not found", msg.adapter)) ++ DBG_ERR(("A: A(%d) open, adapter not found", msg->adapter)) + } + + return (a); +@@ -437,8 +437,10 @@ void diva_xdi_close_adapter(void *adapte + + int + diva_xdi_write(void *adapter, void *os_handle, const void __user *src, +- int length, divas_xdi_copy_from_user_fn_t cp_fn) ++ int length, void *mptr, ++ divas_xdi_copy_from_user_fn_t cp_fn) + { ++ diva_xdi_um_cfg_cmd_t *msg = (diva_xdi_um_cfg_cmd_t *)mptr; + diva_os_xdi_adapter_t *a = (diva_os_xdi_adapter_t *) adapter; + void *data; + +@@ -459,7 +461,13 @@ diva_xdi_write(void *adapter, void *os_h + return (-2); + } + +- length = (*cp_fn) (os_handle, data, src, length); ++ if (msg) { ++ *(diva_xdi_um_cfg_cmd_t *)data = *msg; ++ length = (*cp_fn) (os_handle, (char *)data + sizeof(*msg), ++ src + sizeof(*msg), length - sizeof(*msg)); ++ } else { ++ length = (*cp_fn) (os_handle, data, src, length); ++ } + if (length > 0) { + if ((*(a->interface.cmd_proc)) + (a, (diva_xdi_um_cfg_cmd_t *) data, length)) { +--- a/drivers/isdn/hardware/eicon/diva.h ++++ b/drivers/isdn/hardware/eicon/diva.h +@@ -20,10 +20,11 @@ int diva_xdi_read(void *adapter, void *o + int max_length, divas_xdi_copy_to_user_fn_t cp_fn); + + int diva_xdi_write(void *adapter, void *os_handle, const void __user *src, +- int length, divas_xdi_copy_from_user_fn_t cp_fn); ++ int length, void *msg, ++ divas_xdi_copy_from_user_fn_t cp_fn); + + void *diva_xdi_open_adapter(void *os_handle, const void __user *src, +- int length, ++ int length, void *msg, + divas_xdi_copy_from_user_fn_t cp_fn); + + void 
diva_xdi_close_adapter(void *adapter, void *os_handle); +--- a/drivers/isdn/hardware/eicon/divasmain.c ++++ b/drivers/isdn/hardware/eicon/divasmain.c +@@ -591,19 +591,22 @@ static int divas_release(struct inode *i + static ssize_t divas_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) + { ++ diva_xdi_um_cfg_cmd_t msg; + int ret = -EINVAL; + + if (!file->private_data) { + file->private_data = diva_xdi_open_adapter(file, buf, +- count, ++ count, &msg, + xdi_copy_from_user); +- } +- if (!file->private_data) { +- return (-ENODEV); ++ if (!file->private_data) ++ return (-ENODEV); ++ ret = diva_xdi_write(file->private_data, file, ++ buf, count, &msg, xdi_copy_from_user); ++ } else { ++ ret = diva_xdi_write(file->private_data, file, ++ buf, count, NULL, xdi_copy_from_user); + } + +- ret = diva_xdi_write(file->private_data, file, +- buf, count, xdi_copy_from_user); + switch (ret) { + case -1: /* Message should be removed from rx mailbox first */ + ret = -EBUSY; +@@ -622,11 +625,12 @@ static ssize_t divas_write(struct file * + static ssize_t divas_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) + { ++ diva_xdi_um_cfg_cmd_t msg; + int ret = -EINVAL; + + if (!file->private_data) { + file->private_data = diva_xdi_open_adapter(file, buf, +- count, ++ count, &msg, + xdi_copy_from_user); + } + if (!file->private_data) { diff --git a/queue-4.16/kcm-fix-use-after-free-caused-by-clonned-sockets.patch b/queue-4.16/kcm-fix-use-after-free-caused-by-clonned-sockets.patch new file mode 100644 index 00000000000..632e8d20818 --- /dev/null +++ b/queue-4.16/kcm-fix-use-after-free-caused-by-clonned-sockets.patch @@ -0,0 +1,34 @@ +From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Kirill Tkhai +Date: Fri, 1 Jun 2018 14:30:38 +0300 +Subject: kcm: Fix use-after-free caused by clonned sockets + +From: Kirill Tkhai + +[ Upstream commit eb7f54b90bd8f469834c5e86dcf72ebf9a629811 ] + +(resend for properly queueing in patchwork) + +kcm_clone() creates 
kernel socket, which does not take net counter. +Thus, the net may die before the socket is completely destructed, +i.e. kcm_exit_net() is executed before kcm_done(). + +Reported-by: syzbot+5f1a04e374a635efc426@syzkaller.appspotmail.com +Signed-off-by: Kirill Tkhai +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/kcm/kcmsock.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/kcm/kcmsock.c ++++ b/net/kcm/kcmsock.c +@@ -1671,7 +1671,7 @@ static struct file *kcm_clone(struct soc + __module_get(newsock->ops->owner); + + newsk = sk_alloc(sock_net(osock->sk), PF_KCM, GFP_KERNEL, +- &kcm_proto, true); ++ &kcm_proto, false); + if (!newsk) { + sock_release(newsock); + return ERR_PTR(-ENOMEM); diff --git a/queue-4.16/l2tp-fix-refcount-leakage-on-pppol2tp-sockets.patch b/queue-4.16/l2tp-fix-refcount-leakage-on-pppol2tp-sockets.patch new file mode 100644 index 00000000000..3527307b9ef --- /dev/null +++ b/queue-4.16/l2tp-fix-refcount-leakage-on-pppol2tp-sockets.patch @@ -0,0 +1,145 @@ +From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Guillaume Nault +Date: Mon, 4 Jun 2018 18:52:19 +0200 +Subject: l2tp: fix refcount leakage on PPPoL2TP sockets + +From: Guillaume Nault + +[ Upstream commit 3d609342cc04129ff7568e19316ce3d7451a27e8 ] + +Commit d02ba2a6110c ("l2tp: fix race in pppol2tp_release with session +object destroy") tried to fix a race condition where a PPPoL2TP socket +would disappear while the L2TP session was still using it. However, it +missed the root issue which is that an L2TP session may accept to be +reconnected if its associated socket has entered the release process. + +The tentative fix makes the session hold the socket it is connected to. +That saves the kernel from crashing, but introduces refcount leakage, +preventing the socket from completing the release process. Once stalled, +everything the socket depends on can't be released anymore, including +the L2TP session and the l2tp_ppp module. 
+ +The root issue is that, when releasing a connected PPPoL2TP socket, the +session's ->sk pointer (RCU-protected) is reset to NULL and we have to +wait for a grace period before destroying the socket. The socket drops +the session in its ->sk_destruct callback function, so the session +will exist until the last reference on the socket is dropped. +Therefore, there is a time frame where pppol2tp_connect() may accept +reconnecting a session, as it only checks ->sk to figure out if the +session is connected. This time frame is shortened by the fact that +pppol2tp_release() calls l2tp_session_delete(), making the session +unreachable before resetting ->sk. However, pppol2tp_connect() may +grab the session before it gets unhashed by l2tp_session_delete(), but +it may test ->sk after the later got reset. The race is not so hard to +trigger and syzbot found a pretty reliable reproducer: +https://syzkaller.appspot.com/bug?id=418578d2a4389074524e04d641eacb091961b2cf + +Before d02ba2a6110c, another race could let pppol2tp_release() +overwrite the ->__sk pointer of an L2TP session, thus tricking +pppol2tp_put_sk() into calling sock_put() on a socket that is different +than the one for which pppol2tp_release() was originally called. To get +there, we had to trigger the race described above, therefore having one +PPPoL2TP socket being released, while the session it is connected to is +reconnecting to a different PPPoL2TP socket. When releasing this new +socket fast enough, pppol2tp_release() overwrites the session's +->__sk pointer with the address of the new socket, before the first +pppol2tp_put_sk() call gets scheduled. Then the pppol2tp_put_sk() call +invoked by the original socket will sock_put() the new socket, +potentially dropping its last reference. When the second +pppol2tp_put_sk() finally runs, its socket has already been freed. + +With d02ba2a6110c, the session takes a reference on both sockets. 
+Furthermore, the session's ->sk pointer is reset in the +pppol2tp_session_close() callback function rather than in +pppol2tp_release(). Therefore, ->__sk can't be overwritten and +pppol2tp_put_sk() is called only once (l2tp_session_delete() will only +run pppol2tp_session_close() once, to protect the session against +concurrent deletion requests). Now pppol2tp_put_sk() will properly +sock_put() the original socket, but the new socket will remain, as +l2tp_session_delete() prevented the release process from completing. +Here, we don't depend on the ->__sk race to trigger the bug. Getting +into the pppol2tp_connect() race is enough to leak the reference, no +matter when new socket is released. + +So it all boils down to pppol2tp_connect() failing to realise that the +session has already been connected. This patch drops the unneeded extra +reference counting (mostly reverting d02ba2a6110c) and checks that +neither ->sk nor ->__sk is set before allowing a session to be +connected. + +Fixes: d02ba2a6110c ("l2tp: fix race in pppol2tp_release with session object destroy") +Signed-off-by: Guillaume Nault +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/l2tp/l2tp_ppp.c | 35 +++++++++++++++++------------------ + 1 file changed, 17 insertions(+), 18 deletions(-) + +--- a/net/l2tp/l2tp_ppp.c ++++ b/net/l2tp/l2tp_ppp.c +@@ -428,16 +428,6 @@ static void pppol2tp_put_sk(struct rcu_h + */ + static void pppol2tp_session_close(struct l2tp_session *session) + { +- struct pppol2tp_session *ps; +- +- ps = l2tp_session_priv(session); +- mutex_lock(&ps->sk_lock); +- ps->__sk = rcu_dereference_protected(ps->sk, +- lockdep_is_held(&ps->sk_lock)); +- RCU_INIT_POINTER(ps->sk, NULL); +- if (ps->__sk) +- call_rcu(&ps->rcu, pppol2tp_put_sk); +- mutex_unlock(&ps->sk_lock); + } + + /* Really kill the session socket. 
(Called from sock_put() if +@@ -480,15 +470,24 @@ static int pppol2tp_release(struct socke + sock_orphan(sk); + sock->sk = NULL; + +- /* If the socket is associated with a session, +- * l2tp_session_delete will call pppol2tp_session_close which +- * will drop the session's ref on the socket. +- */ + session = pppol2tp_sock_to_session(sk); + if (session) { ++ struct pppol2tp_session *ps; ++ + l2tp_session_delete(session); +- /* drop the ref obtained by pppol2tp_sock_to_session */ +- sock_put(sk); ++ ++ ps = l2tp_session_priv(session); ++ mutex_lock(&ps->sk_lock); ++ ps->__sk = rcu_dereference_protected(ps->sk, ++ lockdep_is_held(&ps->sk_lock)); ++ RCU_INIT_POINTER(ps->sk, NULL); ++ mutex_unlock(&ps->sk_lock); ++ call_rcu(&ps->rcu, pppol2tp_put_sk); ++ ++ /* Rely on the sock_put() call at the end of the function for ++ * dropping the reference held by pppol2tp_sock_to_session(). ++ * The last reference will be dropped by pppol2tp_put_sk(). ++ */ + } + + release_sock(sk); +@@ -742,7 +741,8 @@ static int pppol2tp_connect(struct socke + */ + mutex_lock(&ps->sk_lock); + if (rcu_dereference_protected(ps->sk, +- lockdep_is_held(&ps->sk_lock))) { ++ lockdep_is_held(&ps->sk_lock)) || ++ ps->__sk) { + mutex_unlock(&ps->sk_lock); + error = -EEXIST; + goto end; +@@ -803,7 +803,6 @@ static int pppol2tp_connect(struct socke + + out_no_ppp: + /* This is how we get the session context from the socket. 
*/ +- sock_hold(sk); + sk->sk_user_data = session; + rcu_assign_pointer(ps->sk, sk); + mutex_unlock(&ps->sk_lock); diff --git a/queue-4.16/mlxsw-spectrum-forbid-creation-of-vlan-1-over-port-lag.patch b/queue-4.16/mlxsw-spectrum-forbid-creation-of-vlan-1-over-port-lag.patch new file mode 100644 index 00000000000..a8246a1704e --- /dev/null +++ b/queue-4.16/mlxsw-spectrum-forbid-creation-of-vlan-1-over-port-lag.patch @@ -0,0 +1,79 @@ +From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Petr Machata +Date: Sun, 27 May 2018 09:48:41 +0300 +Subject: mlxsw: spectrum: Forbid creation of VLAN 1 over port/LAG + +From: Petr Machata + +[ Upstream commit 47bf9df2e8201d07c40670e093629f8dfd1b5d9f ] + +VLAN 1 is internally used for untagged traffic. Prevent creation of +explicit netdevice for that VLAN, because that currently isn't supported +and leads to the NULL pointer dereference cited below. + +Fix by preventing creation of VLAN devices with VID of 1 over mlxsw +devices or LAG devices that involve mlxsw devices. + +[ 327.175816] ================================================================================ +[ 327.184544] UBSAN: Undefined behaviour in drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c:200:12 +[ 327.193667] member access within null pointer of type 'const struct mlxsw_sp_fid' +[ 327.201226] CPU: 0 PID: 8983 Comm: ip Not tainted 4.17.0-rc4-petrm_net_ip6gre_headroom-custom-140 #11 +[ 327.210496] Hardware name: Mellanox Technologies Ltd. 
"MSN2410-CB2F"/"SA000874", BIOS 4.6.5 03/08/2016 +[ 327.219872] Call Trace: +[ 327.222384] dump_stack+0xc3/0x12b +[ 327.234007] ubsan_epilogue+0x9/0x49 +[ 327.237638] ubsan_type_mismatch_common+0x1f9/0x2d0 +[ 327.255769] __ubsan_handle_type_mismatch+0x90/0xa7 +[ 327.264716] mlxsw_sp_fid_type+0x35/0x50 [mlxsw_spectrum] +[ 327.270255] mlxsw_sp_port_vlan_router_leave+0x46/0xc0 [mlxsw_spectrum] +[ 327.277019] mlxsw_sp_inetaddr_port_vlan_event+0xe1/0x340 [mlxsw_spectrum] +[ 327.315031] mlxsw_sp_netdevice_vrf_event+0xa8/0x100 [mlxsw_spectrum] +[ 327.321626] mlxsw_sp_netdevice_event+0x276/0x430 [mlxsw_spectrum] +[ 327.367863] notifier_call_chain+0x4c/0x150 +[ 327.372128] __netdev_upper_dev_link+0x1b3/0x260 +[ 327.399450] vrf_add_slave+0xce/0x170 [vrf] +[ 327.403703] do_setlink+0x658/0x1d70 +[ 327.508998] rtnl_newlink+0x908/0xf20 +[ 327.559128] rtnetlink_rcv_msg+0x50c/0x720 +[ 327.571720] netlink_rcv_skb+0x16a/0x1f0 +[ 327.583450] netlink_unicast+0x2ca/0x3e0 +[ 327.599305] netlink_sendmsg+0x3e2/0x7f0 +[ 327.616655] sock_sendmsg+0x76/0xc0 +[ 327.620207] ___sys_sendmsg+0x494/0x5d0 +[ 327.666117] __sys_sendmsg+0xc2/0x130 +[ 327.690953] do_syscall_64+0x66/0x370 +[ 327.694677] entry_SYSCALL_64_after_hwframe+0x49/0xbe +[ 327.699782] RIP: 0033:0x7f4c2f3f8037 +[ 327.703393] RSP: 002b:00007ffe8c389708 EFLAGS: 00000246 ORIG_RAX: 000000000000002e +[ 327.711035] RAX: ffffffffffffffda RBX: 000000005b03f53e RCX: 00007f4c2f3f8037 +[ 327.718229] RDX: 0000000000000000 RSI: 00007ffe8c389760 RDI: 0000000000000003 +[ 327.725431] RBP: 00007ffe8c389760 R08: 0000000000000000 R09: 00007f4c2f443630 +[ 327.732632] R10: 00000000000005eb R11: 0000000000000246 R12: 0000000000000000 +[ 327.739833] R13: 00000000006774e0 R14: 00007ffe8c3897e8 R15: 0000000000000000 +[ 327.747096] ================================================================================ + +Fixes: 9589a7b5d7d9 ("mlxsw: spectrum: Handle VLAN devices linking / unlinking") +Suggested-by: Ido Schimmel +Signed-off-by: Petr Machata 
+Signed-off-by: Ido Schimmel +Acked-by: Jiri Pirko +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +@@ -4870,6 +4870,11 @@ static int mlxsw_sp_netdevice_port_upper + "spectrum: Can not put a VLAN on an OVS port"); + return -EINVAL; + } ++ if (is_vlan_dev(upper_dev) && ++ vlan_dev_vlan_id(upper_dev) == 1) { ++ NL_SET_ERR_MSG_MOD(extack, "Creating a VLAN device with VID 1 is unsupported: VLAN 1 carries untagged traffic"); ++ return -EINVAL; ++ } + break; + case NETDEV_CHANGEUPPER: + upper_dev = info->upper_dev; diff --git a/queue-4.16/net-dsa-b53-fix-for-brcm-tag-issue-in-cygnus-soc.patch b/queue-4.16/net-dsa-b53-fix-for-brcm-tag-issue-in-cygnus-soc.patch new file mode 100644 index 00000000000..9c75888bba1 --- /dev/null +++ b/queue-4.16/net-dsa-b53-fix-for-brcm-tag-issue-in-cygnus-soc.patch @@ -0,0 +1,106 @@ +From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Arun Parameswaran +Date: Tue, 5 Jun 2018 13:38:12 -0700 +Subject: net: dsa: b53: Fix for brcm tag issue in Cygnus SoC + +From: Arun Parameswaran + +[ Upstream commit 5040cc990cbac98733df4d58fdeac5bbdab15b49 ] + +In the Broadcom Cygnus SoC, the brcm tag needs to be inserted +in between the mac address and the ether type (should use +'DSA_PROTO_TAG_BRCM') for the packets sent to the internal +b53 switch. + +Since the Cygnus was added with the BCM58XX device id and the +BCM58XX uses 'DSA_PROTO_TAG_BRCM_PREPEND', the data path is +broken, due to the incorrect brcm tag location. + +Add a new b53 device id (BCM583XX) for Cygnus family to fix the +issue. Add the new device id to the BCM58XX family as Cygnus +is similar to the BCM58XX in most other functionalities. 
+ +Fixes: 11606039604c ("net: dsa: b53: Support prepended Broadcom tags") + +Signed-off-by: Arun Parameswaran +Acked-by: Scott Branden +Reported-by: Clément Péron +Reviewed-by: Florian Fainelli +Tested-by: Clément Péron +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/dsa/b53/b53_common.c | 15 ++++++++++++++- + drivers/net/dsa/b53/b53_priv.h | 2 ++ + drivers/net/dsa/b53/b53_srab.c | 4 ++-- + 3 files changed, 18 insertions(+), 3 deletions(-) + +--- a/drivers/net/dsa/b53/b53_common.c ++++ b/drivers/net/dsa/b53/b53_common.c +@@ -684,7 +684,8 @@ static int b53_switch_reset(struct b53_d + * still use this driver as a library and need to perform the reset + * earlier. + */ +- if (dev->chip_id == BCM58XX_DEVICE_ID) { ++ if (dev->chip_id == BCM58XX_DEVICE_ID || ++ dev->chip_id == BCM583XX_DEVICE_ID) { + b53_read8(dev, B53_CTRL_PAGE, B53_SOFTRESET, &reg); + reg |= SW_RST | EN_SW_RST | EN_CH_RST; + b53_write8(dev, B53_CTRL_PAGE, B53_SOFTRESET, reg); +@@ -1863,6 +1864,18 @@ static const struct b53_chip_data b53_sw + .arl_entries = 4, + .cpu_port = B53_CPU_PORT, + .vta_regs = B53_VTA_REGS, ++ .duplex_reg = B53_DUPLEX_STAT_GE, ++ .jumbo_pm_reg = B53_JUMBO_PORT_MASK, ++ .jumbo_size_reg = B53_JUMBO_MAX_SIZE, ++ }, ++ { ++ .chip_id = BCM583XX_DEVICE_ID, ++ .dev_name = "BCM583xx/11360", ++ .vlans = 4096, ++ .enabled_ports = 0x103, ++ .arl_entries = 4, ++ .cpu_port = B53_CPU_PORT, ++ .vta_regs = B53_VTA_REGS, + .duplex_reg = B53_DUPLEX_STAT_GE, + .jumbo_pm_reg = B53_JUMBO_PORT_MASK, + .jumbo_size_reg = B53_JUMBO_MAX_SIZE, +--- a/drivers/net/dsa/b53/b53_priv.h ++++ b/drivers/net/dsa/b53/b53_priv.h +@@ -61,6 +61,7 @@ enum { + BCM53018_DEVICE_ID = 0x53018, + BCM53019_DEVICE_ID = 0x53019, + BCM58XX_DEVICE_ID = 0x5800, ++ BCM583XX_DEVICE_ID = 0x58300, + BCM7445_DEVICE_ID = 0x7445, + BCM7278_DEVICE_ID = 0x7278, + }; +@@ -180,6 +181,7 @@ static inline int is5301x(struct b53_dev + static inline int is58xx(struct b53_device *dev) + { + return dev->chip_id == 
BCM58XX_DEVICE_ID || ++ dev->chip_id == BCM583XX_DEVICE_ID || + dev->chip_id == BCM7445_DEVICE_ID || + dev->chip_id == BCM7278_DEVICE_ID; + } +--- a/drivers/net/dsa/b53/b53_srab.c ++++ b/drivers/net/dsa/b53/b53_srab.c +@@ -364,7 +364,7 @@ static const struct of_device_id b53_sra + { .compatible = "brcm,bcm53018-srab" }, + { .compatible = "brcm,bcm53019-srab" }, + { .compatible = "brcm,bcm5301x-srab" }, +- { .compatible = "brcm,bcm11360-srab", .data = (void *)BCM58XX_DEVICE_ID }, ++ { .compatible = "brcm,bcm11360-srab", .data = (void *)BCM583XX_DEVICE_ID }, + { .compatible = "brcm,bcm58522-srab", .data = (void *)BCM58XX_DEVICE_ID }, + { .compatible = "brcm,bcm58525-srab", .data = (void *)BCM58XX_DEVICE_ID }, + { .compatible = "brcm,bcm58535-srab", .data = (void *)BCM58XX_DEVICE_ID }, +@@ -372,7 +372,7 @@ static const struct of_device_id b53_sra + { .compatible = "brcm,bcm58623-srab", .data = (void *)BCM58XX_DEVICE_ID }, + { .compatible = "brcm,bcm58625-srab", .data = (void *)BCM58XX_DEVICE_ID }, + { .compatible = "brcm,bcm88312-srab", .data = (void *)BCM58XX_DEVICE_ID }, +- { .compatible = "brcm,cygnus-srab", .data = (void *)BCM58XX_DEVICE_ID }, ++ { .compatible = "brcm,cygnus-srab", .data = (void *)BCM583XX_DEVICE_ID }, + { .compatible = "brcm,nsp-srab", .data = (void *)BCM58XX_DEVICE_ID }, + { /* sentinel */ }, + }; diff --git a/queue-4.16/net-ethernet-davinci_emac-fix-error-handling-in-probe.patch b/queue-4.16/net-ethernet-davinci_emac-fix-error-handling-in-probe.patch new file mode 100644 index 00000000000..675b0556e40 --- /dev/null +++ b/queue-4.16/net-ethernet-davinci_emac-fix-error-handling-in-probe.patch @@ -0,0 +1,93 @@ +From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Dan Carpenter +Date: Thu, 31 May 2018 09:44:49 +0300 +Subject: net: ethernet: davinci_emac: fix error handling in probe() + +From: Dan Carpenter + +[ Upstream commit 8005b09d99fac78e6f5fb9da30b5ae94840af03b ] + +The current error handling code has an issue where it does: + + if (priv->txchan) 
+ cpdma_chan_destroy(priv->txchan); + +The problem is that ->txchan is either valid or an error pointer (which +would lead to an Oops). I've changed it to use multiple error labels so +that the test can be removed. + +Also there were some missing calls to netif_napi_del(). + +Fixes: 3ef0fdb2342c ("net: davinci_emac: switch to new cpdma layer") +Signed-off-by: Dan Carpenter +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/ti/davinci_emac.c | 22 ++++++++++++---------- + 1 file changed, 12 insertions(+), 10 deletions(-) + +--- a/drivers/net/ethernet/ti/davinci_emac.c ++++ b/drivers/net/ethernet/ti/davinci_emac.c +@@ -1873,7 +1873,7 @@ static int davinci_emac_probe(struct pla + if (IS_ERR(priv->txchan)) { + dev_err(&pdev->dev, "error initializing tx dma channel\n"); + rc = PTR_ERR(priv->txchan); +- goto no_cpdma_chan; ++ goto err_free_dma; + } + + priv->rxchan = cpdma_chan_create(priv->dma, EMAC_DEF_RX_CH, +@@ -1881,14 +1881,14 @@ static int davinci_emac_probe(struct pla + if (IS_ERR(priv->rxchan)) { + dev_err(&pdev->dev, "error initializing rx dma channel\n"); + rc = PTR_ERR(priv->rxchan); +- goto no_cpdma_chan; ++ goto err_free_txchan; + } + + res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (!res) { + dev_err(&pdev->dev, "error getting irq res\n"); + rc = -ENOENT; +- goto no_cpdma_chan; ++ goto err_free_rxchan; + } + ndev->irq = res->start; + +@@ -1914,7 +1914,7 @@ static int davinci_emac_probe(struct pla + pm_runtime_put_noidle(&pdev->dev); + dev_err(&pdev->dev, "%s: failed to get_sync(%d)\n", + __func__, rc); +- goto no_cpdma_chan; ++ goto err_napi_del; + } + + /* register the network device */ +@@ -1924,7 +1924,7 @@ static int davinci_emac_probe(struct pla + dev_err(&pdev->dev, "error in register_netdev\n"); + rc = -ENODEV; + pm_runtime_put(&pdev->dev); +- goto no_cpdma_chan; ++ goto err_napi_del; + } + + +@@ -1937,11 +1937,13 @@ static int davinci_emac_probe(struct pla + + return 0; + +-no_cpdma_chan: +- if 
(priv->txchan) +- cpdma_chan_destroy(priv->txchan); +- if (priv->rxchan) +- cpdma_chan_destroy(priv->rxchan); ++err_napi_del: ++ netif_napi_del(&priv->napi); ++err_free_rxchan: ++ cpdma_chan_destroy(priv->rxchan); ++err_free_txchan: ++ cpdma_chan_destroy(priv->txchan); ++err_free_dma: + cpdma_ctlr_destroy(priv->dma); + no_pdata: + if (of_phy_is_fixed_link(np)) diff --git a/queue-4.16/net-ipv4-add-missing-rta_table-to-rtm_ipv4_policy.patch b/queue-4.16/net-ipv4-add-missing-rta_table-to-rtm_ipv4_policy.patch new file mode 100644 index 00000000000..57a9e61eecf --- /dev/null +++ b/queue-4.16/net-ipv4-add-missing-rta_table-to-rtm_ipv4_policy.patch @@ -0,0 +1,26 @@ +From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Roopa Prabhu +Date: Tue, 22 May 2018 13:44:51 -0700 +Subject: net: ipv4: add missing RTA_TABLE to rtm_ipv4_policy + +From: Roopa Prabhu + +[ Upstream commit 2eabd764cb5512f1338d06ffc054c8bc9fbe9104 ] + +Signed-off-by: Roopa Prabhu +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/fib_frontend.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/ipv4/fib_frontend.c ++++ b/net/ipv4/fib_frontend.c +@@ -643,6 +643,7 @@ const struct nla_policy rtm_ipv4_policy[ + [RTA_ENCAP] = { .type = NLA_NESTED }, + [RTA_UID] = { .type = NLA_U32 }, + [RTA_MARK] = { .type = NLA_U32 }, ++ [RTA_TABLE] = { .type = NLA_U32 }, + }; + + static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, diff --git a/queue-4.16/net-metrics-add-proper-netlink-validation.patch b/queue-4.16/net-metrics-add-proper-netlink-validation.patch new file mode 100644 index 00000000000..96c51c7cf92 --- /dev/null +++ b/queue-4.16/net-metrics-add-proper-netlink-validation.patch @@ -0,0 +1,129 @@ +From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Eric Dumazet +Date: Tue, 5 Jun 2018 06:06:19 -0700 +Subject: net: metrics: add proper netlink validation + +From: Eric Dumazet + +[ Upstream commit 5b5e7a0de2bbf2a1afcd9f49e940010e9fb80d53 ] + +Before using nla_get_u32(), better 
make sure the attribute +is of the proper size. + +Code recently was changed, but bug has been there from beginning +of git. + +BUG: KMSAN: uninit-value in rtnetlink_put_metrics+0x553/0x960 net/core/rtnetlink.c:746 +CPU: 1 PID: 14139 Comm: syz-executor6 Not tainted 4.17.0-rc5+ #103 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack+0x185/0x1d0 lib/dump_stack.c:113 + kmsan_report+0x149/0x260 mm/kmsan/kmsan.c:1084 + __msan_warning_32+0x6e/0xc0 mm/kmsan/kmsan_instr.c:686 + rtnetlink_put_metrics+0x553/0x960 net/core/rtnetlink.c:746 + fib_dump_info+0xc42/0x2190 net/ipv4/fib_semantics.c:1361 + rtmsg_fib+0x65f/0x8c0 net/ipv4/fib_semantics.c:419 + fib_table_insert+0x2314/0x2b50 net/ipv4/fib_trie.c:1287 + inet_rtm_newroute+0x210/0x340 net/ipv4/fib_frontend.c:779 + rtnetlink_rcv_msg+0xa32/0x1560 net/core/rtnetlink.c:4646 + netlink_rcv_skb+0x378/0x600 net/netlink/af_netlink.c:2448 + rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:4664 + netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] + netlink_unicast+0x1678/0x1750 net/netlink/af_netlink.c:1336 + netlink_sendmsg+0x104f/0x1350 net/netlink/af_netlink.c:1901 + sock_sendmsg_nosec net/socket.c:629 [inline] + sock_sendmsg net/socket.c:639 [inline] + ___sys_sendmsg+0xec0/0x1310 net/socket.c:2117 + __sys_sendmsg net/socket.c:2155 [inline] + __do_sys_sendmsg net/socket.c:2164 [inline] + __se_sys_sendmsg net/socket.c:2162 [inline] + __x64_sys_sendmsg+0x331/0x460 net/socket.c:2162 + do_syscall_64+0x152/0x230 arch/x86/entry/common.c:287 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 +RIP: 0033:0x455a09 +RSP: 002b:00007faae5fd8c68 EFLAGS: 00000246 ORIG_RAX: 000000000000002e +RAX: ffffffffffffffda RBX: 00007faae5fd96d4 RCX: 0000000000455a09 +RDX: 0000000000000000 RSI: 0000000020000000 RDI: 0000000000000013 +RBP: 000000000072bea0 R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000246 R12: 
00000000ffffffff +R13: 00000000000005d0 R14: 00000000006fdc20 R15: 0000000000000000 + +Uninit was stored to memory at: + kmsan_save_stack_with_flags mm/kmsan/kmsan.c:279 [inline] + kmsan_save_stack mm/kmsan/kmsan.c:294 [inline] + kmsan_internal_chain_origin+0x12b/0x210 mm/kmsan/kmsan.c:685 + __msan_chain_origin+0x69/0xc0 mm/kmsan/kmsan_instr.c:529 + fib_convert_metrics net/ipv4/fib_semantics.c:1056 [inline] + fib_create_info+0x2d46/0x9dc0 net/ipv4/fib_semantics.c:1150 + fib_table_insert+0x3e4/0x2b50 net/ipv4/fib_trie.c:1146 + inet_rtm_newroute+0x210/0x340 net/ipv4/fib_frontend.c:779 + rtnetlink_rcv_msg+0xa32/0x1560 net/core/rtnetlink.c:4646 + netlink_rcv_skb+0x378/0x600 net/netlink/af_netlink.c:2448 + rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:4664 + netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] + netlink_unicast+0x1678/0x1750 net/netlink/af_netlink.c:1336 + netlink_sendmsg+0x104f/0x1350 net/netlink/af_netlink.c:1901 + sock_sendmsg_nosec net/socket.c:629 [inline] + sock_sendmsg net/socket.c:639 [inline] + ___sys_sendmsg+0xec0/0x1310 net/socket.c:2117 + __sys_sendmsg net/socket.c:2155 [inline] + __do_sys_sendmsg net/socket.c:2164 [inline] + __se_sys_sendmsg net/socket.c:2162 [inline] + __x64_sys_sendmsg+0x331/0x460 net/socket.c:2162 + do_syscall_64+0x152/0x230 arch/x86/entry/common.c:287 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 +Uninit was created at: + kmsan_save_stack_with_flags mm/kmsan/kmsan.c:279 [inline] + kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:189 + kmsan_kmalloc+0x94/0x100 mm/kmsan/kmsan.c:315 + kmsan_slab_alloc+0x10/0x20 mm/kmsan/kmsan.c:322 + slab_post_alloc_hook mm/slab.h:446 [inline] + slab_alloc_node mm/slub.c:2753 [inline] + __kmalloc_node_track_caller+0xb32/0x11b0 mm/slub.c:4395 + __kmalloc_reserve net/core/skbuff.c:138 [inline] + __alloc_skb+0x2cb/0x9e0 net/core/skbuff.c:206 + alloc_skb include/linux/skbuff.h:988 [inline] + netlink_alloc_large_skb net/netlink/af_netlink.c:1182 [inline] + 
netlink_sendmsg+0x76e/0x1350 net/netlink/af_netlink.c:1876 + sock_sendmsg_nosec net/socket.c:629 [inline] + sock_sendmsg net/socket.c:639 [inline] + ___sys_sendmsg+0xec0/0x1310 net/socket.c:2117 + __sys_sendmsg net/socket.c:2155 [inline] + __do_sys_sendmsg net/socket.c:2164 [inline] + __se_sys_sendmsg net/socket.c:2162 [inline] + __x64_sys_sendmsg+0x331/0x460 net/socket.c:2162 + do_syscall_64+0x152/0x230 arch/x86/entry/common.c:287 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +Fixes: a919525ad832 ("net: Move fib_convert_metrics to metrics file") +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Cc: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/fib_semantics.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/net/ipv4/fib_semantics.c ++++ b/net/ipv4/fib_semantics.c +@@ -717,6 +717,8 @@ bool fib_metrics_match(struct fib_config + nla_strlcpy(tmp, nla, sizeof(tmp)); + val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca); + } else { ++ if (nla_len(nla) != sizeof(u32)) ++ return false; + val = nla_get_u32(nla); + } + +@@ -1043,6 +1045,8 @@ fib_convert_metrics(struct fib_info *fi, + if (val == TCP_CA_UNSPEC) + return -EINVAL; + } else { ++ if (nla_len(nla) != sizeof(u32)) ++ return -EINVAL; + val = nla_get_u32(nla); + } + if (type == RTAX_ADVMSS && val > 65535 - 40) diff --git a/queue-4.16/net-mlx4-fix-irq-unsafe-spinlock-usage.patch b/queue-4.16/net-mlx4-fix-irq-unsafe-spinlock-usage.patch new file mode 100644 index 00000000000..cab5258e959 --- /dev/null +++ b/queue-4.16/net-mlx4-fix-irq-unsafe-spinlock-usage.patch @@ -0,0 +1,72 @@ +From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Jack Morgenstein +Date: Wed, 23 May 2018 10:41:59 +0300 +Subject: net/mlx4: Fix irq-unsafe spinlock usage + +From: Jack Morgenstein + +[ Upstream commit d546b67cda015fb92bfee93d5dc0ceadb91deaee ] + +spin_lock/unlock was used instead of spin_un/lock_irq +in a procedure used in process space, on a 
spinlock +which can be grabbed in an interrupt. + +This caused the stack trace below to be displayed (on kernel +4.17.0-rc1 compiled with Lock Debugging enabled): + +[ 154.661474] WARNING: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected +[ 154.668909] 4.17.0-rc1-rdma_rc_mlx+ #3 Tainted: G I +[ 154.675856] ----------------------------------------------------- +[ 154.682706] modprobe/10159 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire: +[ 154.690254] 00000000f3b0e495 (&(&qp_table->lock)->rlock){+.+.}, at: mlx4_qp_remove+0x20/0x50 [mlx4_core] +[ 154.700927] +and this task is already holding: +[ 154.707461] 0000000094373b5d (&(&cq->lock)->rlock/1){....}, at: destroy_qp_common+0x111/0x560 [mlx4_ib] +[ 154.718028] which would create a new lock dependency: +[ 154.723705] (&(&cq->lock)->rlock/1){....} -> (&(&qp_table->lock)->rlock){+.+.} +[ 154.731922] +but this new dependency connects a SOFTIRQ-irq-safe lock: +[ 154.740798] (&(&cq->lock)->rlock){..-.} +[ 154.740800] +... which became SOFTIRQ-irq-safe at: +[ 154.752163] _raw_spin_lock_irqsave+0x3e/0x50 +[ 154.757163] mlx4_ib_poll_cq+0x36/0x900 [mlx4_ib] +[ 154.762554] ipoib_tx_poll+0x4a/0xf0 [ib_ipoib] +... +to a SOFTIRQ-irq-unsafe lock: +[ 154.815603] (&(&qp_table->lock)->rlock){+.+.} +[ 154.815604] +... which became SOFTIRQ-irq-unsafe at: +[ 154.827718] ... +[ 154.827720] _raw_spin_lock+0x35/0x50 +[ 154.833912] mlx4_qp_lookup+0x1e/0x50 [mlx4_core] +[ 154.839302] mlx4_flow_attach+0x3f/0x3d0 [mlx4_core] + +Since mlx4_qp_lookup() is called only in process space, we can +simply replace the spin_un/lock calls with spin_un/lock_irq calls. + +Fixes: 6dc06c08bef1 ("net/mlx4: Fix the check in attaching steering rules") +Signed-off-by: Jack Morgenstein +Signed-off-by: Tariq Toukan +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx4/qp.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx4/qp.c ++++ b/drivers/net/ethernet/mellanox/mlx4/qp.c +@@ -393,11 +393,11 @@ struct mlx4_qp *mlx4_qp_lookup(struct ml + struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table; + struct mlx4_qp *qp; + +- spin_lock(&qp_table->lock); ++ spin_lock_irq(&qp_table->lock); + + qp = __mlx4_qp_lookup(dev, qpn); + +- spin_unlock(&qp_table->lock); ++ spin_unlock_irq(&qp_table->lock); + return qp; + } + diff --git a/queue-4.16/net-mlx5e-when-rxfcs-is-set-add-fcs-data-into-checksum-calculation.patch b/queue-4.16/net-mlx5e-when-rxfcs-is-set-add-fcs-data-into-checksum-calculation.patch new file mode 100644 index 00000000000..0c5f5a4a201 --- /dev/null +++ b/queue-4.16/net-mlx5e-when-rxfcs-is-set-add-fcs-data-into-checksum-calculation.patch @@ -0,0 +1,84 @@ +From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Eran Ben Elisha +Date: Tue, 1 May 2018 16:25:07 +0300 +Subject: net/mlx5e: When RXFCS is set, add FCS data into checksum calculation + +From: Eran Ben Elisha + +[ Upstream commit 902a545904c71d719ed144234d67df75f31db63b ] + +When RXFCS feature is enabled, the HW does not strip the FCS data, +however it is not present in the checksum calculated by the HW. + +Fix that by manually calculating the FCS checksum and adding it to the SKB +checksum field. + +Add helper function to find the FCS data for all SKB forms (linear, +one fragment or more). 
+ +Fixes: 102722fc6832 ("net/mlx5e: Add support for RXFCS feature flag") +Signed-off-by: Eran Ben Elisha +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 42 ++++++++++++++++++++++++ + 1 file changed, 42 insertions(+) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +@@ -635,6 +635,45 @@ static inline bool is_last_ethertype_ip( + return (ethertype == htons(ETH_P_IP) || ethertype == htons(ETH_P_IPV6)); + } + ++static __be32 mlx5e_get_fcs(struct sk_buff *skb) ++{ ++ int last_frag_sz, bytes_in_prev, nr_frags; ++ u8 *fcs_p1, *fcs_p2; ++ skb_frag_t *last_frag; ++ __be32 fcs_bytes; ++ ++ if (!skb_is_nonlinear(skb)) ++ return *(__be32 *)(skb->data + skb->len - ETH_FCS_LEN); ++ ++ nr_frags = skb_shinfo(skb)->nr_frags; ++ last_frag = &skb_shinfo(skb)->frags[nr_frags - 1]; ++ last_frag_sz = skb_frag_size(last_frag); ++ ++ /* If all FCS data is in last frag */ ++ if (last_frag_sz >= ETH_FCS_LEN) ++ return *(__be32 *)(skb_frag_address(last_frag) + ++ last_frag_sz - ETH_FCS_LEN); ++ ++ fcs_p2 = (u8 *)skb_frag_address(last_frag); ++ bytes_in_prev = ETH_FCS_LEN - last_frag_sz; ++ ++ /* Find where the other part of the FCS is - Linear or another frag */ ++ if (nr_frags == 1) { ++ fcs_p1 = skb_tail_pointer(skb); ++ } else { ++ skb_frag_t *prev_frag = &skb_shinfo(skb)->frags[nr_frags - 2]; ++ ++ fcs_p1 = skb_frag_address(prev_frag) + ++ skb_frag_size(prev_frag); ++ } ++ fcs_p1 -= bytes_in_prev; ++ ++ memcpy(&fcs_bytes, fcs_p1, bytes_in_prev); ++ memcpy(((u8 *)&fcs_bytes) + bytes_in_prev, fcs_p2, last_frag_sz); ++ ++ return fcs_bytes; ++} ++ + static inline void mlx5e_handle_csum(struct net_device *netdev, + struct mlx5_cqe64 *cqe, + struct mlx5e_rq *rq, +@@ -663,6 +702,9 @@ static inline void mlx5e_handle_csum(str + skb->csum = csum_partial(skb->data + ETH_HLEN, + network_depth - ETH_HLEN, + skb->csum); ++ if (unlikely(netdev->features & 
NETIF_F_RXFCS)) ++ skb->csum = csum_add(skb->csum, ++ (__force __wsum)mlx5e_get_fcs(skb)); + rq->stats.csum_complete++; + return; + } diff --git a/queue-4.16/net-netsec-reduce-dma-mask-to-40-bits.patch b/queue-4.16/net-netsec-reduce-dma-mask-to-40-bits.patch new file mode 100644 index 00000000000..7545cc31c9c --- /dev/null +++ b/queue-4.16/net-netsec-reduce-dma-mask-to-40-bits.patch @@ -0,0 +1,71 @@ +From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Ard Biesheuvel +Date: Fri, 25 May 2018 14:50:37 +0200 +Subject: net: netsec: reduce DMA mask to 40 bits + +From: Ard Biesheuvel + +[ Upstream commit 312564269535892cc082bc80592150cd1f5e8ec3 ] + +The netsec network controller IP can drive 64 address bits for DMA, and +the DMA mask is set accordingly in the driver. However, the SynQuacer +SoC, which is the only silicon incorporating this IP at the moment, +integrates this IP in a manner that leaves address bits [63:40] +unconnected. + +Up until now, this has not resulted in any problems, given that the DDR +controller doesn't decode those bits to begin with. However, recent +firmware updates for platforms incorporating this SoC allow the IOMMU +to be enabled, which does decode address bits [47:40], and allocates +top down from the IOVA space, producing DMA addresses that have bits +set that have been left unconnected. + +Both the DT and ACPI (IORT) descriptions of the platform take this into +account, and only describe a DMA address space of 40 bits (using either +dma-ranges DT properties, or DMA address limits in IORT named component +nodes). However, even though our IOMMU and bus layers may take such +limitations into account by setting a narrower DMA mask when creating +the platform device, the netsec probe() entrypoint follows the common +practice of setting the DMA mask unconditionally, according to the +capabilities of the IP block itself rather than to its integration into +the chip. + +It is currently unclear what the correct fix is here. 
We could hack around +it by only setting the DMA mask if it deviates from its default value of +DMA_BIT_MASK(32). However, this makes it impossible for the bus layer to +use DMA_BIT_MASK(32) as the bus limit, and so it appears that a more +comprehensive approach is required to take DMA limits imposed by the +SoC as a whole into account. + +In the mean time, let's limit the DMA mask to 40 bits. Given that there +is currently only one SoC that incorporates this IP, this is a reasonable +approach that can be backported to -stable and buys us some time to come +up with a proper fix going forward. + +Fixes: 533dd11a12f6 ("net: socionext: Add Synquacer NetSec driver") +Cc: Robin Murphy +Cc: Jassi Brar +Cc: Masahisa Kojima +Cc: Ilias Apalodimas +Signed-off-by: Ard Biesheuvel +Reviewed-by: Robin Murphy +Acked-by: Jassi Brar +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/socionext/netsec.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/socionext/netsec.c ++++ b/drivers/net/ethernet/socionext/netsec.c +@@ -1674,8 +1674,8 @@ static int netsec_probe(struct platform_ + if (ret) + goto unreg_napi; + +- if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) +- dev_warn(&pdev->dev, "Failed to enable 64-bit DMA\n"); ++ if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40))) ++ dev_warn(&pdev->dev, "Failed to set DMA mask\n"); + + ret = register_netdev(ndev); + if (ret) { diff --git a/queue-4.16/net-packet-refine-check-for-priv-area-size.patch b/queue-4.16/net-packet-refine-check-for-priv-area-size.patch new file mode 100644 index 00000000000..6e80b1e7f12 --- /dev/null +++ b/queue-4.16/net-packet-refine-check-for-priv-area-size.patch @@ -0,0 +1,94 @@ +From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Eric Dumazet +Date: Fri, 1 Jun 2018 09:23:02 -0700 +Subject: net/packet: refine check for priv area size + +From: Eric Dumazet + +[ Upstream commit 
eb73190f4fbeedf762394e92d6a4ec9ace684c88 ] + +syzbot was able to trick af_packet again [1] + +Various commits tried to address the problem in the past, +but failed to take into account V3 header size. + +[1] + +tpacket_rcv: packet too big, clamped from 72 to 4294967224. macoff=96 +BUG: KASAN: use-after-free in prb_run_all_ft_ops net/packet/af_packet.c:1016 [inline] +BUG: KASAN: use-after-free in prb_fill_curr_block.isra.59+0x4e5/0x5c0 net/packet/af_packet.c:1039 +Write of size 2 at addr ffff8801cb62000e by task kworker/1:2/2106 + +CPU: 1 PID: 2106 Comm: kworker/1:2 Not tainted 4.17.0-rc7+ #77 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Workqueue: ipv6_addrconf addrconf_dad_work +Call Trace: + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack+0x1b9/0x294 lib/dump_stack.c:113 + print_address_description+0x6c/0x20b mm/kasan/report.c:256 + kasan_report_error mm/kasan/report.c:354 [inline] + kasan_report.cold.7+0x242/0x2fe mm/kasan/report.c:412 + __asan_report_store2_noabort+0x17/0x20 mm/kasan/report.c:436 + prb_run_all_ft_ops net/packet/af_packet.c:1016 [inline] + prb_fill_curr_block.isra.59+0x4e5/0x5c0 net/packet/af_packet.c:1039 + __packet_lookup_frame_in_block net/packet/af_packet.c:1094 [inline] + packet_current_rx_frame net/packet/af_packet.c:1117 [inline] + tpacket_rcv+0x1866/0x3340 net/packet/af_packet.c:2282 + dev_queue_xmit_nit+0x891/0xb90 net/core/dev.c:2018 + xmit_one net/core/dev.c:3049 [inline] + dev_hard_start_xmit+0x16b/0xc10 net/core/dev.c:3069 + __dev_queue_xmit+0x2724/0x34c0 net/core/dev.c:3584 + dev_queue_xmit+0x17/0x20 net/core/dev.c:3617 + neigh_resolve_output+0x679/0xad0 net/core/neighbour.c:1358 + neigh_output include/net/neighbour.h:482 [inline] + ip6_finish_output2+0xc9c/0x2810 net/ipv6/ip6_output.c:120 + ip6_finish_output+0x5fe/0xbc0 net/ipv6/ip6_output.c:154 + NF_HOOK_COND include/linux/netfilter.h:277 [inline] + ip6_output+0x227/0x9b0 net/ipv6/ip6_output.c:171 + dst_output 
include/net/dst.h:444 [inline] + NF_HOOK include/linux/netfilter.h:288 [inline] + ndisc_send_skb+0x100d/0x1570 net/ipv6/ndisc.c:491 + ndisc_send_ns+0x3c1/0x8d0 net/ipv6/ndisc.c:633 + addrconf_dad_work+0xbef/0x1340 net/ipv6/addrconf.c:4033 + process_one_work+0xc1e/0x1b50 kernel/workqueue.c:2145 + worker_thread+0x1cc/0x1440 kernel/workqueue.c:2279 + kthread+0x345/0x410 kernel/kthread.c:240 + ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:412 + +The buggy address belongs to the page: +page:ffffea00072d8800 count:0 mapcount:-127 mapping:0000000000000000 index:0xffff8801cb620e80 +flags: 0x2fffc0000000000() +raw: 02fffc0000000000 0000000000000000 ffff8801cb620e80 00000000ffffff80 +raw: ffffea00072e3820 ffffea0007132d20 0000000000000002 0000000000000000 +page dumped because: kasan: bad access detected + +Memory state around the buggy address: + ffff8801cb61ff00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + ffff8801cb61ff80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +>ffff8801cb620000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff + ^ + ffff8801cb620080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff + ffff8801cb620100: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff + +Fixes: 2b6867c2ce76 ("net/packet: fix overflow in check for priv area size") +Fixes: dc808110bb62 ("packet: handle too big packets for PACKET_V3") +Fixes: f6fb8f100b80 ("af-packet: TPACKET_V3 flexible buffer implementation.") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/packet/af_packet.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/packet/af_packet.c ++++ b/net/packet/af_packet.c +@@ -4284,7 +4284,7 @@ static int packet_set_ring(struct sock * + goto out; + if (po->tp_version >= TPACKET_V3 && + req->tp_block_size <= +- BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv)) ++ BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv) + sizeof(struct tpacket3_hdr)) + goto out; + if (unlikely(req->tp_frame_size < po->tp_hdrlen + + po->tp_reserve)) diff --git a/queue-4.16/net-phy-broadcom-fix-auxiliary-control-register-reads.patch b/queue-4.16/net-phy-broadcom-fix-auxiliary-control-register-reads.patch new file mode 100644 index 00000000000..f41e6a3288c --- /dev/null +++ b/queue-4.16/net-phy-broadcom-fix-auxiliary-control-register-reads.patch @@ -0,0 +1,37 @@ +From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Florian Fainelli +Date: Tue, 22 May 2018 16:22:26 -0700 +Subject: net: phy: broadcom: Fix auxiliary control register reads + +From: Florian Fainelli + +[ Upstream commit 733a969a7ed14fc5786bcc59c1bdda83c7ddb46e ] + +We are currently doing auxiliary control register reads with the shadow +register value 0b111 (0x7) which incidentally is also the selector value +that should be present in bits [2:0]. Fix this by using the appropriate +selector mask which is defined (MII_BCM54XX_AUXCTL_SHDWSEL_MASK). + +This does not have a functional impact yet because we always access the +MII_BCM54XX_AUXCTL_SHDWSEL_MISC (0x7) register in the current code. +This might change at some point though. + +Fixes: 5b4e29005123 ("net: phy: broadcom: add bcm54xx_auxctl_read") +Signed-off-by: Florian Fainelli +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/bcm-phy-lib.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/phy/bcm-phy-lib.c ++++ b/drivers/net/phy/bcm-phy-lib.c +@@ -56,7 +56,7 @@ int bcm54xx_auxctl_read(struct phy_devic + /* The register must be written to both the Shadow Register Select and + * the Shadow Read Register Selector + */ +- phy_write(phydev, MII_BCM54XX_AUX_CTL, regnum | ++ phy_write(phydev, MII_BCM54XX_AUX_CTL, MII_BCM54XX_AUXCTL_SHDWSEL_MASK | + regnum << MII_BCM54XX_AUXCTL_SHDWSEL_READ_SHIFT); + return phy_read(phydev, MII_BCM54XX_AUX_CTL); + } diff --git a/queue-4.16/net-phy-broadcom-fix-bcm_write_exp.patch b/queue-4.16/net-phy-broadcom-fix-bcm_write_exp.patch new file mode 100644 index 00000000000..1133acb01c2 --- /dev/null +++ b/queue-4.16/net-phy-broadcom-fix-bcm_write_exp.patch @@ -0,0 +1,90 @@ +From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Florian Fainelli +Date: Tue, 22 May 2018 17:04:49 -0700 +Subject: net: phy: broadcom: Fix bcm_write_exp() + +From: Florian Fainelli + +[ Upstream commit 79fb218d97980d4fee9a64f4c8ff05289364ba25 ] + +On newer PHYs, we need to select the expansion register to write with +setting bits [11:8] to 0xf. This was done correctly by bcm7xxx.c prior +to being migrated to generic code under bcm-phy-lib.c which +unfortunately used the older implementation from the BCM54xx days. + +Fix this by creating an inline stub: bcm_write_exp_sel() which adds the +correct value (MII_BCM54XX_EXP_SEL_ER) and update both the Cygnus PHY +and BCM7xxx PHY drivers which require setting these bits. + +broadcom.c is unchanged because some PHYs even use a different selector +method, so let them specify it directly (e.g: SerDes secondary selector). + +Fixes: a1cba5613edf ("net: phy: Add Broadcom phy library for common interfaces") +Signed-off-by: Florian Fainelli +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/bcm-cygnus.c | 6 +++--- + drivers/net/phy/bcm-phy-lib.h | 7 +++++++ + drivers/net/phy/bcm7xxx.c | 4 ++-- + 3 files changed, 12 insertions(+), 5 deletions(-) + +--- a/drivers/net/phy/bcm-cygnus.c ++++ b/drivers/net/phy/bcm-cygnus.c +@@ -61,17 +61,17 @@ static int bcm_cygnus_afe_config(struct + return rc; + + /* make rcal=100, since rdb default is 000 */ +- rc = bcm_phy_write_exp(phydev, MII_BRCM_CORE_EXPB1, 0x10); ++ rc = bcm_phy_write_exp_sel(phydev, MII_BRCM_CORE_EXPB1, 0x10); + if (rc < 0) + return rc; + + /* CORE_EXPB0, Reset R_CAL/RC_CAL Engine */ +- rc = bcm_phy_write_exp(phydev, MII_BRCM_CORE_EXPB0, 0x10); ++ rc = bcm_phy_write_exp_sel(phydev, MII_BRCM_CORE_EXPB0, 0x10); + if (rc < 0) + return rc; + + /* CORE_EXPB0, Disable Reset R_CAL/RC_CAL Engine */ +- rc = bcm_phy_write_exp(phydev, MII_BRCM_CORE_EXPB0, 0x00); ++ rc = bcm_phy_write_exp_sel(phydev, MII_BRCM_CORE_EXPB0, 0x00); + + return 0; + } +--- a/drivers/net/phy/bcm-phy-lib.h ++++ b/drivers/net/phy/bcm-phy-lib.h +@@ -14,11 +14,18 @@ + #ifndef _LINUX_BCM_PHY_LIB_H + #define _LINUX_BCM_PHY_LIB_H + ++#include + #include + + int bcm_phy_write_exp(struct phy_device *phydev, u16 reg, u16 val); + int bcm_phy_read_exp(struct phy_device *phydev, u16 reg); + ++static inline int bcm_phy_write_exp_sel(struct phy_device *phydev, ++ u16 reg, u16 val) ++{ ++ return bcm_phy_write_exp(phydev, reg | MII_BCM54XX_EXP_SEL_ER, val); ++} ++ + int bcm54xx_auxctl_write(struct phy_device *phydev, u16 regnum, u16 val); + int bcm54xx_auxctl_read(struct phy_device *phydev, u16 regnum); + +--- a/drivers/net/phy/bcm7xxx.c ++++ b/drivers/net/phy/bcm7xxx.c +@@ -65,10 +65,10 @@ struct bcm7xxx_phy_priv { + static void r_rc_cal_reset(struct phy_device *phydev) + { + /* Reset R_CAL/RC_CAL Engine */ +- bcm_phy_write_exp(phydev, 0x00b0, 0x0010); ++ bcm_phy_write_exp_sel(phydev, 0x00b0, 0x0010); + + /* Disable Reset R_AL/RC_CAL Engine */ +- bcm_phy_write_exp(phydev, 0x00b0, 
0x0000); ++ bcm_phy_write_exp_sel(phydev, 0x00b0, 0x0000); + } + + static int bcm7xxx_28nm_b0_afe_config_init(struct phy_device *phydev) diff --git a/queue-4.16/net-sched-cls_api-deal-with-egdev-path-only-if-needed.patch b/queue-4.16/net-sched-cls_api-deal-with-egdev-path-only-if-needed.patch new file mode 100644 index 00000000000..ac77e77bfb9 --- /dev/null +++ b/queue-4.16/net-sched-cls_api-deal-with-egdev-path-only-if-needed.patch @@ -0,0 +1,38 @@ +From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Or Gerlitz +Date: Wed, 23 May 2018 19:24:48 +0300 +Subject: net : sched: cls_api: deal with egdev path only if needed + +From: Or Gerlitz + +[ Upstream commit f8f4bef322e4600c5856911c7a632c0e3da920d6 ] + +When dealing with ingress rule on a netdev, if we did fine through the +conventional path, there's no need to continue into the egdev route, +and we can stop right there. + +Not doing so may cause a 2nd rule to be added by the cls api layer +with the ingress being the egdev. + +For example, under sriov switchdev scheme, a user rule of VFR A --> VFR B +will end up with two HW rules (1) VF A --> VF B and (2) uplink --> VF B + +Fixes: 208c0f4b5237 ('net: sched: use tc_setup_cb_call to call per-block callbacks') +Signed-off-by: Or Gerlitz +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/cls_api.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/sched/cls_api.c ++++ b/net/sched/cls_api.c +@@ -1587,7 +1587,7 @@ int tc_setup_cb_call(struct tcf_block *b + return ret; + ok_count = ret; + +- if (!exts) ++ if (!exts || ok_count) + return ok_count; + ret = tc_exts_setup_cb_egdev_call(exts, type, type_data, err_stop); + if (ret < 0) diff --git a/queue-4.16/net-sysfs-fix-memory-leak-in-xps-configuration.patch b/queue-4.16/net-sysfs-fix-memory-leak-in-xps-configuration.patch new file mode 100644 index 00000000000..9bf9ff4ea63 --- /dev/null +++ b/queue-4.16/net-sysfs-fix-memory-leak-in-xps-configuration.patch @@ -0,0 +1,43 @@ +From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Alexander Duyck +Date: Thu, 31 May 2018 15:59:46 -0400 +Subject: net-sysfs: Fix memory leak in XPS configuration + +From: Alexander Duyck + +[ Upstream commit 664088f8d68178809b848ca450f2797efb34e8e7 ] + +This patch reorders the error cases in showing the XPS configuration so +that we hold off on memory allocation until after we have verified that we +can support XPS on a given ring. + +Fixes: 184c449f91fe ("net: Add support for XPS with QoS via traffic classes") +Signed-off-by: Alexander Duyck +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/net-sysfs.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/net/core/net-sysfs.c ++++ b/net/core/net-sysfs.c +@@ -1214,9 +1214,6 @@ static ssize_t xps_cpus_show(struct netd + cpumask_var_t mask; + unsigned long index; + +- if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) +- return -ENOMEM; +- + index = get_netdev_queue_index(queue); + + if (dev->num_tc) { +@@ -1226,6 +1223,9 @@ static ssize_t xps_cpus_show(struct netd + return -EINVAL; + } + ++ if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) ++ return -ENOMEM; ++ + rcu_read_lock(); + dev_maps = rcu_dereference(dev->xps_maps); + if (dev_maps) { diff --git a/queue-4.16/net-usb-cdc_mbim-add-flag-flag_send_zlp.patch b/queue-4.16/net-usb-cdc_mbim-add-flag-flag_send_zlp.patch new file mode 100644 index 00000000000..ee5ec5bba1b --- /dev/null +++ b/queue-4.16/net-usb-cdc_mbim-add-flag-flag_send_zlp.patch @@ -0,0 +1,32 @@ +From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Daniele Palmas +Date: Thu, 31 May 2018 11:18:29 +0200 +Subject: net: usb: cdc_mbim: add flag FLAG_SEND_ZLP + +From: Daniele Palmas + +[ Upstream commit 9f7c728332e8966084242fcd951aa46583bc308c ] + +Testing Telit LM940 with ICMP packets > 14552 bytes revealed that +the modem needs FLAG_SEND_ZLP to properly work, otherwise the cdc +mbim data interface won't be anymore responsive. + +Signed-off-by: Daniele Palmas +Acked-by: Bjørn Mork +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/cdc_mbim.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/usb/cdc_mbim.c ++++ b/drivers/net/usb/cdc_mbim.c +@@ -609,7 +609,7 @@ static const struct driver_info cdc_mbim + */ + static const struct driver_info cdc_mbim_info_avoid_altsetting_toggle = { + .description = "CDC MBIM", +- .flags = FLAG_NO_SETINT | FLAG_MULTI_PACKET | FLAG_WWAN, ++ .flags = FLAG_NO_SETINT | FLAG_MULTI_PACKET | FLAG_WWAN | FLAG_SEND_ZLP, + .bind = cdc_mbim_bind, + .unbind = cdc_mbim_unbind, + .manage_power = cdc_mbim_manage_power, diff --git a/queue-4.16/netdev-faq-clarify-davem-s-position-for-stable-backports.patch b/queue-4.16/netdev-faq-clarify-davem-s-position-for-stable-backports.patch new file mode 100644 index 00000000000..c4d95e04ef8 --- /dev/null +++ b/queue-4.16/netdev-faq-clarify-davem-s-position-for-stable-backports.patch @@ -0,0 +1,41 @@ +From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Cong Wang +Date: Tue, 5 Jun 2018 09:48:13 -0700 +Subject: netdev-FAQ: clarify DaveM's position for stable backports + +From: Cong Wang + +[ Upstream commit 75d4e704fa8d2cf33ff295e5b441317603d7f9fd ] + +Per discussion with David at netconf 2018, let's clarify +DaveM's position of handling stable backports in netdev-FAQ. + +This is important for people relying on upstream -stable +releases. + +Cc: Greg Kroah-Hartman +Signed-off-by: Cong Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/networking/netdev-FAQ.txt | 9 +++++++++ + 1 file changed, 9 insertions(+) + +--- a/Documentation/networking/netdev-FAQ.txt ++++ b/Documentation/networking/netdev-FAQ.txt +@@ -179,6 +179,15 @@ A: No. See above answer. In short, if + dash marker line as described in Documentation/process/submitting-patches.rst to + temporarily embed that information into the patch that you send. + ++Q: Are all networking bug fixes backported to all stable releases? 
++ ++A: Due to capacity, Dave could only take care of the backports for the last ++ 2 stable releases. For earlier stable releases, each stable branch maintainer ++ is supposed to take care of them. If you find any patch is missing from an ++ earlier stable branch, please notify stable@vger.kernel.org with either a ++ commit ID or a formal patch backported, and CC Dave and other relevant ++ networking developers. ++ + Q: Someone said that the comment style and coding convention is different + for the networking content. Is this true? + diff --git a/queue-4.16/packet-fix-reserve-calculation.patch b/queue-4.16/packet-fix-reserve-calculation.patch new file mode 100644 index 00000000000..9aa5cb11a56 --- /dev/null +++ b/queue-4.16/packet-fix-reserve-calculation.patch @@ -0,0 +1,46 @@ +From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Willem de Bruijn +Date: Thu, 24 May 2018 18:10:30 -0400 +Subject: packet: fix reserve calculation + +From: Willem de Bruijn + +[ Upstream commit 9aad13b087ab0a588cd68259de618f100053360e ] + +Commit b84bbaf7a6c8 ("packet: in packet_snd start writing at link +layer allocation") ensures that packet_snd always starts writing +the link layer header in reserved headroom allocated for this +purpose. + +This is needed because packets may be shorter than hard_header_len, +in which case the space up to hard_header_len may be zeroed. But +that necessary padding is not accounted for in skb->len. + +The fix, however, is buggy. It calls skb_push, which grows skb->len +when moving skb->data back. But in this case packet length should not +change. + +Instead, call skb_reserve, which moves both skb->data and skb->tail +back, without changing length. + +Fixes: b84bbaf7a6c8 ("packet: in packet_snd start writing at link layer allocation") +Reported-by: Tariq Toukan +Signed-off-by: Willem de Bruijn +Acked-by: Soheil Hassas Yeganeh +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/packet/af_packet.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/packet/af_packet.c ++++ b/net/packet/af_packet.c +@@ -2911,7 +2911,7 @@ static int packet_snd(struct socket *soc + if (unlikely(offset < 0)) + goto out_free; + } else if (reserve) { +- skb_push(skb, reserve); ++ skb_reserve(skb, -reserve); + } + + /* Returns -EFAULT on error */ diff --git a/queue-4.16/qed-fix-mask-for-physical-address-in-ilt-entry.patch b/queue-4.16/qed-fix-mask-for-physical-address-in-ilt-entry.patch new file mode 100644 index 00000000000..06664520452 --- /dev/null +++ b/queue-4.16/qed-fix-mask-for-physical-address-in-ilt-entry.patch @@ -0,0 +1,38 @@ +From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Shahed Shaikh +Date: Mon, 21 May 2018 12:31:47 -0700 +Subject: qed: Fix mask for physical address in ILT entry + +From: Shahed Shaikh + +[ Upstream commit fdd13dd350dda1826579eb5c333d76b14513b812 ] + +ILT entry requires 12 bit right shifted physical address. +Existing mask for ILT entry of physical address i.e. +ILT_ENTRY_PHY_ADDR_MASK is not sufficient to handle 64bit +address because upper 8 bits of 64 bit address were getting +masked which resulted in completer abort error on +PCIe bus due to invalid address. + +Fix that mask to handle 64bit physical address. + +Fixes: fe56b9e6a8d9 ("qed: Add module with basic common support") +Signed-off-by: Shahed Shaikh +Signed-off-by: Ariel Elior +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/qlogic/qed/qed_cxt.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c ++++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c +@@ -77,7 +77,7 @@ + #define ILT_CFG_REG(cli, reg) PSWRQ2_REG_ ## cli ## _ ## reg ## _RT_OFFSET + + /* ILT entry structure */ +-#define ILT_ENTRY_PHY_ADDR_MASK 0x000FFFFFFFFFFFULL ++#define ILT_ENTRY_PHY_ADDR_MASK (~0ULL >> 12) + #define ILT_ENTRY_PHY_ADDR_SHIFT 0 + #define ILT_ENTRY_VALID_MASK 0x1ULL + #define ILT_ENTRY_VALID_SHIFT 52 diff --git a/queue-4.16/rtnetlink-validate-attributes-in-do_setlink.patch b/queue-4.16/rtnetlink-validate-attributes-in-do_setlink.patch new file mode 100644 index 00000000000..b2c94e360ee --- /dev/null +++ b/queue-4.16/rtnetlink-validate-attributes-in-do_setlink.patch @@ -0,0 +1,139 @@ +From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Eric Dumazet +Date: Tue, 5 Jun 2018 09:25:19 -0700 +Subject: rtnetlink: validate attributes in do_setlink() + +From: Eric Dumazet + +[ Upstream commit 644c7eebbfd59e72982d11ec6cc7d39af12450ae ] + +It seems that rtnl_group_changelink() can call do_setlink +while a prior call to validate_linkmsg(dev = NULL, ...) could +not validate IFLA_ADDRESS / IFLA_BROADCAST + +Make sure do_setlink() calls validate_linkmsg() instead +of letting its callers have this responsibility. + +With help from Dmitry Vyukov, thanks a lot ! 
+ +BUG: KMSAN: uninit-value in is_valid_ether_addr include/linux/etherdevice.h:199 [inline] +BUG: KMSAN: uninit-value in eth_prepare_mac_addr_change net/ethernet/eth.c:275 [inline] +BUG: KMSAN: uninit-value in eth_mac_addr+0x203/0x2b0 net/ethernet/eth.c:308 +CPU: 1 PID: 8695 Comm: syz-executor3 Not tainted 4.17.0-rc5+ #103 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack+0x185/0x1d0 lib/dump_stack.c:113 + kmsan_report+0x149/0x260 mm/kmsan/kmsan.c:1084 + __msan_warning_32+0x6e/0xc0 mm/kmsan/kmsan_instr.c:686 + is_valid_ether_addr include/linux/etherdevice.h:199 [inline] + eth_prepare_mac_addr_change net/ethernet/eth.c:275 [inline] + eth_mac_addr+0x203/0x2b0 net/ethernet/eth.c:308 + dev_set_mac_address+0x261/0x530 net/core/dev.c:7157 + do_setlink+0xbc3/0x5fc0 net/core/rtnetlink.c:2317 + rtnl_group_changelink net/core/rtnetlink.c:2824 [inline] + rtnl_newlink+0x1fe9/0x37a0 net/core/rtnetlink.c:2976 + rtnetlink_rcv_msg+0xa32/0x1560 net/core/rtnetlink.c:4646 + netlink_rcv_skb+0x378/0x600 net/netlink/af_netlink.c:2448 + rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:4664 + netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] + netlink_unicast+0x1678/0x1750 net/netlink/af_netlink.c:1336 + netlink_sendmsg+0x104f/0x1350 net/netlink/af_netlink.c:1901 + sock_sendmsg_nosec net/socket.c:629 [inline] + sock_sendmsg net/socket.c:639 [inline] + ___sys_sendmsg+0xec0/0x1310 net/socket.c:2117 + __sys_sendmsg net/socket.c:2155 [inline] + __do_sys_sendmsg net/socket.c:2164 [inline] + __se_sys_sendmsg net/socket.c:2162 [inline] + __x64_sys_sendmsg+0x331/0x460 net/socket.c:2162 + do_syscall_64+0x152/0x230 arch/x86/entry/common.c:287 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 +RIP: 0033:0x455a09 +RSP: 002b:00007fc07480ec68 EFLAGS: 00000246 ORIG_RAX: 000000000000002e +RAX: ffffffffffffffda RBX: 00007fc07480f6d4 RCX: 0000000000455a09 +RDX: 0000000000000000 RSI: 
00000000200003c0 RDI: 0000000000000014 +RBP: 000000000072bea0 R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff +R13: 00000000000005d0 R14: 00000000006fdc20 R15: 0000000000000000 + +Uninit was stored to memory at: + kmsan_save_stack_with_flags mm/kmsan/kmsan.c:279 [inline] + kmsan_save_stack mm/kmsan/kmsan.c:294 [inline] + kmsan_internal_chain_origin+0x12b/0x210 mm/kmsan/kmsan.c:685 + kmsan_memcpy_origins+0x11d/0x170 mm/kmsan/kmsan.c:527 + __msan_memcpy+0x109/0x160 mm/kmsan/kmsan_instr.c:478 + do_setlink+0xb84/0x5fc0 net/core/rtnetlink.c:2315 + rtnl_group_changelink net/core/rtnetlink.c:2824 [inline] + rtnl_newlink+0x1fe9/0x37a0 net/core/rtnetlink.c:2976 + rtnetlink_rcv_msg+0xa32/0x1560 net/core/rtnetlink.c:4646 + netlink_rcv_skb+0x378/0x600 net/netlink/af_netlink.c:2448 + rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:4664 + netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] + netlink_unicast+0x1678/0x1750 net/netlink/af_netlink.c:1336 + netlink_sendmsg+0x104f/0x1350 net/netlink/af_netlink.c:1901 + sock_sendmsg_nosec net/socket.c:629 [inline] + sock_sendmsg net/socket.c:639 [inline] + ___sys_sendmsg+0xec0/0x1310 net/socket.c:2117 + __sys_sendmsg net/socket.c:2155 [inline] + __do_sys_sendmsg net/socket.c:2164 [inline] + __se_sys_sendmsg net/socket.c:2162 [inline] + __x64_sys_sendmsg+0x331/0x460 net/socket.c:2162 + do_syscall_64+0x152/0x230 arch/x86/entry/common.c:287 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 +Uninit was created at: + kmsan_save_stack_with_flags mm/kmsan/kmsan.c:279 [inline] + kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:189 + kmsan_kmalloc+0x94/0x100 mm/kmsan/kmsan.c:315 + kmsan_slab_alloc+0x10/0x20 mm/kmsan/kmsan.c:322 + slab_post_alloc_hook mm/slab.h:446 [inline] + slab_alloc_node mm/slub.c:2753 [inline] + __kmalloc_node_track_caller+0xb32/0x11b0 mm/slub.c:4395 + __kmalloc_reserve net/core/skbuff.c:138 [inline] + __alloc_skb+0x2cb/0x9e0 net/core/skbuff.c:206 + 
alloc_skb include/linux/skbuff.h:988 [inline] + netlink_alloc_large_skb net/netlink/af_netlink.c:1182 [inline] + netlink_sendmsg+0x76e/0x1350 net/netlink/af_netlink.c:1876 + sock_sendmsg_nosec net/socket.c:629 [inline] + sock_sendmsg net/socket.c:639 [inline] + ___sys_sendmsg+0xec0/0x1310 net/socket.c:2117 + __sys_sendmsg net/socket.c:2155 [inline] + __do_sys_sendmsg net/socket.c:2164 [inline] + __se_sys_sendmsg net/socket.c:2162 [inline] + __x64_sys_sendmsg+0x331/0x460 net/socket.c:2162 + do_syscall_64+0x152/0x230 arch/x86/entry/common.c:287 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +Fixes: e7ed828f10bd ("netlink: support setting devgroup parameters") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Cc: Dmitry Vyukov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/rtnetlink.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/net/core/rtnetlink.c ++++ b/net/core/rtnetlink.c +@@ -2245,6 +2245,10 @@ static int do_setlink(const struct sk_bu + const struct net_device_ops *ops = dev->netdev_ops; + int err; + ++ err = validate_linkmsg(dev, tb); ++ if (err < 0) ++ return err; ++ + if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD] || tb[IFLA_IF_NETNSID]) { + struct net *net = rtnl_link_get_net_capable(skb, dev_net(dev), + tb, CAP_NET_ADMIN); +@@ -2608,10 +2612,6 @@ static int rtnl_setlink(struct sk_buff * + goto errout; + } + +- err = validate_linkmsg(dev, tb); +- if (err < 0) +- goto errout; +- + err = do_setlink(skb, dev, ifm, extack, tb, ifname, 0); + errout: + return err; diff --git a/queue-4.16/sctp-not-allow-transport-timeout-value-less-than-hz-5-for-hb_timer.patch b/queue-4.16/sctp-not-allow-transport-timeout-value-less-than-hz-5-for-hb_timer.patch new file mode 100644 index 00000000000..b1ca7d1e4a4 --- /dev/null +++ b/queue-4.16/sctp-not-allow-transport-timeout-value-less-than-hz-5-for-hb_timer.patch @@ -0,0 +1,44 @@ +From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Xin Long +Date: Tue, 5 Jun 2018 12:16:58 
+0800 +Subject: sctp: not allow transport timeout value less than HZ/5 for hb_timer + +From: Xin Long + +[ Upstream commit 1d88ba1ebb2763aa86172cd7ca05dedbeccc0d35 ] + +syzbot reported a rcu_sched self-detected stall on CPU which is caused +by too small a value set on rto_min with SCTP_RTOINFO sockopt. With this +value, hb_timer will get stuck there, as in its timer handler it starts +this timer again with this value, then goes to the timer handler again. + +This problem has been there since the very beginning, and thanks to Eric for the +reproducer shared from a syzbot mail. + +This patch fixes it by not allowing sctp_transport_timeout to return a +smaller value than HZ/5 for hb_timer, which is based on TCP's min rto. + +Note that it doesn't fix this issue by limiting rto_min, as some users +are still using small rto and no proper value was found for it yet. + +Reported-by: syzbot+3dcd59a1f907245f891f@syzkaller.appspotmail.com +Suggested-by: Marcelo Ricardo Leitner +Signed-off-by: Xin Long +Acked-by: Neil Horman +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/transport.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/sctp/transport.c ++++ b/net/sctp/transport.c +@@ -634,7 +634,7 @@ unsigned long sctp_transport_timeout(str + trans->state != SCTP_PF) + timeout += trans->hbinterval; + +- return timeout; ++ return max_t(unsigned long, timeout, HZ / 5); + } + + /* Reset transport variables to their initial values */ diff --git a/queue-4.16/series b/queue-4.16/series index 3da958089b0..9f620d4731d 100644 --- a/queue-4.16/series +++ b/queue-4.16/series @@ -2,3 +2,46 @@ mmap-introduce-sane-default-mmap-limits.patch mmap-relax-file-size-limit-for-regular-files.patch netfilter-nf_flow_table-attach-dst-to-skbs.patch kconfig-avoid-format-overflow-warning-from-gcc-8.1.patch +be2net-fix-error-detection-logic-for-be3.patch +bnx2x-use-the-right-constant.patch +cls_flower-fix-incorrect-idr-release-when-failing-to-modify-rule.patch +dccp-don-t-free-ccid2_hc_tx_sock-struct-in-dccp_disconnect.patch +enic-set-dma-mask-to-47-bit.patch +ip6mr-only-set-ip6mr_table-from-setsockopt-when-ip6mr_new_table-succeeds.patch +ip6_tunnel-remove-magic-mtu-value-0xfff8.patch +ipmr-properly-check-rhltable_init-return-value.patch +ipv4-remove-warning-in-ip_recv_error.patch +ipv6-omit-traffic-class-when-calculating-flow-hash.patch +isdn-eicon-fix-a-missing-check-bug.patch +kcm-fix-use-after-free-caused-by-clonned-sockets.patch +l2tp-fix-refcount-leakage-on-pppol2tp-sockets.patch +mlxsw-spectrum-forbid-creation-of-vlan-1-over-port-lag.patch +netdev-faq-clarify-davem-s-position-for-stable-backports.patch +net-ethernet-davinci_emac-fix-error-handling-in-probe.patch +net-ipv4-add-missing-rta_table-to-rtm_ipv4_policy.patch +net-metrics-add-proper-netlink-validation.patch +net-packet-refine-check-for-priv-area-size.patch +net-phy-broadcom-fix-bcm_write_exp.patch +net-usb-cdc_mbim-add-flag-flag_send_zlp.patch +packet-fix-reserve-calculation.patch 
+qed-fix-mask-for-physical-address-in-ilt-entry.patch +rtnetlink-validate-attributes-in-do_setlink.patch +sctp-not-allow-transport-timeout-value-less-than-hz-5-for-hb_timer.patch +team-use-netdev_features_t-instead-of-u32.patch +vhost-synchronize-iotlb-message-with-dev-cleanup.patch +vrf-check-the-original-netdevice-for-generating-redirect.patch +ipv6-sr-fix-memory-oob-access-in-seg6_do_srh_encap-inline.patch +net-phy-broadcom-fix-auxiliary-control-register-reads.patch +net-sysfs-fix-memory-leak-in-xps-configuration.patch +virtio-net-correctly-transmit-xdp-buff-after-linearizing.patch +virtio-net-fix-leaking-page-for-gso-packet-during-mergeable-xdp.patch +net-mlx4-fix-irq-unsafe-spinlock-usage.patch +net-mlx5e-when-rxfcs-is-set-add-fcs-data-into-checksum-calculation.patch +tun-fix-null-pointer-dereference-in-xdp-redirect.patch +virtio-net-correctly-check-num_buf-during-err-path.patch +net-dsa-b53-fix-for-brcm-tag-issue-in-cygnus-soc.patch +net-sched-cls_api-deal-with-egdev-path-only-if-needed.patch +virtio-net-correctly-redirect-linearized-packet.patch +ip_tunnel-restore-binding-to-ifaces-with-a-large-mtu.patch +net-netsec-reduce-dma-mask-to-40-bits.patch +vhost_net-flush-batched-heads-before-trying-to-busy-polling.patch diff --git a/queue-4.16/team-use-netdev_features_t-instead-of-u32.patch b/queue-4.16/team-use-netdev_features_t-instead-of-u32.patch new file mode 100644 index 00000000000..5a321b2e9ab --- /dev/null +++ b/queue-4.16/team-use-netdev_features_t-instead-of-u32.patch @@ -0,0 +1,33 @@ +From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Dan Carpenter +Date: Mon, 4 Jun 2018 17:46:01 +0300 +Subject: team: use netdev_features_t instead of u32 + +From: Dan Carpenter + +[ Upstream commit 25ea66544bfd1d9df1b7e1502f8717e85fa1e6e6 ] + +This code was introduced in 2011 around the same time that we made +netdev_features_t a u64 type. These days a u32 is not big enough to +hold all the potential features. 
+ +Signed-off-by: Dan Carpenter +Acked-by: Jiri Pirko +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/team/team.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/net/team/team.c ++++ b/drivers/net/team/team.c +@@ -1004,7 +1004,8 @@ static void team_port_disable(struct tea + static void __team_compute_features(struct team *team) + { + struct team_port *port; +- u32 vlan_features = TEAM_VLAN_FEATURES & NETIF_F_ALL_FOR_ALL; ++ netdev_features_t vlan_features = TEAM_VLAN_FEATURES & ++ NETIF_F_ALL_FOR_ALL; + netdev_features_t enc_features = TEAM_ENC_FEATURES; + unsigned short max_hard_header_len = ETH_HLEN; + unsigned int dst_release_flag = IFF_XMIT_DST_RELEASE | diff --git a/queue-4.16/tun-fix-null-pointer-dereference-in-xdp-redirect.patch b/queue-4.16/tun-fix-null-pointer-dereference-in-xdp-redirect.patch new file mode 100644 index 00000000000..ca66f4b8017 --- /dev/null +++ b/queue-4.16/tun-fix-null-pointer-dereference-in-xdp-redirect.patch @@ -0,0 +1,179 @@ +From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Toshiaki Makita +Date: Mon, 28 May 2018 19:37:49 +0900 +Subject: tun: Fix NULL pointer dereference in XDP redirect + +From: Toshiaki Makita + +[ Upstream commit 6547e387d7f52f2ba681a229de3c13e5b9e01ee1 ] + +Calling XDP redirection requires bh disabled. Softirq can call another +XDP function and redirection functions, then the percpu static variable +ri->map can be overwritten to NULL. + +This is a generic XDP case called from tun. 
+ +[ 3535.736058] BUG: unable to handle kernel NULL pointer dereference at 0000000000000018 +[ 3535.743974] PGD 0 P4D 0 +[ 3535.746530] Oops: 0000 [#1] SMP PTI +[ 3535.750049] Modules linked in: vhost_net vhost tap tun bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter sunrpc vfat fat ext4 mbcache jbd2 intel_rapl skx_edac nfit libnvdimm x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm ipmi_ssif irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc ses aesni_intel crypto_simd cryptd enclosure hpwdt hpilo glue_helper ipmi_si pcspkr wmi mei_me ioatdma mei ipmi_devintf shpchp dca ipmi_msghandler lpc_ich acpi_power_meter sch_fq_codel ip_tables xfs libcrc32c sd_mod mgag200 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm smartpqi i40e crc32c_intel scsi_transport_sas tg3 i2c_core ptp pps_core +[ 3535.813456] CPU: 5 PID: 1630 Comm: vhost-1614 Not tainted 4.17.0-rc4 #2 +[ 3535.820127] Hardware name: HPE ProLiant DL360 Gen10/ProLiant DL360 Gen10, BIOS U32 11/14/2017 +[ 3535.828732] RIP: 0010:__xdp_map_lookup_elem+0x5/0x30 +[ 3535.833740] RSP: 0018:ffffb4bc47bf7c58 EFLAGS: 00010246 +[ 3535.839009] RAX: ffff9fdfcfea1c40 RBX: 0000000000000000 RCX: ffff9fdf27fe3100 +[ 3535.846205] RDX: ffff9fdfca769200 RSI: 0000000000000000 RDI: 0000000000000000 +[ 3535.853402] RBP: ffffb4bc491d9000 R08: 00000000000045ad R09: 0000000000000ec0 +[ 3535.860597] R10: 0000000000000001 R11: ffff9fdf26c3ce4e R12: ffff9fdf9e72c000 +[ 3535.867794] R13: 0000000000000000 R14: fffffffffffffff2 R15: ffff9fdfc82cdd00 +[ 3535.874990] FS: 0000000000000000(0000) GS:ffff9fdfcfe80000(0000) knlGS:0000000000000000 +[ 3535.883152] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 3535.888948] CR2: 0000000000000018 CR3: 0000000bde724004 CR4: 00000000007626e0 +[ 3535.896145] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +[ 3535.903342] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +[ 
3535.910538] PKRU: 55555554 +[ 3535.913267] Call Trace: +[ 3535.915736] xdp_do_generic_redirect+0x7a/0x310 +[ 3535.920310] do_xdp_generic.part.117+0x285/0x370 +[ 3535.924970] tun_get_user+0x5b9/0x1260 [tun] +[ 3535.929279] tun_sendmsg+0x52/0x70 [tun] +[ 3535.933237] handle_tx+0x2ad/0x5f0 [vhost_net] +[ 3535.937721] vhost_worker+0xa5/0x100 [vhost] +[ 3535.942030] kthread+0xf5/0x130 +[ 3535.945198] ? vhost_dev_ioctl+0x3b0/0x3b0 [vhost] +[ 3535.950031] ? kthread_bind+0x10/0x10 +[ 3535.953727] ret_from_fork+0x35/0x40 +[ 3535.957334] Code: 0e 74 15 83 f8 10 75 05 e9 49 aa b3 ff f3 c3 0f 1f 80 00 00 00 00 f3 c3 e9 29 9d b3 ff 66 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 <8b> 47 18 83 f8 0e 74 0d 83 f8 10 75 05 e9 49 a9 b3 ff 31 c0 c3 +[ 3535.976387] RIP: __xdp_map_lookup_elem+0x5/0x30 RSP: ffffb4bc47bf7c58 +[ 3535.982883] CR2: 0000000000000018 +[ 3535.987096] ---[ end trace 383b299dd1430240 ]--- +[ 3536.131325] Kernel panic - not syncing: Fatal exception +[ 3536.137484] Kernel Offset: 0x26a00000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) +[ 3536.281406] ---[ end Kernel panic - not syncing: Fatal exception ]--- + +And a kernel with generic case fixed still panics in tun driver XDP +redirect, because it disabled only preemption, but not bh. 
+ +[ 2055.128746] BUG: unable to handle kernel NULL pointer dereference at 0000000000000018 +[ 2055.136662] PGD 0 P4D 0 +[ 2055.139219] Oops: 0000 [#1] SMP PTI +[ 2055.142736] Modules linked in: vhost_net vhost tap tun bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter sunrpc vfat fat ext4 mbcache jbd2 intel_rapl skx_edac nfit libnvdimm x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc ses aesni_intel ipmi_ssif crypto_simd enclosure cryptd hpwdt glue_helper ioatdma hpilo wmi dca pcspkr ipmi_si acpi_power_meter ipmi_devintf shpchp mei_me ipmi_msghandler mei lpc_ich sch_fq_codel ip_tables xfs libcrc32c sd_mod mgag200 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm i40e smartpqi tg3 scsi_transport_sas crc32c_intel i2c_core ptp pps_core +[ 2055.206142] CPU: 6 PID: 1693 Comm: vhost-1683 Tainted: G W 4.17.0-rc5-fix-tun+ #1 +[ 2055.215011] Hardware name: HPE ProLiant DL360 Gen10/ProLiant DL360 Gen10, BIOS U32 11/14/2017 +[ 2055.223617] RIP: 0010:__xdp_map_lookup_elem+0x5/0x30 +[ 2055.228624] RSP: 0018:ffff998b07607cc0 EFLAGS: 00010246 +[ 2055.233892] RAX: ffff8dbd8e235700 RBX: ffff8dbd8ff21c40 RCX: 0000000000000004 +[ 2055.241089] RDX: ffff998b097a9000 RSI: 0000000000000000 RDI: 0000000000000000 +[ 2055.248286] RBP: 0000000000000000 R08: 00000000000065a8 R09: 0000000000005d80 +[ 2055.255483] R10: 0000000000000040 R11: ffff8dbcf0100000 R12: ffff998b097a9000 +[ 2055.262681] R13: ffff8dbd8c98c000 R14: 0000000000000000 R15: ffff998b07607d78 +[ 2055.269879] FS: 0000000000000000(0000) GS:ffff8dbd8ff00000(0000) knlGS:0000000000000000 +[ 2055.278039] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 2055.283834] CR2: 0000000000000018 CR3: 0000000c0c8cc005 CR4: 00000000007626e0 +[ 2055.291030] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +[ 2055.298227] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +[ 
2055.305424] PKRU: 55555554 +[ 2055.308153] Call Trace: +[ 2055.310624] xdp_do_redirect+0x7b/0x380 +[ 2055.314499] tun_get_user+0x10fe/0x12a0 [tun] +[ 2055.318895] tun_sendmsg+0x52/0x70 [tun] +[ 2055.322852] handle_tx+0x2ad/0x5f0 [vhost_net] +[ 2055.327337] vhost_worker+0xa5/0x100 [vhost] +[ 2055.331646] kthread+0xf5/0x130 +[ 2055.334813] ? vhost_dev_ioctl+0x3b0/0x3b0 [vhost] +[ 2055.339646] ? kthread_bind+0x10/0x10 +[ 2055.343343] ret_from_fork+0x35/0x40 +[ 2055.346950] Code: 0e 74 15 83 f8 10 75 05 e9 e9 aa b3 ff f3 c3 0f 1f 80 00 00 00 00 f3 c3 e9 c9 9d b3 ff 66 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 <8b> 47 18 83 f8 0e 74 0d 83 f8 10 75 05 e9 e9 a9 b3 ff 31 c0 c3 +[ 2055.366004] RIP: __xdp_map_lookup_elem+0x5/0x30 RSP: ffff998b07607cc0 +[ 2055.372500] CR2: 0000000000000018 +[ 2055.375856] ---[ end trace 2a2dcc5e9e174268 ]--- +[ 2055.523626] Kernel panic - not syncing: Fatal exception +[ 2055.529796] Kernel Offset: 0x2e000000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) +[ 2055.677539] ---[ end Kernel panic - not syncing: Fatal exception ]--- + +v2: + - Removed preempt_disable/enable since local_bh_disable will prevent + preemption as well, feedback from Jason Wang. + +Fixes: 761876c857cb ("tap: XDP support") +Signed-off-by: Toshiaki Makita +Acked-by: Jason Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/tun.c | 15 +++++++++------ + 1 file changed, 9 insertions(+), 6 deletions(-) + +--- a/drivers/net/tun.c ++++ b/drivers/net/tun.c +@@ -1632,7 +1632,7 @@ static struct sk_buff *tun_build_skb(str + else + *skb_xdp = 0; + +- preempt_disable(); ++ local_bh_disable(); + rcu_read_lock(); + xdp_prog = rcu_dereference(tun->xdp_prog); + if (xdp_prog && !*skb_xdp) { +@@ -1657,7 +1657,7 @@ static struct sk_buff *tun_build_skb(str + if (err) + goto err_redirect; + rcu_read_unlock(); +- preempt_enable(); ++ local_bh_enable(); + return NULL; + case XDP_TX: + xdp_xmit = true; +@@ -1679,7 +1679,7 @@ static struct sk_buff *tun_build_skb(str + skb = build_skb(buf, buflen); + if (!skb) { + rcu_read_unlock(); +- preempt_enable(); ++ local_bh_enable(); + return ERR_PTR(-ENOMEM); + } + +@@ -1692,12 +1692,12 @@ static struct sk_buff *tun_build_skb(str + skb->dev = tun->dev; + generic_xdp_tx(skb, xdp_prog); + rcu_read_unlock(); +- preempt_enable(); ++ local_bh_enable(); + return NULL; + } + + rcu_read_unlock(); +- preempt_enable(); ++ local_bh_enable(); + + return skb; + +@@ -1705,7 +1705,7 @@ err_redirect: + put_page(alloc_frag->page); + err_xdp: + rcu_read_unlock(); +- preempt_enable(); ++ local_bh_enable(); + this_cpu_inc(tun->pcpu_stats->rx_dropped); + return NULL; + } +@@ -1901,16 +1901,19 @@ static ssize_t tun_get_user(struct tun_s + struct bpf_prog *xdp_prog; + int ret; + ++ local_bh_disable(); + rcu_read_lock(); + xdp_prog = rcu_dereference(tun->xdp_prog); + if (xdp_prog) { + ret = do_xdp_generic(xdp_prog, skb); + if (ret != XDP_PASS) { + rcu_read_unlock(); ++ local_bh_enable(); + return total_len; + } + } + rcu_read_unlock(); ++ local_bh_enable(); + } + + rcu_read_lock(); diff --git a/queue-4.16/vhost-synchronize-iotlb-message-with-dev-cleanup.patch b/queue-4.16/vhost-synchronize-iotlb-message-with-dev-cleanup.patch new file mode 100644 index 00000000000..9b2171d51ab --- /dev/null +++ 
b/queue-4.16/vhost-synchronize-iotlb-message-with-dev-cleanup.patch @@ -0,0 +1,57 @@ +From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Jason Wang +Date: Tue, 22 May 2018 19:58:57 +0800 +Subject: vhost: synchronize IOTLB message with dev cleanup + +From: Jason Wang + +[ Upstream commit 1b15ad683ab42a203f98b67045b40720e99d0e9a ] + +DaeRyong Jeong reports a race between vhost_dev_cleanup() and +vhost_process_iotlb_msg(): + +Thread interleaving: +CPU0 (vhost_process_iotlb_msg) CPU1 (vhost_dev_cleanup) +(In the case of both VHOST_IOTLB_UPDATE and +VHOST_IOTLB_INVALIDATE) + +===== ===== + vhost_umem_clean(dev->iotlb); +if (!dev->iotlb) { + ret = -EFAULT; + break; +} + dev->iotlb = NULL; + +The reason is we don't synchronize between them, fixing by protecting +vhost_process_iotlb_msg() with dev mutex. + +Reported-by: DaeRyong Jeong +Fixes: 6b1e6cc7855b0 ("vhost: new device IOTLB API") +Signed-off-by: Jason Wang +Acked-by: Michael S. Tsirkin +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/vhost/vhost.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/vhost/vhost.c ++++ b/drivers/vhost/vhost.c +@@ -981,6 +981,7 @@ static int vhost_process_iotlb_msg(struc + { + int ret = 0; + ++ mutex_lock(&dev->mutex); + vhost_dev_lock_vqs(dev); + switch (msg->type) { + case VHOST_IOTLB_UPDATE: +@@ -1016,6 +1017,8 @@ static int vhost_process_iotlb_msg(struc + } + + vhost_dev_unlock_vqs(dev); ++ mutex_unlock(&dev->mutex); ++ + return ret; + } + ssize_t vhost_chr_write_iter(struct vhost_dev *dev, diff --git a/queue-4.16/vhost_net-flush-batched-heads-before-trying-to-busy-polling.patch b/queue-4.16/vhost_net-flush-batched-heads-before-trying-to-busy-polling.patch new file mode 100644 index 00000000000..043f2d3ec01 --- /dev/null +++ b/queue-4.16/vhost_net-flush-batched-heads-before-trying-to-busy-polling.patch @@ -0,0 +1,114 @@ +From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Jason Wang +Date: Tue, 29 May 2018 14:18:19 +0800 +Subject: vhost_net: 
flush batched heads before trying to busy polling + +From: Jason Wang + +[ Upstream commit f5a4941aa6d190e676065e8f4ed35999f52a01c3 ] + +After commit e2b3b35eb989 ("vhost_net: batch used ring update in rx"), +we tend to batch updating used heads. But it doesn't flush batched +heads before trying to do busy polling, this will cause vhost to wait +for guest TX which waits for the used RX. Fixing by flush batched +heads before busy loop. + +1 byte TCP_RR performance recovers from 13107.83 to 50402.65. + +Fixes: e2b3b35eb989 ("vhost_net: batch used ring update in rx") +Signed-off-by: Jason Wang +Acked-by: Michael S. Tsirkin +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/vhost/net.c | 37 ++++++++++++++++++++++++------------- + 1 file changed, 24 insertions(+), 13 deletions(-) + +--- a/drivers/vhost/net.c ++++ b/drivers/vhost/net.c +@@ -101,7 +101,9 @@ struct vhost_net_virtqueue { + /* vhost zerocopy support fields below: */ + /* last used idx for outstanding DMA zerocopy buffers */ + int upend_idx; +- /* first used idx for DMA done zerocopy buffers */ ++ /* For TX, first used idx for DMA done zerocopy buffers ++ * For RX, number of batched heads ++ */ + int done_idx; + /* an array of userspace buffers info */ + struct ubuf_info *ubuf_info; +@@ -620,6 +622,18 @@ static int sk_has_rx_data(struct sock *s + return skb_queue_empty(&sk->sk_receive_queue); + } + ++static void vhost_rx_signal_used(struct vhost_net_virtqueue *nvq) ++{ ++ struct vhost_virtqueue *vq = &nvq->vq; ++ struct vhost_dev *dev = vq->dev; ++ ++ if (!nvq->done_idx) ++ return; ++ ++ vhost_add_used_and_signal_n(dev, vq, vq->heads, nvq->done_idx); ++ nvq->done_idx = 0; ++} ++ + static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk) + { + struct vhost_net_virtqueue *rvq = &net->vqs[VHOST_NET_VQ_RX]; +@@ -629,6 +643,8 @@ static int vhost_net_rx_peek_head_len(st + int len = peek_head_len(rvq, sk); + + if (!len && vq->busyloop_timeout) { ++ /* Flush batched 
heads first */ ++ vhost_rx_signal_used(rvq); + /* Both tx vq and rx socket were polled here */ + mutex_lock_nested(&vq->mutex, 1); + vhost_disable_notify(&net->dev, vq); +@@ -756,7 +772,7 @@ static void handle_rx(struct vhost_net * + }; + size_t total_len = 0; + int err, mergeable; +- s16 headcount, nheads = 0; ++ s16 headcount; + size_t vhost_hlen, sock_hlen; + size_t vhost_len, sock_len; + struct socket *sock; +@@ -784,8 +800,8 @@ static void handle_rx(struct vhost_net * + while ((sock_len = vhost_net_rx_peek_head_len(net, sock->sk))) { + sock_len += sock_hlen; + vhost_len = sock_len + vhost_hlen; +- headcount = get_rx_bufs(vq, vq->heads + nheads, vhost_len, +- &in, vq_log, &log, ++ headcount = get_rx_bufs(vq, vq->heads + nvq->done_idx, ++ vhost_len, &in, vq_log, &log, + likely(mergeable) ? UIO_MAXIOV : 1); + /* On error, stop handling until the next kick. */ + if (unlikely(headcount < 0)) +@@ -856,12 +872,9 @@ static void handle_rx(struct vhost_net * + vhost_discard_vq_desc(vq, headcount); + goto out; + } +- nheads += headcount; +- if (nheads > VHOST_RX_BATCH) { +- vhost_add_used_and_signal_n(&net->dev, vq, vq->heads, +- nheads); +- nheads = 0; +- } ++ nvq->done_idx += headcount; ++ if (nvq->done_idx > VHOST_RX_BATCH) ++ vhost_rx_signal_used(nvq); + if (unlikely(vq_log)) + vhost_log_write(vq, vq_log, log, vhost_len); + total_len += vhost_len; +@@ -872,9 +885,7 @@ static void handle_rx(struct vhost_net * + } + vhost_net_enable_vq(net, vq); + out: +- if (nheads) +- vhost_add_used_and_signal_n(&net->dev, vq, vq->heads, +- nheads); ++ vhost_rx_signal_used(nvq); + mutex_unlock(&vq->mutex); + } + diff --git a/queue-4.16/virtio-net-correctly-check-num_buf-during-err-path.patch b/queue-4.16/virtio-net-correctly-check-num_buf-during-err-path.patch new file mode 100644 index 00000000000..073897c6e9b --- /dev/null +++ b/queue-4.16/virtio-net-correctly-check-num_buf-during-err-path.patch @@ -0,0 +1,33 @@ +From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Jason Wang +Date: 
Tue, 22 May 2018 11:44:30 +0800 +Subject: virtio-net: correctly check num_buf during err path + +From: Jason Wang + +[ Upstream commit 850e088d5bbb333342fd4def08d0a4035f2b7126 ] + +If we successfully linearize the packet, num_buf will be set to zero +which may confuse error handling path which assumes num_buf is at +least 1 and this can lead the code tries to pop the descriptor of next +buffer. Fixing this by checking num_buf against 1 before decreasing. + +Fixes: 4941d472bf95 ("virtio-net: do not reset during XDP set") +Signed-off-by: Jason Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/virtio_net.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/virtio_net.c ++++ b/drivers/net/virtio_net.c +@@ -874,7 +874,7 @@ err_xdp: + rcu_read_unlock(); + err_skb: + put_page(page); +- while (--num_buf) { ++ while (num_buf-- > 1) { + buf = virtqueue_get_buf(rq->vq, &len); + if (unlikely(!buf)) { + pr_debug("%s: rx error: %d buffers missing\n", diff --git a/queue-4.16/virtio-net-correctly-redirect-linearized-packet.patch b/queue-4.16/virtio-net-correctly-redirect-linearized-packet.patch new file mode 100644 index 00000000000..c6287e6f292 --- /dev/null +++ b/queue-4.16/virtio-net-correctly-redirect-linearized-packet.patch @@ -0,0 +1,36 @@ +From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Jason Wang +Date: Tue, 22 May 2018 11:44:28 +0800 +Subject: virtio-net: correctly redirect linearized packet + +From: Jason Wang + +[ Upstream commit 6890418bbb780f0ee9cf124055afa79777f1b4f1 ] + +After a linearized packet was redirected by XDP, we should not go for +the err path which will try to pop buffers for the next packet and +increase the drop counter. Fixing this by just drop the page refcnt +for the original page. + +Fixes: 186b3c998c50 ("virtio-net: support XDP_REDIRECT") +Reported-by: David Ahern +Tested-by: David Ahern +Acked-by: Michael S. Tsirkin +Signed-off-by: Jason Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/virtio_net.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/virtio_net.c ++++ b/drivers/net/virtio_net.c +@@ -786,7 +786,7 @@ static struct sk_buff *receive_mergeable + } + *xdp_xmit = true; + if (unlikely(xdp_page != page)) +- goto err_xdp; ++ put_page(page); + rcu_read_unlock(); + goto xdp_xmit; + default: diff --git a/queue-4.16/virtio-net-correctly-transmit-xdp-buff-after-linearizing.patch b/queue-4.16/virtio-net-correctly-transmit-xdp-buff-after-linearizing.patch new file mode 100644 index 00000000000..138e9e7e609 --- /dev/null +++ b/queue-4.16/virtio-net-correctly-transmit-xdp-buff-after-linearizing.patch @@ -0,0 +1,35 @@ +From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Jason Wang +Date: Tue, 22 May 2018 11:44:29 +0800 +Subject: virtio-net: correctly transmit XDP buff after linearizing + +From: Jason Wang + +[ Upstream commit 5d458a13dd59d04b4d6658a6d5b94d42732b15ae ] + +We should not go for the error path after successfully transmitting a +XDP buffer after linearizing. Since the error path may try to pop and +drop next packet and increase the drop counters. Fixing this by simply +drop the refcnt of original page and go for xmit path. + +Fixes: 72979a6c3590 ("virtio_net: xdp, add slowpath case for non contiguous buffers") +Cc: John Fastabend +Acked-by: Michael S. Tsirkin +Signed-off-by: Jason Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/virtio_net.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/virtio_net.c ++++ b/drivers/net/virtio_net.c +@@ -775,7 +775,7 @@ static struct sk_buff *receive_mergeable + } + *xdp_xmit = true; + if (unlikely(xdp_page != page)) +- goto err_xdp; ++ put_page(page); + rcu_read_unlock(); + goto xdp_xmit; + case XDP_REDIRECT: diff --git a/queue-4.16/virtio-net-fix-leaking-page-for-gso-packet-during-mergeable-xdp.patch b/queue-4.16/virtio-net-fix-leaking-page-for-gso-packet-during-mergeable-xdp.patch new file mode 100644 index 00000000000..3e59578f24f --- /dev/null +++ b/queue-4.16/virtio-net-fix-leaking-page-for-gso-packet-during-mergeable-xdp.patch @@ -0,0 +1,53 @@ +From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Jason Wang +Date: Tue, 22 May 2018 11:44:31 +0800 +Subject: virtio-net: fix leaking page for gso packet during mergeable XDP + +From: Jason Wang + +[ Upstream commit 3d62b2a0db505bbf9ed0755f254e45d775f9807f ] + +We need to drop refcnt to xdp_page if we see a gso packet. Otherwise +it will be leaked. Fixing this by moving the check of gso packet above +the linearizing logic. While at it, remove useless comment as well. + +Cc: John Fastabend +Fixes: 72979a6c3590 ("virtio_net: xdp, add slowpath case for non contiguous buffers") +Signed-off-by: Jason Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/virtio_net.c | 15 +++++++-------- + 1 file changed, 7 insertions(+), 8 deletions(-) + +--- a/drivers/net/virtio_net.c ++++ b/drivers/net/virtio_net.c +@@ -707,6 +707,13 @@ static struct sk_buff *receive_mergeable + void *data; + u32 act; + ++ /* Transient failure which in theory could occur if ++ * in-flight packets from before XDP was enabled reach ++ * the receive path after XDP is loaded. 
++ */ ++ if (unlikely(hdr->hdr.gso_type)) ++ goto err_xdp; ++ + /* This happens when rx buffer size is underestimated + * or headroom is not enough because of the buffer + * was refilled before XDP is set. This should only +@@ -727,14 +734,6 @@ static struct sk_buff *receive_mergeable + xdp_page = page; + } + +- /* Transient failure which in theory could occur if +- * in-flight packets from before XDP was enabled reach +- * the receive path after XDP is loaded. In practice I +- * was not able to create this condition. +- */ +- if (unlikely(hdr->hdr.gso_type)) +- goto err_xdp; +- + /* Allow consuming headroom but reserve enough space to push + * the descriptor on if we get an XDP_TX return code. + */ diff --git a/queue-4.16/vrf-check-the-original-netdevice-for-generating-redirect.patch b/queue-4.16/vrf-check-the-original-netdevice-for-generating-redirect.patch new file mode 100644 index 00000000000..f15d8c4d3f3 --- /dev/null +++ b/queue-4.16/vrf-check-the-original-netdevice-for-generating-redirect.patch @@ -0,0 +1,48 @@ +From foo@baz Fri Jun 8 06:51:57 CEST 2018 +From: Stephen Suryaputra +Date: Fri, 1 Jun 2018 00:05:21 -0400 +Subject: vrf: check the original netdevice for generating redirect + +From: Stephen Suryaputra + +[ Upstream commit 2f17becfbea5e9a0529b51da7345783e96e69516 ] + +Use the right device to determine if redirect should be sent especially +when using vrf. Same as well as when sending the redirect. + +Signed-off-by: Stephen Suryaputra +Acked-by: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_output.c | 3 ++- + net/ipv6/ndisc.c | 6 ++++++ + 2 files changed, 8 insertions(+), 1 deletion(-) + +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -507,7 +507,8 @@ int ip6_forward(struct sk_buff *skb) + send redirects to source routed frames. + We don't send redirects to frames decapsulated from IPsec. 
+ */ +- if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) { ++ if (IP6CB(skb)->iif == dst->dev->ifindex && ++ opt->srcrt == 0 && !skb_sec_path(skb)) { + struct in6_addr *target = NULL; + struct inet_peer *peer; + struct rt6_info *rt; +--- a/net/ipv6/ndisc.c ++++ b/net/ipv6/ndisc.c +@@ -1576,6 +1576,12 @@ void ndisc_send_redirect(struct sk_buff + ops_data_buf[NDISC_OPS_REDIRECT_DATA_SPACE], *ops_data = NULL; + bool ret; + ++ if (netif_is_l3_master(skb->dev)) { ++ dev = __dev_get_by_index(dev_net(skb->dev), IPCB(skb)->iif); ++ if (!dev) ++ return; ++ } ++ + if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) { + ND_PRINTK(2, warn, "Redirect: no link-local address on %s\n", + dev->name); -- 2.47.3