From: Greg Kroah-Hartman Date: Thu, 13 Dec 2018 09:45:58 +0000 (+0100) Subject: 4.19-stable patches X-Git-Tag: v4.19.10~32 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=15189b70545489833fa3fdfe7c13f7499a070196;p=thirdparty%2Fkernel%2Fstable-queue.git 4.19-stable patches added patches: ipv4-ipv6-netfilter-adjust-the-frag-mem-limit-when-truesize-changes.patch ipv6-check-available-headroom-in-ip6_xmit-even-without-options.patch ipv6-sr-properly-initialize-flowi6-prior-passing-to-ip6_route_output.patch neighbour-avoid-writing-before-skb-head-in-neigh_hh_output.patch net-8139cp-fix-a-bug-triggered-by-changing-mtu-with-network-traffic.patch net-fix-xps-static_key-accounting.patch net-mlx4_core-correctly-set-pfc-param-if-global-pause-is-turned-off.patch net-mlx4_en-change-min-mtu-size-to-eth_min_mtu.patch net-phy-don-t-allow-__set_phy_supported-to-add-unsupported-modes.patch net-phy-sfp-correct-store-of-detected-link-modes.patch net-prevent-invalid-access-to-skb-prev-in-__qdisc_drop_all.patch net-restore-call-to-netdev_queue_numa_node_write-when-resetting-xps.patch net-use-skb_list_del_init-to-remove-from-rx-sublists.patch revert-net-ibm-emac-wrong-bit-is-used-for-sta-control.patch rtnetlink-ndo_dflt_fdb_dump-only-work-for-arphrd_ether-devices.patch sctp-kfree_rcu-asoc.patch sctp-update-frag_point-when-stream_interleave-is-set.patch tcp-do-not-underestimate-rwnd_limited.patch tcp-fix-null-ref-in-tail-loss-probe.patch tun-forbid-iface-creation-with-rtnl-ops.patch virtio-net-keep-vnet-header-zeroed-after-processing-xdp.patch --- diff --git a/queue-4.19/ipv4-ipv6-netfilter-adjust-the-frag-mem-limit-when-truesize-changes.patch b/queue-4.19/ipv4-ipv6-netfilter-adjust-the-frag-mem-limit-when-truesize-changes.patch new file mode 100644 index 00000000000..d242ab467b0 --- /dev/null +++ b/queue-4.19/ipv4-ipv6-netfilter-adjust-the-frag-mem-limit-when-truesize-changes.patch @@ -0,0 +1,132 @@ +From foo@baz Thu Dec 13 10:38:53 CET 2018 +From: Jiri Wiesner +Date: Wed, 5 Dec 
2018 16:55:29 +0100 +Subject: ipv4: ipv6: netfilter: Adjust the frag mem limit when truesize changes + +From: Jiri Wiesner + +[ Upstream commit ebaf39e6032faf77218220707fc3fa22487784e0 ] + +The *_frag_reasm() functions are susceptible to miscalculating the byte +count of packet fragments in case the truesize of a head buffer changes. +The truesize member may be changed by the call to skb_unclone(), leaving +the fragment memory limit counter unbalanced even if all fragments are +processed. This miscalculation goes unnoticed as long as the network +namespace which holds the counter is not destroyed. + +Should an attempt be made to destroy a network namespace that holds an +unbalanced fragment memory limit counter the cleanup of the namespace +never finishes. The thread handling the cleanup gets stuck in +inet_frags_exit_net() waiting for the percpu counter to reach zero. The +thread is usually in running state with a stacktrace similar to: + + PID: 1073 TASK: ffff880626711440 CPU: 1 COMMAND: "kworker/u48:4" + #5 [ffff880621563d48] _raw_spin_lock at ffffffff815f5480 + #6 [ffff880621563d48] inet_evict_bucket at ffffffff8158020b + #7 [ffff880621563d80] inet_frags_exit_net at ffffffff8158051c + #8 [ffff880621563db0] ops_exit_list at ffffffff814f5856 + #9 [ffff880621563dd8] cleanup_net at ffffffff814f67c0 + #10 [ffff880621563e38] process_one_work at ffffffff81096f14 + +It is not possible to create new network namespaces, and processes +that call unshare() end up being stuck in uninterruptible sleep state +waiting to acquire the net_mutex. + +The bug was observed in the IPv6 netfilter code by Per Sundstrom. +I thank him for his analysis of the problem. The parts of this patch +that apply to IPv4 and IPv6 fragment reassembly are preemptive measures. + +Signed-off-by: Jiri Wiesner +Reported-by: Per Sundstrom +Acked-by: Peter Oskolkov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_fragment.c | 7 +++++++ + net/ipv6/netfilter/nf_conntrack_reasm.c | 8 +++++++- + net/ipv6/reassembly.c | 8 +++++++- + 3 files changed, 21 insertions(+), 2 deletions(-) + +--- a/net/ipv4/ip_fragment.c ++++ b/net/ipv4/ip_fragment.c +@@ -513,6 +513,7 @@ static int ip_frag_reasm(struct ipq *qp, + struct rb_node *rbn; + int len; + int ihlen; ++ int delta; + int err; + u8 ecn; + +@@ -554,10 +555,16 @@ static int ip_frag_reasm(struct ipq *qp, + if (len > 65535) + goto out_oversize; + ++ delta = - head->truesize; ++ + /* Head of list must not be cloned. */ + if (skb_unclone(head, GFP_ATOMIC)) + goto out_nomem; + ++ delta += head->truesize; ++ if (delta) ++ add_frag_mem_limit(qp->q.net, delta); ++ + /* If the first fragment is fragmented itself, we split + * it to two chunks: the first with data and paged part + * and the second, holding only fragments. */ +--- a/net/ipv6/netfilter/nf_conntrack_reasm.c ++++ b/net/ipv6/netfilter/nf_conntrack_reasm.c +@@ -341,7 +341,7 @@ static bool + nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev) + { + struct sk_buff *fp, *head = fq->q.fragments; +- int payload_len; ++ int payload_len, delta; + u8 ecn; + + inet_frag_kill(&fq->q); +@@ -363,10 +363,16 @@ nf_ct_frag6_reasm(struct frag_queue *fq, + return false; + } + ++ delta = - head->truesize; ++ + /* Head of list must not be cloned. */ + if (skb_unclone(head, GFP_ATOMIC)) + return false; + ++ delta += head->truesize; ++ if (delta) ++ add_frag_mem_limit(fq->q.net, delta); ++ + /* If the first fragment is fragmented itself, we split + * it to two chunks: the first with data and paged part + * and the second, holding only fragments. 
*/ +--- a/net/ipv6/reassembly.c ++++ b/net/ipv6/reassembly.c +@@ -281,7 +281,7 @@ static int ip6_frag_reasm(struct frag_qu + { + struct net *net = container_of(fq->q.net, struct net, ipv6.frags); + struct sk_buff *fp, *head = fq->q.fragments; +- int payload_len; ++ int payload_len, delta; + unsigned int nhoff; + int sum_truesize; + u8 ecn; +@@ -322,10 +322,16 @@ static int ip6_frag_reasm(struct frag_qu + if (payload_len > IPV6_MAXPLEN) + goto out_oversize; + ++ delta = - head->truesize; ++ + /* Head of list must not be cloned. */ + if (skb_unclone(head, GFP_ATOMIC)) + goto out_oom; + ++ delta += head->truesize; ++ if (delta) ++ add_frag_mem_limit(fq->q.net, delta); ++ + /* If the first fragment is fragmented itself, we split + * it to two chunks: the first with data and paged part + * and the second, holding only fragments. */ diff --git a/queue-4.19/ipv6-check-available-headroom-in-ip6_xmit-even-without-options.patch b/queue-4.19/ipv6-check-available-headroom-in-ip6_xmit-even-without-options.patch new file mode 100644 index 00000000000..135a91717ac --- /dev/null +++ b/queue-4.19/ipv6-check-available-headroom-in-ip6_xmit-even-without-options.patch @@ -0,0 +1,137 @@ +From foo@baz Thu Dec 13 10:38:53 CET 2018 +From: Stefano Brivio +Date: Thu, 6 Dec 2018 19:30:36 +0100 +Subject: ipv6: Check available headroom in ip6_xmit() even without options + +From: Stefano Brivio + +[ Upstream commit 66033f47ca60294a95fc85ec3a3cc909dab7b765 ] + +Even if we send an IPv6 packet without options, MAX_HEADER might not be +enough to account for the additional headroom required by alignment of +hardware headers. + +On a configuration without HYPERV_NET, WLAN, AX25, and with IPV6_TUNNEL, +sending short SCTP packets over IPv4 over L2TP over IPv6, we start with +100 bytes of allocated headroom in sctp_packet_transmit(), end up with 54 +bytes after l2tp_xmit_skb(), and 14 bytes in ip6_finish_output2(). 
+ +Those would be enough to append our 14 bytes header, but we're going to +align that to 16 bytes, and write 2 bytes out of the allocated slab in +neigh_hh_output(). + +KASan says: + +[ 264.967848] ================================================================== +[ 264.967861] BUG: KASAN: slab-out-of-bounds in ip6_finish_output2+0x1aec/0x1c70 +[ 264.967866] Write of size 16 at addr 000000006af1c7fe by task netperf/6201 +[ 264.967870] +[ 264.967876] CPU: 0 PID: 6201 Comm: netperf Not tainted 4.20.0-rc4+ #1 +[ 264.967881] Hardware name: IBM 2827 H43 400 (z/VM 6.4.0) +[ 264.967887] Call Trace: +[ 264.967896] ([<00000000001347d6>] show_stack+0x56/0xa0) +[ 264.967903] [<00000000017e379c>] dump_stack+0x23c/0x290 +[ 264.967912] [<00000000007bc594>] print_address_description+0xf4/0x290 +[ 264.967919] [<00000000007bc8fc>] kasan_report+0x13c/0x240 +[ 264.967927] [<000000000162f5e4>] ip6_finish_output2+0x1aec/0x1c70 +[ 264.967935] [<000000000163f890>] ip6_finish_output+0x430/0x7f0 +[ 264.967943] [<000000000163fe44>] ip6_output+0x1f4/0x580 +[ 264.967953] [<000000000163882a>] ip6_xmit+0xfea/0x1ce8 +[ 264.967963] [<00000000017396e2>] inet6_csk_xmit+0x282/0x3f8 +[ 264.968033] [<000003ff805fb0ba>] l2tp_xmit_skb+0xe02/0x13e0 [l2tp_core] +[ 264.968037] [<000003ff80631192>] l2tp_eth_dev_xmit+0xda/0x150 [l2tp_eth] +[ 264.968041] [<0000000001220020>] dev_hard_start_xmit+0x268/0x928 +[ 264.968069] [<0000000001330e8e>] sch_direct_xmit+0x7ae/0x1350 +[ 264.968071] [<000000000122359c>] __dev_queue_xmit+0x2b7c/0x3478 +[ 264.968075] [<00000000013d2862>] ip_finish_output2+0xce2/0x11a0 +[ 264.968078] [<00000000013d9b14>] ip_finish_output+0x56c/0x8c8 +[ 264.968081] [<00000000013ddd1e>] ip_output+0x226/0x4c0 +[ 264.968083] [<00000000013dbd6c>] __ip_queue_xmit+0x894/0x1938 +[ 264.968100] [<000003ff80bc3a5c>] sctp_packet_transmit+0x29d4/0x3648 [sctp] +[ 264.968116] [<000003ff80b7bf68>] sctp_outq_flush_ctrl.constprop.5+0x8d0/0xe50 [sctp] +[ 264.968131] [<000003ff80b7c716>] 
sctp_outq_flush+0x22e/0x7d8 [sctp] +[ 264.968146] [<000003ff80b35c68>] sctp_cmd_interpreter.isra.16+0x530/0x6800 [sctp] +[ 264.968161] [<000003ff80b3410a>] sctp_do_sm+0x222/0x648 [sctp] +[ 264.968177] [<000003ff80bbddac>] sctp_primitive_ASSOCIATE+0xbc/0xf8 [sctp] +[ 264.968192] [<000003ff80b93328>] __sctp_connect+0x830/0xc20 [sctp] +[ 264.968208] [<000003ff80bb11ce>] sctp_inet_connect+0x2e6/0x378 [sctp] +[ 264.968212] [<0000000001197942>] __sys_connect+0x21a/0x450 +[ 264.968215] [<000000000119aff8>] sys_socketcall+0x3d0/0xb08 +[ 264.968218] [<000000000184ea7a>] system_call+0x2a2/0x2c0 + +[...] + +Just like ip_finish_output2() does for IPv4, check that we have enough +headroom in ip6_xmit(), and reallocate it if we don't. + +This issue is older than git history. + +Reported-by: Jianlin Shi +Signed-off-by: Stefano Brivio +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_output.c | 42 +++++++++++++++++++++--------------------- + 1 file changed, 21 insertions(+), 21 deletions(-) + +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -195,37 +195,37 @@ int ip6_xmit(const struct sock *sk, stru + const struct ipv6_pinfo *np = inet6_sk(sk); + struct in6_addr *first_hop = &fl6->daddr; + struct dst_entry *dst = skb_dst(skb); ++ unsigned int head_room; + struct ipv6hdr *hdr; + u8 proto = fl6->flowi6_proto; + int seg_len = skb->len; + int hlimit = -1; + u32 mtu; + +- if (opt) { +- unsigned int head_room; +- +- /* First: exthdrs may take lots of space (~8K for now) +- MAX_HEADER is not enough. 
+- */ +- head_room = opt->opt_nflen + opt->opt_flen; +- seg_len += head_room; +- head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev); ++ head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev); ++ if (opt) ++ head_room += opt->opt_nflen + opt->opt_flen; + +- if (skb_headroom(skb) < head_room) { +- struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); +- if (!skb2) { +- IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), +- IPSTATS_MIB_OUTDISCARDS); +- kfree_skb(skb); +- return -ENOBUFS; +- } +- if (skb->sk) +- skb_set_owner_w(skb2, skb->sk); +- consume_skb(skb); +- skb = skb2; ++ if (unlikely(skb_headroom(skb) < head_room)) { ++ struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); ++ if (!skb2) { ++ IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), ++ IPSTATS_MIB_OUTDISCARDS); ++ kfree_skb(skb); ++ return -ENOBUFS; + } ++ if (skb->sk) ++ skb_set_owner_w(skb2, skb->sk); ++ consume_skb(skb); ++ skb = skb2; ++ } ++ ++ if (opt) { ++ seg_len += opt->opt_nflen + opt->opt_flen; ++ + if (opt->opt_flen) + ipv6_push_frag_opts(skb, opt, &proto); ++ + if (opt->opt_nflen) + ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop, + &fl6->saddr); diff --git a/queue-4.19/ipv6-sr-properly-initialize-flowi6-prior-passing-to-ip6_route_output.patch b/queue-4.19/ipv6-sr-properly-initialize-flowi6-prior-passing-to-ip6_route_output.patch new file mode 100644 index 00000000000..db6a51bcff4 --- /dev/null +++ b/queue-4.19/ipv6-sr-properly-initialize-flowi6-prior-passing-to-ip6_route_output.patch @@ -0,0 +1,30 @@ +From foo@baz Thu Dec 13 10:38:53 CET 2018 +From: Shmulik Ladkani +Date: Fri, 7 Dec 2018 09:50:17 +0200 +Subject: ipv6: sr: properly initialize flowi6 prior passing to ip6_route_output + +From: Shmulik Ladkani + +[ Upstream commit 1b4e5ad5d6b9f15cd0b5121f86d4719165958417 ] + +In 'seg6_output', stack variable 'struct flowi6 fl6' was missing +initialization. 
+ +Fixes: 6c8702c60b88 ("ipv6: sr: add support for SRH encapsulation and injection with lwtunnels") +Signed-off-by: Shmulik Ladkani +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/seg6_iptunnel.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/ipv6/seg6_iptunnel.c ++++ b/net/ipv6/seg6_iptunnel.c +@@ -347,6 +347,7 @@ static int seg6_output(struct net *net, + struct ipv6hdr *hdr = ipv6_hdr(skb); + struct flowi6 fl6; + ++ memset(&fl6, 0, sizeof(fl6)); + fl6.daddr = hdr->daddr; + fl6.saddr = hdr->saddr; + fl6.flowlabel = ip6_flowinfo(hdr); diff --git a/queue-4.19/neighbour-avoid-writing-before-skb-head-in-neigh_hh_output.patch b/queue-4.19/neighbour-avoid-writing-before-skb-head-in-neigh_hh_output.patch new file mode 100644 index 00000000000..198f1831093 --- /dev/null +++ b/queue-4.19/neighbour-avoid-writing-before-skb-head-in-neigh_hh_output.patch @@ -0,0 +1,87 @@ +From foo@baz Thu Dec 13 10:38:53 CET 2018 +From: Stefano Brivio +Date: Thu, 6 Dec 2018 19:30:37 +0100 +Subject: neighbour: Avoid writing before skb->head in neigh_hh_output() + +From: Stefano Brivio + +[ Upstream commit e6ac64d4c4d095085d7dd71cbd05704ac99829b2 ] + +While skb_push() makes the kernel panic if the skb headroom is less than +the unaligned hardware header size, it will proceed normally in case we +copy more than that because of alignment, and we'll silently corrupt +adjacent slabs. + +In the case fixed by the previous patch, +"ipv6: Check available headroom in ip6_xmit() even without options", we +end up in neigh_hh_output() with 14 bytes headroom, 14 bytes hardware +header and write 16 bytes, starting 2 bytes before the allocated buffer. + +Always check we're not writing before skb->head and, if the headroom is +not enough, warn and drop the packet. 
+ +v2: + - instead of panicking with BUG_ON(), WARN_ON_ONCE() and drop the packet + (Eric Dumazet) + - if we avoid the panic, though, we need to explicitly check the headroom + before the memcpy(), otherwise we'll have corrupted slabs on a running + kernel, after we warn + - use __skb_push() instead of skb_push(), as the headroom check is + already implemented here explicitly (Eric Dumazet) + +Signed-off-by: Stefano Brivio +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/neighbour.h | 28 +++++++++++++++++++++++----- + 1 file changed, 23 insertions(+), 5 deletions(-) + +--- a/include/net/neighbour.h ++++ b/include/net/neighbour.h +@@ -453,6 +453,7 @@ static inline int neigh_hh_bridge(struct + + static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb) + { ++ unsigned int hh_alen = 0; + unsigned int seq; + unsigned int hh_len; + +@@ -460,16 +461,33 @@ static inline int neigh_hh_output(const + seq = read_seqbegin(&hh->hh_lock); + hh_len = hh->hh_len; + if (likely(hh_len <= HH_DATA_MOD)) { +- /* this is inlined by gcc */ +- memcpy(skb->data - HH_DATA_MOD, hh->hh_data, HH_DATA_MOD); ++ hh_alen = HH_DATA_MOD; ++ ++ /* skb_push() would proceed silently if we have room for ++ * the unaligned size but not for the aligned size: ++ * check headroom explicitly. 
++ */ ++ if (likely(skb_headroom(skb) >= HH_DATA_MOD)) { ++ /* this is inlined by gcc */ ++ memcpy(skb->data - HH_DATA_MOD, hh->hh_data, ++ HH_DATA_MOD); ++ } + } else { +- unsigned int hh_alen = HH_DATA_ALIGN(hh_len); ++ hh_alen = HH_DATA_ALIGN(hh_len); + +- memcpy(skb->data - hh_alen, hh->hh_data, hh_alen); ++ if (likely(skb_headroom(skb) >= hh_alen)) { ++ memcpy(skb->data - hh_alen, hh->hh_data, ++ hh_alen); ++ } + } + } while (read_seqretry(&hh->hh_lock, seq)); + +- skb_push(skb, hh_len); ++ if (WARN_ON_ONCE(skb_headroom(skb) < hh_alen)) { ++ kfree_skb(skb); ++ return NET_XMIT_DROP; ++ } ++ ++ __skb_push(skb, hh_len); + return dev_queue_xmit(skb); + } + diff --git a/queue-4.19/net-8139cp-fix-a-bug-triggered-by-changing-mtu-with-network-traffic.patch b/queue-4.19/net-8139cp-fix-a-bug-triggered-by-changing-mtu-with-network-traffic.patch new file mode 100644 index 00000000000..f11f47c0bcb --- /dev/null +++ b/queue-4.19/net-8139cp-fix-a-bug-triggered-by-changing-mtu-with-network-traffic.patch @@ -0,0 +1,143 @@ +From foo@baz Thu Dec 13 10:38:53 CET 2018 +From: Su Yanjun +Date: Mon, 3 Dec 2018 15:33:07 +0800 +Subject: net: 8139cp: fix a BUG triggered by changing mtu with network traffic + +From: Su Yanjun + +[ Upstream commit a5d4a89245ead1f37ed135213653c5beebea4237 ] + +When changing mtu many times with traffic, a bug is triggered: + +[ 1035.684037] kernel BUG at lib/dynamic_queue_limits.c:26! 
+[ 1035.684042] invalid opcode: 0000 [#1] SMP +[ 1035.684049] Modules linked in: loop binfmt_misc 8139cp(OE) macsec +tcp_diag udp_diag inet_diag unix_diag af_packet_diag netlink_diag tcp_lp +fuse uinput xt_CHECKSUM iptable_mangle ipt_MASQUERADE +nf_nat_masquerade_ipv4 iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 +nf_defrag_ipv4 xt_conntrack nf_conntrack ipt_REJECT nf_reject_ipv4 tun +bridge stp llc ebtable_filter ebtables ip6table_filter devlink +ip6_tables iptable_filter sunrpc snd_hda_codec_generic snd_hda_intel +snd_hda_codec snd_hda_core snd_hwdep ppdev snd_seq iosf_mbi crc32_pclmul +parport_pc snd_seq_device ghash_clmulni_intel parport snd_pcm +aesni_intel joydev lrw snd_timer virtio_balloon sg gf128mul glue_helper +ablk_helper cryptd snd soundcore i2c_piix4 pcspkr ip_tables xfs +libcrc32c sr_mod sd_mod cdrom crc_t10dif crct10dif_generic ata_generic +[ 1035.684102] pata_acpi virtio_console qxl drm_kms_helper syscopyarea +sysfillrect sysimgblt floppy fb_sys_fops crct10dif_pclmul +crct10dif_common ttm crc32c_intel serio_raw ata_piix drm libata 8139too +virtio_pci drm_panel_orientation_quirks virtio_ring virtio mii dm_mirror +dm_region_hash dm_log dm_mod [last unloaded: 8139cp] +[ 1035.684132] CPU: 9 PID: 25140 Comm: if-mtu-change Kdump: loaded +Tainted: G OE ------------ T 3.10.0-957.el7.x86_64 #1 +[ 1035.684134] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 +[ 1035.684136] task: ffff8f59b1f5a080 ti: ffff8f5a2e32c000 task.ti: +ffff8f5a2e32c000 +[ 1035.684149] RIP: 0010:[] [] +dql_completed+0x180/0x190 +[ 1035.684162] RSP: 0000:ffff8f5a75483e50 EFLAGS: 00010093 +[ 1035.684162] RAX: 00000000000000c2 RBX: ffff8f5a6f91c000 RCX: +0000000000000000 +[ 1035.684162] RDX: 0000000000000000 RSI: 0000000000000184 RDI: +ffff8f599fea3ec0 +[ 1035.684162] RBP: ffff8f5a75483ea8 R08: 00000000000000c2 R09: +0000000000000000 +[ 1035.684162] R10: 00000000000616ef R11: ffff8f5a75483b56 R12: +ffff8f599fea3e00 +[ 1035.684162] R13: 0000000000000001 R14: 0000000000000000 R15: 
+0000000000000184 +[ 1035.684162] FS: 00007fa8434de740(0000) GS:ffff8f5a75480000(0000) +knlGS:0000000000000000 +[ 1035.684162] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 1035.684162] CR2: 00000000004305d0 CR3: 000000024eb66000 CR4: +00000000001406e0 +[ 1035.684162] Call Trace: +[ 1035.684162] +[ 1035.684162] [] ? cp_interrupt+0x478/0x580 [8139cp] +[ 1035.684162] [] +__handle_irq_event_percpu+0x44/0x1c0 +[ 1035.684162] [] handle_irq_event_percpu+0x32/0x80 +[ 1035.684162] [] handle_irq_event+0x3c/0x60 +[ 1035.684162] [] handle_fasteoi_irq+0x59/0x110 +[ 1035.684162] [] handle_irq+0xe4/0x1a0 +[ 1035.684162] [] do_IRQ+0x4d/0xf0 +[ 1035.684162] [] common_interrupt+0x162/0x162 +[ 1035.684162] +[ 1035.684162] [] ? __wake_up_bit+0x24/0x70 +[ 1035.684162] [] ? do_set_pte+0xd5/0x120 +[ 1035.684162] [] unlock_page+0x2b/0x30 +[ 1035.684162] [] do_read_fault.isra.61+0x139/0x1b0 +[ 1035.684162] [] handle_pte_fault+0x2f4/0xd10 +[ 1035.684162] [] handle_mm_fault+0x39d/0x9b0 +[ 1035.684162] [] __do_page_fault+0x203/0x500 +[ 1035.684162] [] trace_do_page_fault+0x56/0x150 +[ 1035.684162] [] do_async_page_fault+0x22/0xf0 +[ 1035.684162] [] async_page_fault+0x28/0x30 +[ 1035.684162] Code: 54 c7 47 54 ff ff ff ff 44 0f 49 ce 48 8b 35 48 2f +9c 00 48 89 77 58 e9 fe fe ff ff 0f 1f 80 00 00 00 00 41 89 d1 e9 ef fe +ff ff <0f> 0b 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 55 8d 42 ff 48 +[ 1035.684162] RIP [] dql_completed+0x180/0x190 +[ 1035.684162] RSP + +It's not the same issue as the one described in patch 7fe0ee09. +As 8139cp uses shared irq mode, other device irq will trigger +cp_interrupt to execute. + +cp_change_mtu + -> cp_close + -> cp_open + +In cp_close routine just before free_irq(), some interrupt may occur. +In my environment, cp_interrupt executes and IntrStatus is 0x4, +exactly TxOk. That will cause cp_tx to wake device queue. + +As device queue is started, cp_start_xmit and cp_open will run at same +time which will cause kernel BUG. 
+ +For example: +[#] for tx descriptor + +At start: + +[#][#][#] +num_queued=3 + +After cp_init_hw->cp_start_hw->netdev_reset_queue: + +[#][#][#] +num_queued=0 + +When 8139cp starts to work, cp_tx will find that +num_queued does not match the completed bytes. + +The patch will check IntrMask before check IntrStatus in cp_interrupt. +When 8139cp interrupt is disabled, just return. + +Signed-off-by: Su Yanjun +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/realtek/8139cp.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/drivers/net/ethernet/realtek/8139cp.c ++++ b/drivers/net/ethernet/realtek/8139cp.c +@@ -571,6 +571,7 @@ static irqreturn_t cp_interrupt (int irq + struct cp_private *cp; + int handled = 0; + u16 status; ++ u16 mask; + + if (unlikely(dev == NULL)) + return IRQ_NONE; +@@ -578,6 +579,10 @@ static irqreturn_t cp_interrupt (int irq + + spin_lock(&cp->lock); + ++ mask = cpr16(IntrMask); ++ if (!mask) ++ goto out_unlock; ++ + status = cpr16(IntrStatus); + if (!status || (status == 0xFFFF)) + goto out_unlock; diff --git a/queue-4.19/net-fix-xps-static_key-accounting.patch b/queue-4.19/net-fix-xps-static_key-accounting.patch new file mode 100644 index 00000000000..4dd3512199f --- /dev/null +++ b/queue-4.19/net-fix-xps-static_key-accounting.patch @@ -0,0 +1,127 @@ +From foo@baz Thu Dec 13 10:38:53 CET 2018 +From: Sabrina Dubroca +Date: Thu, 29 Nov 2018 14:14:49 +0100 +Subject: net: fix XPS static_key accounting + +From: Sabrina Dubroca + +[ Upstream commit 867d0ad476db89a1e8af3f297af402399a54eea5 ] + +Commit 04157469b7b8 ("net: Use static_key for XPS maps") introduced a +static key for XPS, but the increments/decrements don't match. + +First, the static key's counter is incremented once for each queue, but +only decremented once for a whole batch of queues, leading to large +imbalances. 
+ +Second, the xps_rxqs_needed key is decremented whenever we reset a batch +of queues, whether they had any rxqs mapping or not, so that if we setup +cpu-XPS on em1 and RXQS-XPS on em2, resetting the queues on em1 would +decrement the xps_rxqs_needed key. + +This reworks the accounting scheme so that the xps_needed key is +incremented only once for each type of XPS for all the queues on a +device, and the xps_rxqs_needed key is incremented only once for all +queues. This is sufficient to let us retrieve queues via +get_xps_queue(). + +This patch introduces a new reset_xps_maps(), which reinitializes and +frees the appropriate map (xps_rxqs_map or xps_cpus_map), and drops a +reference to the needed keys: + - both xps_needed and xps_rxqs_needed, in case of rxqs maps, + - only xps_needed, in case of CPU maps. + +Now, we also need to call reset_xps_maps() at the end of +__netif_set_xps_queue() when there's no active map left, for example +when writing '00000000,00000000' to all queues' xps_rxqs setting. + +Fixes: 04157469b7b8 ("net: Use static_key for XPS maps") +Signed-off-by: Sabrina Dubroca +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev.c | 45 ++++++++++++++++++++++++--------------------- + 1 file changed, 24 insertions(+), 21 deletions(-) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -2161,6 +2161,20 @@ static bool remove_xps_queue_cpu(struct + return active; + } + ++static void reset_xps_maps(struct net_device *dev, ++ struct xps_dev_maps *dev_maps, ++ bool is_rxqs_map) ++{ ++ if (is_rxqs_map) { ++ static_key_slow_dec_cpuslocked(&xps_rxqs_needed); ++ RCU_INIT_POINTER(dev->xps_rxqs_map, NULL); ++ } else { ++ RCU_INIT_POINTER(dev->xps_cpus_map, NULL); ++ } ++ static_key_slow_dec_cpuslocked(&xps_needed); ++ kfree_rcu(dev_maps, rcu); ++} ++ + static void clean_xps_maps(struct net_device *dev, const unsigned long *mask, + struct xps_dev_maps *dev_maps, unsigned int nr_ids, + u16 offset, u16 count, bool is_rxqs_map) +@@ -2172,13 +2186,8 @@ static void clean_xps_maps(struct net_de + j < nr_ids;) + active |= remove_xps_queue_cpu(dev, dev_maps, j, offset, + count); +- if (!active) { +- if (is_rxqs_map) +- RCU_INIT_POINTER(dev->xps_rxqs_map, NULL); +- else +- RCU_INIT_POINTER(dev->xps_cpus_map, NULL); +- kfree_rcu(dev_maps, rcu); +- } ++ if (!active) ++ reset_xps_maps(dev, dev_maps, is_rxqs_map); + + if (!is_rxqs_map) { + for (i = offset + (count - 1); count--; i--) { +@@ -2222,10 +2231,6 @@ static void netif_reset_xps_queues(struc + false); + + out_no_maps: +- if (static_key_enabled(&xps_rxqs_needed)) +- static_key_slow_dec_cpuslocked(&xps_rxqs_needed); +- +- static_key_slow_dec_cpuslocked(&xps_needed); + mutex_unlock(&xps_map_mutex); + cpus_read_unlock(); + } +@@ -2343,9 +2348,12 @@ int __netif_set_xps_queue(struct net_dev + if (!new_dev_maps) + goto out_no_new_maps; + +- static_key_slow_inc_cpuslocked(&xps_needed); +- if (is_rxqs_map) +- static_key_slow_inc_cpuslocked(&xps_rxqs_needed); ++ if (!dev_maps) { ++ /* Increment static keys at most once per type */ ++ static_key_slow_inc_cpuslocked(&xps_needed); ++ if (is_rxqs_map) ++ 
static_key_slow_inc_cpuslocked(&xps_rxqs_needed); ++ } + + for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids), + j < nr_ids;) { +@@ -2443,13 +2451,8 @@ out_no_new_maps: + } + + /* free map if not active */ +- if (!active) { +- if (is_rxqs_map) +- RCU_INIT_POINTER(dev->xps_rxqs_map, NULL); +- else +- RCU_INIT_POINTER(dev->xps_cpus_map, NULL); +- kfree_rcu(dev_maps, rcu); +- } ++ if (!active) ++ reset_xps_maps(dev, dev_maps, is_rxqs_map); + + out_no_maps: + mutex_unlock(&xps_map_mutex); diff --git a/queue-4.19/net-mlx4_core-correctly-set-pfc-param-if-global-pause-is-turned-off.patch b/queue-4.19/net-mlx4_core-correctly-set-pfc-param-if-global-pause-is-turned-off.patch new file mode 100644 index 00000000000..0064999bf99 --- /dev/null +++ b/queue-4.19/net-mlx4_core-correctly-set-pfc-param-if-global-pause-is-turned-off.patch @@ -0,0 +1,33 @@ +From foo@baz Thu Dec 13 10:38:53 CET 2018 +From: Tarick Bedeir +Date: Fri, 7 Dec 2018 00:30:26 -0800 +Subject: net/mlx4_core: Correctly set PFC param if global pause is turned off. + +From: Tarick Bedeir + +[ Upstream commit bd5122cd1e0644d8bd8dd84517c932773e999766 ] + +rx_ppp and tx_ppp can be set between 0 and 255, so don't clamp to 1. + +Fixes: 6e8814ceb7e8 ("net/mlx4_en: Fix mixed PFC and Global pause user control requests") +Signed-off-by: Tarick Bedeir +Reviewed-by: Eran Ben Elisha +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c ++++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +@@ -1084,8 +1084,8 @@ static int mlx4_en_set_pauseparam(struct + + tx_pause = !!(pause->tx_pause); + rx_pause = !!(pause->rx_pause); +- rx_ppp = priv->prof->rx_ppp && !(tx_pause || rx_pause); +- tx_ppp = priv->prof->tx_ppp && !(tx_pause || rx_pause); ++ rx_ppp = (tx_pause || rx_pause) ? 0 : priv->prof->rx_ppp; ++ tx_ppp = (tx_pause || rx_pause) ? 
0 : priv->prof->tx_ppp; + + err = mlx4_SET_PORT_general(mdev->dev, priv->port, + priv->rx_skb_size + ETH_FCS_LEN, diff --git a/queue-4.19/net-mlx4_en-change-min-mtu-size-to-eth_min_mtu.patch b/queue-4.19/net-mlx4_en-change-min-mtu-size-to-eth_min_mtu.patch new file mode 100644 index 00000000000..3ade53d9e11 --- /dev/null +++ b/queue-4.19/net-mlx4_en-change-min-mtu-size-to-eth_min_mtu.patch @@ -0,0 +1,46 @@ +From foo@baz Thu Dec 13 10:38:53 CET 2018 +From: Eran Ben Elisha +Date: Sun, 2 Dec 2018 14:34:36 +0200 +Subject: net/mlx4_en: Change min MTU size to ETH_MIN_MTU + +From: Eran Ben Elisha + +[ Upstream commit 24be19e47779d604d1492c114459dca9a92acf78 ] + +NIC driver minimal MTU size shall be set to ETH_MIN_MTU, as defined in +the RFC791 and in the network stack. Remove old mlx4_en only define for +it, which was set to wrong value. + +Fixes: b80f71f5816f ("ethernet/mellanox: use core min/max MTU checking") +Signed-off-by: Eran Ben Elisha +Signed-off-by: Tariq Toukan +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 4 ++-- + drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 1 - + 2 files changed, 2 insertions(+), 3 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c ++++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +@@ -3494,8 +3494,8 @@ int mlx4_en_init_netdev(struct mlx4_en_d + dev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM; + } + +- /* MTU range: 46 - hw-specific max */ +- dev->min_mtu = MLX4_EN_MIN_MTU; ++ /* MTU range: 68 - hw-specific max */ ++ dev->min_mtu = ETH_MIN_MTU; + dev->max_mtu = priv->max_mtu; + + mdev->pndev[port] = dev; +--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h ++++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +@@ -161,7 +161,6 @@ + #define MLX4_SELFTEST_LB_MIN_MTU (MLX4_LOOPBACK_TEST_PAYLOAD + NET_IP_ALIGN + \ + ETH_HLEN + PREAMBLE_LEN) + +-#define MLX4_EN_MIN_MTU 46 + /* VLAN_HLEN is added twice,to support skb vlan tagged with multiple + * headers. (For example: ETH_P_8021Q and ETH_P_8021AD). + */ diff --git a/queue-4.19/net-phy-don-t-allow-__set_phy_supported-to-add-unsupported-modes.patch b/queue-4.19/net-phy-don-t-allow-__set_phy_supported-to-add-unsupported-modes.patch new file mode 100644 index 00000000000..bb78e65addb --- /dev/null +++ b/queue-4.19/net-phy-don-t-allow-__set_phy_supported-to-add-unsupported-modes.patch @@ -0,0 +1,56 @@ +From foo@baz Thu Dec 13 10:38:53 CET 2018 +From: Heiner Kallweit +Date: Mon, 3 Dec 2018 08:19:33 +0100 +Subject: net: phy: don't allow __set_phy_supported to add unsupported modes + +From: Heiner Kallweit + +[ Upstream commit d2a36971ef595069b7a600d1144c2e0881a930a1 ] + +Currently __set_phy_supported allows to add modes w/o checking whether +the PHY supports them. This is wrong, it should never add modes but +only remove modes we don't want to support. + +The commit marked as fixed didn't do anything wrong, it just copied +existing functionality to the helper which is being fixed now. 
+ +Fixes: f3a6bd393c2c ("phylib: Add phy_set_max_speed helper") +Signed-off-by: Heiner Kallweit +Reviewed-by: Andrew Lunn +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/phy_device.c | 19 ++++++++----------- + 1 file changed, 8 insertions(+), 11 deletions(-) + +--- a/drivers/net/phy/phy_device.c ++++ b/drivers/net/phy/phy_device.c +@@ -1738,20 +1738,17 @@ EXPORT_SYMBOL(genphy_loopback); + + static int __set_phy_supported(struct phy_device *phydev, u32 max_speed) + { +- phydev->supported &= ~(PHY_1000BT_FEATURES | PHY_100BT_FEATURES | +- PHY_10BT_FEATURES); +- + switch (max_speed) { +- default: +- return -ENOTSUPP; +- case SPEED_1000: +- phydev->supported |= PHY_1000BT_FEATURES; ++ case SPEED_10: ++ phydev->supported &= ~PHY_100BT_FEATURES; + /* fall through */ + case SPEED_100: +- phydev->supported |= PHY_100BT_FEATURES; +- /* fall through */ +- case SPEED_10: +- phydev->supported |= PHY_10BT_FEATURES; ++ phydev->supported &= ~PHY_1000BT_FEATURES; ++ break; ++ case SPEED_1000: ++ break; ++ default: ++ return -ENOTSUPP; + } + + return 0; diff --git a/queue-4.19/net-phy-sfp-correct-store-of-detected-link-modes.patch b/queue-4.19/net-phy-sfp-correct-store-of-detected-link-modes.patch new file mode 100644 index 00000000000..8074b05b9bc --- /dev/null +++ b/queue-4.19/net-phy-sfp-correct-store-of-detected-link-modes.patch @@ -0,0 +1,33 @@ +From foo@baz Thu Dec 13 10:38:53 CET 2018 +From: Baruch Siach +Date: Thu, 29 Nov 2018 12:40:11 +0200 +Subject: net: phy: sfp: correct store of detected link modes + +From: Baruch Siach + +[ Upstream commit d7f7e0018b96fd1a30a968faa9464eb57372c1ec ] + +The link modes that sfp_parse_support() detects are stored in the +'modes' bitmap. There is no reason to make an exception for 1000Base-PX +or 1000Base-BX10. + +Fixes: 03145864bd0f ("sfp: support 1G BiDi (eg, FiberStore SFP-GE-BX) modules") +Signed-off-by: Baruch Siach +Acked-by: Russell King +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/sfp-bus.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/phy/sfp-bus.c ++++ b/drivers/net/phy/sfp-bus.c +@@ -162,7 +162,7 @@ void sfp_parse_support(struct sfp_bus *b + /* 1000Base-PX or 1000Base-BX10 */ + if ((id->base.e_base_px || id->base.e_base_bx10) && + br_min <= 1300 && br_max >= 1200) +- phylink_set(support, 1000baseX_Full); ++ phylink_set(modes, 1000baseX_Full); + + /* For active or passive cables, select the link modes + * based on the bit rates and the cable compliance bytes. diff --git a/queue-4.19/net-prevent-invalid-access-to-skb-prev-in-__qdisc_drop_all.patch b/queue-4.19/net-prevent-invalid-access-to-skb-prev-in-__qdisc_drop_all.patch new file mode 100644 index 00000000000..fa0e5046635 --- /dev/null +++ b/queue-4.19/net-prevent-invalid-access-to-skb-prev-in-__qdisc_drop_all.patch @@ -0,0 +1,98 @@ +From foo@baz Thu Dec 13 10:38:53 CET 2018 +From: Christoph Paasch +Date: Thu, 29 Nov 2018 16:01:04 -0800 +Subject: net: Prevent invalid access to skb->prev in __qdisc_drop_all + +From: Christoph Paasch + +[ Upstream commit 9410d386d0a829ace9558336263086c2fbbe8aed ] + +__qdisc_drop_all() accesses skb->prev to get to the tail of the +segment-list. + +With commit 68d2f84a1368 ("net: gro: properly remove skb from list") +the skb-list handling has been changed to set skb->next to NULL and set +the list-poison on skb->prev. + +With that change, __qdisc_drop_all() will panic when it tries to +dereference skb->prev. + +Since commit 992cba7e276d ("net: Add and use skb_list_del_init().") +__list_del_entry is used, leaving skb->prev unchanged (thus, +pointing to the list-head if it's the first skb of the list). 
+This will make __qdisc_drop_all modify the next-pointer of the list-head +and result in a panic later on: + +[ 34.501053] general protection fault: 0000 [#1] SMP KASAN PTI +[ 34.501968] CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.20.0-rc2.mptcp #108 +[ 34.502887] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.5.1 01/01/2011 +[ 34.504074] RIP: 0010:dev_gro_receive+0x343/0x1f90 +[ 34.504751] Code: e0 48 c1 e8 03 42 80 3c 30 00 0f 85 4a 1c 00 00 4d 8b 24 24 4c 39 65 d0 0f 84 0a 04 00 00 49 8d 7c 24 38 48 89 f8 48 c1 e8 03 <42> 0f b6 04 30 84 c0 74 08 3c 04 +[ 34.507060] RSP: 0018:ffff8883af507930 EFLAGS: 00010202 +[ 34.507761] RAX: 0000000000000007 RBX: ffff8883970b2c80 RCX: 1ffff11072e165a6 +[ 34.508640] RDX: 1ffff11075867008 RSI: ffff8883ac338040 RDI: 0000000000000038 +[ 34.509493] RBP: ffff8883af5079d0 R08: ffff8883970b2d40 R09: 0000000000000062 +[ 34.510346] R10: 0000000000000034 R11: 0000000000000000 R12: 0000000000000000 +[ 34.511215] R13: 0000000000000000 R14: dffffc0000000000 R15: ffff8883ac338008 +[ 34.512082] FS: 0000000000000000(0000) GS:ffff8883af500000(0000) knlGS:0000000000000000 +[ 34.513036] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 34.513741] CR2: 000055ccc3e9d020 CR3: 00000003abf32000 CR4: 00000000000006e0 +[ 34.514593] Call Trace: +[ 34.514893] +[ 34.515157] napi_gro_receive+0x93/0x150 +[ 34.515632] receive_buf+0x893/0x3700 +[ 34.516094] ? __netif_receive_skb+0x1f/0x1a0 +[ 34.516629] ? virtnet_probe+0x1b40/0x1b40 +[ 34.517153] ? __stable_node_chain+0x4d0/0x850 +[ 34.517684] ? kfree+0x9a/0x180 +[ 34.518067] ? __kasan_slab_free+0x171/0x190 +[ 34.518582] ? detach_buf+0x1df/0x650 +[ 34.519061] ? lapic_next_event+0x5a/0x90 +[ 34.519539] ? virtqueue_get_buf_ctx+0x280/0x7f0 +[ 34.520093] virtnet_poll+0x2df/0xd60 +[ 34.520533] ? receive_buf+0x3700/0x3700 +[ 34.521027] ? qdisc_watchdog_schedule_ns+0xd5/0x140 +[ 34.521631] ? htb_dequeue+0x1817/0x25f0 +[ 34.522107] ? sch_direct_xmit+0x142/0xf30 +[ 34.522595] ? 
virtqueue_napi_schedule+0x26/0x30 +[ 34.523155] net_rx_action+0x2f6/0xc50 +[ 34.523601] ? napi_complete_done+0x2f0/0x2f0 +[ 34.524126] ? kasan_check_read+0x11/0x20 +[ 34.524608] ? _raw_spin_lock+0x7d/0xd0 +[ 34.525070] ? _raw_spin_lock_bh+0xd0/0xd0 +[ 34.525563] ? kvm_guest_apic_eoi_write+0x6b/0x80 +[ 34.526130] ? apic_ack_irq+0x9e/0xe0 +[ 34.526567] __do_softirq+0x188/0x4b5 +[ 34.527015] irq_exit+0x151/0x180 +[ 34.527417] do_IRQ+0xdb/0x150 +[ 34.527783] common_interrupt+0xf/0xf +[ 34.528223] + +This patch makes sure that skb->prev is set to NULL when entering +netem_enqueue. + +Cc: Prashant Bhole +Cc: Tyler Hicks +Cc: Eric Dumazet +Fixes: 68d2f84a1368 ("net: gro: properly remove skb from list") +Suggested-by: Eric Dumazet +Signed-off-by: Christoph Paasch +Reviewed-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_netem.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/sched/sch_netem.c ++++ b/net/sched/sch_netem.c +@@ -441,6 +441,9 @@ static int netem_enqueue(struct sk_buff + int count = 1; + int rc = NET_XMIT_SUCCESS; + ++ /* Do not fool qdisc_drop_all() */ ++ skb->prev = NULL; ++ + /* Random duplication */ + if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) + ++count; diff --git a/queue-4.19/net-restore-call-to-netdev_queue_numa_node_write-when-resetting-xps.patch b/queue-4.19/net-restore-call-to-netdev_queue_numa_node_write-when-resetting-xps.patch new file mode 100644 index 00000000000..18901e8f997 --- /dev/null +++ b/queue-4.19/net-restore-call-to-netdev_queue_numa_node_write-when-resetting-xps.patch @@ -0,0 +1,52 @@ +From foo@baz Thu Dec 13 10:38:53 CET 2018 +From: Sabrina Dubroca +Date: Thu, 29 Nov 2018 14:14:48 +0100 +Subject: net: restore call to netdev_queue_numa_node_write when resetting XPS + +From: Sabrina Dubroca + +[ Upstream commit f28c020fb488e1a8b87469812017044bef88aa2b ] + +Before commit 80d19669ecd3 ("net: Refactor XPS for CPUs and Rx queues"), +netif_reset_xps_queues() 
did netdev_queue_numa_node_write() for all the +queues being reset. Now, this is only done when the "active" variable in +clean_xps_maps() is false, ie when on all the CPUs, there's no active +XPS mapping left. + +Fixes: 80d19669ecd3 ("net: Refactor XPS for CPUs and Rx queues") +Signed-off-by: Sabrina Dubroca +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev.c | 16 +++++++++------- + 1 file changed, 9 insertions(+), 7 deletions(-) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -2173,17 +2173,19 @@ static void clean_xps_maps(struct net_de + active |= remove_xps_queue_cpu(dev, dev_maps, j, offset, + count); + if (!active) { +- if (is_rxqs_map) { ++ if (is_rxqs_map) + RCU_INIT_POINTER(dev->xps_rxqs_map, NULL); +- } else { ++ else + RCU_INIT_POINTER(dev->xps_cpus_map, NULL); ++ kfree_rcu(dev_maps, rcu); ++ } + +- for (i = offset + (count - 1); count--; i--) +- netdev_queue_numa_node_write( +- netdev_get_tx_queue(dev, i), +- NUMA_NO_NODE); ++ if (!is_rxqs_map) { ++ for (i = offset + (count - 1); count--; i--) { ++ netdev_queue_numa_node_write( ++ netdev_get_tx_queue(dev, i), ++ NUMA_NO_NODE); + } +- kfree_rcu(dev_maps, rcu); + } + } + diff --git a/queue-4.19/net-use-skb_list_del_init-to-remove-from-rx-sublists.patch b/queue-4.19/net-use-skb_list_del_init-to-remove-from-rx-sublists.patch new file mode 100644 index 00000000000..6f4b0b1acca --- /dev/null +++ b/queue-4.19/net-use-skb_list_del_init-to-remove-from-rx-sublists.patch @@ -0,0 +1,193 @@ +From foo@baz Thu Dec 13 10:38:53 CET 2018 +From: Edward Cree +Date: Tue, 4 Dec 2018 17:37:57 +0000 +Subject: net: use skb_list_del_init() to remove from RX sublists + +From: Edward Cree + +[ Upstream commit 22f6bbb7bcfcef0b373b0502a7ff390275c575dd ] + +list_del() leaves the skb->next pointer poisoned, which can then lead to + a crash in e.g. OVS forwarding. 
For example, setting up an OVS VXLAN + forwarding bridge on sfc as per: + +======== +$ ovs-vsctl show +5dfd9c47-f04b-4aaa-aa96-4fbb0a522a30 + Bridge "br0" + Port "br0" + Interface "br0" + type: internal + Port "enp6s0f0" + Interface "enp6s0f0" + Port "vxlan0" + Interface "vxlan0" + type: vxlan + options: {key="1", local_ip="10.0.0.5", remote_ip="10.0.0.4"} + ovs_version: "2.5.0" +======== +(where 10.0.0.5 is an address on enp6s0f1) +and sending traffic across it will lead to the following panic: +======== +general protection fault: 0000 [#1] SMP PTI +CPU: 5 PID: 0 Comm: swapper/5 Not tainted 4.20.0-rc3-ehc+ #701 +Hardware name: Dell Inc. PowerEdge R710/0M233H, BIOS 6.4.0 07/23/2013 +RIP: 0010:dev_hard_start_xmit+0x38/0x200 +Code: 53 48 89 fb 48 83 ec 20 48 85 ff 48 89 54 24 08 48 89 4c 24 18 0f 84 ab 01 00 00 48 8d 86 90 00 00 00 48 89 f5 48 89 44 24 10 <4c> 8b 33 48 c7 03 00 00 00 00 48 8b 05 c7 d1 b3 00 4d 85 f6 0f 95 +RSP: 0018:ffff888627b437e0 EFLAGS: 00010202 +RAX: 0000000000000000 RBX: dead000000000100 RCX: ffff88862279c000 +RDX: ffff888614a342c0 RSI: 0000000000000000 RDI: 0000000000000000 +RBP: ffff888618a88000 R08: 0000000000000001 R09: 00000000000003e8 +R10: 0000000000000000 R11: ffff888614a34140 R12: 0000000000000000 +R13: 0000000000000062 R14: dead000000000100 R15: ffff888616430000 +FS: 0000000000000000(0000) GS:ffff888627b40000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00007f6d2bc6d000 CR3: 000000000200a000 CR4: 00000000000006e0 +Call Trace: + + __dev_queue_xmit+0x623/0x870 + ? masked_flow_lookup+0xf7/0x220 [openvswitch] + ? ep_poll_callback+0x101/0x310 + do_execute_actions+0xaba/0xaf0 [openvswitch] + ? __wake_up_common+0x8a/0x150 + ? __wake_up_common_lock+0x87/0xc0 + ? queue_userspace_packet+0x31c/0x5b0 [openvswitch] + ovs_execute_actions+0x47/0x120 [openvswitch] + ovs_dp_process_packet+0x7d/0x110 [openvswitch] + ovs_vport_receive+0x6e/0xd0 [openvswitch] + ? dst_alloc+0x64/0x90 + ? rt_dst_alloc+0x50/0xd0 + ? 
ip_route_input_slow+0x19a/0x9a0 + ? __udp_enqueue_schedule_skb+0x198/0x1b0 + ? __udp4_lib_rcv+0x856/0xa30 + ? __udp4_lib_rcv+0x856/0xa30 + ? cpumask_next_and+0x19/0x20 + ? find_busiest_group+0x12d/0xcd0 + netdev_frame_hook+0xce/0x150 [openvswitch] + __netif_receive_skb_core+0x205/0xae0 + __netif_receive_skb_list_core+0x11e/0x220 + netif_receive_skb_list+0x203/0x460 + ? __efx_rx_packet+0x335/0x5e0 [sfc] + efx_poll+0x182/0x320 [sfc] + net_rx_action+0x294/0x3c0 + __do_softirq+0xca/0x297 + irq_exit+0xa6/0xb0 + do_IRQ+0x54/0xd0 + common_interrupt+0xf/0xf + +======== +So, in all listified-receive handling, instead pull skbs off the lists with + skb_list_del_init(). + +Fixes: 9af86f933894 ("net: core: fix use-after-free in __netif_receive_skb_list_core") +Fixes: 7da517a3bc52 ("net: core: Another step of skb receive list processing") +Fixes: a4ca8b7df73c ("net: ipv4: fix drop handling in ip_list_rcv() and ip_list_rcv_finish()") +Fixes: d8269e2cbf90 ("net: ipv6: listify ipv6_rcv() and ip6_rcv_finish()") +Signed-off-by: Edward Cree +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/skbuff.h | 11 +++++++++++ + net/core/dev.c | 8 ++++---- + net/ipv4/ip_input.c | 4 ++-- + net/ipv6/ip6_input.c | 4 ++-- + 4 files changed, 19 insertions(+), 8 deletions(-) + +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -1355,6 +1355,17 @@ static inline void skb_zcopy_abort(struc + } + } + ++static inline void skb_mark_not_on_list(struct sk_buff *skb) ++{ ++ skb->next = NULL; ++} ++ ++static inline void skb_list_del_init(struct sk_buff *skb) ++{ ++ __list_del_entry(&skb->list); ++ skb_mark_not_on_list(skb); ++} ++ + /** + * skb_queue_empty - check if a queue is empty + * @list: queue head +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -4981,7 +4981,7 @@ static void __netif_receive_skb_list_cor + struct net_device *orig_dev = skb->dev; + struct packet_type *pt_prev = NULL; + +- list_del(&skb->list); ++ skb_list_del_init(skb); + __netif_receive_skb_core(skb, pfmemalloc, &pt_prev); + if (!pt_prev) + continue; +@@ -5137,7 +5137,7 @@ static void netif_receive_skb_list_inter + INIT_LIST_HEAD(&sublist); + list_for_each_entry_safe(skb, next, head, list) { + net_timestamp_check(netdev_tstamp_prequeue, skb); +- list_del(&skb->list); ++ skb_list_del_init(skb); + if (!skb_defer_rx_timestamp(skb)) + list_add_tail(&skb->list, &sublist); + } +@@ -5148,7 +5148,7 @@ static void netif_receive_skb_list_inter + rcu_read_lock(); + list_for_each_entry_safe(skb, next, head, list) { + xdp_prog = rcu_dereference(skb->dev->xdp_prog); +- list_del(&skb->list); ++ skb_list_del_init(skb); + if (do_xdp_generic(xdp_prog, skb) == XDP_PASS) + list_add_tail(&skb->list, &sublist); + } +@@ -5167,7 +5167,7 @@ static void netif_receive_skb_list_inter + + if (cpu >= 0) { + /* Will be handled, remove from list */ +- list_del(&skb->list); ++ skb_list_del_init(skb); + enqueue_to_backlog(skb, cpu, &rflow->last_qtail); + } + } +--- a/net/ipv4/ip_input.c ++++ b/net/ipv4/ip_input.c +@@ -551,7 +551,7 @@ static void 
ip_list_rcv_finish(struct ne + list_for_each_entry_safe(skb, next, head, list) { + struct dst_entry *dst; + +- list_del(&skb->list); ++ skb_list_del_init(skb); + /* if ingress device is enslaved to an L3 master device pass the + * skb to its handler for processing + */ +@@ -598,7 +598,7 @@ void ip_list_rcv(struct list_head *head, + struct net_device *dev = skb->dev; + struct net *net = dev_net(dev); + +- list_del(&skb->list); ++ skb_list_del_init(skb); + skb = ip_rcv_core(skb, net); + if (skb == NULL) + continue; +--- a/net/ipv6/ip6_input.c ++++ b/net/ipv6/ip6_input.c +@@ -95,7 +95,7 @@ static void ip6_list_rcv_finish(struct n + list_for_each_entry_safe(skb, next, head, list) { + struct dst_entry *dst; + +- list_del(&skb->list); ++ skb_list_del_init(skb); + /* if ingress device is enslaved to an L3 master device pass the + * skb to its handler for processing + */ +@@ -295,7 +295,7 @@ void ipv6_list_rcv(struct list_head *hea + struct net_device *dev = skb->dev; + struct net *net = dev_net(dev); + +- list_del(&skb->list); ++ skb_list_del_init(skb); + skb = ip6_rcv_core(skb, dev, net); + if (skb == NULL) + continue; diff --git a/queue-4.19/revert-net-ibm-emac-wrong-bit-is-used-for-sta-control.patch b/queue-4.19/revert-net-ibm-emac-wrong-bit-is-used-for-sta-control.patch new file mode 100644 index 00000000000..cc25cfc9f0c --- /dev/null +++ b/queue-4.19/revert-net-ibm-emac-wrong-bit-is-used-for-sta-control.patch @@ -0,0 +1,43 @@ +From foo@baz Thu Dec 13 10:38:53 CET 2018 +From: Benjamin Herrenschmidt +Date: Fri, 7 Dec 2018 15:05:04 +1100 +Subject: Revert "net/ibm/emac: wrong bit is used for STA control" + +From: Benjamin Herrenschmidt + +[ Upstream commit 5b3279e2cba2238b37f6c18adfdea8bddb32715a ] + +This reverts commit 624ca9c33c8a853a4a589836e310d776620f4ab9. + +This commit is completely bogus. The STACR register has two formats, old +and new, depending on the version of the IP block used. 
There's a pair of +device-tree properties that can be used to specify the format used: + + has-inverted-stacr-oc + has-new-stacr-staopc + +What this commit did was to change the bit definition used with the old +parts to match the new parts. This of course breaks the driver on all +the old ones. + +Instead, the author should have set the appropriate properties in the +device-tree for the variant used on his board. + +Signed-off-by: Benjamin Herrenschmidt +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/ibm/emac/emac.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/ibm/emac/emac.h ++++ b/drivers/net/ethernet/ibm/emac/emac.h +@@ -231,7 +231,7 @@ struct emac_regs { + #define EMAC_STACR_PHYE 0x00004000 + #define EMAC_STACR_STAC_MASK 0x00003000 + #define EMAC_STACR_STAC_READ 0x00001000 +-#define EMAC_STACR_STAC_WRITE 0x00000800 ++#define EMAC_STACR_STAC_WRITE 0x00002000 + #define EMAC_STACR_OPBC_MASK 0x00000C00 + #define EMAC_STACR_OPBC_50 0x00000000 + #define EMAC_STACR_OPBC_66 0x00000400 diff --git a/queue-4.19/rtnetlink-ndo_dflt_fdb_dump-only-work-for-arphrd_ether-devices.patch b/queue-4.19/rtnetlink-ndo_dflt_fdb_dump-only-work-for-arphrd_ether-devices.patch new file mode 100644 index 00000000000..115ec4397eb --- /dev/null +++ b/queue-4.19/rtnetlink-ndo_dflt_fdb_dump-only-work-for-arphrd_ether-devices.patch @@ -0,0 +1,152 @@ +From foo@baz Thu Dec 13 10:38:53 CET 2018 +From: Eric Dumazet +Date: Tue, 4 Dec 2018 09:40:35 -0800 +Subject: rtnetlink: ndo_dflt_fdb_dump() only work for ARPHRD_ETHER devices + +From: Eric Dumazet + +[ Upstream commit 688838934c231bb08f46db687e57f6d8bf82709c ] + +kmsan was able to trigger a kernel-infoleak using a gre device [1] + +nlmsg_populate_fdb_fill() has a hard coded assumption +that dev->addr_len is ETH_ALEN, as normally guaranteed +for ARPHRD_ETHER devices. 
+ +A similar issue was fixed recently in commit da71577545a5 +("rtnetlink: Disallow FDB configuration for non-Ethernet device") + +[1] +BUG: KMSAN: kernel-infoleak in copyout lib/iov_iter.c:143 [inline] +BUG: KMSAN: kernel-infoleak in _copy_to_iter+0x4c0/0x2700 lib/iov_iter.c:576 +CPU: 0 PID: 6697 Comm: syz-executor310 Not tainted 4.20.0-rc3+ #95 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack+0x32d/0x480 lib/dump_stack.c:113 + kmsan_report+0x12c/0x290 mm/kmsan/kmsan.c:683 + kmsan_internal_check_memory+0x32a/0xa50 mm/kmsan/kmsan.c:743 + kmsan_copy_to_user+0x78/0xd0 mm/kmsan/kmsan_hooks.c:634 + copyout lib/iov_iter.c:143 [inline] + _copy_to_iter+0x4c0/0x2700 lib/iov_iter.c:576 + copy_to_iter include/linux/uio.h:143 [inline] + skb_copy_datagram_iter+0x4e2/0x1070 net/core/datagram.c:431 + skb_copy_datagram_msg include/linux/skbuff.h:3316 [inline] + netlink_recvmsg+0x6f9/0x19d0 net/netlink/af_netlink.c:1975 + sock_recvmsg_nosec net/socket.c:794 [inline] + sock_recvmsg+0x1d1/0x230 net/socket.c:801 + ___sys_recvmsg+0x444/0xae0 net/socket.c:2278 + __sys_recvmsg net/socket.c:2327 [inline] + __do_sys_recvmsg net/socket.c:2337 [inline] + __se_sys_recvmsg+0x2fa/0x450 net/socket.c:2334 + __x64_sys_recvmsg+0x4a/0x70 net/socket.c:2334 + do_syscall_64+0xcf/0x110 arch/x86/entry/common.c:291 + entry_SYSCALL_64_after_hwframe+0x63/0xe7 +RIP: 0033:0x441119 +Code: 18 89 d0 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 db 0a fc ff c3 66 2e 0f 1f 84 00 00 00 00 +RSP: 002b:00007fffc7f008a8 EFLAGS: 00000207 ORIG_RAX: 000000000000002f +RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 0000000000441119 +RDX: 0000000000000040 RSI: 00000000200005c0 RDI: 0000000000000003 +RBP: 00000000006cc018 R08: 0000000000000100 R09: 0000000000000100 +R10: 0000000000000100 R11: 0000000000000207 R12: 
0000000000402080 +R13: 0000000000402110 R14: 0000000000000000 R15: 0000000000000000 + +Uninit was stored to memory at: + kmsan_save_stack_with_flags mm/kmsan/kmsan.c:246 [inline] + kmsan_save_stack mm/kmsan/kmsan.c:261 [inline] + kmsan_internal_chain_origin+0x13d/0x240 mm/kmsan/kmsan.c:469 + kmsan_memcpy_memmove_metadata+0x1a9/0xf70 mm/kmsan/kmsan.c:344 + kmsan_memcpy_metadata+0xb/0x10 mm/kmsan/kmsan.c:362 + __msan_memcpy+0x61/0x70 mm/kmsan/kmsan_instr.c:162 + __nla_put lib/nlattr.c:744 [inline] + nla_put+0x20a/0x2d0 lib/nlattr.c:802 + nlmsg_populate_fdb_fill+0x444/0x810 net/core/rtnetlink.c:3466 + nlmsg_populate_fdb net/core/rtnetlink.c:3775 [inline] + ndo_dflt_fdb_dump+0x73a/0x960 net/core/rtnetlink.c:3807 + rtnl_fdb_dump+0x1318/0x1cb0 net/core/rtnetlink.c:3979 + netlink_dump+0xc79/0x1c90 net/netlink/af_netlink.c:2244 + __netlink_dump_start+0x10c4/0x11d0 net/netlink/af_netlink.c:2352 + netlink_dump_start include/linux/netlink.h:216 [inline] + rtnetlink_rcv_msg+0x141b/0x1540 net/core/rtnetlink.c:4910 + netlink_rcv_skb+0x394/0x640 net/netlink/af_netlink.c:2477 + rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:4965 + netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] + netlink_unicast+0x1699/0x1740 net/netlink/af_netlink.c:1336 + netlink_sendmsg+0x13c7/0x1440 net/netlink/af_netlink.c:1917 + sock_sendmsg_nosec net/socket.c:621 [inline] + sock_sendmsg net/socket.c:631 [inline] + ___sys_sendmsg+0xe3b/0x1240 net/socket.c:2116 + __sys_sendmsg net/socket.c:2154 [inline] + __do_sys_sendmsg net/socket.c:2163 [inline] + __se_sys_sendmsg+0x305/0x460 net/socket.c:2161 + __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2161 + do_syscall_64+0xcf/0x110 arch/x86/entry/common.c:291 + entry_SYSCALL_64_after_hwframe+0x63/0xe7 + +Uninit was created at: + kmsan_save_stack_with_flags mm/kmsan/kmsan.c:246 [inline] + kmsan_internal_poison_shadow+0x6d/0x130 mm/kmsan/kmsan.c:170 + kmsan_kmalloc+0xa1/0x100 mm/kmsan/kmsan_hooks.c:186 + __kmalloc+0x14c/0x4d0 mm/slub.c:3825 + kmalloc 
include/linux/slab.h:551 [inline] + __hw_addr_create_ex net/core/dev_addr_lists.c:34 [inline] + __hw_addr_add_ex net/core/dev_addr_lists.c:80 [inline] + __dev_mc_add+0x357/0x8a0 net/core/dev_addr_lists.c:670 + dev_mc_add+0x6d/0x80 net/core/dev_addr_lists.c:687 + ip_mc_filter_add net/ipv4/igmp.c:1128 [inline] + igmp_group_added+0x4d4/0xb80 net/ipv4/igmp.c:1311 + __ip_mc_inc_group+0xea9/0xf70 net/ipv4/igmp.c:1444 + ip_mc_inc_group net/ipv4/igmp.c:1453 [inline] + ip_mc_up+0x1c3/0x400 net/ipv4/igmp.c:1775 + inetdev_event+0x1d03/0x1d80 net/ipv4/devinet.c:1522 + notifier_call_chain kernel/notifier.c:93 [inline] + __raw_notifier_call_chain kernel/notifier.c:394 [inline] + raw_notifier_call_chain+0x13d/0x240 kernel/notifier.c:401 + __dev_notify_flags+0x3da/0x860 net/core/dev.c:1733 + dev_change_flags+0x1ac/0x230 net/core/dev.c:7569 + do_setlink+0x165f/0x5ea0 net/core/rtnetlink.c:2492 + rtnl_newlink+0x2ad7/0x35a0 net/core/rtnetlink.c:3111 + rtnetlink_rcv_msg+0x1148/0x1540 net/core/rtnetlink.c:4947 + netlink_rcv_skb+0x394/0x640 net/netlink/af_netlink.c:2477 + rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:4965 + netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] + netlink_unicast+0x1699/0x1740 net/netlink/af_netlink.c:1336 + netlink_sendmsg+0x13c7/0x1440 net/netlink/af_netlink.c:1917 + sock_sendmsg_nosec net/socket.c:621 [inline] + sock_sendmsg net/socket.c:631 [inline] + ___sys_sendmsg+0xe3b/0x1240 net/socket.c:2116 + __sys_sendmsg net/socket.c:2154 [inline] + __do_sys_sendmsg net/socket.c:2163 [inline] + __se_sys_sendmsg+0x305/0x460 net/socket.c:2161 + __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2161 + do_syscall_64+0xcf/0x110 arch/x86/entry/common.c:291 + entry_SYSCALL_64_after_hwframe+0x63/0xe7 + +Bytes 36-37 of 105 are uninitialized +Memory access of size 105 starts at ffff88819686c000 +Data copied to user address 0000000020000380 + +Fixes: d83b06036048 ("net: add fdb generic dump routine") +Signed-off-by: Eric Dumazet +Cc: John Fastabend +Cc: Ido Schimmel +Cc: 
David Ahern +Reviewed-by: Ido Schimmel +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/rtnetlink.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/core/rtnetlink.c ++++ b/net/core/rtnetlink.c +@@ -3730,6 +3730,9 @@ int ndo_dflt_fdb_dump(struct sk_buff *sk + { + int err; + ++ if (dev->type != ARPHRD_ETHER) ++ return -EINVAL; ++ + netif_addr_lock_bh(dev); + err = nlmsg_populate_fdb(skb, cb, dev, idx, &dev->uc); + if (err) diff --git a/queue-4.19/sctp-kfree_rcu-asoc.patch b/queue-4.19/sctp-kfree_rcu-asoc.patch new file mode 100644 index 00000000000..76eb509208e --- /dev/null +++ b/queue-4.19/sctp-kfree_rcu-asoc.patch @@ -0,0 +1,58 @@ +From foo@baz Thu Dec 13 10:38:53 CET 2018 +From: Xin Long +Date: Sat, 1 Dec 2018 01:36:59 +0800 +Subject: sctp: kfree_rcu asoc + +From: Xin Long + +[ Upstream commit fb6df5a6234c38a9c551559506a49a677ac6f07a ] + +In sctp_hash_transport/sctp_epaddr_lookup_transport, it dereferences +a transport's asoc under rcu_read_lock while asoc is freed not after +a grace period, which leads to a use-after-free panic. + +This patch fixes it by calling kfree_rcu to make asoc be freed after +a grace period. + +Note that only the asoc's memory is delayed to free in the patch, it +won't cause sk to linger longer. + +Thanks Neil and Marcelo to make this clear. + +Fixes: 7fda702f9315 ("sctp: use new rhlist interface on sctp transport rhashtable") +Fixes: cd2b70875058 ("sctp: check duplicate node before inserting a new transport") +Reported-by: syzbot+0b05d8aa7cb185107483@syzkaller.appspotmail.com +Reported-by: syzbot+aad231d51b1923158444@syzkaller.appspotmail.com +Suggested-by: Neil Horman +Signed-off-by: Xin Long +Acked-by: Marcelo Ricardo Leitner +Acked-by: Neil Horman +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/sctp/structs.h | 2 ++ + net/sctp/associola.c | 2 +- + 2 files changed, 3 insertions(+), 1 deletion(-) + +--- a/include/net/sctp/structs.h ++++ b/include/net/sctp/structs.h +@@ -2075,6 +2075,8 @@ struct sctp_association { + + __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1]; + __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1]; ++ ++ struct rcu_head rcu; + }; + + +--- a/net/sctp/associola.c ++++ b/net/sctp/associola.c +@@ -434,7 +434,7 @@ static void sctp_association_destroy(str + + WARN_ON(atomic_read(&asoc->rmem_alloc)); + +- kfree(asoc); ++ kfree_rcu(asoc, rcu); + SCTP_DBG_OBJCNT_DEC(assoc); + } + diff --git a/queue-4.19/sctp-update-frag_point-when-stream_interleave-is-set.patch b/queue-4.19/sctp-update-frag_point-when-stream_interleave-is-set.patch new file mode 100644 index 00000000000..aec29b14678 --- /dev/null +++ b/queue-4.19/sctp-update-frag_point-when-stream_interleave-is-set.patch @@ -0,0 +1,77 @@ +From foo@baz Thu Dec 13 10:38:53 CET 2018 +From: Xin Long +Date: Tue, 27 Nov 2018 19:11:50 +0800 +Subject: sctp: update frag_point when stream_interleave is set + +From: Xin Long + +[ Upstream commit 4135cce7fd0a0d755665c02728578c7c5afe4726 ] + +sctp_assoc_update_frag_point() should be called whenever asoc->pathmtu +changes, but we missed one place in sctp_association_init(). It would +cause frag_point is zero when sending data. + +As says in Jakub's reproducer, if sp->pathmtu is set by socketopt, the +new asoc->pathmtu inherits it in sctp_association_init(). Later when +transports are added and their pmtu >= asoc->pathmtu, it will never +call sctp_assoc_update_frag_point() to set frag_point. + +This patch is to fix it by updating frag_point after asoc->pathmtu is +set as sp->pathmtu in sctp_association_init(). Note that it moved them +after sctp_stream_init(), as stream->si needs to be set first. 
+ +Frag_point's calculation is also related with datachunk's type, so it +needs to update frag_point when stream->si may be changed in +sctp_process_init(). + +v1->v2: + - call sctp_assoc_update_frag_point() separately in sctp_process_init + and sctp_association_init, per Marcelo's suggestion. + +Fixes: 2f5e3c9df693 ("sctp: introduce sctp_assoc_update_frag_point") +Reported-by: Jakub Audykowicz +Signed-off-by: Xin Long +Acked-by: Marcelo Ricardo Leitner +Acked-by: Neil Horman +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/associola.c | 7 ++++--- + net/sctp/sm_make_chunk.c | 3 +++ + 2 files changed, 7 insertions(+), 3 deletions(-) + +--- a/net/sctp/associola.c ++++ b/net/sctp/associola.c +@@ -118,9 +118,6 @@ static struct sctp_association *sctp_ass + asoc->flowlabel = sp->flowlabel; + asoc->dscp = sp->dscp; + +- /* Initialize default path MTU. */ +- asoc->pathmtu = sp->pathmtu; +- + /* Set association default SACK delay */ + asoc->sackdelay = msecs_to_jiffies(sp->sackdelay); + asoc->sackfreq = sp->sackfreq; +@@ -252,6 +249,10 @@ static struct sctp_association *sctp_ass + 0, gfp)) + goto fail_init; + ++ /* Initialize default path MTU. */ ++ asoc->pathmtu = sp->pathmtu; ++ sctp_assoc_update_frag_point(asoc); ++ + /* Assume that peer would support both address types unless we are + * told otherwise. + */ +--- a/net/sctp/sm_make_chunk.c ++++ b/net/sctp/sm_make_chunk.c +@@ -2462,6 +2462,9 @@ int sctp_process_init(struct sctp_associ + asoc->c.sinit_max_instreams, gfp)) + goto clean_up; + ++ /* Update frag_point when stream_interleave may get changed. 
*/ ++ sctp_assoc_update_frag_point(asoc); ++ + if (!asoc->temp && sctp_assoc_set_id(asoc, gfp)) + goto clean_up; + diff --git a/queue-4.19/series b/queue-4.19/series new file mode 100644 index 00000000000..a7358cf3256 --- /dev/null +++ b/queue-4.19/series @@ -0,0 +1,21 @@ +ipv4-ipv6-netfilter-adjust-the-frag-mem-limit-when-truesize-changes.patch +ipv6-check-available-headroom-in-ip6_xmit-even-without-options.patch +neighbour-avoid-writing-before-skb-head-in-neigh_hh_output.patch +ipv6-sr-properly-initialize-flowi6-prior-passing-to-ip6_route_output.patch +net-8139cp-fix-a-bug-triggered-by-changing-mtu-with-network-traffic.patch +net-mlx4_core-correctly-set-pfc-param-if-global-pause-is-turned-off.patch +net-mlx4_en-change-min-mtu-size-to-eth_min_mtu.patch +net-phy-don-t-allow-__set_phy_supported-to-add-unsupported-modes.patch +net-prevent-invalid-access-to-skb-prev-in-__qdisc_drop_all.patch +net-use-skb_list_del_init-to-remove-from-rx-sublists.patch +revert-net-ibm-emac-wrong-bit-is-used-for-sta-control.patch +rtnetlink-ndo_dflt_fdb_dump-only-work-for-arphrd_ether-devices.patch +sctp-kfree_rcu-asoc.patch +tcp-do-not-underestimate-rwnd_limited.patch +tcp-fix-null-ref-in-tail-loss-probe.patch +tun-forbid-iface-creation-with-rtnl-ops.patch +virtio-net-keep-vnet-header-zeroed-after-processing-xdp.patch +net-phy-sfp-correct-store-of-detected-link-modes.patch +sctp-update-frag_point-when-stream_interleave-is-set.patch +net-restore-call-to-netdev_queue_numa_node_write-when-resetting-xps.patch +net-fix-xps-static_key-accounting.patch diff --git a/queue-4.19/tcp-do-not-underestimate-rwnd_limited.patch b/queue-4.19/tcp-do-not-underestimate-rwnd_limited.patch new file mode 100644 index 00000000000..76bca8ea678 --- /dev/null +++ b/queue-4.19/tcp-do-not-underestimate-rwnd_limited.patch @@ -0,0 +1,39 @@ +From foo@baz Thu Dec 13 10:38:53 CET 2018 +From: Eric Dumazet +Date: Wed, 5 Dec 2018 14:24:31 -0800 +Subject: tcp: Do not underestimate rwnd_limited + +From: Eric Dumazet + +[ 
Upstream commit 41727549de3e7281feb174d568c6e46823db8684 ] + +If available rwnd is too small, tcp_tso_should_defer() +can decide it is worth waiting before splitting a TSO packet. + +This really means we are rwnd limited. + +Fixes: 5615f88614a4 ("tcp: instrument how long TCP is limited by receive window") +Signed-off-by: Eric Dumazet +Acked-by: Soheil Hassas Yeganeh +Reviewed-by: Yuchung Cheng +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_output.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -2338,8 +2338,11 @@ static bool tcp_write_xmit(struct sock * + } else { + if (!push_one && + tcp_tso_should_defer(sk, skb, &is_cwnd_limited, +- max_segs)) ++ max_segs)) { ++ if (!is_cwnd_limited) ++ is_rwnd_limited = true; + break; ++ } + } + + limit = mss_now; diff --git a/queue-4.19/tcp-fix-null-ref-in-tail-loss-probe.patch b/queue-4.19/tcp-fix-null-ref-in-tail-loss-probe.patch new file mode 100644 index 00000000000..40635753b76 --- /dev/null +++ b/queue-4.19/tcp-fix-null-ref-in-tail-loss-probe.patch @@ -0,0 +1,52 @@ +From foo@baz Thu Dec 13 10:38:53 CET 2018 +From: Yuchung Cheng +Date: Wed, 5 Dec 2018 14:38:38 -0800 +Subject: tcp: fix NULL ref in tail loss probe + +From: Yuchung Cheng + +[ Upstream commit b2b7af861122a0c0f6260155c29a1b2e594cd5b5 ] + +TCP loss probe timer may fire when the retranmission queue is empty but +has a non-zero tp->packets_out counter. tcp_send_loss_probe will call +tcp_rearm_rto which triggers NULL pointer reference by fetching the +retranmission queue head in its sub-routines. + +Add a more detailed warning to help catch the root cause of the inflight +accounting inconsistency. + +Reported-by: Rafael Tinoco +Signed-off-by: Yuchung Cheng +Signed-off-by: Eric Dumazet +Signed-off-by: Neal Cardwell +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_output.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -2479,15 +2479,18 @@ void tcp_send_loss_probe(struct sock *sk + goto rearm_timer; + } + skb = skb_rb_last(&sk->tcp_rtx_queue); ++ if (unlikely(!skb)) { ++ WARN_ONCE(tp->packets_out, ++ "invalid inflight: %u state %u cwnd %u mss %d\n", ++ tp->packets_out, sk->sk_state, tp->snd_cwnd, mss); ++ inet_csk(sk)->icsk_pending = 0; ++ return; ++ } + + /* At most one outstanding TLP retransmission. */ + if (tp->tlp_high_seq) + goto rearm_timer; + +- /* Retransmit last segment. */ +- if (WARN_ON(!skb)) +- goto rearm_timer; +- + if (skb_still_in_host_queue(sk, skb)) + goto rearm_timer; + diff --git a/queue-4.19/tun-forbid-iface-creation-with-rtnl-ops.patch b/queue-4.19/tun-forbid-iface-creation-with-rtnl-ops.patch new file mode 100644 index 00000000000..ba36eeba1be --- /dev/null +++ b/queue-4.19/tun-forbid-iface-creation-with-rtnl-ops.patch @@ -0,0 +1,46 @@ +From foo@baz Thu Dec 13 10:38:53 CET 2018 +From: Nicolas Dichtel +Date: Thu, 29 Nov 2018 14:45:39 +0100 +Subject: tun: forbid iface creation with rtnl ops + +From: Nicolas Dichtel + +[ Upstream commit 35b827b6d06199841a83839e8bb69c0cd13a28be ] + +It's not supported right now (the goal of the initial patch was to support +'ip link del' only). + +Before the patch: +$ ip link add foo type tun +[ 239.632660] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 +[snip] +[ 239.636410] RIP: 0010:register_netdevice+0x8e/0x3a0 + +This panic occurs because dev->netdev_ops is not set by tun_setup(). But to +have something usable, it will require more than just setting +netdev_ops. + +Fixes: f019a7a594d9 ("tun: Implement ip link del tunXXX") +CC: Eric W. Biederman +Signed-off-by: Nicolas Dichtel +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/tun.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/drivers/net/tun.c ++++ b/drivers/net/tun.c +@@ -2268,9 +2268,9 @@ static void tun_setup(struct net_device + static int tun_validate(struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) + { +- if (!data) +- return 0; +- return -EINVAL; ++ NL_SET_ERR_MSG(extack, ++ "tun/tap creation via rtnetlink is not supported."); ++ return -EOPNOTSUPP; + } + + static size_t tun_get_size(const struct net_device *dev) diff --git a/queue-4.19/virtio-net-keep-vnet-header-zeroed-after-processing-xdp.patch b/queue-4.19/virtio-net-keep-vnet-header-zeroed-after-processing-xdp.patch new file mode 100644 index 00000000000..2057a04f6d2 --- /dev/null +++ b/queue-4.19/virtio-net-keep-vnet-header-zeroed-after-processing-xdp.patch @@ -0,0 +1,77 @@ +From foo@baz Thu Dec 13 10:38:53 CET 2018 +From: Jason Wang +Date: Thu, 29 Nov 2018 13:53:16 +0800 +Subject: virtio-net: keep vnet header zeroed after processing XDP + +From: Jason Wang + +[ Upstream commit 436c9453a1ac0944b82870ef2e0d9be956b396d9 ] + +We copy vnet header unconditionally in page_to_skb(); this is wrong +since XDP may modify the packet data. So let's keep a zeroed vnet +header for not confusing the conversion between vnet header and skb +metadata. + +In the future, we should be able to detect whether or not the packet was +modified and keep using the vnet header when packet was not touched. + +Fixes: f600b6905015 ("virtio_net: Add XDP support") +Reported-by: Pavel Popa +Signed-off-by: Jason Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/virtio_net.c | 14 +++++++++----- + 1 file changed, 9 insertions(+), 5 deletions(-) + +--- a/drivers/net/virtio_net.c ++++ b/drivers/net/virtio_net.c +@@ -365,7 +365,8 @@ static unsigned int mergeable_ctx_to_tru + static struct sk_buff *page_to_skb(struct virtnet_info *vi, + struct receive_queue *rq, + struct page *page, unsigned int offset, +- unsigned int len, unsigned int truesize) ++ unsigned int len, unsigned int truesize, ++ bool hdr_valid) + { + struct sk_buff *skb; + struct virtio_net_hdr_mrg_rxbuf *hdr; +@@ -387,7 +388,8 @@ static struct sk_buff *page_to_skb(struc + else + hdr_padded_len = sizeof(struct padded_vnet_hdr); + +- memcpy(hdr, p, hdr_len); ++ if (hdr_valid) ++ memcpy(hdr, p, hdr_len); + + len -= hdr_len; + offset += hdr_padded_len; +@@ -739,7 +741,8 @@ static struct sk_buff *receive_big(struc + struct virtnet_rq_stats *stats) + { + struct page *page = buf; +- struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE); ++ struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, ++ PAGE_SIZE, true); + + stats->bytes += len - vi->hdr_len; + if (unlikely(!skb)) +@@ -842,7 +845,8 @@ static struct sk_buff *receive_mergeable + rcu_read_unlock(); + put_page(page); + head_skb = page_to_skb(vi, rq, xdp_page, +- offset, len, PAGE_SIZE); ++ offset, len, ++ PAGE_SIZE, false); + return head_skb; + } + break; +@@ -898,7 +902,7 @@ static struct sk_buff *receive_mergeable + goto err_skb; + } + +- head_skb = page_to_skb(vi, rq, page, offset, len, truesize); ++ head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog); + curr_skb = head_skb; + + if (unlikely(!curr_skb))