--- /dev/null
+From foo@baz Thu Dec 13 10:38:53 CET 2018
+From: Jiri Wiesner <jwiesner@suse.com>
+Date: Wed, 5 Dec 2018 16:55:29 +0100
+Subject: ipv4: ipv6: netfilter: Adjust the frag mem limit when truesize changes
+
+From: Jiri Wiesner <jwiesner@suse.com>
+
+[ Upstream commit ebaf39e6032faf77218220707fc3fa22487784e0 ]
+
+The *_frag_reasm() functions are susceptible to miscalculating the byte
+count of packet fragments in case the truesize of a head buffer changes.
+The truesize member may be changed by the call to skb_unclone(), leaving
+the fragment memory limit counter unbalanced even if all fragments are
+processed. This miscalculation goes unnoticed as long as the network
+namespace which holds the counter is not destroyed.
+
+Should an attempt be made to destroy a network namespace that holds an
+unbalanced fragment memory limit counter the cleanup of the namespace
+never finishes. The thread handling the cleanup gets stuck in
+inet_frags_exit_net() waiting for the percpu counter to reach zero. The
+thread is usually in running state with a stacktrace similar to:
+
+ PID: 1073 TASK: ffff880626711440 CPU: 1 COMMAND: "kworker/u48:4"
+ #5 [ffff880621563d48] _raw_spin_lock at ffffffff815f5480
+ #6 [ffff880621563d48] inet_evict_bucket at ffffffff8158020b
+ #7 [ffff880621563d80] inet_frags_exit_net at ffffffff8158051c
+ #8 [ffff880621563db0] ops_exit_list at ffffffff814f5856
+ #9 [ffff880621563dd8] cleanup_net at ffffffff814f67c0
+ #10 [ffff880621563e38] process_one_work at ffffffff81096f14
+
+It is not possible to create new network namespaces, and processes
+that call unshare() end up being stuck in uninterruptible sleep state
+waiting to acquire the net_mutex.
+
+The bug was observed in the IPv6 netfilter code by Per Sundstrom.
+I thank him for his analysis of the problem. The parts of this patch
+that apply to IPv4 and IPv6 fragment reassembly are preemptive measures.
+
+Signed-off-by: Jiri Wiesner <jwiesner@suse.com>
+Reported-by: Per Sundstrom <per.sundstrom@redqube.se>
+Acked-by: Peter Oskolkov <posk@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_fragment.c | 7 +++++++
+ net/ipv6/netfilter/nf_conntrack_reasm.c | 8 +++++++-
+ net/ipv6/reassembly.c | 8 +++++++-
+ 3 files changed, 21 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/ip_fragment.c
++++ b/net/ipv4/ip_fragment.c
+@@ -513,6 +513,7 @@ static int ip_frag_reasm(struct ipq *qp,
+ struct rb_node *rbn;
+ int len;
+ int ihlen;
++ int delta;
+ int err;
+ u8 ecn;
+
+@@ -554,10 +555,16 @@ static int ip_frag_reasm(struct ipq *qp,
+ if (len > 65535)
+ goto out_oversize;
+
++ delta = - head->truesize;
++
+ /* Head of list must not be cloned. */
+ if (skb_unclone(head, GFP_ATOMIC))
+ goto out_nomem;
+
++ delta += head->truesize;
++ if (delta)
++ add_frag_mem_limit(qp->q.net, delta);
++
+ /* If the first fragment is fragmented itself, we split
+ * it to two chunks: the first with data and paged part
+ * and the second, holding only fragments. */
+--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
++++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
+@@ -341,7 +341,7 @@ static bool
+ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev)
+ {
+ struct sk_buff *fp, *head = fq->q.fragments;
+- int payload_len;
++ int payload_len, delta;
+ u8 ecn;
+
+ inet_frag_kill(&fq->q);
+@@ -363,10 +363,16 @@ nf_ct_frag6_reasm(struct frag_queue *fq,
+ return false;
+ }
+
++ delta = - head->truesize;
++
+ /* Head of list must not be cloned. */
+ if (skb_unclone(head, GFP_ATOMIC))
+ return false;
+
++ delta += head->truesize;
++ if (delta)
++ add_frag_mem_limit(fq->q.net, delta);
++
+ /* If the first fragment is fragmented itself, we split
+ * it to two chunks: the first with data and paged part
+ * and the second, holding only fragments. */
+--- a/net/ipv6/reassembly.c
++++ b/net/ipv6/reassembly.c
+@@ -281,7 +281,7 @@ static int ip6_frag_reasm(struct frag_qu
+ {
+ struct net *net = container_of(fq->q.net, struct net, ipv6.frags);
+ struct sk_buff *fp, *head = fq->q.fragments;
+- int payload_len;
++ int payload_len, delta;
+ unsigned int nhoff;
+ int sum_truesize;
+ u8 ecn;
+@@ -322,10 +322,16 @@ static int ip6_frag_reasm(struct frag_qu
+ if (payload_len > IPV6_MAXPLEN)
+ goto out_oversize;
+
++ delta = - head->truesize;
++
+ /* Head of list must not be cloned. */
+ if (skb_unclone(head, GFP_ATOMIC))
+ goto out_oom;
+
++ delta += head->truesize;
++ if (delta)
++ add_frag_mem_limit(fq->q.net, delta);
++
+ /* If the first fragment is fragmented itself, we split
+ * it to two chunks: the first with data and paged part
+ * and the second, holding only fragments. */
--- /dev/null
+From foo@baz Thu Dec 13 10:38:53 CET 2018
+From: Stefano Brivio <sbrivio@redhat.com>
+Date: Thu, 6 Dec 2018 19:30:36 +0100
+Subject: ipv6: Check available headroom in ip6_xmit() even without options
+
+From: Stefano Brivio <sbrivio@redhat.com>
+
+[ Upstream commit 66033f47ca60294a95fc85ec3a3cc909dab7b765 ]
+
+Even if we send an IPv6 packet without options, MAX_HEADER might not be
+enough to account for the additional headroom required by alignment of
+hardware headers.
+
+On a configuration without HYPERV_NET, WLAN, AX25, and with IPV6_TUNNEL,
+sending short SCTP packets over IPv4 over L2TP over IPv6, we start with
+100 bytes of allocated headroom in sctp_packet_transmit(), end up with 54
+bytes after l2tp_xmit_skb(), and 14 bytes in ip6_finish_output2().
+
+Those would be enough to append our 14 bytes header, but we're going to
+align that to 16 bytes, and write 2 bytes out of the allocated slab in
+neigh_hh_output().
+
+KASan says:
+
+[ 264.967848] ==================================================================
+[ 264.967861] BUG: KASAN: slab-out-of-bounds in ip6_finish_output2+0x1aec/0x1c70
+[ 264.967866] Write of size 16 at addr 000000006af1c7fe by task netperf/6201
+[ 264.967870]
+[ 264.967876] CPU: 0 PID: 6201 Comm: netperf Not tainted 4.20.0-rc4+ #1
+[ 264.967881] Hardware name: IBM 2827 H43 400 (z/VM 6.4.0)
+[ 264.967887] Call Trace:
+[ 264.967896] ([<00000000001347d6>] show_stack+0x56/0xa0)
+[ 264.967903] [<00000000017e379c>] dump_stack+0x23c/0x290
+[ 264.967912] [<00000000007bc594>] print_address_description+0xf4/0x290
+[ 264.967919] [<00000000007bc8fc>] kasan_report+0x13c/0x240
+[ 264.967927] [<000000000162f5e4>] ip6_finish_output2+0x1aec/0x1c70
+[ 264.967935] [<000000000163f890>] ip6_finish_output+0x430/0x7f0
+[ 264.967943] [<000000000163fe44>] ip6_output+0x1f4/0x580
+[ 264.967953] [<000000000163882a>] ip6_xmit+0xfea/0x1ce8
+[ 264.967963] [<00000000017396e2>] inet6_csk_xmit+0x282/0x3f8
+[ 264.968033] [<000003ff805fb0ba>] l2tp_xmit_skb+0xe02/0x13e0 [l2tp_core]
+[ 264.968037] [<000003ff80631192>] l2tp_eth_dev_xmit+0xda/0x150 [l2tp_eth]
+[ 264.968041] [<0000000001220020>] dev_hard_start_xmit+0x268/0x928
+[ 264.968069] [<0000000001330e8e>] sch_direct_xmit+0x7ae/0x1350
+[ 264.968071] [<000000000122359c>] __dev_queue_xmit+0x2b7c/0x3478
+[ 264.968075] [<00000000013d2862>] ip_finish_output2+0xce2/0x11a0
+[ 264.968078] [<00000000013d9b14>] ip_finish_output+0x56c/0x8c8
+[ 264.968081] [<00000000013ddd1e>] ip_output+0x226/0x4c0
+[ 264.968083] [<00000000013dbd6c>] __ip_queue_xmit+0x894/0x1938
+[ 264.968100] [<000003ff80bc3a5c>] sctp_packet_transmit+0x29d4/0x3648 [sctp]
+[ 264.968116] [<000003ff80b7bf68>] sctp_outq_flush_ctrl.constprop.5+0x8d0/0xe50 [sctp]
+[ 264.968131] [<000003ff80b7c716>] sctp_outq_flush+0x22e/0x7d8 [sctp]
+[ 264.968146] [<000003ff80b35c68>] sctp_cmd_interpreter.isra.16+0x530/0x6800 [sctp]
+[ 264.968161] [<000003ff80b3410a>] sctp_do_sm+0x222/0x648 [sctp]
+[ 264.968177] [<000003ff80bbddac>] sctp_primitive_ASSOCIATE+0xbc/0xf8 [sctp]
+[ 264.968192] [<000003ff80b93328>] __sctp_connect+0x830/0xc20 [sctp]
+[ 264.968208] [<000003ff80bb11ce>] sctp_inet_connect+0x2e6/0x378 [sctp]
+[ 264.968212] [<0000000001197942>] __sys_connect+0x21a/0x450
+[ 264.968215] [<000000000119aff8>] sys_socketcall+0x3d0/0xb08
+[ 264.968218] [<000000000184ea7a>] system_call+0x2a2/0x2c0
+
+[...]
+
+Just like ip_finish_output2() does for IPv4, check that we have enough
+headroom in ip6_xmit(), and reallocate it if we don't.
+
+This issue is older than git history.
+
+Reported-by: Jianlin Shi <jishi@redhat.com>
+Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_output.c | 42 +++++++++++++++++++++---------------------
+ 1 file changed, 21 insertions(+), 21 deletions(-)
+
+--- a/net/ipv6/ip6_output.c
++++ b/net/ipv6/ip6_output.c
+@@ -195,37 +195,37 @@ int ip6_xmit(const struct sock *sk, stru
+ const struct ipv6_pinfo *np = inet6_sk(sk);
+ struct in6_addr *first_hop = &fl6->daddr;
+ struct dst_entry *dst = skb_dst(skb);
++ unsigned int head_room;
+ struct ipv6hdr *hdr;
+ u8 proto = fl6->flowi6_proto;
+ int seg_len = skb->len;
+ int hlimit = -1;
+ u32 mtu;
+
+- if (opt) {
+- unsigned int head_room;
+-
+- /* First: exthdrs may take lots of space (~8K for now)
+- MAX_HEADER is not enough.
+- */
+- head_room = opt->opt_nflen + opt->opt_flen;
+- seg_len += head_room;
+- head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
++ head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
++ if (opt)
++ head_room += opt->opt_nflen + opt->opt_flen;
+
+- if (skb_headroom(skb) < head_room) {
+- struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
+- if (!skb2) {
+- IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+- IPSTATS_MIB_OUTDISCARDS);
+- kfree_skb(skb);
+- return -ENOBUFS;
+- }
+- if (skb->sk)
+- skb_set_owner_w(skb2, skb->sk);
+- consume_skb(skb);
+- skb = skb2;
++ if (unlikely(skb_headroom(skb) < head_room)) {
++ struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
++ if (!skb2) {
++ IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
++ IPSTATS_MIB_OUTDISCARDS);
++ kfree_skb(skb);
++ return -ENOBUFS;
+ }
++ if (skb->sk)
++ skb_set_owner_w(skb2, skb->sk);
++ consume_skb(skb);
++ skb = skb2;
++ }
++
++ if (opt) {
++ seg_len += opt->opt_nflen + opt->opt_flen;
++
+ if (opt->opt_flen)
+ ipv6_push_frag_opts(skb, opt, &proto);
++
+ if (opt->opt_nflen)
+ ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
+ &fl6->saddr);
--- /dev/null
+From foo@baz Thu Dec 13 10:38:53 CET 2018
+From: Shmulik Ladkani <shmulik@metanetworks.com>
+Date: Fri, 7 Dec 2018 09:50:17 +0200
+Subject: ipv6: sr: properly initialize flowi6 prior passing to ip6_route_output
+
+From: Shmulik Ladkani <shmulik@metanetworks.com>
+
+[ Upstream commit 1b4e5ad5d6b9f15cd0b5121f86d4719165958417 ]
+
+In 'seg6_output', stack variable 'struct flowi6 fl6' was missing
+initialization.
+
+Fixes: 6c8702c60b88 ("ipv6: sr: add support for SRH encapsulation and injection with lwtunnels")
+Signed-off-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/seg6_iptunnel.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/ipv6/seg6_iptunnel.c
++++ b/net/ipv6/seg6_iptunnel.c
+@@ -347,6 +347,7 @@ static int seg6_output(struct net *net,
+ struct ipv6hdr *hdr = ipv6_hdr(skb);
+ struct flowi6 fl6;
+
++ memset(&fl6, 0, sizeof(fl6));
+ fl6.daddr = hdr->daddr;
+ fl6.saddr = hdr->saddr;
+ fl6.flowlabel = ip6_flowinfo(hdr);
--- /dev/null
+From foo@baz Thu Dec 13 10:38:53 CET 2018
+From: Stefano Brivio <sbrivio@redhat.com>
+Date: Thu, 6 Dec 2018 19:30:37 +0100
+Subject: neighbour: Avoid writing before skb->head in neigh_hh_output()
+
+From: Stefano Brivio <sbrivio@redhat.com>
+
+[ Upstream commit e6ac64d4c4d095085d7dd71cbd05704ac99829b2 ]
+
+While skb_push() makes the kernel panic if the skb headroom is less than
+the unaligned hardware header size, it will proceed normally in case we
+copy more than that because of alignment, and we'll silently corrupt
+adjacent slabs.
+
+In the case fixed by the previous patch,
+"ipv6: Check available headroom in ip6_xmit() even without options", we
+end up in neigh_hh_output() with 14 bytes headroom, 14 bytes hardware
+header and write 16 bytes, starting 2 bytes before the allocated buffer.
+
+Always check we're not writing before skb->head and, if the headroom is
+not enough, warn and drop the packet.
+
+v2:
+ - instead of panicking with BUG_ON(), WARN_ON_ONCE() and drop the packet
+ (Eric Dumazet)
+ - if we avoid the panic, though, we need to explicitly check the headroom
+ before the memcpy(), otherwise we'll have corrupted slabs on a running
+ kernel, after we warn
+ - use __skb_push() instead of skb_push(), as the headroom check is
+ already implemented here explicitly (Eric Dumazet)
+
+Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/neighbour.h | 28 +++++++++++++++++++++++-----
+ 1 file changed, 23 insertions(+), 5 deletions(-)
+
+--- a/include/net/neighbour.h
++++ b/include/net/neighbour.h
+@@ -453,6 +453,7 @@ static inline int neigh_hh_bridge(struct
+
+ static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb)
+ {
++ unsigned int hh_alen = 0;
+ unsigned int seq;
+ unsigned int hh_len;
+
+@@ -460,16 +461,33 @@ static inline int neigh_hh_output(const
+ seq = read_seqbegin(&hh->hh_lock);
+ hh_len = hh->hh_len;
+ if (likely(hh_len <= HH_DATA_MOD)) {
+- /* this is inlined by gcc */
+- memcpy(skb->data - HH_DATA_MOD, hh->hh_data, HH_DATA_MOD);
++ hh_alen = HH_DATA_MOD;
++
++ /* skb_push() would proceed silently if we have room for
++ * the unaligned size but not for the aligned size:
++ * check headroom explicitly.
++ */
++ if (likely(skb_headroom(skb) >= HH_DATA_MOD)) {
++ /* this is inlined by gcc */
++ memcpy(skb->data - HH_DATA_MOD, hh->hh_data,
++ HH_DATA_MOD);
++ }
+ } else {
+- unsigned int hh_alen = HH_DATA_ALIGN(hh_len);
++ hh_alen = HH_DATA_ALIGN(hh_len);
+
+- memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
++ if (likely(skb_headroom(skb) >= hh_alen)) {
++ memcpy(skb->data - hh_alen, hh->hh_data,
++ hh_alen);
++ }
+ }
+ } while (read_seqretry(&hh->hh_lock, seq));
+
+- skb_push(skb, hh_len);
++ if (WARN_ON_ONCE(skb_headroom(skb) < hh_alen)) {
++ kfree_skb(skb);
++ return NET_XMIT_DROP;
++ }
++
++ __skb_push(skb, hh_len);
+ return dev_queue_xmit(skb);
+ }
+
--- /dev/null
+From foo@baz Thu Dec 13 10:38:53 CET 2018
+From: Su Yanjun <suyj.fnst@cn.fujitsu.com>
+Date: Mon, 3 Dec 2018 15:33:07 +0800
+Subject: net: 8139cp: fix a BUG triggered by changing mtu with network traffic
+
+From: Su Yanjun <suyj.fnst@cn.fujitsu.com>
+
+[ Upstream commit a5d4a89245ead1f37ed135213653c5beebea4237 ]
+
+When changing mtu many times with traffic, a bug is triggered:
+
+[ 1035.684037] kernel BUG at lib/dynamic_queue_limits.c:26!
+[ 1035.684042] invalid opcode: 0000 [#1] SMP
+[ 1035.684049] Modules linked in: loop binfmt_misc 8139cp(OE) macsec
+tcp_diag udp_diag inet_diag unix_diag af_packet_diag netlink_diag tcp_lp
+fuse uinput xt_CHECKSUM iptable_mangle ipt_MASQUERADE
+nf_nat_masquerade_ipv4 iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4
+nf_defrag_ipv4 xt_conntrack nf_conntrack ipt_REJECT nf_reject_ipv4 tun
+bridge stp llc ebtable_filter ebtables ip6table_filter devlink
+ip6_tables iptable_filter sunrpc snd_hda_codec_generic snd_hda_intel
+snd_hda_codec snd_hda_core snd_hwdep ppdev snd_seq iosf_mbi crc32_pclmul
+parport_pc snd_seq_device ghash_clmulni_intel parport snd_pcm
+aesni_intel joydev lrw snd_timer virtio_balloon sg gf128mul glue_helper
+ablk_helper cryptd snd soundcore i2c_piix4 pcspkr ip_tables xfs
+libcrc32c sr_mod sd_mod cdrom crc_t10dif crct10dif_generic ata_generic
+[ 1035.684102] pata_acpi virtio_console qxl drm_kms_helper syscopyarea
+sysfillrect sysimgblt floppy fb_sys_fops crct10dif_pclmul
+crct10dif_common ttm crc32c_intel serio_raw ata_piix drm libata 8139too
+virtio_pci drm_panel_orientation_quirks virtio_ring virtio mii dm_mirror
+dm_region_hash dm_log dm_mod [last unloaded: 8139cp]
+[ 1035.684132] CPU: 9 PID: 25140 Comm: if-mtu-change Kdump: loaded
+Tainted: G OE ------------ T 3.10.0-957.el7.x86_64 #1
+[ 1035.684134] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
+[ 1035.684136] task: ffff8f59b1f5a080 ti: ffff8f5a2e32c000 task.ti:
+ffff8f5a2e32c000
+[ 1035.684149] RIP: 0010:[<ffffffffba3a40d0>] [<ffffffffba3a40d0>]
+dql_completed+0x180/0x190
+[ 1035.684162] RSP: 0000:ffff8f5a75483e50 EFLAGS: 00010093
+[ 1035.684162] RAX: 00000000000000c2 RBX: ffff8f5a6f91c000 RCX:
+0000000000000000
+[ 1035.684162] RDX: 0000000000000000 RSI: 0000000000000184 RDI:
+ffff8f599fea3ec0
+[ 1035.684162] RBP: ffff8f5a75483ea8 R08: 00000000000000c2 R09:
+0000000000000000
+[ 1035.684162] R10: 00000000000616ef R11: ffff8f5a75483b56 R12:
+ffff8f599fea3e00
+[ 1035.684162] R13: 0000000000000001 R14: 0000000000000000 R15:
+0000000000000184
+[ 1035.684162] FS: 00007fa8434de740(0000) GS:ffff8f5a75480000(0000)
+knlGS:0000000000000000
+[ 1035.684162] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[ 1035.684162] CR2: 00000000004305d0 CR3: 000000024eb66000 CR4:
+00000000001406e0
+[ 1035.684162] Call Trace:
+[ 1035.684162] <IRQ>
+[ 1035.684162] [<ffffffffc08cbaf8>] ? cp_interrupt+0x478/0x580 [8139cp]
+[ 1035.684162] [<ffffffffba14a294>]
+__handle_irq_event_percpu+0x44/0x1c0
+[ 1035.684162] [<ffffffffba14a442>] handle_irq_event_percpu+0x32/0x80
+[ 1035.684162] [<ffffffffba14a4cc>] handle_irq_event+0x3c/0x60
+[ 1035.684162] [<ffffffffba14db29>] handle_fasteoi_irq+0x59/0x110
+[ 1035.684162] [<ffffffffba02e554>] handle_irq+0xe4/0x1a0
+[ 1035.684162] [<ffffffffba7795dd>] do_IRQ+0x4d/0xf0
+[ 1035.684162] [<ffffffffba76b362>] common_interrupt+0x162/0x162
+[ 1035.684162] <EOI>
+[ 1035.684162] [<ffffffffba0c2ae4>] ? __wake_up_bit+0x24/0x70
+[ 1035.684162] [<ffffffffba1e46f5>] ? do_set_pte+0xd5/0x120
+[ 1035.684162] [<ffffffffba1b64fb>] unlock_page+0x2b/0x30
+[ 1035.684162] [<ffffffffba1e4879>] do_read_fault.isra.61+0x139/0x1b0
+[ 1035.684162] [<ffffffffba1e9134>] handle_pte_fault+0x2f4/0xd10
+[ 1035.684162] [<ffffffffba1ebc6d>] handle_mm_fault+0x39d/0x9b0
+[ 1035.684162] [<ffffffffba76f5e3>] __do_page_fault+0x203/0x500
+[ 1035.684162] [<ffffffffba76f9c6>] trace_do_page_fault+0x56/0x150
+[ 1035.684162] [<ffffffffba76ef42>] do_async_page_fault+0x22/0xf0
+[ 1035.684162] [<ffffffffba76b788>] async_page_fault+0x28/0x30
+[ 1035.684162] Code: 54 c7 47 54 ff ff ff ff 44 0f 49 ce 48 8b 35 48 2f
+9c 00 48 89 77 58 e9 fe fe ff ff 0f 1f 80 00 00 00 00 41 89 d1 e9 ef fe
+ff ff <0f> 0b 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 55 8d 42 ff 48
+[ 1035.684162] RIP [<ffffffffba3a40d0>] dql_completed+0x180/0x190
+[ 1035.684162] RSP <ffff8f5a75483e50>
+
+It's not the same issue as described in the 7fe0ee09 patch.
+As 8139cp uses shared irq mode, other device irq will trigger
+cp_interrupt to execute.
+
+cp_change_mtu
+ -> cp_close
+ -> cp_open
+
+In cp_close routine just before free_irq(), some interrupt may occur.
+In my environment, cp_interrupt executes and IntrStatus is 0x4,
+exactly TxOk. That will cause cp_tx to wake device queue.
+
+As device queue is started, cp_start_xmit and cp_open will run at same
+time which will cause kernel BUG.
+
+For example:
+[#] for tx descriptor
+
+At start:
+
+[#][#][#]
+num_queued=3
+
+After cp_init_hw->cp_start_hw->netdev_reset_queue:
+
+[#][#][#]
+num_queued=0
+
+When 8139cp starts to work then cp_tx will check
+num_queued mismatches the complete_bytes.
+
+The patch will check IntrMask before check IntrStatus in cp_interrupt.
+When 8139cp interrupt is disabled, just return.
+
+Signed-off-by: Su Yanjun <suyj.fnst@cn.fujitsu.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/realtek/8139cp.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/drivers/net/ethernet/realtek/8139cp.c
++++ b/drivers/net/ethernet/realtek/8139cp.c
+@@ -571,6 +571,7 @@ static irqreturn_t cp_interrupt (int irq
+ struct cp_private *cp;
+ int handled = 0;
+ u16 status;
++ u16 mask;
+
+ if (unlikely(dev == NULL))
+ return IRQ_NONE;
+@@ -578,6 +579,10 @@ static irqreturn_t cp_interrupt (int irq
+
+ spin_lock(&cp->lock);
+
++ mask = cpr16(IntrMask);
++ if (!mask)
++ goto out_unlock;
++
+ status = cpr16(IntrStatus);
+ if (!status || (status == 0xFFFF))
+ goto out_unlock;
--- /dev/null
+From foo@baz Thu Dec 13 10:38:53 CET 2018
+From: Sabrina Dubroca <sd@queasysnail.net>
+Date: Thu, 29 Nov 2018 14:14:49 +0100
+Subject: net: fix XPS static_key accounting
+
+From: Sabrina Dubroca <sd@queasysnail.net>
+
+[ Upstream commit 867d0ad476db89a1e8af3f297af402399a54eea5 ]
+
+Commit 04157469b7b8 ("net: Use static_key for XPS maps") introduced a
+static key for XPS, but the increments/decrements don't match.
+
+First, the static key's counter is incremented once for each queue, but
+only decremented once for a whole batch of queues, leading to large
+unbalances.
+
+Second, the xps_rxqs_needed key is decremented whenever we reset a batch
+of queues, whether they had any rxqs mapping or not, so that if we setup
+cpu-XPS on em1 and RXQS-XPS on em2, resetting the queues on em1 would
+decrement the xps_rxqs_needed key.
+
+This reworks the accounting scheme so that the xps_needed key is
+incremented only once for each type of XPS for all the queues on a
+device, and the xps_rxqs_needed key is incremented only once for all
+queues. This is sufficient to let us retrieve queues via
+get_xps_queue().
+
+This patch introduces a new reset_xps_maps(), which reinitializes and
+frees the appropriate map (xps_rxqs_map or xps_cpus_map), and drops a
+reference to the needed keys:
+ - both xps_needed and xps_rxqs_needed, in case of rxqs maps,
+ - only xps_needed, in case of CPU maps.
+
+Now, we also need to call reset_xps_maps() at the end of
+__netif_set_xps_queue() when there's no active map left, for example
+when writing '00000000,00000000' to all queues' xps_rxqs setting.
+
+Fixes: 04157469b7b8 ("net: Use static_key for XPS maps")
+Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c | 45 ++++++++++++++++++++++++---------------------
+ 1 file changed, 24 insertions(+), 21 deletions(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -2161,6 +2161,20 @@ static bool remove_xps_queue_cpu(struct
+ return active;
+ }
+
++static void reset_xps_maps(struct net_device *dev,
++ struct xps_dev_maps *dev_maps,
++ bool is_rxqs_map)
++{
++ if (is_rxqs_map) {
++ static_key_slow_dec_cpuslocked(&xps_rxqs_needed);
++ RCU_INIT_POINTER(dev->xps_rxqs_map, NULL);
++ } else {
++ RCU_INIT_POINTER(dev->xps_cpus_map, NULL);
++ }
++ static_key_slow_dec_cpuslocked(&xps_needed);
++ kfree_rcu(dev_maps, rcu);
++}
++
+ static void clean_xps_maps(struct net_device *dev, const unsigned long *mask,
+ struct xps_dev_maps *dev_maps, unsigned int nr_ids,
+ u16 offset, u16 count, bool is_rxqs_map)
+@@ -2172,13 +2186,8 @@ static void clean_xps_maps(struct net_de
+ j < nr_ids;)
+ active |= remove_xps_queue_cpu(dev, dev_maps, j, offset,
+ count);
+- if (!active) {
+- if (is_rxqs_map)
+- RCU_INIT_POINTER(dev->xps_rxqs_map, NULL);
+- else
+- RCU_INIT_POINTER(dev->xps_cpus_map, NULL);
+- kfree_rcu(dev_maps, rcu);
+- }
++ if (!active)
++ reset_xps_maps(dev, dev_maps, is_rxqs_map);
+
+ if (!is_rxqs_map) {
+ for (i = offset + (count - 1); count--; i--) {
+@@ -2222,10 +2231,6 @@ static void netif_reset_xps_queues(struc
+ false);
+
+ out_no_maps:
+- if (static_key_enabled(&xps_rxqs_needed))
+- static_key_slow_dec_cpuslocked(&xps_rxqs_needed);
+-
+- static_key_slow_dec_cpuslocked(&xps_needed);
+ mutex_unlock(&xps_map_mutex);
+ cpus_read_unlock();
+ }
+@@ -2343,9 +2348,12 @@ int __netif_set_xps_queue(struct net_dev
+ if (!new_dev_maps)
+ goto out_no_new_maps;
+
+- static_key_slow_inc_cpuslocked(&xps_needed);
+- if (is_rxqs_map)
+- static_key_slow_inc_cpuslocked(&xps_rxqs_needed);
++ if (!dev_maps) {
++ /* Increment static keys at most once per type */
++ static_key_slow_inc_cpuslocked(&xps_needed);
++ if (is_rxqs_map)
++ static_key_slow_inc_cpuslocked(&xps_rxqs_needed);
++ }
+
+ for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
+ j < nr_ids;) {
+@@ -2443,13 +2451,8 @@ out_no_new_maps:
+ }
+
+ /* free map if not active */
+- if (!active) {
+- if (is_rxqs_map)
+- RCU_INIT_POINTER(dev->xps_rxqs_map, NULL);
+- else
+- RCU_INIT_POINTER(dev->xps_cpus_map, NULL);
+- kfree_rcu(dev_maps, rcu);
+- }
++ if (!active)
++ reset_xps_maps(dev, dev_maps, is_rxqs_map);
+
+ out_no_maps:
+ mutex_unlock(&xps_map_mutex);
--- /dev/null
+From foo@baz Thu Dec 13 10:38:53 CET 2018
+From: Tarick Bedeir <tarick@google.com>
+Date: Fri, 7 Dec 2018 00:30:26 -0800
+Subject: net/mlx4_core: Correctly set PFC param if global pause is turned off.
+
+From: Tarick Bedeir <tarick@google.com>
+
+[ Upstream commit bd5122cd1e0644d8bd8dd84517c932773e999766 ]
+
+rx_ppp and tx_ppp can be set between 0 and 255, so don't clamp to 1.
+
+Fixes: 6e8814ceb7e8 ("net/mlx4_en: Fix mixed PFC and Global pause user control requests")
+Signed-off-by: Tarick Bedeir <tarick@google.com>
+Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
++++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+@@ -1084,8 +1084,8 @@ static int mlx4_en_set_pauseparam(struct
+
+ tx_pause = !!(pause->tx_pause);
+ rx_pause = !!(pause->rx_pause);
+- rx_ppp = priv->prof->rx_ppp && !(tx_pause || rx_pause);
+- tx_ppp = priv->prof->tx_ppp && !(tx_pause || rx_pause);
++ rx_ppp = (tx_pause || rx_pause) ? 0 : priv->prof->rx_ppp;
++ tx_ppp = (tx_pause || rx_pause) ? 0 : priv->prof->tx_ppp;
+
+ err = mlx4_SET_PORT_general(mdev->dev, priv->port,
+ priv->rx_skb_size + ETH_FCS_LEN,
--- /dev/null
+From foo@baz Thu Dec 13 10:38:53 CET 2018
+From: Eran Ben Elisha <eranbe@mellanox.com>
+Date: Sun, 2 Dec 2018 14:34:36 +0200
+Subject: net/mlx4_en: Change min MTU size to ETH_MIN_MTU
+
+From: Eran Ben Elisha <eranbe@mellanox.com>
+
+[ Upstream commit 24be19e47779d604d1492c114459dca9a92acf78 ]
+
+NIC driver minimal MTU size shall be set to ETH_MIN_MTU, as defined in
+the RFC791 and in the network stack. Remove old mlx4_en only define for
+it, which was set to wrong value.
+
+Fixes: b80f71f5816f ("ethernet/mellanox: use core min/max MTU checking")
+Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
+Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 4 ++--
+ drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 1 -
+ 2 files changed, 2 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
++++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+@@ -3494,8 +3494,8 @@ int mlx4_en_init_netdev(struct mlx4_en_d
+ dev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM;
+ }
+
+- /* MTU range: 46 - hw-specific max */
+- dev->min_mtu = MLX4_EN_MIN_MTU;
++ /* MTU range: 68 - hw-specific max */
++ dev->min_mtu = ETH_MIN_MTU;
+ dev->max_mtu = priv->max_mtu;
+
+ mdev->pndev[port] = dev;
+--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
++++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+@@ -161,7 +161,6 @@
+ #define MLX4_SELFTEST_LB_MIN_MTU (MLX4_LOOPBACK_TEST_PAYLOAD + NET_IP_ALIGN + \
+ ETH_HLEN + PREAMBLE_LEN)
+
+-#define MLX4_EN_MIN_MTU 46
+ /* VLAN_HLEN is added twice,to support skb vlan tagged with multiple
+ * headers. (For example: ETH_P_8021Q and ETH_P_8021AD).
+ */
--- /dev/null
+From foo@baz Thu Dec 13 10:38:53 CET 2018
+From: Heiner Kallweit <hkallweit1@gmail.com>
+Date: Mon, 3 Dec 2018 08:19:33 +0100
+Subject: net: phy: don't allow __set_phy_supported to add unsupported modes
+
+From: Heiner Kallweit <hkallweit1@gmail.com>
+
+[ Upstream commit d2a36971ef595069b7a600d1144c2e0881a930a1 ]
+
+Currently __set_phy_supported allows to add modes w/o checking whether
+the PHY supports them. This is wrong, it should never add modes but
+only remove modes we don't want to support.
+
+The commit marked as fixed didn't do anything wrong, it just copied
+existing functionality to the helper which is being fixed now.
+
+Fixes: f3a6bd393c2c ("phylib: Add phy_set_max_speed helper")
+Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/phy_device.c | 19 ++++++++-----------
+ 1 file changed, 8 insertions(+), 11 deletions(-)
+
+--- a/drivers/net/phy/phy_device.c
++++ b/drivers/net/phy/phy_device.c
+@@ -1738,20 +1738,17 @@ EXPORT_SYMBOL(genphy_loopback);
+
+ static int __set_phy_supported(struct phy_device *phydev, u32 max_speed)
+ {
+- phydev->supported &= ~(PHY_1000BT_FEATURES | PHY_100BT_FEATURES |
+- PHY_10BT_FEATURES);
+-
+ switch (max_speed) {
+- default:
+- return -ENOTSUPP;
+- case SPEED_1000:
+- phydev->supported |= PHY_1000BT_FEATURES;
++ case SPEED_10:
++ phydev->supported &= ~PHY_100BT_FEATURES;
+ /* fall through */
+ case SPEED_100:
+- phydev->supported |= PHY_100BT_FEATURES;
+- /* fall through */
+- case SPEED_10:
+- phydev->supported |= PHY_10BT_FEATURES;
++ phydev->supported &= ~PHY_1000BT_FEATURES;
++ break;
++ case SPEED_1000:
++ break;
++ default:
++ return -ENOTSUPP;
+ }
+
+ return 0;
--- /dev/null
+From foo@baz Thu Dec 13 10:38:53 CET 2018
+From: Baruch Siach <baruch@tkos.co.il>
+Date: Thu, 29 Nov 2018 12:40:11 +0200
+Subject: net: phy: sfp: correct store of detected link modes
+
+From: Baruch Siach <baruch@tkos.co.il>
+
+[ Upstream commit d7f7e0018b96fd1a30a968faa9464eb57372c1ec ]
+
+The link modes that sfp_parse_support() detects are stored in the
+'modes' bitmap. There is no reason to make an exception for 1000Base-PX
+or 1000Base-BX10.
+
+Fixes: 03145864bd0f ("sfp: support 1G BiDi (eg, FiberStore SFP-GE-BX) modules")
+Signed-off-by: Baruch Siach <baruch@tkos.co.il>
+Acked-by: Russell King <rmk+kernel@armlinux.org.uk>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/sfp-bus.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/phy/sfp-bus.c
++++ b/drivers/net/phy/sfp-bus.c
+@@ -162,7 +162,7 @@ void sfp_parse_support(struct sfp_bus *b
+ /* 1000Base-PX or 1000Base-BX10 */
+ if ((id->base.e_base_px || id->base.e_base_bx10) &&
+ br_min <= 1300 && br_max >= 1200)
+- phylink_set(support, 1000baseX_Full);
++ phylink_set(modes, 1000baseX_Full);
+
+ /* For active or passive cables, select the link modes
+ * based on the bit rates and the cable compliance bytes.
--- /dev/null
+From foo@baz Thu Dec 13 10:38:53 CET 2018
+From: Christoph Paasch <cpaasch@apple.com>
+Date: Thu, 29 Nov 2018 16:01:04 -0800
+Subject: net: Prevent invalid access to skb->prev in __qdisc_drop_all
+
+From: Christoph Paasch <cpaasch@apple.com>
+
+[ Upstream commit 9410d386d0a829ace9558336263086c2fbbe8aed ]
+
+__qdisc_drop_all() accesses skb->prev to get to the tail of the
+segment-list.
+
+With commit 68d2f84a1368 ("net: gro: properly remove skb from list")
+the skb-list handling has been changed to set skb->next to NULL and set
+the list-poison on skb->prev.
+
+With that change, __qdisc_drop_all() will panic when it tries to
+dereference skb->prev.
+
+Since commit 992cba7e276d ("net: Add and use skb_list_del_init().")
+__list_del_entry is used, leaving skb->prev unchanged (thus,
+pointing to the list-head if it's the first skb of the list).
+This will make __qdisc_drop_all modify the next-pointer of the list-head
+and result in a panic later on:
+
+[ 34.501053] general protection fault: 0000 [#1] SMP KASAN PTI
+[ 34.501968] CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.20.0-rc2.mptcp #108
+[ 34.502887] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.5.1 01/01/2011
+[ 34.504074] RIP: 0010:dev_gro_receive+0x343/0x1f90
+[ 34.504751] Code: e0 48 c1 e8 03 42 80 3c 30 00 0f 85 4a 1c 00 00 4d 8b 24 24 4c 39 65 d0 0f 84 0a 04 00 00 49 8d 7c 24 38 48 89 f8 48 c1 e8 03 <42> 0f b6 04 30 84 c0 74 08 3c 04
+[ 34.507060] RSP: 0018:ffff8883af507930 EFLAGS: 00010202
+[ 34.507761] RAX: 0000000000000007 RBX: ffff8883970b2c80 RCX: 1ffff11072e165a6
+[ 34.508640] RDX: 1ffff11075867008 RSI: ffff8883ac338040 RDI: 0000000000000038
+[ 34.509493] RBP: ffff8883af5079d0 R08: ffff8883970b2d40 R09: 0000000000000062
+[ 34.510346] R10: 0000000000000034 R11: 0000000000000000 R12: 0000000000000000
+[ 34.511215] R13: 0000000000000000 R14: dffffc0000000000 R15: ffff8883ac338008
+[ 34.512082] FS: 0000000000000000(0000) GS:ffff8883af500000(0000) knlGS:0000000000000000
+[ 34.513036] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[ 34.513741] CR2: 000055ccc3e9d020 CR3: 00000003abf32000 CR4: 00000000000006e0
+[ 34.514593] Call Trace:
+[ 34.514893] <IRQ>
+[ 34.515157] napi_gro_receive+0x93/0x150
+[ 34.515632] receive_buf+0x893/0x3700
+[ 34.516094] ? __netif_receive_skb+0x1f/0x1a0
+[ 34.516629] ? virtnet_probe+0x1b40/0x1b40
+[ 34.517153] ? __stable_node_chain+0x4d0/0x850
+[ 34.517684] ? kfree+0x9a/0x180
+[ 34.518067] ? __kasan_slab_free+0x171/0x190
+[ 34.518582] ? detach_buf+0x1df/0x650
+[ 34.519061] ? lapic_next_event+0x5a/0x90
+[ 34.519539] ? virtqueue_get_buf_ctx+0x280/0x7f0
+[ 34.520093] virtnet_poll+0x2df/0xd60
+[ 34.520533] ? receive_buf+0x3700/0x3700
+[ 34.521027] ? qdisc_watchdog_schedule_ns+0xd5/0x140
+[ 34.521631] ? htb_dequeue+0x1817/0x25f0
+[ 34.522107] ? sch_direct_xmit+0x142/0xf30
+[ 34.522595] ? virtqueue_napi_schedule+0x26/0x30
+[ 34.523155] net_rx_action+0x2f6/0xc50
+[ 34.523601] ? napi_complete_done+0x2f0/0x2f0
+[ 34.524126] ? kasan_check_read+0x11/0x20
+[ 34.524608] ? _raw_spin_lock+0x7d/0xd0
+[ 34.525070] ? _raw_spin_lock_bh+0xd0/0xd0
+[ 34.525563] ? kvm_guest_apic_eoi_write+0x6b/0x80
+[ 34.526130] ? apic_ack_irq+0x9e/0xe0
+[ 34.526567] __do_softirq+0x188/0x4b5
+[ 34.527015] irq_exit+0x151/0x180
+[ 34.527417] do_IRQ+0xdb/0x150
+[ 34.527783] common_interrupt+0xf/0xf
+[ 34.528223] </IRQ>
+
+This patch makes sure that skb->prev is set to NULL when entering
+netem_enqueue.
+
+Cc: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
+Cc: Tyler Hicks <tyhicks@canonical.com>
+Cc: Eric Dumazet <eric.dumazet@gmail.com>
+Fixes: 68d2f84a1368 ("net: gro: properly remove skb from list")
+Suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
+Signed-off-by: Christoph Paasch <cpaasch@apple.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_netem.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/sched/sch_netem.c
++++ b/net/sched/sch_netem.c
+@@ -441,6 +441,9 @@ static int netem_enqueue(struct sk_buff
+ int count = 1;
+ int rc = NET_XMIT_SUCCESS;
+
++ /* Do not fool qdisc_drop_all() */
++ skb->prev = NULL;
++
+ /* Random duplication */
+ if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
+ ++count;
--- /dev/null
+From foo@baz Thu Dec 13 10:38:53 CET 2018
+From: Sabrina Dubroca <sd@queasysnail.net>
+Date: Thu, 29 Nov 2018 14:14:48 +0100
+Subject: net: restore call to netdev_queue_numa_node_write when resetting XPS
+
+From: Sabrina Dubroca <sd@queasysnail.net>
+
+[ Upstream commit f28c020fb488e1a8b87469812017044bef88aa2b ]
+
+Before commit 80d19669ecd3 ("net: Refactor XPS for CPUs and Rx queues"),
+netif_reset_xps_queues() did netdev_queue_numa_node_write() for all the
+queues being reset. Now, this is only done when the "active" variable in
+clean_xps_maps() is false, ie when on all the CPUs, there's no active
+XPS mapping left.
+
+Fixes: 80d19669ecd3 ("net: Refactor XPS for CPUs and Rx queues")
+Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c | 16 +++++++++-------
+ 1 file changed, 9 insertions(+), 7 deletions(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -2173,17 +2173,19 @@ static void clean_xps_maps(struct net_de
+ active |= remove_xps_queue_cpu(dev, dev_maps, j, offset,
+ count);
+ if (!active) {
+- if (is_rxqs_map) {
++ if (is_rxqs_map)
+ RCU_INIT_POINTER(dev->xps_rxqs_map, NULL);
+- } else {
++ else
+ RCU_INIT_POINTER(dev->xps_cpus_map, NULL);
++ kfree_rcu(dev_maps, rcu);
++ }
+
+- for (i = offset + (count - 1); count--; i--)
+- netdev_queue_numa_node_write(
+- netdev_get_tx_queue(dev, i),
+- NUMA_NO_NODE);
++ if (!is_rxqs_map) {
++ for (i = offset + (count - 1); count--; i--) {
++ netdev_queue_numa_node_write(
++ netdev_get_tx_queue(dev, i),
++ NUMA_NO_NODE);
+ }
+- kfree_rcu(dev_maps, rcu);
+ }
+ }
+
--- /dev/null
+From foo@baz Thu Dec 13 10:38:53 CET 2018
+From: Edward Cree <ecree@solarflare.com>
+Date: Tue, 4 Dec 2018 17:37:57 +0000
+Subject: net: use skb_list_del_init() to remove from RX sublists
+
+From: Edward Cree <ecree@solarflare.com>
+
+[ Upstream commit 22f6bbb7bcfcef0b373b0502a7ff390275c575dd ]
+
+list_del() leaves the skb->next pointer poisoned, which can then lead to
+ a crash in e.g. OVS forwarding. For example, setting up an OVS VXLAN
+ forwarding bridge on sfc as per:
+
+========
+$ ovs-vsctl show
+5dfd9c47-f04b-4aaa-aa96-4fbb0a522a30
+ Bridge "br0"
+ Port "br0"
+ Interface "br0"
+ type: internal
+ Port "enp6s0f0"
+ Interface "enp6s0f0"
+ Port "vxlan0"
+ Interface "vxlan0"
+ type: vxlan
+ options: {key="1", local_ip="10.0.0.5", remote_ip="10.0.0.4"}
+ ovs_version: "2.5.0"
+========
+(where 10.0.0.5 is an address on enp6s0f1)
+and sending traffic across it will lead to the following panic:
+========
+general protection fault: 0000 [#1] SMP PTI
+CPU: 5 PID: 0 Comm: swapper/5 Not tainted 4.20.0-rc3-ehc+ #701
+Hardware name: Dell Inc. PowerEdge R710/0M233H, BIOS 6.4.0 07/23/2013
+RIP: 0010:dev_hard_start_xmit+0x38/0x200
+Code: 53 48 89 fb 48 83 ec 20 48 85 ff 48 89 54 24 08 48 89 4c 24 18 0f 84 ab 01 00 00 48 8d 86 90 00 00 00 48 89 f5 48 89 44 24 10 <4c> 8b 33 48 c7 03 00 00 00 00 48 8b 05 c7 d1 b3 00 4d 85 f6 0f 95
+RSP: 0018:ffff888627b437e0 EFLAGS: 00010202
+RAX: 0000000000000000 RBX: dead000000000100 RCX: ffff88862279c000
+RDX: ffff888614a342c0 RSI: 0000000000000000 RDI: 0000000000000000
+RBP: ffff888618a88000 R08: 0000000000000001 R09: 00000000000003e8
+R10: 0000000000000000 R11: ffff888614a34140 R12: 0000000000000000
+R13: 0000000000000062 R14: dead000000000100 R15: ffff888616430000
+FS: 0000000000000000(0000) GS:ffff888627b40000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007f6d2bc6d000 CR3: 000000000200a000 CR4: 00000000000006e0
+Call Trace:
+ <IRQ>
+ __dev_queue_xmit+0x623/0x870
+ ? masked_flow_lookup+0xf7/0x220 [openvswitch]
+ ? ep_poll_callback+0x101/0x310
+ do_execute_actions+0xaba/0xaf0 [openvswitch]
+ ? __wake_up_common+0x8a/0x150
+ ? __wake_up_common_lock+0x87/0xc0
+ ? queue_userspace_packet+0x31c/0x5b0 [openvswitch]
+ ovs_execute_actions+0x47/0x120 [openvswitch]
+ ovs_dp_process_packet+0x7d/0x110 [openvswitch]
+ ovs_vport_receive+0x6e/0xd0 [openvswitch]
+ ? dst_alloc+0x64/0x90
+ ? rt_dst_alloc+0x50/0xd0
+ ? ip_route_input_slow+0x19a/0x9a0
+ ? __udp_enqueue_schedule_skb+0x198/0x1b0
+ ? __udp4_lib_rcv+0x856/0xa30
+ ? __udp4_lib_rcv+0x856/0xa30
+ ? cpumask_next_and+0x19/0x20
+ ? find_busiest_group+0x12d/0xcd0
+ netdev_frame_hook+0xce/0x150 [openvswitch]
+ __netif_receive_skb_core+0x205/0xae0
+ __netif_receive_skb_list_core+0x11e/0x220
+ netif_receive_skb_list+0x203/0x460
+ ? __efx_rx_packet+0x335/0x5e0 [sfc]
+ efx_poll+0x182/0x320 [sfc]
+ net_rx_action+0x294/0x3c0
+ __do_softirq+0xca/0x297
+ irq_exit+0xa6/0xb0
+ do_IRQ+0x54/0xd0
+ common_interrupt+0xf/0xf
+ </IRQ>
+========
+So, in all listified-receive handling, instead pull skbs off the lists with
+ skb_list_del_init().
+
+Fixes: 9af86f933894 ("net: core: fix use-after-free in __netif_receive_skb_list_core")
+Fixes: 7da517a3bc52 ("net: core: Another step of skb receive list processing")
+Fixes: a4ca8b7df73c ("net: ipv4: fix drop handling in ip_list_rcv() and ip_list_rcv_finish()")
+Fixes: d8269e2cbf90 ("net: ipv6: listify ipv6_rcv() and ip6_rcv_finish()")
+Signed-off-by: Edward Cree <ecree@solarflare.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/skbuff.h | 11 +++++++++++
+ net/core/dev.c | 8 ++++----
+ net/ipv4/ip_input.c | 4 ++--
+ net/ipv6/ip6_input.c | 4 ++--
+ 4 files changed, 19 insertions(+), 8 deletions(-)
+
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -1355,6 +1355,17 @@ static inline void skb_zcopy_abort(struc
+ }
+ }
+
++static inline void skb_mark_not_on_list(struct sk_buff *skb)
++{
++ skb->next = NULL;
++}
++
++static inline void skb_list_del_init(struct sk_buff *skb)
++{
++ __list_del_entry(&skb->list);
++ skb_mark_not_on_list(skb);
++}
++
+ /**
+ * skb_queue_empty - check if a queue is empty
+ * @list: queue head
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -4981,7 +4981,7 @@ static void __netif_receive_skb_list_cor
+ struct net_device *orig_dev = skb->dev;
+ struct packet_type *pt_prev = NULL;
+
+- list_del(&skb->list);
++ skb_list_del_init(skb);
+ __netif_receive_skb_core(skb, pfmemalloc, &pt_prev);
+ if (!pt_prev)
+ continue;
+@@ -5137,7 +5137,7 @@ static void netif_receive_skb_list_inter
+ INIT_LIST_HEAD(&sublist);
+ list_for_each_entry_safe(skb, next, head, list) {
+ net_timestamp_check(netdev_tstamp_prequeue, skb);
+- list_del(&skb->list);
++ skb_list_del_init(skb);
+ if (!skb_defer_rx_timestamp(skb))
+ list_add_tail(&skb->list, &sublist);
+ }
+@@ -5148,7 +5148,7 @@ static void netif_receive_skb_list_inter
+ rcu_read_lock();
+ list_for_each_entry_safe(skb, next, head, list) {
+ xdp_prog = rcu_dereference(skb->dev->xdp_prog);
+- list_del(&skb->list);
++ skb_list_del_init(skb);
+ if (do_xdp_generic(xdp_prog, skb) == XDP_PASS)
+ list_add_tail(&skb->list, &sublist);
+ }
+@@ -5167,7 +5167,7 @@ static void netif_receive_skb_list_inter
+
+ if (cpu >= 0) {
+ /* Will be handled, remove from list */
+- list_del(&skb->list);
++ skb_list_del_init(skb);
+ enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
+ }
+ }
+--- a/net/ipv4/ip_input.c
++++ b/net/ipv4/ip_input.c
+@@ -551,7 +551,7 @@ static void ip_list_rcv_finish(struct ne
+ list_for_each_entry_safe(skb, next, head, list) {
+ struct dst_entry *dst;
+
+- list_del(&skb->list);
++ skb_list_del_init(skb);
+ /* if ingress device is enslaved to an L3 master device pass the
+ * skb to its handler for processing
+ */
+@@ -598,7 +598,7 @@ void ip_list_rcv(struct list_head *head,
+ struct net_device *dev = skb->dev;
+ struct net *net = dev_net(dev);
+
+- list_del(&skb->list);
++ skb_list_del_init(skb);
+ skb = ip_rcv_core(skb, net);
+ if (skb == NULL)
+ continue;
+--- a/net/ipv6/ip6_input.c
++++ b/net/ipv6/ip6_input.c
+@@ -95,7 +95,7 @@ static void ip6_list_rcv_finish(struct n
+ list_for_each_entry_safe(skb, next, head, list) {
+ struct dst_entry *dst;
+
+- list_del(&skb->list);
++ skb_list_del_init(skb);
+ /* if ingress device is enslaved to an L3 master device pass the
+ * skb to its handler for processing
+ */
+@@ -295,7 +295,7 @@ void ipv6_list_rcv(struct list_head *hea
+ struct net_device *dev = skb->dev;
+ struct net *net = dev_net(dev);
+
+- list_del(&skb->list);
++ skb_list_del_init(skb);
+ skb = ip6_rcv_core(skb, dev, net);
+ if (skb == NULL)
+ continue;
--- /dev/null
+From foo@baz Thu Dec 13 10:38:53 CET 2018
+From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Date: Fri, 7 Dec 2018 15:05:04 +1100
+Subject: Revert "net/ibm/emac: wrong bit is used for STA control"
+
+From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+
+[ Upstream commit 5b3279e2cba2238b37f6c18adfdea8bddb32715a ]
+
+This reverts commit 624ca9c33c8a853a4a589836e310d776620f4ab9.
+
+This commit is completely bogus. The STACR register has two formats, old
+and new, depending on the version of the IP block used. There's a pair of
+device-tree properties that can be used to specify the format used:
+
+ has-inverted-stacr-oc
+ has-new-stacr-staopc
+
+What this commit did was to change the bit definition used with the old
+parts to match the new parts. This of course breaks the driver on all
+the old ones.
+
+Instead, the author should have set the appropriate properties in the
+device-tree for the variant used on his board.
+
+Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/ibm/emac/emac.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/ibm/emac/emac.h
++++ b/drivers/net/ethernet/ibm/emac/emac.h
+@@ -231,7 +231,7 @@ struct emac_regs {
+ #define EMAC_STACR_PHYE 0x00004000
+ #define EMAC_STACR_STAC_MASK 0x00003000
+ #define EMAC_STACR_STAC_READ 0x00001000
+-#define EMAC_STACR_STAC_WRITE 0x00000800
++#define EMAC_STACR_STAC_WRITE 0x00002000
+ #define EMAC_STACR_OPBC_MASK 0x00000C00
+ #define EMAC_STACR_OPBC_50 0x00000000
+ #define EMAC_STACR_OPBC_66 0x00000400
--- /dev/null
+From foo@baz Thu Dec 13 10:38:53 CET 2018
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 4 Dec 2018 09:40:35 -0800
+Subject: rtnetlink: ndo_dflt_fdb_dump() only work for ARPHRD_ETHER devices
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 688838934c231bb08f46db687e57f6d8bf82709c ]
+
+kmsan was able to trigger a kernel-infoleak using a gre device [1]
+
+nlmsg_populate_fdb_fill() has a hard coded assumption
+that dev->addr_len is ETH_ALEN, as normally guaranteed
+for ARPHRD_ETHER devices.
+
+A similar issue was fixed recently in commit da71577545a5
+("rtnetlink: Disallow FDB configuration for non-Ethernet device")
+
+[1]
+BUG: KMSAN: kernel-infoleak in copyout lib/iov_iter.c:143 [inline]
+BUG: KMSAN: kernel-infoleak in _copy_to_iter+0x4c0/0x2700 lib/iov_iter.c:576
+CPU: 0 PID: 6697 Comm: syz-executor310 Not tainted 4.20.0-rc3+ #95
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Call Trace:
+ __dump_stack lib/dump_stack.c:77 [inline]
+ dump_stack+0x32d/0x480 lib/dump_stack.c:113
+ kmsan_report+0x12c/0x290 mm/kmsan/kmsan.c:683
+ kmsan_internal_check_memory+0x32a/0xa50 mm/kmsan/kmsan.c:743
+ kmsan_copy_to_user+0x78/0xd0 mm/kmsan/kmsan_hooks.c:634
+ copyout lib/iov_iter.c:143 [inline]
+ _copy_to_iter+0x4c0/0x2700 lib/iov_iter.c:576
+ copy_to_iter include/linux/uio.h:143 [inline]
+ skb_copy_datagram_iter+0x4e2/0x1070 net/core/datagram.c:431
+ skb_copy_datagram_msg include/linux/skbuff.h:3316 [inline]
+ netlink_recvmsg+0x6f9/0x19d0 net/netlink/af_netlink.c:1975
+ sock_recvmsg_nosec net/socket.c:794 [inline]
+ sock_recvmsg+0x1d1/0x230 net/socket.c:801
+ ___sys_recvmsg+0x444/0xae0 net/socket.c:2278
+ __sys_recvmsg net/socket.c:2327 [inline]
+ __do_sys_recvmsg net/socket.c:2337 [inline]
+ __se_sys_recvmsg+0x2fa/0x450 net/socket.c:2334
+ __x64_sys_recvmsg+0x4a/0x70 net/socket.c:2334
+ do_syscall_64+0xcf/0x110 arch/x86/entry/common.c:291
+ entry_SYSCALL_64_after_hwframe+0x63/0xe7
+RIP: 0033:0x441119
+Code: 18 89 d0 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 db 0a fc ff c3 66 2e 0f 1f 84 00 00 00 00
+RSP: 002b:00007fffc7f008a8 EFLAGS: 00000207 ORIG_RAX: 000000000000002f
+RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 0000000000441119
+RDX: 0000000000000040 RSI: 00000000200005c0 RDI: 0000000000000003
+RBP: 00000000006cc018 R08: 0000000000000100 R09: 0000000000000100
+R10: 0000000000000100 R11: 0000000000000207 R12: 0000000000402080
+R13: 0000000000402110 R14: 0000000000000000 R15: 0000000000000000
+
+Uninit was stored to memory at:
+ kmsan_save_stack_with_flags mm/kmsan/kmsan.c:246 [inline]
+ kmsan_save_stack mm/kmsan/kmsan.c:261 [inline]
+ kmsan_internal_chain_origin+0x13d/0x240 mm/kmsan/kmsan.c:469
+ kmsan_memcpy_memmove_metadata+0x1a9/0xf70 mm/kmsan/kmsan.c:344
+ kmsan_memcpy_metadata+0xb/0x10 mm/kmsan/kmsan.c:362
+ __msan_memcpy+0x61/0x70 mm/kmsan/kmsan_instr.c:162
+ __nla_put lib/nlattr.c:744 [inline]
+ nla_put+0x20a/0x2d0 lib/nlattr.c:802
+ nlmsg_populate_fdb_fill+0x444/0x810 net/core/rtnetlink.c:3466
+ nlmsg_populate_fdb net/core/rtnetlink.c:3775 [inline]
+ ndo_dflt_fdb_dump+0x73a/0x960 net/core/rtnetlink.c:3807
+ rtnl_fdb_dump+0x1318/0x1cb0 net/core/rtnetlink.c:3979
+ netlink_dump+0xc79/0x1c90 net/netlink/af_netlink.c:2244
+ __netlink_dump_start+0x10c4/0x11d0 net/netlink/af_netlink.c:2352
+ netlink_dump_start include/linux/netlink.h:216 [inline]
+ rtnetlink_rcv_msg+0x141b/0x1540 net/core/rtnetlink.c:4910
+ netlink_rcv_skb+0x394/0x640 net/netlink/af_netlink.c:2477
+ rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:4965
+ netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline]
+ netlink_unicast+0x1699/0x1740 net/netlink/af_netlink.c:1336
+ netlink_sendmsg+0x13c7/0x1440 net/netlink/af_netlink.c:1917
+ sock_sendmsg_nosec net/socket.c:621 [inline]
+ sock_sendmsg net/socket.c:631 [inline]
+ ___sys_sendmsg+0xe3b/0x1240 net/socket.c:2116
+ __sys_sendmsg net/socket.c:2154 [inline]
+ __do_sys_sendmsg net/socket.c:2163 [inline]
+ __se_sys_sendmsg+0x305/0x460 net/socket.c:2161
+ __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2161
+ do_syscall_64+0xcf/0x110 arch/x86/entry/common.c:291
+ entry_SYSCALL_64_after_hwframe+0x63/0xe7
+
+Uninit was created at:
+ kmsan_save_stack_with_flags mm/kmsan/kmsan.c:246 [inline]
+ kmsan_internal_poison_shadow+0x6d/0x130 mm/kmsan/kmsan.c:170
+ kmsan_kmalloc+0xa1/0x100 mm/kmsan/kmsan_hooks.c:186
+ __kmalloc+0x14c/0x4d0 mm/slub.c:3825
+ kmalloc include/linux/slab.h:551 [inline]
+ __hw_addr_create_ex net/core/dev_addr_lists.c:34 [inline]
+ __hw_addr_add_ex net/core/dev_addr_lists.c:80 [inline]
+ __dev_mc_add+0x357/0x8a0 net/core/dev_addr_lists.c:670
+ dev_mc_add+0x6d/0x80 net/core/dev_addr_lists.c:687
+ ip_mc_filter_add net/ipv4/igmp.c:1128 [inline]
+ igmp_group_added+0x4d4/0xb80 net/ipv4/igmp.c:1311
+ __ip_mc_inc_group+0xea9/0xf70 net/ipv4/igmp.c:1444
+ ip_mc_inc_group net/ipv4/igmp.c:1453 [inline]
+ ip_mc_up+0x1c3/0x400 net/ipv4/igmp.c:1775
+ inetdev_event+0x1d03/0x1d80 net/ipv4/devinet.c:1522
+ notifier_call_chain kernel/notifier.c:93 [inline]
+ __raw_notifier_call_chain kernel/notifier.c:394 [inline]
+ raw_notifier_call_chain+0x13d/0x240 kernel/notifier.c:401
+ __dev_notify_flags+0x3da/0x860 net/core/dev.c:1733
+ dev_change_flags+0x1ac/0x230 net/core/dev.c:7569
+ do_setlink+0x165f/0x5ea0 net/core/rtnetlink.c:2492
+ rtnl_newlink+0x2ad7/0x35a0 net/core/rtnetlink.c:3111
+ rtnetlink_rcv_msg+0x1148/0x1540 net/core/rtnetlink.c:4947
+ netlink_rcv_skb+0x394/0x640 net/netlink/af_netlink.c:2477
+ rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:4965
+ netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline]
+ netlink_unicast+0x1699/0x1740 net/netlink/af_netlink.c:1336
+ netlink_sendmsg+0x13c7/0x1440 net/netlink/af_netlink.c:1917
+ sock_sendmsg_nosec net/socket.c:621 [inline]
+ sock_sendmsg net/socket.c:631 [inline]
+ ___sys_sendmsg+0xe3b/0x1240 net/socket.c:2116
+ __sys_sendmsg net/socket.c:2154 [inline]
+ __do_sys_sendmsg net/socket.c:2163 [inline]
+ __se_sys_sendmsg+0x305/0x460 net/socket.c:2161
+ __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2161
+ do_syscall_64+0xcf/0x110 arch/x86/entry/common.c:291
+ entry_SYSCALL_64_after_hwframe+0x63/0xe7
+
+Bytes 36-37 of 105 are uninitialized
+Memory access of size 105 starts at ffff88819686c000
+Data copied to user address 0000000020000380
+
+Fixes: d83b06036048 ("net: add fdb generic dump routine")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: John Fastabend <john.fastabend@gmail.com>
+Cc: Ido Schimmel <idosch@mellanox.com>
+Cc: David Ahern <dsahern@gmail.com>
+Reviewed-by: Ido Schimmel <idosch@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/rtnetlink.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -3730,6 +3730,9 @@ int ndo_dflt_fdb_dump(struct sk_buff *sk
+ {
+ int err;
+
++ if (dev->type != ARPHRD_ETHER)
++ return -EINVAL;
++
+ netif_addr_lock_bh(dev);
+ err = nlmsg_populate_fdb(skb, cb, dev, idx, &dev->uc);
+ if (err)
--- /dev/null
+From foo@baz Thu Dec 13 10:38:53 CET 2018
+From: Xin Long <lucien.xin@gmail.com>
+Date: Sat, 1 Dec 2018 01:36:59 +0800
+Subject: sctp: kfree_rcu asoc
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit fb6df5a6234c38a9c551559506a49a677ac6f07a ]
+
+In sctp_hash_transport/sctp_epaddr_lookup_transport, it dereferences
+a transport's asoc under rcu_read_lock while the asoc is freed without
+waiting for a grace period, which leads to a use-after-free panic.
+
+This patch fixes it by calling kfree_rcu to make asoc be freed after
+a grace period.
+
+Note that only the asoc's memory is delayed to free in the patch, it
+won't cause sk to linger longer.
+
+Thanks Neil and Marcelo to make this clear.
+
+Fixes: 7fda702f9315 ("sctp: use new rhlist interface on sctp transport rhashtable")
+Fixes: cd2b70875058 ("sctp: check duplicate node before inserting a new transport")
+Reported-by: syzbot+0b05d8aa7cb185107483@syzkaller.appspotmail.com
+Reported-by: syzbot+aad231d51b1923158444@syzkaller.appspotmail.com
+Suggested-by: Neil Horman <nhorman@tuxdriver.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Acked-by: Neil Horman <nhorman@tuxdriver.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/sctp/structs.h | 2 ++
+ net/sctp/associola.c | 2 +-
+ 2 files changed, 3 insertions(+), 1 deletion(-)
+
+--- a/include/net/sctp/structs.h
++++ b/include/net/sctp/structs.h
+@@ -2075,6 +2075,8 @@ struct sctp_association {
+
+ __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1];
+ __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1];
++
++ struct rcu_head rcu;
+ };
+
+
+--- a/net/sctp/associola.c
++++ b/net/sctp/associola.c
+@@ -434,7 +434,7 @@ static void sctp_association_destroy(str
+
+ WARN_ON(atomic_read(&asoc->rmem_alloc));
+
+- kfree(asoc);
++ kfree_rcu(asoc, rcu);
+ SCTP_DBG_OBJCNT_DEC(assoc);
+ }
+
--- /dev/null
+From foo@baz Thu Dec 13 10:38:53 CET 2018
+From: Xin Long <lucien.xin@gmail.com>
+Date: Tue, 27 Nov 2018 19:11:50 +0800
+Subject: sctp: update frag_point when stream_interleave is set
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit 4135cce7fd0a0d755665c02728578c7c5afe4726 ]
+
+sctp_assoc_update_frag_point() should be called whenever asoc->pathmtu
+changes, but we missed one place in sctp_association_init(). It would
+cause frag_point to be zero when sending data.
+
+As says in Jakub's reproducer, if sp->pathmtu is set by socketopt, the
+new asoc->pathmtu inherits it in sctp_association_init(). Later when
+transports are added and their pmtu >= asoc->pathmtu, it will never
+call sctp_assoc_update_frag_point() to set frag_point.
+
+This patch is to fix it by updating frag_point after asoc->pathmtu is
+set as sp->pathmtu in sctp_association_init(). Note that it moved them
+after sctp_stream_init(), as stream->si needs to be set first.
+
+Frag_point's calculation is also related with datachunk's type, so it
+needs to update frag_point when stream->si may be changed in
+sctp_process_init().
+
+v1->v2:
+ - call sctp_assoc_update_frag_point() separately in sctp_process_init
+ and sctp_association_init, per Marcelo's suggestion.
+
+Fixes: 2f5e3c9df693 ("sctp: introduce sctp_assoc_update_frag_point")
+Reported-by: Jakub Audykowicz <jakub.audykowicz@gmail.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Acked-by: Neil Horman <nhorman@tuxdriver.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/associola.c | 7 ++++---
+ net/sctp/sm_make_chunk.c | 3 +++
+ 2 files changed, 7 insertions(+), 3 deletions(-)
+
+--- a/net/sctp/associola.c
++++ b/net/sctp/associola.c
+@@ -118,9 +118,6 @@ static struct sctp_association *sctp_ass
+ asoc->flowlabel = sp->flowlabel;
+ asoc->dscp = sp->dscp;
+
+- /* Initialize default path MTU. */
+- asoc->pathmtu = sp->pathmtu;
+-
+ /* Set association default SACK delay */
+ asoc->sackdelay = msecs_to_jiffies(sp->sackdelay);
+ asoc->sackfreq = sp->sackfreq;
+@@ -252,6 +249,10 @@ static struct sctp_association *sctp_ass
+ 0, gfp))
+ goto fail_init;
+
++ /* Initialize default path MTU. */
++ asoc->pathmtu = sp->pathmtu;
++ sctp_assoc_update_frag_point(asoc);
++
+ /* Assume that peer would support both address types unless we are
+ * told otherwise.
+ */
+--- a/net/sctp/sm_make_chunk.c
++++ b/net/sctp/sm_make_chunk.c
+@@ -2462,6 +2462,9 @@ int sctp_process_init(struct sctp_associ
+ asoc->c.sinit_max_instreams, gfp))
+ goto clean_up;
+
++ /* Update frag_point when stream_interleave may get changed. */
++ sctp_assoc_update_frag_point(asoc);
++
+ if (!asoc->temp && sctp_assoc_set_id(asoc, gfp))
+ goto clean_up;
+
--- /dev/null
+ipv4-ipv6-netfilter-adjust-the-frag-mem-limit-when-truesize-changes.patch
+ipv6-check-available-headroom-in-ip6_xmit-even-without-options.patch
+neighbour-avoid-writing-before-skb-head-in-neigh_hh_output.patch
+ipv6-sr-properly-initialize-flowi6-prior-passing-to-ip6_route_output.patch
+net-8139cp-fix-a-bug-triggered-by-changing-mtu-with-network-traffic.patch
+net-mlx4_core-correctly-set-pfc-param-if-global-pause-is-turned-off.patch
+net-mlx4_en-change-min-mtu-size-to-eth_min_mtu.patch
+net-phy-don-t-allow-__set_phy_supported-to-add-unsupported-modes.patch
+net-prevent-invalid-access-to-skb-prev-in-__qdisc_drop_all.patch
+net-use-skb_list_del_init-to-remove-from-rx-sublists.patch
+revert-net-ibm-emac-wrong-bit-is-used-for-sta-control.patch
+rtnetlink-ndo_dflt_fdb_dump-only-work-for-arphrd_ether-devices.patch
+sctp-kfree_rcu-asoc.patch
+tcp-do-not-underestimate-rwnd_limited.patch
+tcp-fix-null-ref-in-tail-loss-probe.patch
+tun-forbid-iface-creation-with-rtnl-ops.patch
+virtio-net-keep-vnet-header-zeroed-after-processing-xdp.patch
+net-phy-sfp-correct-store-of-detected-link-modes.patch
+sctp-update-frag_point-when-stream_interleave-is-set.patch
+net-restore-call-to-netdev_queue_numa_node_write-when-resetting-xps.patch
+net-fix-xps-static_key-accounting.patch
--- /dev/null
+From foo@baz Thu Dec 13 10:38:53 CET 2018
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 5 Dec 2018 14:24:31 -0800
+Subject: tcp: Do not underestimate rwnd_limited
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 41727549de3e7281feb174d568c6e46823db8684 ]
+
+If available rwnd is too small, tcp_tso_should_defer()
+can decide it is worth waiting before splitting a TSO packet.
+
+This really means we are rwnd limited.
+
+Fixes: 5615f88614a4 ("tcp: instrument how long TCP is limited by receive window")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Reviewed-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -2338,8 +2338,11 @@ static bool tcp_write_xmit(struct sock *
+ } else {
+ if (!push_one &&
+ tcp_tso_should_defer(sk, skb, &is_cwnd_limited,
+- max_segs))
++ max_segs)) {
++ if (!is_cwnd_limited)
++ is_rwnd_limited = true;
+ break;
++ }
+ }
+
+ limit = mss_now;
--- /dev/null
+From foo@baz Thu Dec 13 10:38:53 CET 2018
+From: Yuchung Cheng <ycheng@google.com>
+Date: Wed, 5 Dec 2018 14:38:38 -0800
+Subject: tcp: fix NULL ref in tail loss probe
+
+From: Yuchung Cheng <ycheng@google.com>
+
+[ Upstream commit b2b7af861122a0c0f6260155c29a1b2e594cd5b5 ]
+
+TCP loss probe timer may fire when the retransmission queue is empty but
+has a non-zero tp->packets_out counter. tcp_send_loss_probe will call
+tcp_rearm_rto which triggers NULL pointer reference by fetching the
+retransmission queue head in its sub-routines.
+
+Add a more detailed warning to help catch the root cause of the inflight
+accounting inconsistency.
+
+Reported-by: Rafael Tinoco <rafael.tinoco@linaro.org>
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c | 11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -2479,15 +2479,18 @@ void tcp_send_loss_probe(struct sock *sk
+ goto rearm_timer;
+ }
+ skb = skb_rb_last(&sk->tcp_rtx_queue);
++ if (unlikely(!skb)) {
++ WARN_ONCE(tp->packets_out,
++ "invalid inflight: %u state %u cwnd %u mss %d\n",
++ tp->packets_out, sk->sk_state, tp->snd_cwnd, mss);
++ inet_csk(sk)->icsk_pending = 0;
++ return;
++ }
+
+ /* At most one outstanding TLP retransmission. */
+ if (tp->tlp_high_seq)
+ goto rearm_timer;
+
+- /* Retransmit last segment. */
+- if (WARN_ON(!skb))
+- goto rearm_timer;
+-
+ if (skb_still_in_host_queue(sk, skb))
+ goto rearm_timer;
+
--- /dev/null
+From foo@baz Thu Dec 13 10:38:53 CET 2018
+From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Date: Thu, 29 Nov 2018 14:45:39 +0100
+Subject: tun: forbid iface creation with rtnl ops
+
+From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+
+[ Upstream commit 35b827b6d06199841a83839e8bb69c0cd13a28be ]
+
+It's not supported right now (the goal of the initial patch was to support
+'ip link del' only).
+
+Before the patch:
+$ ip link add foo type tun
+[ 239.632660] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
+[snip]
+[ 239.636410] RIP: 0010:register_netdevice+0x8e/0x3a0
+
+This panic occurs because dev->netdev_ops is not set by tun_setup(). But to
+have something usable, it will require more than just setting
+netdev_ops.
+
+Fixes: f019a7a594d9 ("tun: Implement ip link del tunXXX")
+CC: Eric W. Biederman <ebiederm@xmission.com>
+Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/tun.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -2268,9 +2268,9 @@ static void tun_setup(struct net_device
+ static int tun_validate(struct nlattr *tb[], struct nlattr *data[],
+ struct netlink_ext_ack *extack)
+ {
+- if (!data)
+- return 0;
+- return -EINVAL;
++ NL_SET_ERR_MSG(extack,
++ "tun/tap creation via rtnetlink is not supported.");
++ return -EOPNOTSUPP;
+ }
+
+ static size_t tun_get_size(const struct net_device *dev)
--- /dev/null
+From foo@baz Thu Dec 13 10:38:53 CET 2018
+From: Jason Wang <jasowang@redhat.com>
+Date: Thu, 29 Nov 2018 13:53:16 +0800
+Subject: virtio-net: keep vnet header zeroed after processing XDP
+
+From: Jason Wang <jasowang@redhat.com>
+
+[ Upstream commit 436c9453a1ac0944b82870ef2e0d9be956b396d9 ]
+
+We copy vnet header unconditionally in page_to_skb() this is wrong
+since XDP may modify the packet data. So let's keep a zeroed vnet
+header for not confusing the conversion between vnet header and skb
+metadata.
+
+In the future, we should able to detect whether or not the packet was
+modified and keep using the vnet header when packet was not touched.
+
+Fixes: f600b6905015 ("virtio_net: Add XDP support")
+Reported-by: Pavel Popa <pashinho1990@gmail.com>
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/virtio_net.c | 14 +++++++++-----
+ 1 file changed, 9 insertions(+), 5 deletions(-)
+
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -365,7 +365,8 @@ static unsigned int mergeable_ctx_to_tru
+ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
+ struct receive_queue *rq,
+ struct page *page, unsigned int offset,
+- unsigned int len, unsigned int truesize)
++ unsigned int len, unsigned int truesize,
++ bool hdr_valid)
+ {
+ struct sk_buff *skb;
+ struct virtio_net_hdr_mrg_rxbuf *hdr;
+@@ -387,7 +388,8 @@ static struct sk_buff *page_to_skb(struc
+ else
+ hdr_padded_len = sizeof(struct padded_vnet_hdr);
+
+- memcpy(hdr, p, hdr_len);
++ if (hdr_valid)
++ memcpy(hdr, p, hdr_len);
+
+ len -= hdr_len;
+ offset += hdr_padded_len;
+@@ -739,7 +741,8 @@ static struct sk_buff *receive_big(struc
+ struct virtnet_rq_stats *stats)
+ {
+ struct page *page = buf;
+- struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);
++ struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len,
++ PAGE_SIZE, true);
+
+ stats->bytes += len - vi->hdr_len;
+ if (unlikely(!skb))
+@@ -842,7 +845,8 @@ static struct sk_buff *receive_mergeable
+ rcu_read_unlock();
+ put_page(page);
+ head_skb = page_to_skb(vi, rq, xdp_page,
+- offset, len, PAGE_SIZE);
++ offset, len,
++ PAGE_SIZE, false);
+ return head_skb;
+ }
+ break;
+@@ -898,7 +902,7 @@ static struct sk_buff *receive_mergeable
+ goto err_skb;
+ }
+
+- head_skb = page_to_skb(vi, rq, page, offset, len, truesize);
++ head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog);
+ curr_skb = head_skb;
+
+ if (unlikely(!curr_skb))