git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.14-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 13 Dec 2018 09:47:44 +0000 (10:47 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 13 Dec 2018 09:47:44 +0000 (10:47 +0100)
added patches:
ipv4-ipv6-netfilter-adjust-the-frag-mem-limit-when-truesize-changes.patch
ipv6-check-available-headroom-in-ip6_xmit-even-without-options.patch
ipv6-sr-properly-initialize-flowi6-prior-passing-to-ip6_route_output.patch
neighbour-avoid-writing-before-skb-head-in-neigh_hh_output.patch
net-8139cp-fix-a-bug-triggered-by-changing-mtu-with-network-traffic.patch
net-mlx4_core-correctly-set-pfc-param-if-global-pause-is-turned-off.patch
net-mlx4_en-change-min-mtu-size-to-eth_min_mtu.patch
net-phy-don-t-allow-__set_phy_supported-to-add-unsupported-modes.patch
net-prevent-invalid-access-to-skb-prev-in-__qdisc_drop_all.patch
rtnetlink-ndo_dflt_fdb_dump-only-work-for-arphrd_ether-devices.patch
sctp-kfree_rcu-asoc.patch
tcp-do-not-underestimate-rwnd_limited.patch
tcp-fix-null-ref-in-tail-loss-probe.patch
tun-forbid-iface-creation-with-rtnl-ops.patch
virtio-net-keep-vnet-header-zeroed-after-processing-xdp.patch

16 files changed:
queue-4.14/ipv4-ipv6-netfilter-adjust-the-frag-mem-limit-when-truesize-changes.patch [new file with mode: 0644]
queue-4.14/ipv6-check-available-headroom-in-ip6_xmit-even-without-options.patch [new file with mode: 0644]
queue-4.14/ipv6-sr-properly-initialize-flowi6-prior-passing-to-ip6_route_output.patch [new file with mode: 0644]
queue-4.14/neighbour-avoid-writing-before-skb-head-in-neigh_hh_output.patch [new file with mode: 0644]
queue-4.14/net-8139cp-fix-a-bug-triggered-by-changing-mtu-with-network-traffic.patch [new file with mode: 0644]
queue-4.14/net-mlx4_core-correctly-set-pfc-param-if-global-pause-is-turned-off.patch [new file with mode: 0644]
queue-4.14/net-mlx4_en-change-min-mtu-size-to-eth_min_mtu.patch [new file with mode: 0644]
queue-4.14/net-phy-don-t-allow-__set_phy_supported-to-add-unsupported-modes.patch [new file with mode: 0644]
queue-4.14/net-prevent-invalid-access-to-skb-prev-in-__qdisc_drop_all.patch [new file with mode: 0644]
queue-4.14/rtnetlink-ndo_dflt_fdb_dump-only-work-for-arphrd_ether-devices.patch [new file with mode: 0644]
queue-4.14/sctp-kfree_rcu-asoc.patch [new file with mode: 0644]
queue-4.14/series [new file with mode: 0644]
queue-4.14/tcp-do-not-underestimate-rwnd_limited.patch [new file with mode: 0644]
queue-4.14/tcp-fix-null-ref-in-tail-loss-probe.patch [new file with mode: 0644]
queue-4.14/tun-forbid-iface-creation-with-rtnl-ops.patch [new file with mode: 0644]
queue-4.14/virtio-net-keep-vnet-header-zeroed-after-processing-xdp.patch [new file with mode: 0644]

diff --git a/queue-4.14/ipv4-ipv6-netfilter-adjust-the-frag-mem-limit-when-truesize-changes.patch b/queue-4.14/ipv4-ipv6-netfilter-adjust-the-frag-mem-limit-when-truesize-changes.patch
new file mode 100644 (file)
index 0000000..10af622
--- /dev/null
@@ -0,0 +1,132 @@
+From foo@baz Thu Dec 13 10:39:23 CET 2018
+From: Jiri Wiesner <jwiesner@suse.com>
+Date: Wed, 5 Dec 2018 16:55:29 +0100
+Subject: ipv4: ipv6: netfilter: Adjust the frag mem limit when truesize changes
+
+From: Jiri Wiesner <jwiesner@suse.com>
+
+[ Upstream commit ebaf39e6032faf77218220707fc3fa22487784e0 ]
+
+The *_frag_reasm() functions are susceptible to miscalculating the byte
+count of packet fragments in case the truesize of a head buffer changes.
+The truesize member may be changed by the call to skb_unclone(), leaving
+the fragment memory limit counter unbalanced even if all fragments are
+processed. This miscalculation goes unnoticed as long as the network
+namespace which holds the counter is not destroyed.
+
+Should an attempt be made to destroy a network namespace that holds an
+unbalanced fragment memory limit counter the cleanup of the namespace
+never finishes. The thread handling the cleanup gets stuck in
+inet_frags_exit_net() waiting for the percpu counter to reach zero. The
+thread is usually in running state with a stacktrace similar to:
+
+ PID: 1073   TASK: ffff880626711440  CPU: 1   COMMAND: "kworker/u48:4"
+  #5 [ffff880621563d48] _raw_spin_lock at ffffffff815f5480
+  #6 [ffff880621563d48] inet_evict_bucket at ffffffff8158020b
+  #7 [ffff880621563d80] inet_frags_exit_net at ffffffff8158051c
+  #8 [ffff880621563db0] ops_exit_list at ffffffff814f5856
+  #9 [ffff880621563dd8] cleanup_net at ffffffff814f67c0
+ #10 [ffff880621563e38] process_one_work at ffffffff81096f14
+
+It is not possible to create new network namespaces, and processes
+that call unshare() end up being stuck in uninterruptible sleep state
+waiting to acquire the net_mutex.
+
+The bug was observed in the IPv6 netfilter code by Per Sundstrom.
+I thank him for his analysis of the problem. The parts of this patch
+that apply to IPv4 and IPv6 fragment reassembly are preemptive measures.
+
+Signed-off-by: Jiri Wiesner <jwiesner@suse.com>
+Reported-by: Per Sundstrom <per.sundstrom@redqube.se>
+Acked-by: Peter Oskolkov <posk@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_fragment.c                  |    7 +++++++
+ net/ipv6/netfilter/nf_conntrack_reasm.c |    8 +++++++-
+ net/ipv6/reassembly.c                   |    8 +++++++-
+ 3 files changed, 21 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/ip_fragment.c
++++ b/net/ipv4/ip_fragment.c
+@@ -513,6 +513,7 @@ static int ip_frag_reasm(struct ipq *qp,
+       struct rb_node *rbn;
+       int len;
+       int ihlen;
++      int delta;
+       int err;
+       u8 ecn;
+@@ -554,10 +555,16 @@ static int ip_frag_reasm(struct ipq *qp,
+       if (len > 65535)
+               goto out_oversize;
++      delta = - head->truesize;
++
+       /* Head of list must not be cloned. */
+       if (skb_unclone(head, GFP_ATOMIC))
+               goto out_nomem;
++      delta += head->truesize;
++      if (delta)
++              add_frag_mem_limit(qp->q.net, delta);
++
+       /* If the first fragment is fragmented itself, we split
+        * it to two chunks: the first with data and paged part
+        * and the second, holding only fragments. */
+--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
++++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
+@@ -349,7 +349,7 @@ static bool
+ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev,  struct net_device *dev)
+ {
+       struct sk_buff *fp, *head = fq->q.fragments;
+-      int    payload_len;
++      int    payload_len, delta;
+       u8 ecn;
+       inet_frag_kill(&fq->q);
+@@ -371,10 +371,16 @@ nf_ct_frag6_reasm(struct frag_queue *fq,
+               return false;
+       }
++      delta = - head->truesize;
++
+       /* Head of list must not be cloned. */
+       if (skb_unclone(head, GFP_ATOMIC))
+               return false;
++      delta += head->truesize;
++      if (delta)
++              add_frag_mem_limit(fq->q.net, delta);
++
+       /* If the first fragment is fragmented itself, we split
+        * it to two chunks: the first with data and paged part
+        * and the second, holding only fragments. */
+--- a/net/ipv6/reassembly.c
++++ b/net/ipv6/reassembly.c
+@@ -348,7 +348,7 @@ static int ip6_frag_reasm(struct frag_qu
+ {
+       struct net *net = container_of(fq->q.net, struct net, ipv6.frags);
+       struct sk_buff *fp, *head = fq->q.fragments;
+-      int    payload_len;
++      int    payload_len, delta;
+       unsigned int nhoff;
+       int sum_truesize;
+       u8 ecn;
+@@ -389,10 +389,16 @@ static int ip6_frag_reasm(struct frag_qu
+       if (payload_len > IPV6_MAXPLEN)
+               goto out_oversize;
++      delta = - head->truesize;
++
+       /* Head of list must not be cloned. */
+       if (skb_unclone(head, GFP_ATOMIC))
+               goto out_oom;
++      delta += head->truesize;
++      if (delta)
++              add_frag_mem_limit(fq->q.net, delta);
++
+       /* If the first fragment is fragmented itself, we split
+        * it to two chunks: the first with data and paged part
+        * and the second, holding only fragments. */
diff --git a/queue-4.14/ipv6-check-available-headroom-in-ip6_xmit-even-without-options.patch b/queue-4.14/ipv6-check-available-headroom-in-ip6_xmit-even-without-options.patch
new file mode 100644 (file)
index 0000000..d6e5729
--- /dev/null
@@ -0,0 +1,137 @@
+From foo@baz Thu Dec 13 10:39:23 CET 2018
+From: Stefano Brivio <sbrivio@redhat.com>
+Date: Thu, 6 Dec 2018 19:30:36 +0100
+Subject: ipv6: Check available headroom in ip6_xmit() even without options
+
+From: Stefano Brivio <sbrivio@redhat.com>
+
+[ Upstream commit 66033f47ca60294a95fc85ec3a3cc909dab7b765 ]
+
+Even if we send an IPv6 packet without options, MAX_HEADER might not be
+enough to account for the additional headroom required by alignment of
+hardware headers.
+
+On a configuration without HYPERV_NET, WLAN, AX25, and with IPV6_TUNNEL,
+sending short SCTP packets over IPv4 over L2TP over IPv6, we start with
+100 bytes of allocated headroom in sctp_packet_transmit(), end up with 54
+bytes after l2tp_xmit_skb(), and 14 bytes in ip6_finish_output2().
+
+Those would be enough to append our 14 bytes header, but we're going to
+align that to 16 bytes, and write 2 bytes out of the allocated slab in
+neigh_hh_output().
+
+KASan says:
+
+[  264.967848] ==================================================================
+[  264.967861] BUG: KASAN: slab-out-of-bounds in ip6_finish_output2+0x1aec/0x1c70
+[  264.967866] Write of size 16 at addr 000000006af1c7fe by task netperf/6201
+[  264.967870]
+[  264.967876] CPU: 0 PID: 6201 Comm: netperf Not tainted 4.20.0-rc4+ #1
+[  264.967881] Hardware name: IBM 2827 H43 400 (z/VM 6.4.0)
+[  264.967887] Call Trace:
+[  264.967896] ([<00000000001347d6>] show_stack+0x56/0xa0)
+[  264.967903]  [<00000000017e379c>] dump_stack+0x23c/0x290
+[  264.967912]  [<00000000007bc594>] print_address_description+0xf4/0x290
+[  264.967919]  [<00000000007bc8fc>] kasan_report+0x13c/0x240
+[  264.967927]  [<000000000162f5e4>] ip6_finish_output2+0x1aec/0x1c70
+[  264.967935]  [<000000000163f890>] ip6_finish_output+0x430/0x7f0
+[  264.967943]  [<000000000163fe44>] ip6_output+0x1f4/0x580
+[  264.967953]  [<000000000163882a>] ip6_xmit+0xfea/0x1ce8
+[  264.967963]  [<00000000017396e2>] inet6_csk_xmit+0x282/0x3f8
+[  264.968033]  [<000003ff805fb0ba>] l2tp_xmit_skb+0xe02/0x13e0 [l2tp_core]
+[  264.968037]  [<000003ff80631192>] l2tp_eth_dev_xmit+0xda/0x150 [l2tp_eth]
+[  264.968041]  [<0000000001220020>] dev_hard_start_xmit+0x268/0x928
+[  264.968069]  [<0000000001330e8e>] sch_direct_xmit+0x7ae/0x1350
+[  264.968071]  [<000000000122359c>] __dev_queue_xmit+0x2b7c/0x3478
+[  264.968075]  [<00000000013d2862>] ip_finish_output2+0xce2/0x11a0
+[  264.968078]  [<00000000013d9b14>] ip_finish_output+0x56c/0x8c8
+[  264.968081]  [<00000000013ddd1e>] ip_output+0x226/0x4c0
+[  264.968083]  [<00000000013dbd6c>] __ip_queue_xmit+0x894/0x1938
+[  264.968100]  [<000003ff80bc3a5c>] sctp_packet_transmit+0x29d4/0x3648 [sctp]
+[  264.968116]  [<000003ff80b7bf68>] sctp_outq_flush_ctrl.constprop.5+0x8d0/0xe50 [sctp]
+[  264.968131]  [<000003ff80b7c716>] sctp_outq_flush+0x22e/0x7d8 [sctp]
+[  264.968146]  [<000003ff80b35c68>] sctp_cmd_interpreter.isra.16+0x530/0x6800 [sctp]
+[  264.968161]  [<000003ff80b3410a>] sctp_do_sm+0x222/0x648 [sctp]
+[  264.968177]  [<000003ff80bbddac>] sctp_primitive_ASSOCIATE+0xbc/0xf8 [sctp]
+[  264.968192]  [<000003ff80b93328>] __sctp_connect+0x830/0xc20 [sctp]
+[  264.968208]  [<000003ff80bb11ce>] sctp_inet_connect+0x2e6/0x378 [sctp]
+[  264.968212]  [<0000000001197942>] __sys_connect+0x21a/0x450
+[  264.968215]  [<000000000119aff8>] sys_socketcall+0x3d0/0xb08
+[  264.968218]  [<000000000184ea7a>] system_call+0x2a2/0x2c0
+
+[...]
+
+Just like ip_finish_output2() does for IPv4, check that we have enough
+headroom in ip6_xmit(), and reallocate it if we don't.
+
+This issue is older than git history.
+
+Reported-by: Jianlin Shi <jishi@redhat.com>
+Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_output.c |   42 +++++++++++++++++++++---------------------
+ 1 file changed, 21 insertions(+), 21 deletions(-)
+
+--- a/net/ipv6/ip6_output.c
++++ b/net/ipv6/ip6_output.c
+@@ -195,37 +195,37 @@ int ip6_xmit(const struct sock *sk, stru
+       const struct ipv6_pinfo *np = inet6_sk(sk);
+       struct in6_addr *first_hop = &fl6->daddr;
+       struct dst_entry *dst = skb_dst(skb);
++      unsigned int head_room;
+       struct ipv6hdr *hdr;
+       u8  proto = fl6->flowi6_proto;
+       int seg_len = skb->len;
+       int hlimit = -1;
+       u32 mtu;
+-      if (opt) {
+-              unsigned int head_room;
+-
+-              /* First: exthdrs may take lots of space (~8K for now)
+-                 MAX_HEADER is not enough.
+-               */
+-              head_room = opt->opt_nflen + opt->opt_flen;
+-              seg_len += head_room;
+-              head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
++      head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
++      if (opt)
++              head_room += opt->opt_nflen + opt->opt_flen;
+-              if (skb_headroom(skb) < head_room) {
+-                      struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
+-                      if (!skb2) {
+-                              IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+-                                            IPSTATS_MIB_OUTDISCARDS);
+-                              kfree_skb(skb);
+-                              return -ENOBUFS;
+-                      }
+-                      if (skb->sk)
+-                              skb_set_owner_w(skb2, skb->sk);
+-                      consume_skb(skb);
+-                      skb = skb2;
++      if (unlikely(skb_headroom(skb) < head_room)) {
++              struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
++              if (!skb2) {
++                      IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
++                                    IPSTATS_MIB_OUTDISCARDS);
++                      kfree_skb(skb);
++                      return -ENOBUFS;
+               }
++              if (skb->sk)
++                      skb_set_owner_w(skb2, skb->sk);
++              consume_skb(skb);
++              skb = skb2;
++      }
++
++      if (opt) {
++              seg_len += opt->opt_nflen + opt->opt_flen;
++
+               if (opt->opt_flen)
+                       ipv6_push_frag_opts(skb, opt, &proto);
++
+               if (opt->opt_nflen)
+                       ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
+                                            &fl6->saddr);
diff --git a/queue-4.14/ipv6-sr-properly-initialize-flowi6-prior-passing-to-ip6_route_output.patch b/queue-4.14/ipv6-sr-properly-initialize-flowi6-prior-passing-to-ip6_route_output.patch
new file mode 100644 (file)
index 0000000..4183247
--- /dev/null
@@ -0,0 +1,30 @@
+From foo@baz Thu Dec 13 10:39:23 CET 2018
+From: Shmulik Ladkani <shmulik@metanetworks.com>
+Date: Fri, 7 Dec 2018 09:50:17 +0200
+Subject: ipv6: sr: properly initialize flowi6 prior passing to ip6_route_output
+
+From: Shmulik Ladkani <shmulik@metanetworks.com>
+
+[ Upstream commit 1b4e5ad5d6b9f15cd0b5121f86d4719165958417 ]
+
+In 'seg6_output', stack variable 'struct flowi6 fl6' was missing
+initialization.
+
+Fixes: 6c8702c60b88 ("ipv6: sr: add support for SRH encapsulation and injection with lwtunnels")
+Signed-off-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/seg6_iptunnel.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/ipv6/seg6_iptunnel.c
++++ b/net/ipv6/seg6_iptunnel.c
+@@ -327,6 +327,7 @@ static int seg6_output(struct net *net,
+               struct ipv6hdr *hdr = ipv6_hdr(skb);
+               struct flowi6 fl6;
++              memset(&fl6, 0, sizeof(fl6));
+               fl6.daddr = hdr->daddr;
+               fl6.saddr = hdr->saddr;
+               fl6.flowlabel = ip6_flowinfo(hdr);
diff --git a/queue-4.14/neighbour-avoid-writing-before-skb-head-in-neigh_hh_output.patch b/queue-4.14/neighbour-avoid-writing-before-skb-head-in-neigh_hh_output.patch
new file mode 100644 (file)
index 0000000..8cfd214
--- /dev/null
@@ -0,0 +1,87 @@
+From foo@baz Thu Dec 13 10:39:23 CET 2018
+From: Stefano Brivio <sbrivio@redhat.com>
+Date: Thu, 6 Dec 2018 19:30:37 +0100
+Subject: neighbour: Avoid writing before skb->head in neigh_hh_output()
+
+From: Stefano Brivio <sbrivio@redhat.com>
+
+[ Upstream commit e6ac64d4c4d095085d7dd71cbd05704ac99829b2 ]
+
+While skb_push() makes the kernel panic if the skb headroom is less than
+the unaligned hardware header size, it will proceed normally in case we
+copy more than that because of alignment, and we'll silently corrupt
+adjacent slabs.
+
+In the case fixed by the previous patch,
+"ipv6: Check available headroom in ip6_xmit() even without options", we
+end up in neigh_hh_output() with 14 bytes headroom, 14 bytes hardware
+header and write 16 bytes, starting 2 bytes before the allocated buffer.
+
+Always check we're not writing before skb->head and, if the headroom is
+not enough, warn and drop the packet.
+
+v2:
+ - instead of panicking with BUG_ON(), WARN_ON_ONCE() and drop the packet
+   (Eric Dumazet)
+ - if we avoid the panic, though, we need to explicitly check the headroom
+   before the memcpy(), otherwise we'll have corrupted slabs on a running
+   kernel, after we warn
+ - use __skb_push() instead of skb_push(), as the headroom check is
+   already implemented here explicitly (Eric Dumazet)
+
+Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/neighbour.h |   28 +++++++++++++++++++++++-----
+ 1 file changed, 23 insertions(+), 5 deletions(-)
+
+--- a/include/net/neighbour.h
++++ b/include/net/neighbour.h
+@@ -452,6 +452,7 @@ static inline int neigh_hh_bridge(struct
+ static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb)
+ {
++      unsigned int hh_alen = 0;
+       unsigned int seq;
+       unsigned int hh_len;
+@@ -459,16 +460,33 @@ static inline int neigh_hh_output(const
+               seq = read_seqbegin(&hh->hh_lock);
+               hh_len = hh->hh_len;
+               if (likely(hh_len <= HH_DATA_MOD)) {
+-                      /* this is inlined by gcc */
+-                      memcpy(skb->data - HH_DATA_MOD, hh->hh_data, HH_DATA_MOD);
++                      hh_alen = HH_DATA_MOD;
++
++                      /* skb_push() would proceed silently if we have room for
++                       * the unaligned size but not for the aligned size:
++                       * check headroom explicitly.
++                       */
++                      if (likely(skb_headroom(skb) >= HH_DATA_MOD)) {
++                              /* this is inlined by gcc */
++                              memcpy(skb->data - HH_DATA_MOD, hh->hh_data,
++                                     HH_DATA_MOD);
++                      }
+               } else {
+-                      unsigned int hh_alen = HH_DATA_ALIGN(hh_len);
++                      hh_alen = HH_DATA_ALIGN(hh_len);
+-                      memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
++                      if (likely(skb_headroom(skb) >= hh_alen)) {
++                              memcpy(skb->data - hh_alen, hh->hh_data,
++                                     hh_alen);
++                      }
+               }
+       } while (read_seqretry(&hh->hh_lock, seq));
+-      skb_push(skb, hh_len);
++      if (WARN_ON_ONCE(skb_headroom(skb) < hh_alen)) {
++              kfree_skb(skb);
++              return NET_XMIT_DROP;
++      }
++
++      __skb_push(skb, hh_len);
+       return dev_queue_xmit(skb);
+ }
diff --git a/queue-4.14/net-8139cp-fix-a-bug-triggered-by-changing-mtu-with-network-traffic.patch b/queue-4.14/net-8139cp-fix-a-bug-triggered-by-changing-mtu-with-network-traffic.patch
new file mode 100644 (file)
index 0000000..1195164
--- /dev/null
@@ -0,0 +1,143 @@
+From foo@baz Thu Dec 13 10:39:23 CET 2018
+From: Su Yanjun <suyj.fnst@cn.fujitsu.com>
+Date: Mon, 3 Dec 2018 15:33:07 +0800
+Subject: net: 8139cp: fix a BUG triggered by changing mtu with network traffic
+
+From: Su Yanjun <suyj.fnst@cn.fujitsu.com>
+
+[ Upstream commit a5d4a89245ead1f37ed135213653c5beebea4237 ]
+
+When changing mtu many times with traffic, a bug is triggered:
+
+[ 1035.684037] kernel BUG at lib/dynamic_queue_limits.c:26!
+[ 1035.684042] invalid opcode: 0000 [#1] SMP
+[ 1035.684049] Modules linked in: loop binfmt_misc 8139cp(OE) macsec
+tcp_diag udp_diag inet_diag unix_diag af_packet_diag netlink_diag tcp_lp
+fuse uinput xt_CHECKSUM iptable_mangle ipt_MASQUERADE
+nf_nat_masquerade_ipv4 iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4
+nf_defrag_ipv4 xt_conntrack nf_conntrack ipt_REJECT nf_reject_ipv4 tun
+bridge stp llc ebtable_filter ebtables ip6table_filter devlink
+ip6_tables iptable_filter sunrpc snd_hda_codec_generic snd_hda_intel
+snd_hda_codec snd_hda_core snd_hwdep ppdev snd_seq iosf_mbi crc32_pclmul
+parport_pc snd_seq_device ghash_clmulni_intel parport snd_pcm
+aesni_intel joydev lrw snd_timer virtio_balloon sg gf128mul glue_helper
+ablk_helper cryptd snd soundcore i2c_piix4 pcspkr ip_tables xfs
+libcrc32c sr_mod sd_mod cdrom crc_t10dif crct10dif_generic ata_generic
+[ 1035.684102]  pata_acpi virtio_console qxl drm_kms_helper syscopyarea
+sysfillrect sysimgblt floppy fb_sys_fops crct10dif_pclmul
+crct10dif_common ttm crc32c_intel serio_raw ata_piix drm libata 8139too
+virtio_pci drm_panel_orientation_quirks virtio_ring virtio mii dm_mirror
+dm_region_hash dm_log dm_mod [last unloaded: 8139cp]
+[ 1035.684132] CPU: 9 PID: 25140 Comm: if-mtu-change Kdump: loaded
+Tainted: G           OE  ------------ T 3.10.0-957.el7.x86_64 #1
+[ 1035.684134] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
+[ 1035.684136] task: ffff8f59b1f5a080 ti: ffff8f5a2e32c000 task.ti:
+ffff8f5a2e32c000
+[ 1035.684149] RIP: 0010:[<ffffffffba3a40d0>]  [<ffffffffba3a40d0>]
+dql_completed+0x180/0x190
+[ 1035.684162] RSP: 0000:ffff8f5a75483e50  EFLAGS: 00010093
+[ 1035.684162] RAX: 00000000000000c2 RBX: ffff8f5a6f91c000 RCX:
+0000000000000000
+[ 1035.684162] RDX: 0000000000000000 RSI: 0000000000000184 RDI:
+ffff8f599fea3ec0
+[ 1035.684162] RBP: ffff8f5a75483ea8 R08: 00000000000000c2 R09:
+0000000000000000
+[ 1035.684162] R10: 00000000000616ef R11: ffff8f5a75483b56 R12:
+ffff8f599fea3e00
+[ 1035.684162] R13: 0000000000000001 R14: 0000000000000000 R15:
+0000000000000184
+[ 1035.684162] FS:  00007fa8434de740(0000) GS:ffff8f5a75480000(0000)
+knlGS:0000000000000000
+[ 1035.684162] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[ 1035.684162] CR2: 00000000004305d0 CR3: 000000024eb66000 CR4:
+00000000001406e0
+[ 1035.684162] Call Trace:
+[ 1035.684162]  <IRQ>
+[ 1035.684162]  [<ffffffffc08cbaf8>] ? cp_interrupt+0x478/0x580 [8139cp]
+[ 1035.684162]  [<ffffffffba14a294>]
+__handle_irq_event_percpu+0x44/0x1c0
+[ 1035.684162]  [<ffffffffba14a442>] handle_irq_event_percpu+0x32/0x80
+[ 1035.684162]  [<ffffffffba14a4cc>] handle_irq_event+0x3c/0x60
+[ 1035.684162]  [<ffffffffba14db29>] handle_fasteoi_irq+0x59/0x110
+[ 1035.684162]  [<ffffffffba02e554>] handle_irq+0xe4/0x1a0
+[ 1035.684162]  [<ffffffffba7795dd>] do_IRQ+0x4d/0xf0
+[ 1035.684162]  [<ffffffffba76b362>] common_interrupt+0x162/0x162
+[ 1035.684162]  <EOI>
+[ 1035.684162]  [<ffffffffba0c2ae4>] ? __wake_up_bit+0x24/0x70
+[ 1035.684162]  [<ffffffffba1e46f5>] ? do_set_pte+0xd5/0x120
+[ 1035.684162]  [<ffffffffba1b64fb>] unlock_page+0x2b/0x30
+[ 1035.684162]  [<ffffffffba1e4879>] do_read_fault.isra.61+0x139/0x1b0
+[ 1035.684162]  [<ffffffffba1e9134>] handle_pte_fault+0x2f4/0xd10
+[ 1035.684162]  [<ffffffffba1ebc6d>] handle_mm_fault+0x39d/0x9b0
+[ 1035.684162]  [<ffffffffba76f5e3>] __do_page_fault+0x203/0x500
+[ 1035.684162]  [<ffffffffba76f9c6>] trace_do_page_fault+0x56/0x150
+[ 1035.684162]  [<ffffffffba76ef42>] do_async_page_fault+0x22/0xf0
+[ 1035.684162]  [<ffffffffba76b788>] async_page_fault+0x28/0x30
+[ 1035.684162] Code: 54 c7 47 54 ff ff ff ff 44 0f 49 ce 48 8b 35 48 2f
+9c 00 48 89 77 58 e9 fe fe ff ff 0f 1f 80 00 00 00 00 41 89 d1 e9 ef fe
+ff ff <0f> 0b 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 55 8d 42 ff 48
+[ 1035.684162] RIP  [<ffffffffba3a40d0>] dql_completed+0x180/0x190
+[ 1035.684162]  RSP <ffff8f5a75483e50>
+
+It's not the same as described in patch 7fe0ee09.
+As 8139cp uses shared irq mode, other device irq will trigger
+cp_interrupt to execute.
+
+cp_change_mtu
+ -> cp_close
+ -> cp_open
+
+In the cp_close routine, just before free_irq(), some interrupt may occur.
+In my environment, cp_interrupt executes and IntrStatus is 0x4,
+exactly TxOk. That will cause cp_tx to wake device queue.
+
+As the device queue is started, cp_start_xmit and cp_open will run at the
+same time, which will cause a kernel BUG.
+
+For example:
+[#] for tx descriptor
+
+At start:
+
+[#][#][#]
+num_queued=3
+
+After cp_init_hw->cp_start_hw->netdev_reset_queue:
+
+[#][#][#]
+num_queued=0
+
+When 8139cp starts to work, cp_tx will find that
+num_queued mismatches the complete_bytes.
+
+The patch will check IntrMask before checking IntrStatus in cp_interrupt.
+When 8139cp interrupt is disabled, just return.
+
+Signed-off-by: Su Yanjun <suyj.fnst@cn.fujitsu.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/realtek/8139cp.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/drivers/net/ethernet/realtek/8139cp.c
++++ b/drivers/net/ethernet/realtek/8139cp.c
+@@ -571,6 +571,7 @@ static irqreturn_t cp_interrupt (int irq
+       struct cp_private *cp;
+       int handled = 0;
+       u16 status;
++      u16 mask;
+       if (unlikely(dev == NULL))
+               return IRQ_NONE;
+@@ -578,6 +579,10 @@ static irqreturn_t cp_interrupt (int irq
+       spin_lock(&cp->lock);
++      mask = cpr16(IntrMask);
++      if (!mask)
++              goto out_unlock;
++
+       status = cpr16(IntrStatus);
+       if (!status || (status == 0xFFFF))
+               goto out_unlock;
diff --git a/queue-4.14/net-mlx4_core-correctly-set-pfc-param-if-global-pause-is-turned-off.patch b/queue-4.14/net-mlx4_core-correctly-set-pfc-param-if-global-pause-is-turned-off.patch
new file mode 100644 (file)
index 0000000..e508e04
--- /dev/null
@@ -0,0 +1,33 @@
+From foo@baz Thu Dec 13 10:39:23 CET 2018
+From: Tarick Bedeir <tarick@google.com>
+Date: Fri, 7 Dec 2018 00:30:26 -0800
+Subject: net/mlx4_core: Correctly set PFC param if global pause is turned off.
+
+From: Tarick Bedeir <tarick@google.com>
+
+[ Upstream commit bd5122cd1e0644d8bd8dd84517c932773e999766 ]
+
+rx_ppp and tx_ppp can be set between 0 and 255, so don't clamp to 1.
+
+Fixes: 6e8814ceb7e8 ("net/mlx4_en: Fix mixed PFC and Global pause user control requests")
+Signed-off-by: Tarick Bedeir <tarick@google.com>
+Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx4/en_ethtool.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
++++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+@@ -1070,8 +1070,8 @@ static int mlx4_en_set_pauseparam(struct
+       tx_pause = !!(pause->tx_pause);
+       rx_pause = !!(pause->rx_pause);
+-      rx_ppp = priv->prof->rx_ppp && !(tx_pause || rx_pause);
+-      tx_ppp = priv->prof->tx_ppp && !(tx_pause || rx_pause);
++      rx_ppp = (tx_pause || rx_pause) ? 0 : priv->prof->rx_ppp;
++      tx_ppp = (tx_pause || rx_pause) ? 0 : priv->prof->tx_ppp;
+       err = mlx4_SET_PORT_general(mdev->dev, priv->port,
+                                   priv->rx_skb_size + ETH_FCS_LEN,
diff --git a/queue-4.14/net-mlx4_en-change-min-mtu-size-to-eth_min_mtu.patch b/queue-4.14/net-mlx4_en-change-min-mtu-size-to-eth_min_mtu.patch
new file mode 100644 (file)
index 0000000..e6fb633
--- /dev/null
@@ -0,0 +1,46 @@
+From foo@baz Thu Dec 13 10:39:23 CET 2018
+From: Eran Ben Elisha <eranbe@mellanox.com>
+Date: Sun, 2 Dec 2018 14:34:36 +0200
+Subject: net/mlx4_en: Change min MTU size to ETH_MIN_MTU
+
+From: Eran Ben Elisha <eranbe@mellanox.com>
+
+[ Upstream commit 24be19e47779d604d1492c114459dca9a92acf78 ]
+
+NIC driver minimal MTU size shall be set to ETH_MIN_MTU, as defined in
+the RFC791 and in the network stack. Remove old mlx4_en only define for
+it, which was set to wrong value.
+
+Fixes: b80f71f5816f ("ethernet/mellanox: use core min/max MTU checking")
+Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
+Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx4/en_netdev.c |    4 ++--
+ drivers/net/ethernet/mellanox/mlx4/mlx4_en.h   |    1 -
+ 2 files changed, 2 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
++++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+@@ -3505,8 +3505,8 @@ int mlx4_en_init_netdev(struct mlx4_en_d
+               dev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM;
+       }
+-      /* MTU range: 46 - hw-specific max */
+-      dev->min_mtu = MLX4_EN_MIN_MTU;
++      /* MTU range: 68 - hw-specific max */
++      dev->min_mtu = ETH_MIN_MTU;
+       dev->max_mtu = priv->max_mtu;
+       mdev->pndev[port] = dev;
+--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
++++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+@@ -157,7 +157,6 @@
+ #define HEADER_COPY_SIZE       (128 - NET_IP_ALIGN)
+ #define MLX4_LOOPBACK_TEST_PAYLOAD (HEADER_COPY_SIZE - ETH_HLEN)
+-#define MLX4_EN_MIN_MTU               46
+ /* VLAN_HLEN is added twice,to support skb vlan tagged with multiple
+  * headers. (For example: ETH_P_8021Q and ETH_P_8021AD).
+  */
diff --git a/queue-4.14/net-phy-don-t-allow-__set_phy_supported-to-add-unsupported-modes.patch b/queue-4.14/net-phy-don-t-allow-__set_phy_supported-to-add-unsupported-modes.patch
new file mode 100644 (file)
index 0000000..e9cfb11
--- /dev/null
@@ -0,0 +1,56 @@
+From foo@baz Thu Dec 13 10:39:23 CET 2018
+From: Heiner Kallweit <hkallweit1@gmail.com>
+Date: Mon, 3 Dec 2018 08:19:33 +0100
+Subject: net: phy: don't allow __set_phy_supported to add unsupported modes
+
+From: Heiner Kallweit <hkallweit1@gmail.com>
+
+[ Upstream commit d2a36971ef595069b7a600d1144c2e0881a930a1 ]
+
+Currently __set_phy_supported allows to add modes w/o checking whether
+the PHY supports them. This is wrong, it should never add modes but
+only remove modes we don't want to support.
+
+The commit marked as fixed didn't do anything wrong, it just copied
+existing functionality to the helper which is being fixed now.
+
+Fixes: f3a6bd393c2c ("phylib: Add phy_set_max_speed helper")
+Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/phy_device.c |   19 ++++++++-----------
+ 1 file changed, 8 insertions(+), 11 deletions(-)
+
+--- a/drivers/net/phy/phy_device.c
++++ b/drivers/net/phy/phy_device.c
+@@ -1703,20 +1703,17 @@ EXPORT_SYMBOL(genphy_loopback);
+ static int __set_phy_supported(struct phy_device *phydev, u32 max_speed)
+ {
+-      phydev->supported &= ~(PHY_1000BT_FEATURES | PHY_100BT_FEATURES |
+-                             PHY_10BT_FEATURES);
+-
+       switch (max_speed) {
+-      default:
+-              return -ENOTSUPP;
+-      case SPEED_1000:
+-              phydev->supported |= PHY_1000BT_FEATURES;
++      case SPEED_10:
++              phydev->supported &= ~PHY_100BT_FEATURES;
+               /* fall through */
+       case SPEED_100:
+-              phydev->supported |= PHY_100BT_FEATURES;
+-              /* fall through */
+-      case SPEED_10:
+-              phydev->supported |= PHY_10BT_FEATURES;
++              phydev->supported &= ~PHY_1000BT_FEATURES;
++              break;
++      case SPEED_1000:
++              break;
++      default:
++              return -ENOTSUPP;
+       }
+       return 0;
diff --git a/queue-4.14/net-prevent-invalid-access-to-skb-prev-in-__qdisc_drop_all.patch b/queue-4.14/net-prevent-invalid-access-to-skb-prev-in-__qdisc_drop_all.patch
new file mode 100644 (file)
index 0000000..393b743
--- /dev/null
@@ -0,0 +1,98 @@
+From foo@baz Thu Dec 13 10:39:23 CET 2018
+From: Christoph Paasch <cpaasch@apple.com>
+Date: Thu, 29 Nov 2018 16:01:04 -0800
+Subject: net: Prevent invalid access to skb->prev in __qdisc_drop_all
+
+From: Christoph Paasch <cpaasch@apple.com>
+
+[ Upstream commit 9410d386d0a829ace9558336263086c2fbbe8aed ]
+
+__qdisc_drop_all() accesses skb->prev to get to the tail of the
+segment-list.
+
+With commit 68d2f84a1368 ("net: gro: properly remove skb from list")
+the skb-list handling has been changed to set skb->next to NULL and set
+the list-poison on skb->prev.
+
+With that change, __qdisc_drop_all() will panic when it tries to
+dereference skb->prev.
+
+Since commit 992cba7e276d ("net: Add and use skb_list_del_init().")
+__list_del_entry is used, leaving skb->prev unchanged (thus,
+pointing to the list-head if it's the first skb of the list).
+This will make __qdisc_drop_all modify the next-pointer of the list-head
+and result in a panic later on:
+
+[   34.501053] general protection fault: 0000 [#1] SMP KASAN PTI
+[   34.501968] CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.20.0-rc2.mptcp #108
+[   34.502887] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.5.1 01/01/2011
+[   34.504074] RIP: 0010:dev_gro_receive+0x343/0x1f90
+[   34.504751] Code: e0 48 c1 e8 03 42 80 3c 30 00 0f 85 4a 1c 00 00 4d 8b 24 24 4c 39 65 d0 0f 84 0a 04 00 00 49 8d 7c 24 38 48 89 f8 48 c1 e8 03 <42> 0f b6 04 30 84 c0 74 08 3c 04
+[   34.507060] RSP: 0018:ffff8883af507930 EFLAGS: 00010202
+[   34.507761] RAX: 0000000000000007 RBX: ffff8883970b2c80 RCX: 1ffff11072e165a6
+[   34.508640] RDX: 1ffff11075867008 RSI: ffff8883ac338040 RDI: 0000000000000038
+[   34.509493] RBP: ffff8883af5079d0 R08: ffff8883970b2d40 R09: 0000000000000062
+[   34.510346] R10: 0000000000000034 R11: 0000000000000000 R12: 0000000000000000
+[   34.511215] R13: 0000000000000000 R14: dffffc0000000000 R15: ffff8883ac338008
+[   34.512082] FS:  0000000000000000(0000) GS:ffff8883af500000(0000) knlGS:0000000000000000
+[   34.513036] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[   34.513741] CR2: 000055ccc3e9d020 CR3: 00000003abf32000 CR4: 00000000000006e0
+[   34.514593] Call Trace:
+[   34.514893]  <IRQ>
+[   34.515157]  napi_gro_receive+0x93/0x150
+[   34.515632]  receive_buf+0x893/0x3700
+[   34.516094]  ? __netif_receive_skb+0x1f/0x1a0
+[   34.516629]  ? virtnet_probe+0x1b40/0x1b40
+[   34.517153]  ? __stable_node_chain+0x4d0/0x850
+[   34.517684]  ? kfree+0x9a/0x180
+[   34.518067]  ? __kasan_slab_free+0x171/0x190
+[   34.518582]  ? detach_buf+0x1df/0x650
+[   34.519061]  ? lapic_next_event+0x5a/0x90
+[   34.519539]  ? virtqueue_get_buf_ctx+0x280/0x7f0
+[   34.520093]  virtnet_poll+0x2df/0xd60
+[   34.520533]  ? receive_buf+0x3700/0x3700
+[   34.521027]  ? qdisc_watchdog_schedule_ns+0xd5/0x140
+[   34.521631]  ? htb_dequeue+0x1817/0x25f0
+[   34.522107]  ? sch_direct_xmit+0x142/0xf30
+[   34.522595]  ? virtqueue_napi_schedule+0x26/0x30
+[   34.523155]  net_rx_action+0x2f6/0xc50
+[   34.523601]  ? napi_complete_done+0x2f0/0x2f0
+[   34.524126]  ? kasan_check_read+0x11/0x20
+[   34.524608]  ? _raw_spin_lock+0x7d/0xd0
+[   34.525070]  ? _raw_spin_lock_bh+0xd0/0xd0
+[   34.525563]  ? kvm_guest_apic_eoi_write+0x6b/0x80
+[   34.526130]  ? apic_ack_irq+0x9e/0xe0
+[   34.526567]  __do_softirq+0x188/0x4b5
+[   34.527015]  irq_exit+0x151/0x180
+[   34.527417]  do_IRQ+0xdb/0x150
+[   34.527783]  common_interrupt+0xf/0xf
+[   34.528223]  </IRQ>
+
+This patch makes sure that skb->prev is set to NULL when entering
+netem_enqueue.
+
+Cc: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
+Cc: Tyler Hicks <tyhicks@canonical.com>
+Cc: Eric Dumazet <eric.dumazet@gmail.com>
+Fixes: 68d2f84a1368 ("net: gro: properly remove skb from list")
+Suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
+Signed-off-by: Christoph Paasch <cpaasch@apple.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_netem.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/sched/sch_netem.c
++++ b/net/sched/sch_netem.c
+@@ -436,6 +436,9 @@ static int netem_enqueue(struct sk_buff
+       int count = 1;
+       int rc = NET_XMIT_SUCCESS;
++      /* Do not fool qdisc_drop_all() */
++      skb->prev = NULL;
++
+       /* Random duplication */
+       if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
+               ++count;
diff --git a/queue-4.14/rtnetlink-ndo_dflt_fdb_dump-only-work-for-arphrd_ether-devices.patch b/queue-4.14/rtnetlink-ndo_dflt_fdb_dump-only-work-for-arphrd_ether-devices.patch
new file mode 100644 (file)
index 0000000..d64a88e
--- /dev/null
@@ -0,0 +1,152 @@
+From foo@baz Thu Dec 13 10:39:23 CET 2018
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 4 Dec 2018 09:40:35 -0800
+Subject: rtnetlink: ndo_dflt_fdb_dump() only work for ARPHRD_ETHER devices
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 688838934c231bb08f46db687e57f6d8bf82709c ]
+
+kmsan was able to trigger a kernel-infoleak using a gre device [1]
+
+nlmsg_populate_fdb_fill() has a hard coded assumption
+that dev->addr_len is ETH_ALEN, as normally guaranteed
+for ARPHRD_ETHER devices.
+
+A similar issue was fixed recently in commit da71577545a5
+("rtnetlink: Disallow FDB configuration for non-Ethernet device")
+
+[1]
+BUG: KMSAN: kernel-infoleak in copyout lib/iov_iter.c:143 [inline]
+BUG: KMSAN: kernel-infoleak in _copy_to_iter+0x4c0/0x2700 lib/iov_iter.c:576
+CPU: 0 PID: 6697 Comm: syz-executor310 Not tainted 4.20.0-rc3+ #95
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Call Trace:
+ __dump_stack lib/dump_stack.c:77 [inline]
+ dump_stack+0x32d/0x480 lib/dump_stack.c:113
+ kmsan_report+0x12c/0x290 mm/kmsan/kmsan.c:683
+ kmsan_internal_check_memory+0x32a/0xa50 mm/kmsan/kmsan.c:743
+ kmsan_copy_to_user+0x78/0xd0 mm/kmsan/kmsan_hooks.c:634
+ copyout lib/iov_iter.c:143 [inline]
+ _copy_to_iter+0x4c0/0x2700 lib/iov_iter.c:576
+ copy_to_iter include/linux/uio.h:143 [inline]
+ skb_copy_datagram_iter+0x4e2/0x1070 net/core/datagram.c:431
+ skb_copy_datagram_msg include/linux/skbuff.h:3316 [inline]
+ netlink_recvmsg+0x6f9/0x19d0 net/netlink/af_netlink.c:1975
+ sock_recvmsg_nosec net/socket.c:794 [inline]
+ sock_recvmsg+0x1d1/0x230 net/socket.c:801
+ ___sys_recvmsg+0x444/0xae0 net/socket.c:2278
+ __sys_recvmsg net/socket.c:2327 [inline]
+ __do_sys_recvmsg net/socket.c:2337 [inline]
+ __se_sys_recvmsg+0x2fa/0x450 net/socket.c:2334
+ __x64_sys_recvmsg+0x4a/0x70 net/socket.c:2334
+ do_syscall_64+0xcf/0x110 arch/x86/entry/common.c:291
+ entry_SYSCALL_64_after_hwframe+0x63/0xe7
+RIP: 0033:0x441119
+Code: 18 89 d0 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 db 0a fc ff c3 66 2e 0f 1f 84 00 00 00 00
+RSP: 002b:00007fffc7f008a8 EFLAGS: 00000207 ORIG_RAX: 000000000000002f
+RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 0000000000441119
+RDX: 0000000000000040 RSI: 00000000200005c0 RDI: 0000000000000003
+RBP: 00000000006cc018 R08: 0000000000000100 R09: 0000000000000100
+R10: 0000000000000100 R11: 0000000000000207 R12: 0000000000402080
+R13: 0000000000402110 R14: 0000000000000000 R15: 0000000000000000
+
+Uninit was stored to memory at:
+ kmsan_save_stack_with_flags mm/kmsan/kmsan.c:246 [inline]
+ kmsan_save_stack mm/kmsan/kmsan.c:261 [inline]
+ kmsan_internal_chain_origin+0x13d/0x240 mm/kmsan/kmsan.c:469
+ kmsan_memcpy_memmove_metadata+0x1a9/0xf70 mm/kmsan/kmsan.c:344
+ kmsan_memcpy_metadata+0xb/0x10 mm/kmsan/kmsan.c:362
+ __msan_memcpy+0x61/0x70 mm/kmsan/kmsan_instr.c:162
+ __nla_put lib/nlattr.c:744 [inline]
+ nla_put+0x20a/0x2d0 lib/nlattr.c:802
+ nlmsg_populate_fdb_fill+0x444/0x810 net/core/rtnetlink.c:3466
+ nlmsg_populate_fdb net/core/rtnetlink.c:3775 [inline]
+ ndo_dflt_fdb_dump+0x73a/0x960 net/core/rtnetlink.c:3807
+ rtnl_fdb_dump+0x1318/0x1cb0 net/core/rtnetlink.c:3979
+ netlink_dump+0xc79/0x1c90 net/netlink/af_netlink.c:2244
+ __netlink_dump_start+0x10c4/0x11d0 net/netlink/af_netlink.c:2352
+ netlink_dump_start include/linux/netlink.h:216 [inline]
+ rtnetlink_rcv_msg+0x141b/0x1540 net/core/rtnetlink.c:4910
+ netlink_rcv_skb+0x394/0x640 net/netlink/af_netlink.c:2477
+ rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:4965
+ netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline]
+ netlink_unicast+0x1699/0x1740 net/netlink/af_netlink.c:1336
+ netlink_sendmsg+0x13c7/0x1440 net/netlink/af_netlink.c:1917
+ sock_sendmsg_nosec net/socket.c:621 [inline]
+ sock_sendmsg net/socket.c:631 [inline]
+ ___sys_sendmsg+0xe3b/0x1240 net/socket.c:2116
+ __sys_sendmsg net/socket.c:2154 [inline]
+ __do_sys_sendmsg net/socket.c:2163 [inline]
+ __se_sys_sendmsg+0x305/0x460 net/socket.c:2161
+ __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2161
+ do_syscall_64+0xcf/0x110 arch/x86/entry/common.c:291
+ entry_SYSCALL_64_after_hwframe+0x63/0xe7
+
+Uninit was created at:
+ kmsan_save_stack_with_flags mm/kmsan/kmsan.c:246 [inline]
+ kmsan_internal_poison_shadow+0x6d/0x130 mm/kmsan/kmsan.c:170
+ kmsan_kmalloc+0xa1/0x100 mm/kmsan/kmsan_hooks.c:186
+ __kmalloc+0x14c/0x4d0 mm/slub.c:3825
+ kmalloc include/linux/slab.h:551 [inline]
+ __hw_addr_create_ex net/core/dev_addr_lists.c:34 [inline]
+ __hw_addr_add_ex net/core/dev_addr_lists.c:80 [inline]
+ __dev_mc_add+0x357/0x8a0 net/core/dev_addr_lists.c:670
+ dev_mc_add+0x6d/0x80 net/core/dev_addr_lists.c:687
+ ip_mc_filter_add net/ipv4/igmp.c:1128 [inline]
+ igmp_group_added+0x4d4/0xb80 net/ipv4/igmp.c:1311
+ __ip_mc_inc_group+0xea9/0xf70 net/ipv4/igmp.c:1444
+ ip_mc_inc_group net/ipv4/igmp.c:1453 [inline]
+ ip_mc_up+0x1c3/0x400 net/ipv4/igmp.c:1775
+ inetdev_event+0x1d03/0x1d80 net/ipv4/devinet.c:1522
+ notifier_call_chain kernel/notifier.c:93 [inline]
+ __raw_notifier_call_chain kernel/notifier.c:394 [inline]
+ raw_notifier_call_chain+0x13d/0x240 kernel/notifier.c:401
+ __dev_notify_flags+0x3da/0x860 net/core/dev.c:1733
+ dev_change_flags+0x1ac/0x230 net/core/dev.c:7569
+ do_setlink+0x165f/0x5ea0 net/core/rtnetlink.c:2492
+ rtnl_newlink+0x2ad7/0x35a0 net/core/rtnetlink.c:3111
+ rtnetlink_rcv_msg+0x1148/0x1540 net/core/rtnetlink.c:4947
+ netlink_rcv_skb+0x394/0x640 net/netlink/af_netlink.c:2477
+ rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:4965
+ netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline]
+ netlink_unicast+0x1699/0x1740 net/netlink/af_netlink.c:1336
+ netlink_sendmsg+0x13c7/0x1440 net/netlink/af_netlink.c:1917
+ sock_sendmsg_nosec net/socket.c:621 [inline]
+ sock_sendmsg net/socket.c:631 [inline]
+ ___sys_sendmsg+0xe3b/0x1240 net/socket.c:2116
+ __sys_sendmsg net/socket.c:2154 [inline]
+ __do_sys_sendmsg net/socket.c:2163 [inline]
+ __se_sys_sendmsg+0x305/0x460 net/socket.c:2161
+ __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2161
+ do_syscall_64+0xcf/0x110 arch/x86/entry/common.c:291
+ entry_SYSCALL_64_after_hwframe+0x63/0xe7
+
+Bytes 36-37 of 105 are uninitialized
+Memory access of size 105 starts at ffff88819686c000
+Data copied to user address 0000000020000380
+
+Fixes: d83b06036048 ("net: add fdb generic dump routine")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: John Fastabend <john.fastabend@gmail.com>
+Cc: Ido Schimmel <idosch@mellanox.com>
+Cc: David Ahern <dsahern@gmail.com>
+Reviewed-by: Ido Schimmel <idosch@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/rtnetlink.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -3280,6 +3280,9 @@ int ndo_dflt_fdb_dump(struct sk_buff *sk
+ {
+       int err;
++      if (dev->type != ARPHRD_ETHER)
++              return -EINVAL;
++
+       netif_addr_lock_bh(dev);
+       err = nlmsg_populate_fdb(skb, cb, dev, idx, &dev->uc);
+       if (err)
diff --git a/queue-4.14/sctp-kfree_rcu-asoc.patch b/queue-4.14/sctp-kfree_rcu-asoc.patch
new file mode 100644 (file)
index 0000000..e394705
--- /dev/null
@@ -0,0 +1,58 @@
+From foo@baz Thu Dec 13 10:39:23 CET 2018
+From: Xin Long <lucien.xin@gmail.com>
+Date: Sat, 1 Dec 2018 01:36:59 +0800
+Subject: sctp: kfree_rcu asoc
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit fb6df5a6234c38a9c551559506a49a677ac6f07a ]
+
+In sctp_hash_transport/sctp_epaddr_lookup_transport, a transport's asoc
+is dereferenced under rcu_read_lock, but the asoc is freed without waiting
+for a grace period, which leads to a use-after-free panic.
+
+This patch fixes it by calling kfree_rcu to make asoc be freed after
+a grace period.
+
+Note that only the freeing of the asoc's memory is delayed by this patch;
+it won't cause sk to linger longer.
+
+Thanks to Neil and Marcelo for making this clear.
+
+Fixes: 7fda702f9315 ("sctp: use new rhlist interface on sctp transport rhashtable")
+Fixes: cd2b70875058 ("sctp: check duplicate node before inserting a new transport")
+Reported-by: syzbot+0b05d8aa7cb185107483@syzkaller.appspotmail.com
+Reported-by: syzbot+aad231d51b1923158444@syzkaller.appspotmail.com
+Suggested-by: Neil Horman <nhorman@tuxdriver.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Acked-by: Neil Horman <nhorman@tuxdriver.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/sctp/structs.h |    2 ++
+ net/sctp/associola.c       |    2 +-
+ 2 files changed, 3 insertions(+), 1 deletion(-)
+
+--- a/include/net/sctp/structs.h
++++ b/include/net/sctp/structs.h
+@@ -1902,6 +1902,8 @@ struct sctp_association {
+       __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1];
+       __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1];
++
++      struct rcu_head rcu;
+ };
+--- a/net/sctp/associola.c
++++ b/net/sctp/associola.c
+@@ -432,7 +432,7 @@ static void sctp_association_destroy(str
+       WARN_ON(atomic_read(&asoc->rmem_alloc));
+-      kfree(asoc);
++      kfree_rcu(asoc, rcu);
+       SCTP_DBG_OBJCNT_DEC(assoc);
+ }
diff --git a/queue-4.14/series b/queue-4.14/series
new file mode 100644 (file)
index 0000000..721240d
--- /dev/null
@@ -0,0 +1,15 @@
+ipv4-ipv6-netfilter-adjust-the-frag-mem-limit-when-truesize-changes.patch
+ipv6-check-available-headroom-in-ip6_xmit-even-without-options.patch
+neighbour-avoid-writing-before-skb-head-in-neigh_hh_output.patch
+ipv6-sr-properly-initialize-flowi6-prior-passing-to-ip6_route_output.patch
+net-8139cp-fix-a-bug-triggered-by-changing-mtu-with-network-traffic.patch
+net-mlx4_core-correctly-set-pfc-param-if-global-pause-is-turned-off.patch
+net-mlx4_en-change-min-mtu-size-to-eth_min_mtu.patch
+net-phy-don-t-allow-__set_phy_supported-to-add-unsupported-modes.patch
+net-prevent-invalid-access-to-skb-prev-in-__qdisc_drop_all.patch
+rtnetlink-ndo_dflt_fdb_dump-only-work-for-arphrd_ether-devices.patch
+sctp-kfree_rcu-asoc.patch
+tcp-do-not-underestimate-rwnd_limited.patch
+tcp-fix-null-ref-in-tail-loss-probe.patch
+tun-forbid-iface-creation-with-rtnl-ops.patch
+virtio-net-keep-vnet-header-zeroed-after-processing-xdp.patch
diff --git a/queue-4.14/tcp-do-not-underestimate-rwnd_limited.patch b/queue-4.14/tcp-do-not-underestimate-rwnd_limited.patch
new file mode 100644 (file)
index 0000000..26cd6a0
--- /dev/null
@@ -0,0 +1,39 @@
+From foo@baz Thu Dec 13 10:39:23 CET 2018
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 5 Dec 2018 14:24:31 -0800
+Subject: tcp: Do not underestimate rwnd_limited
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 41727549de3e7281feb174d568c6e46823db8684 ]
+
+If available rwnd is too small, tcp_tso_should_defer()
+can decide it is worth waiting before splitting a TSO packet.
+
+This really means we are rwnd limited.
+
+Fixes: 5615f88614a4 ("tcp: instrument how long TCP is limited by receive window")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Reviewed-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -2328,8 +2328,11 @@ static bool tcp_write_xmit(struct sock *
+               } else {
+                       if (!push_one &&
+                           tcp_tso_should_defer(sk, skb, &is_cwnd_limited,
+-                                               max_segs))
++                                               max_segs)) {
++                              if (!is_cwnd_limited)
++                                      is_rwnd_limited = true;
+                               break;
++                      }
+               }
+               limit = mss_now;
diff --git a/queue-4.14/tcp-fix-null-ref-in-tail-loss-probe.patch b/queue-4.14/tcp-fix-null-ref-in-tail-loss-probe.patch
new file mode 100644 (file)
index 0000000..961f4b4
--- /dev/null
@@ -0,0 +1,52 @@
+From foo@baz Thu Dec 13 10:39:23 CET 2018
+From: Yuchung Cheng <ycheng@google.com>
+Date: Wed, 5 Dec 2018 14:38:38 -0800
+Subject: tcp: fix NULL ref in tail loss probe
+
+From: Yuchung Cheng <ycheng@google.com>
+
+[ Upstream commit b2b7af861122a0c0f6260155c29a1b2e594cd5b5 ]
+
+TCP loss probe timer may fire when the retransmission queue is empty but
+has a non-zero tp->packets_out counter. tcp_send_loss_probe will call
+tcp_rearm_rto which triggers a NULL pointer dereference by fetching the
+retransmission queue head in its sub-routines.
+
+Add a more detailed warning to help catch the root cause of the inflight
+accounting inconsistency.
+
+Reported-by: Rafael Tinoco <rafael.tinoco@linaro.org>
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c |   12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -2476,14 +2476,18 @@ void tcp_send_loss_probe(struct sock *sk
+               skb = tcp_write_queue_tail(sk);
+       }
++      if (unlikely(!skb)) {
++              WARN_ONCE(tp->packets_out,
++                        "invalid inflight: %u state %u cwnd %u mss %d\n",
++                        tp->packets_out, sk->sk_state, tp->snd_cwnd, mss);
++              inet_csk(sk)->icsk_pending = 0;
++              return;
++      }
++
+       /* At most one outstanding TLP retransmission. */
+       if (tp->tlp_high_seq)
+               goto rearm_timer;
+-      /* Retransmit last segment. */
+-      if (WARN_ON(!skb))
+-              goto rearm_timer;
+-
+       if (skb_still_in_host_queue(sk, skb))
+               goto rearm_timer;
diff --git a/queue-4.14/tun-forbid-iface-creation-with-rtnl-ops.patch b/queue-4.14/tun-forbid-iface-creation-with-rtnl-ops.patch
new file mode 100644 (file)
index 0000000..9f325b7
--- /dev/null
@@ -0,0 +1,46 @@
+From foo@baz Thu Dec 13 10:39:23 CET 2018
+From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Date: Thu, 29 Nov 2018 14:45:39 +0100
+Subject: tun: forbid iface creation with rtnl ops
+
+From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+
+[ Upstream commit 35b827b6d06199841a83839e8bb69c0cd13a28be ]
+
+It's not supported right now (the goal of the initial patch was to support
+'ip link del' only).
+
+Before the patch:
+$ ip link add foo type tun
+[  239.632660] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
+[snip]
+[  239.636410] RIP: 0010:register_netdevice+0x8e/0x3a0
+
+This panic occurs because dev->netdev_ops is not set by tun_setup(). But to
+have something usable, it will require more than just setting
+netdev_ops.
+
+Fixes: f019a7a594d9 ("tun: Implement ip link del tunXXX")
+CC: Eric W. Biederman <ebiederm@xmission.com>
+Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/tun.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -1818,9 +1818,9 @@ static void tun_setup(struct net_device
+ static int tun_validate(struct nlattr *tb[], struct nlattr *data[],
+                       struct netlink_ext_ack *extack)
+ {
+-      if (!data)
+-              return 0;
+-      return -EINVAL;
++      NL_SET_ERR_MSG(extack,
++                     "tun/tap creation via rtnetlink is not supported.");
++      return -EOPNOTSUPP;
+ }
+ static struct rtnl_link_ops tun_link_ops __read_mostly = {
diff --git a/queue-4.14/virtio-net-keep-vnet-header-zeroed-after-processing-xdp.patch b/queue-4.14/virtio-net-keep-vnet-header-zeroed-after-processing-xdp.patch
new file mode 100644 (file)
index 0000000..6fb1805
--- /dev/null
@@ -0,0 +1,77 @@
+From foo@baz Thu Dec 13 10:39:23 CET 2018
+From: Jason Wang <jasowang@redhat.com>
+Date: Thu, 29 Nov 2018 13:53:16 +0800
+Subject: virtio-net: keep vnet header zeroed after processing XDP
+
+From: Jason Wang <jasowang@redhat.com>
+
+[ Upstream commit 436c9453a1ac0944b82870ef2e0d9be956b396d9 ]
+
+We copy the vnet header unconditionally in page_to_skb(). This is wrong
+since XDP may modify the packet data. So let's keep a zeroed vnet
+header so as not to confuse the conversion between vnet header and skb
+metadata.
+
+In the future, we should be able to detect whether or not the packet was
+modified and keep using the vnet header when the packet was not touched.
+
+Fixes: f600b6905015 ("virtio_net: Add XDP support")
+Reported-by: Pavel Popa <pashinho1990@gmail.com>
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/virtio_net.c |   14 +++++++++-----
+ 1 file changed, 9 insertions(+), 5 deletions(-)
+
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -309,7 +309,8 @@ static unsigned int mergeable_ctx_to_tru
+ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
+                                  struct receive_queue *rq,
+                                  struct page *page, unsigned int offset,
+-                                 unsigned int len, unsigned int truesize)
++                                 unsigned int len, unsigned int truesize,
++                                 bool hdr_valid)
+ {
+       struct sk_buff *skb;
+       struct virtio_net_hdr_mrg_rxbuf *hdr;
+@@ -331,7 +332,8 @@ static struct sk_buff *page_to_skb(struc
+       else
+               hdr_padded_len = sizeof(struct padded_vnet_hdr);
+-      memcpy(hdr, p, hdr_len);
++      if (hdr_valid)
++              memcpy(hdr, p, hdr_len);
+       len -= hdr_len;
+       offset += hdr_padded_len;
+@@ -594,7 +596,8 @@ static struct sk_buff *receive_big(struc
+                                  unsigned int len)
+ {
+       struct page *page = buf;
+-      struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);
++      struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len,
++                                        PAGE_SIZE, true);
+       if (unlikely(!skb))
+               goto err;
+@@ -678,7 +681,8 @@ static struct sk_buff *receive_mergeable
+                               rcu_read_unlock();
+                               put_page(page);
+                               head_skb = page_to_skb(vi, rq, xdp_page,
+-                                                     offset, len, PAGE_SIZE);
++                                                     offset, len,
++                                                     PAGE_SIZE, false);
+                               ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
+                               return head_skb;
+                       }
+@@ -712,7 +716,7 @@ static struct sk_buff *receive_mergeable
+               goto err_skb;
+       }
+-      head_skb = page_to_skb(vi, rq, page, offset, len, truesize);
++      head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog);
+       curr_skb = head_skb;
+       if (unlikely(!curr_skb))