git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.14-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 14 Dec 2017 10:47:09 +0000 (11:47 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 14 Dec 2017 10:47:09 +0000 (11:47 +0100)
added patches:
cls_bpf-don-t-decrement-net-s-refcount-when-offload-fails.patch
net-accept-ufo-datagrams-from-tuntap-and-packet.patch
net-ipv6-fixup-device-for-anycast-routes-during-copy.patch
net-openvswitch-datapath-fix-data-type-in-queue_gso_packets.patch
net-packet-fix-a-race-in-packet_bind-and-packet_notifier.patch
net-qmi_wwan-add-quectel-bg96-2c7c-0296.patch
net-realtek-r8169-implement-set_link_ksettings.patch
net-remove-hlist_nulls_add_tail_rcu.patch
net-sched-cbq-create-block-for-q-link.block.patch
net-thunderx-fix-tcp-udp-checksum-offload-for-ipv4-pkts.patch
net-thunderx-fix-tcp-udp-checksum-offload-for-ipv6-pkts.patch
packet-fix-crash-in-fanout_demux_rollover.patch
rds-fix-null-pointer-dereference-in-__rds_rdma_map.patch
s390-qeth-build-max-size-gso-skbs-on-l2-devices.patch
s390-qeth-fix-early-exit-from-error-path.patch
s390-qeth-fix-gso-throughput-regression.patch
s390-qeth-fix-thinko-in-ipv4-multicast-address-tracking.patch
sctp-use-right-member-as-the-param-of-list_for_each_entry.patch
sit-update-frag_off-info.patch
stmmac-reset-last-tso-segment-size-after-device-open.patch
tap-free-skb-if-flags-error.patch
tcp-add-tcp_v4_fill_cb-tcp_v4_restore_cb.patch
tcp-dccp-block-bh-before-arming-time_wait-timer.patch
tcp-remove-buggy-call-to-tcp_v6_restore_cb.patch
tcp-use-current-time-in-tcp_rcv_space_adjust.patch
tcp-use-ipcb-instead-of-tcp_skb_cb-in-inet_exact_dif_match.patch
tcp-when-scheduling-tlp-time-of-rto-should-account-for-current-ack.patch
tipc-call-tipc_rcv-only-if-bearer-is-up-in-tipc_udp_recv.patch
tipc-fix-memory-leak-in-tipc_accept_from_sock.patch
tun-fix-rcu_read_lock-imbalance-in-tun_build_skb.patch
tun-free-skb-in-early-errors.patch
usbnet-fix-alignment-for-frames-with-no-ethernet-header.patch
vhost-fix-skb-leak-in-handle_rx.patch

34 files changed:
queue-4.14/cls_bpf-don-t-decrement-net-s-refcount-when-offload-fails.patch [new file with mode: 0644]
queue-4.14/net-accept-ufo-datagrams-from-tuntap-and-packet.patch [new file with mode: 0644]
queue-4.14/net-ipv6-fixup-device-for-anycast-routes-during-copy.patch [new file with mode: 0644]
queue-4.14/net-openvswitch-datapath-fix-data-type-in-queue_gso_packets.patch [new file with mode: 0644]
queue-4.14/net-packet-fix-a-race-in-packet_bind-and-packet_notifier.patch [new file with mode: 0644]
queue-4.14/net-qmi_wwan-add-quectel-bg96-2c7c-0296.patch [new file with mode: 0644]
queue-4.14/net-realtek-r8169-implement-set_link_ksettings.patch [new file with mode: 0644]
queue-4.14/net-remove-hlist_nulls_add_tail_rcu.patch [new file with mode: 0644]
queue-4.14/net-sched-cbq-create-block-for-q-link.block.patch [new file with mode: 0644]
queue-4.14/net-thunderx-fix-tcp-udp-checksum-offload-for-ipv4-pkts.patch [new file with mode: 0644]
queue-4.14/net-thunderx-fix-tcp-udp-checksum-offload-for-ipv6-pkts.patch [new file with mode: 0644]
queue-4.14/packet-fix-crash-in-fanout_demux_rollover.patch [new file with mode: 0644]
queue-4.14/rds-fix-null-pointer-dereference-in-__rds_rdma_map.patch [new file with mode: 0644]
queue-4.14/s390-qeth-build-max-size-gso-skbs-on-l2-devices.patch [new file with mode: 0644]
queue-4.14/s390-qeth-fix-early-exit-from-error-path.patch [new file with mode: 0644]
queue-4.14/s390-qeth-fix-gso-throughput-regression.patch [new file with mode: 0644]
queue-4.14/s390-qeth-fix-thinko-in-ipv4-multicast-address-tracking.patch [new file with mode: 0644]
queue-4.14/sctp-use-right-member-as-the-param-of-list_for_each_entry.patch [new file with mode: 0644]
queue-4.14/series [new file with mode: 0644]
queue-4.14/sit-update-frag_off-info.patch [new file with mode: 0644]
queue-4.14/stmmac-reset-last-tso-segment-size-after-device-open.patch [new file with mode: 0644]
queue-4.14/tap-free-skb-if-flags-error.patch [new file with mode: 0644]
queue-4.14/tcp-add-tcp_v4_fill_cb-tcp_v4_restore_cb.patch [new file with mode: 0644]
queue-4.14/tcp-dccp-block-bh-before-arming-time_wait-timer.patch [new file with mode: 0644]
queue-4.14/tcp-remove-buggy-call-to-tcp_v6_restore_cb.patch [new file with mode: 0644]
queue-4.14/tcp-use-current-time-in-tcp_rcv_space_adjust.patch [new file with mode: 0644]
queue-4.14/tcp-use-ipcb-instead-of-tcp_skb_cb-in-inet_exact_dif_match.patch [new file with mode: 0644]
queue-4.14/tcp-when-scheduling-tlp-time-of-rto-should-account-for-current-ack.patch [new file with mode: 0644]
queue-4.14/tipc-call-tipc_rcv-only-if-bearer-is-up-in-tipc_udp_recv.patch [new file with mode: 0644]
queue-4.14/tipc-fix-memory-leak-in-tipc_accept_from_sock.patch [new file with mode: 0644]
queue-4.14/tun-fix-rcu_read_lock-imbalance-in-tun_build_skb.patch [new file with mode: 0644]
queue-4.14/tun-free-skb-in-early-errors.patch [new file with mode: 0644]
queue-4.14/usbnet-fix-alignment-for-frames-with-no-ethernet-header.patch [new file with mode: 0644]
queue-4.14/vhost-fix-skb-leak-in-handle_rx.patch [new file with mode: 0644]

diff --git a/queue-4.14/cls_bpf-don-t-decrement-net-s-refcount-when-offload-fails.patch b/queue-4.14/cls_bpf-don-t-decrement-net-s-refcount-when-offload-fails.patch
new file mode 100644 (file)
index 0000000..90d8464
--- /dev/null
@@ -0,0 +1,88 @@
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Jakub Kicinski <jakub.kicinski@netronome.com>
+Date: Mon, 27 Nov 2017 11:11:41 -0800
+Subject: cls_bpf: don't decrement net's refcount when offload fails
+
+From: Jakub Kicinski <jakub.kicinski@netronome.com>
+
+
+[ Upstream commit 25415cec502a1232b19fffc85465882b19a90415 ]
+
+When cls_bpf offload was added it seemed like a good idea to
+call cls_bpf_delete_prog() instead of extending the error
+handling path, since the software state is fully initialized
+at that point.  This handling of errors without jumping to
+the end of the function is error prone, as proven by a later
+commit missing the extra call to __cls_bpf_delete_prog().
+
+__cls_bpf_delete_prog() is now expected to be invoked with
+a reference on exts->net or the field zeroed out.  The call
+on the offload's error path does not fulfill this requirement,
+leading to each error stealing a reference on the net namespace.
+
+Create a function undoing what cls_bpf_set_parms() did and
+use it from __cls_bpf_delete_prog() and the error path.
+
+Fixes: aae2c35ec892 ("cls_bpf: use tcf_exts_get_net() before call_rcu()")
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Reviewed-by: Simon Horman <simon.horman@netronome.com>
+Acked-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/cls_bpf.c |   21 +++++++++++++--------
+ 1 file changed, 13 insertions(+), 8 deletions(-)
+
+--- a/net/sched/cls_bpf.c
++++ b/net/sched/cls_bpf.c
+@@ -246,11 +246,8 @@ static int cls_bpf_init(struct tcf_proto
+       return 0;
+ }
+-static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog)
++static void cls_bpf_free_parms(struct cls_bpf_prog *prog)
+ {
+-      tcf_exts_destroy(&prog->exts);
+-      tcf_exts_put_net(&prog->exts);
+-
+       if (cls_bpf_is_ebpf(prog))
+               bpf_prog_put(prog->filter);
+       else
+@@ -258,6 +255,14 @@ static void __cls_bpf_delete_prog(struct
+       kfree(prog->bpf_name);
+       kfree(prog->bpf_ops);
++}
++
++static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog)
++{
++      tcf_exts_destroy(&prog->exts);
++      tcf_exts_put_net(&prog->exts);
++
++      cls_bpf_free_parms(prog);
+       kfree(prog);
+ }
+@@ -509,10 +514,8 @@ static int cls_bpf_change(struct net *ne
+               goto errout;
+       ret = cls_bpf_offload(tp, prog, oldprog);
+-      if (ret) {
+-              __cls_bpf_delete_prog(prog);
+-              return ret;
+-      }
++      if (ret)
++              goto errout_parms;
+       if (!tc_in_hw(prog->gen_flags))
+               prog->gen_flags |= TCA_CLS_FLAGS_NOT_IN_HW;
+@@ -529,6 +532,8 @@ static int cls_bpf_change(struct net *ne
+       *arg = prog;
+       return 0;
++errout_parms:
++      cls_bpf_free_parms(prog);
+ errout:
+       tcf_exts_destroy(&prog->exts);
+       kfree(prog);
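
The fix follows the usual kernel error-unwind idiom: factor the partial teardown into a helper and jump to stacked labels, so each error path releases exactly what was acquired up to that point. A minimal standalone sketch of that idiom (struct prog, set_parms(), offload() and free_parms() are illustrative stand-ins, not the cls_bpf symbols):

    #include <stdlib.h>

    struct prog { int parms_held; };

    static int set_parms(struct prog *p)   { p->parms_held = 1; return 0; }
    static int offload(struct prog *p)     { (void)p; return -1; /* simulated failure */ }
    static void free_parms(struct prog *p) { p->parms_held = 0; }

    static int change(struct prog *p)
    {
            int err;

            err = set_parms(p);
            if (err)
                    goto errout;

            err = offload(p);
            if (err)
                    goto errout_parms;      /* unwind only what set_parms() took */

            return 0;

    errout_parms:
            free_parms(p);                  /* releases set_parms() state, nothing more */
    errout:
            free(p);
            return err;
    }

    int main(void)
    {
            struct prog *p = calloc(1, sizeof(*p));

            return (p && change(p) == 0) ? 0 : 1;
    }

Calling the full destructor from the offload error path, as the old code did, would also drop the exts->net reference that cls_bpf_set_parms() never took, which is exactly the refcount leak described above.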
diff --git a/queue-4.14/net-accept-ufo-datagrams-from-tuntap-and-packet.patch b/queue-4.14/net-accept-ufo-datagrams-from-tuntap-and-packet.patch
new file mode 100644 (file)
index 0000000..a1c80ca
--- /dev/null
@@ -0,0 +1,519 @@
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Willem de Bruijn <willemb@google.com>
+Date: Tue, 21 Nov 2017 10:22:25 -0500
+Subject: net: accept UFO datagrams from tuntap and packet
+
+From: Willem de Bruijn <willemb@google.com>
+
+
+[ Upstream commit 0c19f846d582af919db66a5914a0189f9f92c936 ]
+
+Tuntap and similar devices can inject GSO packets. Accept type
+VIRTIO_NET_HDR_GSO_UDP, even though not generating UFO natively.
+
+Processes are expected to use feature negotiation such as TUNSETOFFLOAD
+to detect supported offload types and refrain from injecting other
+packets. This process breaks down with live migration: guest kernels
+do not renegotiate flags, so destination hosts need to expose all
+features that the source host does.
+
+Partially revert the UFO removal from 182e0b6b5846~1..d9d30adf5677.
+This patch introduces nearly(*) no new code to simplify verification.
+It brings back verbatim tuntap UFO negotiation, VIRTIO_NET_HDR_GSO_UDP
+insertion and software UFO segmentation.
+
+It does not reinstate protocol stack support, hardware offload
+(NETIF_F_UFO), SKB_GSO_UDP tunneling in SKB_GSO_SOFTWARE or reception
+of VIRTIO_NET_HDR_GSO_UDP packets in tuntap.
+
+To support SKB_GSO_UDP reappearing in the stack, also reinstate
+logic in act_csum and openvswitch. Achieve equivalence with v4.13 HEAD
+by squashing in commit 939912216fa8 ("net: skb_needs_check() removes
+CHECKSUM_UNNECESSARY check for tx.") and reverting commit 8d63bee643f1
+("net: avoid skb_warn_bad_offload false positives on UFO").
+
+(*) To avoid having to bring back skb_shinfo(skb)->ip6_frag_id,
+ipv6_proxy_select_ident is changed to return a __be32 and this is
+assigned directly to the frag_hdr. Also, SKB_GSO_UDP is inserted
+at the end of the enum to minimize code churn.
+
+Tested
+  Booted a v4.13 guest kernel with QEMU. On a host kernel before this
+  patch `ethtool -k eth0` shows UFO disabled. After the patch, it is
+  enabled, same as on a v4.13 host kernel.
+
+  A UFO packet sent from the guest appears on the tap device:
+    host:
+      nc -l -p -u 8000 &
+      tcpdump -n -i tap0
+
+    guest:
+      dd if=/dev/zero of=payload.txt bs=1 count=2000
+      nc -u 192.16.1.1 8000 < payload.txt
+
+  Direct tap to tap transmission of VIRTIO_NET_HDR_GSO_UDP succeeds,
+  packets arriving fragmented:
+
+    ./with_tap_pair.sh ./tap_send_ufo tap0 tap1
+    (from https://github.com/wdebruij/kerneltools/tree/master/tests)
+
+Changes
+  v1 -> v2
+    - simplified set_offload change (review comment)
+    - documented test procedure
+
+Link: http://lkml.kernel.org/r/<CAF=yD-LuUeDuL9YWPJD9ykOZ0QCjNeznPDr6whqZ9NGMNF12Mw@mail.gmail.com>
+Fixes: fb652fdfe837 ("macvlan/macvtap: Remove NETIF_F_UFO advertisement.")
+Reported-by: Michal Kubecek <mkubecek@suse.cz>
+Signed-off-by: Willem de Bruijn <willemb@google.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/tap.c               |    2 
+ drivers/net/tun.c               |    2 
+ include/linux/netdev_features.h |    4 +
+ include/linux/netdevice.h       |    1 
+ include/linux/skbuff.h          |    2 
+ include/linux/virtio_net.h      |    5 +-
+ include/net/ipv6.h              |    2 
+ net/core/dev.c                  |    3 -
+ net/ipv4/af_inet.c              |   12 ++++-
+ net/ipv4/udp_offload.c          |   49 +++++++++++++++++++++--
+ net/ipv6/output_core.c          |    6 +-
+ net/ipv6/udp_offload.c          |   85 ++++++++++++++++++++++++++++++++++++++--
+ net/openvswitch/datapath.c      |   14 ++++++
+ net/openvswitch/flow.c          |    6 ++
+ net/sched/act_csum.c            |    6 ++
+ 15 files changed, 181 insertions(+), 18 deletions(-)
+
+--- a/drivers/net/tap.c
++++ b/drivers/net/tap.c
+@@ -1080,7 +1080,7 @@ static long tap_ioctl(struct file *file,
+       case TUNSETOFFLOAD:
+               /* let the user check for future flags */
+               if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
+-                          TUN_F_TSO_ECN))
++                          TUN_F_TSO_ECN | TUN_F_UFO))
+                       return -EINVAL;
+               rtnl_lock();
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -2157,6 +2157,8 @@ static int set_offload(struct tun_struct
+                               features |= NETIF_F_TSO6;
+                       arg &= ~(TUN_F_TSO4|TUN_F_TSO6);
+               }
++
++              arg &= ~TUN_F_UFO;
+       }
+       /* This gives the user a way to test for new features in future by
+--- a/include/linux/netdev_features.h
++++ b/include/linux/netdev_features.h
+@@ -54,8 +54,9 @@ enum {
+       NETIF_F_GSO_TUNNEL_REMCSUM_BIT, /* ... TUNNEL with TSO & REMCSUM */
+       NETIF_F_GSO_SCTP_BIT,           /* ... SCTP fragmentation */
+       NETIF_F_GSO_ESP_BIT,            /* ... ESP with TSO */
++      NETIF_F_GSO_UDP_BIT,            /* ... UFO, deprecated except tuntap */
+       /**/NETIF_F_GSO_LAST =          /* last bit, see GSO_MASK */
+-              NETIF_F_GSO_ESP_BIT,
++              NETIF_F_GSO_UDP_BIT,
+       NETIF_F_FCOE_CRC_BIT,           /* FCoE CRC32 */
+       NETIF_F_SCTP_CRC_BIT,           /* SCTP checksum offload */
+@@ -132,6 +133,7 @@ enum {
+ #define NETIF_F_GSO_TUNNEL_REMCSUM __NETIF_F(GSO_TUNNEL_REMCSUM)
+ #define NETIF_F_GSO_SCTP      __NETIF_F(GSO_SCTP)
+ #define NETIF_F_GSO_ESP               __NETIF_F(GSO_ESP)
++#define NETIF_F_GSO_UDP               __NETIF_F(GSO_UDP)
+ #define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER)
+ #define NETIF_F_HW_VLAN_STAG_RX       __NETIF_F(HW_VLAN_STAG_RX)
+ #define NETIF_F_HW_VLAN_STAG_TX       __NETIF_F(HW_VLAN_STAG_TX)
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -4101,6 +4101,7 @@ static inline bool net_gso_ok(netdev_fea
+       BUILD_BUG_ON(SKB_GSO_TUNNEL_REMCSUM != (NETIF_F_GSO_TUNNEL_REMCSUM >> NETIF_F_GSO_SHIFT));
+       BUILD_BUG_ON(SKB_GSO_SCTP    != (NETIF_F_GSO_SCTP >> NETIF_F_GSO_SHIFT));
+       BUILD_BUG_ON(SKB_GSO_ESP != (NETIF_F_GSO_ESP >> NETIF_F_GSO_SHIFT));
++      BUILD_BUG_ON(SKB_GSO_UDP != (NETIF_F_GSO_UDP >> NETIF_F_GSO_SHIFT));
+       return (features & feature) == feature;
+ }
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -569,6 +569,8 @@ enum {
+       SKB_GSO_SCTP = 1 << 14,
+       SKB_GSO_ESP = 1 << 15,
++
++      SKB_GSO_UDP = 1 << 16,
+ };
+ #if BITS_PER_LONG > 32
+--- a/include/linux/virtio_net.h
++++ b/include/linux/virtio_net.h
+@@ -9,7 +9,7 @@ static inline int virtio_net_hdr_to_skb(
+                                       const struct virtio_net_hdr *hdr,
+                                       bool little_endian)
+ {
+-      unsigned short gso_type = 0;
++      unsigned int gso_type = 0;
+       if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
+               switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
+@@ -19,6 +19,9 @@ static inline int virtio_net_hdr_to_skb(
+               case VIRTIO_NET_HDR_GSO_TCPV6:
+                       gso_type = SKB_GSO_TCPV6;
+                       break;
++              case VIRTIO_NET_HDR_GSO_UDP:
++                      gso_type = SKB_GSO_UDP;
++                      break;
+               default:
+                       return -EINVAL;
+               }
+--- a/include/net/ipv6.h
++++ b/include/net/ipv6.h
+@@ -727,7 +727,7 @@ static inline int ipv6_addr_diff(const s
+ __be32 ipv6_select_ident(struct net *net,
+                        const struct in6_addr *daddr,
+                        const struct in6_addr *saddr);
+-void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb);
++__be32 ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb);
+ int ip6_dst_hoplimit(struct dst_entry *dst);
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -2735,7 +2735,8 @@ EXPORT_SYMBOL(skb_mac_gso_segment);
+ static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
+ {
+       if (tx_path)
+-              return skb->ip_summed != CHECKSUM_PARTIAL;
++              return skb->ip_summed != CHECKSUM_PARTIAL &&
++                     skb->ip_summed != CHECKSUM_UNNECESSARY;
+       return skb->ip_summed == CHECKSUM_NONE;
+ }
+--- a/net/ipv4/af_inet.c
++++ b/net/ipv4/af_inet.c
+@@ -1221,9 +1221,10 @@ EXPORT_SYMBOL(inet_sk_rebuild_header);
+ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
+                                netdev_features_t features)
+ {
+-      bool fixedid = false, gso_partial, encap;
++      bool udpfrag = false, fixedid = false, gso_partial, encap;
+       struct sk_buff *segs = ERR_PTR(-EINVAL);
+       const struct net_offload *ops;
++      unsigned int offset = 0;
+       struct iphdr *iph;
+       int proto, tot_len;
+       int nhoff;
+@@ -1258,6 +1259,7 @@ struct sk_buff *inet_gso_segment(struct
+       segs = ERR_PTR(-EPROTONOSUPPORT);
+       if (!skb->encapsulation || encap) {
++              udpfrag = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
+               fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID);
+               /* fixed ID is invalid if DF bit is not set */
+@@ -1277,7 +1279,13 @@ struct sk_buff *inet_gso_segment(struct
+       skb = segs;
+       do {
+               iph = (struct iphdr *)(skb_mac_header(skb) + nhoff);
+-              if (skb_is_gso(skb)) {
++              if (udpfrag) {
++                      iph->frag_off = htons(offset >> 3);
++                      if (skb->next)
++                              iph->frag_off |= htons(IP_MF);
++                      offset += skb->len - nhoff - ihl;
++                      tot_len = skb->len - nhoff;
++              } else if (skb_is_gso(skb)) {
+                       if (!fixedid) {
+                               iph->id = htons(id);
+                               id += skb_shinfo(skb)->gso_segs;
+--- a/net/ipv4/udp_offload.c
++++ b/net/ipv4/udp_offload.c
+@@ -187,16 +187,57 @@ out_unlock:
+ }
+ EXPORT_SYMBOL(skb_udp_tunnel_segment);
+-static struct sk_buff *udp4_tunnel_segment(struct sk_buff *skb,
+-                                         netdev_features_t features)
++static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
++                                       netdev_features_t features)
+ {
+       struct sk_buff *segs = ERR_PTR(-EINVAL);
++      unsigned int mss;
++      __wsum csum;
++      struct udphdr *uh;
++      struct iphdr *iph;
+       if (skb->encapsulation &&
+           (skb_shinfo(skb)->gso_type &
+-           (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM)))
++           (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) {
+               segs = skb_udp_tunnel_segment(skb, features, false);
++              goto out;
++      }
++
++      if (!pskb_may_pull(skb, sizeof(struct udphdr)))
++              goto out;
++      mss = skb_shinfo(skb)->gso_size;
++      if (unlikely(skb->len <= mss))
++              goto out;
++
++      /* Do software UFO. Complete and fill in the UDP checksum as
++       * HW cannot do checksum of UDP packets sent as multiple
++       * IP fragments.
++       */
++
++      uh = udp_hdr(skb);
++      iph = ip_hdr(skb);
++
++      uh->check = 0;
++      csum = skb_checksum(skb, 0, skb->len, 0);
++      uh->check = udp_v4_check(skb->len, iph->saddr, iph->daddr, csum);
++      if (uh->check == 0)
++              uh->check = CSUM_MANGLED_0;
++
++      skb->ip_summed = CHECKSUM_UNNECESSARY;
++
++      /* If there is no outer header we can fake a checksum offload
++       * due to the fact that we have already done the checksum in
++       * software prior to segmenting the frame.
++       */
++      if (!skb->encap_hdr_csum)
++              features |= NETIF_F_HW_CSUM;
++
++      /* Fragment the skb. IP headers of the fragments are updated in
++       * inet_gso_segment()
++       */
++      segs = skb_segment(skb, features);
++out:
+       return segs;
+ }
+@@ -330,7 +371,7 @@ static int udp4_gro_complete(struct sk_b
+ static const struct net_offload udpv4_offload = {
+       .callbacks = {
+-              .gso_segment = udp4_tunnel_segment,
++              .gso_segment = udp4_ufo_fragment,
+               .gro_receive  = udp4_gro_receive,
+               .gro_complete = udp4_gro_complete,
+       },
+--- a/net/ipv6/output_core.c
++++ b/net/ipv6/output_core.c
+@@ -39,7 +39,7 @@ static u32 __ipv6_select_ident(struct ne
+  *
+  * The network header must be set before calling this.
+  */
+-void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb)
++__be32 ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb)
+ {
+       static u32 ip6_proxy_idents_hashrnd __read_mostly;
+       struct in6_addr buf[2];
+@@ -51,14 +51,14 @@ void ipv6_proxy_select_ident(struct net
+                                  offsetof(struct ipv6hdr, saddr),
+                                  sizeof(buf), buf);
+       if (!addrs)
+-              return;
++              return 0;
+       net_get_random_once(&ip6_proxy_idents_hashrnd,
+                           sizeof(ip6_proxy_idents_hashrnd));
+       id = __ipv6_select_ident(net, ip6_proxy_idents_hashrnd,
+                                &addrs[1], &addrs[0]);
+-      skb_shinfo(skb)->ip6_frag_id = htonl(id);
++      return htonl(id);
+ }
+ EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident);
+--- a/net/ipv6/udp_offload.c
++++ b/net/ipv6/udp_offload.c
+@@ -17,15 +17,94 @@
+ #include <net/ip6_checksum.h>
+ #include "ip6_offload.h"
+-static struct sk_buff *udp6_tunnel_segment(struct sk_buff *skb,
+-                                         netdev_features_t features)
++static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
++                                       netdev_features_t features)
+ {
+       struct sk_buff *segs = ERR_PTR(-EINVAL);
++      unsigned int mss;
++      unsigned int unfrag_ip6hlen, unfrag_len;
++      struct frag_hdr *fptr;
++      u8 *packet_start, *prevhdr;
++      u8 nexthdr;
++      u8 frag_hdr_sz = sizeof(struct frag_hdr);
++      __wsum csum;
++      int tnl_hlen;
++      int err;
++
++      mss = skb_shinfo(skb)->gso_size;
++      if (unlikely(skb->len <= mss))
++              goto out;
+       if (skb->encapsulation && skb_shinfo(skb)->gso_type &
+           (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))
+               segs = skb_udp_tunnel_segment(skb, features, true);
++      else {
++              const struct ipv6hdr *ipv6h;
++              struct udphdr *uh;
++
++              if (!pskb_may_pull(skb, sizeof(struct udphdr)))
++                      goto out;
++
++              /* Do software UFO. Complete and fill in the UDP checksum as HW cannot
++               * do checksum of UDP packets sent as multiple IP fragments.
++               */
++
++              uh = udp_hdr(skb);
++              ipv6h = ipv6_hdr(skb);
++
++              uh->check = 0;
++              csum = skb_checksum(skb, 0, skb->len, 0);
++              uh->check = udp_v6_check(skb->len, &ipv6h->saddr,
++                                        &ipv6h->daddr, csum);
++              if (uh->check == 0)
++                      uh->check = CSUM_MANGLED_0;
++
++              skb->ip_summed = CHECKSUM_UNNECESSARY;
++
++              /* If there is no outer header we can fake a checksum offload
++               * due to the fact that we have already done the checksum in
++               * software prior to segmenting the frame.
++               */
++              if (!skb->encap_hdr_csum)
++                      features |= NETIF_F_HW_CSUM;
++
++              /* Check if there is enough headroom to insert fragment header. */
++              tnl_hlen = skb_tnl_header_len(skb);
++              if (skb->mac_header < (tnl_hlen + frag_hdr_sz)) {
++                      if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz))
++                              goto out;
++              }
++
++              /* Find the unfragmentable header and shift it left by frag_hdr_sz
++               * bytes to insert fragment header.
++               */
++              err = ip6_find_1stfragopt(skb, &prevhdr);
++              if (err < 0)
++                      return ERR_PTR(err);
++              unfrag_ip6hlen = err;
++              nexthdr = *prevhdr;
++              *prevhdr = NEXTHDR_FRAGMENT;
++              unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) +
++                           unfrag_ip6hlen + tnl_hlen;
++              packet_start = (u8 *) skb->head + SKB_GSO_CB(skb)->mac_offset;
++              memmove(packet_start-frag_hdr_sz, packet_start, unfrag_len);
++
++              SKB_GSO_CB(skb)->mac_offset -= frag_hdr_sz;
++              skb->mac_header -= frag_hdr_sz;
++              skb->network_header -= frag_hdr_sz;
++
++              fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
++              fptr->nexthdr = nexthdr;
++              fptr->reserved = 0;
++              fptr->identification = ipv6_proxy_select_ident(dev_net(skb->dev), skb);
++
++              /* Fragment the skb. ipv6 header and the remaining fields of the
++               * fragment header are updated in ipv6_gso_segment()
++               */
++              segs = skb_segment(skb, features);
++      }
++out:
+       return segs;
+ }
+@@ -75,7 +154,7 @@ static int udp6_gro_complete(struct sk_b
+ static const struct net_offload udpv6_offload = {
+       .callbacks = {
+-              .gso_segment    =       udp6_tunnel_segment,
++              .gso_segment    =       udp6_ufo_fragment,
+               .gro_receive    =       udp6_gro_receive,
+               .gro_complete   =       udp6_gro_complete,
+       },
+--- a/net/openvswitch/datapath.c
++++ b/net/openvswitch/datapath.c
+@@ -335,6 +335,8 @@ static int queue_gso_packets(struct data
+                            const struct dp_upcall_info *upcall_info,
+                                uint32_t cutlen)
+ {
++      unsigned short gso_type = skb_shinfo(skb)->gso_type;
++      struct sw_flow_key later_key;
+       struct sk_buff *segs, *nskb;
+       int err;
+@@ -345,9 +347,21 @@ static int queue_gso_packets(struct data
+       if (segs == NULL)
+               return -EINVAL;
++      if (gso_type & SKB_GSO_UDP) {
++              /* The initial flow key extracted by ovs_flow_key_extract()
++               * in this case is for a first fragment, so we need to
++               * properly mark later fragments.
++               */
++              later_key = *key;
++              later_key.ip.frag = OVS_FRAG_TYPE_LATER;
++      }
++
+       /* Queue all of the segments. */
+       skb = segs;
+       do {
++              if (gso_type & SKB_GSO_UDP && skb != segs)
++                      key = &later_key;
++
+               err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
+               if (err)
+                       break;
+--- a/net/openvswitch/flow.c
++++ b/net/openvswitch/flow.c
+@@ -584,7 +584,8 @@ static int key_extract(struct sk_buff *s
+                       key->ip.frag = OVS_FRAG_TYPE_LATER;
+                       return 0;
+               }
+-              if (nh->frag_off & htons(IP_MF))
++              if (nh->frag_off & htons(IP_MF) ||
++                      skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
+                       key->ip.frag = OVS_FRAG_TYPE_FIRST;
+               else
+                       key->ip.frag = OVS_FRAG_TYPE_NONE;
+@@ -700,6 +701,9 @@ static int key_extract(struct sk_buff *s
+               if (key->ip.frag == OVS_FRAG_TYPE_LATER)
+                       return 0;
++              if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
++                      key->ip.frag = OVS_FRAG_TYPE_FIRST;
++
+               /* Transport layer. */
+               if (key->ip.proto == NEXTHDR_TCP) {
+                       if (tcphdr_ok(skb)) {
+--- a/net/sched/act_csum.c
++++ b/net/sched/act_csum.c
+@@ -229,6 +229,9 @@ static int tcf_csum_ipv4_udp(struct sk_b
+       const struct iphdr *iph;
+       u16 ul;
++      if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
++              return 1;
++
+       /*
+        * Support both UDP and UDPLITE checksum algorithms, Don't use
+        * udph->len to get the real length without any protocol check,
+@@ -282,6 +285,9 @@ static int tcf_csum_ipv6_udp(struct sk_b
+       const struct ipv6hdr *ip6h;
+       u16 ul;
++      if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
++              return 1;
++
+       /*
+        * Support both UDP and UDPLITE checksum algorithms, Don't use
+        * udph->len to get the real length without any protocol check,
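
Since the commit message leans on TUNSETOFFLOAD feature negotiation, a hedged userspace sketch of that probe may help. Error handling is minimal and the device name "tap0" is an assumption:

    #include <fcntl.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/if.h>
    #include <linux/if_tun.h>

    /* Open a tap device and probe UFO support. On kernels that removed UFO
     * the TUNSETOFFLOAD call fails with EINVAL; with this patch the TUN_F_UFO
     * flag is accepted again (tun's set_offload() simply masks it off instead
     * of advertising NETIF_F_UFO).
     */
    int open_tap_with_ufo(void)
    {
            struct ifreq ifr;
            int fd = open("/dev/net/tun", O_RDWR);

            if (fd < 0)
                    return -1;

            memset(&ifr, 0, sizeof(ifr));
            ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
            strncpy(ifr.ifr_name, "tap0", IFNAMSIZ - 1);

            if (ioctl(fd, TUNSETIFF, &ifr) < 0 ||
                ioctl(fd, TUNSETOFFLOAD,
                      TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_UFO) < 0) {
                    close(fd);
                    return -1;
            }

            return fd;
    }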
diff --git a/queue-4.14/net-ipv6-fixup-device-for-anycast-routes-during-copy.patch b/queue-4.14/net-ipv6-fixup-device-for-anycast-routes-during-copy.patch
new file mode 100644 (file)
index 0000000..a7ea01d
--- /dev/null
@@ -0,0 +1,51 @@
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: David Ahern <dsahern@gmail.com>
+Date: Tue, 21 Nov 2017 07:08:57 -0800
+Subject: net: ipv6: Fixup device for anycast routes during copy
+
+From: David Ahern <dsahern@gmail.com>
+
+
+[ Upstream commit 98d11291d189cb5adf49694d0ad1b971c0212697 ]
+
+Florian reported a breakage with anycast routes due to commit
+4832c30d5458 ("net: ipv6: put host and anycast routes on device with
+address"). Prior to this commit anycast routes were added against the
+loopback device causing repetitive route entries with no insight into
+why they existed. e.g.:
+  $ ip -6 ro ls  table local type anycast
+  anycast 2001:db8:1:: dev lo proto kernel metric 0 pref medium
+  anycast 2001:db8:2:: dev lo proto kernel metric 0 pref medium
+  anycast fe80:: dev lo proto kernel metric 0 pref medium
+  anycast fe80:: dev lo proto kernel metric 0 pref medium
+
+The point of commit 4832c30d5458 is to add the routes using the device
+with the address which is causing the route to be added. e.g.,:
+  $ ip -6 ro ls  table local type anycast
+  anycast 2001:db8:1:: dev eth1 proto kernel metric 0 pref medium
+  anycast 2001:db8:2:: dev eth2 proto kernel metric 0 pref medium
+  anycast fe80:: dev eth2 proto kernel metric 0 pref medium
+  anycast fe80:: dev eth1 proto kernel metric 0 pref medium
+
+For traffic to work as it did before, the dst device needs to be switched
+to the loopback when the copy is created similar to local routes.
+
+Fixes: 4832c30d5458 ("net: ipv6: put host and anycast routes on device with address")
+Signed-off-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/route.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -960,7 +960,7 @@ static struct net_device *ip6_rt_get_dev
+ {
+       struct net_device *dev = rt->dst.dev;
+-      if (rt->rt6i_flags & RTF_LOCAL) {
++      if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) {
+               /* for copies of local routes, dst->dev needs to be the
+                * device if it is a master device, the master device if
+                * device is enslaved, and the loopback as the default
diff --git a/queue-4.14/net-openvswitch-datapath-fix-data-type-in-queue_gso_packets.patch b/queue-4.14/net-openvswitch-datapath-fix-data-type-in-queue_gso_packets.patch
new file mode 100644 (file)
index 0000000..0c43ace
--- /dev/null
@@ -0,0 +1,43 @@
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: "Gustavo A. R. Silva" <garsilva@embeddedor.com>
+Date: Sat, 25 Nov 2017 13:14:40 -0600
+Subject: net: openvswitch: datapath: fix data type in queue_gso_packets
+
+From: "Gustavo A. R. Silva" <garsilva@embeddedor.com>
+
+
+[ Upstream commit 2734166e89639c973c6e125ac8bcfc2d9db72b70 ]
+
+gso_type is being used in binary AND operations together with SKB_GSO_UDP.
+The issue is that variable gso_type is of type unsigned short and
+SKB_GSO_UDP expands to more than 16 bits:
+
+SKB_GSO_UDP = 1 << 16
+
+this makes any binary AND operation between gso_type and SKB_GSO_UDP to
+be always zero, hence making some code unreachable and likely causing
+undesired behavior.
+
+Fix this by changing the data type of variable gso_type to unsigned int.
+
+Addresses-Coverity-ID: 1462223
+Fixes: 0c19f846d582 ("net: accept UFO datagrams from tuntap and packet")
+Signed-off-by: Gustavo A. R. Silva <garsilva@embeddedor.com>
+Acked-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/openvswitch/datapath.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/openvswitch/datapath.c
++++ b/net/openvswitch/datapath.c
+@@ -335,7 +335,7 @@ static int queue_gso_packets(struct data
+                            const struct dp_upcall_info *upcall_info,
+                                uint32_t cutlen)
+ {
+-      unsigned short gso_type = skb_shinfo(skb)->gso_type;
++      unsigned int gso_type = skb_shinfo(skb)->gso_type;
+       struct sw_flow_key later_key;
+       struct sk_buff *segs, *nskb;
+       int err;
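
The truncation is easy to demonstrate outside the kernel. A small standalone demo (SKB_GSO_UDP is redefined locally for illustration):

    #include <stdio.h>

    #define SKB_GSO_UDP (1 << 16)  /* bit 16: does not fit in 16 bits */

    int main(void)
    {
            unsigned short gso_short = SKB_GSO_UDP; /* truncates to 0 */
            unsigned int   gso_int   = SKB_GSO_UDP; /* keeps bit 16 */

            /* The first AND is always 0, making the UDP branch unreachable. */
            printf("short: %#x\n", gso_short & SKB_GSO_UDP);
            printf("int:   %#x\n", gso_int & SKB_GSO_UDP);
            return 0;
    }

Both lines would have to print 0x10000 for the SKB_GSO_UDP test in queue_gso_packets() to ever fire; with unsigned short the first prints 0.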
diff --git a/queue-4.14/net-packet-fix-a-race-in-packet_bind-and-packet_notifier.patch b/queue-4.14/net-packet-fix-a-race-in-packet_bind-and-packet_notifier.patch
new file mode 100644 (file)
index 0000000..65558a2
--- /dev/null
@@ -0,0 +1,93 @@
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 28 Nov 2017 08:03:30 -0800
+Subject: net/packet: fix a race in packet_bind() and packet_notifier()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 15fe076edea787807a7cdc168df832544b58eba6 ]
+
+syzbot reported crashes [1] and provided a C repro easing bug hunting.
+
+When/if packet_do_bind() calls __unregister_prot_hook() and releases
+po->bind_lock, another thread can run packet_notifier() and process a
+NETDEV_UP event.
+
+This calls register_prot_hook() and hooks the socket again right before
+the first thread is able to grab po->bind_lock again.
+
+Fix this issue by temporarily setting po->num to 0, as suggested by
+David Miller.
+
+[1]
+dev_remove_pack: ffff8801bf16fa80 not found
+------------[ cut here ]------------
+kernel BUG at net/core/dev.c:7945!  ( BUG_ON(!list_empty(&dev->ptype_all)); )
+invalid opcode: 0000 [#1] SMP KASAN
+Dumping ftrace buffer:
+   (ftrace buffer empty)
+Modules linked in:
+device syz0 entered promiscuous mode
+CPU: 0 PID: 3161 Comm: syzkaller404108 Not tainted 4.14.0+ #190
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+task: ffff8801cc57a500 task.stack: ffff8801cc588000
+RIP: 0010:netdev_run_todo+0x772/0xae0 net/core/dev.c:7945
+RSP: 0018:ffff8801cc58f598 EFLAGS: 00010293
+RAX: ffff8801cc57a500 RBX: dffffc0000000000 RCX: ffffffff841f75b2
+RDX: 0000000000000000 RSI: 1ffff100398b1ede RDI: ffff8801bf1f8810
+device syz0 entered promiscuous mode
+RBP: ffff8801cc58f898 R08: 0000000000000001 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000000 R12: ffff8801bf1f8cd8
+R13: ffff8801cc58f870 R14: ffff8801bf1f8780 R15: ffff8801cc58f7f0
+FS:  0000000001716880(0000) GS:ffff8801db400000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 0000000020b13000 CR3: 0000000005e25000 CR4: 00000000001406f0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ rtnl_unlock+0xe/0x10 net/core/rtnetlink.c:106
+ tun_detach drivers/net/tun.c:670 [inline]
+ tun_chr_close+0x49/0x60 drivers/net/tun.c:2845
+ __fput+0x333/0x7f0 fs/file_table.c:210
+ ____fput+0x15/0x20 fs/file_table.c:244
+ task_work_run+0x199/0x270 kernel/task_work.c:113
+ exit_task_work include/linux/task_work.h:22 [inline]
+ do_exit+0x9bb/0x1ae0 kernel/exit.c:865
+ do_group_exit+0x149/0x400 kernel/exit.c:968
+ SYSC_exit_group kernel/exit.c:979 [inline]
+ SyS_exit_group+0x1d/0x20 kernel/exit.c:977
+ entry_SYSCALL_64_fastpath+0x1f/0x96
+RIP: 0033:0x44ad19
+
+Fixes: 30f7ea1c2b5f ("packet: race condition in packet_bind")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Cc: Francesco Ruggeri <fruggeri@aristanetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/packet/af_packet.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -3101,6 +3101,10 @@ static int packet_do_bind(struct sock *s
+       if (need_rehook) {
+               if (po->running) {
+                       rcu_read_unlock();
++                      /* prevents packet_notifier() from calling
++                       * register_prot_hook()
++                       */
++                      po->num = 0;
+                       __unregister_prot_hook(sk, true);
+                       rcu_read_lock();
+                       dev_curr = po->prot_hook.dev;
+@@ -3109,6 +3113,7 @@ static int packet_do_bind(struct sock *s
+                                                                dev->ifindex);
+               }
++              BUG_ON(po->running);
+               po->num = proto;
+               po->prot_hook.type = proto;
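
The window being closed, sketched as a simplified, illustrative interleaving (reconstructed from the commit message, not taken from the report):

    thread A: packet_do_bind()          thread B: packet_notifier(NETDEV_UP)

    __unregister_prot_hook(sk, true)
      -> drops po->bind_lock to sync
                                        takes po->bind_lock
                                        sees po->num != 0
                                        register_prot_hook(sk)   /* re-hooked */
                                        drops po->bind_lock
    re-acquires po->bind_lock
    rebinds on top of the stale hook

With po->num set to 0 before the unregister, thread B's po->num check fails and the socket stays unhooked until thread A finishes rebinding; the new BUG_ON(po->running) asserts exactly that.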
diff --git a/queue-4.14/net-qmi_wwan-add-quectel-bg96-2c7c-0296.patch b/queue-4.14/net-qmi_wwan-add-quectel-bg96-2c7c-0296.patch
new file mode 100644 (file)
index 0000000..20e1672
--- /dev/null
@@ -0,0 +1,33 @@
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Sebastian Sjoholm <ssjoholm@mac.com>
+Date: Mon, 20 Nov 2017 19:05:17 +0100
+Subject: net: qmi_wwan: add Quectel BG96 2c7c:0296
+
+From: Sebastian Sjoholm <ssjoholm@mac.com>
+
+
+[ Upstream commit f9409e7f086fa6c4623769b4b2f4f17a024d8143 ]
+
+Quectel BG96 is a Qualcomm MDM9206-based IoT modem, supporting both
+CAT-M and NB-IoT. Tested hardware is BG96 mounted on Quectel development
+board (EVB). The USB id is added to qmi_wwan.c to allow QMI
+communication with the BG96.
+
+Signed-off-by: Sebastian Sjoholm <ssjoholm@mac.com>
+Acked-by: Bjørn Mork <bjorn@mork.no>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/qmi_wwan.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/usb/qmi_wwan.c
++++ b/drivers/net/usb/qmi_wwan.c
+@@ -1239,6 +1239,7 @@ static const struct usb_device_id produc
+       {QMI_FIXED_INTF(0x1e0e, 0x9001, 5)},    /* SIMCom 7230E */
+       {QMI_QUIRK_SET_DTR(0x2c7c, 0x0125, 4)}, /* Quectel EC25, EC20 R2.0  Mini PCIe */
+       {QMI_QUIRK_SET_DTR(0x2c7c, 0x0121, 4)}, /* Quectel EC21 Mini PCIe */
++      {QMI_FIXED_INTF(0x2c7c, 0x0296, 4)},    /* Quectel BG96 */
+       /* 4. Gobi 1000 devices */
+       {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)},    /* Acer Gobi Modem Device */
diff --git a/queue-4.14/net-realtek-r8169-implement-set_link_ksettings.patch b/queue-4.14/net-realtek-r8169-implement-set_link_ksettings.patch
new file mode 100644 (file)
index 0000000..38b2336
--- /dev/null
@@ -0,0 +1,98 @@
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Tobias Jakobi <tjakobi@math.uni-bielefeld.de>
+Date: Tue, 21 Nov 2017 16:15:57 +0100
+Subject: net: realtek: r8169: implement set_link_ksettings()
+
+From: Tobias Jakobi <tjakobi@math.uni-bielefeld.de>
+
+
+[ Upstream commit 9e77d7a5549dc4d4999a60676373ab3fd1dae4db ]
+
+Commit 6fa1ba61520576cf1346c4ff09a056f2950cb3bf partially
+implemented the new ethtool API, by replacing get_settings()
+with get_link_ksettings(). This breaks ethtool, since the
+userspace tool (according to the new API specs) never tries
+the legacy set() call, when the new get() call succeeds.
+
+All attempts to chance some setting from userspace result in:
+> Cannot set new settings: Operation not supported
+
+Implement the missing set() call.
+
+Signed-off-by: Tobias Jakobi <tjakobi@math.uni-bielefeld.de>
+Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/realtek/r8169.c |   38 ++++++++++++++++++++---------------
+ 1 file changed, 22 insertions(+), 16 deletions(-)
+
+--- a/drivers/net/ethernet/realtek/r8169.c
++++ b/drivers/net/ethernet/realtek/r8169.c
+@@ -2025,21 +2025,6 @@ out:
+       return ret;
+ }
+-static int rtl8169_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+-{
+-      struct rtl8169_private *tp = netdev_priv(dev);
+-      int ret;
+-
+-      del_timer_sync(&tp->timer);
+-
+-      rtl_lock_work(tp);
+-      ret = rtl8169_set_speed(dev, cmd->autoneg, ethtool_cmd_speed(cmd),
+-                              cmd->duplex, cmd->advertising);
+-      rtl_unlock_work(tp);
+-
+-      return ret;
+-}
+-
+ static netdev_features_t rtl8169_fix_features(struct net_device *dev,
+       netdev_features_t features)
+ {
+@@ -2166,6 +2151,27 @@ static int rtl8169_get_link_ksettings(st
+       return rc;
+ }
++static int rtl8169_set_link_ksettings(struct net_device *dev,
++                                    const struct ethtool_link_ksettings *cmd)
++{
++      struct rtl8169_private *tp = netdev_priv(dev);
++      int rc;
++      u32 advertising;
++
++      if (!ethtool_convert_link_mode_to_legacy_u32(&advertising,
++          cmd->link_modes.advertising))
++              return -EINVAL;
++
++      del_timer_sync(&tp->timer);
++
++      rtl_lock_work(tp);
++      rc = rtl8169_set_speed(dev, cmd->base.autoneg, cmd->base.speed,
++                             cmd->base.duplex, advertising);
++      rtl_unlock_work(tp);
++
++      return rc;
++}
++
+ static void rtl8169_get_regs(struct net_device *dev, struct ethtool_regs *regs,
+                            void *p)
+ {
+@@ -2367,7 +2373,6 @@ static const struct ethtool_ops rtl8169_
+       .get_drvinfo            = rtl8169_get_drvinfo,
+       .get_regs_len           = rtl8169_get_regs_len,
+       .get_link               = ethtool_op_get_link,
+-      .set_settings           = rtl8169_set_settings,
+       .get_msglevel           = rtl8169_get_msglevel,
+       .set_msglevel           = rtl8169_set_msglevel,
+       .get_regs               = rtl8169_get_regs,
+@@ -2379,6 +2384,7 @@ static const struct ethtool_ops rtl8169_
+       .get_ts_info            = ethtool_op_get_ts_info,
+       .nway_reset             = rtl8169_nway_reset,
+       .get_link_ksettings     = rtl8169_get_link_ksettings,
++      .set_link_ksettings     = rtl8169_set_link_ksettings,
+ };
+ static void rtl8169_get_mac_version(struct rtl8169_private *tp,
diff --git a/queue-4.14/net-remove-hlist_nulls_add_tail_rcu.patch b/queue-4.14/net-remove-hlist_nulls_add_tail_rcu.patch
new file mode 100644 (file)
index 0000000..de5a6a2
--- /dev/null
@@ -0,0 +1,149 @@
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 5 Dec 2017 12:45:56 -0800
+Subject: net: remove hlist_nulls_add_tail_rcu()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit d7efc6c11b277d9d80b99b1334a78bfe7d7edf10 ]
+
+Alexander Potapenko reported use of uninitialized memory [1]
+
+This happens when inserting a request socket into TCP ehash,
+in __sk_nulls_add_node_rcu(), since sk_reuseport is not initialized.
+
+Bug was added by commit d894ba18d4e4 ("soreuseport: fix ordering for
+mixed v4/v6 sockets")
+
+Note that d296ba60d8e2 ("soreuseport: Resolve merge conflict for v4/v6
+ordering fix") missed the opportunity to get rid of
+hlist_nulls_add_tail_rcu():
+
+Both UDP sockets and TCP/DCCP listeners no longer use
+__sk_nulls_add_node_rcu() for their hash insertion.
+
+Since all other sockets have unique 4-tuple, the reuseport status
+has no special meaning, so we can always use hlist_nulls_add_head_rcu()
+for them and save a few cycles/instructions.
+
+[1]
+
+==================================================================
+BUG: KMSAN: use of uninitialized memory in inet_ehash_insert+0xd40/0x1050
+CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.13.0+ #3288
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
+Call Trace:
+ <IRQ>
+ __dump_stack lib/dump_stack.c:16
+ dump_stack+0x185/0x1d0 lib/dump_stack.c:52
+ kmsan_report+0x13f/0x1c0 mm/kmsan/kmsan.c:1016
+ __msan_warning_32+0x69/0xb0 mm/kmsan/kmsan_instr.c:766
+ __sk_nulls_add_node_rcu ./include/net/sock.h:684
+ inet_ehash_insert+0xd40/0x1050 net/ipv4/inet_hashtables.c:413
+ reqsk_queue_hash_req net/ipv4/inet_connection_sock.c:754
+ inet_csk_reqsk_queue_hash_add+0x1cc/0x300 net/ipv4/inet_connection_sock.c:765
+ tcp_conn_request+0x31e7/0x36f0 net/ipv4/tcp_input.c:6414
+ tcp_v4_conn_request+0x16d/0x220 net/ipv4/tcp_ipv4.c:1314
+ tcp_rcv_state_process+0x42a/0x7210 net/ipv4/tcp_input.c:5917
+ tcp_v4_do_rcv+0xa6a/0xcd0 net/ipv4/tcp_ipv4.c:1483
+ tcp_v4_rcv+0x3de0/0x4ab0 net/ipv4/tcp_ipv4.c:1763
+ ip_local_deliver_finish+0x6bb/0xcb0 net/ipv4/ip_input.c:216
+ NF_HOOK ./include/linux/netfilter.h:248
+ ip_local_deliver+0x3fa/0x480 net/ipv4/ip_input.c:257
+ dst_input ./include/net/dst.h:477
+ ip_rcv_finish+0x6fb/0x1540 net/ipv4/ip_input.c:397
+ NF_HOOK ./include/linux/netfilter.h:248
+ ip_rcv+0x10f6/0x15c0 net/ipv4/ip_input.c:488
+ __netif_receive_skb_core+0x36f6/0x3f60 net/core/dev.c:4298
+ __netif_receive_skb net/core/dev.c:4336
+ netif_receive_skb_internal+0x63c/0x19c0 net/core/dev.c:4497
+ napi_skb_finish net/core/dev.c:4858
+ napi_gro_receive+0x629/0xa50 net/core/dev.c:4889
+ e1000_receive_skb drivers/net/ethernet/intel/e1000/e1000_main.c:4018
+ e1000_clean_rx_irq+0x1492/0x1d30
+drivers/net/ethernet/intel/e1000/e1000_main.c:4474
+ e1000_clean+0x43aa/0x5970 drivers/net/ethernet/intel/e1000/e1000_main.c:3819
+ napi_poll net/core/dev.c:5500
+ net_rx_action+0x73c/0x1820 net/core/dev.c:5566
+ __do_softirq+0x4b4/0x8dd kernel/softirq.c:284
+ invoke_softirq kernel/softirq.c:364
+ irq_exit+0x203/0x240 kernel/softirq.c:405
+ exiting_irq+0xe/0x10 ./arch/x86/include/asm/apic.h:638
+ do_IRQ+0x15e/0x1a0 arch/x86/kernel/irq.c:263
+ common_interrupt+0x86/0x86
+
+Fixes: d894ba18d4e4 ("soreuseport: fix ordering for mixed v4/v6 sockets")
+Fixes: d296ba60d8e2 ("soreuseport: Resolve merge conflict for v4/v6 ordering fix")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Alexander Potapenko <glider@google.com>
+Acked-by: Craig Gallek <kraig@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/rculist_nulls.h |   38 --------------------------------------
+ include/net/sock.h            |    6 +-----
+ 2 files changed, 1 insertion(+), 43 deletions(-)
+
+--- a/include/linux/rculist_nulls.h
++++ b/include/linux/rculist_nulls.h
+@@ -101,44 +101,6 @@ static inline void hlist_nulls_add_head_
+ }
+ /**
+- * hlist_nulls_add_tail_rcu
+- * @n: the element to add to the hash list.
+- * @h: the list to add to.
+- *
+- * Description:
+- * Adds the specified element to the end of the specified hlist_nulls,
+- * while permitting racing traversals.  NOTE: tail insertion requires
+- * list traversal.
+- *
+- * The caller must take whatever precautions are necessary
+- * (such as holding appropriate locks) to avoid racing
+- * with another list-mutation primitive, such as hlist_nulls_add_head_rcu()
+- * or hlist_nulls_del_rcu(), running on this same list.
+- * However, it is perfectly legal to run concurrently with
+- * the _rcu list-traversal primitives, such as
+- * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency
+- * problems on Alpha CPUs.  Regardless of the type of CPU, the
+- * list-traversal primitive must be guarded by rcu_read_lock().
+- */
+-static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n,
+-                                      struct hlist_nulls_head *h)
+-{
+-      struct hlist_nulls_node *i, *last = NULL;
+-
+-      for (i = hlist_nulls_first_rcu(h); !is_a_nulls(i);
+-           i = hlist_nulls_next_rcu(i))
+-              last = i;
+-
+-      if (last) {
+-              n->next = last->next;
+-              n->pprev = &last->next;
+-              rcu_assign_pointer(hlist_nulls_next_rcu(last), n);
+-      } else {
+-              hlist_nulls_add_head_rcu(n, h);
+-      }
+-}
+-
+-/**
+  * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type
+  * @tpos:     the type * to use as a loop cursor.
+  * @pos:      the &struct hlist_nulls_node to use as a loop cursor.
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -683,11 +683,7 @@ static inline void sk_add_node_rcu(struc
+ static inline void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
+ {
+-      if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
+-          sk->sk_family == AF_INET6)
+-              hlist_nulls_add_tail_rcu(&sk->sk_nulls_node, list);
+-      else
+-              hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list);
++      hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list);
+ }
+ static inline void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
diff --git a/queue-4.14/net-sched-cbq-create-block-for-q-link.block.patch b/queue-4.14/net-sched-cbq-create-block-for-q-link.block.patch
new file mode 100644 (file)
index 0000000..f104d59
--- /dev/null
@@ -0,0 +1,56 @@
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Jiri Pirko <jiri@mellanox.com>
+Date: Mon, 27 Nov 2017 18:37:21 +0100
+Subject: net: sched: cbq: create block for q->link.block
+
+From: Jiri Pirko <jiri@mellanox.com>
+
+
+[ Upstream commit d51aae68b142f48232257e96ce317db25445418d ]
+
+q->link.block is not initialized, that leads to EINVAL when one tries to
+add filter there. So initialize it properly.
+
+This can be reproduced by:
+$ tc qdisc add dev eth0 root handle 1: cbq avpkt 1000 rate 1000Mbit bandwidth 1000Mbit
+$ tc filter add dev eth0 parent 1: protocol ip prio 100 u32 match ip protocol 0 0x00 flowid 1:1
+
+Reported-by: Jaroslav Aster <jaster@redhat.com>
+Reported-by: Ivan Vecera <ivecera@redhat.com>
+Fixes: 6529eaba33f0 ("net: sched: introduce tcf block infractructure")
+Signed-off-by: Jiri Pirko <jiri@mellanox.com>
+Acked-by: Eelco Chaudron <echaudro@redhat.com>
+Reviewed-by: Ivan Vecera <ivecera@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_cbq.c |    9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/net/sched/sch_cbq.c
++++ b/net/sched/sch_cbq.c
+@@ -1157,9 +1157,13 @@ static int cbq_init(struct Qdisc *sch, s
+       if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
+               return -EINVAL;
++      err = tcf_block_get(&q->link.block, &q->link.filter_list);
++      if (err)
++              goto put_rtab;
++
+       err = qdisc_class_hash_init(&q->clhash);
+       if (err < 0)
+-              goto put_rtab;
++              goto put_block;
+       q->link.sibling = &q->link;
+       q->link.common.classid = sch->handle;
+@@ -1193,6 +1197,9 @@ static int cbq_init(struct Qdisc *sch, s
+       cbq_addprio(q, &q->link);
+       return 0;
++put_block:
++      tcf_block_put(q->link.block);
++
+ put_rtab:
+       qdisc_put_rtab(q->link.R_tab);
+       return err;
diff --git a/queue-4.14/net-thunderx-fix-tcp-udp-checksum-offload-for-ipv4-pkts.patch b/queue-4.14/net-thunderx-fix-tcp-udp-checksum-offload-for-ipv4-pkts.patch
new file mode 100644 (file)
index 0000000..a50ba1f
--- /dev/null
@@ -0,0 +1,47 @@
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Florian Westphal <fw@strlen.de>
+Date: Wed, 6 Dec 2017 01:04:50 +0100
+Subject: net: thunderx: Fix TCP/UDP checksum offload for IPv4 pkts
+
+From: Florian Westphal <fw@strlen.de>
+
+
+[ Upstream commit 134059fd2775be79e26c2dff87d25cc2f6ea5626 ]
+
+Offload IP header checksum to NIC.
+
+This fixes a previous patch which disabled checksum offloading
+for both IPv4 and IPv6 packets.  So L3 checksum offload was
+getting disabled for IPv4 pkts.  And HW is dropping these pkts
+for some reason.
+
+Without this patch, IPv4 TSO appears to be broken:
+
+Without this patch I get ~16 kbyte/s, with the patch close to 2 Mbyte/s
+when copying files via scp from a test box to my home workstation.
+
+Looking at tcpdump on the sender, it looks like the hardware drops IPv4 TSO skbs.
+This patch restores performance for me; IPv6 looks good too.
+
+Fixes: fa6d7cb5d76c ("net: thunderx: Fix TCP/UDP checksum offload for IPv6 pkts")
+Cc: Sunil Goutham <sgoutham@cavium.com>
+Cc: Aleksey Makarov <aleksey.makarov@auriga.com>
+Cc: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/cavium/thunder/nicvf_queues.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
++++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+@@ -1355,6 +1355,8 @@ nicvf_sq_add_hdr_subdesc(struct nicvf *n
+       /* Offload checksum calculation to HW */
+       if (skb->ip_summed == CHECKSUM_PARTIAL) {
++              if (ip.v4->version == 4)
++                      hdr->csum_l3 = 1; /* Enable IP csum calculation */
+               hdr->l3_offset = skb_network_offset(skb);
+               hdr->l4_offset = skb_transport_offset(skb);
diff --git a/queue-4.14/net-thunderx-fix-tcp-udp-checksum-offload-for-ipv6-pkts.patch b/queue-4.14/net-thunderx-fix-tcp-udp-checksum-offload-for-ipv6-pkts.patch
new file mode 100644 (file)
index 0000000..103d066
--- /dev/null
@@ -0,0 +1,38 @@
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Sunil Goutham <sgoutham@cavium.com>
+Date: Thu, 23 Nov 2017 22:34:31 +0300
+Subject: net: thunderx: Fix TCP/UDP checksum offload for IPv6 pkts
+
+From: Sunil Goutham <sgoutham@cavium.com>
+
+
+[ Upstream commit fa6d7cb5d76cf0467c61420fc9238045aedfd379 ]
+
+Don't offload IP header checksum to NIC.
+
+This fixes a previous patch which enabled checksum offloading
+for both IPv4 and IPv6 packets.  So L3 checksum offload was
+getting enabled for IPv6 pkts.  And HW is dropping these pkts
+as it assumes the pkt is IPv4 when IP csum offload is set
+in the SQ descriptor.
+
+Fixes:  3a9024f52c2e ("net: thunderx: Enable TSO and checksum offloads for ipv6")
+Signed-off-by: Sunil Goutham <sgoutham@cavium.com>
+Signed-off-by: Aleksey Makarov <aleksey.makarov@auriga.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/cavium/thunder/nicvf_queues.c |    1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
++++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+@@ -1355,7 +1355,6 @@ nicvf_sq_add_hdr_subdesc(struct nicvf *n
+       /* Offload checksum calculation to HW */
+       if (skb->ip_summed == CHECKSUM_PARTIAL) {
+-              hdr->csum_l3 = 1; /* Enable IP csum calculation */
+               hdr->l3_offset = skb_network_offset(skb);
+               hdr->l4_offset = skb_transport_offset(skb);
diff --git a/queue-4.14/packet-fix-crash-in-fanout_demux_rollover.patch b/queue-4.14/packet-fix-crash-in-fanout_demux_rollover.patch
new file mode 100644 (file)
index 0000000..44b1d08
--- /dev/null
@@ -0,0 +1,155 @@
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Mike Maloney <maloney@google.com>
+Date: Tue, 28 Nov 2017 10:44:29 -0500
+Subject: packet: fix crash in fanout_demux_rollover()
+
+From: Mike Maloney <maloney@google.com>
+
+
+syzkaller found a race condition in fanout_demux_rollover() while removing
+a packet socket from a fanout group.
+
+po->rollover is read and operated on during packet_rcv_fanout(), via
+fanout_demux_rollover(), but the pointer is currently cleared before the
+synchronization in packet_release(). It is safer to delay the cleanup
+until after synchronize_net() has been called, ensuring all calls to
+packet_rcv_fanout() for this socket have finished.
+
+To further simplify synchronization around the rollover structure, set
+po->rollover in fanout_add() only if there are no errors.  This removes
+the need for rcu in the struct and in the call to
+packet_getsockopt(..., PACKET_ROLLOVER_STATS, ...).
+
+Crashing stack trace:
+ fanout_demux_rollover+0xb6/0x4d0 net/packet/af_packet.c:1392
+ packet_rcv_fanout+0x649/0x7c8 net/packet/af_packet.c:1487
+ dev_queue_xmit_nit+0x835/0xc10 net/core/dev.c:1953
+ xmit_one net/core/dev.c:2975 [inline]
+ dev_hard_start_xmit+0x16b/0xac0 net/core/dev.c:2995
+ __dev_queue_xmit+0x17a4/0x2050 net/core/dev.c:3476
+ dev_queue_xmit+0x17/0x20 net/core/dev.c:3509
+ neigh_connected_output+0x489/0x720 net/core/neighbour.c:1379
+ neigh_output include/net/neighbour.h:482 [inline]
+ ip6_finish_output2+0xad1/0x22a0 net/ipv6/ip6_output.c:120
+ ip6_finish_output+0x2f9/0x920 net/ipv6/ip6_output.c:146
+ NF_HOOK_COND include/linux/netfilter.h:239 [inline]
+ ip6_output+0x1f4/0x850 net/ipv6/ip6_output.c:163
+ dst_output include/net/dst.h:459 [inline]
+ NF_HOOK.constprop.35+0xff/0x630 include/linux/netfilter.h:250
+ mld_sendpack+0x6a8/0xcc0 net/ipv6/mcast.c:1660
+ mld_send_initial_cr.part.24+0x103/0x150 net/ipv6/mcast.c:2072
+ mld_send_initial_cr net/ipv6/mcast.c:2056 [inline]
+ ipv6_mc_dad_complete+0x99/0x130 net/ipv6/mcast.c:2079
+ addrconf_dad_completed+0x595/0x970 net/ipv6/addrconf.c:4039
+ addrconf_dad_work+0xac9/0x1160 net/ipv6/addrconf.c:3971
+ process_one_work+0xbf0/0x1bc0 kernel/workqueue.c:2113
+ worker_thread+0x223/0x1990 kernel/workqueue.c:2247
+ kthread+0x35e/0x430 kernel/kthread.c:231
+ ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:432
+
+Fixes: 0648ab70afe6 ("packet: rollover prepare: per-socket state")
+Fixes: 509c7a1ecc860 ("packet: avoid panic in packet_getsockopt()")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Mike Maloney <maloney@google.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/packet/af_packet.c |   32 ++++++++++----------------------
+ net/packet/internal.h  |    1 -
+ 2 files changed, 10 insertions(+), 23 deletions(-)
+
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -1697,7 +1697,6 @@ static int fanout_add(struct sock *sk, u
+               atomic_long_set(&rollover->num, 0);
+               atomic_long_set(&rollover->num_huge, 0);
+               atomic_long_set(&rollover->num_failed, 0);
+-              po->rollover = rollover;
+       }
+       if (type_flags & PACKET_FANOUT_FLAG_UNIQUEID) {
+@@ -1755,6 +1754,8 @@ static int fanout_add(struct sock *sk, u
+               if (refcount_read(&match->sk_ref) < PACKET_FANOUT_MAX) {
+                       __dev_remove_pack(&po->prot_hook);
+                       po->fanout = match;
++                      po->rollover = rollover;
++                      rollover = NULL;
+                       refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1);
+                       __fanout_link(sk, po);
+                       err = 0;
+@@ -1768,10 +1769,7 @@ static int fanout_add(struct sock *sk, u
+       }
+ out:
+-      if (err && rollover) {
+-              kfree_rcu(rollover, rcu);
+-              po->rollover = NULL;
+-      }
++      kfree(rollover);
+       mutex_unlock(&fanout_mutex);
+       return err;
+ }
+@@ -1795,11 +1793,6 @@ static struct packet_fanout *fanout_rele
+                       list_del(&f->list);
+               else
+                       f = NULL;
+-
+-              if (po->rollover) {
+-                      kfree_rcu(po->rollover, rcu);
+-                      po->rollover = NULL;
+-              }
+       }
+       mutex_unlock(&fanout_mutex);
+@@ -3039,6 +3032,7 @@ static int packet_release(struct socket
+       synchronize_net();
+       if (f) {
++              kfree(po->rollover);
+               fanout_release_data(f);
+               kfree(f);
+       }
+@@ -3853,7 +3847,6 @@ static int packet_getsockopt(struct sock
+       void *data = &val;
+       union tpacket_stats_u st;
+       struct tpacket_rollover_stats rstats;
+-      struct packet_rollover *rollover;
+       if (level != SOL_PACKET)
+               return -ENOPROTOOPT;
+@@ -3932,18 +3925,13 @@ static int packet_getsockopt(struct sock
+                      0);
+               break;
+       case PACKET_ROLLOVER_STATS:
+-              rcu_read_lock();
+-              rollover = rcu_dereference(po->rollover);
+-              if (rollover) {
+-                      rstats.tp_all = atomic_long_read(&rollover->num);
+-                      rstats.tp_huge = atomic_long_read(&rollover->num_huge);
+-                      rstats.tp_failed = atomic_long_read(&rollover->num_failed);
+-                      data = &rstats;
+-                      lv = sizeof(rstats);
+-              }
+-              rcu_read_unlock();
+-              if (!rollover)
++              if (!po->rollover)
+                       return -EINVAL;
++              rstats.tp_all = atomic_long_read(&po->rollover->num);
++              rstats.tp_huge = atomic_long_read(&po->rollover->num_huge);
++              rstats.tp_failed = atomic_long_read(&po->rollover->num_failed);
++              data = &rstats;
++              lv = sizeof(rstats);
+               break;
+       case PACKET_TX_HAS_OFF:
+               val = po->tp_tx_has_off;
+--- a/net/packet/internal.h
++++ b/net/packet/internal.h
+@@ -95,7 +95,6 @@ struct packet_fanout {
+ struct packet_rollover {
+       int                     sock;
+-      struct rcu_head         rcu;
+       atomic_long_t           num;
+       atomic_long_t           num_huge;
+       atomic_long_t           num_failed;
diff --git a/queue-4.14/rds-fix-null-pointer-dereference-in-__rds_rdma_map.patch b/queue-4.14/rds-fix-null-pointer-dereference-in-__rds_rdma_map.patch
new file mode 100644 (file)
index 0000000..9170871
--- /dev/null
@@ -0,0 +1,89 @@
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Håkon Bugge <Haakon.Bugge@oracle.com>
+Date: Wed, 6 Dec 2017 17:18:28 +0100
+Subject: rds: Fix NULL pointer dereference in __rds_rdma_map
+
+From: Håkon Bugge <Haakon.Bugge@oracle.com>
+
+
+[ Upstream commit f3069c6d33f6ae63a1668737bc78aaaa51bff7ca ]
+
+This is a fix for syzkaller719569, where memory registration was
+attempted without any underlying transport being loaded.
+
+Analysis of the case reveals that it is the setsockopt() RDS_GET_MR
+(2) and RDS_GET_MR_FOR_DEST (7) that are vulnerable.
+
+Here is an example stack trace when the bug is hit:
+
+BUG: unable to handle kernel NULL pointer dereference at 00000000000000c0
+IP: __rds_rdma_map+0x36/0x440 [rds]
+PGD 2f93d03067 P4D 2f93d03067 PUD 2f93d02067 PMD 0
+Oops: 0000 [#1] SMP
+Modules linked in: bridge stp llc tun rpcsec_gss_krb5 nfsv4
+dns_resolver nfs fscache rds binfmt_misc sb_edac intel_powerclamp
+coretemp kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul
+ghash_clmulni_intel pcbc aesni_intel crypto_simd glue_helper cryptd
+iTCO_wdt mei_me sg iTCO_vendor_support ipmi_si mei ipmi_devintf nfsd
+shpchp pcspkr i2c_i801 ioatdma ipmi_msghandler wmi lpc_ich mfd_core
+auth_rpcgss nfs_acl lockd grace sunrpc ip_tables ext4 mbcache jbd2
+mgag200 i2c_algo_bit drm_kms_helper ixgbe syscopyarea ahci sysfillrect
+sysimgblt libahci mdio fb_sys_fops ttm ptp libata sd_mod mlx4_core drm
+crc32c_intel pps_core megaraid_sas i2c_core dca dm_mirror
+dm_region_hash dm_log dm_mod
+CPU: 48 PID: 45787 Comm: repro_set2 Not tainted 4.14.2-3.el7uek.x86_64 #2
+Hardware name: Oracle Corporation ORACLE SERVER X5-2L/ASM,MOBO TRAY,2U, BIOS 31110000 03/03/2017
+task: ffff882f9190db00 task.stack: ffffc9002b994000
+RIP: 0010:__rds_rdma_map+0x36/0x440 [rds]
+RSP: 0018:ffffc9002b997df0 EFLAGS: 00010202
+RAX: 0000000000000000 RBX: ffff882fa2182580 RCX: 0000000000000000
+RDX: 0000000000000000 RSI: ffffc9002b997e40 RDI: ffff882fa2182580
+RBP: ffffc9002b997e30 R08: 0000000000000000 R09: 0000000000000002
+R10: ffff885fb29e3838 R11: 0000000000000000 R12: ffff882fa2182580
+R13: ffff882fa2182580 R14: 0000000000000002 R15: 0000000020000ffc
+FS:  00007fbffa20b700(0000) GS:ffff882fbfb80000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00000000000000c0 CR3: 0000002f98a66006 CR4: 00000000001606e0
+Call Trace:
+ rds_get_mr+0x56/0x80 [rds]
+ rds_setsockopt+0x172/0x340 [rds]
+ ? __fget_light+0x25/0x60
+ ? __fdget+0x13/0x20
+ SyS_setsockopt+0x80/0xe0
+ do_syscall_64+0x67/0x1b0
+ entry_SYSCALL64_slow_path+0x25/0x25
+RIP: 0033:0x7fbff9b117f9
+RSP: 002b:00007fbffa20aed8 EFLAGS: 00000293 ORIG_RAX: 0000000000000036
+RAX: ffffffffffffffda RBX: 00000000000c84a4 RCX: 00007fbff9b117f9
+RDX: 0000000000000002 RSI: 0000400000000114 RDI: 000000000000109b
+RBP: 00007fbffa20af10 R08: 0000000000000020 R09: 00007fbff9dd7860
+R10: 0000000020000ffc R11: 0000000000000293 R12: 0000000000000000
+R13: 00007fbffa20b9c0 R14: 00007fbffa20b700 R15: 0000000000000021
+
+Code: 41 56 41 55 49 89 fd 41 54 53 48 83 ec 18 8b 87 f0 02 00 00 48
+89 55 d0 48 89 4d c8 85 c0 0f 84 2d 03 00 00 48 8b 87 00 03 00 00 <48>
+83 b8 c0 00 00 00 00 0f 84 25 03 00 00 48 8b 06 48 8b 56 08
+
+The fix is to check the existence of an underlying transport in
+__rds_rdma_map().
+
+Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/rds/rdma.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/rds/rdma.c
++++ b/net/rds/rdma.c
+@@ -183,7 +183,7 @@ static int __rds_rdma_map(struct rds_soc
+       long i;
+       int ret;
+-      if (rs->rs_bound_addr == 0) {
++      if (rs->rs_bound_addr == 0 || !rs->rs_transport) {
+               ret = -ENOTCONN; /* XXX not a great errno */
+               goto out;
+       }
diff --git a/queue-4.14/s390-qeth-build-max-size-gso-skbs-on-l2-devices.patch b/queue-4.14/s390-qeth-build-max-size-gso-skbs-on-l2-devices.patch
new file mode 100644 (file)
index 0000000..ceeff57
--- /dev/null
@@ -0,0 +1,49 @@
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Date: Fri, 1 Dec 2017 10:14:51 +0100
+Subject: s390/qeth: build max size GSO skbs on L2 devices
+
+From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+
+
+[ Upstream commit 0cbff6d4546613330a1c5f139f5c368e4ce33ca1 ]
+
+The current GSO skb size limit was copy&pasted over from the L3 path,
+where it is needed due to a TSO limitation.
+As L2 devices don't offer TSO support (and thus all GSO skbs are
+segmented before they reach the driver), there's no reason to restrict
+the stack in how large it may build the GSO skbs.
+
+Fixes: d52aec97e5bc ("qeth: enable scatter/gather in layer 2 mode")
+Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/s390/net/qeth_l2_main.c |    2 --
+ drivers/s390/net/qeth_l3_main.c |    4 ++--
+ 2 files changed, 2 insertions(+), 4 deletions(-)
+
+--- a/drivers/s390/net/qeth_l2_main.c
++++ b/drivers/s390/net/qeth_l2_main.c
+@@ -1027,8 +1027,6 @@ static int qeth_l2_setup_netdev(struct q
+       card->info.broadcast_capable = 1;
+       qeth_l2_request_initial_mac(card);
+-      card->dev->gso_max_size = (QETH_MAX_BUFFER_ELEMENTS(card) - 1) *
+-                                PAGE_SIZE;
+       SET_NETDEV_DEV(card->dev, &card->gdev->dev);
+       netif_napi_add(card->dev, &card->napi, qeth_poll, QETH_NAPI_WEIGHT);
+       netif_carrier_off(card->dev);
+--- a/drivers/s390/net/qeth_l3_main.c
++++ b/drivers/s390/net/qeth_l3_main.c
+@@ -2989,8 +2989,8 @@ static int qeth_l3_setup_netdev(struct q
+                               NETIF_F_HW_VLAN_CTAG_RX |
+                               NETIF_F_HW_VLAN_CTAG_FILTER;
+       netif_keep_dst(card->dev);
+-      card->dev->gso_max_size = (QETH_MAX_BUFFER_ELEMENTS(card) - 1) *
+-                                PAGE_SIZE;
++      netif_set_gso_max_size(card->dev, (QETH_MAX_BUFFER_ELEMENTS(card) - 1) *
++                                        PAGE_SIZE);
+       SET_NETDEV_DEV(card->dev, &card->gdev->dev);
+       netif_napi_add(card->dev, &card->napi, qeth_poll, QETH_NAPI_WEIGHT);
diff --git a/queue-4.14/s390-qeth-fix-early-exit-from-error-path.patch b/queue-4.14/s390-qeth-fix-early-exit-from-error-path.patch
new file mode 100644 (file)
index 0000000..0950611
--- /dev/null
@@ -0,0 +1,56 @@
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Date: Wed, 18 Oct 2017 17:40:17 +0200
+Subject: s390/qeth: fix early exit from error path
+
+From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+
+
+[ Upstream commit 83cf79a2fec3cf499eb6cb9eb608656fc2a82776 ]
+
+When the allocation of the addr buffer fails, we need to drop
+our reference on the inetdevice before returning.
+
+Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/s390/net/qeth_l3_main.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/s390/net/qeth_l3_main.c
++++ b/drivers/s390/net/qeth_l3_main.c
+@@ -1553,7 +1553,7 @@ static void qeth_l3_free_vlan_addresses4
+       addr = qeth_l3_get_addr_buffer(QETH_PROT_IPV4);
+       if (!addr)
+-              return;
++              goto out;
+       spin_lock_bh(&card->ip_lock);
+@@ -1567,6 +1567,7 @@ static void qeth_l3_free_vlan_addresses4
+       spin_unlock_bh(&card->ip_lock);
+       kfree(addr);
++out:
+       in_dev_put(in_dev);
+ }
+@@ -1591,7 +1592,7 @@ static void qeth_l3_free_vlan_addresses6
+       addr = qeth_l3_get_addr_buffer(QETH_PROT_IPV6);
+       if (!addr)
+-              return;
++              goto out;
+       spin_lock_bh(&card->ip_lock);
+@@ -1606,6 +1607,7 @@ static void qeth_l3_free_vlan_addresses6
+       spin_unlock_bh(&card->ip_lock);
+       kfree(addr);
++out:
+       in6_dev_put(in6_dev);
+ #endif /* CONFIG_QETH_IPV6 */
+ }
diff --git a/queue-4.14/s390-qeth-fix-gso-throughput-regression.patch b/queue-4.14/s390-qeth-fix-gso-throughput-regression.patch
new file mode 100644 (file)
index 0000000..ec2de26
--- /dev/null
@@ -0,0 +1,140 @@
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Date: Fri, 1 Dec 2017 10:14:50 +0100
+Subject: s390/qeth: fix GSO throughput regression
+
+From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+
+
+[ Upstream commit 6d69b1f1eb7a2edf8a3547f361c61f2538e054bb ]
+
+Using GSO with small MTUs currently results in a substantial throughput
+regression - which is caused by how qeth needs to map non-linear skbs
+into its IO buffer elements:
+compared to a linear skb, each GSO-segmented skb effectively consumes
+twice as many buffer elements (ie two instead of one) due to the
+additional header-only part. This causes the Output Queue to be
+congested with low-utilized IO buffers.
+
+Fix this as follows:
+If the MSS is low enough that a non-SG GSO segmentation produces
+order-0 skbs (currently ~3500 bytes), opt out of NETIF_F_SG. This is
+where we anticipate the biggest savings, since an SG-enabled
+GSO segmentation produces skbs that always consume at least two
+buffer elements.
+
+Larger MSS values continue to get an SG-enabled GSO segmentation, since
+1) the relative overhead of the additional header-only buffer element
+becomes less noticeable, and
+2) the linearization overhead increases.
+
+With the throughput regression fixed, re-enable NETIF_F_SG by default to
+reap the significant CPU savings of GSO.
+
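+As a rough worked example (assuming 4 KiB pages): SKB_MAX_HEAD(0) is
+PAGE_SIZE minus the skb_shared_info overhead, i.e. roughly 3.5 KB, so
+the linearization test
+
+      SKB_DATA_ALIGN(hroom + doffset + gso_size) <= SKB_MAX_HEAD(0)
+
+holds for an MSS of up to about 3400 bytes plus headers and headroom;
+larger MSS values fail the test and keep NETIF_F_SG enabled.
+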
+Fixes: 5722963a8e83 ("qeth: do not turn on SG per default")
+Reported-by: Nils Hoppmann <niho@de.ibm.com>
+Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/s390/net/qeth_core.h      |    3 +++
+ drivers/s390/net/qeth_core_main.c |   31 +++++++++++++++++++++++++++++++
+ drivers/s390/net/qeth_l2_main.c   |    2 ++
+ drivers/s390/net/qeth_l3_main.c   |    2 ++
+ 4 files changed, 38 insertions(+)
+
+--- a/drivers/s390/net/qeth_core.h
++++ b/drivers/s390/net/qeth_core.h
+@@ -985,6 +985,9 @@ struct qeth_cmd_buffer *qeth_get_setassp
+ int qeth_set_features(struct net_device *, netdev_features_t);
+ int qeth_recover_features(struct net_device *);
+ netdev_features_t qeth_fix_features(struct net_device *, netdev_features_t);
++netdev_features_t qeth_features_check(struct sk_buff *skb,
++                                    struct net_device *dev,
++                                    netdev_features_t features);
+ int qeth_vm_request_mac(struct qeth_card *card);
+ int qeth_push_hdr(struct sk_buff *skb, struct qeth_hdr **hdr, unsigned int len);
+--- a/drivers/s390/net/qeth_core_main.c
++++ b/drivers/s390/net/qeth_core_main.c
+@@ -19,6 +19,11 @@
+ #include <linux/mii.h>
+ #include <linux/kthread.h>
+ #include <linux/slab.h>
++#include <linux/if_vlan.h>
++#include <linux/netdevice.h>
++#include <linux/netdev_features.h>
++#include <linux/skbuff.h>
++
+ #include <net/iucv/af_iucv.h>
+ #include <net/dsfield.h>
+@@ -6505,6 +6510,32 @@ netdev_features_t qeth_fix_features(stru
+ }
+ EXPORT_SYMBOL_GPL(qeth_fix_features);
++netdev_features_t qeth_features_check(struct sk_buff *skb,
++                                    struct net_device *dev,
++                                    netdev_features_t features)
++{
++      /* GSO segmentation builds skbs with
++       *      a (small) linear part for the headers, and
++       *      page frags for the data.
++       * Compared to a linear skb, the header-only part consumes an
++       * additional buffer element. This reduces buffer utilization, and
++       * hurts throughput. So compress small segments into one element.
++       */
++      if (netif_needs_gso(skb, features)) {
++              /* match skb_segment(): */
++              unsigned int doffset = skb->data - skb_mac_header(skb);
++              unsigned int hsize = skb_shinfo(skb)->gso_size;
++              unsigned int hroom = skb_headroom(skb);
++
++              /* linearize only if resulting skb allocations are order-0: */
++              if (SKB_DATA_ALIGN(hroom + doffset + hsize) <= SKB_MAX_HEAD(0))
++                      features &= ~NETIF_F_SG;
++      }
++
++      return vlan_features_check(skb, features);
++}
++EXPORT_SYMBOL_GPL(qeth_features_check);
++
+ static int __init qeth_core_init(void)
+ {
+       int rc;
+--- a/drivers/s390/net/qeth_l2_main.c
++++ b/drivers/s390/net/qeth_l2_main.c
+@@ -963,6 +963,7 @@ static const struct net_device_ops qeth_
+       .ndo_stop               = qeth_l2_stop,
+       .ndo_get_stats          = qeth_get_stats,
+       .ndo_start_xmit         = qeth_l2_hard_start_xmit,
++      .ndo_features_check     = qeth_features_check,
+       .ndo_validate_addr      = eth_validate_addr,
+       .ndo_set_rx_mode        = qeth_l2_set_rx_mode,
+       .ndo_do_ioctl           = qeth_do_ioctl,
+@@ -1009,6 +1010,7 @@ static int qeth_l2_setup_netdev(struct q
+       if (card->info.type == QETH_CARD_TYPE_OSD && !card->info.guestlan) {
+               card->dev->hw_features = NETIF_F_SG;
+               card->dev->vlan_features = NETIF_F_SG;
++              card->dev->features |= NETIF_F_SG;
+               /* OSA 3S and earlier has no RX/TX support */
+               if (qeth_is_supported(card, IPA_OUTBOUND_CHECKSUM)) {
+                       card->dev->hw_features |= NETIF_F_IP_CSUM;
+--- a/drivers/s390/net/qeth_l3_main.c
++++ b/drivers/s390/net/qeth_l3_main.c
+@@ -2923,6 +2923,7 @@ static const struct net_device_ops qeth_
+       .ndo_stop               = qeth_l3_stop,
+       .ndo_get_stats          = qeth_get_stats,
+       .ndo_start_xmit         = qeth_l3_hard_start_xmit,
++      .ndo_features_check     = qeth_features_check,
+       .ndo_validate_addr      = eth_validate_addr,
+       .ndo_set_rx_mode        = qeth_l3_set_multicast_list,
+       .ndo_do_ioctl           = qeth_do_ioctl,
+@@ -2963,6 +2964,7 @@ static int qeth_l3_setup_netdev(struct q
+                               card->dev->vlan_features = NETIF_F_SG |
+                                       NETIF_F_RXCSUM | NETIF_F_IP_CSUM |
+                                       NETIF_F_TSO;
++                              card->dev->features |= NETIF_F_SG;
+                       }
+               }
+       } else if (card->info.type == QETH_CARD_TYPE_IQD) {
diff --git a/queue-4.14/s390-qeth-fix-thinko-in-ipv4-multicast-address-tracking.patch b/queue-4.14/s390-qeth-fix-thinko-in-ipv4-multicast-address-tracking.patch
new file mode 100644 (file)
index 0000000..ce890c4
--- /dev/null
@@ -0,0 +1,46 @@
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Date: Fri, 1 Dec 2017 10:14:49 +0100
+Subject: s390/qeth: fix thinko in IPv4 multicast address tracking
+
+From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+
+
+[ Upstream commit bc3ab70584696cb798b9e1e0ac8e6ced5fd4c3b8 ]
+
+Commit 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback")
+reworked how secondary addresses are managed for qeth devices.
+Instead of dropping & subsequently re-adding all addresses on every
+ndo_set_rx_mode() call, qeth now keeps track of the addresses that are
+currently registered with the HW.
+On an ndo_set_rx_mode() call, we thus only need to do (de-)registration
+requests for the addresses that have actually changed.
+
+On L3 devices, the lookup for IPv4 Multicast addresses checks the wrong
+hashtable - and thus never finds a match. As a result, we first delete
+*all* such addresses, and then re-add them. So each set_rx_mode()
+causes a short period where the IPv4 Multicast addresses are not
+registered, and the card stops forwarding inbound traffic for them.
+
+Fix this by setting the ->is_multicast flag on the lookup object, thus
+enabling qeth_l3_ip_from_hash() to search the correct hashtable and
+find a match there.
+
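+For context, qeth_l3_ip_from_hash() branches on that flag to decide
+which table to search (paraphrased, not the verbatim driver code):
+
+      if (addr->is_multicast)
+              /* search the multicast hashtable, card->ip_mc_htable */
+      else
+              /* search the unicast hashtable, card->ip_htable */
+
+so a lookup object with is_multicast left at 0 could never match an
+entry registered in the multicast table.
+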
+Fixes: 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback")
+Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/s390/net/qeth_l3_main.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/s390/net/qeth_l3_main.c
++++ b/drivers/s390/net/qeth_l3_main.c
+@@ -1376,6 +1376,7 @@ qeth_l3_add_mc_to_hash(struct qeth_card
+               tmp->u.a4.addr = be32_to_cpu(im4->multiaddr);
+               memcpy(tmp->mac, buf, sizeof(tmp->mac));
++              tmp->is_multicast = 1;
+               ipm = qeth_l3_ip_from_hash(card, tmp);
+               if (ipm) {
diff --git a/queue-4.14/sctp-use-right-member-as-the-param-of-list_for_each_entry.patch b/queue-4.14/sctp-use-right-member-as-the-param-of-list_for_each_entry.patch
new file mode 100644 (file)
index 0000000..4d2f230
--- /dev/null
@@ -0,0 +1,50 @@
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Sun, 26 Nov 2017 20:56:07 +0800
+Subject: sctp: use right member as the param of list_for_each_entry
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit a8dd397903a6e57157f6265911f7d35681364427 ]
+
+Commit d04adf1b3551 ("sctp: reset owner sk for data chunks on out queues
+when migrating a sock") made the mistake of using 'list' as the param of
+list_for_each_entry to traverse the retransmit, sacked and abandoned
+queues, while chunks are linked into these queues via their
+'transmitted_list' member.
+
+It could cause NULL dereference panic if there are chunks in any of these
+queues when peeling off one asoc.
+
+So use the chunk member 'transmitted_list' instead in this patch.
+
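+For reference, the third argument of list_for_each_entry() names the
+list_head member inside each entry that container_of() uses; iterating
+a queue with the wrong member yields bogus chunk pointers.  Abridged
+view of the relevant struct:
+
+      struct sctp_chunk {
+              struct list_head list;             /* out_chunk_list linkage */
+              struct list_head transmitted_list; /* retransmit/sacked/abandoned */
+              ...
+      };
+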
+Fixes: d04adf1b3551 ("sctp: reset owner sk for data chunks on out queues when migrating a sock")
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Acked-by: Neil Horman <nhorman@tuxdriver.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/socket.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/net/sctp/socket.c
++++ b/net/sctp/socket.c
+@@ -187,13 +187,13 @@ static void sctp_for_each_tx_datachunk(s
+               list_for_each_entry(chunk, &t->transmitted, transmitted_list)
+                       cb(chunk);
+-      list_for_each_entry(chunk, &q->retransmit, list)
++      list_for_each_entry(chunk, &q->retransmit, transmitted_list)
+               cb(chunk);
+-      list_for_each_entry(chunk, &q->sacked, list)
++      list_for_each_entry(chunk, &q->sacked, transmitted_list)
+               cb(chunk);
+-      list_for_each_entry(chunk, &q->abandoned, list)
++      list_for_each_entry(chunk, &q->abandoned, transmitted_list)
+               cb(chunk);
+       list_for_each_entry(chunk, &q->out_chunk_list, list)
diff --git a/queue-4.14/series b/queue-4.14/series
new file mode 100644 (file)
index 0000000..5d0f32a
--- /dev/null
@@ -0,0 +1,33 @@
+net-qmi_wwan-add-quectel-bg96-2c7c-0296.patch
+net-thunderx-fix-tcp-udp-checksum-offload-for-ipv6-pkts.patch
+net-thunderx-fix-tcp-udp-checksum-offload-for-ipv4-pkts.patch
+net-realtek-r8169-implement-set_link_ksettings.patch
+s390-qeth-fix-early-exit-from-error-path.patch
+tipc-fix-memory-leak-in-tipc_accept_from_sock.patch
+vhost-fix-skb-leak-in-handle_rx.patch
+rds-fix-null-pointer-dereference-in-__rds_rdma_map.patch
+sit-update-frag_off-info.patch
+tcp-add-tcp_v4_fill_cb-tcp_v4_restore_cb.patch
+packet-fix-crash-in-fanout_demux_rollover.patch
+net-packet-fix-a-race-in-packet_bind-and-packet_notifier.patch
+tcp-remove-buggy-call-to-tcp_v6_restore_cb.patch
+usbnet-fix-alignment-for-frames-with-no-ethernet-header.patch
+net-remove-hlist_nulls_add_tail_rcu.patch
+stmmac-reset-last-tso-segment-size-after-device-open.patch
+tcp-dccp-block-bh-before-arming-time_wait-timer.patch
+s390-qeth-build-max-size-gso-skbs-on-l2-devices.patch
+s390-qeth-fix-thinko-in-ipv4-multicast-address-tracking.patch
+s390-qeth-fix-gso-throughput-regression.patch
+tcp-use-ipcb-instead-of-tcp_skb_cb-in-inet_exact_dif_match.patch
+tipc-call-tipc_rcv-only-if-bearer-is-up-in-tipc_udp_recv.patch
+tcp-use-current-time-in-tcp_rcv_space_adjust.patch
+net-sched-cbq-create-block-for-q-link.block.patch
+tap-free-skb-if-flags-error.patch
+tcp-when-scheduling-tlp-time-of-rto-should-account-for-current-ack.patch
+tun-free-skb-in-early-errors.patch
+net-ipv6-fixup-device-for-anycast-routes-during-copy.patch
+tun-fix-rcu_read_lock-imbalance-in-tun_build_skb.patch
+net-accept-ufo-datagrams-from-tuntap-and-packet.patch
+net-openvswitch-datapath-fix-data-type-in-queue_gso_packets.patch
+cls_bpf-don-t-decrement-net-s-refcount-when-offload-fails.patch
+sctp-use-right-member-as-the-param-of-list_for_each_entry.patch
diff --git a/queue-4.14/sit-update-frag_off-info.patch b/queue-4.14/sit-update-frag_off-info.patch
new file mode 100644 (file)
index 0000000..b1728dc
--- /dev/null
@@ -0,0 +1,32 @@
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Hangbin Liu <liuhangbin@gmail.com>
+Date: Thu, 30 Nov 2017 10:41:14 +0800
+Subject: sit: update frag_off info
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+
+[ Upstream commit f859b4af1c52493ec21173ccc73d0b60029b5b88 ]
+
+After parsing the sit netlink change info, we forget to update frag_off in
+ipip6_tunnel_update(). Fix it by assigning frag_off the new value.
+
+Reported-by: Jianlin Shi <jishi@redhat.com>
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/sit.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/ipv6/sit.c
++++ b/net/ipv6/sit.c
+@@ -1087,6 +1087,7 @@ static void ipip6_tunnel_update(struct i
+       ipip6_tunnel_link(sitn, t);
+       t->parms.iph.ttl = p->iph.ttl;
+       t->parms.iph.tos = p->iph.tos;
++      t->parms.iph.frag_off = p->iph.frag_off;
+       if (t->parms.link != p->link || t->fwmark != fwmark) {
+               t->parms.link = p->link;
+               t->fwmark = fwmark;
diff --git a/queue-4.14/stmmac-reset-last-tso-segment-size-after-device-open.patch b/queue-4.14/stmmac-reset-last-tso-segment-size-after-device-open.patch
new file mode 100644 (file)
index 0000000..d98146a
--- /dev/null
@@ -0,0 +1,41 @@
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Lars Persson <lars.persson@axis.com>
+Date: Fri, 1 Dec 2017 11:12:44 +0100
+Subject: stmmac: reset last TSO segment size after device open
+
+From: Lars Persson <lars.persson@axis.com>
+
+
+[ Upstream commit 45ab4b13e46325d00f4acdb365d406e941a15f81 ]
+
+The mss variable tracks the last max segment size sent to the TSO
+engine. We do not update the hardware as long as we receive skb:s with
+the same value in gso_size.
+
+During a network device down/up cycle (mapped to stmmac_release() and
+stmmac_open() callbacks) we issue a reset to the hardware and it
+forgets the setting for mss. However, we did not zero out our mss
+variable, so the next transmission of a GSO packet happens with an
+undefined hardware setting.
+
+This triggers a hang in the TSO engine and eventually the netdev
+watchdog will bark.
+
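+The xmit path only reprograms the TSO engine when gso_size changes,
+roughly like this (a simplified sketch; the real code programs an MSS
+descriptor):
+
+      if (skb_shinfo(skb)->gso_size != priv->mss) {
+              priv->mss = skb_shinfo(skb)->gso_size;
+              /* write the new MSS into the hardware */
+      }
+
+Zeroing priv->mss in stmmac_open() forces the first GSO packet after a
+reset to reprogram the hardware instead of matching a stale value.
+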
+Fixes: f748be531d70 ("stmmac: support new GMAC4")
+Signed-off-by: Lars Persson <larper@axis.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/stmicro/stmmac/stmmac_main.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+@@ -2564,6 +2564,7 @@ static int stmmac_open(struct net_device
+       priv->dma_buf_sz = STMMAC_ALIGN(buf_sz);
+       priv->rx_copybreak = STMMAC_RX_COPYBREAK;
++      priv->mss = 0;
+       ret = alloc_dma_desc_resources(priv);
+       if (ret < 0) {
diff --git a/queue-4.14/tap-free-skb-if-flags-error.patch b/queue-4.14/tap-free-skb-if-flags-error.patch
new file mode 100644 (file)
index 0000000..2e5e414
--- /dev/null
@@ -0,0 +1,57 @@
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Wei Xu <wexu@redhat.com>
+Date: Fri, 1 Dec 2017 05:10:38 -0500
+Subject: tap: free skb if flags error
+
+From: Wei Xu <wexu@redhat.com>
+
+
+[ Upstream commit 61d78537843e676e7f56ac6db333db0c0529b892 ]
+
+tap_recvmsg() supports accepting an skb via msg_control since
+commit 3b4ba04acca8 ("tap: support receiving skb from msg_control").
+The skb, if present, should be freed within the function, otherwise
+it would be leaked.
+
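+The rule being enforced, in outline: once an skb is handed in via
+msg_control, every exit path owns it (illustrative shape, not the full
+function; both early-exit paths in the diff below now honour this):
+
+      struct sk_buff *skb = m->msg_control;
+
+      if (flags & ~(MSG_DONTWAIT|MSG_TRUNC)) {
+              if (skb)
+                      kfree_skb(skb);  /* error path must drop it */
+              return -EINVAL;
+      }
+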
+Signed-off-by: Wei Xu <wexu@redhat.com>
+Reported-by: Matthew Rosato <mjrosato@linux.vnet.ibm.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/tap.c |   14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/tap.c
++++ b/drivers/net/tap.c
+@@ -829,8 +829,11 @@ static ssize_t tap_do_read(struct tap_qu
+       DEFINE_WAIT(wait);
+       ssize_t ret = 0;
+-      if (!iov_iter_count(to))
++      if (!iov_iter_count(to)) {
++              if (skb)
++                      kfree_skb(skb);
+               return 0;
++      }
+       if (skb)
+               goto put;
+@@ -1154,11 +1157,14 @@ static int tap_recvmsg(struct socket *so
+                      size_t total_len, int flags)
+ {
+       struct tap_queue *q = container_of(sock, struct tap_queue, sock);
++      struct sk_buff *skb = m->msg_control;
+       int ret;
+-      if (flags & ~(MSG_DONTWAIT|MSG_TRUNC))
++      if (flags & ~(MSG_DONTWAIT|MSG_TRUNC)) {
++              if (skb)
++                      kfree_skb(skb);
+               return -EINVAL;
+-      ret = tap_do_read(q, &m->msg_iter, flags & MSG_DONTWAIT,
+-                        m->msg_control);
++      }
++      ret = tap_do_read(q, &m->msg_iter, flags & MSG_DONTWAIT, skb);
+       if (ret > total_len) {
+               m->msg_flags |= MSG_TRUNC;
+               ret = flags & MSG_TRUNC ? ret : total_len;
diff --git a/queue-4.14/tcp-add-tcp_v4_fill_cb-tcp_v4_restore_cb.patch b/queue-4.14/tcp-add-tcp_v4_fill_cb-tcp_v4_restore_cb.patch
new file mode 100644 (file)
index 0000000..b23e5f5
--- /dev/null
@@ -0,0 +1,256 @@
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Sun, 3 Dec 2017 09:32:59 -0800
+Subject: tcp: add tcp_v4_fill_cb()/tcp_v4_restore_cb()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit eeea10b83a139451130df1594f26710c8fa390c8 ]
+
+James Morris reported a kernel stack corruption bug [1] while
+running the SELinux testsuite, and bisected to a recent
+commit bffa72cf7f9d ("net: sk_buff rbnode reorg")
+
+We believe this commit is fine, but exposes an older bug.
+
+SELinux code runs from tcp_filter() and might send an ICMP,
+expecting IP options to be found in skb->cb[] using regular IPCB placement.
+
+We need to defer TCP mangling of skb->cb[] until after tcp_filter() runs.
+
+This patch adds tcp_v4_fill_cb()/tcp_v4_restore_cb() in a very
+similar way we added them for IPv6.
+
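+Both views alias the same skb->cb[] storage, which is why the ordering
+matters (abridged from the kernel's definitions):
+
+      #define IPCB(skb)       ((struct inet_skb_parm *)((skb)->cb))
+      #define TCP_SKB_CB(skb) ((struct tcp_skb_cb *)&((skb)->cb[0]))
+
+tcp_v4_fill_cb() first saves the inet_skb_parm into
+TCP_SKB_CB(skb)->header.h4 before TCP overwrites the rest, and
+tcp_v4_restore_cb() copies it back for code that expects IPCB layout,
+such as icmp_send() invoked from the SELinux hooks.
+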
+[1]
+[  339.806024] SELinux: failure in selinux_parse_skb(), unable to parse packet
+[  339.822505] Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: ffffffff81745af5
+[  339.822505]
+[  339.852250] CPU: 4 PID: 3642 Comm: client Not tainted 4.15.0-rc1-test #15
+[  339.868498] Hardware name: LENOVO 10FGS0VA1L/30BC, BIOS FWKT68A   01/19/2017
+[  339.885060] Call Trace:
+[  339.896875]  <IRQ>
+[  339.908103]  dump_stack+0x63/0x87
+[  339.920645]  panic+0xe8/0x248
+[  339.932668]  ? ip_push_pending_frames+0x33/0x40
+[  339.946328]  ? icmp_send+0x525/0x530
+[  339.958861]  ? kfree_skbmem+0x60/0x70
+[  339.971431]  __stack_chk_fail+0x1b/0x20
+[  339.984049]  icmp_send+0x525/0x530
+[  339.996205]  ? netlbl_skbuff_err+0x36/0x40
+[  340.008997]  ? selinux_netlbl_err+0x11/0x20
+[  340.021816]  ? selinux_socket_sock_rcv_skb+0x211/0x230
+[  340.035529]  ? security_sock_rcv_skb+0x3b/0x50
+[  340.048471]  ? sk_filter_trim_cap+0x44/0x1c0
+[  340.061246]  ? tcp_v4_inbound_md5_hash+0x69/0x1b0
+[  340.074562]  ? tcp_filter+0x2c/0x40
+[  340.086400]  ? tcp_v4_rcv+0x820/0xa20
+[  340.098329]  ? ip_local_deliver_finish+0x71/0x1a0
+[  340.111279]  ? ip_local_deliver+0x6f/0xe0
+[  340.123535]  ? ip_rcv_finish+0x3a0/0x3a0
+[  340.135523]  ? ip_rcv_finish+0xdb/0x3a0
+[  340.147442]  ? ip_rcv+0x27c/0x3c0
+[  340.158668]  ? inet_del_offload+0x40/0x40
+[  340.170580]  ? __netif_receive_skb_core+0x4ac/0x900
+[  340.183285]  ? rcu_accelerate_cbs+0x5b/0x80
+[  340.195282]  ? __netif_receive_skb+0x18/0x60
+[  340.207288]  ? process_backlog+0x95/0x140
+[  340.218948]  ? net_rx_action+0x26c/0x3b0
+[  340.230416]  ? __do_softirq+0xc9/0x26a
+[  340.241625]  ? do_softirq_own_stack+0x2a/0x40
+[  340.253368]  </IRQ>
+[  340.262673]  ? do_softirq+0x50/0x60
+[  340.273450]  ? __local_bh_enable_ip+0x57/0x60
+[  340.285045]  ? ip_finish_output2+0x175/0x350
+[  340.296403]  ? ip_finish_output+0x127/0x1d0
+[  340.307665]  ? nf_hook_slow+0x3c/0xb0
+[  340.318230]  ? ip_output+0x72/0xe0
+[  340.328524]  ? ip_fragment.constprop.54+0x80/0x80
+[  340.340070]  ? ip_local_out+0x35/0x40
+[  340.350497]  ? ip_queue_xmit+0x15c/0x3f0
+[  340.361060]  ? __kmalloc_reserve.isra.40+0x31/0x90
+[  340.372484]  ? __skb_clone+0x2e/0x130
+[  340.382633]  ? tcp_transmit_skb+0x558/0xa10
+[  340.393262]  ? tcp_connect+0x938/0xad0
+[  340.403370]  ? ktime_get_with_offset+0x4c/0xb0
+[  340.414206]  ? tcp_v4_connect+0x457/0x4e0
+[  340.424471]  ? __inet_stream_connect+0xb3/0x300
+[  340.435195]  ? inet_stream_connect+0x3b/0x60
+[  340.445607]  ? SYSC_connect+0xd9/0x110
+[  340.455455]  ? __audit_syscall_entry+0xaf/0x100
+[  340.466112]  ? syscall_trace_enter+0x1d0/0x2b0
+[  340.476636]  ? __audit_syscall_exit+0x209/0x290
+[  340.487151]  ? SyS_connect+0xe/0x10
+[  340.496453]  ? do_syscall_64+0x67/0x1b0
+[  340.506078]  ? entry_SYSCALL64_slow_path+0x25/0x25
+
+Fixes: 971f10eca186 ("tcp: better TCP_SKB_CB layout to reduce cache line misses")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: James Morris <james.l.morris@oracle.com>
+Tested-by: James Morris <james.l.morris@oracle.com>
+Tested-by: Casey Schaufler <casey@schaufler-ca.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_ipv4.c |   59 +++++++++++++++++++++++++++++++++++-----------------
+ net/ipv6/tcp_ipv6.c |   10 +++++---
+ 2 files changed, 46 insertions(+), 23 deletions(-)
+
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -1587,6 +1587,34 @@ int tcp_filter(struct sock *sk, struct s
+ }
+ EXPORT_SYMBOL(tcp_filter);
++static void tcp_v4_restore_cb(struct sk_buff *skb)
++{
++      memmove(IPCB(skb), &TCP_SKB_CB(skb)->header.h4,
++              sizeof(struct inet_skb_parm));
++}
++
++static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
++                         const struct tcphdr *th)
++{
++      /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
++       * barrier() makes sure compiler wont play fool^Waliasing games.
++       */
++      memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
++              sizeof(struct inet_skb_parm));
++      barrier();
++
++      TCP_SKB_CB(skb)->seq = ntohl(th->seq);
++      TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
++                                  skb->len - th->doff * 4);
++      TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
++      TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
++      TCP_SKB_CB(skb)->tcp_tw_isn = 0;
++      TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
++      TCP_SKB_CB(skb)->sacked  = 0;
++      TCP_SKB_CB(skb)->has_rxtstamp =
++                      skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
++}
++
+ /*
+  *    From tcp_input.c
+  */
+@@ -1627,24 +1655,6 @@ int tcp_v4_rcv(struct sk_buff *skb)
+       th = (const struct tcphdr *)skb->data;
+       iph = ip_hdr(skb);
+-      /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
+-       * barrier() makes sure compiler wont play fool^Waliasing games.
+-       */
+-      memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
+-              sizeof(struct inet_skb_parm));
+-      barrier();
+-
+-      TCP_SKB_CB(skb)->seq = ntohl(th->seq);
+-      TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
+-                                  skb->len - th->doff * 4);
+-      TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
+-      TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
+-      TCP_SKB_CB(skb)->tcp_tw_isn = 0;
+-      TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
+-      TCP_SKB_CB(skb)->sacked  = 0;
+-      TCP_SKB_CB(skb)->has_rxtstamp =
+-                      skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
+-
+ lookup:
+       sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
+                              th->dest, sdif, &refcounted);
+@@ -1675,14 +1685,19 @@ process:
+               sock_hold(sk);
+               refcounted = true;
+               nsk = NULL;
+-              if (!tcp_filter(sk, skb))
++              if (!tcp_filter(sk, skb)) {
++                      th = (const struct tcphdr *)skb->data;
++                      iph = ip_hdr(skb);
++                      tcp_v4_fill_cb(skb, iph, th);
+                       nsk = tcp_check_req(sk, skb, req, false);
++              }
+               if (!nsk) {
+                       reqsk_put(req);
+                       goto discard_and_relse;
+               }
+               if (nsk == sk) {
+                       reqsk_put(req);
++                      tcp_v4_restore_cb(skb);
+               } else if (tcp_child_process(sk, nsk, skb)) {
+                       tcp_v4_send_reset(nsk, skb);
+                       goto discard_and_relse;
+@@ -1708,6 +1723,7 @@ process:
+               goto discard_and_relse;
+       th = (const struct tcphdr *)skb->data;
+       iph = ip_hdr(skb);
++      tcp_v4_fill_cb(skb, iph, th);
+       skb->dev = NULL;
+@@ -1738,6 +1754,8 @@ no_tcp_socket:
+       if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
+               goto discard_it;
++      tcp_v4_fill_cb(skb, iph, th);
++
+       if (tcp_checksum_complete(skb)) {
+ csum_error:
+               __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
+@@ -1764,6 +1782,8 @@ do_time_wait:
+               goto discard_it;
+       }
++      tcp_v4_fill_cb(skb, iph, th);
++
+       if (tcp_checksum_complete(skb)) {
+               inet_twsk_put(inet_twsk(sk));
+               goto csum_error;
+@@ -1780,6 +1800,7 @@ do_time_wait:
+               if (sk2) {
+                       inet_twsk_deschedule_put(inet_twsk(sk));
+                       sk = sk2;
++                      tcp_v4_restore_cb(skb);
+                       refcounted = false;
+                       goto process;
+               }
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -1448,7 +1448,6 @@ process:
+               struct sock *nsk;
+               sk = req->rsk_listener;
+-              tcp_v6_fill_cb(skb, hdr, th);
+               if (tcp_v6_inbound_md5_hash(sk, skb)) {
+                       sk_drops_add(sk, skb);
+                       reqsk_put(req);
+@@ -1461,8 +1460,12 @@ process:
+               sock_hold(sk);
+               refcounted = true;
+               nsk = NULL;
+-              if (!tcp_filter(sk, skb))
++              if (!tcp_filter(sk, skb)) {
++                      th = (const struct tcphdr *)skb->data;
++                      hdr = ipv6_hdr(skb);
++                      tcp_v6_fill_cb(skb, hdr, th);
+                       nsk = tcp_check_req(sk, skb, req, false);
++              }
+               if (!nsk) {
+                       reqsk_put(req);
+                       goto discard_and_relse;
+@@ -1486,8 +1489,6 @@ process:
+       if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
+               goto discard_and_relse;
+-      tcp_v6_fill_cb(skb, hdr, th);
+-
+       if (tcp_v6_inbound_md5_hash(sk, skb))
+               goto discard_and_relse;
+@@ -1495,6 +1496,7 @@ process:
+               goto discard_and_relse;
+       th = (const struct tcphdr *)skb->data;
+       hdr = ipv6_hdr(skb);
++      tcp_v6_fill_cb(skb, hdr, th);
+       skb->dev = NULL;
diff --git a/queue-4.14/tcp-dccp-block-bh-before-arming-time_wait-timer.patch b/queue-4.14/tcp-dccp-block-bh-before-arming-time_wait-timer.patch
new file mode 100644 (file)
index 0000000..828587a
--- /dev/null
@@ -0,0 +1,79 @@
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 1 Dec 2017 10:06:56 -0800
+Subject: tcp/dccp: block bh before arming time_wait timer
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit cfac7f836a715b91f08c851df915d401a4d52783 ]
+
+Maciej Żenczykowski reported some panics in tcp_twsk_destructor()
+that might be caused by the following bug.
+
+timewait timer is pinned to the cpu, because we want to transition
+timewait refcount from 0 to 4 in one go, once everything has been
+initialized.
+
+At the time commit ed2e92394589 ("tcp/dccp: fix timewait races in timer
+handling") was merged, TCP was always running from BH habdler.
+
+After commit 5413d1babe8f ("net: do not block BH while processing
+socket backlog") we definitely can run tcp_time_wait() from process
+context.
+
+We need to block BH in the critical section so that the pinned timer
+has still its purpose.
+
+This bug is more likely to happen under stress and when very small RTO
+are used in datacenter flows.
+
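+The shape of the fix is the classic pattern for arming a cpu-pinned
+timer from process context (simplified outline of the TCP hunk below):
+
+      local_bh_disable();            /* pinned timer cannot fire yet */
+      inet_twsk_schedule(tw, timeo);
+      __inet_twsk_hashdance(tw, sk, &tcp_hashinfo);
+      inet_twsk_put(tw);
+      local_bh_enable();             /* handler may run from here on */
+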
+Fixes: 5413d1babe8f ("net: do not block BH while processing socket backlog")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Maciej Żenczykowski <maze@google.com>
+Acked-by: Maciej Żenczykowski <maze@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/dccp/minisocks.c     |    6 ++++++
+ net/ipv4/tcp_minisocks.c |    6 ++++++
+ 2 files changed, 12 insertions(+)
+
+--- a/net/dccp/minisocks.c
++++ b/net/dccp/minisocks.c
+@@ -57,10 +57,16 @@ void dccp_time_wait(struct sock *sk, int
+               if (state == DCCP_TIME_WAIT)
+                       timeo = DCCP_TIMEWAIT_LEN;
++              /* tw_timer is pinned, so we need to make sure BH are disabled
++               * in following section, otherwise timer handler could run before
++               * we complete the initialization.
++               */
++              local_bh_disable();
+               inet_twsk_schedule(tw, timeo);
+               /* Linkage updates. */
+               __inet_twsk_hashdance(tw, sk, &dccp_hashinfo);
+               inet_twsk_put(tw);
++              local_bh_enable();
+       } else {
+               /* Sorry, if we're out of memory, just CLOSE this
+                * socket up.  We've got bigger problems than
+--- a/net/ipv4/tcp_minisocks.c
++++ b/net/ipv4/tcp_minisocks.c
+@@ -312,10 +312,16 @@ void tcp_time_wait(struct sock *sk, int
+               if (state == TCP_TIME_WAIT)
+                       timeo = TCP_TIMEWAIT_LEN;
++              /* tw_timer is pinned, so we need to make sure BH are disabled
++               * in following section, otherwise timer handler could run before
++               * we complete the initialization.
++               */
++              local_bh_disable();
+               inet_twsk_schedule(tw, timeo);
+               /* Linkage updates. */
+               __inet_twsk_hashdance(tw, sk, &tcp_hashinfo);
+               inet_twsk_put(tw);
++              local_bh_enable();
+       } else {
+               /* Sorry, if we're out of memory, just CLOSE this
+                * socket up.  We've got bigger problems than
diff --git a/queue-4.14/tcp-remove-buggy-call-to-tcp_v6_restore_cb.patch b/queue-4.14/tcp-remove-buggy-call-to-tcp_v6_restore_cb.patch
new file mode 100644 (file)
index 0000000..8e02c8d
--- /dev/null
@@ -0,0 +1,41 @@
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 29 Nov 2017 17:43:57 -0800
+Subject: tcp: remove buggy call to tcp_v6_restore_cb()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 3016dad75b48279e579117ee3ed566ba90a3b023 ]
+
+tcp_v6_send_reset() expects to receive an skb with skb->cb[] layout as
+used in the TCP stack.
+MD5 lookup uses tcp_v6_iif() and tcp_v6_sdif() and thus
+TCP_SKB_CB(skb)->header.h6
+
+This patch probably fixes RST packets sent on behalf of a timewait md5
+ipv6 socket.
+
+Before Florian's patch, tcp_v6_restore_cb() was needed before jumping to
+the no_tcp_socket label.
+
+Fixes: 271c3b9b7bda ("tcp: honour SO_BINDTODEVICE for TW_RST case too")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Florian Westphal <fw@strlen.de>
+Acked-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/tcp_ipv6.c |    1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -1585,7 +1585,6 @@ do_time_wait:
+               tcp_v6_timewait_ack(sk, skb);
+               break;
+       case TCP_TW_RST:
+-              tcp_v6_restore_cb(skb);
+               tcp_v6_send_reset(sk, skb);
+               inet_twsk_deschedule_put(inet_twsk(sk));
+               goto discard_it;
diff --git a/queue-4.14/tcp-use-current-time-in-tcp_rcv_space_adjust.patch b/queue-4.14/tcp-use-current-time-in-tcp_rcv_space_adjust.patch
new file mode 100644 (file)
index 0000000..929c7f7
--- /dev/null
@@ -0,0 +1,37 @@
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 6 Dec 2017 11:08:19 -0800
+Subject: tcp: use current time in tcp_rcv_space_adjust()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 8632385022f2b05a6ca0b9e0f95575865de0e2ce ]
+
+When I switched rcv_rtt_est to high resolution timestamps, I forgot
+that tp->tcp_mstamp needed to be refreshed in tcp_rcv_space_adjust().
+
+Using an old timestamp leads to autotuning lags.
+
+Fixes: 645f4c6f2ebd ("tcp: switch rcv_rtt_est and rcvq_space to high resolution timestamps")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Wei Wang <weiwan@google.com>
+Cc: Neal Cardwell <ncardwell@google.com>
+Cc: Yuchung Cheng <ycheng@google.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -592,6 +592,7 @@ void tcp_rcv_space_adjust(struct sock *s
+       int time;
+       int copied;
++      tcp_mstamp_refresh(tp);
+       time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time);
+       if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0)
+               return;
diff --git a/queue-4.14/tcp-use-ipcb-instead-of-tcp_skb_cb-in-inet_exact_dif_match.patch b/queue-4.14/tcp-use-ipcb-instead-of-tcp_skb_cb-in-inet_exact_dif_match.patch
new file mode 100644 (file)
index 0000000..4ef3a16
--- /dev/null
@@ -0,0 +1,38 @@
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: David Ahern <dsahern@gmail.com>
+Date: Sun, 3 Dec 2017 09:33:00 -0800
+Subject: tcp: use IPCB instead of TCP_SKB_CB in inet_exact_dif_match()
+
+From: David Ahern <dsahern@gmail.com>
+
+
+[ Upstream commit b4d1605a8ea608fd7dc45b926a05d75d340bde4b ]
+
+After this fix ("tcp: add tcp_v4_fill_cb()/tcp_v4_restore_cb()"),
+socket lookups happen while skb->cb[] has not been mangled yet by TCP.
+
+Fixes: a04a480d4392 ("net: Require exact match for TCP socket lookups if dif is l3mdev")
+Signed-off-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/tcp.h |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -874,12 +874,11 @@ static inline int tcp_v6_sdif(const stru
+ }
+ #endif
+-/* TCP_SKB_CB reference means this can not be used from early demux */
+ static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb)
+ {
+ #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+       if (!net->ipv4.sysctl_tcp_l3mdev_accept &&
+-          skb && ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags))
++          skb && ipv4_l3mdev_skb(IPCB(skb)->flags))
+               return true;
+ #endif
+       return false;
diff --git a/queue-4.14/tcp-when-scheduling-tlp-time-of-rto-should-account-for-current-ack.patch b/queue-4.14/tcp-when-scheduling-tlp-time-of-rto-should-account-for-current-ack.patch
new file mode 100644 (file)
index 0000000..9867d3a
--- /dev/null
@@ -0,0 +1,128 @@
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Neal Cardwell <ncardwell@google.com>
+Date: Fri, 17 Nov 2017 21:06:14 -0500
+Subject: tcp: when scheduling TLP, time of RTO should account for current ACK
+
+From: Neal Cardwell <ncardwell@google.com>
+
+
+[ Upstream commit ed66dfaf236c04d414de1d218441296e57fb2bd2 ]
+
+Fix the TLP scheduling logic so that when scheduling a TLP probe, we
+ensure that the estimated time at which an RTO would fire accounts for
+the fact that ACKs indicating forward progress should push back RTO
+times.
+
+After the following fix:
+
+df92c8394e6e ("tcp: fix xmit timer to only be reset if data ACKed/SACKed")
+
+we had an unintentional behavior change in the following kind of
+scenario: suppose the RTT variance has been very low recently. Then
+suppose we send out a flight of N packets and our RTT is 100ms:
+
+t=0: send a flight of N packets
+t=100ms: receive an ACK for N-1 packets
+
+The response before df92c8394e6e was:
+  -> schedule a TLP for now + RTO_interval
+
+The response after df92c8394e6e is:
+  -> schedule a TLP for t=0 + RTO_interval
+
+Since RTO_interval = srtt + RTT_variance, this means that we have
+scheduled a TLP timer at a point in the future that only accounts for
+RTT_variance. If the RTT_variance term is small, this means that the
+timer fires soon.
+
+Before df92c8394e6e this would not happen, because in that code, when
+we received an ACK for a prefix of the flight, we did:
+
+    1) Near the top of tcp_ack(), switch from TLP timer to RTO
+       at write_queue_head->packet_tx_time + RTO_interval:
+            if (icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
+                   tcp_rearm_rto(sk);
+
+    2) In tcp_clean_rtx_queue(), update the RTO to now + RTO_interval:
+            if (flag & FLAG_ACKED) {
+                   tcp_rearm_rto(sk);
+
+    3) In tcp_ack() after tcp_fastretrans_alert() switch from RTO
+       to TLP at now + RTO_interval:
+            if (icsk->icsk_pending == ICSK_TIME_RETRANS)
+                   tcp_schedule_loss_probe(sk);
+
+In df92c8394e6e we removed that 3-phase dance, and instead directly
+set the TLP timer once: we set the TLP timer in cases like this to
+write_queue_head->packet_tx_time + RTO_interval. So if the RTT
+variance is small, then this means that this is setting the TLP timer
+to fire quite soon. This means if the ACK for the tail of the flight
+takes longer than an RTT to arrive (often due to delayed ACKs), then
+the TLP timer fires too quickly.
+
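+Plugging numbers into the scenario above: with srtt = 100ms (as above)
+and an illustrative RTT_variance = 10ms, RTO_interval = 110ms.
+Scheduling the TLP at t=0 + 110ms means it fires only 10ms after the
+ACK at t=100ms, while re-arming at the ACK time (the advancing_rto case
+below) gives the delayed tail ACK a full 110ms to arrive.
+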
+Fixes: df92c8394e6e ("tcp: fix xmit timer to only be reset if data ACKed/SACKed")
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/tcp.h     |    2 +-
+ net/ipv4/tcp_input.c  |    2 +-
+ net/ipv4/tcp_output.c |    8 +++++---
+ 3 files changed, 7 insertions(+), 5 deletions(-)
+
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -563,7 +563,7 @@ void tcp_push_one(struct sock *, unsigne
+ void tcp_send_ack(struct sock *sk);
+ void tcp_send_delayed_ack(struct sock *sk);
+ void tcp_send_loss_probe(struct sock *sk);
+-bool tcp_schedule_loss_probe(struct sock *sk);
++bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto);
+ void tcp_skb_collapse_tstamp(struct sk_buff *skb,
+                            const struct sk_buff *next_skb);
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -3021,7 +3021,7 @@ void tcp_rearm_rto(struct sock *sk)
+ /* Try to schedule a loss probe; if that doesn't work, then schedule an RTO. */
+ static void tcp_set_xmit_timer(struct sock *sk)
+ {
+-      if (!tcp_schedule_loss_probe(sk))
++      if (!tcp_schedule_loss_probe(sk, true))
+               tcp_rearm_rto(sk);
+ }
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -2337,7 +2337,7 @@ repair:
+               /* Send one loss probe per tail loss episode. */
+               if (push_one != 2)
+-                      tcp_schedule_loss_probe(sk);
++                      tcp_schedule_loss_probe(sk, false);
+               is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd);
+               tcp_cwnd_validate(sk, is_cwnd_limited);
+               return false;
+@@ -2345,7 +2345,7 @@ repair:
+       return !tp->packets_out && tcp_send_head(sk);
+ }
+-bool tcp_schedule_loss_probe(struct sock *sk)
++bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto)
+ {
+       struct inet_connection_sock *icsk = inet_csk(sk);
+       struct tcp_sock *tp = tcp_sk(sk);
+@@ -2384,7 +2384,9 @@ bool tcp_schedule_loss_probe(struct sock
+       }
+       /* If the RTO formula yields an earlier time, then use that time. */
+-      rto_delta_us = tcp_rto_delta_us(sk);  /* How far in future is RTO? */
++      rto_delta_us = advancing_rto ?
++                      jiffies_to_usecs(inet_csk(sk)->icsk_rto) :
++                      tcp_rto_delta_us(sk);  /* How far in future is RTO? */
+       if (rto_delta_us > 0)
+               timeout = min_t(u32, timeout, usecs_to_jiffies(rto_delta_us));
diff --git a/queue-4.14/tipc-call-tipc_rcv-only-if-bearer-is-up-in-tipc_udp_recv.patch b/queue-4.14/tipc-call-tipc_rcv-only-if-bearer-is-up-in-tipc_udp_recv.patch
new file mode 100644 (file)
index 0000000..529e916
--- /dev/null
@@ -0,0 +1,126 @@
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Tommi Rantala <tommi.t.rantala@nokia.com>
+Date: Wed, 29 Nov 2017 12:48:42 +0200
+Subject: tipc: call tipc_rcv() only if bearer is up in tipc_udp_recv()
+
+From: Tommi Rantala <tommi.t.rantala@nokia.com>
+
+
+[ Upstream commit c7799c067c2ae33e348508c8afec354f3257ff25 ]
+
+Remove the second tipc_rcv() call in tipc_udp_recv(). We have just
+checked that the bearer is not up, and calling tipc_rcv() with a bearer
+that is not up leads to a TIPC div-by-zero crash in
+tipc_node_calculate_timer(). The crash is rare in practice, but can
+happen like this:
+
+  We're enabling a bearer, but it's not yet up and fully initialized.
+  At the same time we receive a discovery packet, and in tipc_udp_recv()
+  we end up calling tipc_rcv() with the not-yet-initialized bearer,
+  causing later the div-by-zero crash in tipc_node_calculate_timer().
+
+Jon Maloy explains the impact of removing the second tipc_rcv() call:
+  "link setup in the worst case will be delayed until the next arriving
+   discovery messages, 1 sec later, and this is an acceptable delay."
+
+As the tipc_rcv() call is removed, just leave the function via the
+rcu_out label, so that the skb is freed via kfree_skb().
+
+[   12.590450] Own node address <1.1.1>, network identity 1
+[   12.668088] divide error: 0000 [#1] SMP
+[   12.676952] CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.14.2-dirty #1
+[   12.679225] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-2.fc27 04/01/2014
+[   12.682095] task: ffff8c2a761edb80 task.stack: ffffa41cc0cac000
+[   12.684087] RIP: 0010:tipc_node_calculate_timer.isra.12+0x45/0x60 [tipc]
+[   12.686486] RSP: 0018:ffff8c2a7fc838a0 EFLAGS: 00010246
+[   12.688451] RAX: 0000000000000000 RBX: ffff8c2a5b382600 RCX: 0000000000000000
+[   12.691197] RDX: 0000000000000000 RSI: ffff8c2a5b382600 RDI: ffff8c2a5b382600
+[   12.693945] RBP: ffff8c2a7fc838b0 R08: 0000000000000001 R09: 0000000000000001
+[   12.696632] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8c2a5d8949d8
+[   12.699491] R13: ffffffff95ede400 R14: 0000000000000000 R15: ffff8c2a5d894800
+[   12.702338] FS:  0000000000000000(0000) GS:ffff8c2a7fc80000(0000) knlGS:0000000000000000
+[   12.705099] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[   12.706776] CR2: 0000000001bb9440 CR3: 00000000bd009001 CR4: 00000000003606e0
+[   12.708847] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+[   12.711016] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+[   12.712627] Call Trace:
+[   12.713390]  <IRQ>
+[   12.714011]  tipc_node_check_dest+0x2e8/0x350 [tipc]
+[   12.715286]  tipc_disc_rcv+0x14d/0x1d0 [tipc]
+[   12.716370]  tipc_rcv+0x8b0/0xd40 [tipc]
+[   12.717396]  ? minmax_running_min+0x2f/0x60
+[   12.718248]  ? dst_alloc+0x4c/0xa0
+[   12.718964]  ? tcp_ack+0xaf1/0x10b0
+[   12.719658]  ? tipc_udp_is_known_peer+0xa0/0xa0 [tipc]
+[   12.720634]  tipc_udp_recv+0x71/0x1d0 [tipc]
+[   12.721459]  ? dst_alloc+0x4c/0xa0
+[   12.722130]  udp_queue_rcv_skb+0x264/0x490
+[   12.722924]  __udp4_lib_rcv+0x21e/0x990
+[   12.723670]  ? ip_route_input_rcu+0x2dd/0xbf0
+[   12.724442]  ? tcp_v4_rcv+0x958/0xa40
+[   12.725039]  udp_rcv+0x1a/0x20
+[   12.725587]  ip_local_deliver_finish+0x97/0x1d0
+[   12.726323]  ip_local_deliver+0xaf/0xc0
+[   12.726959]  ? ip_route_input_noref+0x19/0x20
+[   12.727689]  ip_rcv_finish+0xdd/0x3b0
+[   12.728307]  ip_rcv+0x2ac/0x360
+[   12.728839]  __netif_receive_skb_core+0x6fb/0xa90
+[   12.729580]  ? udp4_gro_receive+0x1a7/0x2c0
+[   12.730274]  __netif_receive_skb+0x1d/0x60
+[   12.730953]  ? __netif_receive_skb+0x1d/0x60
+[   12.731637]  netif_receive_skb_internal+0x37/0xd0
+[   12.732371]  napi_gro_receive+0xc7/0xf0
+[   12.732920]  receive_buf+0x3c3/0xd40
+[   12.733441]  virtnet_poll+0xb1/0x250
+[   12.733944]  net_rx_action+0x23e/0x370
+[   12.734476]  __do_softirq+0xc5/0x2f8
+[   12.734922]  irq_exit+0xfa/0x100
+[   12.735315]  do_IRQ+0x4f/0xd0
+[   12.735680]  common_interrupt+0xa2/0xa2
+[   12.736126]  </IRQ>
+[   12.736416] RIP: 0010:native_safe_halt+0x6/0x10
+[   12.736925] RSP: 0018:ffffa41cc0cafe90 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff4d
+[   12.737756] RAX: 0000000000000000 RBX: ffff8c2a761edb80 RCX: 0000000000000000
+[   12.738504] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
+[   12.739258] RBP: ffffa41cc0cafe90 R08: 0000014b5b9795e5 R09: ffffa41cc12c7e88
+[   12.740118] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000002
+[   12.740964] R13: ffff8c2a761edb80 R14: 0000000000000000 R15: 0000000000000000
+[   12.741831]  default_idle+0x2a/0x100
+[   12.742323]  arch_cpu_idle+0xf/0x20
+[   12.742796]  default_idle_call+0x28/0x40
+[   12.743312]  do_idle+0x179/0x1f0
+[   12.743761]  cpu_startup_entry+0x1d/0x20
+[   12.744291]  start_secondary+0x112/0x120
+[   12.744816]  secondary_startup_64+0xa5/0xa5
+[   12.745367] Code: b9 f4 01 00 00 48 89 c2 48 c1 ea 02 48 3d d3 07 00
+00 48 0f 47 d1 49 8b 0c 24 48 39 d1 76 07 49 89 14 24 48 89 d1 31 d2 48
+89 df <48> f7 f1 89 c6 e8 81 6e ff ff 5b 41 5c 5d c3 66 90 66 2e 0f 1f
+[   12.747527] RIP: tipc_node_calculate_timer.isra.12+0x45/0x60 [tipc] RSP: ffff8c2a7fc838a0
+[   12.748555] ---[ end trace 1399ab83390650fd ]---
+[   12.749296] Kernel panic - not syncing: Fatal exception in interrupt
+[   12.750123] Kernel Offset: 0x13200000 from 0xffffffff82000000
+(relocation range: 0xffffffff80000000-0xffffffffbfffffff)
+[   12.751215] Rebooting in 60 seconds..
+
+Fixes: c9b64d492b1f ("tipc: add replicast peer discovery")
+Signed-off-by: Tommi Rantala <tommi.t.rantala@nokia.com>
+Cc: Jon Maloy <jon.maloy@ericsson.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/tipc/udp_media.c |    4 ----
+ 1 file changed, 4 deletions(-)
+
+--- a/net/tipc/udp_media.c
++++ b/net/tipc/udp_media.c
+@@ -371,10 +371,6 @@ static int tipc_udp_recv(struct sock *sk
+                       goto rcu_out;
+       }
+-      tipc_rcv(sock_net(sk), skb, b);
+-      rcu_read_unlock();
+-      return 0;
+-
+ rcu_out:
+       rcu_read_unlock();
+ out:
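The resulting control flow delivers at most once: only when the bearer lookup under RCU succeeds and the bearer is up, with every other path falling through to the shared cleanup labels that unlock and free the skb. A hedged userspace sketch of that shape (deliver() and the rcu_* helpers are illustrative stand-ins, not the TIPC API):

  #include <stdbool.h>
  #include <stdlib.h>

  struct bearer { bool up; };
  struct skb { int len; };

  static void rcu_lock(void) {}    /* models rcu_read_lock() */
  static void rcu_unlock(void) {}  /* models rcu_read_unlock() */
  static void deliver(struct skb *skb, struct bearer *b) { (void)skb; (void)b; }

  static int recv_one(struct skb *skb, struct bearer *b)
  {
          rcu_lock();
          if (b && b->up) {
                  deliver(skb, b);  /* the only call into tipc_rcv() */
                  rcu_unlock();
                  return 0;
          }
          /* Bearer missing or not yet up: never deliver a second time. */
          rcu_unlock();             /* matches the rcu_out label */
          free(skb);                /* matches the out label's kfree_skb() */
          return 0;
  }

  int main(void)
  {
          struct bearer b = { .up = false };
          return recv_one(malloc(sizeof(struct skb)), &b);
  }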
diff --git a/queue-4.14/tipc-fix-memory-leak-in-tipc_accept_from_sock.patch b/queue-4.14/tipc-fix-memory-leak-in-tipc_accept_from_sock.patch
new file mode 100644 (file)
index 0000000..af65cbe
--- /dev/null
@@ -0,0 +1,34 @@
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Jon Maloy <jon.maloy@ericsson.com>
+Date: Mon, 4 Dec 2017 22:00:20 +0100
+Subject: tipc: fix memory leak in tipc_accept_from_sock()
+
+From: Jon Maloy <jon.maloy@ericsson.com>
+
+
+[ Upstream commit a7d5f107b4978e08eeab599ee7449af34d034053 ]
+
+When the function tipc_accept_from_sock() fails to create an instance of
+struct tipc_subscriber, it neglects to free the already created instance
+of struct tipc_conn before it returns.
+
+We fix that with this commit.
+
+Reported-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/tipc/server.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/tipc/server.c
++++ b/net/tipc/server.c
+@@ -313,6 +313,7 @@ static int tipc_accept_from_sock(struct
+       newcon->usr_data = s->tipc_conn_new(newcon->conid);
+       if (!newcon->usr_data) {
+               sock_release(newsock);
++              conn_put(newcon);
+               return -ENOMEM;
+       }
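The pattern behind the fix is the usual partially-constructed-object rule: once the connection exists and holds a reference, every later failure path must drop that reference. A minimal sketch in plain C, assuming a toy refcount in place of the TIPC server internals:

  #include <stdlib.h>

  struct conn {
          int refcnt;
          void *usr_data;
  };

  static struct conn *conn_alloc(void)
  {
          struct conn *c = calloc(1, sizeof(*c));
          if (c)
                  c->refcnt = 1;  /* caller owns one reference */
          return c;
  }

  static void conn_put(struct conn *c)
  {
          if (c && --c->refcnt == 0)
                  free(c);
  }

  static int accept_one(void)
  {
          struct conn *c = conn_alloc();
          if (!c)
                  return -1;

          c->usr_data = malloc(64);  /* stands in for tipc_conn_new() */
          if (!c->usr_data) {
                  conn_put(c);       /* the reference the fix now drops */
                  return -1;         /* -ENOMEM in the kernel */
          }
          free(c->usr_data);
          conn_put(c);
          return 0;
  }

  int main(void) { return accept_one() ? 1 : 0; }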
diff --git a/queue-4.14/tun-fix-rcu_read_lock-imbalance-in-tun_build_skb.patch b/queue-4.14/tun-fix-rcu_read_lock-imbalance-in-tun_build_skb.patch
new file mode 100644 (file)
index 0000000..34b96fb
--- /dev/null
@@ -0,0 +1,46 @@
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Sun, 19 Nov 2017 19:31:04 +0800
+Subject: tun: fix rcu_read_lock imbalance in tun_build_skb
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit 654d573845f35017dc397840fa03610fef3d08b0 ]
+
+rcu_read_lock in tun_build_skb is used to rcu_dereference tun->xdp_prog
+safely, so rcu_read_unlock must be called on every return path.
+
+Two places get this wrong: one returns NULL in the XDP_REDIRECT switch
+case without unlocking, and another mistakenly calls rcu_read_lock
+instead of rcu_read_unlock before returning NULL in the if (xdp_xmit)
+block.
+
+So fix both in this patch.
+
+Fixes: 761876c857cb ("tap: XDP support")
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/tun.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -1326,6 +1326,7 @@ static struct sk_buff *tun_build_skb(str
+                       err = xdp_do_redirect(tun->dev, &xdp, xdp_prog);
+                       if (err)
+                               goto err_redirect;
++                      rcu_read_unlock();
+                       return NULL;
+               case XDP_TX:
+                       xdp_xmit = true;
+@@ -1358,7 +1359,7 @@ static struct sk_buff *tun_build_skb(str
+       if (xdp_xmit) {
+               skb->dev = tun->dev;
+               generic_xdp_tx(skb, xdp_prog);
+-              rcu_read_lock();
++              rcu_read_unlock();
+               return NULL;
+       }
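The invariant the patch restores: every path that enters the RCU read-side critical section leaves it exactly once. A hedged userspace model of that balance (lock()/unlock() stand in for rcu_read_lock()/rcu_read_unlock(); the verdicts mirror the XDP cases only loosely):

  #include <assert.h>

  static int depth;  /* models RCU read-side nesting */
  static void lock(void)   { depth++; }
  static void unlock(void) { depth--; }

  enum verdict { PASS, REDIRECT, TX };

  static void build_skb(enum verdict v)
  {
          lock();  /* protects the xdp_prog dereference */
          switch (v) {
          case REDIRECT:
                  unlock();  /* the unlock that was missing */
                  return;
          case TX:
                  unlock();  /* was a second lock() by mistake */
                  return;
          case PASS:
                  break;
          }
          unlock();
  }

  int main(void)
  {
          build_skb(REDIRECT);
          build_skb(TX);
          build_skb(PASS);
          assert(depth == 0);  /* balanced on every return path */
          return 0;
  }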
diff --git a/queue-4.14/tun-free-skb-in-early-errors.patch b/queue-4.14/tun-free-skb-in-early-errors.patch
new file mode 100644 (file)
index 0000000..74e7052
--- /dev/null
@@ -0,0 +1,85 @@
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Wei Xu <wexu@redhat.com>
+Date: Fri, 1 Dec 2017 05:10:37 -0500
+Subject: tun: free skb in early errors
+
+From: Wei Xu <wexu@redhat.com>
+
+
+[ Upstream commit c33ee15b3820a03cf8229ba9415084197b827f8c ]
+
+Since commit ac77cfd4258f ("tun: support receiving skb through
+msg_control"), tun_recvmsg() accepts an skb via msg_control. That skb,
+if present, must be freed no matter how far along the path it gets;
+otherwise it is leaked.
+
+This patch fixes several missed cases.
+
+Signed-off-by: Wei Xu <wexu@redhat.com>
+Reported-by: Matthew Rosato <mjrosato@linux.vnet.ibm.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/tun.c |   24 ++++++++++++++++++------
+ 1 file changed, 18 insertions(+), 6 deletions(-)
+
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -1734,8 +1734,11 @@ static ssize_t tun_do_read(struct tun_st
+       tun_debug(KERN_INFO, tun, "tun_do_read\n");
+-      if (!iov_iter_count(to))
++      if (!iov_iter_count(to)) {
++              if (skb)
++                      kfree_skb(skb);
+               return 0;
++      }
+       if (!skb) {
+               /* Read frames from ring */
+@@ -1851,22 +1854,24 @@ static int tun_recvmsg(struct socket *so
+ {
+       struct tun_file *tfile = container_of(sock, struct tun_file, socket);
+       struct tun_struct *tun = __tun_get(tfile);
++      struct sk_buff *skb = m->msg_control;
+       int ret;
+-      if (!tun)
+-              return -EBADFD;
++      if (!tun) {
++              ret = -EBADFD;
++              goto out_free_skb;
++      }
+       if (flags & ~(MSG_DONTWAIT|MSG_TRUNC|MSG_ERRQUEUE)) {
+               ret = -EINVAL;
+-              goto out;
++              goto out_put_tun;
+       }
+       if (flags & MSG_ERRQUEUE) {
+               ret = sock_recv_errqueue(sock->sk, m, total_len,
+                                        SOL_PACKET, TUN_TX_TIMESTAMP);
+               goto out;
+       }
+-      ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT,
+-                        m->msg_control);
++      ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT, skb);
+       if (ret > (ssize_t)total_len) {
+               m->msg_flags |= MSG_TRUNC;
+               ret = flags & MSG_TRUNC ? ret : total_len;
+@@ -1874,6 +1879,13 @@ static int tun_recvmsg(struct socket *so
+ out:
+       tun_put(tun);
+       return ret;
++
++out_put_tun:
++      tun_put(tun);
++out_free_skb:
++      if (skb)
++              kfree_skb(skb);
++      return ret;
+ }
+ static int tun_peek_len(struct socket *sock)
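The fix follows the standard kernel error-unwinding idiom: one goto label per teardown stage, so each early exit releases exactly what was acquired (or handed in) so far. A condensed userspace sketch, with tun_get/tun_put and the skb as illustrative stand-ins:

  #include <stdlib.h>

  struct tun { int d; };
  struct skb { int d; };

  static struct tun *tun_get(int ok) { return ok ? malloc(sizeof(struct tun)) : NULL; }
  static void tun_put(struct tun *t) { free(t); }
  static void free_skb(struct skb *s) { free(s); }  /* free(NULL) is a no-op */

  /* The skb arrives via msg_control and must be freed on every early error. */
  static int recvmsg_model(int have_tun, int bad_flags, struct skb *skb)
  {
          struct tun *tun = tun_get(have_tun);
          int ret;

          if (!tun) {
                  ret = -1;           /* -EBADFD */
                  goto out_free_skb;  /* nothing to put, but free the skb */
          }
          if (bad_flags) {
                  ret = -2;           /* -EINVAL */
                  goto out_put_tun;   /* put tun, then fall through */
          }

          free_skb(skb);  /* consumed on the normal path (tun_do_read) */
          tun_put(tun);
          return 0;

  out_put_tun:
          tun_put(tun);
  out_free_skb:
          free_skb(skb);
          return ret;
  }

  int main(void)
  {
          return recvmsg_model(0, 0, malloc(sizeof(struct skb))) == -1 ? 0 : 1;
  }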
diff --git a/queue-4.14/usbnet-fix-alignment-for-frames-with-no-ethernet-header.patch b/queue-4.14/usbnet-fix-alignment-for-frames-with-no-ethernet-header.patch
new file mode 100644 (file)
index 0000000..c790920
--- /dev/null
@@ -0,0 +1,67 @@
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Bjørn Mork <bjorn@mork.no>
+Date: Wed, 6 Dec 2017 20:21:24 +0100
+Subject: usbnet: fix alignment for frames with no ethernet header
+
+From: Bjørn Mork <bjorn@mork.no>
+
+
+[ Upstream commit a4abd7a80addb4a9547f7dfc7812566b60ec505c ]
+
+The qmi_wwan minidriver supports a 'raw-ip' mode where frames are
+received without any ethernet header. This causes alignment issues
+because the skbs allocated by usbnet are "IP aligned".
+
+Fix by allowing minidrivers to disable the additional alignment
+offset. This is implemented using a per-device flag, since the same
+minidriver also supports 'ethernet' mode.
+
+Fixes: 32f7adf633b9 ("net: qmi_wwan: support "raw IP" mode")
+Reported-and-tested-by: Jay Foster <jay@systech.com>
+Signed-off-by: Bjørn Mork <bjorn@mork.no>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/qmi_wwan.c |    2 ++
+ drivers/net/usb/usbnet.c   |    5 ++++-
+ include/linux/usb/usbnet.h |    1 +
+ 3 files changed, 7 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/usb/qmi_wwan.c
++++ b/drivers/net/usb/qmi_wwan.c
+@@ -261,9 +261,11 @@ static void qmi_wwan_netdev_setup(struct
+               net->hard_header_len = 0;
+               net->addr_len        = 0;
+               net->flags           = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
++              set_bit(EVENT_NO_IP_ALIGN, &dev->flags);
+               netdev_dbg(net, "mode: raw IP\n");
+       } else if (!net->header_ops) { /* don't bother if already set */
+               ether_setup(net);
++              clear_bit(EVENT_NO_IP_ALIGN, &dev->flags);
+               netdev_dbg(net, "mode: Ethernet\n");
+       }
+--- a/drivers/net/usb/usbnet.c
++++ b/drivers/net/usb/usbnet.c
+@@ -484,7 +484,10 @@ static int rx_submit (struct usbnet *dev
+               return -ENOLINK;
+       }
+-      skb = __netdev_alloc_skb_ip_align(dev->net, size, flags);
++      if (test_bit(EVENT_NO_IP_ALIGN, &dev->flags))
++              skb = __netdev_alloc_skb(dev->net, size, flags);
++      else
++              skb = __netdev_alloc_skb_ip_align(dev->net, size, flags);
+       if (!skb) {
+               netif_dbg(dev, rx_err, dev->net, "no rx skb\n");
+               usbnet_defer_kevent (dev, EVENT_RX_MEMORY);
+--- a/include/linux/usb/usbnet.h
++++ b/include/linux/usb/usbnet.h
+@@ -81,6 +81,7 @@ struct usbnet {
+ #             define EVENT_RX_KILL    10
+ #             define EVENT_LINK_CHANGE        11
+ #             define EVENT_SET_RX_MODE        12
++#             define EVENT_NO_IP_ALIGN        13
+ };
+ static inline struct usb_driver *driver_of(struct usb_interface *intf)
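Why the flag matters: __netdev_alloc_skb_ip_align() reserves NET_IP_ALIGN (2) bytes of headroom so that a 14-byte ethernet header leaves the IP header 4-byte aligned; with no ethernet header, that same offset misaligns the IP header instead. A small arithmetic sketch, assuming the usual NET_IP_ALIGN and ETH_HLEN values:

  #include <stdio.h>

  #define NET_IP_ALIGN 2   /* headroom __netdev_alloc_skb_ip_align() adds */
  #define ETH_HLEN     14  /* ethernet header length */

  /* Offset of the IP header from the (4-byte aligned) buffer start. */
  static unsigned int ip_offset(int has_eth_header, int no_ip_align)
  {
          unsigned int off = no_ip_align ? 0 : NET_IP_ALIGN;

          if (has_eth_header)
                  off += ETH_HLEN;
          return off;
  }

  int main(void)
  {
          printf("ethernet:          offset %2u -> aligned: %s\n",
                 ip_offset(1, 0), ip_offset(1, 0) % 4 ? "no" : "yes");
          printf("raw-IP, no flag:   offset %2u -> aligned: %s\n",
                 ip_offset(0, 0), ip_offset(0, 0) % 4 ? "no" : "yes");
          printf("raw-IP, with flag: offset %2u -> aligned: %s\n",
                 ip_offset(0, 1), ip_offset(0, 1) % 4 ? "no" : "yes");
          return 0;
  }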
diff --git a/queue-4.14/vhost-fix-skb-leak-in-handle_rx.patch b/queue-4.14/vhost-fix-skb-leak-in-handle_rx.patch
new file mode 100644 (file)
index 0000000..8f773c6
--- /dev/null
@@ -0,0 +1,71 @@
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Wei Xu <wexu@redhat.com>
+Date: Fri, 1 Dec 2017 05:10:36 -0500
+Subject: vhost: fix skb leak in handle_rx()
+
+From: Wei Xu <wexu@redhat.com>
+
+
+[ Upstream commit 6e474083f3daf3a3546737f5d7d502ad12eb257c ]
+
+Matthew found a roughly 40% TCP throughput regression with commit
+c67df11f ("vhost_net: try batch dequing from skb array"), as discussed
+in the following thread:
+https://www.mail-archive.com/netdev@vger.kernel.org/msg187936.html
+
+Eventually we figured out that it was an skb leak in handle_rx()
+when sending packets to the VM. It usually happens when the guest
+cannot drain the vq as fast as vhost fills it: the resulting traffic
+jam leaks skbs, because vhost consumes them from the batched rx array
+even when there is no headcount left to send them on the vq.
+
+This can be avoided by making sure we have enough headcount before
+actually consuming an skb from the batched rx array, which is simply
+done by moving the zero-headcount check a bit earlier.
+
+Signed-off-by: Wei Xu <wexu@redhat.com>
+Reported-by: Matthew Rosato <mjrosato@linux.vnet.ibm.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/vhost/net.c |   20 ++++++++++----------
+ 1 file changed, 10 insertions(+), 10 deletions(-)
+
+--- a/drivers/vhost/net.c
++++ b/drivers/vhost/net.c
+@@ -782,16 +782,6 @@ static void handle_rx(struct vhost_net *
+               /* On error, stop handling until the next kick. */
+               if (unlikely(headcount < 0))
+                       goto out;
+-              if (nvq->rx_array)
+-                      msg.msg_control = vhost_net_buf_consume(&nvq->rxq);
+-              /* On overrun, truncate and discard */
+-              if (unlikely(headcount > UIO_MAXIOV)) {
+-                      iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1);
+-                      err = sock->ops->recvmsg(sock, &msg,
+-                                               1, MSG_DONTWAIT | MSG_TRUNC);
+-                      pr_debug("Discarded rx packet: len %zd\n", sock_len);
+-                      continue;
+-              }
+               /* OK, now we need to know about added descriptors. */
+               if (!headcount) {
+                       if (unlikely(vhost_enable_notify(&net->dev, vq))) {
+@@ -804,6 +794,16 @@ static void handle_rx(struct vhost_net *
+                        * they refilled. */
+                       goto out;
+               }
++              if (nvq->rx_array)
++                      msg.msg_control = vhost_net_buf_consume(&nvq->rxq);
++              /* On overrun, truncate and discard */
++              if (unlikely(headcount > UIO_MAXIOV)) {
++                      iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1);
++                      err = sock->ops->recvmsg(sock, &msg,
++                                               1, MSG_DONTWAIT | MSG_TRUNC);
++                      pr_debug("Discarded rx packet: len %zd\n", sock_len);
++                      continue;
++              }
+               /* We don't need to be notified again. */
+               iov_iter_init(&msg.msg_iter, READ, vq->iov, in, vhost_len);
+               fixup = msg.msg_iter;
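The essence of the reordering: verify there is headcount to receive into before irrevocably consuming a packet from the batched rx array. A hedged sketch of that check-before-consume ordering (plain C; consume() loosely models vhost_net_buf_consume()):

  #include <stdio.h>

  static int batch[] = { 101, 102, 103 };  /* models the batched rx array */
  static int pos;
  static int consume(void) { return batch[pos++]; }

  /* Fixed ordering: bail out on zero headcount *before* consuming, so a
   * packet is never pulled from the batch without buffers to put it in. */
  static int handle_rx_once(int headcount)
  {
          if (headcount == 0)
                  return -1;   /* wait for the guest; nothing consumed */
          return consume();    /* safe: we have descriptors for it */
  }

  int main(void)
  {
          printf("%d\n", handle_rx_once(0));  /* -1, batch untouched */
          printf("%d\n", handle_rx_once(2));  /* 101, first packet taken */
          return 0;
  }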